summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unicode-segmentation
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unicode-segmentation
parentInitial commit. (diff)
downloadfirefox-esr-upstream.tar.xz
firefox-esr-upstream.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--third_party/rust/unicode-segmentation/.cargo-checksum.json1
-rw-r--r--third_party/rust/unicode-segmentation/COPYRIGHT7
-rw-r--r--third_party/rust/unicode-segmentation/Cargo.toml63
-rw-r--r--third_party/rust/unicode-segmentation/LICENSE-APACHE201
-rw-r--r--third_party/rust/unicode-segmentation/LICENSE-MIT25
-rw-r--r--third_party/rust/unicode-segmentation/README.md99
-rw-r--r--third_party/rust/unicode-segmentation/benches/graphemes.rs63
-rw-r--r--third_party/rust/unicode-segmentation/benches/unicode_words.rs64
-rw-r--r--third_party/rust/unicode-segmentation/benches/word_bounds.rs64
-rwxr-xr-xthird_party/rust/unicode-segmentation/scripts/unicode.py381
-rwxr-xr-xthird_party/rust/unicode-segmentation/scripts/unicode_gen_breaktests.py212
-rw-r--r--third_party/rust/unicode-segmentation/src/grapheme.rs801
-rw-r--r--third_party/rust/unicode-segmentation/src/lib.rs307
-rw-r--r--third_party/rust/unicode-segmentation/src/sentence.rs415
-rw-r--r--third_party/rust/unicode-segmentation/src/tables.rs2675
-rw-r--r--third_party/rust/unicode-segmentation/src/test.rs247
-rw-r--r--third_party/rust/unicode-segmentation/src/testdata.rs5250
-rw-r--r--third_party/rust/unicode-segmentation/src/word.rs754
18 files changed, 11629 insertions, 0 deletions
diff --git a/third_party/rust/unicode-segmentation/.cargo-checksum.json b/third_party/rust/unicode-segmentation/.cargo-checksum.json
new file mode 100644
index 0000000000..acc5d5d0e5
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"55e5a65c91693dd47a27409e54ad6d5ce805ce003b822e4a568bfd070725e956","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"efe7aa058e004e12d683039dbc4440e2fec3088364201a620703acedbeef8cb2","benches/graphemes.rs":"88a9f672ea7a03cc15fae36ce544a6e7234e532359402483978858ccda47db3d","benches/unicode_words.rs":"95c3a178ebe07c8cb2c560546ee911bfc4f1e1db81a6cd2c1cef1c99ed2a421a","benches/word_bounds.rs":"66acf40c0a4b06cdb6dd97c1759aba8dea961bb30cd7f223de3ebff8198520b2","scripts/unicode.py":"d4ba970a0419f33d20f3deb888be12427bfbb40aa25a5719968600d45cf4dadb","scripts/unicode_gen_breaktests.py":"ee96982d8959bec75c2382233cfca7e239f12a89a1be5fbf942601a215bb9283","src/grapheme.rs":"b5a32bdbb529e9417e8ada8d92656339b6ffb4e9bed8e6d32a0409c13a03050b","src/lib.rs":"572789173717edd0fe037ae656530663406951636c548e6793711b7d5caad910","src/sentence.rs":"aac52f69207e0b68925ab0c6c18cc36ed3da8e918006d96d724f0f19d4d9d643","src/tables.rs":"ba9fa1774b6294ed14565ec6be0f2ec316759d54e3af7c002b6848973d7b1f3c","src/test.rs":"f039fa285d510244672a067bdbe98ce7ff940e4f2ff82926466e012ac48ad95a","src/testdata.rs":"533c02ecace1bec3d46b65d101c7619bc83a2fb2c187a2c960346533c09a0e3e","src/word.rs":"6eeea9351c12f0a4404606596a487e0e8aa948ba4b134c7cb827ee41557a39fe"},"package":"0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a"} \ No newline at end of file
diff --git a/third_party/rust/unicode-segmentation/COPYRIGHT b/third_party/rust/unicode-segmentation/COPYRIGHT
new file mode 100644
index 0000000000..b286ec16ab
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/COPYRIGHT
@@ -0,0 +1,7 @@
+Licensed under the Apache License, Version 2.0
+<LICENSE-APACHE or
+http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
+at your option. All files in the project carrying such
+notice may not be copied, modified, or distributed except
+according to those terms.
diff --git a/third_party/rust/unicode-segmentation/Cargo.toml b/third_party/rust/unicode-segmentation/Cargo.toml
new file mode 100644
index 0000000000..0da56c81e8
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/Cargo.toml
@@ -0,0 +1,63 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "unicode-segmentation"
+version = "1.10.0"
+authors = [
+ "kwantam <kwantam@gmail.com>",
+ "Manish Goregaokar <manishsmail@gmail.com>",
+]
+exclude = [
+ "target/*",
+ "Cargo.lock",
+ "scripts/tmp",
+ "benches/texts/*",
+ "*.txt",
+]
+description = """
+This crate provides Grapheme Cluster, Word and Sentence boundaries
+according to Unicode Standard Annex #29 rules.
+"""
+homepage = "https://github.com/unicode-rs/unicode-segmentation"
+documentation = "https://unicode-rs.github.io/unicode-segmentation"
+readme = "README.md"
+keywords = [
+ "text",
+ "unicode",
+ "grapheme",
+ "word",
+ "boundary",
+]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/unicode-rs/unicode-segmentation"
+
+[[bench]]
+name = "graphemes"
+harness = false
+
+[[bench]]
+name = "unicode_words"
+harness = false
+
+[[bench]]
+name = "word_bounds"
+harness = false
+
+[dev-dependencies.criterion]
+version = "0.3"
+
+[dev-dependencies.quickcheck]
+version = "0.7"
+
+[features]
+no_std = []
diff --git a/third_party/rust/unicode-segmentation/LICENSE-APACHE b/third_party/rust/unicode-segmentation/LICENSE-APACHE
new file mode 100644
index 0000000000..16fe87b06e
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/third_party/rust/unicode-segmentation/LICENSE-MIT b/third_party/rust/unicode-segmentation/LICENSE-MIT
new file mode 100644
index 0000000000..e69282e381
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2015 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/third_party/rust/unicode-segmentation/README.md b/third_party/rust/unicode-segmentation/README.md
new file mode 100644
index 0000000000..48d9a92059
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/README.md
@@ -0,0 +1,99 @@
+Iterators which split strings on Grapheme Cluster, Word or Sentence boundaries, according
+to the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules.
+
+[![Build Status](https://travis-ci.org/unicode-rs/unicode-segmentation.svg)](https://travis-ci.org/unicode-rs/unicode-segmentation)
+
+[Documentation](https://unicode-rs.github.io/unicode-segmentation/unicode_segmentation/index.html)
+
+```rust
+use unicode_segmentation::UnicodeSegmentation;
+
+fn main() {
+ let s = "a̐éö̲\r\n";
+ let g = s.graphemes(true).collect::<Vec<&str>>();
+ let b: &[_] = &["a̐", "é", "ö̲", "\r\n"];
+ assert_eq!(g, b);
+
+ let s = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
+ let w = s.unicode_words().collect::<Vec<&str>>();
+ let b: &[_] = &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"];
+ assert_eq!(w, b);
+
+ let s = "The quick (\"brown\") fox";
+ let w = s.split_word_bounds().collect::<Vec<&str>>();
+ let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"];
+ assert_eq!(w, b);
+}
+```
+
+# no_std
+
+unicode-segmentation does not depend on libstd, so it can be used in crates
+with the `#![no_std]` attribute.
+
+# crates.io
+
+You can use this package in your project by adding the following
+to your `Cargo.toml`:
+
+```toml
+[dependencies]
+unicode-segmentation = "1.10.0"
+```
+
+# Change Log
+
+## 1.7.1
+
+* Update docs on version number
+
+## 1.7.0
+
+* [#87](https://github.com/unicode-rs/unicode-segmentation/pull/87) Upgrade to Unicode 13
+* [#79](https://github.com/unicode-rs/unicode-segmentation/pull/79) Implement a special-case lookup for ascii grapheme categories
+* [#77](https://github.com/unicode-rs/unicode-segmentation/pull/77) Optimization for grapheme iteration
+
+## 1.6.0
+
+* [#72](https://github.com/unicode-rs/unicode-segmentation/pull/72) Upgrade to Unicode 12
+
+## 1.5.0
+
+* [#68](https://github.com/unicode-rs/unicode-segmentation/pull/68) Upgrade to Unicode 11
+
+## 1.4.0
+
+* [#56](https://github.com/unicode-rs/unicode-segmentation/pull/56) Upgrade to Unicode 10
+
+## 1.3.0
+
+* [#24](https://github.com/unicode-rs/unicode-segmentation/pull/24) Add support for sentence boundaries
+* [#44](https://github.com/unicode-rs/unicode-segmentation/pull/44) Treat `gc=No` as a subset of `gc=N`
+
+## 1.2.1
+
+* [#37](https://github.com/unicode-rs/unicode-segmentation/pull/37):
+ Fix panic in `provide_context`.
+* [#40](https://github.com/unicode-rs/unicode-segmentation/pull/40):
+ Fix crash in `prev_boundary`.
+
+## 1.2.0
+
+* New `GraphemeCursor` API allows random access and bidirectional iteration.
+* Fixed incorrect splitting of certain emoji modifier sequences.
+
+## 1.1.0
+
+* Add `as_str` methods to the iterator types.
+
+## 1.0.3
+
+* Code cleanup and additional tests.
+
+## 1.0.1
+
+* Fix a bug affecting some grapheme clusters containing Prepend characters.
+
+## 1.0.0
+
+* Upgrade to Unicode 9.0.0.
diff --git a/third_party/rust/unicode-segmentation/benches/graphemes.rs b/third_party/rust/unicode-segmentation/benches/graphemes.rs
new file mode 100644
index 0000000000..3a0b9b76a3
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/benches/graphemes.rs
@@ -0,0 +1,63 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use unicode_segmentation;
+
+use std::fs;
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Registers a Criterion benchmark named `graphemes_<lang>` that iterates over
+/// every grapheme of the text stored at `path`.
+///
+/// The file is read once, outside the timed closure. Both the input text and
+/// each yielded grapheme are passed through `black_box`.
+///
+/// # Panics
+///
+/// Panics (via `unwrap`) if `path` cannot be read as a UTF-8 string.
+fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
+ let text = fs::read_to_string(path).unwrap();
+
+ c.bench_function(&format!("graphemes_{}", lang), |bench| {
+ bench.iter(|| {
+ for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
+ black_box(g);
+ }
+ })
+ });
+}
+
+// Per-language entry points: each one forwards to `graphemes` with a fixed
+// benchmark label and the matching sample file under `benches/texts/`.
+fn graphemes_arabic(c: &mut Criterion) {
+ graphemes(c, "arabic", "benches/texts/arabic.txt");
+}
+
+fn graphemes_english(c: &mut Criterion) {
+ graphemes(c, "english", "benches/texts/english.txt");
+}
+
+fn graphemes_hindi(c: &mut Criterion) {
+ graphemes(c, "hindi", "benches/texts/hindi.txt");
+}
+
+fn graphemes_japanese(c: &mut Criterion) {
+ graphemes(c, "japanese", "benches/texts/japanese.txt");
+}
+
+fn graphemes_korean(c: &mut Criterion) {
+ graphemes(c, "korean", "benches/texts/korean.txt");
+}
+
+fn graphemes_mandarin(c: &mut Criterion) {
+ graphemes(c, "mandarin", "benches/texts/mandarin.txt");
+}
+
+fn graphemes_russian(c: &mut Criterion) {
+ graphemes(c, "russian", "benches/texts/russian.txt");
+}
+
+fn graphemes_source_code(c: &mut Criterion) {
+ graphemes(c, "source_code", "benches/texts/source_code.txt");
+}
+
+criterion_group!(
+ benches,
+ graphemes_arabic,
+ graphemes_english,
+ graphemes_hindi,
+ graphemes_japanese,
+ graphemes_korean,
+ graphemes_mandarin,
+ graphemes_russian,
+ graphemes_source_code,
+);
+
+criterion_main!(benches);
diff --git a/third_party/rust/unicode-segmentation/benches/unicode_words.rs b/third_party/rust/unicode-segmentation/benches/unicode_words.rs
new file mode 100644
index 0000000000..c87851a376
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/benches/unicode_words.rs
@@ -0,0 +1,64 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+
+use std::fs;
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Benchmark label and sample file for every language that is measured.
+const TEXTS: &[(&str, &str)] = &[
+    ("arabic", "benches/texts/arabic.txt"),
+    ("english", "benches/texts/english.txt"),
+    ("hindi", "benches/texts/hindi.txt"),
+    ("japanese", "benches/texts/japanese.txt"),
+    ("korean", "benches/texts/korean.txt"),
+    ("mandarin", "benches/texts/mandarin.txt"),
+    ("russian", "benches/texts/russian.txt"),
+    ("source_code", "benches/texts/source_code.txt"),
+];
+
+/// Benchmarks `unicode_words()` over each sample text.
+///
+/// This bench previously depended on the `bencher` crate, which is not listed
+/// in the manifest's dev-dependencies; it now uses Criterion like
+/// `benches/graphemes.rs`. The byte length of each text is reported as the
+/// throughput, replacing the former `bench.bytes` assignment.
+///
+/// # Panics
+///
+/// Panics if a sample file cannot be read as a UTF-8 string.
+fn unicode_words(c: &mut Criterion) {
+    let mut group = c.benchmark_group("unicode_words");
+    for &(lang, path) in TEXTS {
+        // Read outside the timed closure so only segmentation is measured.
+        let text = fs::read_to_string(path).unwrap();
+        group.throughput(Throughput::Bytes(text.len() as u64));
+        group.bench_function(lang, |bench| {
+            bench.iter(|| {
+                for w in text.unicode_words() {
+                    black_box(w);
+                }
+            })
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(benches, unicode_words);
+
+criterion_main!(benches);
diff --git a/third_party/rust/unicode-segmentation/benches/word_bounds.rs b/third_party/rust/unicode-segmentation/benches/word_bounds.rs
new file mode 100644
index 0000000000..6b01ddb109
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/benches/word_bounds.rs
@@ -0,0 +1,64 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+
+use std::fs;
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Benchmark label and sample file for every language that is measured.
+const TEXTS: &[(&str, &str)] = &[
+    ("arabic", "benches/texts/arabic.txt"),
+    ("english", "benches/texts/english.txt"),
+    ("hindi", "benches/texts/hindi.txt"),
+    ("japanese", "benches/texts/japanese.txt"),
+    ("korean", "benches/texts/korean.txt"),
+    ("mandarin", "benches/texts/mandarin.txt"),
+    ("russian", "benches/texts/russian.txt"),
+    ("source_code", "benches/texts/source_code.txt"),
+];
+
+/// Benchmarks `split_word_bounds()` over each sample text.
+///
+/// This bench previously depended on the `bencher` crate, which is not listed
+/// in the manifest's dev-dependencies; it now uses Criterion like
+/// `benches/graphemes.rs`. The byte length of each text is reported as the
+/// throughput, replacing the former `bench.bytes` assignment.
+///
+/// # Panics
+///
+/// Panics if a sample file cannot be read as a UTF-8 string.
+fn word_bounds(c: &mut Criterion) {
+    let mut group = c.benchmark_group("word_bounds");
+    for &(lang, path) in TEXTS {
+        // Read outside the timed closure so only segmentation is measured.
+        let text = fs::read_to_string(path).unwrap();
+        group.throughput(Throughput::Bytes(text.len() as u64));
+        group.bench_function(lang, |bench| {
+            bench.iter(|| {
+                for w in text.split_word_bounds() {
+                    black_box(w);
+                }
+            })
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(benches, word_bounds);
+
+criterion_main!(benches);
diff --git a/third_party/rust/unicode-segmentation/scripts/unicode.py b/third_party/rust/unicode-segmentation/scripts/unicode.py
new file mode 100755
index 0000000000..7aed85e7c7
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/scripts/unicode.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python
+#
+# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+# This script uses the following Unicode tables:
+# - DerivedCoreProperties.txt
+# - auxiliary/GraphemeBreakProperty.txt
+# - auxiliary/WordBreakProperty.txt
+# - ReadMe.txt
+# - UnicodeData.txt
+#
+# Since this should not require frequent updates, we just store this
+# out-of-line and check the unicode.rs file into git.
+
+import fileinput, re, os, sys
+
+preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+
+#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
+'''
+
+# Mapping taken from Table 12 from:
+# http://www.unicode.org/reports/tr44/#General_Category_Values
+expanded_categories = {
+ 'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
+ 'Lm': ['L'], 'Lo': ['L'],
+ 'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
+ 'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
+ 'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
+ 'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
+ 'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
+ 'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
+ 'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
+}
+
+# these are the surrogate codepoints, which are not valid rust characters
+surrogate_codepoints = (0xd800, 0xdfff)
+
+UNICODE_VERSION = (15, 0, 0)
+
+UNICODE_VERSION_NUMBER = "%s.%s.%s" %UNICODE_VERSION
+
+# Return True when code point `n` lies inside the inclusive range stored in
+# `surrogate_codepoints`.
+def is_surrogate(n):
+ return surrogate_codepoints[0] <= n <= surrogate_codepoints[1]
+
+def fetch(f):
+ if not os.path.exists(os.path.basename(f)):
+ if "emoji" in f:
+ os.system("curl -O https://www.unicode.org/Public/%s/ucd/emoji/%s"
+ % (UNICODE_VERSION_NUMBER, f))
+ else:
+ os.system("curl -O https://www.unicode.org/Public/%s/ucd/%s"
+ % (UNICODE_VERSION_NUMBER, f))
+
+ if not os.path.exists(os.path.basename(f)):
+ sys.stderr.write("cannot load %s" % f)
+ exit(1)
+
+def load_gencats(f):
+ fetch(f)
+ gencats = {}
+
+ udict = {};
+ range_start = -1;
+ for line in fileinput.input(f):
+ data = line.split(';');
+ if len(data) != 15:
+ continue
+ cp = int(data[0], 16);
+ if is_surrogate(cp):
+ continue
+ if range_start >= 0:
+ for i in range(range_start, cp):
+ udict[i] = data;
+ range_start = -1;
+ if data[1].endswith(", First>"):
+ range_start = cp;
+ continue;
+ udict[cp] = data;
+
+ for code in udict:
+ [code_org, name, gencat, combine, bidi,
+ decomp, deci, digit, num, mirror,
+ old, iso, upcase, lowcase, titlecase ] = udict[code];
+
+ # place letter in categories as appropriate
+ for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
+ if cat not in gencats:
+ gencats[cat] = []
+ gencats[cat].append(code)
+
+ gencats = group_cats(gencats)
+ return gencats
+
+# Apply group_cat to every value of the dict `cats` and return the results in
+# a new dict with the same keys.
+def group_cats(cats):
+ cats_out = {}
+ for cat in cats:
+ cats_out[cat] = group_cat(cats[cat])
+ return cats_out
+
+def group_cat(cat):
+ cat_out = []
+ letters = sorted(set(cat))
+ cur_start = letters.pop(0)
+ cur_end = cur_start
+ for letter in letters:
+ assert letter > cur_end, \
+ "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
+ if letter == cur_end + 1:
+ cur_end = letter
+ else:
+ cat_out.append((cur_start, cur_end))
+ cur_start = cur_end = letter
+ cat_out.append((cur_start, cur_end))
+ return cat_out
+
+# Expand a list of inclusive (lo, hi) pairs into a flat list holding every
+# integer of each range, in the order the pairs are given.
+def ungroup_cat(cat):
+ cat_out = []
+ for (lo, hi) in cat:
+ while lo <= hi:
+ cat_out.append(lo)
+ lo += 1
+ return cat_out
+
+# Write `content` to `f` as wrapped lines that each start with `indent`
+# repetitions of the padding string.
+#
+# `content` is split on every "," and the pieces are re-joined with ", ".
+# A piece is appended to the current line while len(line) + len(piece) stays
+# below 98 (the ", " separator is not counted); otherwise the current line is
+# written followed by ",\n" and the piece starts a new line. The final line is
+# written without a trailing newline.
+def format_table_content(f, content, indent):
+ line = " "*indent
+ first = True
+ for chunk in content.split(","):
+ if len(line) + len(chunk) < 98:
+ if first:
+ line += chunk
+ else:
+ line += ", " + chunk
+ first = False
+ else:
+ f.write(line + ",\n")
+ line = " "*indent + chunk
+ f.write(line)
+
+# Parse a Unicode property file into a dict mapping property name to a list
+# of inclusive (lo, hi) code point ranges.
+#
+# The file is obtained with fetch(f) and read from os.path.basename(f). Two
+# line shapes are recognised: "HEX ; Name" (re1, a single code point) and
+# "HEX..HEX ; Name" (re2, a range); lines matching neither are skipped. When
+# `interestingprops` is non-empty, properties not contained in it are skipped.
+# Each property's ranges are finally expanded and re-grouped so that adjacent
+# ranges are merged.
+def load_properties(f, interestingprops):
+ fetch(f)
+ props = {}
+ re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
+ re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
+
+ for line in fileinput.input(os.path.basename(f)):
+ prop = None
+ d_lo = 0
+ d_hi = 0
+ m = re1.match(line)
+ if m:
+ d_lo = m.group(1)
+ d_hi = m.group(1)
+ prop = m.group(2)
+ else:
+ m = re2.match(line)
+ if m:
+ d_lo = m.group(1)
+ d_hi = m.group(2)
+ prop = m.group(3)
+ else:
+ continue
+ if interestingprops and prop not in interestingprops:
+ continue
+ d_lo = int(d_lo, 16)
+ d_hi = int(d_hi, 16)
+ if prop not in props:
+ props[prop] = []
+ props[prop].append((d_lo, d_hi))
+
+ # optimize if possible
+ for prop in props:
+ props[prop] = group_cat(ungroup_cat(props[prop]))
+
+ return props
+
def escape_char(c):
    r"""Format integer codepoint `c` as a Rust char literal like '\u{41}'."""
    hex_digits = "%x" % c
    return "'\\u{" + hex_digits + "}'"
+
def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
        pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True):
    """Write one Rust slice table named `name` to `f`.

    `t_data` is an iterable of entries; `pfun` renders a single entry as
    Rust source (by default a `(char, char)` tuple).  `t_type` is the Rust
    type of the table.  `is_const` chooses between `const` and `let`, and
    `is_pub` prefixes the declaration with `pub`.
    """
    keyword = "const" if is_const else "let"
    if is_pub:
        keyword = "pub " + keyword
    f.write(" %s %s: %s = &[\n" % (keyword, name, t_type))
    # render all entries into one comma-separated string, then let
    # format_table_content wrap it
    rendered = ",".join(pfun(entry) for entry in t_data)
    format_table_content(f, rendered, 8)
    f.write("\n ];\n\n")
+
def emit_util_mod(f):
    r"""Write the fixed Rust `util` module to `f`.

    The module contains `bsearch_range_table` (binary search over a
    sorted `(char, char)` range table) and the `is_alphabetic` /
    `is_numeric` / `is_alphanumeric` helpers, which answer ASCII directly
    and defer to the generated property tables for chars above '\x7f'.

    The template is a raw string so that `'\x7f'` reaches the generated
    Rust as a four-character escape.  With a non-raw string Python would
    replace it with a literal DEL byte inside the char literal.
    """
    f.write(r"""
pub mod util {
 #[inline]
 pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
 use core::cmp::Ordering::{Equal, Less, Greater};
 r.binary_search_by(|&(lo,hi)| {
 if lo <= c && c <= hi { Equal }
 else if hi < c { Less }
 else { Greater }
 }).is_ok()
 }

 #[inline]
 fn is_alphabetic(c: char) -> bool {
 match c {
 'a' ..= 'z' | 'A' ..= 'Z' => true,
 c if c > '\x7f' => super::derived_property::Alphabetic(c),
 _ => false,
 }
 }

 #[inline]
 fn is_numeric(c: char) -> bool {
 match c {
 '0' ..= '9' => true,
 c if c > '\x7f' => super::general_category::N(c),
 _ => false,
 }
 }

 #[inline]
 pub fn is_alphanumeric(c: char) -> bool {
 is_alphabetic(c) || is_numeric(c)
 }
}

""")
+
def emit_property_module(f, mod, tbl, emit):
    """Write a Rust module `mod` with one lookup function per property.

    For every name in `emit` (in sorted order) the ranges `tbl[name]` are
    written as a private `<name>_table`, followed by a public
    `fn <name>(c: char) -> bool` that binary-searches that table.
    """
    accessor = (" #[inline]\n"
                " pub fn %(cat)s(c: char) -> bool {\n"
                " super::util::bsearch_range_table(c, %(cat)s_table)\n"
                " }\n\n")
    f.write("mod %s {\n" % mod)
    for cat in sorted(emit):
        emit_table(f, "%s_table" % cat, tbl[cat], is_pub=False)
        f.write(accessor % {"cat": cat})
    f.write("}\n\n")
+
def emit_break_module(f, break_table, break_cats, name):
    """Write the Rust module for one break property (e.g. "grapheme").

    The module `name` contains:
      * an enum `<Name>Cat` with one `<N>C_<category>` variant for every
        entry of `break_cats` plus the catch-all `Any` (sorted by name),
      * `bsearch_range_value_table`, which returns the matching table row
        or, on a miss, the gap around the char tagged with `<N>C_Any`,
      * `<name>_category(c)`, the lookup entry point,
      * the `<name>_cat_table` built from `break_table`, a list of
        `(lo, hi, category)` tuples.

    `break_cats` is not modified: the variants are emitted from a sorted
    copy, so callers may safely reuse the list they pass in.
    """
    Name = name.capitalize()
    f.write("""pub mod %s {
 use core::result::Result::{Ok, Err};

 pub use self::%sCat::*;

 #[allow(non_camel_case_types)]
 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub enum %sCat {
""" % (name, Name, Name))

    # "Any" is the category reported for chars not covered by the table.
    # Build a new sorted list instead of appending to/sorting the
    # caller's list in place.
    all_cats = sorted(list(break_cats) + ["Any"])
    for cat in all_cats:
        f.write((" %sC_" % Name[0]) + cat + ",\n")
    f.write(""" }

 fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)]) -> (u32, u32, %sCat) {
 use core::cmp::Ordering::{Equal, Less, Greater};
 match r.binary_search_by(|&(lo, hi, _)| {
 if lo <= c && c <= hi { Equal }
 else if hi < c { Less }
 else { Greater }
 }) {
 Ok(idx) => {
 let (lower, upper, cat) = r[idx];
 (lower as u32, upper as u32, cat)
 }
 Err(idx) => {
 (
 if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 },
 r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
 %sC_Any,
 )
 }
 }
 }

 pub fn %s_category(c: char) -> (u32, u32, %sCat) {
 bsearch_range_value_table(c, %s_cat_table)
 }

""" % (Name, Name, Name[0], name, Name, name))

    emit_table(f, "%s_cat_table" % name, break_table, "&'static [(char, char, %sCat)]" % Name,
        pfun=lambda x: "(%s,%s,%sC_%s)" % (escape_char(x[0]), escape_char(x[1]), Name[0], x[2]),
        is_pub=False, is_const=True)
    f.write("}\n")
+
if __name__ == "__main__":
    # Regenerate tables.rs from scratch in the current directory.
    r = "tables.rs"
    if os.path.exists(r):
        os.remove(r)
    with open(r, "w") as rf:
        # write the file's preamble
        rf.write(preamble)
        rf.write("""
/// The version of [Unicode](http://www.unicode.org/)
/// that this version of unicode-segmentation is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
""" % UNICODE_VERSION)

        # download and parse all the data
        gencats = load_gencats("UnicodeData.txt")
        derived = load_properties("DerivedCoreProperties.txt", ["Alphabetic"])

        emit_util_mod(rf)
        for (name, cat, pfuns) in ("general_category", gencats, ["N"]), \
                                  ("derived_property", derived, ["Alphabetic"]):
            emit_property_module(rf, name, cat, pfuns)

        ### grapheme cluster module
        # from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
        grapheme_cats = load_properties("auxiliary/GraphemeBreakProperty.txt", [])

        # Control
        #  Note:
        # This category also includes Cs (surrogate codepoints), but Rust's `char`s are
        # Unicode Scalar Values only, and surrogates are thus invalid `char`s.
        # Thus, we have to remove Cs from the Control category
        grapheme_cats["Control"] = group_cat(list(
            set(ungroup_cat(grapheme_cats["Control"]))
            - set(ungroup_cat([surrogate_codepoints]))))

        grapheme_table = []
        for cat in grapheme_cats:
            grapheme_table.extend([(x, y, cat) for (x, y) in grapheme_cats[cat]])
        emoji_props = load_properties("emoji-data.txt", ["Extended_Pictographic"])
        grapheme_table.extend([(x, y, "Extended_Pictographic") for (x, y) in emoji_props["Extended_Pictographic"]])
        grapheme_table.sort(key=lambda w: w[0])
        # The merged table is only usable if no two ranges overlap.
        last = -1
        for chars in grapheme_table:
            if chars[0] <= last:
                # A bare string cannot be raised (that is itself a TypeError and
                # hides the message), so raise a real exception carrying it.
                raise ValueError("Grapheme tables and Extended_Pictographic values overlap; need to store these separately!")
            last = chars[1]
        emit_break_module(rf, grapheme_table, list(grapheme_cats.keys()) + ["Extended_Pictographic"], "grapheme")
        rf.write("\n")

        word_cats = load_properties("auxiliary/WordBreakProperty.txt", [])
        word_table = []
        for cat in word_cats:
            word_table.extend([(x, y, cat) for (x, y) in word_cats[cat]])
        word_table.sort(key=lambda w: w[0])
        emit_break_module(rf, word_table, list(word_cats.keys()), "word")

        # There are some emoji which are also ALetter, so this needs to be stored separately
        # For efficiency, we could still merge the two tables and produce an ALetterEP state
        emoji_table = [(x, y, "Extended_Pictographic") for (x, y) in emoji_props["Extended_Pictographic"]]
        emit_break_module(rf, emoji_table, ["Extended_Pictographic"], "emoji")

        sentence_cats = load_properties("auxiliary/SentenceBreakProperty.txt", [])
        sentence_table = []
        for cat in sentence_cats:
            sentence_table.extend([(x, y, cat) for (x, y) in sentence_cats[cat]])
        sentence_table.sort(key=lambda w: w[0])
        emit_break_module(rf, sentence_table, list(sentence_cats.keys()), "sentence")
diff --git a/third_party/rust/unicode-segmentation/scripts/unicode_gen_breaktests.py b/third_party/rust/unicode-segmentation/scripts/unicode_gen_breaktests.py
new file mode 100755
index 0000000000..113afa9f12
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/scripts/unicode_gen_breaktests.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+# -*- coding: utf-8
+#
+# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+# This script uses the following Unicode tables:
+# - auxiliary/GraphemeBreakTest.txt
+# - auxiliary/WordBreakTest.txt
+# - auxiliary/SentenceBreakTest.txt
+#
+# Since this should not require frequent updates, we just store this
+# out-of-line and check the generated testdata.rs file into git.
+from __future__ import print_function
+
+import unicode, re, os, fileinput
+
def load_test_data(f, optsplit=[]):
    """Load one Unicode break-test file into `[(chars, info), ...]`.

    `f` is fetched via `unicode.fetch` and read under its basename.  Every
    line starting with "÷" is a test case.  `chars` is the list of
    codepoint groups between breaks and `info` the rule numbers of those
    breaks, as computed by `process_split_info`; rules listed in
    `optsplit` are treated as breaks even when marked "×".  Test cases
    containing surrogates are skipped.
    """
    case_re = re.compile(r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$")

    unicode.fetch(f)
    cases = []
    for line in fileinput.input(os.path.basename(f)):
        # lines that include a test start with the ÷ character
        is_test_line = len(line) >= 2 and line.startswith('÷')
        if not is_test_line:
            continue

        hit = case_re.match(line)
        if hit is None:
            print("error: no match on line where test was expected: %s" % line)
            continue

        # group(1) holds the characters of this test case
        chars = process_split_string(hit.group(1))
        # an empty result means invalid characters (viz., surrogates)
        if not chars:
            continue

        # group(2) holds the per-position rule annotations
        chars, info = process_split_info(hit.group(2), chars, optsplit)

        # there must be break info for each break
        assert len(chars) - 1 == len(info)

        cases.append((chars, info))

    return cases
+
def process_split_info(s, c, o):
    """Regroup codepoint lists `c` according to the rule annotations in `s`.

    `s` is a sequence of "(÷|×) [rule]" annotations, one per position
    between consecutive entries of `c`.  A "÷" always separates the
    neighbouring entries; a "×" joins them unless its rule number is
    listed in `o`, in which case it is treated as a separator as well.

    Returns `(groups, rules)`: the regrouped codepoint lists and the rule
    number of every separator.  `c` is consumed, and its inner lists may
    be extended in place.
    """
    groups = []
    rules = []
    current = c.pop(0)

    # are we on a × or a ÷?
    isX = s.startswith('×')

    # walk over each instance of '(÷|×) [x.y] '
    while s:
        # the rule number currently under consideration
        rule = s[s.index('[') + 1:s.index(']')]

        if isX and rule not in o:
            # a real "no break": merge the next entry into this group
            current.extend(c.pop(0))
        else:
            # a ÷, or a × whose rule is listed in o: record a split
            rules.append(rule)
            groups.append(current)
            current = c.pop(0)

        # advance to the next marker after the current one (position >= 1)
        next_x = s.find('×', 1)
        next_div = s.find('÷', 1)
        found = [pos for pos in (next_x, next_div) if pos != -1]
        if found:
            idx = min(found)
            isX = idx == next_x
        else:
            # no further marker: consume the rest of the string
            idx = len(s)
        s = s[idx:]

    groups.append(current)
    return (groups, rules)
+
def process_split_string(s):
    """Split a test-case string into lists of codepoints.

    `s` consists of whitespace-separated hex codepoints and "÷"/"×"
    markers.  Each marker closes the current list (even an empty one) and
    starts a new one; a trailing non-empty list is kept.  Returns `[]` as
    soon as a surrogate codepoint is encountered.
    """
    groups = []
    pending = []

    for token in s.split():
        if token in ('÷', '×'):
            groups.append(pending)
            pending = []
            continue

        codepoint = int(token, 16)
        if unicode.is_surrogate(codepoint):
            return []
        pending.append(codepoint)

    if pending:
        groups.append(pending)

    return groups
+
def showfun(x):
    r"""Render one test tuple as a Rust tuple literal.

    `x[0]` is a flat list of codepoints and becomes a string literal of
    `\u{..}` escapes.  Every further element of `x` is a list of
    codepoint lists and becomes a `&["..",".."]` slice of such strings.
    """
    def escaped(codepoints):
        return "".join("\\u{%x}" % cp for cp in codepoints)

    slices = [
        ",".join('"' + escaped(segment) + '"' for segment in expected)
        for expected in x[1:]
    ]
    return '("' + escaped(x[0]) + '",&[' + "],&[".join(slices) + '])'
+
def create_grapheme_data(f):
    """Write the grapheme break test tables `TEST_SAME` and `TEST_DIFF` to `f`.

    Test cases whose legacy and extended clusterings coincide go into
    `TEST_SAME` as `(chars, clusters)`; the others go into `TEST_DIFF` as
    `(chars, extended_clusters, legacy_clusters)`.
    """
    # rules 9.1 and 9.2 are for extended graphemes only
    optsplits = ['9.1','9.2']
    d = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits)

    test_same = []
    test_diff = []

    for (c, i) in d:
        allchars = [cp for cluster in c for cp in cluster]

        # Rebuild the extended grapheme clusters: a split caused only by
        # one of the optional rules is undone by gluing the next cluster
        # onto the previous one.
        extgraphs = [list(c[0])]
        for rule, cluster in zip(i, c[1:]):
            if rule in optsplits:
                extgraphs[-1].extend(cluster)
            else:
                extgraphs.append(list(cluster))

        if extgraphs == c:
            test_same.append((allchars, c))
        else:
            test_diff.append((allchars, extgraphs, c))

    stype = "&'static [(&'static str, &'static [&'static str])]"
    dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
    f.write(" // official Unicode test data\n")
    f.write(" // http://www.unicode.org/Public/%s/ucd/auxiliary/GraphemeBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
    unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
    unicode.emit_table(f, "TEST_DIFF", test_diff, dtype, True, showfun, True)
+
def create_words_data(f):
    """Write the word break test table `TEST_WORD` to `f`.

    Every entry pairs the flat list of codepoints of a test case with
    its expected segmentation.
    """
    d = load_test_data("auxiliary/WordBreakTest.txt")

    test = [([cp for segment in c for cp in segment], c) for (c, _rules) in d]

    wtype = "&'static [(&'static str, &'static [&'static str])]"
    f.write(" // official Unicode test data\n")
    f.write(" // http://www.unicode.org/Public/%s/ucd/auxiliary/WordBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
    unicode.emit_table(f, "TEST_WORD", test, wtype, True, showfun, True)
+
def create_sentence_data(f):
    """Write the sentence break test table `TEST_SENTENCE` to `f`.

    Every entry pairs the flat list of codepoints of a test case with
    its expected segmentation.
    """
    d = load_test_data("auxiliary/SentenceBreakTest.txt")

    test = [([cp for segment in c for cp in segment], c) for (c, _rules) in d]

    wtype = "&'static [(&'static str, &'static [&'static str])]"
    f.write(" // official Unicode test data\n")
    f.write(" // http://www.unicode.org/Public/%s/ucd/auxiliary/SentenceBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
    unicode.emit_table(f, "TEST_SENTENCE", test, wtype, True, showfun, True)
+
if __name__ == "__main__":
    # Generate testdata.rs: the shared preamble followed by the grapheme,
    # word and sentence test tables, in that order.
    with open("testdata.rs", "w") as rf:
        rf.write(unicode.preamble)
        for emit in (create_grapheme_data, create_words_data, create_sentence_data):
            emit(rf)
diff --git a/third_party/rust/unicode-segmentation/src/grapheme.rs b/third_party/rust/unicode-segmentation/src/grapheme.rs
new file mode 100644
index 0000000000..eb554c900e
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/grapheme.rs
@@ -0,0 +1,801 @@
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use core::cmp;
+
+use crate::tables::grapheme::GraphemeCat;
+
+/// External iterator for grapheme clusters and byte offsets.
+///
+/// This struct is created by the [`grapheme_indices`] method on the [`UnicodeSegmentation`]
+/// trait. See its documentation for more.
+///
+/// [`grapheme_indices`]: trait.UnicodeSegmentation.html#tymethod.grapheme_indices
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+#[derive(Clone)]
+pub struct GraphemeIndices<'a> {
+ start_offset: usize,
+ iter: Graphemes<'a>,
+}
+
impl<'a> GraphemeIndices<'a> {
    #[inline]
    /// View the underlying data (the part yet to be iterated) as a slice of the original string.
    ///
    /// This forwards to the `as_str` method of the wrapped `Graphemes` iterator.
    ///
    /// ```rust
    /// # use unicode_segmentation::UnicodeSegmentation;
    /// let mut iter = "abc".grapheme_indices(true);
    /// assert_eq!(iter.as_str(), "abc");
    /// iter.next();
    /// assert_eq!(iter.as_str(), "bc");
    /// iter.next();
    /// iter.next();
    /// assert_eq!(iter.as_str(), "");
    /// ```
    pub fn as_str(&self) -> &'a str {
        self.iter.as_str()
    }
}
+
+impl<'a> Iterator for GraphemeIndices<'a> {
+ type Item = (usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, &'a str)> {
+ self.iter
+ .next()
+ .map(|s| (s.as_ptr() as usize - self.start_offset, s))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.iter.size_hint()
+ }
+}
+
+impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<(usize, &'a str)> {
+ self.iter
+ .next_back()
+ .map(|s| (s.as_ptr() as usize - self.start_offset, s))
+ }
+}
+
/// External iterator for a string's
/// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
///
/// This struct is created by the [`graphemes`] method on the [`UnicodeSegmentation`] trait. See its
/// documentation for more.
///
/// [`graphemes`]: trait.UnicodeSegmentation.html#tymethod.graphemes
/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
#[derive(Clone, Debug)]
pub struct Graphemes<'a> {
    // The complete string being segmented.
    string: &'a str,
    // Front cursor; its offset is the start of the not-yet-yielded part.
    cursor: GraphemeCursor,
    // Back cursor; its offset is the end of the not-yet-yielded part.
    cursor_back: GraphemeCursor,
}
+
+impl<'a> Graphemes<'a> {
+ #[inline]
+ /// View the underlying data (the part yet to be iterated) as a slice of the original string.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::UnicodeSegmentation;
+ /// let mut iter = "abc".graphemes(true);
+ /// assert_eq!(iter.as_str(), "abc");
+ /// iter.next();
+ /// assert_eq!(iter.as_str(), "bc");
+ /// iter.next();
+ /// iter.next();
+ /// assert_eq!(iter.as_str(), "");
+ /// ```
+ pub fn as_str(&self) -> &'a str {
+ &self.string[self.cursor.cur_cursor()..self.cursor_back.cur_cursor()]
+ }
+}
+
+impl<'a> Iterator for Graphemes<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let slen = self.cursor_back.cur_cursor() - self.cursor.cur_cursor();
+ (cmp::min(slen, 1), Some(slen))
+ }
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let start = self.cursor.cur_cursor();
+ if start == self.cursor_back.cur_cursor() {
+ return None;
+ }
+ let next = self.cursor.next_boundary(self.string, 0).unwrap().unwrap();
+ Some(&self.string[start..next])
+ }
+}
+
+impl<'a> DoubleEndedIterator for Graphemes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> {
+ let end = self.cursor_back.cur_cursor();
+ if end == self.cursor.cur_cursor() {
+ return None;
+ }
+ let prev = self
+ .cursor_back
+ .prev_boundary(self.string, 0)
+ .unwrap()
+ .unwrap();
+ Some(&self.string[prev..end])
+ }
+}
+
+#[inline]
+pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
+ let len = s.len();
+ Graphemes {
+ string: s,
+ cursor: GraphemeCursor::new(0, len, is_extended),
+ cursor_back: GraphemeCursor::new(len, len, is_extended),
+ }
+}
+
+#[inline]
+pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
+ GraphemeIndices {
+ start_offset: s.as_ptr() as usize,
+ iter: new_graphemes(s, is_extended),
+ }
+}
+
// maybe unify with PairResult?
// An enum describing information about a potential boundary at the cursor's
// current offset. `Break` and `NotBreak` are final answers; `Regional` and
// `Emoji` mean that pre-context still has to be examined.
#[derive(PartialEq, Eq, Clone, Debug)]
enum GraphemeState {
    // No information is known.
    Unknown,
    // It is known to not be a boundary.
    NotBreak,
    // It is known to be a boundary.
    Break,
    // The codepoint after is a Regional Indicator Symbol, so a boundary iff
    // it is preceded by an even number of RIS codepoints. (GB12, GB13)
    Regional,
    // The codepoint after is Extended_Pictographic,
    // so whether it's a boundary depends on pre-context according to GB11.
    Emoji,
}
+
/// Cursor-based segmenter for grapheme clusters.
///
/// This allows working with ropes and other datastructures where the string is not contiguous or
/// fully known at initialization time.
///
/// The cursor stores only offsets and classification state; the text itself is passed to
/// each query as a chunk together with the offset at which that chunk starts.
#[derive(Clone, Debug)]
pub struct GraphemeCursor {
    // Current cursor position.
    offset: usize,
    // Total length of the string.
    len: usize,
    // A config flag indicating whether this cursor computes legacy or extended
    // grapheme cluster boundaries (enables GB9a and GB9b if set).
    is_extended: bool,
    // Information about the potential boundary at `offset`
    state: GraphemeState,
    // Category of codepoint immediately preceding cursor, if known.
    cat_before: Option<GraphemeCat>,
    // Category of codepoint immediately after cursor, if known.
    cat_after: Option<GraphemeCat>,
    // If set, at least one more codepoint immediately preceding this offset
    // is needed to resolve whether there's a boundary at `offset`.
    pre_context_offset: Option<usize>,
    // The number of RIS codepoints preceding `offset`. If `pre_context_offset`
    // is set, then counts the number of RIS between that and `offset`, otherwise
    // is an accurate count relative to the string.
    ris_count: Option<usize>,
    // Set if a call to `prev_boundary` or `next_boundary` was suspended due
    // to needing more input.
    resuming: bool,
    // Cached grapheme category and associated scalar value range
    // (inclusive lower bound, inclusive upper bound, category).
    grapheme_cat_cache: (u32, u32, GraphemeCat),
}
+
/// An error return indicating that not enough content was available in the
/// provided chunk to satisfy the query, and that more content must be provided.
#[derive(PartialEq, Eq, Debug)]
pub enum GraphemeIncomplete {
    /// More pre-context is needed. The caller should call `provide_context`
    /// with a chunk ending at the offset given, then retry the query. This
    /// will only be returned if the `chunk_start` parameter is nonzero.
    PreContext(usize),

    /// When requesting `prev_boundary`, the cursor is moving past the beginning
    /// of the current chunk, so the chunk before that is requested. This will
    /// only be returned if the `chunk_start` parameter is nonzero.
    PrevChunk,

    /// When requesting `next_boundary`, the cursor is moving past the end of the
    /// current chunk, so the chunk after that is requested. This will only be
    /// returned if the chunk ends before the `len` parameter provided on
    /// creation of the cursor.
    NextChunk, // i.e. the chunk immediately following the one just given

    /// An error returned when the chunk given does not contain the cursor position.
    InvalidOffset,
}
+
// An enum describing the result from lookup of a pair of categories
// (the category before and the category after a candidate boundary).
// `Extended`, `Regional` and `Emoji` cannot be decided from the pair alone.
#[derive(PartialEq, Eq)]
enum PairResult {
    NotBreak, // definitely not a break
    Break,    // definitely a break
    Extended, // a break iff not in extended mode
    Regional, // a break if preceded by an even number of RIS
    Emoji,    // a break if preceded by emoji base and (Extend)*
}
+
+#[inline]
+fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
+ use self::PairResult::*;
+ use crate::tables::grapheme::GraphemeCat::*;
+ match (before, after) {
+ (GC_CR, GC_LF) => NotBreak, // GB3
+ (GC_Control, _) => Break, // GB4
+ (GC_CR, _) => Break, // GB4
+ (GC_LF, _) => Break, // GB4
+ (_, GC_Control) => Break, // GB5
+ (_, GC_CR) => Break, // GB5
+ (_, GC_LF) => Break, // GB5
+ (GC_L, GC_L) => NotBreak, // GB6
+ (GC_L, GC_V) => NotBreak, // GB6
+ (GC_L, GC_LV) => NotBreak, // GB6
+ (GC_L, GC_LVT) => NotBreak, // GB6
+ (GC_LV, GC_V) => NotBreak, // GB7
+ (GC_LV, GC_T) => NotBreak, // GB7
+ (GC_V, GC_V) => NotBreak, // GB7
+ (GC_V, GC_T) => NotBreak, // GB7
+ (GC_LVT, GC_T) => NotBreak, // GB8
+ (GC_T, GC_T) => NotBreak, // GB8
+ (_, GC_Extend) => NotBreak, // GB9
+ (_, GC_ZWJ) => NotBreak, // GB9
+ (_, GC_SpacingMark) => Extended, // GB9a
+ (GC_Prepend, _) => Extended, // GB9b
+ (GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11
+ (GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
+ (_, _) => Break, // GB999
+ }
+}
+
+impl GraphemeCursor {
+ /// Create a new cursor. The string and initial offset are given at creation
+ /// time, but the contents of the string are not. The `is_extended` parameter
+ /// controls whether extended grapheme clusters are selected.
+ ///
+ /// The `offset` parameter must be on a codepoint boundary.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::GraphemeCursor;
+ /// let s = "हिन्दी";
+ /// let mut legacy = GraphemeCursor::new(0, s.len(), false);
+ /// assert_eq!(legacy.next_boundary(s, 0), Ok(Some("ह".len())));
+ /// let mut extended = GraphemeCursor::new(0, s.len(), true);
+ /// assert_eq!(extended.next_boundary(s, 0), Ok(Some("हि".len())));
+ /// ```
+ pub fn new(offset: usize, len: usize, is_extended: bool) -> GraphemeCursor {
+ let state = if offset == 0 || offset == len {
+ GraphemeState::Break
+ } else {
+ GraphemeState::Unknown
+ };
+ GraphemeCursor {
+ offset: offset,
+ len: len,
+ state: state,
+ is_extended: is_extended,
+ cat_before: None,
+ cat_after: None,
+ pre_context_offset: None,
+ ris_count: None,
+ resuming: false,
+ grapheme_cat_cache: (0, 0, GraphemeCat::GC_Control),
+ }
+ }
+
+ fn grapheme_category(&mut self, ch: char) -> GraphemeCat {
+ use crate::tables::grapheme as gr;
+ use crate::tables::grapheme::GraphemeCat::*;
+
+ if ch <= '\u{7e}' {
+ // Special-case optimization for ascii, except U+007F. This
+ // improves performance even for many primarily non-ascii texts,
+ // due to use of punctuation and white space characters from the
+ // ascii range.
+ if ch >= '\u{20}' {
+ GC_Any
+ } else if ch == '\n' {
+ GC_LF
+ } else if ch == '\r' {
+ GC_CR
+ } else {
+ GC_Control
+ }
+ } else {
+ // If this char isn't within the cached range, update the cache to the
+ // range that includes it.
+ if (ch as u32) < self.grapheme_cat_cache.0 || (ch as u32) > self.grapheme_cat_cache.1 {
+ self.grapheme_cat_cache = gr::grapheme_category(ch);
+ }
+ self.grapheme_cat_cache.2
+ }
+ }
+
+ // Not sure I'm gonna keep this, the advantage over new() seems thin.
+
+ /// Set the cursor to a new location in the same string.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::GraphemeCursor;
+ /// let s = "abcd";
+ /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
+ /// assert_eq!(cursor.cur_cursor(), 0);
+ /// cursor.set_cursor(2);
+ /// assert_eq!(cursor.cur_cursor(), 2);
+ /// ```
+ pub fn set_cursor(&mut self, offset: usize) {
+ if offset != self.offset {
+ self.offset = offset;
+ self.state = if offset == 0 || offset == self.len {
+ GraphemeState::Break
+ } else {
+ GraphemeState::Unknown
+ };
+ // reset state derived from text around cursor
+ self.cat_before = None;
+ self.cat_after = None;
+ self.ris_count = None;
+ }
+ }
+
    #[inline]
    /// The current offset of the cursor. Equal to the last value provided to
    /// `new()` or `set_cursor()`, or returned from `next_boundary()` or
    /// `prev_boundary()`.
    ///
    /// The offset is a byte offset into the whole string, not into a chunk.
    ///
    /// ```rust
    /// # use unicode_segmentation::GraphemeCursor;
    /// // Two flags (🇷🇸🇮🇴), each flag is two RIS codepoints, each RIS is 4 bytes.
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
    /// assert_eq!(cursor.cur_cursor(), 4);
    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
    /// assert_eq!(cursor.cur_cursor(), 8);
    /// ```
    pub fn cur_cursor(&self) -> usize {
        self.offset
    }
+
+ /// Provide additional pre-context when it is needed to decide a boundary.
+ /// The end of the chunk must coincide with the value given in the
+ /// `GraphemeIncomplete::PreContext` request.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+ /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+ /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
+ /// // Not enough pre-context to decide if there's a boundary between the two flags.
+ /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(8)));
+ /// // Provide one more Regional Indicator Symbol of pre-context
+ /// cursor.provide_context(&flags[4..8], 4);
+ /// // Still not enough context to decide.
+ /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(4)));
+ /// // Provide additional requested context.
+ /// cursor.provide_context(&flags[0..4], 0);
+ /// // That's enough to decide (it always is when context goes to the start of the string)
+ /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
+ /// ```
+ pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
+ use crate::tables::grapheme as gr;
+ assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
+ self.pre_context_offset = None;
+ if self.is_extended && chunk_start + chunk.len() == self.offset {
+ let ch = chunk.chars().rev().next().unwrap();
+ if self.grapheme_category(ch) == gr::GC_Prepend {
+ self.decide(false); // GB9b
+ return;
+ }
+ }
+ match self.state {
+ GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
+ GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
+ _ => {
+ if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
+ let ch = chunk.chars().rev().next().unwrap();
+ self.cat_before = Some(self.grapheme_category(ch));
+ }
+ }
+ }
+ }
+
+ #[inline]
+ fn decide(&mut self, is_break: bool) {
+ self.state = if is_break {
+ GraphemeState::Break
+ } else {
+ GraphemeState::NotBreak
+ };
+ }
+
    // Records `is_break` as the final answer via `decide` and returns it
    // wrapped in `Ok`, so that callers can write `return self.decision(..)`.
    #[inline]
    fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
        self.decide(is_break);
        Ok(is_break)
    }
+
+ #[inline]
+ fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
+ if self.state == GraphemeState::Break {
+ Ok(true)
+ } else if self.state == GraphemeState::NotBreak {
+ Ok(false)
+ } else if let Some(pre_context_offset) = self.pre_context_offset {
+ Err(GraphemeIncomplete::PreContext(pre_context_offset))
+ } else {
+ unreachable!("inconsistent state");
+ }
+ }
+
+ #[inline]
+ fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
+ use crate::tables::grapheme as gr;
+ let mut ris_count = self.ris_count.unwrap_or(0);
+ for ch in chunk.chars().rev() {
+ if self.grapheme_category(ch) != gr::GC_Regional_Indicator {
+ self.ris_count = Some(ris_count);
+ self.decide((ris_count % 2) == 0);
+ return;
+ }
+ ris_count += 1;
+ }
+ self.ris_count = Some(ris_count);
+ if chunk_start == 0 {
+ self.decide((ris_count % 2) == 0);
+ return;
+ }
+ self.pre_context_offset = Some(chunk_start);
+ self.state = GraphemeState::Regional;
+ }
+
+ #[inline]
+ fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
+ use crate::tables::grapheme as gr;
+ let mut iter = chunk.chars().rev();
+ if let Some(ch) = iter.next() {
+ if self.grapheme_category(ch) != gr::GC_ZWJ {
+ self.decide(true);
+ return;
+ }
+ }
+ for ch in iter {
+ match self.grapheme_category(ch) {
+ gr::GC_Extend => (),
+ gr::GC_Extended_Pictographic => {
+ self.decide(false);
+ return;
+ }
+ _ => {
+ self.decide(true);
+ return;
+ }
+ }
+ }
+ if chunk_start == 0 {
+ self.decide(true);
+ return;
+ }
+ self.pre_context_offset = Some(chunk_start);
+ self.state = GraphemeState::Emoji;
+ }
+
    #[inline]
    /// Determine whether the current cursor location is a grapheme cluster boundary.
    /// Only a part of the string need be supplied. If `chunk_start` is nonzero or
    /// the length of `chunk` is not equal to `len` on creation, then this method
    /// may return `GraphemeIncomplete::PreContext`. The caller should then
    /// call `provide_context` with the requested chunk, then retry calling this
    /// method.
    ///
    /// For partial chunks, if the cursor is not at the beginning or end of the
    /// string, the chunk should contain at least the codepoint following the cursor.
    /// If the string is nonempty, the chunk must be nonempty.
    ///
    /// All calls should have consistent chunk contents (ie, if a chunk provides
    /// content for a given slice, all further chunks covering that slice must have
    /// the same content for it).
    ///
    /// ```rust
    /// # use unicode_segmentation::GraphemeCursor;
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(true));
    /// cursor.set_cursor(12);
    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
    /// ```
    pub fn is_boundary(
        &mut self,
        chunk: &str,
        chunk_start: usize,
    ) -> Result<bool, GraphemeIncomplete> {
        use crate::tables::grapheme as gr;
        // A previously reached final answer is returned without looking at the chunk.
        if self.state == GraphemeState::Break {
            return Ok(true);
        }
        if self.state == GraphemeState::NotBreak {
            return Ok(false);
        }
        // The cursor lies outside `chunk_start..chunk_start + chunk.len()`. This is an
        // error if it is beyond the end of the chunk, or if the category of the
        // following codepoint has not been cached yet.
        // NOTE(review): a cursor *before* `chunk_start` with `cat_after` cached passes
        // this check, and `self.offset - chunk_start` below would then underflow;
        // confirm that callers can never get here in that situation.
        if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() {
            if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() {
                return Err(GraphemeIncomplete::InvalidOffset);
            }
        }
        // A pre-context request is still outstanding: repeat it.
        if let Some(pre_context_offset) = self.pre_context_offset {
            return Err(GraphemeIncomplete::PreContext(pre_context_offset));
        }
        let offset_in_chunk = self.offset - chunk_start;
        // Classify the codepoint right after the cursor, unless already known.
        if self.cat_after.is_none() {
            let ch = chunk[offset_in_chunk..].chars().next().unwrap();
            self.cat_after = Some(self.grapheme_category(ch));
        }
        // The cursor sits at the very start of the chunk, so the chunk contains
        // nothing of what precedes it.
        if self.offset == chunk_start {
            let mut need_pre_context = true;
            match self.cat_after.unwrap() {
                // For these two categories pre-context is always requested; the state
                // tells `provide_context` which handler to run on it.
                gr::GC_Regional_Indicator => self.state = GraphemeState::Regional,
                gr::GC_Extended_Pictographic => self.state = GraphemeState::Emoji,
                // Otherwise pre-context is only needed if the preceding category
                // is still unknown.
                _ => need_pre_context = self.cat_before.is_none(),
            }
            if need_pre_context {
                self.pre_context_offset = Some(chunk_start);
                return Err(GraphemeIncomplete::PreContext(chunk_start));
            }
        }
        // Classify the codepoint right before the cursor, unless already known.
        if self.cat_before.is_none() {
            let ch = chunk[..offset_in_chunk].chars().rev().next().unwrap();
            self.cat_before = Some(self.grapheme_category(ch));
        }
        match check_pair(self.cat_before.unwrap(), self.cat_after.unwrap()) {
            PairResult::NotBreak => return self.decision(false),
            PairResult::Break => return self.decision(true),
            PairResult::Extended => {
                // GB9a/GB9b only suppress the break in extended mode.
                let is_extended = self.is_extended;
                return self.decision(!is_extended);
            }
            PairResult::Regional => {
                // Use a known RIS count if there is one, otherwise count the RIS
                // preceding the cursor in this chunk.
                if let Some(ris_count) = self.ris_count {
                    return self.decision((ris_count % 2) == 0);
                }
                self.handle_regional(&chunk[..offset_in_chunk], chunk_start);
                self.is_boundary_result()
            }
            PairResult::Emoji => {
                // Inspect the part of the chunk before the cursor for the GB11 pattern.
                self.handle_emoji(&chunk[..offset_in_chunk], chunk_start);
                self.is_boundary_result()
            }
        }
    }
+
+ #[inline]
+ /// Find the next boundary after the current cursor position. Only a part of
+ /// the string need be supplied. If the chunk is incomplete, then this
+ /// method might return `GraphemeIncomplete::PreContext` or
+ /// `GraphemeIncomplete::NextChunk`. In the former case, the caller should
+ /// call `provide_context` with the requested chunk, then retry. In the
+ /// latter case, the caller should provide the chunk following the one
+ /// given, then retry. Returns `Ok(None)` once the cursor is at the end of the string.
+ ///
+ /// See `is_boundary` for expectations on the provided chunk.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::GraphemeCursor;
+ /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+ /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
+ /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
+ /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(16)));
+ /// assert_eq!(cursor.next_boundary(flags, 0), Ok(None));
+ /// ```
+ ///
+ /// And an example that uses partial strings:
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+ /// let s = "abcd";
+ /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
+ /// assert_eq!(cursor.next_boundary(&s[..2], 0), Ok(Some(1)));
+ /// assert_eq!(cursor.next_boundary(&s[..2], 0), Err(GraphemeIncomplete::NextChunk));
+ /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(2)));
+ /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(3)));
+ /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
+ /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
+ /// ```
+ pub fn next_boundary(
+ &mut self,
+ chunk: &str,
+ chunk_start: usize,
+ ) -> Result<Option<usize>, GraphemeIncomplete> {
+ if self.offset == self.len { // cursor already sits at the end of the text: nothing follows
+ return Ok(None);
+ }
+ let mut iter = chunk[self.offset - chunk_start..].chars(); // the cursor must lie inside `chunk`, otherwise this slice expression panics
+ let mut ch = iter.next().unwrap(); // panics if `chunk` holds no character at the cursor position
+ loop {
+ if self.resuming { // flag left set by an earlier call that bailed out with an error: do not advance a second time
+ if self.cat_after.is_none() {
+ self.cat_after = Some(self.grapheme_category(ch));
+ }
+ } else {
+ self.offset += ch.len_utf8(); // step the cursor over `ch`
+ self.state = GraphemeState::Unknown; // any cached answer belonged to the old offset
+ self.cat_before = self.cat_after.take(); // the character just passed is now the one before the cursor
+ if self.cat_before.is_none() {
+ self.cat_before = Some(self.grapheme_category(ch));
+ }
+ if self.cat_before.unwrap() == GraphemeCat::GC_Regional_Indicator {
+ self.ris_count = self.ris_count.map(|c| c + 1); // an unknown count (`None`) stays unknown
+ } else {
+ self.ris_count = Some(0); // a non-RI character restarts the count at zero
+ }
+ if let Some(next_ch) = iter.next() {
+ ch = next_ch;
+ self.cat_after = Some(self.grapheme_category(ch));
+ } else if self.offset == self.len {
+ self.decide(true); // chunk exhausted exactly at the end of the text (`decide` is defined elsewhere; presumably records a break)
+ } else {
+ self.resuming = true; // chunk exhausted mid-text: remember not to re-advance on the retry
+ return Err(GraphemeIncomplete::NextChunk);
+ }
+ }
+ self.resuming = true; // if `is_boundary` errors out through `?`, the retry must resume at this offset
+ if self.is_boundary(chunk, chunk_start)? {
+ self.resuming = false;
+ return Ok(Some(self.offset));
+ }
+ self.resuming = false; // not a boundary: keep advancing
+ }
+ }
+
+ /// Find the previous boundary before the current cursor position. Only a part
+ /// of the string need be supplied. If the chunk is incomplete, then this
+ /// method might return `GraphemeIncomplete::PreContext` or
+ /// `GraphemeIncomplete::PrevChunk`. In the former case, the caller should
+ /// call `provide_context` with the requested chunk, then retry. In the
+ /// latter case, the caller should provide the chunk preceding the one
+ /// given, then retry. Returns `Ok(None)` once the cursor is at offset 0.
+ ///
+ /// See `is_boundary` for expectations on the provided chunk.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::GraphemeCursor;
+ /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+ /// let mut cursor = GraphemeCursor::new(12, flags.len(), false);
+ /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(8)));
+ /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(0)));
+ /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(None));
+ /// ```
+ ///
+ /// And an example that uses partial strings (note the exact return is not
+ /// guaranteed, and may be `PrevChunk` or `PreContext` arbitrarily):
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+ /// let s = "abcd";
+ /// let mut cursor = GraphemeCursor::new(4, s.len(), false);
+ /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Ok(Some(3)));
+ /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Err(GraphemeIncomplete::PrevChunk));
+ /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(2)));
+ /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(1)));
+ /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
+ /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
+ /// ```
+ pub fn prev_boundary(
+ &mut self,
+ chunk: &str,
+ chunk_start: usize,
+ ) -> Result<Option<usize>, GraphemeIncomplete> {
+ if self.offset == 0 { // cursor already sits at the start of the text: nothing precedes it
+ return Ok(None);
+ }
+ if self.offset == chunk_start { // nothing in this chunk lies before the cursor
+ return Err(GraphemeIncomplete::PrevChunk);
+ }
+ let mut iter = chunk[..self.offset - chunk_start].chars().rev(); // walk backwards from the cursor; the cursor must lie inside `chunk` or this slice panics
+ let mut ch = iter.next().unwrap(); // non-empty here because offset != chunk_start was checked above
+ loop {
+ if self.offset == chunk_start { // stepped back to the front of the chunk without finding a boundary
+ self.resuming = true;
+ return Err(GraphemeIncomplete::PrevChunk);
+ }
+ if self.resuming { // flag left set by an earlier call that bailed out with an error: do not step back a second time
+ self.cat_before = Some(self.grapheme_category(ch));
+ } else {
+ self.offset -= ch.len_utf8(); // step the cursor back over `ch`
+ self.cat_after = self.cat_before.take(); // the character just passed is now the one after the cursor
+ self.state = GraphemeState::Unknown; // any cached answer belonged to the old offset
+ if let Some(ris_count) = self.ris_count {
+ self.ris_count = if ris_count > 0 {
+ Some(ris_count - 1)
+ } else {
+ None // count would go below zero: it is no longer known
+ };
+ }
+ if let Some(prev_ch) = iter.next() {
+ ch = prev_ch;
+ self.cat_before = Some(self.grapheme_category(ch));
+ } else if self.offset == 0 {
+ self.decide(true); // reached the start of the text (`decide` is defined elsewhere; presumably records a break)
+ } else {
+ self.resuming = true; // chunk exhausted mid-text: remember not to step back again on the retry
+ self.cat_after = Some(self.grapheme_category(ch)); // `ch` is the character just stepped over, i.e. the one after the cursor
+ return Err(GraphemeIncomplete::PrevChunk);
+ }
+ }
+ self.resuming = true; // if `is_boundary` errors out through `?`, the retry must resume at this offset
+ if self.is_boundary(chunk, chunk_start)? {
+ self.resuming = false;
+ return Ok(Some(self.offset));
+ }
+ self.resuming = false; // not a boundary: keep stepping back
+ }
+ }
+}
+
+#[test]
+fn test_grapheme_cursor_ris_precontext() { // a regional-indicator query whose chunk omits the start of the text must request pre-context
+ let s = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}"; // six regional indicators, four bytes each
+ let mut c = GraphemeCursor::new(8, s.len(), true); // cursor at byte 8
+ assert_eq!(
+ c.is_boundary(&s[4..], 4), // chunk begins at byte 4, so bytes 0..4 are not visible yet
+ Err(GraphemeIncomplete::PreContext(4))
+ );
+ c.provide_context(&s[..4], 0); // supply the missing prefix
+ assert_eq!(c.is_boundary(&s[4..], 4), Ok(true)); // the retry now reports a boundary at byte 8
+}
+
+#[test]
+fn test_grapheme_cursor_chunk_start_require_precontext() { // a cursor sitting exactly at the chunk start needs the preceding character
+ let s = "\r\n";
+ let mut c = GraphemeCursor::new(1, s.len(), true); // cursor between CR and LF
+ assert_eq!(
+ c.is_boundary(&s[1..], 1), // the chunk starts at the cursor, so the CR is not visible
+ Err(GraphemeIncomplete::PreContext(1))
+ );
+ c.provide_context(&s[..1], 0); // supply the CR
+ assert_eq!(c.is_boundary(&s[1..], 1), Ok(false)); // no boundary inside CR LF
+}
+
+#[test]
+fn test_grapheme_cursor_prev_boundary() { // stepping back from inside a chunk to its first byte asks for the previous chunk
+ let s = "abcd";
+ let mut c = GraphemeCursor::new(3, s.len(), true); // cursor at byte 3
+ assert_eq!(
+ c.prev_boundary(&s[2..], 2), // chunk covers bytes 2..4 only
+ Err(GraphemeIncomplete::PrevChunk)
+ );
+ assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(2))); // with the preceding chunk the boundary at byte 2 is found
+}
+
+#[test]
+fn test_grapheme_cursor_prev_boundary_chunk_start() { // a cursor that starts exactly at the chunk start asks for the previous chunk immediately
+ let s = "abcd";
+ let mut c = GraphemeCursor::new(2, s.len(), true); // cursor at byte 2
+ assert_eq!(
+ c.prev_boundary(&s[2..], 2), // the chunk starts at the cursor: nothing before it is visible
+ Err(GraphemeIncomplete::PrevChunk)
+ );
+ assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(1))); // with the preceding chunk the boundary at byte 1 is found
+}
diff --git a/third_party/rust/unicode-segmentation/src/lib.rs b/third_party/rust/unicode-segmentation/src/lib.rs
new file mode 100644
index 0000000000..809c5dcae8
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/lib.rs
@@ -0,0 +1,307 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Iterators which split strings on Grapheme Cluster, Word or Sentence boundaries, according
+//! to the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules.
+//!
+//! ```rust
+//! extern crate unicode_segmentation;
+//!
+//! use unicode_segmentation::UnicodeSegmentation;
+//!
+//! fn main() {
+//! let s = "a̐éö̲\r\n";
+//! let g = UnicodeSegmentation::graphemes(s, true).collect::<Vec<&str>>();
+//! let b: &[_] = &["a̐", "é", "ö̲", "\r\n"];
+//! assert_eq!(g, b);
+//!
+//! let s = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
+//! let w = s.unicode_words().collect::<Vec<&str>>();
+//! let b: &[_] = &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"];
+//! assert_eq!(w, b);
+//!
+//! let s = "The quick (\"brown\") fox";
+//! let w = s.split_word_bounds().collect::<Vec<&str>>();
+//! let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", "fox"];
+//! assert_eq!(w, b);
+//! }
+//! ```
+//!
+//! # no_std
+//!
+//! unicode-segmentation does not depend on libstd, so it can be used in crates
+//! with the `#![no_std]` attribute.
+//!
+//! # crates.io
+//!
+//! You can use this package in your project by adding the following
+//! to your `Cargo.toml`:
+//!
+//! ```toml
+//! [dependencies]
+//! unicode-segmentation = "1.9.0"
+//! ```
+
+#![deny(missing_docs, unsafe_code)]
+#![doc(
+ html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
+ html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
+)]
+#![no_std]
+
+#[cfg(test)]
+#[macro_use]
+extern crate std;
+
+#[cfg(test)]
+#[macro_use]
+extern crate quickcheck;
+
+pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
+pub use grapheme::{GraphemeIndices, Graphemes};
+pub use sentence::{USentenceBoundIndices, USentenceBounds, UnicodeSentences};
+pub use tables::UNICODE_VERSION;
+pub use word::{UWordBoundIndices, UWordBounds, UnicodeWordIndices, UnicodeWords};
+
+mod grapheme;
+#[rustfmt::skip]
+mod tables;
+mod sentence;
+mod word;
+
+#[cfg(test)]
+mod test;
+#[cfg(test)]
+mod testdata;
+
+/// Methods for segmenting strings according to
+/// [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/).
+pub trait UnicodeSegmentation {
+ /// Returns an iterator over the [grapheme clusters][graphemes] of `self`.
+ ///
+ /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+ ///
+ /// If `is_extended` is true, the iterator is over the
+ /// *extended grapheme clusters*;
+ /// otherwise, the iterator is over the *legacy grapheme clusters*.
+ /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+ /// recommends extended grapheme cluster boundaries for general processing.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let gr1 = UnicodeSegmentation::graphemes("a\u{310}e\u{301}o\u{308}\u{332}", true)
+ /// .collect::<Vec<&str>>();
+ /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
+ ///
+ /// assert_eq!(&gr1[..], b);
+ ///
+ /// let gr2 = UnicodeSegmentation::graphemes("a\r\nb🇷🇺🇸🇹", true).collect::<Vec<&str>>();
+ /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺", "🇸🇹"];
+ ///
+ /// assert_eq!(&gr2[..], b);
+ /// ```
+ fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
+
+ /// Returns an iterator over the grapheme clusters of `self` and their
+ /// byte offsets within `self`. See `graphemes()` for more information.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let gr_inds = UnicodeSegmentation::grapheme_indices("a\u{310}e\u{301}o\u{308}\u{332}\r\n", true)
+ /// .collect::<Vec<(usize, &str)>>();
+ /// let b: &[_] = &[(0, "a\u{310}"), (3, "e\u{301}"), (6, "o\u{308}\u{332}"), (11, "\r\n")];
+ ///
+ /// assert_eq!(&gr_inds[..], b);
+ /// ```
+ fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
+
+ /// Returns an iterator over the words of `self`, separated on
+ /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
+ ///
+ /// Here, "words" are just those substrings which, after splitting on
+ /// UAX#29 word boundaries, contain any alphanumeric characters. That is, the
+ /// substring must contain at least one character with the
+ /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+ /// property, or with
+ /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let uws = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
+ /// let uw1 = uws.unicode_words().collect::<Vec<&str>>();
+ /// let b: &[_] = &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"];
+ ///
+ /// assert_eq!(&uw1[..], b);
+ /// ```
+ fn unicode_words<'a>(&'a self) -> UnicodeWords<'a>;
+
+ /// Returns an iterator over the words of `self`, separated on
+ /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries), and their
+ /// byte offsets within `self`.
+ ///
+ /// Here, "words" are just those substrings which, after splitting on
+ /// UAX#29 word boundaries, contain any alphanumeric characters. That is, the
+ /// substring must contain at least one character with the
+ /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+ /// property, or with
+ /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let uwis = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
+ /// let uwi1 = uwis.unicode_word_indices().collect::<Vec<(usize, &str)>>();
+ /// let b: &[_] = &[(0, "The"), (4, "quick"), (12, "brown"), (20, "fox"), (24, "can't"),
+ /// (30, "jump"), (35, "32.3"), (40, "feet"), (46, "right")];
+ ///
+ /// assert_eq!(&uwi1[..], b);
+ /// ```
+ fn unicode_word_indices<'a>(&'a self) -> UnicodeWordIndices<'a>;
+
+ /// Returns an iterator over substrings of `self` separated on
+ /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
+ ///
+ /// The concatenation of the substrings returned by this function is just the original string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let swu1 = "The quick (\"brown\") fox".split_word_bounds().collect::<Vec<&str>>();
+ /// let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", "fox"];
+ ///
+ /// assert_eq!(&swu1[..], b);
+ /// ```
+ fn split_word_bounds<'a>(&'a self) -> UWordBounds<'a>;
+
+ /// Returns an iterator over substrings of `self`, split on UAX#29 word boundaries,
+ /// and their byte offsets within `self`. See `split_word_bounds()` for more information.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let swi1 = "Brr, it's 29.3°F!".split_word_bound_indices().collect::<Vec<(usize, &str)>>();
+ /// let b: &[_] = &[(0, "Brr"), (3, ","), (4, " "), (5, "it's"), (9, " "), (10, "29.3"),
+ /// (14, "°"), (16, "F"), (17, "!")];
+ ///
+ /// assert_eq!(&swi1[..], b);
+ /// ```
+ fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>;
+
+ /// Returns an iterator over substrings of `self` separated on
+ /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
+ ///
+ /// Here, "sentences" are just those substrings which, after splitting on
+ /// UAX#29 sentence boundaries, contain any alphanumeric characters. That is, the
+ /// substring must contain at least one character with the
+ /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+ /// property, or with
+ /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let uss = "Mr. Fox jumped. [...] The dog was too lazy.";
+ /// let us1 = uss.unicode_sentences().collect::<Vec<&str>>();
+ /// let b: &[_] = &["Mr. ", "Fox jumped. ", "The dog was too lazy."];
+ ///
+ /// assert_eq!(&us1[..], b);
+ /// ```
+ fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>;
+
+ /// Returns an iterator over substrings of `self` separated on
+ /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
+ ///
+ /// The concatenation of the substrings returned by this function is just the original string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let ssbs = "Mr. Fox jumped. [...] The dog was too lazy.";
+ /// let ssb1 = ssbs.split_sentence_bounds().collect::<Vec<&str>>();
+ /// let b: &[_] = &["Mr. ", "Fox jumped. ", "[...] ", "The dog was too lazy."];
+ ///
+ /// assert_eq!(&ssb1[..], b);
+ /// ```
+ fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>;
+
+ /// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries,
+ /// and their byte offsets within `self`. See `split_sentence_bounds()` for more information.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use self::unicode_segmentation::UnicodeSegmentation;
+ /// let ssis = "Mr. Fox jumped. [...] The dog was too lazy.";
+ /// let ssi1 = ssis.split_sentence_bound_indices().collect::<Vec<(usize, &str)>>();
+ /// let b: &[_] = &[(0, "Mr. "), (4, "Fox jumped. "), (16, "[...] "),
+ /// (22, "The dog was too lazy.")];
+ ///
+ /// assert_eq!(&ssi1[..], b);
+ /// ```
+ fn split_sentence_bound_indices<'a>(&'a self) -> USentenceBoundIndices<'a>;
+}
+
+impl UnicodeSegmentation for str { // every method forwards to the constructor function of the matching module
+ #[inline]
+ fn graphemes(&self, is_extended: bool) -> Graphemes {
+ grapheme::new_graphemes(self, is_extended)
+ }
+
+ #[inline]
+ fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
+ grapheme::new_grapheme_indices(self, is_extended)
+ }
+
+ #[inline]
+ fn unicode_words(&self) -> UnicodeWords {
+ word::new_unicode_words(self)
+ }
+
+ #[inline]
+ fn unicode_word_indices(&self) -> UnicodeWordIndices {
+ word::new_unicode_word_indices(self)
+ }
+
+ #[inline]
+ fn split_word_bounds(&self) -> UWordBounds {
+ word::new_word_bounds(self)
+ }
+
+ #[inline]
+ fn split_word_bound_indices(&self) -> UWordBoundIndices {
+ word::new_word_bound_indices(self)
+ }
+
+ #[inline]
+ fn unicode_sentences(&self) -> UnicodeSentences {
+ sentence::new_unicode_sentences(self) // filtered view over `split_sentence_bounds`
+ }
+
+ #[inline]
+ fn split_sentence_bounds(&self) -> USentenceBounds {
+ sentence::new_sentence_bounds(self)
+ }
+
+ #[inline]
+ fn split_sentence_bound_indices(&self) -> USentenceBoundIndices {
+ sentence::new_sentence_bound_indices(self)
+ }
+}
diff --git a/third_party/rust/unicode-segmentation/src/sentence.rs b/third_party/rust/unicode-segmentation/src/sentence.rs
new file mode 100644
index 0000000000..78d87b4072
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/sentence.rs
@@ -0,0 +1,415 @@
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use core::cmp;
+use core::iter::Filter;
+
+// All of the logic for forward iteration over sentences
+mod fwd {
+ use crate::tables::sentence::SentenceCat;
+ use core::cmp;
+
+ // One slot of the sentence-break state window: a parsed part of the source string, named after the terms in this table:
+ // https://unicode.org/reports/tr29/#Default_Sentence_Boundaries
+ #[derive(Clone, Copy, PartialEq, Eq)]
+ enum StatePart {
+ Sot, // filler used for every slot of the initial state
+ Eot, // pushed by `SentenceBreaksState::end` once the input is exhausted
+ Other, // any category without a dedicated variant
+ CR,
+ LF,
+ Sep,
+ ATerm,
+ UpperLower, // either an `SC_Upper` or an `SC_Lower` character
+ ClosePlus, // a run of one or more `SC_Close` characters (repeats are collapsed)
+ SpPlus, // a run of one or more `SC_Sp` characters (repeats are collapsed)
+ STerm,
+ }
+
+ #[derive(Clone, PartialEq, Eq)]
+ struct SentenceBreaksState(pub [StatePart; 4]); // window of the four most recent parts; index 3 is the newest
+
+ const INITIAL_STATE: SentenceBreaksState = SentenceBreaksState([ // state before any character has been consumed
+ StatePart::Sot,
+ StatePart::Sot,
+ StatePart::Sot,
+ StatePart::Sot,
+ ]);
+
+ #[derive(Clone)]
+ pub struct SentenceBreaks<'a> {
+ pub string: &'a str, // the whole text being segmented
+ pos: usize, // byte index of the next character to examine
+ state: SentenceBreaksState, // parts seen so far, used to evaluate the break rules
+ }
+
+ impl SentenceBreaksState {
+     // Compute the state reached after consuming one character of category
+     // `cat`. A `Close` arriving on top of a `Close` run, or an `Sp` arriving
+     // on top of an `Sp` run, is absorbed into that run and leaves the window
+     // untouched; anything else shifts the window left by one slot.
+     fn next(&self, cat: SentenceCat) -> SentenceBreaksState {
+         let window = self.0;
+         let absorbed = match (window[3], cat) {
+             (StatePart::ClosePlus, SentenceCat::SC_Close)
+             | (StatePart::SpPlus, SentenceCat::SC_Sp) => true,
+             _ => false,
+         };
+         if absorbed {
+             return SentenceBreaksState(window);
+         }
+
+         let newest = match cat {
+             SentenceCat::SC_CR => StatePart::CR,
+             SentenceCat::SC_LF => StatePart::LF,
+             SentenceCat::SC_Sep => StatePart::Sep,
+             SentenceCat::SC_ATerm => StatePart::ATerm,
+             SentenceCat::SC_Upper | SentenceCat::SC_Lower => StatePart::UpperLower,
+             SentenceCat::SC_Close => StatePart::ClosePlus,
+             SentenceCat::SC_Sp => StatePart::SpPlus,
+             SentenceCat::SC_STerm => StatePart::STerm,
+             _ => StatePart::Other,
+         };
+         SentenceBreaksState([window[1], window[2], window[3], newest])
+     }
+
+     // Shift the window once more and mark the newest slot as end-of-text.
+     fn end(&self) -> SentenceBreaksState {
+         let window = self.0;
+         SentenceBreaksState([window[1], window[2], window[3], StatePart::Eot])
+     }
+
+     // True when the newest slot of the window equals `part`.
+     fn match1(&self, part: StatePart) -> bool {
+         self.0[3] == part
+     }
+
+     // True when the two newest slots of the window equal `part1` followed
+     // by `part2`.
+     fn match2(&self, part1: StatePart, part2: StatePart) -> bool {
+         let window = self.0;
+         window[2] == part1 && window[3] == part2
+     }
+ }
+
+ // https://unicode.org/reports/tr29/#SB8
+ // TODO cache this, it is currently quadratic
+ fn match_sb8(state: &SentenceBreaksState, ahead: &str) -> bool {
+     use crate::tables::sentence as se;
+
+     let window = state.0;
+     // Step back over an optional `Sp` run and then an optional `Close` run
+     // to reach the slot that has to hold the `ATerm`.
+     let mut slot = 3;
+     if window[slot] == StatePart::SpPlus {
+         slot -= 1;
+     }
+     if window[slot] == StatePart::ClosePlus {
+         slot -= 1;
+     }
+     if window[slot] != StatePart::ATerm {
+         return false;
+     }
+
+     // ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower
+     for cat in ahead.chars().map(|c| se::sentence_category(c).2) {
+         match cat {
+             se::SC_Lower => return true,
+             se::SC_OLetter
+             | se::SC_Upper
+             | se::SC_Sep
+             | se::SC_CR
+             | se::SC_LF
+             | se::SC_STerm
+             | se::SC_ATerm => return false,
+             _ => {}
+         }
+     }
+
+     // Ran out of look-ahead without meeting a `Lower`.
+     false
+ }
+
+ // https://unicode.org/reports/tr29/#SB8a
+ fn match_sb8a(state: &SentenceBreaksState) -> bool {
+     // Pattern: SATerm Close* Sp*
+     let window = state.0;
+     let mut slot = 3;
+     if window[slot] == StatePart::SpPlus {
+         slot -= 1;
+     }
+     if window[slot] == StatePart::ClosePlus {
+         slot -= 1;
+     }
+     match window[slot] {
+         StatePart::STerm | StatePart::ATerm => true,
+         _ => false,
+     }
+ }
+
+ // https://unicode.org/reports/tr29/#SB9
+ fn match_sb9(state: &SentenceBreaksState) -> bool {
+     // Pattern: SATerm Close*
+     let window = state.0;
+     let mut slot = 3;
+     if window[slot] == StatePart::ClosePlus {
+         slot -= 1;
+     }
+     match window[slot] {
+         StatePart::STerm | StatePart::ATerm => true,
+         _ => false,
+     }
+ }
+
+ // https://unicode.org/reports/tr29/#SB11
+ fn match_sb11(state: &SentenceBreaksState) -> bool {
+     // Pattern: SATerm Close* Sp* ParaSep?
+     let window = state.0;
+     let mut slot = 3;
+     // Optional paragraph separator in the newest slot.
+     match window[slot] {
+         StatePart::Sep | StatePart::CR | StatePart::LF => slot -= 1,
+         _ => {}
+     }
+     if window[slot] == StatePart::SpPlus {
+         slot -= 1;
+     }
+     if window[slot] == StatePart::ClosePlus {
+         slot -= 1;
+     }
+     match window[slot] {
+         StatePart::STerm | StatePart::ATerm => true,
+         _ => false,
+     }
+ }
+
+ impl<'a> Iterator for SentenceBreaks<'a> {
+ // Returns the index of the character which follows a break
+ type Item = usize;
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let slen = self.string.len();
+ // A sentence could be one character
+ (cmp::min(slen, 2), Some(slen + 1)) // NOTE(review): computed from the whole string, not from `self.pos`, so the hint does not shrink as breaks are consumed
+ }
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ use crate::tables::sentence as se;
+
+ for next_char in self.string[self.pos..].chars() {
+ let position_before = self.pos; // byte index of `next_char`; this is the value returned when a break precedes it
+ let state_before = self.state.clone(); // snapshot taken before consuming `next_char`; every rule below tests this snapshot
+
+ let next_cat = se::sentence_category(next_char).2;
+
+ self.pos += next_char.len_utf8();
+ self.state = self.state.next(next_cat);
+
+ match next_cat { // arms are tried top to bottom, so their order encodes rule priority
+ // SB1 https://unicode.org/reports/tr29/#SB1
+ _ if state_before.match1(StatePart::Sot) => return Some(position_before),
+
+ // SB2 is handled when inner iterator (chars) is finished
+
+ // SB3 https://unicode.org/reports/tr29/#SB3
+ SentenceCat::SC_LF if state_before.match1(StatePart::CR) => continue,
+
+ // SB4 https://unicode.org/reports/tr29/#SB4
+ _ if state_before.match1(StatePart::Sep)
+ || state_before.match1(StatePart::CR)
+ || state_before.match1(StatePart::LF) =>
+ {
+ return Some(position_before)
+ }
+
+ // SB5 https://unicode.org/reports/tr29/#SB5
+ SentenceCat::SC_Extend | SentenceCat::SC_Format => self.state = state_before, // roll the state back so these characters are ignored; `pos` stays advanced
+
+ // SB6 https://unicode.org/reports/tr29/#SB6
+ SentenceCat::SC_Numeric if state_before.match1(StatePart::ATerm) => continue,
+
+ // SB7 https://unicode.org/reports/tr29/#SB7
+ SentenceCat::SC_Upper
+ if state_before.match2(StatePart::UpperLower, StatePart::ATerm) =>
+ {
+ continue
+ }
+
+ // SB8 https://unicode.org/reports/tr29/#SB8
+ _ if match_sb8(&state_before, &self.string[position_before..]) => continue, // look-ahead starts at `next_char` itself
+
+ // SB8a https://unicode.org/reports/tr29/#SB8a
+ SentenceCat::SC_SContinue | SentenceCat::SC_STerm | SentenceCat::SC_ATerm
+ if match_sb8a(&state_before) =>
+ {
+ continue
+ }
+
+ // SB9 https://unicode.org/reports/tr29/#SB9
+ SentenceCat::SC_Close
+ | SentenceCat::SC_Sp
+ | SentenceCat::SC_Sep
+ | SentenceCat::SC_CR
+ | SentenceCat::SC_LF
+ if match_sb9(&state_before) =>
+ {
+ continue
+ }
+
+ // SB10 https://unicode.org/reports/tr29/#SB10
+ SentenceCat::SC_Sp
+ | SentenceCat::SC_Sep
+ | SentenceCat::SC_CR
+ | SentenceCat::SC_LF
+ if match_sb8a(&state_before) => // SB10 shares its left-hand pattern (SATerm Close* Sp*) with SB8a
+ {
+ continue
+ }
+
+ // SB11 https://unicode.org/reports/tr29/#SB11
+ _ if match_sb11(&state_before) => return Some(position_before),
+
+ // SB998 https://unicode.org/reports/tr29/#SB998
+ _ => continue,
+ }
+ }
+
+ // SB2 https://unicode.org/reports/tr29/#SB2
+ if self.state.match1(StatePart::Sot) { // nothing was ever consumed (empty input): no breaks at all
+ None
+ } else if self.state.match1(StatePart::Eot) { // the final break was already emitted by an earlier call
+ None
+ } else {
+ self.state = self.state.end(); // mark end-of-text so the final break is emitted only once
+ Some(self.pos)
+ }
+ }
+ }
+
+ pub fn new_sentence_breaks<'a>(source: &'a str) -> SentenceBreaks<'a> { // builds a break iterator positioned at the start of `source`
+ SentenceBreaks {
+ string: source,
+ pos: 0,
+ state: INITIAL_STATE,
+ }
+ }
+}
+
+/// An iterator over the substrings of a string which, after splitting the string on
+/// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries),
+/// contain any characters with the
+/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+/// property, or with
+/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+///
+/// This struct is created by the [`unicode_sentences`] method on the [`UnicodeSegmentation`]
+/// trait. See its documentation for more.
+///
+/// [`unicode_sentences`]: trait.UnicodeSegmentation.html#tymethod.unicode_sentences
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+#[derive(Clone)]
+pub struct UnicodeSentences<'a> {
+ inner: Filter<USentenceBounds<'a>, fn(&&str) -> bool>, // sentence-bounds iterator filtered through a plain `fn` pointer predicate
+}
+
+/// External iterator for a string's
+/// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
+///
+/// This struct is created by the [`split_sentence_bounds`] method on the [`UnicodeSegmentation`]
+/// trait. See its documentation for more.
+///
+/// [`split_sentence_bounds`]: trait.UnicodeSegmentation.html#tymethod.split_sentence_bounds
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+#[derive(Clone)]
+pub struct USentenceBounds<'a> {
+ iter: fwd::SentenceBreaks<'a>, // yields the byte positions of the breaks
+ sentence_start: Option<usize>, // start of the next substring to emit; `None` until the first break position has been read
+}
+
+/// External iterator for sentence boundaries and byte offsets.
+///
+/// This struct is created by the [`split_sentence_bound_indices`] method on the
+/// [`UnicodeSegmentation`] trait. See its documentation for more.
+///
+/// [`split_sentence_bound_indices`]: trait.UnicodeSegmentation.html#tymethod.split_sentence_bound_indices
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+#[derive(Clone)]
+pub struct USentenceBoundIndices<'a> {
+ start_offset: usize, // address of the source string's first byte, stored as an integer
+ iter: USentenceBounds<'a>, // produces the substrings whose offsets are reported
+}
+
+#[inline]
+pub fn new_sentence_bounds<'a>(source: &'a str) -> USentenceBounds<'a> { // wraps a fresh break iterator over `source`
+ USentenceBounds {
+ iter: fwd::new_sentence_breaks(source),
+ sentence_start: None, // filled in lazily by the first call to `next`
+ }
+}
+
+#[inline]
+pub fn new_sentence_bound_indices<'a>(source: &'a str) -> USentenceBoundIndices<'a> {
+    // Record where the source begins so that the address of each yielded
+    // substring can later be converted back into a byte offset.
+    let base_addr = source.as_ptr() as usize;
+    let bounds = new_sentence_bounds(source);
+    USentenceBoundIndices {
+        start_offset: base_addr,
+        iter: bounds,
+    }
+}
+
+#[inline]
+pub fn new_unicode_sentences<'b>(s: &'b str) -> UnicodeSentences<'b> {
+    use super::UnicodeSegmentation;
+    use crate::tables::util::is_alphanumeric;
+
+    // Predicate: the substring holds at least one alphanumeric character.
+    fn contains_alphanumeric(piece: &&str) -> bool {
+        piece.chars().any(is_alphanumeric)
+    }
+    // The `Filter` stored in `UnicodeSentences` names a plain fn pointer as
+    // its predicate type, so the fn item is coerced explicitly.
+    let keep: fn(&&str) -> bool = contains_alphanumeric;
+
+    let bounds = s.split_sentence_bounds();
+    UnicodeSentences {
+        inner: bounds.filter(keep),
+    }
+}
+
+impl<'a> Iterator for UnicodeSentences<'a> { // thin wrapper: all work happens in the filtered inner iterator
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ self.inner.next()
+ }
+}
+
+impl<'a> Iterator for USentenceBounds<'a> {
+    type Item = &'a str;
+
+    /// Bounds on the number of substrings still to come, derived from the
+    /// break iterator's hint: `k` break positions delimit `k - 1` substrings.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (lower, upper) = self.iter.size_hint();
+        // The break iterator reports a lower bound of 0 for an empty string,
+        // so a plain `lower - 1` underflows (panic in debug builds,
+        // `usize::MAX` in release builds). Raising the value to at least 1
+        // before subtracting clamps the result at zero.
+        (
+            cmp::max(lower, 1) - 1,
+            upper.map(|u| cmp::max(u, 1) - 1),
+        )
+    }
+
+    /// Yields the substring between the previously seen break position and
+    /// the next one, or `None` once the break iterator is exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        // The very first break position only marks where the first
+        // substring begins; nothing is emitted for it.
+        if self.sentence_start.is_none() {
+            self.sentence_start = Some(self.iter.next()?);
+        }
+        let start_pos = self.sentence_start?;
+
+        let break_pos = self.iter.next()?;
+        // The end of this substring is the start of the next one.
+        self.sentence_start = Some(break_pos);
+        Some(&self.iter.string[start_pos..break_pos])
+    }
+}
+
+impl<'a> Iterator for USentenceBoundIndices<'a> {
+    type Item = (usize, &'a str);
+
+    // Pairs each substring with its byte offset, recovered as the distance
+    // between the substring's address and the recorded start address.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, &'a str)> {
+        let sentence = self.iter.next()?;
+        let byte_offset = sentence.as_ptr() as usize - self.start_offset;
+        Some((byte_offset, sentence))
+    }
+
+    // One pair per underlying substring, so the inner hint applies as is.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
diff --git a/third_party/rust/unicode-segmentation/src/tables.rs b/third_party/rust/unicode-segmentation/src/tables.rs
new file mode 100644
index 0000000000..5a811c9229
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/tables.rs
@@ -0,0 +1,2675 @@
+// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+
+#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
+
+/// The version of [Unicode](http://www.unicode.org/)
+/// that this version of unicode-segmentation is based on.
+///
+/// Components are ordered most-significant first: the value `(15, 0, 0)`
+/// denotes Unicode 15.0.0.
+pub const UNICODE_VERSION: (u64, u64, u64) = (15, 0, 0);
+
+pub mod util {
+ #[inline]
+ pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
+ use core::cmp::Ordering::{Equal, Less, Greater};
+ r.binary_search_by(|&(lo,hi)| {
+ if lo <= c && c <= hi { Equal }
+ else if hi < c { Less }
+ else { Greater }
+ }).is_ok()
+ }
+
+ #[inline]
+ fn is_alphabetic(c: char) -> bool {
+ match c {
+ 'a' ..= 'z' | 'A' ..= 'Z' => true,
+ c if c > '' => super::derived_property::Alphabetic(c),
+ _ => false,
+ }
+ }
+
+ #[inline]
+ fn is_numeric(c: char) -> bool {
+ match c {
+ '0' ..= '9' => true,
+ c if c > '' => super::general_category::N(c),
+ _ => false,
+ }
+ }
+
+ #[inline]
+ pub fn is_alphanumeric(c: char) -> bool {
+ is_alphabetic(c) || is_numeric(c)
+ }
+}
+
+mod general_category {
+ const N_table: &'static [(char, char)] = &[
+ ('\u{30}', '\u{39}'), ('\u{b2}', '\u{b3}'), ('\u{b9}', '\u{b9}'), ('\u{bc}', '\u{be}'),
+ ('\u{660}', '\u{669}'), ('\u{6f0}', '\u{6f9}'), ('\u{7c0}', '\u{7c9}'), ('\u{966}',
+ '\u{96f}'), ('\u{9e6}', '\u{9ef}'), ('\u{9f4}', '\u{9f9}'), ('\u{a66}', '\u{a6f}'),
+ ('\u{ae6}', '\u{aef}'), ('\u{b66}', '\u{b6f}'), ('\u{b72}', '\u{b77}'), ('\u{be6}',
+ '\u{bf2}'), ('\u{c66}', '\u{c6f}'), ('\u{c78}', '\u{c7e}'), ('\u{ce6}', '\u{cef}'),
+ ('\u{d58}', '\u{d5e}'), ('\u{d66}', '\u{d78}'), ('\u{de6}', '\u{def}'), ('\u{e50}',
+ '\u{e59}'), ('\u{ed0}', '\u{ed9}'), ('\u{f20}', '\u{f33}'), ('\u{1040}', '\u{1049}'),
+ ('\u{1090}', '\u{1099}'), ('\u{1369}', '\u{137c}'), ('\u{16ee}', '\u{16f0}'), ('\u{17e0}',
+ '\u{17e9}'), ('\u{17f0}', '\u{17f9}'), ('\u{1810}', '\u{1819}'), ('\u{1946}', '\u{194f}'),
+ ('\u{19d0}', '\u{19da}'), ('\u{1a80}', '\u{1a89}'), ('\u{1a90}', '\u{1a99}'), ('\u{1b50}',
+ '\u{1b59}'), ('\u{1bb0}', '\u{1bb9}'), ('\u{1c40}', '\u{1c49}'), ('\u{1c50}', '\u{1c59}'),
+ ('\u{2070}', '\u{2070}'), ('\u{2074}', '\u{2079}'), ('\u{2080}', '\u{2089}'), ('\u{2150}',
+ '\u{2182}'), ('\u{2185}', '\u{2189}'), ('\u{2460}', '\u{249b}'), ('\u{24ea}', '\u{24ff}'),
+ ('\u{2776}', '\u{2793}'), ('\u{2cfd}', '\u{2cfd}'), ('\u{3007}', '\u{3007}'), ('\u{3021}',
+ '\u{3029}'), ('\u{3038}', '\u{303a}'), ('\u{3192}', '\u{3195}'), ('\u{3220}', '\u{3229}'),
+ ('\u{3248}', '\u{324f}'), ('\u{3251}', '\u{325f}'), ('\u{3280}', '\u{3289}'), ('\u{32b1}',
+ '\u{32bf}'), ('\u{a620}', '\u{a629}'), ('\u{a6e6}', '\u{a6ef}'), ('\u{a830}', '\u{a835}'),
+ ('\u{a8d0}', '\u{a8d9}'), ('\u{a900}', '\u{a909}'), ('\u{a9d0}', '\u{a9d9}'), ('\u{a9f0}',
+ '\u{a9f9}'), ('\u{aa50}', '\u{aa59}'), ('\u{abf0}', '\u{abf9}'), ('\u{ff10}', '\u{ff19}'),
+ ('\u{10107}', '\u{10133}'), ('\u{10140}', '\u{10178}'), ('\u{1018a}', '\u{1018b}'),
+ ('\u{102e1}', '\u{102fb}'), ('\u{10320}', '\u{10323}'), ('\u{10341}', '\u{10341}'),
+ ('\u{1034a}', '\u{1034a}'), ('\u{103d1}', '\u{103d5}'), ('\u{104a0}', '\u{104a9}'),
+ ('\u{10858}', '\u{1085f}'), ('\u{10879}', '\u{1087f}'), ('\u{108a7}', '\u{108af}'),
+ ('\u{108fb}', '\u{108ff}'), ('\u{10916}', '\u{1091b}'), ('\u{109bc}', '\u{109bd}'),
+ ('\u{109c0}', '\u{109cf}'), ('\u{109d2}', '\u{109ff}'), ('\u{10a40}', '\u{10a48}'),
+ ('\u{10a7d}', '\u{10a7e}'), ('\u{10a9d}', '\u{10a9f}'), ('\u{10aeb}', '\u{10aef}'),
+ ('\u{10b58}', '\u{10b5f}'), ('\u{10b78}', '\u{10b7f}'), ('\u{10ba9}', '\u{10baf}'),
+ ('\u{10cfa}', '\u{10cff}'), ('\u{10d30}', '\u{10d39}'), ('\u{10e60}', '\u{10e7e}'),
+ ('\u{10f1d}', '\u{10f26}'), ('\u{10f51}', '\u{10f54}'), ('\u{10fc5}', '\u{10fcb}'),
+ ('\u{11052}', '\u{1106f}'), ('\u{110f0}', '\u{110f9}'), ('\u{11136}', '\u{1113f}'),
+ ('\u{111d0}', '\u{111d9}'), ('\u{111e1}', '\u{111f4}'), ('\u{112f0}', '\u{112f9}'),
+ ('\u{11450}', '\u{11459}'), ('\u{114d0}', '\u{114d9}'), ('\u{11650}', '\u{11659}'),
+ ('\u{116c0}', '\u{116c9}'), ('\u{11730}', '\u{1173b}'), ('\u{118e0}', '\u{118f2}'),
+ ('\u{11950}', '\u{11959}'), ('\u{11c50}', '\u{11c6c}'), ('\u{11d50}', '\u{11d59}'),
+ ('\u{11da0}', '\u{11da9}'), ('\u{11f50}', '\u{11f59}'), ('\u{11fc0}', '\u{11fd4}'),
+ ('\u{12400}', '\u{1246e}'), ('\u{16a60}', '\u{16a69}'), ('\u{16ac0}', '\u{16ac9}'),
+ ('\u{16b50}', '\u{16b59}'), ('\u{16b5b}', '\u{16b61}'), ('\u{16e80}', '\u{16e96}'),
+ ('\u{1d2c0}', '\u{1d2d3}'), ('\u{1d2e0}', '\u{1d2f3}'), ('\u{1d360}', '\u{1d378}'),
+ ('\u{1d7ce}', '\u{1d7ff}'), ('\u{1e140}', '\u{1e149}'), ('\u{1e2f0}', '\u{1e2f9}'),
+ ('\u{1e4f0}', '\u{1e4f9}'), ('\u{1e8c7}', '\u{1e8cf}'), ('\u{1e950}', '\u{1e959}'),
+ ('\u{1ec71}', '\u{1ecab}'), ('\u{1ecad}', '\u{1ecaf}'), ('\u{1ecb1}', '\u{1ecb4}'),
+ ('\u{1ed01}', '\u{1ed2d}'), ('\u{1ed2f}', '\u{1ed3d}'), ('\u{1f100}', '\u{1f10c}'),
+ ('\u{1fbf0}', '\u{1fbf9}')
+ ];
+
+ /// Returns `true` if `c` falls inside one of the ranges of `N_table`.
+ #[inline]
+ pub fn N(c: char) -> bool {
+     let ranges: &'static [(char, char)] = N_table;
+     super::util::bsearch_range_table(c, ranges)
+ }
+
+}
+
+mod derived_property {
+ const Alphabetic_table: &'static [(char, char)] = &[
+ ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
+ ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'),
+ ('\u{2c6}', '\u{2d1}'), ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}',
+ '\u{2ee}'), ('\u{345}', '\u{345}'), ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'),
+ ('\u{37a}', '\u{37d}'), ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}',
+ '\u{38a}'), ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
+ ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'), ('\u{559}',
+ '\u{559}'), ('\u{560}', '\u{588}'), ('\u{5b0}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'), ('\u{5c7}', '\u{5c7}'), ('\u{5d0}',
+ '\u{5ea}'), ('\u{5ef}', '\u{5f2}'), ('\u{610}', '\u{61a}'), ('\u{620}', '\u{657}'),
+ ('\u{659}', '\u{65f}'), ('\u{66e}', '\u{6d3}'), ('\u{6d5}', '\u{6dc}'), ('\u{6e1}',
+ '\u{6e8}'), ('\u{6ed}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
+ ('\u{710}', '\u{73f}'), ('\u{74d}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{7f4}',
+ '\u{7f5}'), ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{817}'), ('\u{81a}', '\u{82c}'),
+ ('\u{840}', '\u{858}'), ('\u{860}', '\u{86a}'), ('\u{870}', '\u{887}'), ('\u{889}',
+ '\u{88e}'), ('\u{8a0}', '\u{8c9}'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'),
+ ('\u{8f0}', '\u{93b}'), ('\u{93d}', '\u{94c}'), ('\u{94e}', '\u{950}'), ('\u{955}',
+ '\u{963}'), ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
+ ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'), ('\u{9b6}',
+ '\u{9b9}'), ('\u{9bd}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'), ('\u{9cb}', '\u{9cc}'),
+ ('\u{9ce}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'), ('\u{9dc}', '\u{9dd}'), ('\u{9df}',
+ '\u{9e3}'), ('\u{9f0}', '\u{9f1}'), ('\u{9fc}', '\u{9fc}'), ('\u{a01}', '\u{a03}'),
+ ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}',
+ '\u{a30}'), ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
+ ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4c}'), ('\u{a51}',
+ '\u{a51}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a70}', '\u{a75}'),
+ ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}',
+ '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'),
+ ('\u{abd}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acc}'), ('\u{ad0}',
+ '\u{ad0}'), ('\u{ae0}', '\u{ae3}'), ('\u{af9}', '\u{afc}'), ('\u{b01}', '\u{b03}'),
+ ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}',
+ '\u{b30}'), ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b44}'),
+ ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4c}'), ('\u{b56}', '\u{b57}'), ('\u{b5c}',
+ '\u{b5d}'), ('\u{b5f}', '\u{b63}'), ('\u{b71}', '\u{b71}'), ('\u{b82}', '\u{b83}'),
+ ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'), ('\u{b99}',
+ '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'),
+ ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'), ('\u{bc6}',
+ '\u{bc8}'), ('\u{bca}', '\u{bcc}'), ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}',
+ '\u{c39}'), ('\u{c3d}', '\u{c44}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4c}'),
+ ('\u{c55}', '\u{c56}'), ('\u{c58}', '\u{c5a}'), ('\u{c5d}', '\u{c5d}'), ('\u{c60}',
+ '\u{c63}'), ('\u{c80}', '\u{c83}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'),
+ ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbd}',
+ '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'),
+ ('\u{cdd}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'), ('\u{cf1}', '\u{cf3}'), ('\u{d00}',
+ '\u{d0c}'), ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d44}'),
+ ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4c}'), ('\u{d4e}', '\u{d4e}'), ('\u{d54}',
+ '\u{d57}'), ('\u{d5f}', '\u{d63}'), ('\u{d7a}', '\u{d7f}'), ('\u{d81}', '\u{d83}'),
+ ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}',
+ '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
+ ('\u{dd8}', '\u{ddf}'), ('\u{df2}', '\u{df3}'), ('\u{e01}', '\u{e3a}'), ('\u{e40}',
+ '\u{e46}'), ('\u{e4d}', '\u{e4d}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'),
+ ('\u{e86}', '\u{e8a}'), ('\u{e8c}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}',
+ '\u{eb9}'), ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'),
+ ('\u{ecd}', '\u{ecd}'), ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f40}',
+ '\u{f47}'), ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f83}'), ('\u{f88}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'), ('\u{1000}', '\u{1036}'), ('\u{1038}', '\u{1038}'), ('\u{103b}',
+ '\u{103f}'), ('\u{1050}', '\u{108f}'), ('\u{109a}', '\u{109d}'), ('\u{10a0}', '\u{10c5}'),
+ ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}',
+ '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'), ('\u{1258}', '\u{1258}'),
+ ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}',
+ '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}', '\u{12c0}'),
+ ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}',
+ '\u{1315}'), ('\u{1318}', '\u{135a}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}', '\u{13f5}'),
+ ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'), ('\u{166f}', '\u{167f}'), ('\u{1681}',
+ '\u{169a}'), ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}', '\u{1713}'),
+ ('\u{171f}', '\u{1733}'), ('\u{1740}', '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}',
+ '\u{1770}'), ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17b3}'), ('\u{17b6}', '\u{17c8}'),
+ ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dc}'), ('\u{1820}', '\u{1878}'), ('\u{1880}',
+ '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'),
+ ('\u{1930}', '\u{1938}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'), ('\u{1980}',
+ '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}', '\u{1a5e}'),
+ ('\u{1a61}', '\u{1a74}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1abf}', '\u{1ac0}'), ('\u{1acc}',
+ '\u{1ace}'), ('\u{1b00}', '\u{1b33}'), ('\u{1b35}', '\u{1b43}'), ('\u{1b45}', '\u{1b4c}'),
+ ('\u{1b80}', '\u{1ba9}'), ('\u{1bac}', '\u{1baf}'), ('\u{1bba}', '\u{1be5}'), ('\u{1be7}',
+ '\u{1bf1}'), ('\u{1c00}', '\u{1c36}'), ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}', '\u{1c7d}'),
+ ('\u{1c80}', '\u{1c88}'), ('\u{1c90}', '\u{1cba}'), ('\u{1cbd}', '\u{1cbf}'), ('\u{1ce9}',
+ '\u{1cec}'), ('\u{1cee}', '\u{1cf3}'), ('\u{1cf5}', '\u{1cf6}'), ('\u{1cfa}', '\u{1cfa}'),
+ ('\u{1d00}', '\u{1dbf}'), ('\u{1de7}', '\u{1df4}'), ('\u{1e00}', '\u{1f15}'), ('\u{1f18}',
+ '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'),
+ ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}',
+ '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'),
+ ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}',
+ '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'),
+ ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{2102}',
+ '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}', '\u{2115}'),
+ ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), ('\u{2128}',
+ '\u{2128}'), ('\u{212a}', '\u{212d}'), ('\u{212f}', '\u{2139}'), ('\u{213c}', '\u{213f}'),
+ ('\u{2145}', '\u{2149}'), ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{24b6}',
+ '\u{24e9}'), ('\u{2c00}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'),
+ ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}',
+ '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d80}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'),
+ ('\u{2da8}', '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}',
+ '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'),
+ ('\u{2de0}', '\u{2dff}'), ('\u{2e2f}', '\u{2e2f}'), ('\u{3005}', '\u{3007}'), ('\u{3021}',
+ '\u{3029}'), ('\u{3031}', '\u{3035}'), ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'),
+ ('\u{309d}', '\u{309f}'), ('\u{30a1}', '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}',
+ '\u{312f}'), ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31bf}'), ('\u{31f0}', '\u{31ff}'),
+ ('\u{3400}', '\u{4dbf}'), ('\u{4e00}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
+ '\u{a60c}'), ('\u{a610}', '\u{a61f}'), ('\u{a62a}', '\u{a62b}'), ('\u{a640}', '\u{a66e}'),
+ ('\u{a674}', '\u{a67b}'), ('\u{a67f}', '\u{a6ef}'), ('\u{a717}', '\u{a71f}'), ('\u{a722}',
+ '\u{a788}'), ('\u{a78b}', '\u{a7ca}'), ('\u{a7d0}', '\u{a7d1}'), ('\u{a7d3}', '\u{a7d3}'),
+ ('\u{a7d5}', '\u{a7d9}'), ('\u{a7f2}', '\u{a805}'), ('\u{a807}', '\u{a827}'), ('\u{a840}',
+ '\u{a873}'), ('\u{a880}', '\u{a8c3}'), ('\u{a8c5}', '\u{a8c5}'), ('\u{a8f2}', '\u{a8f7}'),
+ ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', '\u{a8ff}'), ('\u{a90a}', '\u{a92a}'), ('\u{a930}',
+ '\u{a952}'), ('\u{a960}', '\u{a97c}'), ('\u{a980}', '\u{a9b2}'), ('\u{a9b4}', '\u{a9bf}'),
+ ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e0}', '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}',
+ '\u{aa36}'), ('\u{aa40}', '\u{aa4d}'), ('\u{aa60}', '\u{aa76}'), ('\u{aa7a}', '\u{aabe}'),
+ ('\u{aac0}', '\u{aac0}'), ('\u{aac2}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'), ('\u{aae0}',
+ '\u{aaef}'), ('\u{aaf2}', '\u{aaf5}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'),
+ ('\u{ab11}', '\u{ab16}'), ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}',
+ '\u{ab5a}'), ('\u{ab5c}', '\u{ab69}'), ('\u{ab70}', '\u{abea}'), ('\u{ac00}', '\u{d7a3}'),
+ ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'), ('\u{fa70}',
+ '\u{fad9}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'),
+ ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'), ('\u{fb40}',
+ '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'),
+ ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}',
+ '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}'),
+ ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}',
+ '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
+ '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}',
+ '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}'), ('\u{10140}',
+ '\u{10174}'), ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'), ('\u{10300}',
+ '\u{1031f}'), ('\u{1032d}', '\u{1034a}'), ('\u{10350}', '\u{1037a}'), ('\u{10380}',
+ '\u{1039d}'), ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}',
+ '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{104b0}', '\u{104d3}'), ('\u{104d8}',
+ '\u{104fb}'), ('\u{10500}', '\u{10527}'), ('\u{10530}', '\u{10563}'), ('\u{10570}',
+ '\u{1057a}'), ('\u{1057c}', '\u{1058a}'), ('\u{1058c}', '\u{10592}'), ('\u{10594}',
+ '\u{10595}'), ('\u{10597}', '\u{105a1}'), ('\u{105a3}', '\u{105b1}'), ('\u{105b3}',
+ '\u{105b9}'), ('\u{105bb}', '\u{105bc}'), ('\u{10600}', '\u{10736}'), ('\u{10740}',
+ '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10780}', '\u{10785}'), ('\u{10787}',
+ '\u{107b0}'), ('\u{107b2}', '\u{107ba}'), ('\u{10800}', '\u{10805}'), ('\u{10808}',
+ '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}',
+ '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}', '\u{10876}'), ('\u{10880}',
+ '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}',
+ '\u{10915}'), ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}',
+ '\u{109bf}'), ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}',
+ '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a35}'), ('\u{10a60}',
+ '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}',
+ '\u{10ae4}'), ('\u{10b00}', '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}',
+ '\u{10b72}'), ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}',
+ '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{10d00}', '\u{10d27}'), ('\u{10e80}',
+ '\u{10ea9}'), ('\u{10eab}', '\u{10eac}'), ('\u{10eb0}', '\u{10eb1}'), ('\u{10f00}',
+ '\u{10f1c}'), ('\u{10f27}', '\u{10f27}'), ('\u{10f30}', '\u{10f45}'), ('\u{10f70}',
+ '\u{10f81}'), ('\u{10fb0}', '\u{10fc4}'), ('\u{10fe0}', '\u{10ff6}'), ('\u{11000}',
+ '\u{11045}'), ('\u{11071}', '\u{11075}'), ('\u{11080}', '\u{110b8}'), ('\u{110c2}',
+ '\u{110c2}'), ('\u{110d0}', '\u{110e8}'), ('\u{11100}', '\u{11132}'), ('\u{11144}',
+ '\u{11147}'), ('\u{11150}', '\u{11172}'), ('\u{11176}', '\u{11176}'), ('\u{11180}',
+ '\u{111bf}'), ('\u{111c1}', '\u{111c4}'), ('\u{111ce}', '\u{111cf}'), ('\u{111da}',
+ '\u{111da}'), ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'), ('\u{11213}',
+ '\u{11234}'), ('\u{11237}', '\u{11237}'), ('\u{1123e}', '\u{11241}'), ('\u{11280}',
+ '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'), ('\u{1128f}',
+ '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}', '\u{112e8}'), ('\u{11300}',
+ '\u{11303}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}', '\u{11310}'), ('\u{11313}',
+ '\u{11328}'), ('\u{1132a}', '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}',
+ '\u{11339}'), ('\u{1133d}', '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}',
+ '\u{1134c}'), ('\u{11350}', '\u{11350}'), ('\u{11357}', '\u{11357}'), ('\u{1135d}',
+ '\u{11363}'), ('\u{11400}', '\u{11441}'), ('\u{11443}', '\u{11445}'), ('\u{11447}',
+ '\u{1144a}'), ('\u{1145f}', '\u{11461}'), ('\u{11480}', '\u{114c1}'), ('\u{114c4}',
+ '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{11580}', '\u{115b5}'), ('\u{115b8}',
+ '\u{115be}'), ('\u{115d8}', '\u{115dd}'), ('\u{11600}', '\u{1163e}'), ('\u{11640}',
+ '\u{11640}'), ('\u{11644}', '\u{11644}'), ('\u{11680}', '\u{116b5}'), ('\u{116b8}',
+ '\u{116b8}'), ('\u{11700}', '\u{1171a}'), ('\u{1171d}', '\u{1172a}'), ('\u{11740}',
+ '\u{11746}'), ('\u{11800}', '\u{11838}'), ('\u{118a0}', '\u{118df}'), ('\u{118ff}',
+ '\u{11906}'), ('\u{11909}', '\u{11909}'), ('\u{1190c}', '\u{11913}'), ('\u{11915}',
+ '\u{11916}'), ('\u{11918}', '\u{11935}'), ('\u{11937}', '\u{11938}'), ('\u{1193b}',
+ '\u{1193c}'), ('\u{1193f}', '\u{11942}'), ('\u{119a0}', '\u{119a7}'), ('\u{119aa}',
+ '\u{119d7}'), ('\u{119da}', '\u{119df}'), ('\u{119e1}', '\u{119e1}'), ('\u{119e3}',
+ '\u{119e4}'), ('\u{11a00}', '\u{11a32}'), ('\u{11a35}', '\u{11a3e}'), ('\u{11a50}',
+ '\u{11a97}'), ('\u{11a9d}', '\u{11a9d}'), ('\u{11ab0}', '\u{11af8}'), ('\u{11c00}',
+ '\u{11c08}'), ('\u{11c0a}', '\u{11c36}'), ('\u{11c38}', '\u{11c3e}'), ('\u{11c40}',
+ '\u{11c40}'), ('\u{11c72}', '\u{11c8f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11ca9}',
+ '\u{11cb6}'), ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}',
+ '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}',
+ '\u{11d41}'), ('\u{11d43}', '\u{11d43}'), ('\u{11d46}', '\u{11d47}'), ('\u{11d60}',
+ '\u{11d65}'), ('\u{11d67}', '\u{11d68}'), ('\u{11d6a}', '\u{11d8e}'), ('\u{11d90}',
+ '\u{11d91}'), ('\u{11d93}', '\u{11d96}'), ('\u{11d98}', '\u{11d98}'), ('\u{11ee0}',
+ '\u{11ef6}'), ('\u{11f00}', '\u{11f10}'), ('\u{11f12}', '\u{11f3a}'), ('\u{11f3e}',
+ '\u{11f40}'), ('\u{11fb0}', '\u{11fb0}'), ('\u{12000}', '\u{12399}'), ('\u{12400}',
+ '\u{1246e}'), ('\u{12480}', '\u{12543}'), ('\u{12f90}', '\u{12ff0}'), ('\u{13000}',
+ '\u{1342f}'), ('\u{13441}', '\u{13446}'), ('\u{14400}', '\u{14646}'), ('\u{16800}',
+ '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'), ('\u{16a70}', '\u{16abe}'), ('\u{16ad0}',
+ '\u{16aed}'), ('\u{16b00}', '\u{16b2f}'), ('\u{16b40}', '\u{16b43}'), ('\u{16b63}',
+ '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), ('\u{16e40}', '\u{16e7f}'), ('\u{16f00}',
+ '\u{16f4a}'), ('\u{16f4f}', '\u{16f87}'), ('\u{16f8f}', '\u{16f9f}'), ('\u{16fe0}',
+ '\u{16fe1}'), ('\u{16fe3}', '\u{16fe3}'), ('\u{16ff0}', '\u{16ff1}'), ('\u{17000}',
+ '\u{187f7}'), ('\u{18800}', '\u{18cd5}'), ('\u{18d00}', '\u{18d08}'), ('\u{1aff0}',
+ '\u{1aff3}'), ('\u{1aff5}', '\u{1affb}'), ('\u{1affd}', '\u{1affe}'), ('\u{1b000}',
+ '\u{1b122}'), ('\u{1b132}', '\u{1b132}'), ('\u{1b150}', '\u{1b152}'), ('\u{1b155}',
+ '\u{1b155}'), ('\u{1b164}', '\u{1b167}'), ('\u{1b170}', '\u{1b2fb}'), ('\u{1bc00}',
+ '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}',
+ '\u{1bc99}'), ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1d400}', '\u{1d454}'), ('\u{1d456}',
+ '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}',
+ '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}',
+ '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}',
+ '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
+ '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}',
+ '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}',
+ '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}',
+ '\u{1d714}'), ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}',
+ '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}',
+ '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1df00}', '\u{1df1e}'), ('\u{1df25}',
+ '\u{1df2a}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}',
+ '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), ('\u{1e030}',
+ '\u{1e06d}'), ('\u{1e08f}', '\u{1e08f}'), ('\u{1e100}', '\u{1e12c}'), ('\u{1e137}',
+ '\u{1e13d}'), ('\u{1e14e}', '\u{1e14e}'), ('\u{1e290}', '\u{1e2ad}'), ('\u{1e2c0}',
+ '\u{1e2eb}'), ('\u{1e4d0}', '\u{1e4eb}'), ('\u{1e7e0}', '\u{1e7e6}'), ('\u{1e7e8}',
+ '\u{1e7eb}'), ('\u{1e7ed}', '\u{1e7ee}'), ('\u{1e7f0}', '\u{1e7fe}'), ('\u{1e800}',
+ '\u{1e8c4}'), ('\u{1e900}', '\u{1e943}'), ('\u{1e947}', '\u{1e947}'), ('\u{1e94b}',
+ '\u{1e94b}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}',
+ '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}',
+ '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}',
+ '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}',
+ '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}',
+ '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}',
+ '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}',
+ '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}',
+ '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}',
+ '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}',
+ '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}',
+ '\u{1eebb}'), ('\u{1f130}', '\u{1f149}'), ('\u{1f150}', '\u{1f169}'), ('\u{1f170}',
+ '\u{1f189}'), ('\u{20000}', '\u{2a6df}'), ('\u{2a700}', '\u{2b739}'), ('\u{2b740}',
+ '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}',
+ '\u{2fa1d}'), ('\u{30000}', '\u{3134a}'), ('\u{31350}', '\u{323af}')
+ ];
+
+ /// Returns `true` if `c` falls inside one of the ranges of `Alphabetic_table`.
+ #[inline]
+ pub fn Alphabetic(c: char) -> bool {
+     let ranges: &'static [(char, char)] = Alphabetic_table;
+     super::util::bsearch_range_table(c, ranges)
+ }
+
+}
+
+pub mod grapheme {
+ use core::result::Result::{Ok, Err};
+
+ pub use self::GraphemeCat::*;
+
+ #[allow(non_camel_case_types)]
+ #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+ pub enum GraphemeCat {
+ GC_Any,
+ GC_CR,
+ GC_Control,
+ GC_Extend,
+ GC_Extended_Pictographic,
+ GC_L,
+ GC_LF,
+ GC_LV,
+ GC_LVT,
+ GC_Prepend,
+ GC_Regional_Indicator,
+ GC_SpacingMark,
+ GC_T,
+ GC_V,
+ GC_ZWJ,
+ }
+
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> (u32, u32, GraphemeCat) {
+ use core::cmp::Ordering::{Equal, Less, Greater};
+ match r.binary_search_by(|&(lo, hi, _)| {
+ if lo <= c && c <= hi { Equal }
+ else if hi < c { Less }
+ else { Greater }
+ }) {
+ Ok(idx) => {
+ let (lower, upper, cat) = r[idx];
+ (lower as u32, upper as u32, cat)
+ }
+ Err(idx) => {
+ (
+ if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 },
+ r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
+ GC_Any,
+ )
+ }
+ }
+ }
+
+ pub fn grapheme_category(c: char) -> (u32, u32, GraphemeCat) {
+ bsearch_range_value_table(c, grapheme_cat_table)
+ }
+
+ const grapheme_cat_table: &'static [(char, char, GraphemeCat)] = &[
+ ('\u{0}', '\u{9}', GC_Control), ('\u{a}', '\u{a}', GC_LF), ('\u{b}', '\u{c}', GC_Control),
+ ('\u{d}', '\u{d}', GC_CR), ('\u{e}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}',
+ GC_Control), ('\u{a9}', '\u{a9}', GC_Extended_Pictographic), ('\u{ad}', '\u{ad}',
+ GC_Control), ('\u{ae}', '\u{ae}', GC_Extended_Pictographic), ('\u{300}', '\u{36f}',
+ GC_Extend), ('\u{483}', '\u{489}', GC_Extend), ('\u{591}', '\u{5bd}', GC_Extend),
+ ('\u{5bf}', '\u{5bf}', GC_Extend), ('\u{5c1}', '\u{5c2}', GC_Extend), ('\u{5c4}', '\u{5c5}',
+ GC_Extend), ('\u{5c7}', '\u{5c7}', GC_Extend), ('\u{600}', '\u{605}', GC_Prepend),
+ ('\u{610}', '\u{61a}', GC_Extend), ('\u{61c}', '\u{61c}', GC_Control), ('\u{64b}',
+ '\u{65f}', GC_Extend), ('\u{670}', '\u{670}', GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend),
+ ('\u{6dd}', '\u{6dd}', GC_Prepend), ('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}',
+ '\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}', GC_Extend), ('\u{70f}', '\u{70f}',
+ GC_Prepend), ('\u{711}', '\u{711}', GC_Extend), ('\u{730}', '\u{74a}', GC_Extend),
+ ('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}', GC_Extend), ('\u{7fd}', '\u{7fd}',
+ GC_Extend), ('\u{816}', '\u{819}', GC_Extend), ('\u{81b}', '\u{823}', GC_Extend),
+ ('\u{825}', '\u{827}', GC_Extend), ('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}',
+ GC_Extend), ('\u{890}', '\u{891}', GC_Prepend), ('\u{898}', '\u{89f}', GC_Extend),
+ ('\u{8ca}', '\u{8e1}', GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend), ('\u{8e3}',
+ '\u{902}', GC_Extend), ('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}', '\u{93a}',
+ GC_Extend), ('\u{93b}', '\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}', GC_Extend),
+ ('\u{93e}', '\u{940}', GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend), ('\u{949}',
+ '\u{94c}', GC_SpacingMark), ('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}', '\u{94f}',
+ GC_SpacingMark), ('\u{951}', '\u{957}', GC_Extend), ('\u{962}', '\u{963}', GC_Extend),
+ ('\u{981}', '\u{981}', GC_Extend), ('\u{982}', '\u{983}', GC_SpacingMark), ('\u{9bc}',
+ '\u{9bc}', GC_Extend), ('\u{9be}', '\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}',
+ GC_SpacingMark), ('\u{9c1}', '\u{9c4}', GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark),
+ ('\u{9cb}', '\u{9cc}', GC_SpacingMark), ('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}',
+ '\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}', GC_Extend), ('\u{9fe}', '\u{9fe}', GC_Extend),
+ ('\u{a01}', '\u{a02}', GC_Extend), ('\u{a03}', '\u{a03}', GC_SpacingMark), ('\u{a3c}',
+ '\u{a3c}', GC_Extend), ('\u{a3e}', '\u{a40}', GC_SpacingMark), ('\u{a41}', '\u{a42}',
+ GC_Extend), ('\u{a47}', '\u{a48}', GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend),
+ ('\u{a51}', '\u{a51}', GC_Extend), ('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}',
+ GC_Extend), ('\u{a81}', '\u{a82}', GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark),
+ ('\u{abc}', '\u{abc}', GC_Extend), ('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}',
+ '\u{ac5}', GC_Extend), ('\u{ac7}', '\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}',
+ GC_SpacingMark), ('\u{acb}', '\u{acc}', GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend),
+ ('\u{ae2}', '\u{ae3}', GC_Extend), ('\u{afa}', '\u{aff}', GC_Extend), ('\u{b01}', '\u{b01}',
+ GC_Extend), ('\u{b02}', '\u{b03}', GC_SpacingMark), ('\u{b3c}', '\u{b3c}', GC_Extend),
+ ('\u{b3e}', '\u{b3f}', GC_Extend), ('\u{b40}', '\u{b40}', GC_SpacingMark), ('\u{b41}',
+ '\u{b44}', GC_Extend), ('\u{b47}', '\u{b48}', GC_SpacingMark), ('\u{b4b}', '\u{b4c}',
+ GC_SpacingMark), ('\u{b4d}', '\u{b4d}', GC_Extend), ('\u{b55}', '\u{b57}', GC_Extend),
+ ('\u{b62}', '\u{b63}', GC_Extend), ('\u{b82}', '\u{b82}', GC_Extend), ('\u{bbe}', '\u{bbe}',
+ GC_Extend), ('\u{bbf}', '\u{bbf}', GC_SpacingMark), ('\u{bc0}', '\u{bc0}', GC_Extend),
+ ('\u{bc1}', '\u{bc2}', GC_SpacingMark), ('\u{bc6}', '\u{bc8}', GC_SpacingMark), ('\u{bca}',
+ '\u{bcc}', GC_SpacingMark), ('\u{bcd}', '\u{bcd}', GC_Extend), ('\u{bd7}', '\u{bd7}',
+ GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend), ('\u{c01}', '\u{c03}', GC_SpacingMark),
+ ('\u{c04}', '\u{c04}', GC_Extend), ('\u{c3c}', '\u{c3c}', GC_Extend), ('\u{c3e}', '\u{c40}',
+ GC_Extend), ('\u{c41}', '\u{c44}', GC_SpacingMark), ('\u{c46}', '\u{c48}', GC_Extend),
+ ('\u{c4a}', '\u{c4d}', GC_Extend), ('\u{c55}', '\u{c56}', GC_Extend), ('\u{c62}', '\u{c63}',
+ GC_Extend), ('\u{c81}', '\u{c81}', GC_Extend), ('\u{c82}', '\u{c83}', GC_SpacingMark),
+ ('\u{cbc}', '\u{cbc}', GC_Extend), ('\u{cbe}', '\u{cbe}', GC_SpacingMark), ('\u{cbf}',
+ '\u{cbf}', GC_Extend), ('\u{cc0}', '\u{cc1}', GC_SpacingMark), ('\u{cc2}', '\u{cc2}',
+ GC_Extend), ('\u{cc3}', '\u{cc4}', GC_SpacingMark), ('\u{cc6}', '\u{cc6}', GC_Extend),
+ ('\u{cc7}', '\u{cc8}', GC_SpacingMark), ('\u{cca}', '\u{ccb}', GC_SpacingMark), ('\u{ccc}',
+ '\u{ccd}', GC_Extend), ('\u{cd5}', '\u{cd6}', GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend),
+ ('\u{cf3}', '\u{cf3}', GC_SpacingMark), ('\u{d00}', '\u{d01}', GC_Extend), ('\u{d02}',
+ '\u{d03}', GC_SpacingMark), ('\u{d3b}', '\u{d3c}', GC_Extend), ('\u{d3e}', '\u{d3e}',
+ GC_Extend), ('\u{d3f}', '\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}', GC_Extend),
+ ('\u{d46}', '\u{d48}', GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark), ('\u{d4d}',
+ '\u{d4d}', GC_Extend), ('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}', '\u{d57}',
+ GC_Extend), ('\u{d62}', '\u{d63}', GC_Extend), ('\u{d81}', '\u{d81}', GC_Extend),
+ ('\u{d82}', '\u{d83}', GC_SpacingMark), ('\u{dca}', '\u{dca}', GC_Extend), ('\u{dcf}',
+ '\u{dcf}', GC_Extend), ('\u{dd0}', '\u{dd1}', GC_SpacingMark), ('\u{dd2}', '\u{dd4}',
+ GC_Extend), ('\u{dd6}', '\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}', GC_SpacingMark),
+ ('\u{ddf}', '\u{ddf}', GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark), ('\u{e31}',
+ '\u{e31}', GC_Extend), ('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}', '\u{e3a}',
+ GC_Extend), ('\u{e47}', '\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend),
+ ('\u{eb3}', '\u{eb3}', GC_SpacingMark), ('\u{eb4}', '\u{ebc}', GC_Extend), ('\u{ec8}',
+ '\u{ece}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend), ('\u{f35}', '\u{f35}', GC_Extend),
+ ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}', GC_Extend), ('\u{f3e}', '\u{f3f}',
+ GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend), ('\u{f7f}', '\u{f7f}', GC_SpacingMark),
+ ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}', '\u{f87}', GC_Extend), ('\u{f8d}', '\u{f97}',
+ GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend), ('\u{fc6}', '\u{fc6}', GC_Extend),
+ ('\u{102d}', '\u{1030}', GC_Extend), ('\u{1031}', '\u{1031}', GC_SpacingMark), ('\u{1032}',
+ '\u{1037}', GC_Extend), ('\u{1039}', '\u{103a}', GC_Extend), ('\u{103b}', '\u{103c}',
+ GC_SpacingMark), ('\u{103d}', '\u{103e}', GC_Extend), ('\u{1056}', '\u{1057}',
+ GC_SpacingMark), ('\u{1058}', '\u{1059}', GC_Extend), ('\u{105e}', '\u{1060}', GC_Extend),
+ ('\u{1071}', '\u{1074}', GC_Extend), ('\u{1082}', '\u{1082}', GC_Extend), ('\u{1084}',
+ '\u{1084}', GC_SpacingMark), ('\u{1085}', '\u{1086}', GC_Extend), ('\u{108d}', '\u{108d}',
+ GC_Extend), ('\u{109d}', '\u{109d}', GC_Extend), ('\u{1100}', '\u{115f}', GC_L),
+ ('\u{1160}', '\u{11a7}', GC_V), ('\u{11a8}', '\u{11ff}', GC_T), ('\u{135d}', '\u{135f}',
+ GC_Extend), ('\u{1712}', '\u{1714}', GC_Extend), ('\u{1715}', '\u{1715}', GC_SpacingMark),
+ ('\u{1732}', '\u{1733}', GC_Extend), ('\u{1734}', '\u{1734}', GC_SpacingMark), ('\u{1752}',
+ '\u{1753}', GC_Extend), ('\u{1772}', '\u{1773}', GC_Extend), ('\u{17b4}', '\u{17b5}',
+ GC_Extend), ('\u{17b6}', '\u{17b6}', GC_SpacingMark), ('\u{17b7}', '\u{17bd}', GC_Extend),
+ ('\u{17be}', '\u{17c5}', GC_SpacingMark), ('\u{17c6}', '\u{17c6}', GC_Extend), ('\u{17c7}',
+ '\u{17c8}', GC_SpacingMark), ('\u{17c9}', '\u{17d3}', GC_Extend), ('\u{17dd}', '\u{17dd}',
+ GC_Extend), ('\u{180b}', '\u{180d}', GC_Extend), ('\u{180e}', '\u{180e}', GC_Control),
+ ('\u{180f}', '\u{180f}', GC_Extend), ('\u{1885}', '\u{1886}', GC_Extend), ('\u{18a9}',
+ '\u{18a9}', GC_Extend), ('\u{1920}', '\u{1922}', GC_Extend), ('\u{1923}', '\u{1926}',
+ GC_SpacingMark), ('\u{1927}', '\u{1928}', GC_Extend), ('\u{1929}', '\u{192b}',
+ GC_SpacingMark), ('\u{1930}', '\u{1931}', GC_SpacingMark), ('\u{1932}', '\u{1932}',
+ GC_Extend), ('\u{1933}', '\u{1938}', GC_SpacingMark), ('\u{1939}', '\u{193b}', GC_Extend),
+ ('\u{1a17}', '\u{1a18}', GC_Extend), ('\u{1a19}', '\u{1a1a}', GC_SpacingMark), ('\u{1a1b}',
+ '\u{1a1b}', GC_Extend), ('\u{1a55}', '\u{1a55}', GC_SpacingMark), ('\u{1a56}', '\u{1a56}',
+ GC_Extend), ('\u{1a57}', '\u{1a57}', GC_SpacingMark), ('\u{1a58}', '\u{1a5e}', GC_Extend),
+ ('\u{1a60}', '\u{1a60}', GC_Extend), ('\u{1a62}', '\u{1a62}', GC_Extend), ('\u{1a65}',
+ '\u{1a6c}', GC_Extend), ('\u{1a6d}', '\u{1a72}', GC_SpacingMark), ('\u{1a73}', '\u{1a7c}',
+ GC_Extend), ('\u{1a7f}', '\u{1a7f}', GC_Extend), ('\u{1ab0}', '\u{1ace}', GC_Extend),
+ ('\u{1b00}', '\u{1b03}', GC_Extend), ('\u{1b04}', '\u{1b04}', GC_SpacingMark), ('\u{1b34}',
+ '\u{1b3a}', GC_Extend), ('\u{1b3b}', '\u{1b3b}', GC_SpacingMark), ('\u{1b3c}', '\u{1b3c}',
+ GC_Extend), ('\u{1b3d}', '\u{1b41}', GC_SpacingMark), ('\u{1b42}', '\u{1b42}', GC_Extend),
+ ('\u{1b43}', '\u{1b44}', GC_SpacingMark), ('\u{1b6b}', '\u{1b73}', GC_Extend), ('\u{1b80}',
+ '\u{1b81}', GC_Extend), ('\u{1b82}', '\u{1b82}', GC_SpacingMark), ('\u{1ba1}', '\u{1ba1}',
+ GC_SpacingMark), ('\u{1ba2}', '\u{1ba5}', GC_Extend), ('\u{1ba6}', '\u{1ba7}',
+ GC_SpacingMark), ('\u{1ba8}', '\u{1ba9}', GC_Extend), ('\u{1baa}', '\u{1baa}',
+ GC_SpacingMark), ('\u{1bab}', '\u{1bad}', GC_Extend), ('\u{1be6}', '\u{1be6}', GC_Extend),
+ ('\u{1be7}', '\u{1be7}', GC_SpacingMark), ('\u{1be8}', '\u{1be9}', GC_Extend), ('\u{1bea}',
+ '\u{1bec}', GC_SpacingMark), ('\u{1bed}', '\u{1bed}', GC_Extend), ('\u{1bee}', '\u{1bee}',
+ GC_SpacingMark), ('\u{1bef}', '\u{1bf1}', GC_Extend), ('\u{1bf2}', '\u{1bf3}',
+ GC_SpacingMark), ('\u{1c24}', '\u{1c2b}', GC_SpacingMark), ('\u{1c2c}', '\u{1c33}',
+ GC_Extend), ('\u{1c34}', '\u{1c35}', GC_SpacingMark), ('\u{1c36}', '\u{1c37}', GC_Extend),
+ ('\u{1cd0}', '\u{1cd2}', GC_Extend), ('\u{1cd4}', '\u{1ce0}', GC_Extend), ('\u{1ce1}',
+ '\u{1ce1}', GC_SpacingMark), ('\u{1ce2}', '\u{1ce8}', GC_Extend), ('\u{1ced}', '\u{1ced}',
+ GC_Extend), ('\u{1cf4}', '\u{1cf4}', GC_Extend), ('\u{1cf7}', '\u{1cf7}', GC_SpacingMark),
+ ('\u{1cf8}', '\u{1cf9}', GC_Extend), ('\u{1dc0}', '\u{1dff}', GC_Extend), ('\u{200b}',
+ '\u{200b}', GC_Control), ('\u{200c}', '\u{200c}', GC_Extend), ('\u{200d}', '\u{200d}',
+ GC_ZWJ), ('\u{200e}', '\u{200f}', GC_Control), ('\u{2028}', '\u{202e}', GC_Control),
+ ('\u{203c}', '\u{203c}', GC_Extended_Pictographic), ('\u{2049}', '\u{2049}',
+ GC_Extended_Pictographic), ('\u{2060}', '\u{206f}', GC_Control), ('\u{20d0}', '\u{20f0}',
+ GC_Extend), ('\u{2122}', '\u{2122}', GC_Extended_Pictographic), ('\u{2139}', '\u{2139}',
+ GC_Extended_Pictographic), ('\u{2194}', '\u{2199}', GC_Extended_Pictographic), ('\u{21a9}',
+ '\u{21aa}', GC_Extended_Pictographic), ('\u{231a}', '\u{231b}', GC_Extended_Pictographic),
+ ('\u{2328}', '\u{2328}', GC_Extended_Pictographic), ('\u{2388}', '\u{2388}',
+ GC_Extended_Pictographic), ('\u{23cf}', '\u{23cf}', GC_Extended_Pictographic), ('\u{23e9}',
+ '\u{23f3}', GC_Extended_Pictographic), ('\u{23f8}', '\u{23fa}', GC_Extended_Pictographic),
+ ('\u{24c2}', '\u{24c2}', GC_Extended_Pictographic), ('\u{25aa}', '\u{25ab}',
+ GC_Extended_Pictographic), ('\u{25b6}', '\u{25b6}', GC_Extended_Pictographic), ('\u{25c0}',
+ '\u{25c0}', GC_Extended_Pictographic), ('\u{25fb}', '\u{25fe}', GC_Extended_Pictographic),
+ ('\u{2600}', '\u{2605}', GC_Extended_Pictographic), ('\u{2607}', '\u{2612}',
+ GC_Extended_Pictographic), ('\u{2614}', '\u{2685}', GC_Extended_Pictographic), ('\u{2690}',
+ '\u{2705}', GC_Extended_Pictographic), ('\u{2708}', '\u{2712}', GC_Extended_Pictographic),
+ ('\u{2714}', '\u{2714}', GC_Extended_Pictographic), ('\u{2716}', '\u{2716}',
+ GC_Extended_Pictographic), ('\u{271d}', '\u{271d}', GC_Extended_Pictographic), ('\u{2721}',
+ '\u{2721}', GC_Extended_Pictographic), ('\u{2728}', '\u{2728}', GC_Extended_Pictographic),
+ ('\u{2733}', '\u{2734}', GC_Extended_Pictographic), ('\u{2744}', '\u{2744}',
+ GC_Extended_Pictographic), ('\u{2747}', '\u{2747}', GC_Extended_Pictographic), ('\u{274c}',
+ '\u{274c}', GC_Extended_Pictographic), ('\u{274e}', '\u{274e}', GC_Extended_Pictographic),
+ ('\u{2753}', '\u{2755}', GC_Extended_Pictographic), ('\u{2757}', '\u{2757}',
+ GC_Extended_Pictographic), ('\u{2763}', '\u{2767}', GC_Extended_Pictographic), ('\u{2795}',
+ '\u{2797}', GC_Extended_Pictographic), ('\u{27a1}', '\u{27a1}', GC_Extended_Pictographic),
+ ('\u{27b0}', '\u{27b0}', GC_Extended_Pictographic), ('\u{27bf}', '\u{27bf}',
+ GC_Extended_Pictographic), ('\u{2934}', '\u{2935}', GC_Extended_Pictographic), ('\u{2b05}',
+ '\u{2b07}', GC_Extended_Pictographic), ('\u{2b1b}', '\u{2b1c}', GC_Extended_Pictographic),
+ ('\u{2b50}', '\u{2b50}', GC_Extended_Pictographic), ('\u{2b55}', '\u{2b55}',
+ GC_Extended_Pictographic), ('\u{2cef}', '\u{2cf1}', GC_Extend), ('\u{2d7f}', '\u{2d7f}',
+ GC_Extend), ('\u{2de0}', '\u{2dff}', GC_Extend), ('\u{302a}', '\u{302f}', GC_Extend),
+ ('\u{3030}', '\u{3030}', GC_Extended_Pictographic), ('\u{303d}', '\u{303d}',
+ GC_Extended_Pictographic), ('\u{3099}', '\u{309a}', GC_Extend), ('\u{3297}', '\u{3297}',
+ GC_Extended_Pictographic), ('\u{3299}', '\u{3299}', GC_Extended_Pictographic), ('\u{a66f}',
+ '\u{a672}', GC_Extend), ('\u{a674}', '\u{a67d}', GC_Extend), ('\u{a69e}', '\u{a69f}',
+ GC_Extend), ('\u{a6f0}', '\u{a6f1}', GC_Extend), ('\u{a802}', '\u{a802}', GC_Extend),
+ ('\u{a806}', '\u{a806}', GC_Extend), ('\u{a80b}', '\u{a80b}', GC_Extend), ('\u{a823}',
+ '\u{a824}', GC_SpacingMark), ('\u{a825}', '\u{a826}', GC_Extend), ('\u{a827}', '\u{a827}',
+ GC_SpacingMark), ('\u{a82c}', '\u{a82c}', GC_Extend), ('\u{a880}', '\u{a881}',
+ GC_SpacingMark), ('\u{a8b4}', '\u{a8c3}', GC_SpacingMark), ('\u{a8c4}', '\u{a8c5}',
+ GC_Extend), ('\u{a8e0}', '\u{a8f1}', GC_Extend), ('\u{a8ff}', '\u{a8ff}', GC_Extend),
+ ('\u{a926}', '\u{a92d}', GC_Extend), ('\u{a947}', '\u{a951}', GC_Extend), ('\u{a952}',
+ '\u{a953}', GC_SpacingMark), ('\u{a960}', '\u{a97c}', GC_L), ('\u{a980}', '\u{a982}',
+ GC_Extend), ('\u{a983}', '\u{a983}', GC_SpacingMark), ('\u{a9b3}', '\u{a9b3}', GC_Extend),
+ ('\u{a9b4}', '\u{a9b5}', GC_SpacingMark), ('\u{a9b6}', '\u{a9b9}', GC_Extend), ('\u{a9ba}',
+ '\u{a9bb}', GC_SpacingMark), ('\u{a9bc}', '\u{a9bd}', GC_Extend), ('\u{a9be}', '\u{a9c0}',
+ GC_SpacingMark), ('\u{a9e5}', '\u{a9e5}', GC_Extend), ('\u{aa29}', '\u{aa2e}', GC_Extend),
+ ('\u{aa2f}', '\u{aa30}', GC_SpacingMark), ('\u{aa31}', '\u{aa32}', GC_Extend), ('\u{aa33}',
+ '\u{aa34}', GC_SpacingMark), ('\u{aa35}', '\u{aa36}', GC_Extend), ('\u{aa43}', '\u{aa43}',
+ GC_Extend), ('\u{aa4c}', '\u{aa4c}', GC_Extend), ('\u{aa4d}', '\u{aa4d}', GC_SpacingMark),
+ ('\u{aa7c}', '\u{aa7c}', GC_Extend), ('\u{aab0}', '\u{aab0}', GC_Extend), ('\u{aab2}',
+ '\u{aab4}', GC_Extend), ('\u{aab7}', '\u{aab8}', GC_Extend), ('\u{aabe}', '\u{aabf}',
+ GC_Extend), ('\u{aac1}', '\u{aac1}', GC_Extend), ('\u{aaeb}', '\u{aaeb}', GC_SpacingMark),
+ ('\u{aaec}', '\u{aaed}', GC_Extend), ('\u{aaee}', '\u{aaef}', GC_SpacingMark), ('\u{aaf5}',
+ '\u{aaf5}', GC_SpacingMark), ('\u{aaf6}', '\u{aaf6}', GC_Extend), ('\u{abe3}', '\u{abe4}',
+ GC_SpacingMark), ('\u{abe5}', '\u{abe5}', GC_Extend), ('\u{abe6}', '\u{abe7}',
+ GC_SpacingMark), ('\u{abe8}', '\u{abe8}', GC_Extend), ('\u{abe9}', '\u{abea}',
+ GC_SpacingMark), ('\u{abec}', '\u{abec}', GC_SpacingMark), ('\u{abed}', '\u{abed}',
+ GC_Extend), ('\u{ac00}', '\u{ac00}', GC_LV), ('\u{ac01}', '\u{ac1b}', GC_LVT), ('\u{ac1c}',
+ '\u{ac1c}', GC_LV), ('\u{ac1d}', '\u{ac37}', GC_LVT), ('\u{ac38}', '\u{ac38}', GC_LV),
+ ('\u{ac39}', '\u{ac53}', GC_LVT), ('\u{ac54}', '\u{ac54}', GC_LV), ('\u{ac55}', '\u{ac6f}',
+ GC_LVT), ('\u{ac70}', '\u{ac70}', GC_LV), ('\u{ac71}', '\u{ac8b}', GC_LVT), ('\u{ac8c}',
+ '\u{ac8c}', GC_LV), ('\u{ac8d}', '\u{aca7}', GC_LVT), ('\u{aca8}', '\u{aca8}', GC_LV),
+ ('\u{aca9}', '\u{acc3}', GC_LVT), ('\u{acc4}', '\u{acc4}', GC_LV), ('\u{acc5}', '\u{acdf}',
+ GC_LVT), ('\u{ace0}', '\u{ace0}', GC_LV), ('\u{ace1}', '\u{acfb}', GC_LVT), ('\u{acfc}',
+ '\u{acfc}', GC_LV), ('\u{acfd}', '\u{ad17}', GC_LVT), ('\u{ad18}', '\u{ad18}', GC_LV),
+ ('\u{ad19}', '\u{ad33}', GC_LVT), ('\u{ad34}', '\u{ad34}', GC_LV), ('\u{ad35}', '\u{ad4f}',
+ GC_LVT), ('\u{ad50}', '\u{ad50}', GC_LV), ('\u{ad51}', '\u{ad6b}', GC_LVT), ('\u{ad6c}',
+ '\u{ad6c}', GC_LV), ('\u{ad6d}', '\u{ad87}', GC_LVT), ('\u{ad88}', '\u{ad88}', GC_LV),
+ ('\u{ad89}', '\u{ada3}', GC_LVT), ('\u{ada4}', '\u{ada4}', GC_LV), ('\u{ada5}', '\u{adbf}',
+ GC_LVT), ('\u{adc0}', '\u{adc0}', GC_LV), ('\u{adc1}', '\u{addb}', GC_LVT), ('\u{addc}',
+ '\u{addc}', GC_LV), ('\u{addd}', '\u{adf7}', GC_LVT), ('\u{adf8}', '\u{adf8}', GC_LV),
+ ('\u{adf9}', '\u{ae13}', GC_LVT), ('\u{ae14}', '\u{ae14}', GC_LV), ('\u{ae15}', '\u{ae2f}',
+ GC_LVT), ('\u{ae30}', '\u{ae30}', GC_LV), ('\u{ae31}', '\u{ae4b}', GC_LVT), ('\u{ae4c}',
+ '\u{ae4c}', GC_LV), ('\u{ae4d}', '\u{ae67}', GC_LVT), ('\u{ae68}', '\u{ae68}', GC_LV),
+ ('\u{ae69}', '\u{ae83}', GC_LVT), ('\u{ae84}', '\u{ae84}', GC_LV), ('\u{ae85}', '\u{ae9f}',
+ GC_LVT), ('\u{aea0}', '\u{aea0}', GC_LV), ('\u{aea1}', '\u{aebb}', GC_LVT), ('\u{aebc}',
+ '\u{aebc}', GC_LV), ('\u{aebd}', '\u{aed7}', GC_LVT), ('\u{aed8}', '\u{aed8}', GC_LV),
+ ('\u{aed9}', '\u{aef3}', GC_LVT), ('\u{aef4}', '\u{aef4}', GC_LV), ('\u{aef5}', '\u{af0f}',
+ GC_LVT), ('\u{af10}', '\u{af10}', GC_LV), ('\u{af11}', '\u{af2b}', GC_LVT), ('\u{af2c}',
+ '\u{af2c}', GC_LV), ('\u{af2d}', '\u{af47}', GC_LVT), ('\u{af48}', '\u{af48}', GC_LV),
+ ('\u{af49}', '\u{af63}', GC_LVT), ('\u{af64}', '\u{af64}', GC_LV), ('\u{af65}', '\u{af7f}',
+ GC_LVT), ('\u{af80}', '\u{af80}', GC_LV), ('\u{af81}', '\u{af9b}', GC_LVT), ('\u{af9c}',
+ '\u{af9c}', GC_LV), ('\u{af9d}', '\u{afb7}', GC_LVT), ('\u{afb8}', '\u{afb8}', GC_LV),
+ ('\u{afb9}', '\u{afd3}', GC_LVT), ('\u{afd4}', '\u{afd4}', GC_LV), ('\u{afd5}', '\u{afef}',
+ GC_LVT), ('\u{aff0}', '\u{aff0}', GC_LV), ('\u{aff1}', '\u{b00b}', GC_LVT), ('\u{b00c}',
+ '\u{b00c}', GC_LV), ('\u{b00d}', '\u{b027}', GC_LVT), ('\u{b028}', '\u{b028}', GC_LV),
+ ('\u{b029}', '\u{b043}', GC_LVT), ('\u{b044}', '\u{b044}', GC_LV), ('\u{b045}', '\u{b05f}',
+ GC_LVT), ('\u{b060}', '\u{b060}', GC_LV), ('\u{b061}', '\u{b07b}', GC_LVT), ('\u{b07c}',
+ '\u{b07c}', GC_LV), ('\u{b07d}', '\u{b097}', GC_LVT), ('\u{b098}', '\u{b098}', GC_LV),
+ ('\u{b099}', '\u{b0b3}', GC_LVT), ('\u{b0b4}', '\u{b0b4}', GC_LV), ('\u{b0b5}', '\u{b0cf}',
+ GC_LVT), ('\u{b0d0}', '\u{b0d0}', GC_LV), ('\u{b0d1}', '\u{b0eb}', GC_LVT), ('\u{b0ec}',
+ '\u{b0ec}', GC_LV), ('\u{b0ed}', '\u{b107}', GC_LVT), ('\u{b108}', '\u{b108}', GC_LV),
+ ('\u{b109}', '\u{b123}', GC_LVT), ('\u{b124}', '\u{b124}', GC_LV), ('\u{b125}', '\u{b13f}',
+ GC_LVT), ('\u{b140}', '\u{b140}', GC_LV), ('\u{b141}', '\u{b15b}', GC_LVT), ('\u{b15c}',
+ '\u{b15c}', GC_LV), ('\u{b15d}', '\u{b177}', GC_LVT), ('\u{b178}', '\u{b178}', GC_LV),
+ ('\u{b179}', '\u{b193}', GC_LVT), ('\u{b194}', '\u{b194}', GC_LV), ('\u{b195}', '\u{b1af}',
+ GC_LVT), ('\u{b1b0}', '\u{b1b0}', GC_LV), ('\u{b1b1}', '\u{b1cb}', GC_LVT), ('\u{b1cc}',
+ '\u{b1cc}', GC_LV), ('\u{b1cd}', '\u{b1e7}', GC_LVT), ('\u{b1e8}', '\u{b1e8}', GC_LV),
+ ('\u{b1e9}', '\u{b203}', GC_LVT), ('\u{b204}', '\u{b204}', GC_LV), ('\u{b205}', '\u{b21f}',
+ GC_LVT), ('\u{b220}', '\u{b220}', GC_LV), ('\u{b221}', '\u{b23b}', GC_LVT), ('\u{b23c}',
+ '\u{b23c}', GC_LV), ('\u{b23d}', '\u{b257}', GC_LVT), ('\u{b258}', '\u{b258}', GC_LV),
+ ('\u{b259}', '\u{b273}', GC_LVT), ('\u{b274}', '\u{b274}', GC_LV), ('\u{b275}', '\u{b28f}',
+ GC_LVT), ('\u{b290}', '\u{b290}', GC_LV), ('\u{b291}', '\u{b2ab}', GC_LVT), ('\u{b2ac}',
+ '\u{b2ac}', GC_LV), ('\u{b2ad}', '\u{b2c7}', GC_LVT), ('\u{b2c8}', '\u{b2c8}', GC_LV),
+ ('\u{b2c9}', '\u{b2e3}', GC_LVT), ('\u{b2e4}', '\u{b2e4}', GC_LV), ('\u{b2e5}', '\u{b2ff}',
+ GC_LVT), ('\u{b300}', '\u{b300}', GC_LV), ('\u{b301}', '\u{b31b}', GC_LVT), ('\u{b31c}',
+ '\u{b31c}', GC_LV), ('\u{b31d}', '\u{b337}', GC_LVT), ('\u{b338}', '\u{b338}', GC_LV),
+ ('\u{b339}', '\u{b353}', GC_LVT), ('\u{b354}', '\u{b354}', GC_LV), ('\u{b355}', '\u{b36f}',
+ GC_LVT), ('\u{b370}', '\u{b370}', GC_LV), ('\u{b371}', '\u{b38b}', GC_LVT), ('\u{b38c}',
+ '\u{b38c}', GC_LV), ('\u{b38d}', '\u{b3a7}', GC_LVT), ('\u{b3a8}', '\u{b3a8}', GC_LV),
+ ('\u{b3a9}', '\u{b3c3}', GC_LVT), ('\u{b3c4}', '\u{b3c4}', GC_LV), ('\u{b3c5}', '\u{b3df}',
+ GC_LVT), ('\u{b3e0}', '\u{b3e0}', GC_LV), ('\u{b3e1}', '\u{b3fb}', GC_LVT), ('\u{b3fc}',
+ '\u{b3fc}', GC_LV), ('\u{b3fd}', '\u{b417}', GC_LVT), ('\u{b418}', '\u{b418}', GC_LV),
+ ('\u{b419}', '\u{b433}', GC_LVT), ('\u{b434}', '\u{b434}', GC_LV), ('\u{b435}', '\u{b44f}',
+ GC_LVT), ('\u{b450}', '\u{b450}', GC_LV), ('\u{b451}', '\u{b46b}', GC_LVT), ('\u{b46c}',
+ '\u{b46c}', GC_LV), ('\u{b46d}', '\u{b487}', GC_LVT), ('\u{b488}', '\u{b488}', GC_LV),
+ ('\u{b489}', '\u{b4a3}', GC_LVT), ('\u{b4a4}', '\u{b4a4}', GC_LV), ('\u{b4a5}', '\u{b4bf}',
+ GC_LVT), ('\u{b4c0}', '\u{b4c0}', GC_LV), ('\u{b4c1}', '\u{b4db}', GC_LVT), ('\u{b4dc}',
+ '\u{b4dc}', GC_LV), ('\u{b4dd}', '\u{b4f7}', GC_LVT), ('\u{b4f8}', '\u{b4f8}', GC_LV),
+ ('\u{b4f9}', '\u{b513}', GC_LVT), ('\u{b514}', '\u{b514}', GC_LV), ('\u{b515}', '\u{b52f}',
+ GC_LVT), ('\u{b530}', '\u{b530}', GC_LV), ('\u{b531}', '\u{b54b}', GC_LVT), ('\u{b54c}',
+ '\u{b54c}', GC_LV), ('\u{b54d}', '\u{b567}', GC_LVT), ('\u{b568}', '\u{b568}', GC_LV),
+ ('\u{b569}', '\u{b583}', GC_LVT), ('\u{b584}', '\u{b584}', GC_LV), ('\u{b585}', '\u{b59f}',
+ GC_LVT), ('\u{b5a0}', '\u{b5a0}', GC_LV), ('\u{b5a1}', '\u{b5bb}', GC_LVT), ('\u{b5bc}',
+ '\u{b5bc}', GC_LV), ('\u{b5bd}', '\u{b5d7}', GC_LVT), ('\u{b5d8}', '\u{b5d8}', GC_LV),
+ ('\u{b5d9}', '\u{b5f3}', GC_LVT), ('\u{b5f4}', '\u{b5f4}', GC_LV), ('\u{b5f5}', '\u{b60f}',
+ GC_LVT), ('\u{b610}', '\u{b610}', GC_LV), ('\u{b611}', '\u{b62b}', GC_LVT), ('\u{b62c}',
+ '\u{b62c}', GC_LV), ('\u{b62d}', '\u{b647}', GC_LVT), ('\u{b648}', '\u{b648}', GC_LV),
+ ('\u{b649}', '\u{b663}', GC_LVT), ('\u{b664}', '\u{b664}', GC_LV), ('\u{b665}', '\u{b67f}',
+ GC_LVT), ('\u{b680}', '\u{b680}', GC_LV), ('\u{b681}', '\u{b69b}', GC_LVT), ('\u{b69c}',
+ '\u{b69c}', GC_LV), ('\u{b69d}', '\u{b6b7}', GC_LVT), ('\u{b6b8}', '\u{b6b8}', GC_LV),
+ ('\u{b6b9}', '\u{b6d3}', GC_LVT), ('\u{b6d4}', '\u{b6d4}', GC_LV), ('\u{b6d5}', '\u{b6ef}',
+ GC_LVT), ('\u{b6f0}', '\u{b6f0}', GC_LV), ('\u{b6f1}', '\u{b70b}', GC_LVT), ('\u{b70c}',
+ '\u{b70c}', GC_LV), ('\u{b70d}', '\u{b727}', GC_LVT), ('\u{b728}', '\u{b728}', GC_LV),
+ ('\u{b729}', '\u{b743}', GC_LVT), ('\u{b744}', '\u{b744}', GC_LV), ('\u{b745}', '\u{b75f}',
+ GC_LVT), ('\u{b760}', '\u{b760}', GC_LV), ('\u{b761}', '\u{b77b}', GC_LVT), ('\u{b77c}',
+ '\u{b77c}', GC_LV), ('\u{b77d}', '\u{b797}', GC_LVT), ('\u{b798}', '\u{b798}', GC_LV),
+ ('\u{b799}', '\u{b7b3}', GC_LVT), ('\u{b7b4}', '\u{b7b4}', GC_LV), ('\u{b7b5}', '\u{b7cf}',
+ GC_LVT), ('\u{b7d0}', '\u{b7d0}', GC_LV), ('\u{b7d1}', '\u{b7eb}', GC_LVT), ('\u{b7ec}',
+ '\u{b7ec}', GC_LV), ('\u{b7ed}', '\u{b807}', GC_LVT), ('\u{b808}', '\u{b808}', GC_LV),
+ ('\u{b809}', '\u{b823}', GC_LVT), ('\u{b824}', '\u{b824}', GC_LV), ('\u{b825}', '\u{b83f}',
+ GC_LVT), ('\u{b840}', '\u{b840}', GC_LV), ('\u{b841}', '\u{b85b}', GC_LVT), ('\u{b85c}',
+ '\u{b85c}', GC_LV), ('\u{b85d}', '\u{b877}', GC_LVT), ('\u{b878}', '\u{b878}', GC_LV),
+ ('\u{b879}', '\u{b893}', GC_LVT), ('\u{b894}', '\u{b894}', GC_LV), ('\u{b895}', '\u{b8af}',
+ GC_LVT), ('\u{b8b0}', '\u{b8b0}', GC_LV), ('\u{b8b1}', '\u{b8cb}', GC_LVT), ('\u{b8cc}',
+ '\u{b8cc}', GC_LV), ('\u{b8cd}', '\u{b8e7}', GC_LVT), ('\u{b8e8}', '\u{b8e8}', GC_LV),
+ ('\u{b8e9}', '\u{b903}', GC_LVT), ('\u{b904}', '\u{b904}', GC_LV), ('\u{b905}', '\u{b91f}',
+ GC_LVT), ('\u{b920}', '\u{b920}', GC_LV), ('\u{b921}', '\u{b93b}', GC_LVT), ('\u{b93c}',
+ '\u{b93c}', GC_LV), ('\u{b93d}', '\u{b957}', GC_LVT), ('\u{b958}', '\u{b958}', GC_LV),
+ ('\u{b959}', '\u{b973}', GC_LVT), ('\u{b974}', '\u{b974}', GC_LV), ('\u{b975}', '\u{b98f}',
+ GC_LVT), ('\u{b990}', '\u{b990}', GC_LV), ('\u{b991}', '\u{b9ab}', GC_LVT), ('\u{b9ac}',
+ '\u{b9ac}', GC_LV), ('\u{b9ad}', '\u{b9c7}', GC_LVT), ('\u{b9c8}', '\u{b9c8}', GC_LV),
+ ('\u{b9c9}', '\u{b9e3}', GC_LVT), ('\u{b9e4}', '\u{b9e4}', GC_LV), ('\u{b9e5}', '\u{b9ff}',
+ GC_LVT), ('\u{ba00}', '\u{ba00}', GC_LV), ('\u{ba01}', '\u{ba1b}', GC_LVT), ('\u{ba1c}',
+ '\u{ba1c}', GC_LV), ('\u{ba1d}', '\u{ba37}', GC_LVT), ('\u{ba38}', '\u{ba38}', GC_LV),
+ ('\u{ba39}', '\u{ba53}', GC_LVT), ('\u{ba54}', '\u{ba54}', GC_LV), ('\u{ba55}', '\u{ba6f}',
+ GC_LVT), ('\u{ba70}', '\u{ba70}', GC_LV), ('\u{ba71}', '\u{ba8b}', GC_LVT), ('\u{ba8c}',
+ '\u{ba8c}', GC_LV), ('\u{ba8d}', '\u{baa7}', GC_LVT), ('\u{baa8}', '\u{baa8}', GC_LV),
+ ('\u{baa9}', '\u{bac3}', GC_LVT), ('\u{bac4}', '\u{bac4}', GC_LV), ('\u{bac5}', '\u{badf}',
+ GC_LVT), ('\u{bae0}', '\u{bae0}', GC_LV), ('\u{bae1}', '\u{bafb}', GC_LVT), ('\u{bafc}',
+ '\u{bafc}', GC_LV), ('\u{bafd}', '\u{bb17}', GC_LVT), ('\u{bb18}', '\u{bb18}', GC_LV),
+ ('\u{bb19}', '\u{bb33}', GC_LVT), ('\u{bb34}', '\u{bb34}', GC_LV), ('\u{bb35}', '\u{bb4f}',
+ GC_LVT), ('\u{bb50}', '\u{bb50}', GC_LV), ('\u{bb51}', '\u{bb6b}', GC_LVT), ('\u{bb6c}',
+ '\u{bb6c}', GC_LV), ('\u{bb6d}', '\u{bb87}', GC_LVT), ('\u{bb88}', '\u{bb88}', GC_LV),
+ ('\u{bb89}', '\u{bba3}', GC_LVT), ('\u{bba4}', '\u{bba4}', GC_LV), ('\u{bba5}', '\u{bbbf}',
+ GC_LVT), ('\u{bbc0}', '\u{bbc0}', GC_LV), ('\u{bbc1}', '\u{bbdb}', GC_LVT), ('\u{bbdc}',
+ '\u{bbdc}', GC_LV), ('\u{bbdd}', '\u{bbf7}', GC_LVT), ('\u{bbf8}', '\u{bbf8}', GC_LV),
+ ('\u{bbf9}', '\u{bc13}', GC_LVT), ('\u{bc14}', '\u{bc14}', GC_LV), ('\u{bc15}', '\u{bc2f}',
+ GC_LVT), ('\u{bc30}', '\u{bc30}', GC_LV), ('\u{bc31}', '\u{bc4b}', GC_LVT), ('\u{bc4c}',
+ '\u{bc4c}', GC_LV), ('\u{bc4d}', '\u{bc67}', GC_LVT), ('\u{bc68}', '\u{bc68}', GC_LV),
+ ('\u{bc69}', '\u{bc83}', GC_LVT), ('\u{bc84}', '\u{bc84}', GC_LV), ('\u{bc85}', '\u{bc9f}',
+ GC_LVT), ('\u{bca0}', '\u{bca0}', GC_LV), ('\u{bca1}', '\u{bcbb}', GC_LVT), ('\u{bcbc}',
+ '\u{bcbc}', GC_LV), ('\u{bcbd}', '\u{bcd7}', GC_LVT), ('\u{bcd8}', '\u{bcd8}', GC_LV),
+ ('\u{bcd9}', '\u{bcf3}', GC_LVT), ('\u{bcf4}', '\u{bcf4}', GC_LV), ('\u{bcf5}', '\u{bd0f}',
+ GC_LVT), ('\u{bd10}', '\u{bd10}', GC_LV), ('\u{bd11}', '\u{bd2b}', GC_LVT), ('\u{bd2c}',
+ '\u{bd2c}', GC_LV), ('\u{bd2d}', '\u{bd47}', GC_LVT), ('\u{bd48}', '\u{bd48}', GC_LV),
+ ('\u{bd49}', '\u{bd63}', GC_LVT), ('\u{bd64}', '\u{bd64}', GC_LV), ('\u{bd65}', '\u{bd7f}',
+ GC_LVT), ('\u{bd80}', '\u{bd80}', GC_LV), ('\u{bd81}', '\u{bd9b}', GC_LVT), ('\u{bd9c}',
+ '\u{bd9c}', GC_LV), ('\u{bd9d}', '\u{bdb7}', GC_LVT), ('\u{bdb8}', '\u{bdb8}', GC_LV),
+ ('\u{bdb9}', '\u{bdd3}', GC_LVT), ('\u{bdd4}', '\u{bdd4}', GC_LV), ('\u{bdd5}', '\u{bdef}',
+ GC_LVT), ('\u{bdf0}', '\u{bdf0}', GC_LV), ('\u{bdf1}', '\u{be0b}', GC_LVT), ('\u{be0c}',
+ '\u{be0c}', GC_LV), ('\u{be0d}', '\u{be27}', GC_LVT), ('\u{be28}', '\u{be28}', GC_LV),
+ ('\u{be29}', '\u{be43}', GC_LVT), ('\u{be44}', '\u{be44}', GC_LV), ('\u{be45}', '\u{be5f}',
+ GC_LVT), ('\u{be60}', '\u{be60}', GC_LV), ('\u{be61}', '\u{be7b}', GC_LVT), ('\u{be7c}',
+ '\u{be7c}', GC_LV), ('\u{be7d}', '\u{be97}', GC_LVT), ('\u{be98}', '\u{be98}', GC_LV),
+ ('\u{be99}', '\u{beb3}', GC_LVT), ('\u{beb4}', '\u{beb4}', GC_LV), ('\u{beb5}', '\u{becf}',
+ GC_LVT), ('\u{bed0}', '\u{bed0}', GC_LV), ('\u{bed1}', '\u{beeb}', GC_LVT), ('\u{beec}',
+ '\u{beec}', GC_LV), ('\u{beed}', '\u{bf07}', GC_LVT), ('\u{bf08}', '\u{bf08}', GC_LV),
+ ('\u{bf09}', '\u{bf23}', GC_LVT), ('\u{bf24}', '\u{bf24}', GC_LV), ('\u{bf25}', '\u{bf3f}',
+ GC_LVT), ('\u{bf40}', '\u{bf40}', GC_LV), ('\u{bf41}', '\u{bf5b}', GC_LVT), ('\u{bf5c}',
+ '\u{bf5c}', GC_LV), ('\u{bf5d}', '\u{bf77}', GC_LVT), ('\u{bf78}', '\u{bf78}', GC_LV),
+ ('\u{bf79}', '\u{bf93}', GC_LVT), ('\u{bf94}', '\u{bf94}', GC_LV), ('\u{bf95}', '\u{bfaf}',
+ GC_LVT), ('\u{bfb0}', '\u{bfb0}', GC_LV), ('\u{bfb1}', '\u{bfcb}', GC_LVT), ('\u{bfcc}',
+ '\u{bfcc}', GC_LV), ('\u{bfcd}', '\u{bfe7}', GC_LVT), ('\u{bfe8}', '\u{bfe8}', GC_LV),
+ ('\u{bfe9}', '\u{c003}', GC_LVT), ('\u{c004}', '\u{c004}', GC_LV), ('\u{c005}', '\u{c01f}',
+ GC_LVT), ('\u{c020}', '\u{c020}', GC_LV), ('\u{c021}', '\u{c03b}', GC_LVT), ('\u{c03c}',
+ '\u{c03c}', GC_LV), ('\u{c03d}', '\u{c057}', GC_LVT), ('\u{c058}', '\u{c058}', GC_LV),
+ ('\u{c059}', '\u{c073}', GC_LVT), ('\u{c074}', '\u{c074}', GC_LV), ('\u{c075}', '\u{c08f}',
+ GC_LVT), ('\u{c090}', '\u{c090}', GC_LV), ('\u{c091}', '\u{c0ab}', GC_LVT), ('\u{c0ac}',
+ '\u{c0ac}', GC_LV), ('\u{c0ad}', '\u{c0c7}', GC_LVT), ('\u{c0c8}', '\u{c0c8}', GC_LV),
+ ('\u{c0c9}', '\u{c0e3}', GC_LVT), ('\u{c0e4}', '\u{c0e4}', GC_LV), ('\u{c0e5}', '\u{c0ff}',
+ GC_LVT), ('\u{c100}', '\u{c100}', GC_LV), ('\u{c101}', '\u{c11b}', GC_LVT), ('\u{c11c}',
+ '\u{c11c}', GC_LV), ('\u{c11d}', '\u{c137}', GC_LVT), ('\u{c138}', '\u{c138}', GC_LV),
+ ('\u{c139}', '\u{c153}', GC_LVT), ('\u{c154}', '\u{c154}', GC_LV), ('\u{c155}', '\u{c16f}',
+ GC_LVT), ('\u{c170}', '\u{c170}', GC_LV), ('\u{c171}', '\u{c18b}', GC_LVT), ('\u{c18c}',
+ '\u{c18c}', GC_LV), ('\u{c18d}', '\u{c1a7}', GC_LVT), ('\u{c1a8}', '\u{c1a8}', GC_LV),
+ ('\u{c1a9}', '\u{c1c3}', GC_LVT), ('\u{c1c4}', '\u{c1c4}', GC_LV), ('\u{c1c5}', '\u{c1df}',
+ GC_LVT), ('\u{c1e0}', '\u{c1e0}', GC_LV), ('\u{c1e1}', '\u{c1fb}', GC_LVT), ('\u{c1fc}',
+ '\u{c1fc}', GC_LV), ('\u{c1fd}', '\u{c217}', GC_LVT), ('\u{c218}', '\u{c218}', GC_LV),
+ ('\u{c219}', '\u{c233}', GC_LVT), ('\u{c234}', '\u{c234}', GC_LV), ('\u{c235}', '\u{c24f}',
+ GC_LVT), ('\u{c250}', '\u{c250}', GC_LV), ('\u{c251}', '\u{c26b}', GC_LVT), ('\u{c26c}',
+ '\u{c26c}', GC_LV), ('\u{c26d}', '\u{c287}', GC_LVT), ('\u{c288}', '\u{c288}', GC_LV),
+ ('\u{c289}', '\u{c2a3}', GC_LVT), ('\u{c2a4}', '\u{c2a4}', GC_LV), ('\u{c2a5}', '\u{c2bf}',
+ GC_LVT), ('\u{c2c0}', '\u{c2c0}', GC_LV), ('\u{c2c1}', '\u{c2db}', GC_LVT), ('\u{c2dc}',
+ '\u{c2dc}', GC_LV), ('\u{c2dd}', '\u{c2f7}', GC_LVT), ('\u{c2f8}', '\u{c2f8}', GC_LV),
+ ('\u{c2f9}', '\u{c313}', GC_LVT), ('\u{c314}', '\u{c314}', GC_LV), ('\u{c315}', '\u{c32f}',
+ GC_LVT), ('\u{c330}', '\u{c330}', GC_LV), ('\u{c331}', '\u{c34b}', GC_LVT), ('\u{c34c}',
+ '\u{c34c}', GC_LV), ('\u{c34d}', '\u{c367}', GC_LVT), ('\u{c368}', '\u{c368}', GC_LV),
+ ('\u{c369}', '\u{c383}', GC_LVT), ('\u{c384}', '\u{c384}', GC_LV), ('\u{c385}', '\u{c39f}',
+ GC_LVT), ('\u{c3a0}', '\u{c3a0}', GC_LV), ('\u{c3a1}', '\u{c3bb}', GC_LVT), ('\u{c3bc}',
+ '\u{c3bc}', GC_LV), ('\u{c3bd}', '\u{c3d7}', GC_LVT), ('\u{c3d8}', '\u{c3d8}', GC_LV),
+ ('\u{c3d9}', '\u{c3f3}', GC_LVT), ('\u{c3f4}', '\u{c3f4}', GC_LV), ('\u{c3f5}', '\u{c40f}',
+ GC_LVT), ('\u{c410}', '\u{c410}', GC_LV), ('\u{c411}', '\u{c42b}', GC_LVT), ('\u{c42c}',
+ '\u{c42c}', GC_LV), ('\u{c42d}', '\u{c447}', GC_LVT), ('\u{c448}', '\u{c448}', GC_LV),
+ ('\u{c449}', '\u{c463}', GC_LVT), ('\u{c464}', '\u{c464}', GC_LV), ('\u{c465}', '\u{c47f}',
+ GC_LVT), ('\u{c480}', '\u{c480}', GC_LV), ('\u{c481}', '\u{c49b}', GC_LVT), ('\u{c49c}',
+ '\u{c49c}', GC_LV), ('\u{c49d}', '\u{c4b7}', GC_LVT), ('\u{c4b8}', '\u{c4b8}', GC_LV),
+ ('\u{c4b9}', '\u{c4d3}', GC_LVT), ('\u{c4d4}', '\u{c4d4}', GC_LV), ('\u{c4d5}', '\u{c4ef}',
+ GC_LVT), ('\u{c4f0}', '\u{c4f0}', GC_LV), ('\u{c4f1}', '\u{c50b}', GC_LVT), ('\u{c50c}',
+ '\u{c50c}', GC_LV), ('\u{c50d}', '\u{c527}', GC_LVT), ('\u{c528}', '\u{c528}', GC_LV),
+ ('\u{c529}', '\u{c543}', GC_LVT), ('\u{c544}', '\u{c544}', GC_LV), ('\u{c545}', '\u{c55f}',
+ GC_LVT), ('\u{c560}', '\u{c560}', GC_LV), ('\u{c561}', '\u{c57b}', GC_LVT), ('\u{c57c}',
+ '\u{c57c}', GC_LV), ('\u{c57d}', '\u{c597}', GC_LVT), ('\u{c598}', '\u{c598}', GC_LV),
+ ('\u{c599}', '\u{c5b3}', GC_LVT), ('\u{c5b4}', '\u{c5b4}', GC_LV), ('\u{c5b5}', '\u{c5cf}',
+ GC_LVT), ('\u{c5d0}', '\u{c5d0}', GC_LV), ('\u{c5d1}', '\u{c5eb}', GC_LVT), ('\u{c5ec}',
+ '\u{c5ec}', GC_LV), ('\u{c5ed}', '\u{c607}', GC_LVT), ('\u{c608}', '\u{c608}', GC_LV),
+ ('\u{c609}', '\u{c623}', GC_LVT), ('\u{c624}', '\u{c624}', GC_LV), ('\u{c625}', '\u{c63f}',
+ GC_LVT), ('\u{c640}', '\u{c640}', GC_LV), ('\u{c641}', '\u{c65b}', GC_LVT), ('\u{c65c}',
+ '\u{c65c}', GC_LV), ('\u{c65d}', '\u{c677}', GC_LVT), ('\u{c678}', '\u{c678}', GC_LV),
+ ('\u{c679}', '\u{c693}', GC_LVT), ('\u{c694}', '\u{c694}', GC_LV), ('\u{c695}', '\u{c6af}',
+ GC_LVT), ('\u{c6b0}', '\u{c6b0}', GC_LV), ('\u{c6b1}', '\u{c6cb}', GC_LVT), ('\u{c6cc}',
+ '\u{c6cc}', GC_LV), ('\u{c6cd}', '\u{c6e7}', GC_LVT), ('\u{c6e8}', '\u{c6e8}', GC_LV),
+ ('\u{c6e9}', '\u{c703}', GC_LVT), ('\u{c704}', '\u{c704}', GC_LV), ('\u{c705}', '\u{c71f}',
+ GC_LVT), ('\u{c720}', '\u{c720}', GC_LV), ('\u{c721}', '\u{c73b}', GC_LVT), ('\u{c73c}',
+ '\u{c73c}', GC_LV), ('\u{c73d}', '\u{c757}', GC_LVT), ('\u{c758}', '\u{c758}', GC_LV),
+ ('\u{c759}', '\u{c773}', GC_LVT), ('\u{c774}', '\u{c774}', GC_LV), ('\u{c775}', '\u{c78f}',
+ GC_LVT), ('\u{c790}', '\u{c790}', GC_LV), ('\u{c791}', '\u{c7ab}', GC_LVT), ('\u{c7ac}',
+ '\u{c7ac}', GC_LV), ('\u{c7ad}', '\u{c7c7}', GC_LVT), ('\u{c7c8}', '\u{c7c8}', GC_LV),
+ ('\u{c7c9}', '\u{c7e3}', GC_LVT), ('\u{c7e4}', '\u{c7e4}', GC_LV), ('\u{c7e5}', '\u{c7ff}',
+ GC_LVT), ('\u{c800}', '\u{c800}', GC_LV), ('\u{c801}', '\u{c81b}', GC_LVT), ('\u{c81c}',
+ '\u{c81c}', GC_LV), ('\u{c81d}', '\u{c837}', GC_LVT), ('\u{c838}', '\u{c838}', GC_LV),
+ ('\u{c839}', '\u{c853}', GC_LVT), ('\u{c854}', '\u{c854}', GC_LV), ('\u{c855}', '\u{c86f}',
+ GC_LVT), ('\u{c870}', '\u{c870}', GC_LV), ('\u{c871}', '\u{c88b}', GC_LVT), ('\u{c88c}',
+ '\u{c88c}', GC_LV), ('\u{c88d}', '\u{c8a7}', GC_LVT), ('\u{c8a8}', '\u{c8a8}', GC_LV),
+ ('\u{c8a9}', '\u{c8c3}', GC_LVT), ('\u{c8c4}', '\u{c8c4}', GC_LV), ('\u{c8c5}', '\u{c8df}',
+ GC_LVT), ('\u{c8e0}', '\u{c8e0}', GC_LV), ('\u{c8e1}', '\u{c8fb}', GC_LVT), ('\u{c8fc}',
+ '\u{c8fc}', GC_LV), ('\u{c8fd}', '\u{c917}', GC_LVT), ('\u{c918}', '\u{c918}', GC_LV),
+ ('\u{c919}', '\u{c933}', GC_LVT), ('\u{c934}', '\u{c934}', GC_LV), ('\u{c935}', '\u{c94f}',
+ GC_LVT), ('\u{c950}', '\u{c950}', GC_LV), ('\u{c951}', '\u{c96b}', GC_LVT), ('\u{c96c}',
+ '\u{c96c}', GC_LV), ('\u{c96d}', '\u{c987}', GC_LVT), ('\u{c988}', '\u{c988}', GC_LV),
+ ('\u{c989}', '\u{c9a3}', GC_LVT), ('\u{c9a4}', '\u{c9a4}', GC_LV), ('\u{c9a5}', '\u{c9bf}',
+ GC_LVT), ('\u{c9c0}', '\u{c9c0}', GC_LV), ('\u{c9c1}', '\u{c9db}', GC_LVT), ('\u{c9dc}',
+ '\u{c9dc}', GC_LV), ('\u{c9dd}', '\u{c9f7}', GC_LVT), ('\u{c9f8}', '\u{c9f8}', GC_LV),
+ ('\u{c9f9}', '\u{ca13}', GC_LVT), ('\u{ca14}', '\u{ca14}', GC_LV), ('\u{ca15}', '\u{ca2f}',
+ GC_LVT), ('\u{ca30}', '\u{ca30}', GC_LV), ('\u{ca31}', '\u{ca4b}', GC_LVT), ('\u{ca4c}',
+ '\u{ca4c}', GC_LV), ('\u{ca4d}', '\u{ca67}', GC_LVT), ('\u{ca68}', '\u{ca68}', GC_LV),
+ ('\u{ca69}', '\u{ca83}', GC_LVT), ('\u{ca84}', '\u{ca84}', GC_LV), ('\u{ca85}', '\u{ca9f}',
+ GC_LVT), ('\u{caa0}', '\u{caa0}', GC_LV), ('\u{caa1}', '\u{cabb}', GC_LVT), ('\u{cabc}',
+ '\u{cabc}', GC_LV), ('\u{cabd}', '\u{cad7}', GC_LVT), ('\u{cad8}', '\u{cad8}', GC_LV),
+ ('\u{cad9}', '\u{caf3}', GC_LVT), ('\u{caf4}', '\u{caf4}', GC_LV), ('\u{caf5}', '\u{cb0f}',
+ GC_LVT), ('\u{cb10}', '\u{cb10}', GC_LV), ('\u{cb11}', '\u{cb2b}', GC_LVT), ('\u{cb2c}',
+ '\u{cb2c}', GC_LV), ('\u{cb2d}', '\u{cb47}', GC_LVT), ('\u{cb48}', '\u{cb48}', GC_LV),
+ ('\u{cb49}', '\u{cb63}', GC_LVT), ('\u{cb64}', '\u{cb64}', GC_LV), ('\u{cb65}', '\u{cb7f}',
+ GC_LVT), ('\u{cb80}', '\u{cb80}', GC_LV), ('\u{cb81}', '\u{cb9b}', GC_LVT), ('\u{cb9c}',
+ '\u{cb9c}', GC_LV), ('\u{cb9d}', '\u{cbb7}', GC_LVT), ('\u{cbb8}', '\u{cbb8}', GC_LV),
+ ('\u{cbb9}', '\u{cbd3}', GC_LVT), ('\u{cbd4}', '\u{cbd4}', GC_LV), ('\u{cbd5}', '\u{cbef}',
+ GC_LVT), ('\u{cbf0}', '\u{cbf0}', GC_LV), ('\u{cbf1}', '\u{cc0b}', GC_LVT), ('\u{cc0c}',
+ '\u{cc0c}', GC_LV), ('\u{cc0d}', '\u{cc27}', GC_LVT), ('\u{cc28}', '\u{cc28}', GC_LV),
+ ('\u{cc29}', '\u{cc43}', GC_LVT), ('\u{cc44}', '\u{cc44}', GC_LV), ('\u{cc45}', '\u{cc5f}',
+ GC_LVT), ('\u{cc60}', '\u{cc60}', GC_LV), ('\u{cc61}', '\u{cc7b}', GC_LVT), ('\u{cc7c}',
+ '\u{cc7c}', GC_LV), ('\u{cc7d}', '\u{cc97}', GC_LVT), ('\u{cc98}', '\u{cc98}', GC_LV),
+ ('\u{cc99}', '\u{ccb3}', GC_LVT), ('\u{ccb4}', '\u{ccb4}', GC_LV), ('\u{ccb5}', '\u{cccf}',
+ GC_LVT), ('\u{ccd0}', '\u{ccd0}', GC_LV), ('\u{ccd1}', '\u{cceb}', GC_LVT), ('\u{ccec}',
+ '\u{ccec}', GC_LV), ('\u{cced}', '\u{cd07}', GC_LVT), ('\u{cd08}', '\u{cd08}', GC_LV),
+ ('\u{cd09}', '\u{cd23}', GC_LVT), ('\u{cd24}', '\u{cd24}', GC_LV), ('\u{cd25}', '\u{cd3f}',
+ GC_LVT), ('\u{cd40}', '\u{cd40}', GC_LV), ('\u{cd41}', '\u{cd5b}', GC_LVT), ('\u{cd5c}',
+ '\u{cd5c}', GC_LV), ('\u{cd5d}', '\u{cd77}', GC_LVT), ('\u{cd78}', '\u{cd78}', GC_LV),
+ ('\u{cd79}', '\u{cd93}', GC_LVT), ('\u{cd94}', '\u{cd94}', GC_LV), ('\u{cd95}', '\u{cdaf}',
+ GC_LVT), ('\u{cdb0}', '\u{cdb0}', GC_LV), ('\u{cdb1}', '\u{cdcb}', GC_LVT), ('\u{cdcc}',
+ '\u{cdcc}', GC_LV), ('\u{cdcd}', '\u{cde7}', GC_LVT), ('\u{cde8}', '\u{cde8}', GC_LV),
+ ('\u{cde9}', '\u{ce03}', GC_LVT), ('\u{ce04}', '\u{ce04}', GC_LV), ('\u{ce05}', '\u{ce1f}',
+ GC_LVT), ('\u{ce20}', '\u{ce20}', GC_LV), ('\u{ce21}', '\u{ce3b}', GC_LVT), ('\u{ce3c}',
+ '\u{ce3c}', GC_LV), ('\u{ce3d}', '\u{ce57}', GC_LVT), ('\u{ce58}', '\u{ce58}', GC_LV),
+ ('\u{ce59}', '\u{ce73}', GC_LVT), ('\u{ce74}', '\u{ce74}', GC_LV), ('\u{ce75}', '\u{ce8f}',
+ GC_LVT), ('\u{ce90}', '\u{ce90}', GC_LV), ('\u{ce91}', '\u{ceab}', GC_LVT), ('\u{ceac}',
+ '\u{ceac}', GC_LV), ('\u{cead}', '\u{cec7}', GC_LVT), ('\u{cec8}', '\u{cec8}', GC_LV),
+ ('\u{cec9}', '\u{cee3}', GC_LVT), ('\u{cee4}', '\u{cee4}', GC_LV), ('\u{cee5}', '\u{ceff}',
+ GC_LVT), ('\u{cf00}', '\u{cf00}', GC_LV), ('\u{cf01}', '\u{cf1b}', GC_LVT), ('\u{cf1c}',
+ '\u{cf1c}', GC_LV), ('\u{cf1d}', '\u{cf37}', GC_LVT), ('\u{cf38}', '\u{cf38}', GC_LV),
+ ('\u{cf39}', '\u{cf53}', GC_LVT), ('\u{cf54}', '\u{cf54}', GC_LV), ('\u{cf55}', '\u{cf6f}',
+ GC_LVT), ('\u{cf70}', '\u{cf70}', GC_LV), ('\u{cf71}', '\u{cf8b}', GC_LVT), ('\u{cf8c}',
+ '\u{cf8c}', GC_LV), ('\u{cf8d}', '\u{cfa7}', GC_LVT), ('\u{cfa8}', '\u{cfa8}', GC_LV),
+ ('\u{cfa9}', '\u{cfc3}', GC_LVT), ('\u{cfc4}', '\u{cfc4}', GC_LV), ('\u{cfc5}', '\u{cfdf}',
+ GC_LVT), ('\u{cfe0}', '\u{cfe0}', GC_LV), ('\u{cfe1}', '\u{cffb}', GC_LVT), ('\u{cffc}',
+ '\u{cffc}', GC_LV), ('\u{cffd}', '\u{d017}', GC_LVT), ('\u{d018}', '\u{d018}', GC_LV),
+ ('\u{d019}', '\u{d033}', GC_LVT), ('\u{d034}', '\u{d034}', GC_LV), ('\u{d035}', '\u{d04f}',
+ GC_LVT), ('\u{d050}', '\u{d050}', GC_LV), ('\u{d051}', '\u{d06b}', GC_LVT), ('\u{d06c}',
+ '\u{d06c}', GC_LV), ('\u{d06d}', '\u{d087}', GC_LVT), ('\u{d088}', '\u{d088}', GC_LV),
+ ('\u{d089}', '\u{d0a3}', GC_LVT), ('\u{d0a4}', '\u{d0a4}', GC_LV), ('\u{d0a5}', '\u{d0bf}',
+ GC_LVT), ('\u{d0c0}', '\u{d0c0}', GC_LV), ('\u{d0c1}', '\u{d0db}', GC_LVT), ('\u{d0dc}',
+ '\u{d0dc}', GC_LV), ('\u{d0dd}', '\u{d0f7}', GC_LVT), ('\u{d0f8}', '\u{d0f8}', GC_LV),
+ ('\u{d0f9}', '\u{d113}', GC_LVT), ('\u{d114}', '\u{d114}', GC_LV), ('\u{d115}', '\u{d12f}',
+ GC_LVT), ('\u{d130}', '\u{d130}', GC_LV), ('\u{d131}', '\u{d14b}', GC_LVT), ('\u{d14c}',
+ '\u{d14c}', GC_LV), ('\u{d14d}', '\u{d167}', GC_LVT), ('\u{d168}', '\u{d168}', GC_LV),
+ ('\u{d169}', '\u{d183}', GC_LVT), ('\u{d184}', '\u{d184}', GC_LV), ('\u{d185}', '\u{d19f}',
+ GC_LVT), ('\u{d1a0}', '\u{d1a0}', GC_LV), ('\u{d1a1}', '\u{d1bb}', GC_LVT), ('\u{d1bc}',
+ '\u{d1bc}', GC_LV), ('\u{d1bd}', '\u{d1d7}', GC_LVT), ('\u{d1d8}', '\u{d1d8}', GC_LV),
+ ('\u{d1d9}', '\u{d1f3}', GC_LVT), ('\u{d1f4}', '\u{d1f4}', GC_LV), ('\u{d1f5}', '\u{d20f}',
+ GC_LVT), ('\u{d210}', '\u{d210}', GC_LV), ('\u{d211}', '\u{d22b}', GC_LVT), ('\u{d22c}',
+ '\u{d22c}', GC_LV), ('\u{d22d}', '\u{d247}', GC_LVT), ('\u{d248}', '\u{d248}', GC_LV),
+ ('\u{d249}', '\u{d263}', GC_LVT), ('\u{d264}', '\u{d264}', GC_LV), ('\u{d265}', '\u{d27f}',
+ GC_LVT), ('\u{d280}', '\u{d280}', GC_LV), ('\u{d281}', '\u{d29b}', GC_LVT), ('\u{d29c}',
+ '\u{d29c}', GC_LV), ('\u{d29d}', '\u{d2b7}', GC_LVT), ('\u{d2b8}', '\u{d2b8}', GC_LV),
+ ('\u{d2b9}', '\u{d2d3}', GC_LVT), ('\u{d2d4}', '\u{d2d4}', GC_LV), ('\u{d2d5}', '\u{d2ef}',
+ GC_LVT), ('\u{d2f0}', '\u{d2f0}', GC_LV), ('\u{d2f1}', '\u{d30b}', GC_LVT), ('\u{d30c}',
+ '\u{d30c}', GC_LV), ('\u{d30d}', '\u{d327}', GC_LVT), ('\u{d328}', '\u{d328}', GC_LV),
+ ('\u{d329}', '\u{d343}', GC_LVT), ('\u{d344}', '\u{d344}', GC_LV), ('\u{d345}', '\u{d35f}',
+ GC_LVT), ('\u{d360}', '\u{d360}', GC_LV), ('\u{d361}', '\u{d37b}', GC_LVT), ('\u{d37c}',
+ '\u{d37c}', GC_LV), ('\u{d37d}', '\u{d397}', GC_LVT), ('\u{d398}', '\u{d398}', GC_LV),
+ ('\u{d399}', '\u{d3b3}', GC_LVT), ('\u{d3b4}', '\u{d3b4}', GC_LV), ('\u{d3b5}', '\u{d3cf}',
+ GC_LVT), ('\u{d3d0}', '\u{d3d0}', GC_LV), ('\u{d3d1}', '\u{d3eb}', GC_LVT), ('\u{d3ec}',
+ '\u{d3ec}', GC_LV), ('\u{d3ed}', '\u{d407}', GC_LVT), ('\u{d408}', '\u{d408}', GC_LV),
+ ('\u{d409}', '\u{d423}', GC_LVT), ('\u{d424}', '\u{d424}', GC_LV), ('\u{d425}', '\u{d43f}',
+ GC_LVT), ('\u{d440}', '\u{d440}', GC_LV), ('\u{d441}', '\u{d45b}', GC_LVT), ('\u{d45c}',
+ '\u{d45c}', GC_LV), ('\u{d45d}', '\u{d477}', GC_LVT), ('\u{d478}', '\u{d478}', GC_LV),
+ ('\u{d479}', '\u{d493}', GC_LVT), ('\u{d494}', '\u{d494}', GC_LV), ('\u{d495}', '\u{d4af}',
+ GC_LVT), ('\u{d4b0}', '\u{d4b0}', GC_LV), ('\u{d4b1}', '\u{d4cb}', GC_LVT), ('\u{d4cc}',
+ '\u{d4cc}', GC_LV), ('\u{d4cd}', '\u{d4e7}', GC_LVT), ('\u{d4e8}', '\u{d4e8}', GC_LV),
+ ('\u{d4e9}', '\u{d503}', GC_LVT), ('\u{d504}', '\u{d504}', GC_LV), ('\u{d505}', '\u{d51f}',
+ GC_LVT), ('\u{d520}', '\u{d520}', GC_LV), ('\u{d521}', '\u{d53b}', GC_LVT), ('\u{d53c}',
+ '\u{d53c}', GC_LV), ('\u{d53d}', '\u{d557}', GC_LVT), ('\u{d558}', '\u{d558}', GC_LV),
+ ('\u{d559}', '\u{d573}', GC_LVT), ('\u{d574}', '\u{d574}', GC_LV), ('\u{d575}', '\u{d58f}',
+ GC_LVT), ('\u{d590}', '\u{d590}', GC_LV), ('\u{d591}', '\u{d5ab}', GC_LVT), ('\u{d5ac}',
+ '\u{d5ac}', GC_LV), ('\u{d5ad}', '\u{d5c7}', GC_LVT), ('\u{d5c8}', '\u{d5c8}', GC_LV),
+ ('\u{d5c9}', '\u{d5e3}', GC_LVT), ('\u{d5e4}', '\u{d5e4}', GC_LV), ('\u{d5e5}', '\u{d5ff}',
+ GC_LVT), ('\u{d600}', '\u{d600}', GC_LV), ('\u{d601}', '\u{d61b}', GC_LVT), ('\u{d61c}',
+ '\u{d61c}', GC_LV), ('\u{d61d}', '\u{d637}', GC_LVT), ('\u{d638}', '\u{d638}', GC_LV),
+ ('\u{d639}', '\u{d653}', GC_LVT), ('\u{d654}', '\u{d654}', GC_LV), ('\u{d655}', '\u{d66f}',
+ GC_LVT), ('\u{d670}', '\u{d670}', GC_LV), ('\u{d671}', '\u{d68b}', GC_LVT), ('\u{d68c}',
+ '\u{d68c}', GC_LV), ('\u{d68d}', '\u{d6a7}', GC_LVT), ('\u{d6a8}', '\u{d6a8}', GC_LV),
+ ('\u{d6a9}', '\u{d6c3}', GC_LVT), ('\u{d6c4}', '\u{d6c4}', GC_LV), ('\u{d6c5}', '\u{d6df}',
+ GC_LVT), ('\u{d6e0}', '\u{d6e0}', GC_LV), ('\u{d6e1}', '\u{d6fb}', GC_LVT), ('\u{d6fc}',
+ '\u{d6fc}', GC_LV), ('\u{d6fd}', '\u{d717}', GC_LVT), ('\u{d718}', '\u{d718}', GC_LV),
+ ('\u{d719}', '\u{d733}', GC_LVT), ('\u{d734}', '\u{d734}', GC_LV), ('\u{d735}', '\u{d74f}',
+ GC_LVT), ('\u{d750}', '\u{d750}', GC_LV), ('\u{d751}', '\u{d76b}', GC_LVT), ('\u{d76c}',
+ '\u{d76c}', GC_LV), ('\u{d76d}', '\u{d787}', GC_LVT), ('\u{d788}', '\u{d788}', GC_LV),
+ ('\u{d789}', '\u{d7a3}', GC_LVT), ('\u{d7b0}', '\u{d7c6}', GC_V), ('\u{d7cb}', '\u{d7fb}',
+ GC_T), ('\u{fb1e}', '\u{fb1e}', GC_Extend), ('\u{fe00}', '\u{fe0f}', GC_Extend),
+ ('\u{fe20}', '\u{fe2f}', GC_Extend), ('\u{feff}', '\u{feff}', GC_Control), ('\u{ff9e}',
+ '\u{ff9f}', GC_Extend), ('\u{fff0}', '\u{fffb}', GC_Control), ('\u{101fd}', '\u{101fd}',
+ GC_Extend), ('\u{102e0}', '\u{102e0}', GC_Extend), ('\u{10376}', '\u{1037a}', GC_Extend),
+ ('\u{10a01}', '\u{10a03}', GC_Extend), ('\u{10a05}', '\u{10a06}', GC_Extend), ('\u{10a0c}',
+ '\u{10a0f}', GC_Extend), ('\u{10a38}', '\u{10a3a}', GC_Extend), ('\u{10a3f}', '\u{10a3f}',
+ GC_Extend), ('\u{10ae5}', '\u{10ae6}', GC_Extend), ('\u{10d24}', '\u{10d27}', GC_Extend),
+ ('\u{10eab}', '\u{10eac}', GC_Extend), ('\u{10efd}', '\u{10eff}', GC_Extend), ('\u{10f46}',
+ '\u{10f50}', GC_Extend), ('\u{10f82}', '\u{10f85}', GC_Extend), ('\u{11000}', '\u{11000}',
+ GC_SpacingMark), ('\u{11001}', '\u{11001}', GC_Extend), ('\u{11002}', '\u{11002}',
+ GC_SpacingMark), ('\u{11038}', '\u{11046}', GC_Extend), ('\u{11070}', '\u{11070}',
+ GC_Extend), ('\u{11073}', '\u{11074}', GC_Extend), ('\u{1107f}', '\u{11081}', GC_Extend),
+ ('\u{11082}', '\u{11082}', GC_SpacingMark), ('\u{110b0}', '\u{110b2}', GC_SpacingMark),
+ ('\u{110b3}', '\u{110b6}', GC_Extend), ('\u{110b7}', '\u{110b8}', GC_SpacingMark),
+ ('\u{110b9}', '\u{110ba}', GC_Extend), ('\u{110bd}', '\u{110bd}', GC_Prepend), ('\u{110c2}',
+ '\u{110c2}', GC_Extend), ('\u{110cd}', '\u{110cd}', GC_Prepend), ('\u{11100}', '\u{11102}',
+ GC_Extend), ('\u{11127}', '\u{1112b}', GC_Extend), ('\u{1112c}', '\u{1112c}',
+ GC_SpacingMark), ('\u{1112d}', '\u{11134}', GC_Extend), ('\u{11145}', '\u{11146}',
+ GC_SpacingMark), ('\u{11173}', '\u{11173}', GC_Extend), ('\u{11180}', '\u{11181}',
+ GC_Extend), ('\u{11182}', '\u{11182}', GC_SpacingMark), ('\u{111b3}', '\u{111b5}',
+ GC_SpacingMark), ('\u{111b6}', '\u{111be}', GC_Extend), ('\u{111bf}', '\u{111c0}',
+ GC_SpacingMark), ('\u{111c2}', '\u{111c3}', GC_Prepend), ('\u{111c9}', '\u{111cc}',
+ GC_Extend), ('\u{111ce}', '\u{111ce}', GC_SpacingMark), ('\u{111cf}', '\u{111cf}',
+ GC_Extend), ('\u{1122c}', '\u{1122e}', GC_SpacingMark), ('\u{1122f}', '\u{11231}',
+ GC_Extend), ('\u{11232}', '\u{11233}', GC_SpacingMark), ('\u{11234}', '\u{11234}',
+ GC_Extend), ('\u{11235}', '\u{11235}', GC_SpacingMark), ('\u{11236}', '\u{11237}',
+ GC_Extend), ('\u{1123e}', '\u{1123e}', GC_Extend), ('\u{11241}', '\u{11241}', GC_Extend),
+ ('\u{112df}', '\u{112df}', GC_Extend), ('\u{112e0}', '\u{112e2}', GC_SpacingMark),
+ ('\u{112e3}', '\u{112ea}', GC_Extend), ('\u{11300}', '\u{11301}', GC_Extend), ('\u{11302}',
+ '\u{11303}', GC_SpacingMark), ('\u{1133b}', '\u{1133c}', GC_Extend), ('\u{1133e}',
+ '\u{1133e}', GC_Extend), ('\u{1133f}', '\u{1133f}', GC_SpacingMark), ('\u{11340}',
+ '\u{11340}', GC_Extend), ('\u{11341}', '\u{11344}', GC_SpacingMark), ('\u{11347}',
+ '\u{11348}', GC_SpacingMark), ('\u{1134b}', '\u{1134d}', GC_SpacingMark), ('\u{11357}',
+ '\u{11357}', GC_Extend), ('\u{11362}', '\u{11363}', GC_SpacingMark), ('\u{11366}',
+ '\u{1136c}', GC_Extend), ('\u{11370}', '\u{11374}', GC_Extend), ('\u{11435}', '\u{11437}',
+ GC_SpacingMark), ('\u{11438}', '\u{1143f}', GC_Extend), ('\u{11440}', '\u{11441}',
+ GC_SpacingMark), ('\u{11442}', '\u{11444}', GC_Extend), ('\u{11445}', '\u{11445}',
+ GC_SpacingMark), ('\u{11446}', '\u{11446}', GC_Extend), ('\u{1145e}', '\u{1145e}',
+ GC_Extend), ('\u{114b0}', '\u{114b0}', GC_Extend), ('\u{114b1}', '\u{114b2}',
+ GC_SpacingMark), ('\u{114b3}', '\u{114b8}', GC_Extend), ('\u{114b9}', '\u{114b9}',
+ GC_SpacingMark), ('\u{114ba}', '\u{114ba}', GC_Extend), ('\u{114bb}', '\u{114bc}',
+ GC_SpacingMark), ('\u{114bd}', '\u{114bd}', GC_Extend), ('\u{114be}', '\u{114be}',
+ GC_SpacingMark), ('\u{114bf}', '\u{114c0}', GC_Extend), ('\u{114c1}', '\u{114c1}',
+ GC_SpacingMark), ('\u{114c2}', '\u{114c3}', GC_Extend), ('\u{115af}', '\u{115af}',
+ GC_Extend), ('\u{115b0}', '\u{115b1}', GC_SpacingMark), ('\u{115b2}', '\u{115b5}',
+ GC_Extend), ('\u{115b8}', '\u{115bb}', GC_SpacingMark), ('\u{115bc}', '\u{115bd}',
+ GC_Extend), ('\u{115be}', '\u{115be}', GC_SpacingMark), ('\u{115bf}', '\u{115c0}',
+ GC_Extend), ('\u{115dc}', '\u{115dd}', GC_Extend), ('\u{11630}', '\u{11632}',
+ GC_SpacingMark), ('\u{11633}', '\u{1163a}', GC_Extend), ('\u{1163b}', '\u{1163c}',
+ GC_SpacingMark), ('\u{1163d}', '\u{1163d}', GC_Extend), ('\u{1163e}', '\u{1163e}',
+ GC_SpacingMark), ('\u{1163f}', '\u{11640}', GC_Extend), ('\u{116ab}', '\u{116ab}',
+ GC_Extend), ('\u{116ac}', '\u{116ac}', GC_SpacingMark), ('\u{116ad}', '\u{116ad}',
+ GC_Extend), ('\u{116ae}', '\u{116af}', GC_SpacingMark), ('\u{116b0}', '\u{116b5}',
+ GC_Extend), ('\u{116b6}', '\u{116b6}', GC_SpacingMark), ('\u{116b7}', '\u{116b7}',
+ GC_Extend), ('\u{1171d}', '\u{1171f}', GC_Extend), ('\u{11722}', '\u{11725}', GC_Extend),
+ ('\u{11726}', '\u{11726}', GC_SpacingMark), ('\u{11727}', '\u{1172b}', GC_Extend),
+ ('\u{1182c}', '\u{1182e}', GC_SpacingMark), ('\u{1182f}', '\u{11837}', GC_Extend),
+ ('\u{11838}', '\u{11838}', GC_SpacingMark), ('\u{11839}', '\u{1183a}', GC_Extend),
+ ('\u{11930}', '\u{11930}', GC_Extend), ('\u{11931}', '\u{11935}', GC_SpacingMark),
+ ('\u{11937}', '\u{11938}', GC_SpacingMark), ('\u{1193b}', '\u{1193c}', GC_Extend),
+ ('\u{1193d}', '\u{1193d}', GC_SpacingMark), ('\u{1193e}', '\u{1193e}', GC_Extend),
+ ('\u{1193f}', '\u{1193f}', GC_Prepend), ('\u{11940}', '\u{11940}', GC_SpacingMark),
+ ('\u{11941}', '\u{11941}', GC_Prepend), ('\u{11942}', '\u{11942}', GC_SpacingMark),
+ ('\u{11943}', '\u{11943}', GC_Extend), ('\u{119d1}', '\u{119d3}', GC_SpacingMark),
+ ('\u{119d4}', '\u{119d7}', GC_Extend), ('\u{119da}', '\u{119db}', GC_Extend), ('\u{119dc}',
+ '\u{119df}', GC_SpacingMark), ('\u{119e0}', '\u{119e0}', GC_Extend), ('\u{119e4}',
+ '\u{119e4}', GC_SpacingMark), ('\u{11a01}', '\u{11a0a}', GC_Extend), ('\u{11a33}',
+ '\u{11a38}', GC_Extend), ('\u{11a39}', '\u{11a39}', GC_SpacingMark), ('\u{11a3a}',
+ '\u{11a3a}', GC_Prepend), ('\u{11a3b}', '\u{11a3e}', GC_Extend), ('\u{11a47}', '\u{11a47}',
+ GC_Extend), ('\u{11a51}', '\u{11a56}', GC_Extend), ('\u{11a57}', '\u{11a58}',
+ GC_SpacingMark), ('\u{11a59}', '\u{11a5b}', GC_Extend), ('\u{11a84}', '\u{11a89}',
+ GC_Prepend), ('\u{11a8a}', '\u{11a96}', GC_Extend), ('\u{11a97}', '\u{11a97}',
+ GC_SpacingMark), ('\u{11a98}', '\u{11a99}', GC_Extend), ('\u{11c2f}', '\u{11c2f}',
+ GC_SpacingMark), ('\u{11c30}', '\u{11c36}', GC_Extend), ('\u{11c38}', '\u{11c3d}',
+ GC_Extend), ('\u{11c3e}', '\u{11c3e}', GC_SpacingMark), ('\u{11c3f}', '\u{11c3f}',
+ GC_Extend), ('\u{11c92}', '\u{11ca7}', GC_Extend), ('\u{11ca9}', '\u{11ca9}',
+ GC_SpacingMark), ('\u{11caa}', '\u{11cb0}', GC_Extend), ('\u{11cb1}', '\u{11cb1}',
+ GC_SpacingMark), ('\u{11cb2}', '\u{11cb3}', GC_Extend), ('\u{11cb4}', '\u{11cb4}',
+ GC_SpacingMark), ('\u{11cb5}', '\u{11cb6}', GC_Extend), ('\u{11d31}', '\u{11d36}',
+ GC_Extend), ('\u{11d3a}', '\u{11d3a}', GC_Extend), ('\u{11d3c}', '\u{11d3d}', GC_Extend),
+ ('\u{11d3f}', '\u{11d45}', GC_Extend), ('\u{11d46}', '\u{11d46}', GC_Prepend), ('\u{11d47}',
+ '\u{11d47}', GC_Extend), ('\u{11d8a}', '\u{11d8e}', GC_SpacingMark), ('\u{11d90}',
+ '\u{11d91}', GC_Extend), ('\u{11d93}', '\u{11d94}', GC_SpacingMark), ('\u{11d95}',
+ '\u{11d95}', GC_Extend), ('\u{11d96}', '\u{11d96}', GC_SpacingMark), ('\u{11d97}',
+ '\u{11d97}', GC_Extend), ('\u{11ef3}', '\u{11ef4}', GC_Extend), ('\u{11ef5}', '\u{11ef6}',
+ GC_SpacingMark), ('\u{11f00}', '\u{11f01}', GC_Extend), ('\u{11f02}', '\u{11f02}',
+ GC_Prepend), ('\u{11f03}', '\u{11f03}', GC_SpacingMark), ('\u{11f34}', '\u{11f35}',
+ GC_SpacingMark), ('\u{11f36}', '\u{11f3a}', GC_Extend), ('\u{11f3e}', '\u{11f3f}',
+ GC_SpacingMark), ('\u{11f40}', '\u{11f40}', GC_Extend), ('\u{11f41}', '\u{11f41}',
+ GC_SpacingMark), ('\u{11f42}', '\u{11f42}', GC_Extend), ('\u{13430}', '\u{1343f}',
+ GC_Control), ('\u{13440}', '\u{13440}', GC_Extend), ('\u{13447}', '\u{13455}', GC_Extend),
+ ('\u{16af0}', '\u{16af4}', GC_Extend), ('\u{16b30}', '\u{16b36}', GC_Extend), ('\u{16f4f}',
+ '\u{16f4f}', GC_Extend), ('\u{16f51}', '\u{16f87}', GC_SpacingMark), ('\u{16f8f}',
+ '\u{16f92}', GC_Extend), ('\u{16fe4}', '\u{16fe4}', GC_Extend), ('\u{16ff0}', '\u{16ff1}',
+ GC_SpacingMark), ('\u{1bc9d}', '\u{1bc9e}', GC_Extend), ('\u{1bca0}', '\u{1bca3}',
+ GC_Control), ('\u{1cf00}', '\u{1cf2d}', GC_Extend), ('\u{1cf30}', '\u{1cf46}', GC_Extend),
+ ('\u{1d165}', '\u{1d165}', GC_Extend), ('\u{1d166}', '\u{1d166}', GC_SpacingMark),
+ ('\u{1d167}', '\u{1d169}', GC_Extend), ('\u{1d16d}', '\u{1d16d}', GC_SpacingMark),
+ ('\u{1d16e}', '\u{1d172}', GC_Extend), ('\u{1d173}', '\u{1d17a}', GC_Control), ('\u{1d17b}',
+ '\u{1d182}', GC_Extend), ('\u{1d185}', '\u{1d18b}', GC_Extend), ('\u{1d1aa}', '\u{1d1ad}',
+ GC_Extend), ('\u{1d242}', '\u{1d244}', GC_Extend), ('\u{1da00}', '\u{1da36}', GC_Extend),
+ ('\u{1da3b}', '\u{1da6c}', GC_Extend), ('\u{1da75}', '\u{1da75}', GC_Extend), ('\u{1da84}',
+ '\u{1da84}', GC_Extend), ('\u{1da9b}', '\u{1da9f}', GC_Extend), ('\u{1daa1}', '\u{1daaf}',
+ GC_Extend), ('\u{1e000}', '\u{1e006}', GC_Extend), ('\u{1e008}', '\u{1e018}', GC_Extend),
+ ('\u{1e01b}', '\u{1e021}', GC_Extend), ('\u{1e023}', '\u{1e024}', GC_Extend), ('\u{1e026}',
+ '\u{1e02a}', GC_Extend), ('\u{1e08f}', '\u{1e08f}', GC_Extend), ('\u{1e130}', '\u{1e136}',
+ GC_Extend), ('\u{1e2ae}', '\u{1e2ae}', GC_Extend), ('\u{1e2ec}', '\u{1e2ef}', GC_Extend),
+ ('\u{1e4ec}', '\u{1e4ef}', GC_Extend), ('\u{1e8d0}', '\u{1e8d6}', GC_Extend), ('\u{1e944}',
+ '\u{1e94a}', GC_Extend), ('\u{1f000}', '\u{1f0ff}', GC_Extended_Pictographic), ('\u{1f10d}',
+ '\u{1f10f}', GC_Extended_Pictographic), ('\u{1f12f}', '\u{1f12f}',
+ GC_Extended_Pictographic), ('\u{1f16c}', '\u{1f171}', GC_Extended_Pictographic),
+ ('\u{1f17e}', '\u{1f17f}', GC_Extended_Pictographic), ('\u{1f18e}', '\u{1f18e}',
+ GC_Extended_Pictographic), ('\u{1f191}', '\u{1f19a}', GC_Extended_Pictographic),
+ ('\u{1f1ad}', '\u{1f1e5}', GC_Extended_Pictographic), ('\u{1f1e6}', '\u{1f1ff}',
+ GC_Regional_Indicator), ('\u{1f201}', '\u{1f20f}', GC_Extended_Pictographic), ('\u{1f21a}',
+ '\u{1f21a}', GC_Extended_Pictographic), ('\u{1f22f}', '\u{1f22f}',
+ GC_Extended_Pictographic), ('\u{1f232}', '\u{1f23a}', GC_Extended_Pictographic),
+ ('\u{1f23c}', '\u{1f23f}', GC_Extended_Pictographic), ('\u{1f249}', '\u{1f3fa}',
+ GC_Extended_Pictographic), ('\u{1f3fb}', '\u{1f3ff}', GC_Extend), ('\u{1f400}', '\u{1f53d}',
+ GC_Extended_Pictographic), ('\u{1f546}', '\u{1f64f}', GC_Extended_Pictographic),
+ ('\u{1f680}', '\u{1f6ff}', GC_Extended_Pictographic), ('\u{1f774}', '\u{1f77f}',
+ GC_Extended_Pictographic), ('\u{1f7d5}', '\u{1f7ff}', GC_Extended_Pictographic),
+ ('\u{1f80c}', '\u{1f80f}', GC_Extended_Pictographic), ('\u{1f848}', '\u{1f84f}',
+ GC_Extended_Pictographic), ('\u{1f85a}', '\u{1f85f}', GC_Extended_Pictographic),
+ ('\u{1f888}', '\u{1f88f}', GC_Extended_Pictographic), ('\u{1f8ae}', '\u{1f8ff}',
+ GC_Extended_Pictographic), ('\u{1f90c}', '\u{1f93a}', GC_Extended_Pictographic),
+ ('\u{1f93c}', '\u{1f945}', GC_Extended_Pictographic), ('\u{1f947}', '\u{1faff}',
+ GC_Extended_Pictographic), ('\u{1fc00}', '\u{1fffd}', GC_Extended_Pictographic),
+ ('\u{e0000}', '\u{e001f}', GC_Control), ('\u{e0020}', '\u{e007f}', GC_Extend), ('\u{e0080}',
+ '\u{e00ff}', GC_Control), ('\u{e0100}', '\u{e01ef}', GC_Extend), ('\u{e01f0}', '\u{e0fff}',
+ GC_Control)
+ ];
+
+}
+
+pub mod word {
+ use core::result::Result::{Ok, Err};
+
+ pub use self::WordCat::*;
+
+ #[allow(non_camel_case_types)] // variant names contain underscores (e.g. WC_Double_Quote), so the lint is silenced
+ #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+ pub enum WordCat { // category stored in the third field of each word_cat_table range entry
+ WC_ALetter, // e.g. U+0041..U+005A in word_cat_table
+ WC_Any, // fallback: returned by bsearch_range_value_table when no table range contains the char
+ WC_CR, // e.g. U+000D
+ WC_Double_Quote, // e.g. U+0022
+ WC_Extend, // e.g. U+0300..U+036F
+ WC_ExtendNumLet, // e.g. U+005F
+ WC_Format, // e.g. U+00AD
+ WC_Hebrew_Letter, // e.g. U+05D0..U+05EA
+ WC_Katakana, // presumably the UAX #29 Katakana word-break class; TODO confirm against the table
+ WC_LF, // e.g. U+000A
+ WC_MidLetter, // e.g. U+003A
+ WC_MidNum, // e.g. U+002C
+ WC_MidNumLet, // e.g. U+002E
+ WC_Newline, // e.g. U+000B..U+000C and U+0085
+ WC_Numeric, // e.g. U+0030..U+0039
+ WC_Regional_Indicator, // presumably the UAX #29 Regional_Indicator class; TODO confirm against the table
+ WC_Single_Quote, // e.g. U+0027
+ WC_WSegSpace, // e.g. U+0020
+ WC_ZWJ, // e.g. U+200D
+ }
+
+ /// Binary-searches `r`, a table of inclusive `(lo, hi, category)` ranges, for `c`.
+ ///
+ /// The table has to be ordered by code point for the binary search to be
+ /// meaningful.
+ ///
+ /// Returns the `(lo, hi, category)` of the range that contains `c`. If no
+ /// range contains it, returns the bounds of the gap `c` falls into, tagged
+ /// `WC_Any`: the gap starts one past the end of the preceding range (or at 0
+ /// when there is none) and ends one before the start of the following range
+ /// (or at `u32::MAX` when there is none).
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, WordCat)]) -> (u32, u32, WordCat) {
+     use core::cmp::Ordering;
+
+     let probe = r.binary_search_by(|&(start, end, _)| {
+         if end < c {
+             Ordering::Less
+         } else if c < start {
+             Ordering::Greater
+         } else {
+             Ordering::Equal
+         }
+     });
+
+     match probe {
+         Ok(hit) => {
+             let (start, end, cat) = r[hit];
+             (start as u32, end as u32, cat)
+         }
+         Err(slot) => {
+             // `slot` is where a range containing `c` would be inserted, so the
+             // neighbours on either side delimit the uncovered gap.
+             let gap_start = if slot == 0 { 0 } else { r[slot - 1].1 as u32 + 1 };
+             let gap_end = match r.get(slot) {
+                 Some(next) => next.0 as u32 - 1,
+                 None => core::u32::MAX,
+             };
+             (gap_start, gap_end, WC_Any)
+         }
+     }
+ }
+
+ /// Looks `c` up in `word_cat_table`.
+ ///
+ /// Returns `(lo, hi, category)` as produced by `bsearch_range_value_table`:
+ /// the bounds of the table range (or uncovered gap) that `c` lies in,
+ /// together with its `WordCat`.
+ pub fn word_category(c: char) -> (u32, u32, WordCat) {
+     let table: &'static [(char, char, WordCat)] = word_cat_table;
+     bsearch_range_value_table(c, table)
+ }
+
+ const word_cat_table: &'static [(char, char, WordCat)] = &[
+ ('\u{a}', '\u{a}', WC_LF), ('\u{b}', '\u{c}', WC_Newline), ('\u{d}', '\u{d}', WC_CR),
+ ('\u{20}', '\u{20}', WC_WSegSpace), ('\u{22}', '\u{22}', WC_Double_Quote), ('\u{27}',
+ '\u{27}', WC_Single_Quote), ('\u{2c}', '\u{2c}', WC_MidNum), ('\u{2e}', '\u{2e}',
+ WC_MidNumLet), ('\u{30}', '\u{39}', WC_Numeric), ('\u{3a}', '\u{3a}', WC_MidLetter),
+ ('\u{3b}', '\u{3b}', WC_MidNum), ('\u{41}', '\u{5a}', WC_ALetter), ('\u{5f}', '\u{5f}',
+ WC_ExtendNumLet), ('\u{61}', '\u{7a}', WC_ALetter), ('\u{85}', '\u{85}', WC_Newline),
+ ('\u{aa}', '\u{aa}', WC_ALetter), ('\u{ad}', '\u{ad}', WC_Format), ('\u{b5}', '\u{b5}',
+ WC_ALetter), ('\u{b7}', '\u{b7}', WC_MidLetter), ('\u{ba}', '\u{ba}', WC_ALetter),
+ ('\u{c0}', '\u{d6}', WC_ALetter), ('\u{d8}', '\u{f6}', WC_ALetter), ('\u{f8}', '\u{2d7}',
+ WC_ALetter), ('\u{2de}', '\u{2ff}', WC_ALetter), ('\u{300}', '\u{36f}', WC_Extend),
+ ('\u{370}', '\u{374}', WC_ALetter), ('\u{376}', '\u{377}', WC_ALetter), ('\u{37a}',
+ '\u{37d}', WC_ALetter), ('\u{37e}', '\u{37e}', WC_MidNum), ('\u{37f}', '\u{37f}',
+ WC_ALetter), ('\u{386}', '\u{386}', WC_ALetter), ('\u{387}', '\u{387}', WC_MidLetter),
+ ('\u{388}', '\u{38a}', WC_ALetter), ('\u{38c}', '\u{38c}', WC_ALetter), ('\u{38e}',
+ '\u{3a1}', WC_ALetter), ('\u{3a3}', '\u{3f5}', WC_ALetter), ('\u{3f7}', '\u{481}',
+ WC_ALetter), ('\u{483}', '\u{489}', WC_Extend), ('\u{48a}', '\u{52f}', WC_ALetter),
+ ('\u{531}', '\u{556}', WC_ALetter), ('\u{559}', '\u{55c}', WC_ALetter), ('\u{55e}',
+ '\u{55e}', WC_ALetter), ('\u{55f}', '\u{55f}', WC_MidLetter), ('\u{560}', '\u{588}',
+ WC_ALetter), ('\u{589}', '\u{589}', WC_MidNum), ('\u{58a}', '\u{58a}', WC_ALetter),
+ ('\u{591}', '\u{5bd}', WC_Extend), ('\u{5bf}', '\u{5bf}', WC_Extend), ('\u{5c1}', '\u{5c2}',
+ WC_Extend), ('\u{5c4}', '\u{5c5}', WC_Extend), ('\u{5c7}', '\u{5c7}', WC_Extend),
+ ('\u{5d0}', '\u{5ea}', WC_Hebrew_Letter), ('\u{5ef}', '\u{5f2}', WC_Hebrew_Letter),
+ ('\u{5f3}', '\u{5f3}', WC_ALetter), ('\u{5f4}', '\u{5f4}', WC_MidLetter), ('\u{600}',
+ '\u{605}', WC_Format), ('\u{60c}', '\u{60d}', WC_MidNum), ('\u{610}', '\u{61a}', WC_Extend),
+ ('\u{61c}', '\u{61c}', WC_Format), ('\u{620}', '\u{64a}', WC_ALetter), ('\u{64b}',
+ '\u{65f}', WC_Extend), ('\u{660}', '\u{669}', WC_Numeric), ('\u{66b}', '\u{66b}',
+ WC_Numeric), ('\u{66c}', '\u{66c}', WC_MidNum), ('\u{66e}', '\u{66f}', WC_ALetter),
+ ('\u{670}', '\u{670}', WC_Extend), ('\u{671}', '\u{6d3}', WC_ALetter), ('\u{6d5}',
+ '\u{6d5}', WC_ALetter), ('\u{6d6}', '\u{6dc}', WC_Extend), ('\u{6dd}', '\u{6dd}',
+ WC_Format), ('\u{6df}', '\u{6e4}', WC_Extend), ('\u{6e5}', '\u{6e6}', WC_ALetter),
+ ('\u{6e7}', '\u{6e8}', WC_Extend), ('\u{6ea}', '\u{6ed}', WC_Extend), ('\u{6ee}', '\u{6ef}',
+ WC_ALetter), ('\u{6f0}', '\u{6f9}', WC_Numeric), ('\u{6fa}', '\u{6fc}', WC_ALetter),
+ ('\u{6ff}', '\u{6ff}', WC_ALetter), ('\u{70f}', '\u{70f}', WC_Format), ('\u{710}',
+ '\u{710}', WC_ALetter), ('\u{711}', '\u{711}', WC_Extend), ('\u{712}', '\u{72f}',
+ WC_ALetter), ('\u{730}', '\u{74a}', WC_Extend), ('\u{74d}', '\u{7a5}', WC_ALetter),
+ ('\u{7a6}', '\u{7b0}', WC_Extend), ('\u{7b1}', '\u{7b1}', WC_ALetter), ('\u{7c0}',
+ '\u{7c9}', WC_Numeric), ('\u{7ca}', '\u{7ea}', WC_ALetter), ('\u{7eb}', '\u{7f3}',
+ WC_Extend), ('\u{7f4}', '\u{7f5}', WC_ALetter), ('\u{7f8}', '\u{7f8}', WC_MidNum),
+ ('\u{7fa}', '\u{7fa}', WC_ALetter), ('\u{7fd}', '\u{7fd}', WC_Extend), ('\u{800}',
+ '\u{815}', WC_ALetter), ('\u{816}', '\u{819}', WC_Extend), ('\u{81a}', '\u{81a}',
+ WC_ALetter), ('\u{81b}', '\u{823}', WC_Extend), ('\u{824}', '\u{824}', WC_ALetter),
+ ('\u{825}', '\u{827}', WC_Extend), ('\u{828}', '\u{828}', WC_ALetter), ('\u{829}',
+ '\u{82d}', WC_Extend), ('\u{840}', '\u{858}', WC_ALetter), ('\u{859}', '\u{85b}',
+ WC_Extend), ('\u{860}', '\u{86a}', WC_ALetter), ('\u{870}', '\u{887}', WC_ALetter),
+ ('\u{889}', '\u{88e}', WC_ALetter), ('\u{890}', '\u{891}', WC_Format), ('\u{898}',
+ '\u{89f}', WC_Extend), ('\u{8a0}', '\u{8c9}', WC_ALetter), ('\u{8ca}', '\u{8e1}',
+ WC_Extend), ('\u{8e2}', '\u{8e2}', WC_Format), ('\u{8e3}', '\u{903}', WC_Extend),
+ ('\u{904}', '\u{939}', WC_ALetter), ('\u{93a}', '\u{93c}', WC_Extend), ('\u{93d}',
+ '\u{93d}', WC_ALetter), ('\u{93e}', '\u{94f}', WC_Extend), ('\u{950}', '\u{950}',
+ WC_ALetter), ('\u{951}', '\u{957}', WC_Extend), ('\u{958}', '\u{961}', WC_ALetter),
+ ('\u{962}', '\u{963}', WC_Extend), ('\u{966}', '\u{96f}', WC_Numeric), ('\u{971}',
+ '\u{980}', WC_ALetter), ('\u{981}', '\u{983}', WC_Extend), ('\u{985}', '\u{98c}',
+ WC_ALetter), ('\u{98f}', '\u{990}', WC_ALetter), ('\u{993}', '\u{9a8}', WC_ALetter),
+ ('\u{9aa}', '\u{9b0}', WC_ALetter), ('\u{9b2}', '\u{9b2}', WC_ALetter), ('\u{9b6}',
+ '\u{9b9}', WC_ALetter), ('\u{9bc}', '\u{9bc}', WC_Extend), ('\u{9bd}', '\u{9bd}',
+ WC_ALetter), ('\u{9be}', '\u{9c4}', WC_Extend), ('\u{9c7}', '\u{9c8}', WC_Extend),
+ ('\u{9cb}', '\u{9cd}', WC_Extend), ('\u{9ce}', '\u{9ce}', WC_ALetter), ('\u{9d7}',
+ '\u{9d7}', WC_Extend), ('\u{9dc}', '\u{9dd}', WC_ALetter), ('\u{9df}', '\u{9e1}',
+ WC_ALetter), ('\u{9e2}', '\u{9e3}', WC_Extend), ('\u{9e6}', '\u{9ef}', WC_Numeric),
+ ('\u{9f0}', '\u{9f1}', WC_ALetter), ('\u{9fc}', '\u{9fc}', WC_ALetter), ('\u{9fe}',
+ '\u{9fe}', WC_Extend), ('\u{a01}', '\u{a03}', WC_Extend), ('\u{a05}', '\u{a0a}',
+ WC_ALetter), ('\u{a0f}', '\u{a10}', WC_ALetter), ('\u{a13}', '\u{a28}', WC_ALetter),
+ ('\u{a2a}', '\u{a30}', WC_ALetter), ('\u{a32}', '\u{a33}', WC_ALetter), ('\u{a35}',
+ '\u{a36}', WC_ALetter), ('\u{a38}', '\u{a39}', WC_ALetter), ('\u{a3c}', '\u{a3c}',
+ WC_Extend), ('\u{a3e}', '\u{a42}', WC_Extend), ('\u{a47}', '\u{a48}', WC_Extend),
+ ('\u{a4b}', '\u{a4d}', WC_Extend), ('\u{a51}', '\u{a51}', WC_Extend), ('\u{a59}', '\u{a5c}',
+ WC_ALetter), ('\u{a5e}', '\u{a5e}', WC_ALetter), ('\u{a66}', '\u{a6f}', WC_Numeric),
+ ('\u{a70}', '\u{a71}', WC_Extend), ('\u{a72}', '\u{a74}', WC_ALetter), ('\u{a75}',
+ '\u{a75}', WC_Extend), ('\u{a81}', '\u{a83}', WC_Extend), ('\u{a85}', '\u{a8d}',
+ WC_ALetter), ('\u{a8f}', '\u{a91}', WC_ALetter), ('\u{a93}', '\u{aa8}', WC_ALetter),
+ ('\u{aaa}', '\u{ab0}', WC_ALetter), ('\u{ab2}', '\u{ab3}', WC_ALetter), ('\u{ab5}',
+ '\u{ab9}', WC_ALetter), ('\u{abc}', '\u{abc}', WC_Extend), ('\u{abd}', '\u{abd}',
+ WC_ALetter), ('\u{abe}', '\u{ac5}', WC_Extend), ('\u{ac7}', '\u{ac9}', WC_Extend),
+ ('\u{acb}', '\u{acd}', WC_Extend), ('\u{ad0}', '\u{ad0}', WC_ALetter), ('\u{ae0}',
+ '\u{ae1}', WC_ALetter), ('\u{ae2}', '\u{ae3}', WC_Extend), ('\u{ae6}', '\u{aef}',
+ WC_Numeric), ('\u{af9}', '\u{af9}', WC_ALetter), ('\u{afa}', '\u{aff}', WC_Extend),
+ ('\u{b01}', '\u{b03}', WC_Extend), ('\u{b05}', '\u{b0c}', WC_ALetter), ('\u{b0f}',
+ '\u{b10}', WC_ALetter), ('\u{b13}', '\u{b28}', WC_ALetter), ('\u{b2a}', '\u{b30}',
+ WC_ALetter), ('\u{b32}', '\u{b33}', WC_ALetter), ('\u{b35}', '\u{b39}', WC_ALetter),
+ ('\u{b3c}', '\u{b3c}', WC_Extend), ('\u{b3d}', '\u{b3d}', WC_ALetter), ('\u{b3e}',
+ '\u{b44}', WC_Extend), ('\u{b47}', '\u{b48}', WC_Extend), ('\u{b4b}', '\u{b4d}', WC_Extend),
+ ('\u{b55}', '\u{b57}', WC_Extend), ('\u{b5c}', '\u{b5d}', WC_ALetter), ('\u{b5f}',
+ '\u{b61}', WC_ALetter), ('\u{b62}', '\u{b63}', WC_Extend), ('\u{b66}', '\u{b6f}',
+ WC_Numeric), ('\u{b71}', '\u{b71}', WC_ALetter), ('\u{b82}', '\u{b82}', WC_Extend),
+ ('\u{b83}', '\u{b83}', WC_ALetter), ('\u{b85}', '\u{b8a}', WC_ALetter), ('\u{b8e}',
+ '\u{b90}', WC_ALetter), ('\u{b92}', '\u{b95}', WC_ALetter), ('\u{b99}', '\u{b9a}',
+ WC_ALetter), ('\u{b9c}', '\u{b9c}', WC_ALetter), ('\u{b9e}', '\u{b9f}', WC_ALetter),
+ ('\u{ba3}', '\u{ba4}', WC_ALetter), ('\u{ba8}', '\u{baa}', WC_ALetter), ('\u{bae}',
+ '\u{bb9}', WC_ALetter), ('\u{bbe}', '\u{bc2}', WC_Extend), ('\u{bc6}', '\u{bc8}',
+ WC_Extend), ('\u{bca}', '\u{bcd}', WC_Extend), ('\u{bd0}', '\u{bd0}', WC_ALetter),
+ ('\u{bd7}', '\u{bd7}', WC_Extend), ('\u{be6}', '\u{bef}', WC_Numeric), ('\u{c00}',
+ '\u{c04}', WC_Extend), ('\u{c05}', '\u{c0c}', WC_ALetter), ('\u{c0e}', '\u{c10}',
+ WC_ALetter), ('\u{c12}', '\u{c28}', WC_ALetter), ('\u{c2a}', '\u{c39}', WC_ALetter),
+ ('\u{c3c}', '\u{c3c}', WC_Extend), ('\u{c3d}', '\u{c3d}', WC_ALetter), ('\u{c3e}',
+ '\u{c44}', WC_Extend), ('\u{c46}', '\u{c48}', WC_Extend), ('\u{c4a}', '\u{c4d}', WC_Extend),
+ ('\u{c55}', '\u{c56}', WC_Extend), ('\u{c58}', '\u{c5a}', WC_ALetter), ('\u{c5d}',
+ '\u{c5d}', WC_ALetter), ('\u{c60}', '\u{c61}', WC_ALetter), ('\u{c62}', '\u{c63}',
+ WC_Extend), ('\u{c66}', '\u{c6f}', WC_Numeric), ('\u{c80}', '\u{c80}', WC_ALetter),
+ ('\u{c81}', '\u{c83}', WC_Extend), ('\u{c85}', '\u{c8c}', WC_ALetter), ('\u{c8e}',
+ '\u{c90}', WC_ALetter), ('\u{c92}', '\u{ca8}', WC_ALetter), ('\u{caa}', '\u{cb3}',
+ WC_ALetter), ('\u{cb5}', '\u{cb9}', WC_ALetter), ('\u{cbc}', '\u{cbc}', WC_Extend),
+ ('\u{cbd}', '\u{cbd}', WC_ALetter), ('\u{cbe}', '\u{cc4}', WC_Extend), ('\u{cc6}',
+ '\u{cc8}', WC_Extend), ('\u{cca}', '\u{ccd}', WC_Extend), ('\u{cd5}', '\u{cd6}', WC_Extend),
+ ('\u{cdd}', '\u{cde}', WC_ALetter), ('\u{ce0}', '\u{ce1}', WC_ALetter), ('\u{ce2}',
+ '\u{ce3}', WC_Extend), ('\u{ce6}', '\u{cef}', WC_Numeric), ('\u{cf1}', '\u{cf2}',
+ WC_ALetter), ('\u{cf3}', '\u{cf3}', WC_Extend), ('\u{d00}', '\u{d03}', WC_Extend),
+ ('\u{d04}', '\u{d0c}', WC_ALetter), ('\u{d0e}', '\u{d10}', WC_ALetter), ('\u{d12}',
+ '\u{d3a}', WC_ALetter), ('\u{d3b}', '\u{d3c}', WC_Extend), ('\u{d3d}', '\u{d3d}',
+ WC_ALetter), ('\u{d3e}', '\u{d44}', WC_Extend), ('\u{d46}', '\u{d48}', WC_Extend),
+ ('\u{d4a}', '\u{d4d}', WC_Extend), ('\u{d4e}', '\u{d4e}', WC_ALetter), ('\u{d54}',
+ '\u{d56}', WC_ALetter), ('\u{d57}', '\u{d57}', WC_Extend), ('\u{d5f}', '\u{d61}',
+ WC_ALetter), ('\u{d62}', '\u{d63}', WC_Extend), ('\u{d66}', '\u{d6f}', WC_Numeric),
+ ('\u{d7a}', '\u{d7f}', WC_ALetter), ('\u{d81}', '\u{d83}', WC_Extend), ('\u{d85}',
+ '\u{d96}', WC_ALetter), ('\u{d9a}', '\u{db1}', WC_ALetter), ('\u{db3}', '\u{dbb}',
+ WC_ALetter), ('\u{dbd}', '\u{dbd}', WC_ALetter), ('\u{dc0}', '\u{dc6}', WC_ALetter),
+ ('\u{dca}', '\u{dca}', WC_Extend), ('\u{dcf}', '\u{dd4}', WC_Extend), ('\u{dd6}', '\u{dd6}',
+ WC_Extend), ('\u{dd8}', '\u{ddf}', WC_Extend), ('\u{de6}', '\u{def}', WC_Numeric),
+ ('\u{df2}', '\u{df3}', WC_Extend), ('\u{e31}', '\u{e31}', WC_Extend), ('\u{e34}', '\u{e3a}',
+ WC_Extend), ('\u{e47}', '\u{e4e}', WC_Extend), ('\u{e50}', '\u{e59}', WC_Numeric),
+ ('\u{eb1}', '\u{eb1}', WC_Extend), ('\u{eb4}', '\u{ebc}', WC_Extend), ('\u{ec8}', '\u{ece}',
+ WC_Extend), ('\u{ed0}', '\u{ed9}', WC_Numeric), ('\u{f00}', '\u{f00}', WC_ALetter),
+ ('\u{f18}', '\u{f19}', WC_Extend), ('\u{f20}', '\u{f29}', WC_Numeric), ('\u{f35}',
+ '\u{f35}', WC_Extend), ('\u{f37}', '\u{f37}', WC_Extend), ('\u{f39}', '\u{f39}', WC_Extend),
+ ('\u{f3e}', '\u{f3f}', WC_Extend), ('\u{f40}', '\u{f47}', WC_ALetter), ('\u{f49}',
+ '\u{f6c}', WC_ALetter), ('\u{f71}', '\u{f84}', WC_Extend), ('\u{f86}', '\u{f87}',
+ WC_Extend), ('\u{f88}', '\u{f8c}', WC_ALetter), ('\u{f8d}', '\u{f97}', WC_Extend),
+ ('\u{f99}', '\u{fbc}', WC_Extend), ('\u{fc6}', '\u{fc6}', WC_Extend), ('\u{102b}',
+ '\u{103e}', WC_Extend), ('\u{1040}', '\u{1049}', WC_Numeric), ('\u{1056}', '\u{1059}',
+ WC_Extend), ('\u{105e}', '\u{1060}', WC_Extend), ('\u{1062}', '\u{1064}', WC_Extend),
+ ('\u{1067}', '\u{106d}', WC_Extend), ('\u{1071}', '\u{1074}', WC_Extend), ('\u{1082}',
+ '\u{108d}', WC_Extend), ('\u{108f}', '\u{108f}', WC_Extend), ('\u{1090}', '\u{1099}',
+ WC_Numeric), ('\u{109a}', '\u{109d}', WC_Extend), ('\u{10a0}', '\u{10c5}', WC_ALetter),
+ ('\u{10c7}', '\u{10c7}', WC_ALetter), ('\u{10cd}', '\u{10cd}', WC_ALetter), ('\u{10d0}',
+ '\u{10fa}', WC_ALetter), ('\u{10fc}', '\u{1248}', WC_ALetter), ('\u{124a}', '\u{124d}',
+ WC_ALetter), ('\u{1250}', '\u{1256}', WC_ALetter), ('\u{1258}', '\u{1258}', WC_ALetter),
+ ('\u{125a}', '\u{125d}', WC_ALetter), ('\u{1260}', '\u{1288}', WC_ALetter), ('\u{128a}',
+ '\u{128d}', WC_ALetter), ('\u{1290}', '\u{12b0}', WC_ALetter), ('\u{12b2}', '\u{12b5}',
+ WC_ALetter), ('\u{12b8}', '\u{12be}', WC_ALetter), ('\u{12c0}', '\u{12c0}', WC_ALetter),
+ ('\u{12c2}', '\u{12c5}', WC_ALetter), ('\u{12c8}', '\u{12d6}', WC_ALetter), ('\u{12d8}',
+ '\u{1310}', WC_ALetter), ('\u{1312}', '\u{1315}', WC_ALetter), ('\u{1318}', '\u{135a}',
+ WC_ALetter), ('\u{135d}', '\u{135f}', WC_Extend), ('\u{1380}', '\u{138f}', WC_ALetter),
+ ('\u{13a0}', '\u{13f5}', WC_ALetter), ('\u{13f8}', '\u{13fd}', WC_ALetter), ('\u{1401}',
+ '\u{166c}', WC_ALetter), ('\u{166f}', '\u{167f}', WC_ALetter), ('\u{1680}', '\u{1680}',
+ WC_WSegSpace), ('\u{1681}', '\u{169a}', WC_ALetter), ('\u{16a0}', '\u{16ea}', WC_ALetter),
+ ('\u{16ee}', '\u{16f8}', WC_ALetter), ('\u{1700}', '\u{1711}', WC_ALetter), ('\u{1712}',
+ '\u{1715}', WC_Extend), ('\u{171f}', '\u{1731}', WC_ALetter), ('\u{1732}', '\u{1734}',
+ WC_Extend), ('\u{1740}', '\u{1751}', WC_ALetter), ('\u{1752}', '\u{1753}', WC_Extend),
+ ('\u{1760}', '\u{176c}', WC_ALetter), ('\u{176e}', '\u{1770}', WC_ALetter), ('\u{1772}',
+ '\u{1773}', WC_Extend), ('\u{17b4}', '\u{17d3}', WC_Extend), ('\u{17dd}', '\u{17dd}',
+ WC_Extend), ('\u{17e0}', '\u{17e9}', WC_Numeric), ('\u{180b}', '\u{180d}', WC_Extend),
+ ('\u{180e}', '\u{180e}', WC_Format), ('\u{180f}', '\u{180f}', WC_Extend), ('\u{1810}',
+ '\u{1819}', WC_Numeric), ('\u{1820}', '\u{1878}', WC_ALetter), ('\u{1880}', '\u{1884}',
+ WC_ALetter), ('\u{1885}', '\u{1886}', WC_Extend), ('\u{1887}', '\u{18a8}', WC_ALetter),
+ ('\u{18a9}', '\u{18a9}', WC_Extend), ('\u{18aa}', '\u{18aa}', WC_ALetter), ('\u{18b0}',
+ '\u{18f5}', WC_ALetter), ('\u{1900}', '\u{191e}', WC_ALetter), ('\u{1920}', '\u{192b}',
+ WC_Extend), ('\u{1930}', '\u{193b}', WC_Extend), ('\u{1946}', '\u{194f}', WC_Numeric),
+ ('\u{19d0}', '\u{19d9}', WC_Numeric), ('\u{1a00}', '\u{1a16}', WC_ALetter), ('\u{1a17}',
+ '\u{1a1b}', WC_Extend), ('\u{1a55}', '\u{1a5e}', WC_Extend), ('\u{1a60}', '\u{1a7c}',
+ WC_Extend), ('\u{1a7f}', '\u{1a7f}', WC_Extend), ('\u{1a80}', '\u{1a89}', WC_Numeric),
+ ('\u{1a90}', '\u{1a99}', WC_Numeric), ('\u{1ab0}', '\u{1ace}', WC_Extend), ('\u{1b00}',
+ '\u{1b04}', WC_Extend), ('\u{1b05}', '\u{1b33}', WC_ALetter), ('\u{1b34}', '\u{1b44}',
+ WC_Extend), ('\u{1b45}', '\u{1b4c}', WC_ALetter), ('\u{1b50}', '\u{1b59}', WC_Numeric),
+ ('\u{1b6b}', '\u{1b73}', WC_Extend), ('\u{1b80}', '\u{1b82}', WC_Extend), ('\u{1b83}',
+ '\u{1ba0}', WC_ALetter), ('\u{1ba1}', '\u{1bad}', WC_Extend), ('\u{1bae}', '\u{1baf}',
+ WC_ALetter), ('\u{1bb0}', '\u{1bb9}', WC_Numeric), ('\u{1bba}', '\u{1be5}', WC_ALetter),
+ ('\u{1be6}', '\u{1bf3}', WC_Extend), ('\u{1c00}', '\u{1c23}', WC_ALetter), ('\u{1c24}',
+ '\u{1c37}', WC_Extend), ('\u{1c40}', '\u{1c49}', WC_Numeric), ('\u{1c4d}', '\u{1c4f}',
+ WC_ALetter), ('\u{1c50}', '\u{1c59}', WC_Numeric), ('\u{1c5a}', '\u{1c7d}', WC_ALetter),
+ ('\u{1c80}', '\u{1c88}', WC_ALetter), ('\u{1c90}', '\u{1cba}', WC_ALetter), ('\u{1cbd}',
+ '\u{1cbf}', WC_ALetter), ('\u{1cd0}', '\u{1cd2}', WC_Extend), ('\u{1cd4}', '\u{1ce8}',
+ WC_Extend), ('\u{1ce9}', '\u{1cec}', WC_ALetter), ('\u{1ced}', '\u{1ced}', WC_Extend),
+ ('\u{1cee}', '\u{1cf3}', WC_ALetter), ('\u{1cf4}', '\u{1cf4}', WC_Extend), ('\u{1cf5}',
+ '\u{1cf6}', WC_ALetter), ('\u{1cf7}', '\u{1cf9}', WC_Extend), ('\u{1cfa}', '\u{1cfa}',
+ WC_ALetter), ('\u{1d00}', '\u{1dbf}', WC_ALetter), ('\u{1dc0}', '\u{1dff}', WC_Extend),
+ ('\u{1e00}', '\u{1f15}', WC_ALetter), ('\u{1f18}', '\u{1f1d}', WC_ALetter), ('\u{1f20}',
+ '\u{1f45}', WC_ALetter), ('\u{1f48}', '\u{1f4d}', WC_ALetter), ('\u{1f50}', '\u{1f57}',
+ WC_ALetter), ('\u{1f59}', '\u{1f59}', WC_ALetter), ('\u{1f5b}', '\u{1f5b}', WC_ALetter),
+ ('\u{1f5d}', '\u{1f5d}', WC_ALetter), ('\u{1f5f}', '\u{1f7d}', WC_ALetter), ('\u{1f80}',
+ '\u{1fb4}', WC_ALetter), ('\u{1fb6}', '\u{1fbc}', WC_ALetter), ('\u{1fbe}', '\u{1fbe}',
+ WC_ALetter), ('\u{1fc2}', '\u{1fc4}', WC_ALetter), ('\u{1fc6}', '\u{1fcc}', WC_ALetter),
+ ('\u{1fd0}', '\u{1fd3}', WC_ALetter), ('\u{1fd6}', '\u{1fdb}', WC_ALetter), ('\u{1fe0}',
+ '\u{1fec}', WC_ALetter), ('\u{1ff2}', '\u{1ff4}', WC_ALetter), ('\u{1ff6}', '\u{1ffc}',
+ WC_ALetter), ('\u{2000}', '\u{2006}', WC_WSegSpace), ('\u{2008}', '\u{200a}', WC_WSegSpace),
+ ('\u{200c}', '\u{200c}', WC_Extend), ('\u{200d}', '\u{200d}', WC_ZWJ), ('\u{200e}',
+ '\u{200f}', WC_Format), ('\u{2018}', '\u{2019}', WC_MidNumLet), ('\u{2024}', '\u{2024}',
+ WC_MidNumLet), ('\u{2027}', '\u{2027}', WC_MidLetter), ('\u{2028}', '\u{2029}', WC_Newline),
+ ('\u{202a}', '\u{202e}', WC_Format), ('\u{202f}', '\u{202f}', WC_ExtendNumLet), ('\u{203f}',
+ '\u{2040}', WC_ExtendNumLet), ('\u{2044}', '\u{2044}', WC_MidNum), ('\u{2054}', '\u{2054}',
+ WC_ExtendNumLet), ('\u{205f}', '\u{205f}', WC_WSegSpace), ('\u{2060}', '\u{2064}',
+ WC_Format), ('\u{2066}', '\u{206f}', WC_Format), ('\u{2071}', '\u{2071}', WC_ALetter),
+ ('\u{207f}', '\u{207f}', WC_ALetter), ('\u{2090}', '\u{209c}', WC_ALetter), ('\u{20d0}',
+ '\u{20f0}', WC_Extend), ('\u{2102}', '\u{2102}', WC_ALetter), ('\u{2107}', '\u{2107}',
+ WC_ALetter), ('\u{210a}', '\u{2113}', WC_ALetter), ('\u{2115}', '\u{2115}', WC_ALetter),
+ ('\u{2119}', '\u{211d}', WC_ALetter), ('\u{2124}', '\u{2124}', WC_ALetter), ('\u{2126}',
+ '\u{2126}', WC_ALetter), ('\u{2128}', '\u{2128}', WC_ALetter), ('\u{212a}', '\u{212d}',
+ WC_ALetter), ('\u{212f}', '\u{2139}', WC_ALetter), ('\u{213c}', '\u{213f}', WC_ALetter),
+ ('\u{2145}', '\u{2149}', WC_ALetter), ('\u{214e}', '\u{214e}', WC_ALetter), ('\u{2160}',
+ '\u{2188}', WC_ALetter), ('\u{24b6}', '\u{24e9}', WC_ALetter), ('\u{2c00}', '\u{2ce4}',
+ WC_ALetter), ('\u{2ceb}', '\u{2cee}', WC_ALetter), ('\u{2cef}', '\u{2cf1}', WC_Extend),
+ ('\u{2cf2}', '\u{2cf3}', WC_ALetter), ('\u{2d00}', '\u{2d25}', WC_ALetter), ('\u{2d27}',
+ '\u{2d27}', WC_ALetter), ('\u{2d2d}', '\u{2d2d}', WC_ALetter), ('\u{2d30}', '\u{2d67}',
+ WC_ALetter), ('\u{2d6f}', '\u{2d6f}', WC_ALetter), ('\u{2d7f}', '\u{2d7f}', WC_Extend),
+ ('\u{2d80}', '\u{2d96}', WC_ALetter), ('\u{2da0}', '\u{2da6}', WC_ALetter), ('\u{2da8}',
+ '\u{2dae}', WC_ALetter), ('\u{2db0}', '\u{2db6}', WC_ALetter), ('\u{2db8}', '\u{2dbe}',
+ WC_ALetter), ('\u{2dc0}', '\u{2dc6}', WC_ALetter), ('\u{2dc8}', '\u{2dce}', WC_ALetter),
+ ('\u{2dd0}', '\u{2dd6}', WC_ALetter), ('\u{2dd8}', '\u{2dde}', WC_ALetter), ('\u{2de0}',
+ '\u{2dff}', WC_Extend), ('\u{2e2f}', '\u{2e2f}', WC_ALetter), ('\u{3000}', '\u{3000}',
+ WC_WSegSpace), ('\u{3005}', '\u{3005}', WC_ALetter), ('\u{302a}', '\u{302f}', WC_Extend),
+ ('\u{3031}', '\u{3035}', WC_Katakana), ('\u{303b}', '\u{303c}', WC_ALetter), ('\u{3099}',
+ '\u{309a}', WC_Extend), ('\u{309b}', '\u{309c}', WC_Katakana), ('\u{30a0}', '\u{30fa}',
+ WC_Katakana), ('\u{30fc}', '\u{30ff}', WC_Katakana), ('\u{3105}', '\u{312f}', WC_ALetter),
+ ('\u{3131}', '\u{318e}', WC_ALetter), ('\u{31a0}', '\u{31bf}', WC_ALetter), ('\u{31f0}',
+ '\u{31ff}', WC_Katakana), ('\u{32d0}', '\u{32fe}', WC_Katakana), ('\u{3300}', '\u{3357}',
+ WC_Katakana), ('\u{a000}', '\u{a48c}', WC_ALetter), ('\u{a4d0}', '\u{a4fd}', WC_ALetter),
+ ('\u{a500}', '\u{a60c}', WC_ALetter), ('\u{a610}', '\u{a61f}', WC_ALetter), ('\u{a620}',
+ '\u{a629}', WC_Numeric), ('\u{a62a}', '\u{a62b}', WC_ALetter), ('\u{a640}', '\u{a66e}',
+ WC_ALetter), ('\u{a66f}', '\u{a672}', WC_Extend), ('\u{a674}', '\u{a67d}', WC_Extend),
+ ('\u{a67f}', '\u{a69d}', WC_ALetter), ('\u{a69e}', '\u{a69f}', WC_Extend), ('\u{a6a0}',
+ '\u{a6ef}', WC_ALetter), ('\u{a6f0}', '\u{a6f1}', WC_Extend), ('\u{a708}', '\u{a7ca}',
+ WC_ALetter), ('\u{a7d0}', '\u{a7d1}', WC_ALetter), ('\u{a7d3}', '\u{a7d3}', WC_ALetter),
+ ('\u{a7d5}', '\u{a7d9}', WC_ALetter), ('\u{a7f2}', '\u{a801}', WC_ALetter), ('\u{a802}',
+ '\u{a802}', WC_Extend), ('\u{a803}', '\u{a805}', WC_ALetter), ('\u{a806}', '\u{a806}',
+ WC_Extend), ('\u{a807}', '\u{a80a}', WC_ALetter), ('\u{a80b}', '\u{a80b}', WC_Extend),
+ ('\u{a80c}', '\u{a822}', WC_ALetter), ('\u{a823}', '\u{a827}', WC_Extend), ('\u{a82c}',
+ '\u{a82c}', WC_Extend), ('\u{a840}', '\u{a873}', WC_ALetter), ('\u{a880}', '\u{a881}',
+ WC_Extend), ('\u{a882}', '\u{a8b3}', WC_ALetter), ('\u{a8b4}', '\u{a8c5}', WC_Extend),
+ ('\u{a8d0}', '\u{a8d9}', WC_Numeric), ('\u{a8e0}', '\u{a8f1}', WC_Extend), ('\u{a8f2}',
+ '\u{a8f7}', WC_ALetter), ('\u{a8fb}', '\u{a8fb}', WC_ALetter), ('\u{a8fd}', '\u{a8fe}',
+ WC_ALetter), ('\u{a8ff}', '\u{a8ff}', WC_Extend), ('\u{a900}', '\u{a909}', WC_Numeric),
+ ('\u{a90a}', '\u{a925}', WC_ALetter), ('\u{a926}', '\u{a92d}', WC_Extend), ('\u{a930}',
+ '\u{a946}', WC_ALetter), ('\u{a947}', '\u{a953}', WC_Extend), ('\u{a960}', '\u{a97c}',
+ WC_ALetter), ('\u{a980}', '\u{a983}', WC_Extend), ('\u{a984}', '\u{a9b2}', WC_ALetter),
+ ('\u{a9b3}', '\u{a9c0}', WC_Extend), ('\u{a9cf}', '\u{a9cf}', WC_ALetter), ('\u{a9d0}',
+ '\u{a9d9}', WC_Numeric), ('\u{a9e5}', '\u{a9e5}', WC_Extend), ('\u{a9f0}', '\u{a9f9}',
+ WC_Numeric), ('\u{aa00}', '\u{aa28}', WC_ALetter), ('\u{aa29}', '\u{aa36}', WC_Extend),
+ ('\u{aa40}', '\u{aa42}', WC_ALetter), ('\u{aa43}', '\u{aa43}', WC_Extend), ('\u{aa44}',
+ '\u{aa4b}', WC_ALetter), ('\u{aa4c}', '\u{aa4d}', WC_Extend), ('\u{aa50}', '\u{aa59}',
+ WC_Numeric), ('\u{aa7b}', '\u{aa7d}', WC_Extend), ('\u{aab0}', '\u{aab0}', WC_Extend),
+ ('\u{aab2}', '\u{aab4}', WC_Extend), ('\u{aab7}', '\u{aab8}', WC_Extend), ('\u{aabe}',
+ '\u{aabf}', WC_Extend), ('\u{aac1}', '\u{aac1}', WC_Extend), ('\u{aae0}', '\u{aaea}',
+ WC_ALetter), ('\u{aaeb}', '\u{aaef}', WC_Extend), ('\u{aaf2}', '\u{aaf4}', WC_ALetter),
+ ('\u{aaf5}', '\u{aaf6}', WC_Extend), ('\u{ab01}', '\u{ab06}', WC_ALetter), ('\u{ab09}',
+ '\u{ab0e}', WC_ALetter), ('\u{ab11}', '\u{ab16}', WC_ALetter), ('\u{ab20}', '\u{ab26}',
+ WC_ALetter), ('\u{ab28}', '\u{ab2e}', WC_ALetter), ('\u{ab30}', '\u{ab69}', WC_ALetter),
+ ('\u{ab70}', '\u{abe2}', WC_ALetter), ('\u{abe3}', '\u{abea}', WC_Extend), ('\u{abec}',
+ '\u{abed}', WC_Extend), ('\u{abf0}', '\u{abf9}', WC_Numeric), ('\u{ac00}', '\u{d7a3}',
+ WC_ALetter), ('\u{d7b0}', '\u{d7c6}', WC_ALetter), ('\u{d7cb}', '\u{d7fb}', WC_ALetter),
+ ('\u{fb00}', '\u{fb06}', WC_ALetter), ('\u{fb13}', '\u{fb17}', WC_ALetter), ('\u{fb1d}',
+ '\u{fb1d}', WC_Hebrew_Letter), ('\u{fb1e}', '\u{fb1e}', WC_Extend), ('\u{fb1f}', '\u{fb28}',
+ WC_Hebrew_Letter), ('\u{fb2a}', '\u{fb36}', WC_Hebrew_Letter), ('\u{fb38}', '\u{fb3c}',
+ WC_Hebrew_Letter), ('\u{fb3e}', '\u{fb3e}', WC_Hebrew_Letter), ('\u{fb40}', '\u{fb41}',
+ WC_Hebrew_Letter), ('\u{fb43}', '\u{fb44}', WC_Hebrew_Letter), ('\u{fb46}', '\u{fb4f}',
+ WC_Hebrew_Letter), ('\u{fb50}', '\u{fbb1}', WC_ALetter), ('\u{fbd3}', '\u{fd3d}',
+ WC_ALetter), ('\u{fd50}', '\u{fd8f}', WC_ALetter), ('\u{fd92}', '\u{fdc7}', WC_ALetter),
+ ('\u{fdf0}', '\u{fdfb}', WC_ALetter), ('\u{fe00}', '\u{fe0f}', WC_Extend), ('\u{fe10}',
+ '\u{fe10}', WC_MidNum), ('\u{fe13}', '\u{fe13}', WC_MidLetter), ('\u{fe14}', '\u{fe14}',
+ WC_MidNum), ('\u{fe20}', '\u{fe2f}', WC_Extend), ('\u{fe33}', '\u{fe34}', WC_ExtendNumLet),
+ ('\u{fe4d}', '\u{fe4f}', WC_ExtendNumLet), ('\u{fe50}', '\u{fe50}', WC_MidNum), ('\u{fe52}',
+ '\u{fe52}', WC_MidNumLet), ('\u{fe54}', '\u{fe54}', WC_MidNum), ('\u{fe55}', '\u{fe55}',
+ WC_MidLetter), ('\u{fe70}', '\u{fe74}', WC_ALetter), ('\u{fe76}', '\u{fefc}', WC_ALetter),
+ ('\u{feff}', '\u{feff}', WC_Format), ('\u{ff07}', '\u{ff07}', WC_MidNumLet), ('\u{ff0c}',
+ '\u{ff0c}', WC_MidNum), ('\u{ff0e}', '\u{ff0e}', WC_MidNumLet), ('\u{ff10}', '\u{ff19}',
+ WC_Numeric), ('\u{ff1a}', '\u{ff1a}', WC_MidLetter), ('\u{ff1b}', '\u{ff1b}', WC_MidNum),
+ ('\u{ff21}', '\u{ff3a}', WC_ALetter), ('\u{ff3f}', '\u{ff3f}', WC_ExtendNumLet),
+ ('\u{ff41}', '\u{ff5a}', WC_ALetter), ('\u{ff66}', '\u{ff9d}', WC_Katakana), ('\u{ff9e}',
+ '\u{ff9f}', WC_Extend), ('\u{ffa0}', '\u{ffbe}', WC_ALetter), ('\u{ffc2}', '\u{ffc7}',
+ WC_ALetter), ('\u{ffca}', '\u{ffcf}', WC_ALetter), ('\u{ffd2}', '\u{ffd7}', WC_ALetter),
+ ('\u{ffda}', '\u{ffdc}', WC_ALetter), ('\u{fff9}', '\u{fffb}', WC_Format), ('\u{10000}',
+ '\u{1000b}', WC_ALetter), ('\u{1000d}', '\u{10026}', WC_ALetter), ('\u{10028}', '\u{1003a}',
+ WC_ALetter), ('\u{1003c}', '\u{1003d}', WC_ALetter), ('\u{1003f}', '\u{1004d}', WC_ALetter),
+ ('\u{10050}', '\u{1005d}', WC_ALetter), ('\u{10080}', '\u{100fa}', WC_ALetter),
+ ('\u{10140}', '\u{10174}', WC_ALetter), ('\u{101fd}', '\u{101fd}', WC_Extend), ('\u{10280}',
+ '\u{1029c}', WC_ALetter), ('\u{102a0}', '\u{102d0}', WC_ALetter), ('\u{102e0}', '\u{102e0}',
+ WC_Extend), ('\u{10300}', '\u{1031f}', WC_ALetter), ('\u{1032d}', '\u{1034a}', WC_ALetter),
+ ('\u{10350}', '\u{10375}', WC_ALetter), ('\u{10376}', '\u{1037a}', WC_Extend), ('\u{10380}',
+ '\u{1039d}', WC_ALetter), ('\u{103a0}', '\u{103c3}', WC_ALetter), ('\u{103c8}', '\u{103cf}',
+ WC_ALetter), ('\u{103d1}', '\u{103d5}', WC_ALetter), ('\u{10400}', '\u{1049d}', WC_ALetter),
+ ('\u{104a0}', '\u{104a9}', WC_Numeric), ('\u{104b0}', '\u{104d3}', WC_ALetter),
+ ('\u{104d8}', '\u{104fb}', WC_ALetter), ('\u{10500}', '\u{10527}', WC_ALetter),
+ ('\u{10530}', '\u{10563}', WC_ALetter), ('\u{10570}', '\u{1057a}', WC_ALetter),
+ ('\u{1057c}', '\u{1058a}', WC_ALetter), ('\u{1058c}', '\u{10592}', WC_ALetter),
+ ('\u{10594}', '\u{10595}', WC_ALetter), ('\u{10597}', '\u{105a1}', WC_ALetter),
+ ('\u{105a3}', '\u{105b1}', WC_ALetter), ('\u{105b3}', '\u{105b9}', WC_ALetter),
+ ('\u{105bb}', '\u{105bc}', WC_ALetter), ('\u{10600}', '\u{10736}', WC_ALetter),
+ ('\u{10740}', '\u{10755}', WC_ALetter), ('\u{10760}', '\u{10767}', WC_ALetter),
+ ('\u{10780}', '\u{10785}', WC_ALetter), ('\u{10787}', '\u{107b0}', WC_ALetter),
+ ('\u{107b2}', '\u{107ba}', WC_ALetter), ('\u{10800}', '\u{10805}', WC_ALetter),
+ ('\u{10808}', '\u{10808}', WC_ALetter), ('\u{1080a}', '\u{10835}', WC_ALetter),
+ ('\u{10837}', '\u{10838}', WC_ALetter), ('\u{1083c}', '\u{1083c}', WC_ALetter),
+ ('\u{1083f}', '\u{10855}', WC_ALetter), ('\u{10860}', '\u{10876}', WC_ALetter),
+ ('\u{10880}', '\u{1089e}', WC_ALetter), ('\u{108e0}', '\u{108f2}', WC_ALetter),
+ ('\u{108f4}', '\u{108f5}', WC_ALetter), ('\u{10900}', '\u{10915}', WC_ALetter),
+ ('\u{10920}', '\u{10939}', WC_ALetter), ('\u{10980}', '\u{109b7}', WC_ALetter),
+ ('\u{109be}', '\u{109bf}', WC_ALetter), ('\u{10a00}', '\u{10a00}', WC_ALetter),
+ ('\u{10a01}', '\u{10a03}', WC_Extend), ('\u{10a05}', '\u{10a06}', WC_Extend), ('\u{10a0c}',
+ '\u{10a0f}', WC_Extend), ('\u{10a10}', '\u{10a13}', WC_ALetter), ('\u{10a15}', '\u{10a17}',
+ WC_ALetter), ('\u{10a19}', '\u{10a35}', WC_ALetter), ('\u{10a38}', '\u{10a3a}', WC_Extend),
+ ('\u{10a3f}', '\u{10a3f}', WC_Extend), ('\u{10a60}', '\u{10a7c}', WC_ALetter), ('\u{10a80}',
+ '\u{10a9c}', WC_ALetter), ('\u{10ac0}', '\u{10ac7}', WC_ALetter), ('\u{10ac9}', '\u{10ae4}',
+ WC_ALetter), ('\u{10ae5}', '\u{10ae6}', WC_Extend), ('\u{10b00}', '\u{10b35}', WC_ALetter),
+ ('\u{10b40}', '\u{10b55}', WC_ALetter), ('\u{10b60}', '\u{10b72}', WC_ALetter),
+ ('\u{10b80}', '\u{10b91}', WC_ALetter), ('\u{10c00}', '\u{10c48}', WC_ALetter),
+ ('\u{10c80}', '\u{10cb2}', WC_ALetter), ('\u{10cc0}', '\u{10cf2}', WC_ALetter),
+ ('\u{10d00}', '\u{10d23}', WC_ALetter), ('\u{10d24}', '\u{10d27}', WC_Extend), ('\u{10d30}',
+ '\u{10d39}', WC_Numeric), ('\u{10e80}', '\u{10ea9}', WC_ALetter), ('\u{10eab}', '\u{10eac}',
+ WC_Extend), ('\u{10eb0}', '\u{10eb1}', WC_ALetter), ('\u{10efd}', '\u{10eff}', WC_Extend),
+ ('\u{10f00}', '\u{10f1c}', WC_ALetter), ('\u{10f27}', '\u{10f27}', WC_ALetter),
+ ('\u{10f30}', '\u{10f45}', WC_ALetter), ('\u{10f46}', '\u{10f50}', WC_Extend), ('\u{10f70}',
+ '\u{10f81}', WC_ALetter), ('\u{10f82}', '\u{10f85}', WC_Extend), ('\u{10fb0}', '\u{10fc4}',
+ WC_ALetter), ('\u{10fe0}', '\u{10ff6}', WC_ALetter), ('\u{11000}', '\u{11002}', WC_Extend),
+ ('\u{11003}', '\u{11037}', WC_ALetter), ('\u{11038}', '\u{11046}', WC_Extend), ('\u{11066}',
+ '\u{1106f}', WC_Numeric), ('\u{11070}', '\u{11070}', WC_Extend), ('\u{11071}', '\u{11072}',
+ WC_ALetter), ('\u{11073}', '\u{11074}', WC_Extend), ('\u{11075}', '\u{11075}', WC_ALetter),
+ ('\u{1107f}', '\u{11082}', WC_Extend), ('\u{11083}', '\u{110af}', WC_ALetter), ('\u{110b0}',
+ '\u{110ba}', WC_Extend), ('\u{110bd}', '\u{110bd}', WC_Format), ('\u{110c2}', '\u{110c2}',
+ WC_Extend), ('\u{110cd}', '\u{110cd}', WC_Format), ('\u{110d0}', '\u{110e8}', WC_ALetter),
+ ('\u{110f0}', '\u{110f9}', WC_Numeric), ('\u{11100}', '\u{11102}', WC_Extend), ('\u{11103}',
+ '\u{11126}', WC_ALetter), ('\u{11127}', '\u{11134}', WC_Extend), ('\u{11136}', '\u{1113f}',
+ WC_Numeric), ('\u{11144}', '\u{11144}', WC_ALetter), ('\u{11145}', '\u{11146}', WC_Extend),
+ ('\u{11147}', '\u{11147}', WC_ALetter), ('\u{11150}', '\u{11172}', WC_ALetter),
+ ('\u{11173}', '\u{11173}', WC_Extend), ('\u{11176}', '\u{11176}', WC_ALetter), ('\u{11180}',
+ '\u{11182}', WC_Extend), ('\u{11183}', '\u{111b2}', WC_ALetter), ('\u{111b3}', '\u{111c0}',
+ WC_Extend), ('\u{111c1}', '\u{111c4}', WC_ALetter), ('\u{111c9}', '\u{111cc}', WC_Extend),
+ ('\u{111ce}', '\u{111cf}', WC_Extend), ('\u{111d0}', '\u{111d9}', WC_Numeric), ('\u{111da}',
+ '\u{111da}', WC_ALetter), ('\u{111dc}', '\u{111dc}', WC_ALetter), ('\u{11200}', '\u{11211}',
+ WC_ALetter), ('\u{11213}', '\u{1122b}', WC_ALetter), ('\u{1122c}', '\u{11237}', WC_Extend),
+ ('\u{1123e}', '\u{1123e}', WC_Extend), ('\u{1123f}', '\u{11240}', WC_ALetter), ('\u{11241}',
+ '\u{11241}', WC_Extend), ('\u{11280}', '\u{11286}', WC_ALetter), ('\u{11288}', '\u{11288}',
+ WC_ALetter), ('\u{1128a}', '\u{1128d}', WC_ALetter), ('\u{1128f}', '\u{1129d}', WC_ALetter),
+ ('\u{1129f}', '\u{112a8}', WC_ALetter), ('\u{112b0}', '\u{112de}', WC_ALetter),
+ ('\u{112df}', '\u{112ea}', WC_Extend), ('\u{112f0}', '\u{112f9}', WC_Numeric), ('\u{11300}',
+ '\u{11303}', WC_Extend), ('\u{11305}', '\u{1130c}', WC_ALetter), ('\u{1130f}', '\u{11310}',
+ WC_ALetter), ('\u{11313}', '\u{11328}', WC_ALetter), ('\u{1132a}', '\u{11330}', WC_ALetter),
+ ('\u{11332}', '\u{11333}', WC_ALetter), ('\u{11335}', '\u{11339}', WC_ALetter),
+ ('\u{1133b}', '\u{1133c}', WC_Extend), ('\u{1133d}', '\u{1133d}', WC_ALetter), ('\u{1133e}',
+ '\u{11344}', WC_Extend), ('\u{11347}', '\u{11348}', WC_Extend), ('\u{1134b}', '\u{1134d}',
+ WC_Extend), ('\u{11350}', '\u{11350}', WC_ALetter), ('\u{11357}', '\u{11357}', WC_Extend),
+ ('\u{1135d}', '\u{11361}', WC_ALetter), ('\u{11362}', '\u{11363}', WC_Extend), ('\u{11366}',
+ '\u{1136c}', WC_Extend), ('\u{11370}', '\u{11374}', WC_Extend), ('\u{11400}', '\u{11434}',
+ WC_ALetter), ('\u{11435}', '\u{11446}', WC_Extend), ('\u{11447}', '\u{1144a}', WC_ALetter),
+ ('\u{11450}', '\u{11459}', WC_Numeric), ('\u{1145e}', '\u{1145e}', WC_Extend), ('\u{1145f}',
+ '\u{11461}', WC_ALetter), ('\u{11480}', '\u{114af}', WC_ALetter), ('\u{114b0}', '\u{114c3}',
+ WC_Extend), ('\u{114c4}', '\u{114c5}', WC_ALetter), ('\u{114c7}', '\u{114c7}', WC_ALetter),
+ ('\u{114d0}', '\u{114d9}', WC_Numeric), ('\u{11580}', '\u{115ae}', WC_ALetter),
+ ('\u{115af}', '\u{115b5}', WC_Extend), ('\u{115b8}', '\u{115c0}', WC_Extend), ('\u{115d8}',
+ '\u{115db}', WC_ALetter), ('\u{115dc}', '\u{115dd}', WC_Extend), ('\u{11600}', '\u{1162f}',
+ WC_ALetter), ('\u{11630}', '\u{11640}', WC_Extend), ('\u{11644}', '\u{11644}', WC_ALetter),
+ ('\u{11650}', '\u{11659}', WC_Numeric), ('\u{11680}', '\u{116aa}', WC_ALetter),
+ ('\u{116ab}', '\u{116b7}', WC_Extend), ('\u{116b8}', '\u{116b8}', WC_ALetter), ('\u{116c0}',
+ '\u{116c9}', WC_Numeric), ('\u{1171d}', '\u{1172b}', WC_Extend), ('\u{11730}', '\u{11739}',
+ WC_Numeric), ('\u{11800}', '\u{1182b}', WC_ALetter), ('\u{1182c}', '\u{1183a}', WC_Extend),
+ ('\u{118a0}', '\u{118df}', WC_ALetter), ('\u{118e0}', '\u{118e9}', WC_Numeric),
+ ('\u{118ff}', '\u{11906}', WC_ALetter), ('\u{11909}', '\u{11909}', WC_ALetter),
+ ('\u{1190c}', '\u{11913}', WC_ALetter), ('\u{11915}', '\u{11916}', WC_ALetter),
+ ('\u{11918}', '\u{1192f}', WC_ALetter), ('\u{11930}', '\u{11935}', WC_Extend), ('\u{11937}',
+ '\u{11938}', WC_Extend), ('\u{1193b}', '\u{1193e}', WC_Extend), ('\u{1193f}', '\u{1193f}',
+ WC_ALetter), ('\u{11940}', '\u{11940}', WC_Extend), ('\u{11941}', '\u{11941}', WC_ALetter),
+ ('\u{11942}', '\u{11943}', WC_Extend), ('\u{11950}', '\u{11959}', WC_Numeric), ('\u{119a0}',
+ '\u{119a7}', WC_ALetter), ('\u{119aa}', '\u{119d0}', WC_ALetter), ('\u{119d1}', '\u{119d7}',
+ WC_Extend), ('\u{119da}', '\u{119e0}', WC_Extend), ('\u{119e1}', '\u{119e1}', WC_ALetter),
+ ('\u{119e3}', '\u{119e3}', WC_ALetter), ('\u{119e4}', '\u{119e4}', WC_Extend), ('\u{11a00}',
+ '\u{11a00}', WC_ALetter), ('\u{11a01}', '\u{11a0a}', WC_Extend), ('\u{11a0b}', '\u{11a32}',
+ WC_ALetter), ('\u{11a33}', '\u{11a39}', WC_Extend), ('\u{11a3a}', '\u{11a3a}', WC_ALetter),
+ ('\u{11a3b}', '\u{11a3e}', WC_Extend), ('\u{11a47}', '\u{11a47}', WC_Extend), ('\u{11a50}',
+ '\u{11a50}', WC_ALetter), ('\u{11a51}', '\u{11a5b}', WC_Extend), ('\u{11a5c}', '\u{11a89}',
+ WC_ALetter), ('\u{11a8a}', '\u{11a99}', WC_Extend), ('\u{11a9d}', '\u{11a9d}', WC_ALetter),
+ ('\u{11ab0}', '\u{11af8}', WC_ALetter), ('\u{11c00}', '\u{11c08}', WC_ALetter),
+ ('\u{11c0a}', '\u{11c2e}', WC_ALetter), ('\u{11c2f}', '\u{11c36}', WC_Extend), ('\u{11c38}',
+ '\u{11c3f}', WC_Extend), ('\u{11c40}', '\u{11c40}', WC_ALetter), ('\u{11c50}', '\u{11c59}',
+ WC_Numeric), ('\u{11c72}', '\u{11c8f}', WC_ALetter), ('\u{11c92}', '\u{11ca7}', WC_Extend),
+ ('\u{11ca9}', '\u{11cb6}', WC_Extend), ('\u{11d00}', '\u{11d06}', WC_ALetter), ('\u{11d08}',
+ '\u{11d09}', WC_ALetter), ('\u{11d0b}', '\u{11d30}', WC_ALetter), ('\u{11d31}', '\u{11d36}',
+ WC_Extend), ('\u{11d3a}', '\u{11d3a}', WC_Extend), ('\u{11d3c}', '\u{11d3d}', WC_Extend),
+ ('\u{11d3f}', '\u{11d45}', WC_Extend), ('\u{11d46}', '\u{11d46}', WC_ALetter), ('\u{11d47}',
+ '\u{11d47}', WC_Extend), ('\u{11d50}', '\u{11d59}', WC_Numeric), ('\u{11d60}', '\u{11d65}',
+ WC_ALetter), ('\u{11d67}', '\u{11d68}', WC_ALetter), ('\u{11d6a}', '\u{11d89}', WC_ALetter),
+ ('\u{11d8a}', '\u{11d8e}', WC_Extend), ('\u{11d90}', '\u{11d91}', WC_Extend), ('\u{11d93}',
+ '\u{11d97}', WC_Extend), ('\u{11d98}', '\u{11d98}', WC_ALetter), ('\u{11da0}', '\u{11da9}',
+ WC_Numeric), ('\u{11ee0}', '\u{11ef2}', WC_ALetter), ('\u{11ef3}', '\u{11ef6}', WC_Extend),
+ ('\u{11f00}', '\u{11f01}', WC_Extend), ('\u{11f02}', '\u{11f02}', WC_ALetter), ('\u{11f03}',
+ '\u{11f03}', WC_Extend), ('\u{11f04}', '\u{11f10}', WC_ALetter), ('\u{11f12}', '\u{11f33}',
+ WC_ALetter), ('\u{11f34}', '\u{11f3a}', WC_Extend), ('\u{11f3e}', '\u{11f42}', WC_Extend),
+ ('\u{11f50}', '\u{11f59}', WC_Numeric), ('\u{11fb0}', '\u{11fb0}', WC_ALetter),
+ ('\u{12000}', '\u{12399}', WC_ALetter), ('\u{12400}', '\u{1246e}', WC_ALetter),
+ ('\u{12480}', '\u{12543}', WC_ALetter), ('\u{12f90}', '\u{12ff0}', WC_ALetter),
+ ('\u{13000}', '\u{1342f}', WC_ALetter), ('\u{13430}', '\u{1343f}', WC_Format), ('\u{13440}',
+ '\u{13440}', WC_Extend), ('\u{13441}', '\u{13446}', WC_ALetter), ('\u{13447}', '\u{13455}',
+ WC_Extend), ('\u{14400}', '\u{14646}', WC_ALetter), ('\u{16800}', '\u{16a38}', WC_ALetter),
+ ('\u{16a40}', '\u{16a5e}', WC_ALetter), ('\u{16a60}', '\u{16a69}', WC_Numeric),
+ ('\u{16a70}', '\u{16abe}', WC_ALetter), ('\u{16ac0}', '\u{16ac9}', WC_Numeric),
+ ('\u{16ad0}', '\u{16aed}', WC_ALetter), ('\u{16af0}', '\u{16af4}', WC_Extend), ('\u{16b00}',
+ '\u{16b2f}', WC_ALetter), ('\u{16b30}', '\u{16b36}', WC_Extend), ('\u{16b40}', '\u{16b43}',
+ WC_ALetter), ('\u{16b50}', '\u{16b59}', WC_Numeric), ('\u{16b63}', '\u{16b77}', WC_ALetter),
+ ('\u{16b7d}', '\u{16b8f}', WC_ALetter), ('\u{16e40}', '\u{16e7f}', WC_ALetter),
+ ('\u{16f00}', '\u{16f4a}', WC_ALetter), ('\u{16f4f}', '\u{16f4f}', WC_Extend), ('\u{16f50}',
+ '\u{16f50}', WC_ALetter), ('\u{16f51}', '\u{16f87}', WC_Extend), ('\u{16f8f}', '\u{16f92}',
+ WC_Extend), ('\u{16f93}', '\u{16f9f}', WC_ALetter), ('\u{16fe0}', '\u{16fe1}', WC_ALetter),
+ ('\u{16fe3}', '\u{16fe3}', WC_ALetter), ('\u{16fe4}', '\u{16fe4}', WC_Extend), ('\u{16ff0}',
+ '\u{16ff1}', WC_Extend), ('\u{1aff0}', '\u{1aff3}', WC_Katakana), ('\u{1aff5}', '\u{1affb}',
+ WC_Katakana), ('\u{1affd}', '\u{1affe}', WC_Katakana), ('\u{1b000}', '\u{1b000}',
+ WC_Katakana), ('\u{1b120}', '\u{1b122}', WC_Katakana), ('\u{1b155}', '\u{1b155}',
+ WC_Katakana), ('\u{1b164}', '\u{1b167}', WC_Katakana), ('\u{1bc00}', '\u{1bc6a}',
+ WC_ALetter), ('\u{1bc70}', '\u{1bc7c}', WC_ALetter), ('\u{1bc80}', '\u{1bc88}', WC_ALetter),
+ ('\u{1bc90}', '\u{1bc99}', WC_ALetter), ('\u{1bc9d}', '\u{1bc9e}', WC_Extend), ('\u{1bca0}',
+ '\u{1bca3}', WC_Format), ('\u{1cf00}', '\u{1cf2d}', WC_Extend), ('\u{1cf30}', '\u{1cf46}',
+ WC_Extend), ('\u{1d165}', '\u{1d169}', WC_Extend), ('\u{1d16d}', '\u{1d172}', WC_Extend),
+ ('\u{1d173}', '\u{1d17a}', WC_Format), ('\u{1d17b}', '\u{1d182}', WC_Extend), ('\u{1d185}',
+ '\u{1d18b}', WC_Extend), ('\u{1d1aa}', '\u{1d1ad}', WC_Extend), ('\u{1d242}', '\u{1d244}',
+ WC_Extend), ('\u{1d400}', '\u{1d454}', WC_ALetter), ('\u{1d456}', '\u{1d49c}', WC_ALetter),
+ ('\u{1d49e}', '\u{1d49f}', WC_ALetter), ('\u{1d4a2}', '\u{1d4a2}', WC_ALetter),
+ ('\u{1d4a5}', '\u{1d4a6}', WC_ALetter), ('\u{1d4a9}', '\u{1d4ac}', WC_ALetter),
+ ('\u{1d4ae}', '\u{1d4b9}', WC_ALetter), ('\u{1d4bb}', '\u{1d4bb}', WC_ALetter),
+ ('\u{1d4bd}', '\u{1d4c3}', WC_ALetter), ('\u{1d4c5}', '\u{1d505}', WC_ALetter),
+ ('\u{1d507}', '\u{1d50a}', WC_ALetter), ('\u{1d50d}', '\u{1d514}', WC_ALetter),
+ ('\u{1d516}', '\u{1d51c}', WC_ALetter), ('\u{1d51e}', '\u{1d539}', WC_ALetter),
+ ('\u{1d53b}', '\u{1d53e}', WC_ALetter), ('\u{1d540}', '\u{1d544}', WC_ALetter),
+ ('\u{1d546}', '\u{1d546}', WC_ALetter), ('\u{1d54a}', '\u{1d550}', WC_ALetter),
+ ('\u{1d552}', '\u{1d6a5}', WC_ALetter), ('\u{1d6a8}', '\u{1d6c0}', WC_ALetter),
+ ('\u{1d6c2}', '\u{1d6da}', WC_ALetter), ('\u{1d6dc}', '\u{1d6fa}', WC_ALetter),
+ ('\u{1d6fc}', '\u{1d714}', WC_ALetter), ('\u{1d716}', '\u{1d734}', WC_ALetter),
+ ('\u{1d736}', '\u{1d74e}', WC_ALetter), ('\u{1d750}', '\u{1d76e}', WC_ALetter),
+ ('\u{1d770}', '\u{1d788}', WC_ALetter), ('\u{1d78a}', '\u{1d7a8}', WC_ALetter),
+ ('\u{1d7aa}', '\u{1d7c2}', WC_ALetter), ('\u{1d7c4}', '\u{1d7cb}', WC_ALetter),
+ ('\u{1d7ce}', '\u{1d7ff}', WC_Numeric), ('\u{1da00}', '\u{1da36}', WC_Extend), ('\u{1da3b}',
+ '\u{1da6c}', WC_Extend), ('\u{1da75}', '\u{1da75}', WC_Extend), ('\u{1da84}', '\u{1da84}',
+ WC_Extend), ('\u{1da9b}', '\u{1da9f}', WC_Extend), ('\u{1daa1}', '\u{1daaf}', WC_Extend),
+ ('\u{1df00}', '\u{1df1e}', WC_ALetter), ('\u{1df25}', '\u{1df2a}', WC_ALetter),
+ ('\u{1e000}', '\u{1e006}', WC_Extend), ('\u{1e008}', '\u{1e018}', WC_Extend), ('\u{1e01b}',
+ '\u{1e021}', WC_Extend), ('\u{1e023}', '\u{1e024}', WC_Extend), ('\u{1e026}', '\u{1e02a}',
+ WC_Extend), ('\u{1e030}', '\u{1e06d}', WC_ALetter), ('\u{1e08f}', '\u{1e08f}', WC_Extend),
+ ('\u{1e100}', '\u{1e12c}', WC_ALetter), ('\u{1e130}', '\u{1e136}', WC_Extend), ('\u{1e137}',
+ '\u{1e13d}', WC_ALetter), ('\u{1e140}', '\u{1e149}', WC_Numeric), ('\u{1e14e}', '\u{1e14e}',
+ WC_ALetter), ('\u{1e290}', '\u{1e2ad}', WC_ALetter), ('\u{1e2ae}', '\u{1e2ae}', WC_Extend),
+ ('\u{1e2c0}', '\u{1e2eb}', WC_ALetter), ('\u{1e2ec}', '\u{1e2ef}', WC_Extend), ('\u{1e2f0}',
+ '\u{1e2f9}', WC_Numeric), ('\u{1e4d0}', '\u{1e4eb}', WC_ALetter), ('\u{1e4ec}', '\u{1e4ef}',
+ WC_Extend), ('\u{1e4f0}', '\u{1e4f9}', WC_Numeric), ('\u{1e7e0}', '\u{1e7e6}', WC_ALetter),
+ ('\u{1e7e8}', '\u{1e7eb}', WC_ALetter), ('\u{1e7ed}', '\u{1e7ee}', WC_ALetter),
+ ('\u{1e7f0}', '\u{1e7fe}', WC_ALetter), ('\u{1e800}', '\u{1e8c4}', WC_ALetter),
+ ('\u{1e8d0}', '\u{1e8d6}', WC_Extend), ('\u{1e900}', '\u{1e943}', WC_ALetter), ('\u{1e944}',
+ '\u{1e94a}', WC_Extend), ('\u{1e94b}', '\u{1e94b}', WC_ALetter), ('\u{1e950}', '\u{1e959}',
+ WC_Numeric), ('\u{1ee00}', '\u{1ee03}', WC_ALetter), ('\u{1ee05}', '\u{1ee1f}', WC_ALetter),
+ ('\u{1ee21}', '\u{1ee22}', WC_ALetter), ('\u{1ee24}', '\u{1ee24}', WC_ALetter),
+ ('\u{1ee27}', '\u{1ee27}', WC_ALetter), ('\u{1ee29}', '\u{1ee32}', WC_ALetter),
+ ('\u{1ee34}', '\u{1ee37}', WC_ALetter), ('\u{1ee39}', '\u{1ee39}', WC_ALetter),
+ ('\u{1ee3b}', '\u{1ee3b}', WC_ALetter), ('\u{1ee42}', '\u{1ee42}', WC_ALetter),
+ ('\u{1ee47}', '\u{1ee47}', WC_ALetter), ('\u{1ee49}', '\u{1ee49}', WC_ALetter),
+ ('\u{1ee4b}', '\u{1ee4b}', WC_ALetter), ('\u{1ee4d}', '\u{1ee4f}', WC_ALetter),
+ ('\u{1ee51}', '\u{1ee52}', WC_ALetter), ('\u{1ee54}', '\u{1ee54}', WC_ALetter),
+ ('\u{1ee57}', '\u{1ee57}', WC_ALetter), ('\u{1ee59}', '\u{1ee59}', WC_ALetter),
+ ('\u{1ee5b}', '\u{1ee5b}', WC_ALetter), ('\u{1ee5d}', '\u{1ee5d}', WC_ALetter),
+ ('\u{1ee5f}', '\u{1ee5f}', WC_ALetter), ('\u{1ee61}', '\u{1ee62}', WC_ALetter),
+ ('\u{1ee64}', '\u{1ee64}', WC_ALetter), ('\u{1ee67}', '\u{1ee6a}', WC_ALetter),
+ ('\u{1ee6c}', '\u{1ee72}', WC_ALetter), ('\u{1ee74}', '\u{1ee77}', WC_ALetter),
+ ('\u{1ee79}', '\u{1ee7c}', WC_ALetter), ('\u{1ee7e}', '\u{1ee7e}', WC_ALetter),
+ ('\u{1ee80}', '\u{1ee89}', WC_ALetter), ('\u{1ee8b}', '\u{1ee9b}', WC_ALetter),
+ ('\u{1eea1}', '\u{1eea3}', WC_ALetter), ('\u{1eea5}', '\u{1eea9}', WC_ALetter),
+ ('\u{1eeab}', '\u{1eebb}', WC_ALetter), ('\u{1f130}', '\u{1f149}', WC_ALetter),
+ ('\u{1f150}', '\u{1f169}', WC_ALetter), ('\u{1f170}', '\u{1f189}', WC_ALetter),
+ ('\u{1f1e6}', '\u{1f1ff}', WC_Regional_Indicator), ('\u{1f3fb}', '\u{1f3ff}', WC_Extend),
+ ('\u{1fbf0}', '\u{1fbf9}', WC_Numeric), ('\u{e0001}', '\u{e0001}', WC_Format), ('\u{e0020}',
+ '\u{e007f}', WC_Extend), ('\u{e0100}', '\u{e01ef}', WC_Extend)
+ ];
+
+}
+pub mod emoji {
+ use core::result::Result::{Ok, Err};
+
+ pub use self::EmojiCat::*;
+
+ #[allow(non_camel_case_types)] // variant names carry an `EC_` prefix with underscores, which the naming lint would flag
+ #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+ pub enum EmojiCat { // category stored in each `emoji_cat_table` entry and returned by `emoji_category`
+ EC_Any, // fallback: returned by the lookup for characters that fall outside every table range
+ EC_Extended_Pictographic, // the only category assigned by the entries of `emoji_cat_table`
+ }
+
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, EmojiCat)]) -> (u32, u32, EmojiCat) { // maps `c` to (first code point, last code point, category) of the table range or gap containing it
+ use core::cmp::Ordering::{Equal, Less, Greater};
+ match r.binary_search_by(|&(lo, hi, _)| { // compares each inclusive range `lo..=hi` against `c`; binary search relies on `r` being ordered consistently with this comparison
+ if lo <= c && c <= hi { Equal } // `c` lies inside this range
+ else if hi < c { Less } // the whole range lies below `c`
+ else { Greater } // the whole range lies above `c`
+ }) {
+ Ok(idx) => { // hit: report the bounds and category of the containing range
+ let (lower, upper, cat) = r[idx];
+ (lower as u32, upper as u32, cat)
+ }
+ Err(idx) => { // miss: `idx` is the position where a range containing `c` would be inserted
+ (
+ if idx > 0 { r[idx-1].1 as u32 + 1 } else { 0 }, // gap start: one past the end of the preceding range, or 0 when there is none
+ r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX), // gap end: one before the start of the following range (the closure's `c` is a table entry, shadowing the char), or u32::MAX when there is none
+ EC_Any, // characters in a gap get the fallback category
+ )
+ }
+ }
+ }
+
+ pub fn emoji_category(c: char) -> (u32, u32, EmojiCat) { // public lookup of `c` in `emoji_cat_table`
+ bsearch_range_value_table(c, emoji_cat_table) // yields (start, end, category) of the table range containing `c`, or of the surrounding gap with `EC_Any`
+ }
+
+ const emoji_cat_table: &'static [(char, char, EmojiCat)] = &[ // inclusive (first, last, category) ranges listed in ascending code-point order; searched by `bsearch_range_value_table`
+ ('\u{a9}', '\u{a9}', EC_Extended_Pictographic), ('\u{ae}', '\u{ae}',
+ EC_Extended_Pictographic), ('\u{203c}', '\u{203c}', EC_Extended_Pictographic), ('\u{2049}',
+ '\u{2049}', EC_Extended_Pictographic), ('\u{2122}', '\u{2122}', EC_Extended_Pictographic),
+ ('\u{2139}', '\u{2139}', EC_Extended_Pictographic), ('\u{2194}', '\u{2199}',
+ EC_Extended_Pictographic), ('\u{21a9}', '\u{21aa}', EC_Extended_Pictographic), ('\u{231a}',
+ '\u{231b}', EC_Extended_Pictographic), ('\u{2328}', '\u{2328}', EC_Extended_Pictographic),
+ ('\u{2388}', '\u{2388}', EC_Extended_Pictographic), ('\u{23cf}', '\u{23cf}',
+ EC_Extended_Pictographic), ('\u{23e9}', '\u{23f3}', EC_Extended_Pictographic), ('\u{23f8}',
+ '\u{23fa}', EC_Extended_Pictographic), ('\u{24c2}', '\u{24c2}', EC_Extended_Pictographic),
+ ('\u{25aa}', '\u{25ab}', EC_Extended_Pictographic), ('\u{25b6}', '\u{25b6}',
+ EC_Extended_Pictographic), ('\u{25c0}', '\u{25c0}', EC_Extended_Pictographic), ('\u{25fb}',
+ '\u{25fe}', EC_Extended_Pictographic), ('\u{2600}', '\u{2605}', EC_Extended_Pictographic),
+ ('\u{2607}', '\u{2612}', EC_Extended_Pictographic), ('\u{2614}', '\u{2685}',
+ EC_Extended_Pictographic), ('\u{2690}', '\u{2705}', EC_Extended_Pictographic), ('\u{2708}',
+ '\u{2712}', EC_Extended_Pictographic), ('\u{2714}', '\u{2714}', EC_Extended_Pictographic),
+ ('\u{2716}', '\u{2716}', EC_Extended_Pictographic), ('\u{271d}', '\u{271d}',
+ EC_Extended_Pictographic), ('\u{2721}', '\u{2721}', EC_Extended_Pictographic), ('\u{2728}',
+ '\u{2728}', EC_Extended_Pictographic), ('\u{2733}', '\u{2734}', EC_Extended_Pictographic),
+ ('\u{2744}', '\u{2744}', EC_Extended_Pictographic), ('\u{2747}', '\u{2747}',
+ EC_Extended_Pictographic), ('\u{274c}', '\u{274c}', EC_Extended_Pictographic), ('\u{274e}',
+ '\u{274e}', EC_Extended_Pictographic), ('\u{2753}', '\u{2755}', EC_Extended_Pictographic),
+ ('\u{2757}', '\u{2757}', EC_Extended_Pictographic), ('\u{2763}', '\u{2767}',
+ EC_Extended_Pictographic), ('\u{2795}', '\u{2797}', EC_Extended_Pictographic), ('\u{27a1}',
+ '\u{27a1}', EC_Extended_Pictographic), ('\u{27b0}', '\u{27b0}', EC_Extended_Pictographic),
+ ('\u{27bf}', '\u{27bf}', EC_Extended_Pictographic), ('\u{2934}', '\u{2935}',
+ EC_Extended_Pictographic), ('\u{2b05}', '\u{2b07}', EC_Extended_Pictographic), ('\u{2b1b}',
+ '\u{2b1c}', EC_Extended_Pictographic), ('\u{2b50}', '\u{2b50}', EC_Extended_Pictographic),
+ ('\u{2b55}', '\u{2b55}', EC_Extended_Pictographic), ('\u{3030}', '\u{3030}',
+ EC_Extended_Pictographic), ('\u{303d}', '\u{303d}', EC_Extended_Pictographic), ('\u{3297}',
+ '\u{3297}', EC_Extended_Pictographic), ('\u{3299}', '\u{3299}', EC_Extended_Pictographic),
+ ('\u{1f000}', '\u{1f0ff}', EC_Extended_Pictographic), ('\u{1f10d}', '\u{1f10f}',
+ EC_Extended_Pictographic), ('\u{1f12f}', '\u{1f12f}', EC_Extended_Pictographic),
+ ('\u{1f16c}', '\u{1f171}', EC_Extended_Pictographic), ('\u{1f17e}', '\u{1f17f}',
+ EC_Extended_Pictographic), ('\u{1f18e}', '\u{1f18e}', EC_Extended_Pictographic),
+ ('\u{1f191}', '\u{1f19a}', EC_Extended_Pictographic), ('\u{1f1ad}', '\u{1f1e5}',
+ EC_Extended_Pictographic), ('\u{1f201}', '\u{1f20f}', EC_Extended_Pictographic),
+ ('\u{1f21a}', '\u{1f21a}', EC_Extended_Pictographic), ('\u{1f22f}', '\u{1f22f}',
+ EC_Extended_Pictographic), ('\u{1f232}', '\u{1f23a}', EC_Extended_Pictographic),
+ ('\u{1f23c}', '\u{1f23f}', EC_Extended_Pictographic), ('\u{1f249}', '\u{1f3fa}',
+ EC_Extended_Pictographic), ('\u{1f400}', '\u{1f53d}', EC_Extended_Pictographic),
+ ('\u{1f546}', '\u{1f64f}', EC_Extended_Pictographic), ('\u{1f680}', '\u{1f6ff}',
+ EC_Extended_Pictographic), ('\u{1f774}', '\u{1f77f}', EC_Extended_Pictographic),
+ ('\u{1f7d5}', '\u{1f7ff}', EC_Extended_Pictographic), ('\u{1f80c}', '\u{1f80f}',
+ EC_Extended_Pictographic), ('\u{1f848}', '\u{1f84f}', EC_Extended_Pictographic),
+ ('\u{1f85a}', '\u{1f85f}', EC_Extended_Pictographic), ('\u{1f888}', '\u{1f88f}',
+ EC_Extended_Pictographic), ('\u{1f8ae}', '\u{1f8ff}', EC_Extended_Pictographic),
+ ('\u{1f90c}', '\u{1f93a}', EC_Extended_Pictographic), ('\u{1f93c}', '\u{1f945}',
+ EC_Extended_Pictographic), ('\u{1f947}', '\u{1faff}', EC_Extended_Pictographic),
+ ('\u{1fc00}', '\u{1fffd}', EC_Extended_Pictographic)
+ ];
+
+}
+pub mod sentence {
+ use core::result::Result::{Ok, Err};
+
+ pub use self::SentenceCat::*;
+
+ #[allow(non_camel_case_types)] // variant names carry an `SC_` prefix with underscores, which the naming lint would flag
+ #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+ pub enum SentenceCat { // category stored in each `sentence_cat_table` entry and returned by `sentence_category`
+ SC_ATerm,
+ SC_Any, // fallback: returned by the lookup for characters that fall outside every table range
+ SC_CR,
+ SC_Close,
+ SC_Extend,
+ SC_Format,
+ SC_LF,
+ SC_Lower,
+ SC_Numeric,
+ SC_OLetter,
+ SC_SContinue,
+ SC_STerm,
+ SC_Sep,
+ SC_Sp,
+ SC_Upper,
+ }
+
+ /// Binary-searches the range table `r` for the entry whose inclusive
+ /// `(start, end)` span contains `c`.
+ ///
+ /// Returns `(start, end, category)` with the bounds widened to `u32`:
+ ///
+ /// * on a hit, the bounds and category of the matching entry;
+ /// * on a miss, the bounds of the gap `c` falls into -- one past the end of
+ ///   the preceding entry (or `0` when there is none) up to one before the
+ ///   start of the following entry (or `u32::MAX` when there is none) --
+ ///   paired with `SC_Any`.
+ ///
+ /// `r` must be sorted by range, as required by `binary_search_by`.
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, SentenceCat)]) -> (u32, u32, SentenceCat) {
+     use core::cmp::Ordering;
+
+     // Order each entry relative to `c`: entirely below, entirely above, or containing it.
+     let probe = r.binary_search_by(|&(start, end, _)| {
+         if end < c {
+             Ordering::Less
+         } else if c < start {
+             Ordering::Greater
+         } else {
+             Ordering::Equal
+         }
+     });
+
+     match probe {
+         Ok(hit) => {
+             let (start, end, cat) = r[hit];
+             (start as u32, end as u32, cat)
+         }
+         // `slot` is where an entry containing `c` would have to be inserted,
+         // so its neighbours delimit the uncovered gap around `c`.
+         Err(slot) => {
+             let gap_start = if slot > 0 { r[slot - 1].1 as u32 + 1 } else { 0 };
+             let gap_end = match r.get(slot) {
+                 Some(next) => next.0 as u32 - 1,
+                 None => core::u32::MAX,
+             };
+             (gap_start, gap_end, SC_Any)
+         }
+     }
+ }
+
+ /// Looks up `c` in `sentence_cat_table`.
+ ///
+ /// Returns the `(start, end, category)` triple produced by
+ /// `bsearch_range_value_table`: the `u32` bounds of the span that `c` falls
+ /// in and that span's category.
+ pub fn sentence_category(c: char) -> (u32, u32, SentenceCat) {
+     let table = sentence_cat_table;
+     bsearch_range_value_table(c, table)
+ }
+
+ const sentence_cat_table: &'static [(char, char, SentenceCat)] = &[
+ ('\u{9}', '\u{9}', SC_Sp), ('\u{a}', '\u{a}', SC_LF), ('\u{b}', '\u{c}', SC_Sp), ('\u{d}',
+ '\u{d}', SC_CR), ('\u{20}', '\u{20}', SC_Sp), ('\u{21}', '\u{21}', SC_STerm), ('\u{22}',
+ '\u{22}', SC_Close), ('\u{27}', '\u{29}', SC_Close), ('\u{2c}', '\u{2d}', SC_SContinue),
+ ('\u{2e}', '\u{2e}', SC_ATerm), ('\u{30}', '\u{39}', SC_Numeric), ('\u{3a}', '\u{3a}',
+ SC_SContinue), ('\u{3f}', '\u{3f}', SC_STerm), ('\u{41}', '\u{5a}', SC_Upper), ('\u{5b}',
+ '\u{5b}', SC_Close), ('\u{5d}', '\u{5d}', SC_Close), ('\u{61}', '\u{7a}', SC_Lower),
+ ('\u{7b}', '\u{7b}', SC_Close), ('\u{7d}', '\u{7d}', SC_Close), ('\u{85}', '\u{85}',
+ SC_Sep), ('\u{a0}', '\u{a0}', SC_Sp), ('\u{aa}', '\u{aa}', SC_Lower), ('\u{ab}', '\u{ab}',
+ SC_Close), ('\u{ad}', '\u{ad}', SC_Format), ('\u{b5}', '\u{b5}', SC_Lower), ('\u{ba}',
+ '\u{ba}', SC_Lower), ('\u{bb}', '\u{bb}', SC_Close), ('\u{c0}', '\u{d6}', SC_Upper),
+ ('\u{d8}', '\u{de}', SC_Upper), ('\u{df}', '\u{f6}', SC_Lower), ('\u{f8}', '\u{ff}',
+ SC_Lower), ('\u{100}', '\u{100}', SC_Upper), ('\u{101}', '\u{101}', SC_Lower), ('\u{102}',
+ '\u{102}', SC_Upper), ('\u{103}', '\u{103}', SC_Lower), ('\u{104}', '\u{104}', SC_Upper),
+ ('\u{105}', '\u{105}', SC_Lower), ('\u{106}', '\u{106}', SC_Upper), ('\u{107}', '\u{107}',
+ SC_Lower), ('\u{108}', '\u{108}', SC_Upper), ('\u{109}', '\u{109}', SC_Lower), ('\u{10a}',
+ '\u{10a}', SC_Upper), ('\u{10b}', '\u{10b}', SC_Lower), ('\u{10c}', '\u{10c}', SC_Upper),
+ ('\u{10d}', '\u{10d}', SC_Lower), ('\u{10e}', '\u{10e}', SC_Upper), ('\u{10f}', '\u{10f}',
+ SC_Lower), ('\u{110}', '\u{110}', SC_Upper), ('\u{111}', '\u{111}', SC_Lower), ('\u{112}',
+ '\u{112}', SC_Upper), ('\u{113}', '\u{113}', SC_Lower), ('\u{114}', '\u{114}', SC_Upper),
+ ('\u{115}', '\u{115}', SC_Lower), ('\u{116}', '\u{116}', SC_Upper), ('\u{117}', '\u{117}',
+ SC_Lower), ('\u{118}', '\u{118}', SC_Upper), ('\u{119}', '\u{119}', SC_Lower), ('\u{11a}',
+ '\u{11a}', SC_Upper), ('\u{11b}', '\u{11b}', SC_Lower), ('\u{11c}', '\u{11c}', SC_Upper),
+ ('\u{11d}', '\u{11d}', SC_Lower), ('\u{11e}', '\u{11e}', SC_Upper), ('\u{11f}', '\u{11f}',
+ SC_Lower), ('\u{120}', '\u{120}', SC_Upper), ('\u{121}', '\u{121}', SC_Lower), ('\u{122}',
+ '\u{122}', SC_Upper), ('\u{123}', '\u{123}', SC_Lower), ('\u{124}', '\u{124}', SC_Upper),
+ ('\u{125}', '\u{125}', SC_Lower), ('\u{126}', '\u{126}', SC_Upper), ('\u{127}', '\u{127}',
+ SC_Lower), ('\u{128}', '\u{128}', SC_Upper), ('\u{129}', '\u{129}', SC_Lower), ('\u{12a}',
+ '\u{12a}', SC_Upper), ('\u{12b}', '\u{12b}', SC_Lower), ('\u{12c}', '\u{12c}', SC_Upper),
+ ('\u{12d}', '\u{12d}', SC_Lower), ('\u{12e}', '\u{12e}', SC_Upper), ('\u{12f}', '\u{12f}',
+ SC_Lower), ('\u{130}', '\u{130}', SC_Upper), ('\u{131}', '\u{131}', SC_Lower), ('\u{132}',
+ '\u{132}', SC_Upper), ('\u{133}', '\u{133}', SC_Lower), ('\u{134}', '\u{134}', SC_Upper),
+ ('\u{135}', '\u{135}', SC_Lower), ('\u{136}', '\u{136}', SC_Upper), ('\u{137}', '\u{138}',
+ SC_Lower), ('\u{139}', '\u{139}', SC_Upper), ('\u{13a}', '\u{13a}', SC_Lower), ('\u{13b}',
+ '\u{13b}', SC_Upper), ('\u{13c}', '\u{13c}', SC_Lower), ('\u{13d}', '\u{13d}', SC_Upper),
+ ('\u{13e}', '\u{13e}', SC_Lower), ('\u{13f}', '\u{13f}', SC_Upper), ('\u{140}', '\u{140}',
+ SC_Lower), ('\u{141}', '\u{141}', SC_Upper), ('\u{142}', '\u{142}', SC_Lower), ('\u{143}',
+ '\u{143}', SC_Upper), ('\u{144}', '\u{144}', SC_Lower), ('\u{145}', '\u{145}', SC_Upper),
+ ('\u{146}', '\u{146}', SC_Lower), ('\u{147}', '\u{147}', SC_Upper), ('\u{148}', '\u{149}',
+ SC_Lower), ('\u{14a}', '\u{14a}', SC_Upper), ('\u{14b}', '\u{14b}', SC_Lower), ('\u{14c}',
+ '\u{14c}', SC_Upper), ('\u{14d}', '\u{14d}', SC_Lower), ('\u{14e}', '\u{14e}', SC_Upper),
+ ('\u{14f}', '\u{14f}', SC_Lower), ('\u{150}', '\u{150}', SC_Upper), ('\u{151}', '\u{151}',
+ SC_Lower), ('\u{152}', '\u{152}', SC_Upper), ('\u{153}', '\u{153}', SC_Lower), ('\u{154}',
+ '\u{154}', SC_Upper), ('\u{155}', '\u{155}', SC_Lower), ('\u{156}', '\u{156}', SC_Upper),
+ ('\u{157}', '\u{157}', SC_Lower), ('\u{158}', '\u{158}', SC_Upper), ('\u{159}', '\u{159}',
+ SC_Lower), ('\u{15a}', '\u{15a}', SC_Upper), ('\u{15b}', '\u{15b}', SC_Lower), ('\u{15c}',
+ '\u{15c}', SC_Upper), ('\u{15d}', '\u{15d}', SC_Lower), ('\u{15e}', '\u{15e}', SC_Upper),
+ ('\u{15f}', '\u{15f}', SC_Lower), ('\u{160}', '\u{160}', SC_Upper), ('\u{161}', '\u{161}',
+ SC_Lower), ('\u{162}', '\u{162}', SC_Upper), ('\u{163}', '\u{163}', SC_Lower), ('\u{164}',
+ '\u{164}', SC_Upper), ('\u{165}', '\u{165}', SC_Lower), ('\u{166}', '\u{166}', SC_Upper),
+ ('\u{167}', '\u{167}', SC_Lower), ('\u{168}', '\u{168}', SC_Upper), ('\u{169}', '\u{169}',
+ SC_Lower), ('\u{16a}', '\u{16a}', SC_Upper), ('\u{16b}', '\u{16b}', SC_Lower), ('\u{16c}',
+ '\u{16c}', SC_Upper), ('\u{16d}', '\u{16d}', SC_Lower), ('\u{16e}', '\u{16e}', SC_Upper),
+ ('\u{16f}', '\u{16f}', SC_Lower), ('\u{170}', '\u{170}', SC_Upper), ('\u{171}', '\u{171}',
+ SC_Lower), ('\u{172}', '\u{172}', SC_Upper), ('\u{173}', '\u{173}', SC_Lower), ('\u{174}',
+ '\u{174}', SC_Upper), ('\u{175}', '\u{175}', SC_Lower), ('\u{176}', '\u{176}', SC_Upper),
+ ('\u{177}', '\u{177}', SC_Lower), ('\u{178}', '\u{179}', SC_Upper), ('\u{17a}', '\u{17a}',
+ SC_Lower), ('\u{17b}', '\u{17b}', SC_Upper), ('\u{17c}', '\u{17c}', SC_Lower), ('\u{17d}',
+ '\u{17d}', SC_Upper), ('\u{17e}', '\u{180}', SC_Lower), ('\u{181}', '\u{182}', SC_Upper),
+ ('\u{183}', '\u{183}', SC_Lower), ('\u{184}', '\u{184}', SC_Upper), ('\u{185}', '\u{185}',
+ SC_Lower), ('\u{186}', '\u{187}', SC_Upper), ('\u{188}', '\u{188}', SC_Lower), ('\u{189}',
+ '\u{18b}', SC_Upper), ('\u{18c}', '\u{18d}', SC_Lower), ('\u{18e}', '\u{191}', SC_Upper),
+ ('\u{192}', '\u{192}', SC_Lower), ('\u{193}', '\u{194}', SC_Upper), ('\u{195}', '\u{195}',
+ SC_Lower), ('\u{196}', '\u{198}', SC_Upper), ('\u{199}', '\u{19b}', SC_Lower), ('\u{19c}',
+ '\u{19d}', SC_Upper), ('\u{19e}', '\u{19e}', SC_Lower), ('\u{19f}', '\u{1a0}', SC_Upper),
+ ('\u{1a1}', '\u{1a1}', SC_Lower), ('\u{1a2}', '\u{1a2}', SC_Upper), ('\u{1a3}', '\u{1a3}',
+ SC_Lower), ('\u{1a4}', '\u{1a4}', SC_Upper), ('\u{1a5}', '\u{1a5}', SC_Lower), ('\u{1a6}',
+ '\u{1a7}', SC_Upper), ('\u{1a8}', '\u{1a8}', SC_Lower), ('\u{1a9}', '\u{1a9}', SC_Upper),
+ ('\u{1aa}', '\u{1ab}', SC_Lower), ('\u{1ac}', '\u{1ac}', SC_Upper), ('\u{1ad}', '\u{1ad}',
+ SC_Lower), ('\u{1ae}', '\u{1af}', SC_Upper), ('\u{1b0}', '\u{1b0}', SC_Lower), ('\u{1b1}',
+ '\u{1b3}', SC_Upper), ('\u{1b4}', '\u{1b4}', SC_Lower), ('\u{1b5}', '\u{1b5}', SC_Upper),
+ ('\u{1b6}', '\u{1b6}', SC_Lower), ('\u{1b7}', '\u{1b8}', SC_Upper), ('\u{1b9}', '\u{1ba}',
+ SC_Lower), ('\u{1bb}', '\u{1bb}', SC_OLetter), ('\u{1bc}', '\u{1bc}', SC_Upper), ('\u{1bd}',
+ '\u{1bf}', SC_Lower), ('\u{1c0}', '\u{1c3}', SC_OLetter), ('\u{1c4}', '\u{1c5}', SC_Upper),
+ ('\u{1c6}', '\u{1c6}', SC_Lower), ('\u{1c7}', '\u{1c8}', SC_Upper), ('\u{1c9}', '\u{1c9}',
+ SC_Lower), ('\u{1ca}', '\u{1cb}', SC_Upper), ('\u{1cc}', '\u{1cc}', SC_Lower), ('\u{1cd}',
+ '\u{1cd}', SC_Upper), ('\u{1ce}', '\u{1ce}', SC_Lower), ('\u{1cf}', '\u{1cf}', SC_Upper),
+ ('\u{1d0}', '\u{1d0}', SC_Lower), ('\u{1d1}', '\u{1d1}', SC_Upper), ('\u{1d2}', '\u{1d2}',
+ SC_Lower), ('\u{1d3}', '\u{1d3}', SC_Upper), ('\u{1d4}', '\u{1d4}', SC_Lower), ('\u{1d5}',
+ '\u{1d5}', SC_Upper), ('\u{1d6}', '\u{1d6}', SC_Lower), ('\u{1d7}', '\u{1d7}', SC_Upper),
+ ('\u{1d8}', '\u{1d8}', SC_Lower), ('\u{1d9}', '\u{1d9}', SC_Upper), ('\u{1da}', '\u{1da}',
+ SC_Lower), ('\u{1db}', '\u{1db}', SC_Upper), ('\u{1dc}', '\u{1dd}', SC_Lower), ('\u{1de}',
+ '\u{1de}', SC_Upper), ('\u{1df}', '\u{1df}', SC_Lower), ('\u{1e0}', '\u{1e0}', SC_Upper),
+ ('\u{1e1}', '\u{1e1}', SC_Lower), ('\u{1e2}', '\u{1e2}', SC_Upper), ('\u{1e3}', '\u{1e3}',
+ SC_Lower), ('\u{1e4}', '\u{1e4}', SC_Upper), ('\u{1e5}', '\u{1e5}', SC_Lower), ('\u{1e6}',
+ '\u{1e6}', SC_Upper), ('\u{1e7}', '\u{1e7}', SC_Lower), ('\u{1e8}', '\u{1e8}', SC_Upper),
+ ('\u{1e9}', '\u{1e9}', SC_Lower), ('\u{1ea}', '\u{1ea}', SC_Upper), ('\u{1eb}', '\u{1eb}',
+ SC_Lower), ('\u{1ec}', '\u{1ec}', SC_Upper), ('\u{1ed}', '\u{1ed}', SC_Lower), ('\u{1ee}',
+ '\u{1ee}', SC_Upper), ('\u{1ef}', '\u{1f0}', SC_Lower), ('\u{1f1}', '\u{1f2}', SC_Upper),
+ ('\u{1f3}', '\u{1f3}', SC_Lower), ('\u{1f4}', '\u{1f4}', SC_Upper), ('\u{1f5}', '\u{1f5}',
+ SC_Lower), ('\u{1f6}', '\u{1f8}', SC_Upper), ('\u{1f9}', '\u{1f9}', SC_Lower), ('\u{1fa}',
+ '\u{1fa}', SC_Upper), ('\u{1fb}', '\u{1fb}', SC_Lower), ('\u{1fc}', '\u{1fc}', SC_Upper),
+ ('\u{1fd}', '\u{1fd}', SC_Lower), ('\u{1fe}', '\u{1fe}', SC_Upper), ('\u{1ff}', '\u{1ff}',
+ SC_Lower), ('\u{200}', '\u{200}', SC_Upper), ('\u{201}', '\u{201}', SC_Lower), ('\u{202}',
+ '\u{202}', SC_Upper), ('\u{203}', '\u{203}', SC_Lower), ('\u{204}', '\u{204}', SC_Upper),
+ ('\u{205}', '\u{205}', SC_Lower), ('\u{206}', '\u{206}', SC_Upper), ('\u{207}', '\u{207}',
+ SC_Lower), ('\u{208}', '\u{208}', SC_Upper), ('\u{209}', '\u{209}', SC_Lower), ('\u{20a}',
+ '\u{20a}', SC_Upper), ('\u{20b}', '\u{20b}', SC_Lower), ('\u{20c}', '\u{20c}', SC_Upper),
+ ('\u{20d}', '\u{20d}', SC_Lower), ('\u{20e}', '\u{20e}', SC_Upper), ('\u{20f}', '\u{20f}',
+ SC_Lower), ('\u{210}', '\u{210}', SC_Upper), ('\u{211}', '\u{211}', SC_Lower), ('\u{212}',
+ '\u{212}', SC_Upper), ('\u{213}', '\u{213}', SC_Lower), ('\u{214}', '\u{214}', SC_Upper),
+ ('\u{215}', '\u{215}', SC_Lower), ('\u{216}', '\u{216}', SC_Upper), ('\u{217}', '\u{217}',
+ SC_Lower), ('\u{218}', '\u{218}', SC_Upper), ('\u{219}', '\u{219}', SC_Lower), ('\u{21a}',
+ '\u{21a}', SC_Upper), ('\u{21b}', '\u{21b}', SC_Lower), ('\u{21c}', '\u{21c}', SC_Upper),
+ ('\u{21d}', '\u{21d}', SC_Lower), ('\u{21e}', '\u{21e}', SC_Upper), ('\u{21f}', '\u{21f}',
+ SC_Lower), ('\u{220}', '\u{220}', SC_Upper), ('\u{221}', '\u{221}', SC_Lower), ('\u{222}',
+ '\u{222}', SC_Upper), ('\u{223}', '\u{223}', SC_Lower), ('\u{224}', '\u{224}', SC_Upper),
+ ('\u{225}', '\u{225}', SC_Lower), ('\u{226}', '\u{226}', SC_Upper), ('\u{227}', '\u{227}',
+ SC_Lower), ('\u{228}', '\u{228}', SC_Upper), ('\u{229}', '\u{229}', SC_Lower), ('\u{22a}',
+ '\u{22a}', SC_Upper), ('\u{22b}', '\u{22b}', SC_Lower), ('\u{22c}', '\u{22c}', SC_Upper),
+ ('\u{22d}', '\u{22d}', SC_Lower), ('\u{22e}', '\u{22e}', SC_Upper), ('\u{22f}', '\u{22f}',
+ SC_Lower), ('\u{230}', '\u{230}', SC_Upper), ('\u{231}', '\u{231}', SC_Lower), ('\u{232}',
+ '\u{232}', SC_Upper), ('\u{233}', '\u{239}', SC_Lower), ('\u{23a}', '\u{23b}', SC_Upper),
+ ('\u{23c}', '\u{23c}', SC_Lower), ('\u{23d}', '\u{23e}', SC_Upper), ('\u{23f}', '\u{240}',
+ SC_Lower), ('\u{241}', '\u{241}', SC_Upper), ('\u{242}', '\u{242}', SC_Lower), ('\u{243}',
+ '\u{246}', SC_Upper), ('\u{247}', '\u{247}', SC_Lower), ('\u{248}', '\u{248}', SC_Upper),
+ ('\u{249}', '\u{249}', SC_Lower), ('\u{24a}', '\u{24a}', SC_Upper), ('\u{24b}', '\u{24b}',
+ SC_Lower), ('\u{24c}', '\u{24c}', SC_Upper), ('\u{24d}', '\u{24d}', SC_Lower), ('\u{24e}',
+ '\u{24e}', SC_Upper), ('\u{24f}', '\u{293}', SC_Lower), ('\u{294}', '\u{294}', SC_OLetter),
+ ('\u{295}', '\u{2b8}', SC_Lower), ('\u{2b9}', '\u{2bf}', SC_OLetter), ('\u{2c0}', '\u{2c1}',
+ SC_Lower), ('\u{2c6}', '\u{2d1}', SC_OLetter), ('\u{2e0}', '\u{2e4}', SC_Lower), ('\u{2ec}',
+ '\u{2ec}', SC_OLetter), ('\u{2ee}', '\u{2ee}', SC_OLetter), ('\u{300}', '\u{36f}',
+ SC_Extend), ('\u{370}', '\u{370}', SC_Upper), ('\u{371}', '\u{371}', SC_Lower), ('\u{372}',
+ '\u{372}', SC_Upper), ('\u{373}', '\u{373}', SC_Lower), ('\u{374}', '\u{374}', SC_OLetter),
+ ('\u{376}', '\u{376}', SC_Upper), ('\u{377}', '\u{377}', SC_Lower), ('\u{37a}', '\u{37d}',
+ SC_Lower), ('\u{37f}', '\u{37f}', SC_Upper), ('\u{386}', '\u{386}', SC_Upper), ('\u{388}',
+ '\u{38a}', SC_Upper), ('\u{38c}', '\u{38c}', SC_Upper), ('\u{38e}', '\u{38f}', SC_Upper),
+ ('\u{390}', '\u{390}', SC_Lower), ('\u{391}', '\u{3a1}', SC_Upper), ('\u{3a3}', '\u{3ab}',
+ SC_Upper), ('\u{3ac}', '\u{3ce}', SC_Lower), ('\u{3cf}', '\u{3cf}', SC_Upper), ('\u{3d0}',
+ '\u{3d1}', SC_Lower), ('\u{3d2}', '\u{3d4}', SC_Upper), ('\u{3d5}', '\u{3d7}', SC_Lower),
+ ('\u{3d8}', '\u{3d8}', SC_Upper), ('\u{3d9}', '\u{3d9}', SC_Lower), ('\u{3da}', '\u{3da}',
+ SC_Upper), ('\u{3db}', '\u{3db}', SC_Lower), ('\u{3dc}', '\u{3dc}', SC_Upper), ('\u{3dd}',
+ '\u{3dd}', SC_Lower), ('\u{3de}', '\u{3de}', SC_Upper), ('\u{3df}', '\u{3df}', SC_Lower),
+ ('\u{3e0}', '\u{3e0}', SC_Upper), ('\u{3e1}', '\u{3e1}', SC_Lower), ('\u{3e2}', '\u{3e2}',
+ SC_Upper), ('\u{3e3}', '\u{3e3}', SC_Lower), ('\u{3e4}', '\u{3e4}', SC_Upper), ('\u{3e5}',
+ '\u{3e5}', SC_Lower), ('\u{3e6}', '\u{3e6}', SC_Upper), ('\u{3e7}', '\u{3e7}', SC_Lower),
+ ('\u{3e8}', '\u{3e8}', SC_Upper), ('\u{3e9}', '\u{3e9}', SC_Lower), ('\u{3ea}', '\u{3ea}',
+ SC_Upper), ('\u{3eb}', '\u{3eb}', SC_Lower), ('\u{3ec}', '\u{3ec}', SC_Upper), ('\u{3ed}',
+ '\u{3ed}', SC_Lower), ('\u{3ee}', '\u{3ee}', SC_Upper), ('\u{3ef}', '\u{3f3}', SC_Lower),
+ ('\u{3f4}', '\u{3f4}', SC_Upper), ('\u{3f5}', '\u{3f5}', SC_Lower), ('\u{3f7}', '\u{3f7}',
+ SC_Upper), ('\u{3f8}', '\u{3f8}', SC_Lower), ('\u{3f9}', '\u{3fa}', SC_Upper), ('\u{3fb}',
+ '\u{3fc}', SC_Lower), ('\u{3fd}', '\u{42f}', SC_Upper), ('\u{430}', '\u{45f}', SC_Lower),
+ ('\u{460}', '\u{460}', SC_Upper), ('\u{461}', '\u{461}', SC_Lower), ('\u{462}', '\u{462}',
+ SC_Upper), ('\u{463}', '\u{463}', SC_Lower), ('\u{464}', '\u{464}', SC_Upper), ('\u{465}',
+ '\u{465}', SC_Lower), ('\u{466}', '\u{466}', SC_Upper), ('\u{467}', '\u{467}', SC_Lower),
+ ('\u{468}', '\u{468}', SC_Upper), ('\u{469}', '\u{469}', SC_Lower), ('\u{46a}', '\u{46a}',
+ SC_Upper), ('\u{46b}', '\u{46b}', SC_Lower), ('\u{46c}', '\u{46c}', SC_Upper), ('\u{46d}',
+ '\u{46d}', SC_Lower), ('\u{46e}', '\u{46e}', SC_Upper), ('\u{46f}', '\u{46f}', SC_Lower),
+ ('\u{470}', '\u{470}', SC_Upper), ('\u{471}', '\u{471}', SC_Lower), ('\u{472}', '\u{472}',
+ SC_Upper), ('\u{473}', '\u{473}', SC_Lower), ('\u{474}', '\u{474}', SC_Upper), ('\u{475}',
+ '\u{475}', SC_Lower), ('\u{476}', '\u{476}', SC_Upper), ('\u{477}', '\u{477}', SC_Lower),
+ ('\u{478}', '\u{478}', SC_Upper), ('\u{479}', '\u{479}', SC_Lower), ('\u{47a}', '\u{47a}',
+ SC_Upper), ('\u{47b}', '\u{47b}', SC_Lower), ('\u{47c}', '\u{47c}', SC_Upper), ('\u{47d}',
+ '\u{47d}', SC_Lower), ('\u{47e}', '\u{47e}', SC_Upper), ('\u{47f}', '\u{47f}', SC_Lower),
+ ('\u{480}', '\u{480}', SC_Upper), ('\u{481}', '\u{481}', SC_Lower), ('\u{483}', '\u{489}',
+ SC_Extend), ('\u{48a}', '\u{48a}', SC_Upper), ('\u{48b}', '\u{48b}', SC_Lower), ('\u{48c}',
+ '\u{48c}', SC_Upper), ('\u{48d}', '\u{48d}', SC_Lower), ('\u{48e}', '\u{48e}', SC_Upper),
+ ('\u{48f}', '\u{48f}', SC_Lower), ('\u{490}', '\u{490}', SC_Upper), ('\u{491}', '\u{491}',
+ SC_Lower), ('\u{492}', '\u{492}', SC_Upper), ('\u{493}', '\u{493}', SC_Lower), ('\u{494}',
+ '\u{494}', SC_Upper), ('\u{495}', '\u{495}', SC_Lower), ('\u{496}', '\u{496}', SC_Upper),
+ ('\u{497}', '\u{497}', SC_Lower), ('\u{498}', '\u{498}', SC_Upper), ('\u{499}', '\u{499}',
+ SC_Lower), ('\u{49a}', '\u{49a}', SC_Upper), ('\u{49b}', '\u{49b}', SC_Lower), ('\u{49c}',
+ '\u{49c}', SC_Upper), ('\u{49d}', '\u{49d}', SC_Lower), ('\u{49e}', '\u{49e}', SC_Upper),
+ ('\u{49f}', '\u{49f}', SC_Lower), ('\u{4a0}', '\u{4a0}', SC_Upper), ('\u{4a1}', '\u{4a1}',
+ SC_Lower), ('\u{4a2}', '\u{4a2}', SC_Upper), ('\u{4a3}', '\u{4a3}', SC_Lower), ('\u{4a4}',
+ '\u{4a4}', SC_Upper), ('\u{4a5}', '\u{4a5}', SC_Lower), ('\u{4a6}', '\u{4a6}', SC_Upper),
+ ('\u{4a7}', '\u{4a7}', SC_Lower), ('\u{4a8}', '\u{4a8}', SC_Upper), ('\u{4a9}', '\u{4a9}',
+ SC_Lower), ('\u{4aa}', '\u{4aa}', SC_Upper), ('\u{4ab}', '\u{4ab}', SC_Lower), ('\u{4ac}',
+ '\u{4ac}', SC_Upper), ('\u{4ad}', '\u{4ad}', SC_Lower), ('\u{4ae}', '\u{4ae}', SC_Upper),
+ ('\u{4af}', '\u{4af}', SC_Lower), ('\u{4b0}', '\u{4b0}', SC_Upper), ('\u{4b1}', '\u{4b1}',
+ SC_Lower), ('\u{4b2}', '\u{4b2}', SC_Upper), ('\u{4b3}', '\u{4b3}', SC_Lower), ('\u{4b4}',
+ '\u{4b4}', SC_Upper), ('\u{4b5}', '\u{4b5}', SC_Lower), ('\u{4b6}', '\u{4b6}', SC_Upper),
+ ('\u{4b7}', '\u{4b7}', SC_Lower), ('\u{4b8}', '\u{4b8}', SC_Upper), ('\u{4b9}', '\u{4b9}',
+ SC_Lower), ('\u{4ba}', '\u{4ba}', SC_Upper), ('\u{4bb}', '\u{4bb}', SC_Lower), ('\u{4bc}',
+ '\u{4bc}', SC_Upper), ('\u{4bd}', '\u{4bd}', SC_Lower), ('\u{4be}', '\u{4be}', SC_Upper),
+ ('\u{4bf}', '\u{4bf}', SC_Lower), ('\u{4c0}', '\u{4c1}', SC_Upper), ('\u{4c2}', '\u{4c2}',
+ SC_Lower), ('\u{4c3}', '\u{4c3}', SC_Upper), ('\u{4c4}', '\u{4c4}', SC_Lower), ('\u{4c5}',
+ '\u{4c5}', SC_Upper), ('\u{4c6}', '\u{4c6}', SC_Lower), ('\u{4c7}', '\u{4c7}', SC_Upper),
+ ('\u{4c8}', '\u{4c8}', SC_Lower), ('\u{4c9}', '\u{4c9}', SC_Upper), ('\u{4ca}', '\u{4ca}',
+ SC_Lower), ('\u{4cb}', '\u{4cb}', SC_Upper), ('\u{4cc}', '\u{4cc}', SC_Lower), ('\u{4cd}',
+ '\u{4cd}', SC_Upper), ('\u{4ce}', '\u{4cf}', SC_Lower), ('\u{4d0}', '\u{4d0}', SC_Upper),
+ ('\u{4d1}', '\u{4d1}', SC_Lower), ('\u{4d2}', '\u{4d2}', SC_Upper), ('\u{4d3}', '\u{4d3}',
+ SC_Lower), ('\u{4d4}', '\u{4d4}', SC_Upper), ('\u{4d5}', '\u{4d5}', SC_Lower), ('\u{4d6}',
+ '\u{4d6}', SC_Upper), ('\u{4d7}', '\u{4d7}', SC_Lower), ('\u{4d8}', '\u{4d8}', SC_Upper),
+ ('\u{4d9}', '\u{4d9}', SC_Lower), ('\u{4da}', '\u{4da}', SC_Upper), ('\u{4db}', '\u{4db}',
+ SC_Lower), ('\u{4dc}', '\u{4dc}', SC_Upper), ('\u{4dd}', '\u{4dd}', SC_Lower), ('\u{4de}',
+ '\u{4de}', SC_Upper), ('\u{4df}', '\u{4df}', SC_Lower), ('\u{4e0}', '\u{4e0}', SC_Upper),
+ ('\u{4e1}', '\u{4e1}', SC_Lower), ('\u{4e2}', '\u{4e2}', SC_Upper), ('\u{4e3}', '\u{4e3}',
+ SC_Lower), ('\u{4e4}', '\u{4e4}', SC_Upper), ('\u{4e5}', '\u{4e5}', SC_Lower), ('\u{4e6}',
+ '\u{4e6}', SC_Upper), ('\u{4e7}', '\u{4e7}', SC_Lower), ('\u{4e8}', '\u{4e8}', SC_Upper),
+ ('\u{4e9}', '\u{4e9}', SC_Lower), ('\u{4ea}', '\u{4ea}', SC_Upper), ('\u{4eb}', '\u{4eb}',
+ SC_Lower), ('\u{4ec}', '\u{4ec}', SC_Upper), ('\u{4ed}', '\u{4ed}', SC_Lower), ('\u{4ee}',
+ '\u{4ee}', SC_Upper), ('\u{4ef}', '\u{4ef}', SC_Lower), ('\u{4f0}', '\u{4f0}', SC_Upper),
+ ('\u{4f1}', '\u{4f1}', SC_Lower), ('\u{4f2}', '\u{4f2}', SC_Upper), ('\u{4f3}', '\u{4f3}',
+ SC_Lower), ('\u{4f4}', '\u{4f4}', SC_Upper), ('\u{4f5}', '\u{4f5}', SC_Lower), ('\u{4f6}',
+ '\u{4f6}', SC_Upper), ('\u{4f7}', '\u{4f7}', SC_Lower), ('\u{4f8}', '\u{4f8}', SC_Upper),
+ ('\u{4f9}', '\u{4f9}', SC_Lower), ('\u{4fa}', '\u{4fa}', SC_Upper), ('\u{4fb}', '\u{4fb}',
+ SC_Lower), ('\u{4fc}', '\u{4fc}', SC_Upper), ('\u{4fd}', '\u{4fd}', SC_Lower), ('\u{4fe}',
+ '\u{4fe}', SC_Upper), ('\u{4ff}', '\u{4ff}', SC_Lower), ('\u{500}', '\u{500}', SC_Upper),
+ ('\u{501}', '\u{501}', SC_Lower), ('\u{502}', '\u{502}', SC_Upper), ('\u{503}', '\u{503}',
+ SC_Lower), ('\u{504}', '\u{504}', SC_Upper), ('\u{505}', '\u{505}', SC_Lower), ('\u{506}',
+ '\u{506}', SC_Upper), ('\u{507}', '\u{507}', SC_Lower), ('\u{508}', '\u{508}', SC_Upper),
+ ('\u{509}', '\u{509}', SC_Lower), ('\u{50a}', '\u{50a}', SC_Upper), ('\u{50b}', '\u{50b}',
+ SC_Lower), ('\u{50c}', '\u{50c}', SC_Upper), ('\u{50d}', '\u{50d}', SC_Lower), ('\u{50e}',
+ '\u{50e}', SC_Upper), ('\u{50f}', '\u{50f}', SC_Lower), ('\u{510}', '\u{510}', SC_Upper),
+ ('\u{511}', '\u{511}', SC_Lower), ('\u{512}', '\u{512}', SC_Upper), ('\u{513}', '\u{513}',
+ SC_Lower), ('\u{514}', '\u{514}', SC_Upper), ('\u{515}', '\u{515}', SC_Lower), ('\u{516}',
+ '\u{516}', SC_Upper), ('\u{517}', '\u{517}', SC_Lower), ('\u{518}', '\u{518}', SC_Upper),
+ ('\u{519}', '\u{519}', SC_Lower), ('\u{51a}', '\u{51a}', SC_Upper), ('\u{51b}', '\u{51b}',
+ SC_Lower), ('\u{51c}', '\u{51c}', SC_Upper), ('\u{51d}', '\u{51d}', SC_Lower), ('\u{51e}',
+ '\u{51e}', SC_Upper), ('\u{51f}', '\u{51f}', SC_Lower), ('\u{520}', '\u{520}', SC_Upper),
+ ('\u{521}', '\u{521}', SC_Lower), ('\u{522}', '\u{522}', SC_Upper), ('\u{523}', '\u{523}',
+ SC_Lower), ('\u{524}', '\u{524}', SC_Upper), ('\u{525}', '\u{525}', SC_Lower), ('\u{526}',
+ '\u{526}', SC_Upper), ('\u{527}', '\u{527}', SC_Lower), ('\u{528}', '\u{528}', SC_Upper),
+ ('\u{529}', '\u{529}', SC_Lower), ('\u{52a}', '\u{52a}', SC_Upper), ('\u{52b}', '\u{52b}',
+ SC_Lower), ('\u{52c}', '\u{52c}', SC_Upper), ('\u{52d}', '\u{52d}', SC_Lower), ('\u{52e}',
+ '\u{52e}', SC_Upper), ('\u{52f}', '\u{52f}', SC_Lower), ('\u{531}', '\u{556}', SC_Upper),
+ ('\u{559}', '\u{559}', SC_OLetter), ('\u{55d}', '\u{55d}', SC_SContinue), ('\u{560}',
+ '\u{588}', SC_Lower), ('\u{589}', '\u{589}', SC_STerm), ('\u{591}', '\u{5bd}', SC_Extend),
+ ('\u{5bf}', '\u{5bf}', SC_Extend), ('\u{5c1}', '\u{5c2}', SC_Extend), ('\u{5c4}', '\u{5c5}',
+ SC_Extend), ('\u{5c7}', '\u{5c7}', SC_Extend), ('\u{5d0}', '\u{5ea}', SC_OLetter),
+ ('\u{5ef}', '\u{5f3}', SC_OLetter), ('\u{600}', '\u{605}', SC_Format), ('\u{60c}',
+ '\u{60d}', SC_SContinue), ('\u{610}', '\u{61a}', SC_Extend), ('\u{61c}', '\u{61c}',
+ SC_Format), ('\u{61d}', '\u{61f}', SC_STerm), ('\u{620}', '\u{64a}', SC_OLetter),
+ ('\u{64b}', '\u{65f}', SC_Extend), ('\u{660}', '\u{669}', SC_Numeric), ('\u{66b}',
+ '\u{66c}', SC_Numeric), ('\u{66e}', '\u{66f}', SC_OLetter), ('\u{670}', '\u{670}',
+ SC_Extend), ('\u{671}', '\u{6d3}', SC_OLetter), ('\u{6d4}', '\u{6d4}', SC_STerm),
+ ('\u{6d5}', '\u{6d5}', SC_OLetter), ('\u{6d6}', '\u{6dc}', SC_Extend), ('\u{6dd}',
+ '\u{6dd}', SC_Format), ('\u{6df}', '\u{6e4}', SC_Extend), ('\u{6e5}', '\u{6e6}',
+ SC_OLetter), ('\u{6e7}', '\u{6e8}', SC_Extend), ('\u{6ea}', '\u{6ed}', SC_Extend),
+ ('\u{6ee}', '\u{6ef}', SC_OLetter), ('\u{6f0}', '\u{6f9}', SC_Numeric), ('\u{6fa}',
+ '\u{6fc}', SC_OLetter), ('\u{6ff}', '\u{6ff}', SC_OLetter), ('\u{700}', '\u{702}',
+ SC_STerm), ('\u{70f}', '\u{70f}', SC_Format), ('\u{710}', '\u{710}', SC_OLetter),
+ ('\u{711}', '\u{711}', SC_Extend), ('\u{712}', '\u{72f}', SC_OLetter), ('\u{730}',
+ '\u{74a}', SC_Extend), ('\u{74d}', '\u{7a5}', SC_OLetter), ('\u{7a6}', '\u{7b0}',
+ SC_Extend), ('\u{7b1}', '\u{7b1}', SC_OLetter), ('\u{7c0}', '\u{7c9}', SC_Numeric),
+ ('\u{7ca}', '\u{7ea}', SC_OLetter), ('\u{7eb}', '\u{7f3}', SC_Extend), ('\u{7f4}',
+ '\u{7f5}', SC_OLetter), ('\u{7f8}', '\u{7f8}', SC_SContinue), ('\u{7f9}', '\u{7f9}',
+ SC_STerm), ('\u{7fa}', '\u{7fa}', SC_OLetter), ('\u{7fd}', '\u{7fd}', SC_Extend),
+ ('\u{800}', '\u{815}', SC_OLetter), ('\u{816}', '\u{819}', SC_Extend), ('\u{81a}',
+ '\u{81a}', SC_OLetter), ('\u{81b}', '\u{823}', SC_Extend), ('\u{824}', '\u{824}',
+ SC_OLetter), ('\u{825}', '\u{827}', SC_Extend), ('\u{828}', '\u{828}', SC_OLetter),
+ ('\u{829}', '\u{82d}', SC_Extend), ('\u{837}', '\u{837}', SC_STerm), ('\u{839}', '\u{839}',
+ SC_STerm), ('\u{83d}', '\u{83e}', SC_STerm), ('\u{840}', '\u{858}', SC_OLetter), ('\u{859}',
+ '\u{85b}', SC_Extend), ('\u{860}', '\u{86a}', SC_OLetter), ('\u{870}', '\u{887}',
+ SC_OLetter), ('\u{889}', '\u{88e}', SC_OLetter), ('\u{890}', '\u{891}', SC_Format),
+ ('\u{898}', '\u{89f}', SC_Extend), ('\u{8a0}', '\u{8c9}', SC_OLetter), ('\u{8ca}',
+ '\u{8e1}', SC_Extend), ('\u{8e2}', '\u{8e2}', SC_Format), ('\u{8e3}', '\u{903}', SC_Extend),
+ ('\u{904}', '\u{939}', SC_OLetter), ('\u{93a}', '\u{93c}', SC_Extend), ('\u{93d}',
+ '\u{93d}', SC_OLetter), ('\u{93e}', '\u{94f}', SC_Extend), ('\u{950}', '\u{950}',
+ SC_OLetter), ('\u{951}', '\u{957}', SC_Extend), ('\u{958}', '\u{961}', SC_OLetter),
+ ('\u{962}', '\u{963}', SC_Extend), ('\u{964}', '\u{965}', SC_STerm), ('\u{966}', '\u{96f}',
+ SC_Numeric), ('\u{971}', '\u{980}', SC_OLetter), ('\u{981}', '\u{983}', SC_Extend),
+ ('\u{985}', '\u{98c}', SC_OLetter), ('\u{98f}', '\u{990}', SC_OLetter), ('\u{993}',
+ '\u{9a8}', SC_OLetter), ('\u{9aa}', '\u{9b0}', SC_OLetter), ('\u{9b2}', '\u{9b2}',
+ SC_OLetter), ('\u{9b6}', '\u{9b9}', SC_OLetter), ('\u{9bc}', '\u{9bc}', SC_Extend),
+ ('\u{9bd}', '\u{9bd}', SC_OLetter), ('\u{9be}', '\u{9c4}', SC_Extend), ('\u{9c7}',
+ '\u{9c8}', SC_Extend), ('\u{9cb}', '\u{9cd}', SC_Extend), ('\u{9ce}', '\u{9ce}',
+ SC_OLetter), ('\u{9d7}', '\u{9d7}', SC_Extend), ('\u{9dc}', '\u{9dd}', SC_OLetter),
+ ('\u{9df}', '\u{9e1}', SC_OLetter), ('\u{9e2}', '\u{9e3}', SC_Extend), ('\u{9e6}',
+ '\u{9ef}', SC_Numeric), ('\u{9f0}', '\u{9f1}', SC_OLetter), ('\u{9fc}', '\u{9fc}',
+ SC_OLetter), ('\u{9fe}', '\u{9fe}', SC_Extend), ('\u{a01}', '\u{a03}', SC_Extend),
+ ('\u{a05}', '\u{a0a}', SC_OLetter), ('\u{a0f}', '\u{a10}', SC_OLetter), ('\u{a13}',
+ '\u{a28}', SC_OLetter), ('\u{a2a}', '\u{a30}', SC_OLetter), ('\u{a32}', '\u{a33}',
+ SC_OLetter), ('\u{a35}', '\u{a36}', SC_OLetter), ('\u{a38}', '\u{a39}', SC_OLetter),
+ ('\u{a3c}', '\u{a3c}', SC_Extend), ('\u{a3e}', '\u{a42}', SC_Extend), ('\u{a47}', '\u{a48}',
+ SC_Extend), ('\u{a4b}', '\u{a4d}', SC_Extend), ('\u{a51}', '\u{a51}', SC_Extend),
+ ('\u{a59}', '\u{a5c}', SC_OLetter), ('\u{a5e}', '\u{a5e}', SC_OLetter), ('\u{a66}',
+ '\u{a6f}', SC_Numeric), ('\u{a70}', '\u{a71}', SC_Extend), ('\u{a72}', '\u{a74}',
+ SC_OLetter), ('\u{a75}', '\u{a75}', SC_Extend), ('\u{a81}', '\u{a83}', SC_Extend),
+ ('\u{a85}', '\u{a8d}', SC_OLetter), ('\u{a8f}', '\u{a91}', SC_OLetter), ('\u{a93}',
+ '\u{aa8}', SC_OLetter), ('\u{aaa}', '\u{ab0}', SC_OLetter), ('\u{ab2}', '\u{ab3}',
+ SC_OLetter), ('\u{ab5}', '\u{ab9}', SC_OLetter), ('\u{abc}', '\u{abc}', SC_Extend),
+ ('\u{abd}', '\u{abd}', SC_OLetter), ('\u{abe}', '\u{ac5}', SC_Extend), ('\u{ac7}',
+ '\u{ac9}', SC_Extend), ('\u{acb}', '\u{acd}', SC_Extend), ('\u{ad0}', '\u{ad0}',
+ SC_OLetter), ('\u{ae0}', '\u{ae1}', SC_OLetter), ('\u{ae2}', '\u{ae3}', SC_Extend),
+ ('\u{ae6}', '\u{aef}', SC_Numeric), ('\u{af9}', '\u{af9}', SC_OLetter), ('\u{afa}',
+ '\u{aff}', SC_Extend), ('\u{b01}', '\u{b03}', SC_Extend), ('\u{b05}', '\u{b0c}',
+ SC_OLetter), ('\u{b0f}', '\u{b10}', SC_OLetter), ('\u{b13}', '\u{b28}', SC_OLetter),
+ ('\u{b2a}', '\u{b30}', SC_OLetter), ('\u{b32}', '\u{b33}', SC_OLetter), ('\u{b35}',
+ '\u{b39}', SC_OLetter), ('\u{b3c}', '\u{b3c}', SC_Extend), ('\u{b3d}', '\u{b3d}',
+ SC_OLetter), ('\u{b3e}', '\u{b44}', SC_Extend), ('\u{b47}', '\u{b48}', SC_Extend),
+ ('\u{b4b}', '\u{b4d}', SC_Extend), ('\u{b55}', '\u{b57}', SC_Extend), ('\u{b5c}', '\u{b5d}',
+ SC_OLetter), ('\u{b5f}', '\u{b61}', SC_OLetter), ('\u{b62}', '\u{b63}', SC_Extend),
+ ('\u{b66}', '\u{b6f}', SC_Numeric), ('\u{b71}', '\u{b71}', SC_OLetter), ('\u{b82}',
+ '\u{b82}', SC_Extend), ('\u{b83}', '\u{b83}', SC_OLetter), ('\u{b85}', '\u{b8a}',
+ SC_OLetter), ('\u{b8e}', '\u{b90}', SC_OLetter), ('\u{b92}', '\u{b95}', SC_OLetter),
+ ('\u{b99}', '\u{b9a}', SC_OLetter), ('\u{b9c}', '\u{b9c}', SC_OLetter), ('\u{b9e}',
+ '\u{b9f}', SC_OLetter), ('\u{ba3}', '\u{ba4}', SC_OLetter), ('\u{ba8}', '\u{baa}',
+ SC_OLetter), ('\u{bae}', '\u{bb9}', SC_OLetter), ('\u{bbe}', '\u{bc2}', SC_Extend),
+ ('\u{bc6}', '\u{bc8}', SC_Extend), ('\u{bca}', '\u{bcd}', SC_Extend), ('\u{bd0}', '\u{bd0}',
+ SC_OLetter), ('\u{bd7}', '\u{bd7}', SC_Extend), ('\u{be6}', '\u{bef}', SC_Numeric),
+ ('\u{c00}', '\u{c04}', SC_Extend), ('\u{c05}', '\u{c0c}', SC_OLetter), ('\u{c0e}',
+ '\u{c10}', SC_OLetter), ('\u{c12}', '\u{c28}', SC_OLetter), ('\u{c2a}', '\u{c39}',
+ SC_OLetter), ('\u{c3c}', '\u{c3c}', SC_Extend), ('\u{c3d}', '\u{c3d}', SC_OLetter),
+ ('\u{c3e}', '\u{c44}', SC_Extend), ('\u{c46}', '\u{c48}', SC_Extend), ('\u{c4a}', '\u{c4d}',
+ SC_Extend), ('\u{c55}', '\u{c56}', SC_Extend), ('\u{c58}', '\u{c5a}', SC_OLetter),
+ ('\u{c5d}', '\u{c5d}', SC_OLetter), ('\u{c60}', '\u{c61}', SC_OLetter), ('\u{c62}',
+ '\u{c63}', SC_Extend), ('\u{c66}', '\u{c6f}', SC_Numeric), ('\u{c80}', '\u{c80}',
+ SC_OLetter), ('\u{c81}', '\u{c83}', SC_Extend), ('\u{c85}', '\u{c8c}', SC_OLetter),
+ ('\u{c8e}', '\u{c90}', SC_OLetter), ('\u{c92}', '\u{ca8}', SC_OLetter), ('\u{caa}',
+ '\u{cb3}', SC_OLetter), ('\u{cb5}', '\u{cb9}', SC_OLetter), ('\u{cbc}', '\u{cbc}',
+ SC_Extend), ('\u{cbd}', '\u{cbd}', SC_OLetter), ('\u{cbe}', '\u{cc4}', SC_Extend),
+ ('\u{cc6}', '\u{cc8}', SC_Extend), ('\u{cca}', '\u{ccd}', SC_Extend), ('\u{cd5}', '\u{cd6}',
+ SC_Extend), ('\u{cdd}', '\u{cde}', SC_OLetter), ('\u{ce0}', '\u{ce1}', SC_OLetter),
+ ('\u{ce2}', '\u{ce3}', SC_Extend), ('\u{ce6}', '\u{cef}', SC_Numeric), ('\u{cf1}',
+ '\u{cf2}', SC_OLetter), ('\u{cf3}', '\u{cf3}', SC_Extend), ('\u{d00}', '\u{d03}',
+ SC_Extend), ('\u{d04}', '\u{d0c}', SC_OLetter), ('\u{d0e}', '\u{d10}', SC_OLetter),
+ ('\u{d12}', '\u{d3a}', SC_OLetter), ('\u{d3b}', '\u{d3c}', SC_Extend), ('\u{d3d}',
+ '\u{d3d}', SC_OLetter), ('\u{d3e}', '\u{d44}', SC_Extend), ('\u{d46}', '\u{d48}',
+ SC_Extend), ('\u{d4a}', '\u{d4d}', SC_Extend), ('\u{d4e}', '\u{d4e}', SC_OLetter),
+ ('\u{d54}', '\u{d56}', SC_OLetter), ('\u{d57}', '\u{d57}', SC_Extend), ('\u{d5f}',
+ '\u{d61}', SC_OLetter), ('\u{d62}', '\u{d63}', SC_Extend), ('\u{d66}', '\u{d6f}',
+ SC_Numeric), ('\u{d7a}', '\u{d7f}', SC_OLetter), ('\u{d81}', '\u{d83}', SC_Extend),
+ ('\u{d85}', '\u{d96}', SC_OLetter), ('\u{d9a}', '\u{db1}', SC_OLetter), ('\u{db3}',
+ '\u{dbb}', SC_OLetter), ('\u{dbd}', '\u{dbd}', SC_OLetter), ('\u{dc0}', '\u{dc6}',
+ SC_OLetter), ('\u{dca}', '\u{dca}', SC_Extend), ('\u{dcf}', '\u{dd4}', SC_Extend),
+ ('\u{dd6}', '\u{dd6}', SC_Extend), ('\u{dd8}', '\u{ddf}', SC_Extend), ('\u{de6}', '\u{def}',
+ SC_Numeric), ('\u{df2}', '\u{df3}', SC_Extend), ('\u{e01}', '\u{e30}', SC_OLetter),
+ ('\u{e31}', '\u{e31}', SC_Extend), ('\u{e32}', '\u{e33}', SC_OLetter), ('\u{e34}',
+ '\u{e3a}', SC_Extend), ('\u{e40}', '\u{e46}', SC_OLetter), ('\u{e47}', '\u{e4e}',
+ SC_Extend), ('\u{e50}', '\u{e59}', SC_Numeric), ('\u{e81}', '\u{e82}', SC_OLetter),
+ ('\u{e84}', '\u{e84}', SC_OLetter), ('\u{e86}', '\u{e8a}', SC_OLetter), ('\u{e8c}',
+ '\u{ea3}', SC_OLetter), ('\u{ea5}', '\u{ea5}', SC_OLetter), ('\u{ea7}', '\u{eb0}',
+ SC_OLetter), ('\u{eb1}', '\u{eb1}', SC_Extend), ('\u{eb2}', '\u{eb3}', SC_OLetter),
+ ('\u{eb4}', '\u{ebc}', SC_Extend), ('\u{ebd}', '\u{ebd}', SC_OLetter), ('\u{ec0}',
+ '\u{ec4}', SC_OLetter), ('\u{ec6}', '\u{ec6}', SC_OLetter), ('\u{ec8}', '\u{ece}',
+ SC_Extend), ('\u{ed0}', '\u{ed9}', SC_Numeric), ('\u{edc}', '\u{edf}', SC_OLetter),
+ ('\u{f00}', '\u{f00}', SC_OLetter), ('\u{f18}', '\u{f19}', SC_Extend), ('\u{f20}',
+ '\u{f29}', SC_Numeric), ('\u{f35}', '\u{f35}', SC_Extend), ('\u{f37}', '\u{f37}',
+ SC_Extend), ('\u{f39}', '\u{f39}', SC_Extend), ('\u{f3a}', '\u{f3d}', SC_Close), ('\u{f3e}',
+ '\u{f3f}', SC_Extend), ('\u{f40}', '\u{f47}', SC_OLetter), ('\u{f49}', '\u{f6c}',
+ SC_OLetter), ('\u{f71}', '\u{f84}', SC_Extend), ('\u{f86}', '\u{f87}', SC_Extend),
+ ('\u{f88}', '\u{f8c}', SC_OLetter), ('\u{f8d}', '\u{f97}', SC_Extend), ('\u{f99}',
+ '\u{fbc}', SC_Extend), ('\u{fc6}', '\u{fc6}', SC_Extend), ('\u{1000}', '\u{102a}',
+ SC_OLetter), ('\u{102b}', '\u{103e}', SC_Extend), ('\u{103f}', '\u{103f}', SC_OLetter),
+ ('\u{1040}', '\u{1049}', SC_Numeric), ('\u{104a}', '\u{104b}', SC_STerm), ('\u{1050}',
+ '\u{1055}', SC_OLetter), ('\u{1056}', '\u{1059}', SC_Extend), ('\u{105a}', '\u{105d}',
+ SC_OLetter), ('\u{105e}', '\u{1060}', SC_Extend), ('\u{1061}', '\u{1061}', SC_OLetter),
+ ('\u{1062}', '\u{1064}', SC_Extend), ('\u{1065}', '\u{1066}', SC_OLetter), ('\u{1067}',
+ '\u{106d}', SC_Extend), ('\u{106e}', '\u{1070}', SC_OLetter), ('\u{1071}', '\u{1074}',
+ SC_Extend), ('\u{1075}', '\u{1081}', SC_OLetter), ('\u{1082}', '\u{108d}', SC_Extend),
+ ('\u{108e}', '\u{108e}', SC_OLetter), ('\u{108f}', '\u{108f}', SC_Extend), ('\u{1090}',
+ '\u{1099}', SC_Numeric), ('\u{109a}', '\u{109d}', SC_Extend), ('\u{10a0}', '\u{10c5}',
+ SC_Upper), ('\u{10c7}', '\u{10c7}', SC_Upper), ('\u{10cd}', '\u{10cd}', SC_Upper),
+ ('\u{10d0}', '\u{10fa}', SC_OLetter), ('\u{10fc}', '\u{10fc}', SC_Lower), ('\u{10fd}',
+ '\u{1248}', SC_OLetter), ('\u{124a}', '\u{124d}', SC_OLetter), ('\u{1250}', '\u{1256}',
+ SC_OLetter), ('\u{1258}', '\u{1258}', SC_OLetter), ('\u{125a}', '\u{125d}', SC_OLetter),
+ ('\u{1260}', '\u{1288}', SC_OLetter), ('\u{128a}', '\u{128d}', SC_OLetter), ('\u{1290}',
+ '\u{12b0}', SC_OLetter), ('\u{12b2}', '\u{12b5}', SC_OLetter), ('\u{12b8}', '\u{12be}',
+ SC_OLetter), ('\u{12c0}', '\u{12c0}', SC_OLetter), ('\u{12c2}', '\u{12c5}', SC_OLetter),
+ ('\u{12c8}', '\u{12d6}', SC_OLetter), ('\u{12d8}', '\u{1310}', SC_OLetter), ('\u{1312}',
+ '\u{1315}', SC_OLetter), ('\u{1318}', '\u{135a}', SC_OLetter), ('\u{135d}', '\u{135f}',
+ SC_Extend), ('\u{1362}', '\u{1362}', SC_STerm), ('\u{1367}', '\u{1368}', SC_STerm),
+ ('\u{1380}', '\u{138f}', SC_OLetter), ('\u{13a0}', '\u{13f5}', SC_Upper), ('\u{13f8}',
+ '\u{13fd}', SC_Lower), ('\u{1401}', '\u{166c}', SC_OLetter), ('\u{166e}', '\u{166e}',
+ SC_STerm), ('\u{166f}', '\u{167f}', SC_OLetter), ('\u{1680}', '\u{1680}', SC_Sp),
+ ('\u{1681}', '\u{169a}', SC_OLetter), ('\u{169b}', '\u{169c}', SC_Close), ('\u{16a0}',
+ '\u{16ea}', SC_OLetter), ('\u{16ee}', '\u{16f8}', SC_OLetter), ('\u{1700}', '\u{1711}',
+ SC_OLetter), ('\u{1712}', '\u{1715}', SC_Extend), ('\u{171f}', '\u{1731}', SC_OLetter),
+ ('\u{1732}', '\u{1734}', SC_Extend), ('\u{1735}', '\u{1736}', SC_STerm), ('\u{1740}',
+ '\u{1751}', SC_OLetter), ('\u{1752}', '\u{1753}', SC_Extend), ('\u{1760}', '\u{176c}',
+ SC_OLetter), ('\u{176e}', '\u{1770}', SC_OLetter), ('\u{1772}', '\u{1773}', SC_Extend),
+ ('\u{1780}', '\u{17b3}', SC_OLetter), ('\u{17b4}', '\u{17d3}', SC_Extend), ('\u{17d7}',
+ '\u{17d7}', SC_OLetter), ('\u{17dc}', '\u{17dc}', SC_OLetter), ('\u{17dd}', '\u{17dd}',
+ SC_Extend), ('\u{17e0}', '\u{17e9}', SC_Numeric), ('\u{1802}', '\u{1802}', SC_SContinue),
+ ('\u{1803}', '\u{1803}', SC_STerm), ('\u{1808}', '\u{1808}', SC_SContinue), ('\u{1809}',
+ '\u{1809}', SC_STerm), ('\u{180b}', '\u{180d}', SC_Extend), ('\u{180e}', '\u{180e}',
+ SC_Format), ('\u{180f}', '\u{180f}', SC_Extend), ('\u{1810}', '\u{1819}', SC_Numeric),
+ ('\u{1820}', '\u{1878}', SC_OLetter), ('\u{1880}', '\u{1884}', SC_OLetter), ('\u{1885}',
+ '\u{1886}', SC_Extend), ('\u{1887}', '\u{18a8}', SC_OLetter), ('\u{18a9}', '\u{18a9}',
+ SC_Extend), ('\u{18aa}', '\u{18aa}', SC_OLetter), ('\u{18b0}', '\u{18f5}', SC_OLetter),
+ ('\u{1900}', '\u{191e}', SC_OLetter), ('\u{1920}', '\u{192b}', SC_Extend), ('\u{1930}',
+ '\u{193b}', SC_Extend), ('\u{1944}', '\u{1945}', SC_STerm), ('\u{1946}', '\u{194f}',
+ SC_Numeric), ('\u{1950}', '\u{196d}', SC_OLetter), ('\u{1970}', '\u{1974}', SC_OLetter),
+ ('\u{1980}', '\u{19ab}', SC_OLetter), ('\u{19b0}', '\u{19c9}', SC_OLetter), ('\u{19d0}',
+ '\u{19d9}', SC_Numeric), ('\u{1a00}', '\u{1a16}', SC_OLetter), ('\u{1a17}', '\u{1a1b}',
+ SC_Extend), ('\u{1a20}', '\u{1a54}', SC_OLetter), ('\u{1a55}', '\u{1a5e}', SC_Extend),
+ ('\u{1a60}', '\u{1a7c}', SC_Extend), ('\u{1a7f}', '\u{1a7f}', SC_Extend), ('\u{1a80}',
+ '\u{1a89}', SC_Numeric), ('\u{1a90}', '\u{1a99}', SC_Numeric), ('\u{1aa7}', '\u{1aa7}',
+ SC_OLetter), ('\u{1aa8}', '\u{1aab}', SC_STerm), ('\u{1ab0}', '\u{1ace}', SC_Extend),
+ ('\u{1b00}', '\u{1b04}', SC_Extend), ('\u{1b05}', '\u{1b33}', SC_OLetter), ('\u{1b34}',
+ '\u{1b44}', SC_Extend), ('\u{1b45}', '\u{1b4c}', SC_OLetter), ('\u{1b50}', '\u{1b59}',
+ SC_Numeric), ('\u{1b5a}', '\u{1b5b}', SC_STerm), ('\u{1b5e}', '\u{1b5f}', SC_STerm),
+ ('\u{1b6b}', '\u{1b73}', SC_Extend), ('\u{1b7d}', '\u{1b7e}', SC_STerm), ('\u{1b80}',
+ '\u{1b82}', SC_Extend), ('\u{1b83}', '\u{1ba0}', SC_OLetter), ('\u{1ba1}', '\u{1bad}',
+ SC_Extend), ('\u{1bae}', '\u{1baf}', SC_OLetter), ('\u{1bb0}', '\u{1bb9}', SC_Numeric),
+ ('\u{1bba}', '\u{1be5}', SC_OLetter), ('\u{1be6}', '\u{1bf3}', SC_Extend), ('\u{1c00}',
+ '\u{1c23}', SC_OLetter), ('\u{1c24}', '\u{1c37}', SC_Extend), ('\u{1c3b}', '\u{1c3c}',
+ SC_STerm), ('\u{1c40}', '\u{1c49}', SC_Numeric), ('\u{1c4d}', '\u{1c4f}', SC_OLetter),
+ ('\u{1c50}', '\u{1c59}', SC_Numeric), ('\u{1c5a}', '\u{1c7d}', SC_OLetter), ('\u{1c7e}',
+ '\u{1c7f}', SC_STerm), ('\u{1c80}', '\u{1c88}', SC_Lower), ('\u{1c90}', '\u{1cba}',
+ SC_OLetter), ('\u{1cbd}', '\u{1cbf}', SC_OLetter), ('\u{1cd0}', '\u{1cd2}', SC_Extend),
+ ('\u{1cd4}', '\u{1ce8}', SC_Extend), ('\u{1ce9}', '\u{1cec}', SC_OLetter), ('\u{1ced}',
+ '\u{1ced}', SC_Extend), ('\u{1cee}', '\u{1cf3}', SC_OLetter), ('\u{1cf4}', '\u{1cf4}',
+ SC_Extend), ('\u{1cf5}', '\u{1cf6}', SC_OLetter), ('\u{1cf7}', '\u{1cf9}', SC_Extend),
+ ('\u{1cfa}', '\u{1cfa}', SC_OLetter), ('\u{1d00}', '\u{1dbf}', SC_Lower), ('\u{1dc0}',
+ '\u{1dff}', SC_Extend), ('\u{1e00}', '\u{1e00}', SC_Upper), ('\u{1e01}', '\u{1e01}',
+ SC_Lower), ('\u{1e02}', '\u{1e02}', SC_Upper), ('\u{1e03}', '\u{1e03}', SC_Lower),
+ ('\u{1e04}', '\u{1e04}', SC_Upper), ('\u{1e05}', '\u{1e05}', SC_Lower), ('\u{1e06}',
+ '\u{1e06}', SC_Upper), ('\u{1e07}', '\u{1e07}', SC_Lower), ('\u{1e08}', '\u{1e08}',
+ SC_Upper), ('\u{1e09}', '\u{1e09}', SC_Lower), ('\u{1e0a}', '\u{1e0a}', SC_Upper),
+ ('\u{1e0b}', '\u{1e0b}', SC_Lower), ('\u{1e0c}', '\u{1e0c}', SC_Upper), ('\u{1e0d}',
+ '\u{1e0d}', SC_Lower), ('\u{1e0e}', '\u{1e0e}', SC_Upper), ('\u{1e0f}', '\u{1e0f}',
+ SC_Lower), ('\u{1e10}', '\u{1e10}', SC_Upper), ('\u{1e11}', '\u{1e11}', SC_Lower),
+ ('\u{1e12}', '\u{1e12}', SC_Upper), ('\u{1e13}', '\u{1e13}', SC_Lower), ('\u{1e14}',
+ '\u{1e14}', SC_Upper), ('\u{1e15}', '\u{1e15}', SC_Lower), ('\u{1e16}', '\u{1e16}',
+ SC_Upper), ('\u{1e17}', '\u{1e17}', SC_Lower), ('\u{1e18}', '\u{1e18}', SC_Upper),
+ ('\u{1e19}', '\u{1e19}', SC_Lower), ('\u{1e1a}', '\u{1e1a}', SC_Upper), ('\u{1e1b}',
+ '\u{1e1b}', SC_Lower), ('\u{1e1c}', '\u{1e1c}', SC_Upper), ('\u{1e1d}', '\u{1e1d}',
+ SC_Lower), ('\u{1e1e}', '\u{1e1e}', SC_Upper), ('\u{1e1f}', '\u{1e1f}', SC_Lower),
+ ('\u{1e20}', '\u{1e20}', SC_Upper), ('\u{1e21}', '\u{1e21}', SC_Lower), ('\u{1e22}',
+ '\u{1e22}', SC_Upper), ('\u{1e23}', '\u{1e23}', SC_Lower), ('\u{1e24}', '\u{1e24}',
+ SC_Upper), ('\u{1e25}', '\u{1e25}', SC_Lower), ('\u{1e26}', '\u{1e26}', SC_Upper),
+ ('\u{1e27}', '\u{1e27}', SC_Lower), ('\u{1e28}', '\u{1e28}', SC_Upper), ('\u{1e29}',
+ '\u{1e29}', SC_Lower), ('\u{1e2a}', '\u{1e2a}', SC_Upper), ('\u{1e2b}', '\u{1e2b}',
+ SC_Lower), ('\u{1e2c}', '\u{1e2c}', SC_Upper), ('\u{1e2d}', '\u{1e2d}', SC_Lower),
+ ('\u{1e2e}', '\u{1e2e}', SC_Upper), ('\u{1e2f}', '\u{1e2f}', SC_Lower), ('\u{1e30}',
+ '\u{1e30}', SC_Upper), ('\u{1e31}', '\u{1e31}', SC_Lower), ('\u{1e32}', '\u{1e32}',
+ SC_Upper), ('\u{1e33}', '\u{1e33}', SC_Lower), ('\u{1e34}', '\u{1e34}', SC_Upper),
+ ('\u{1e35}', '\u{1e35}', SC_Lower), ('\u{1e36}', '\u{1e36}', SC_Upper), ('\u{1e37}',
+ '\u{1e37}', SC_Lower), ('\u{1e38}', '\u{1e38}', SC_Upper), ('\u{1e39}', '\u{1e39}',
+ SC_Lower), ('\u{1e3a}', '\u{1e3a}', SC_Upper), ('\u{1e3b}', '\u{1e3b}', SC_Lower),
+ ('\u{1e3c}', '\u{1e3c}', SC_Upper), ('\u{1e3d}', '\u{1e3d}', SC_Lower), ('\u{1e3e}',
+ '\u{1e3e}', SC_Upper), ('\u{1e3f}', '\u{1e3f}', SC_Lower), ('\u{1e40}', '\u{1e40}',
+ SC_Upper), ('\u{1e41}', '\u{1e41}', SC_Lower), ('\u{1e42}', '\u{1e42}', SC_Upper),
+ ('\u{1e43}', '\u{1e43}', SC_Lower), ('\u{1e44}', '\u{1e44}', SC_Upper), ('\u{1e45}',
+ '\u{1e45}', SC_Lower), ('\u{1e46}', '\u{1e46}', SC_Upper), ('\u{1e47}', '\u{1e47}',
+ SC_Lower), ('\u{1e48}', '\u{1e48}', SC_Upper), ('\u{1e49}', '\u{1e49}', SC_Lower),
+ ('\u{1e4a}', '\u{1e4a}', SC_Upper), ('\u{1e4b}', '\u{1e4b}', SC_Lower), ('\u{1e4c}',
+ '\u{1e4c}', SC_Upper), ('\u{1e4d}', '\u{1e4d}', SC_Lower), ('\u{1e4e}', '\u{1e4e}',
+ SC_Upper), ('\u{1e4f}', '\u{1e4f}', SC_Lower), ('\u{1e50}', '\u{1e50}', SC_Upper),
+ ('\u{1e51}', '\u{1e51}', SC_Lower), ('\u{1e52}', '\u{1e52}', SC_Upper), ('\u{1e53}',
+ '\u{1e53}', SC_Lower), ('\u{1e54}', '\u{1e54}', SC_Upper), ('\u{1e55}', '\u{1e55}',
+ SC_Lower), ('\u{1e56}', '\u{1e56}', SC_Upper), ('\u{1e57}', '\u{1e57}', SC_Lower),
+ ('\u{1e58}', '\u{1e58}', SC_Upper), ('\u{1e59}', '\u{1e59}', SC_Lower), ('\u{1e5a}',
+ '\u{1e5a}', SC_Upper), ('\u{1e5b}', '\u{1e5b}', SC_Lower), ('\u{1e5c}', '\u{1e5c}',
+ SC_Upper), ('\u{1e5d}', '\u{1e5d}', SC_Lower), ('\u{1e5e}', '\u{1e5e}', SC_Upper),
+ ('\u{1e5f}', '\u{1e5f}', SC_Lower), ('\u{1e60}', '\u{1e60}', SC_Upper), ('\u{1e61}',
+ '\u{1e61}', SC_Lower), ('\u{1e62}', '\u{1e62}', SC_Upper), ('\u{1e63}', '\u{1e63}',
+ SC_Lower), ('\u{1e64}', '\u{1e64}', SC_Upper), ('\u{1e65}', '\u{1e65}', SC_Lower),
+ ('\u{1e66}', '\u{1e66}', SC_Upper), ('\u{1e67}', '\u{1e67}', SC_Lower), ('\u{1e68}',
+ '\u{1e68}', SC_Upper), ('\u{1e69}', '\u{1e69}', SC_Lower), ('\u{1e6a}', '\u{1e6a}',
+ SC_Upper), ('\u{1e6b}', '\u{1e6b}', SC_Lower), ('\u{1e6c}', '\u{1e6c}', SC_Upper),
+ ('\u{1e6d}', '\u{1e6d}', SC_Lower), ('\u{1e6e}', '\u{1e6e}', SC_Upper), ('\u{1e6f}',
+ '\u{1e6f}', SC_Lower), ('\u{1e70}', '\u{1e70}', SC_Upper), ('\u{1e71}', '\u{1e71}',
+ SC_Lower), ('\u{1e72}', '\u{1e72}', SC_Upper), ('\u{1e73}', '\u{1e73}', SC_Lower),
+ ('\u{1e74}', '\u{1e74}', SC_Upper), ('\u{1e75}', '\u{1e75}', SC_Lower), ('\u{1e76}',
+ '\u{1e76}', SC_Upper), ('\u{1e77}', '\u{1e77}', SC_Lower), ('\u{1e78}', '\u{1e78}',
+ SC_Upper), ('\u{1e79}', '\u{1e79}', SC_Lower), ('\u{1e7a}', '\u{1e7a}', SC_Upper),
+ ('\u{1e7b}', '\u{1e7b}', SC_Lower), ('\u{1e7c}', '\u{1e7c}', SC_Upper), ('\u{1e7d}',
+ '\u{1e7d}', SC_Lower), ('\u{1e7e}', '\u{1e7e}', SC_Upper), ('\u{1e7f}', '\u{1e7f}',
+ SC_Lower), ('\u{1e80}', '\u{1e80}', SC_Upper), ('\u{1e81}', '\u{1e81}', SC_Lower),
+ ('\u{1e82}', '\u{1e82}', SC_Upper), ('\u{1e83}', '\u{1e83}', SC_Lower), ('\u{1e84}',
+ '\u{1e84}', SC_Upper), ('\u{1e85}', '\u{1e85}', SC_Lower), ('\u{1e86}', '\u{1e86}',
+ SC_Upper), ('\u{1e87}', '\u{1e87}', SC_Lower), ('\u{1e88}', '\u{1e88}', SC_Upper),
+ ('\u{1e89}', '\u{1e89}', SC_Lower), ('\u{1e8a}', '\u{1e8a}', SC_Upper), ('\u{1e8b}',
+ '\u{1e8b}', SC_Lower), ('\u{1e8c}', '\u{1e8c}', SC_Upper), ('\u{1e8d}', '\u{1e8d}',
+ SC_Lower), ('\u{1e8e}', '\u{1e8e}', SC_Upper), ('\u{1e8f}', '\u{1e8f}', SC_Lower),
+ ('\u{1e90}', '\u{1e90}', SC_Upper), ('\u{1e91}', '\u{1e91}', SC_Lower), ('\u{1e92}',
+ '\u{1e92}', SC_Upper), ('\u{1e93}', '\u{1e93}', SC_Lower), ('\u{1e94}', '\u{1e94}',
+ SC_Upper), ('\u{1e95}', '\u{1e9d}', SC_Lower), ('\u{1e9e}', '\u{1e9e}', SC_Upper),
+ ('\u{1e9f}', '\u{1e9f}', SC_Lower), ('\u{1ea0}', '\u{1ea0}', SC_Upper), ('\u{1ea1}',
+ '\u{1ea1}', SC_Lower), ('\u{1ea2}', '\u{1ea2}', SC_Upper), ('\u{1ea3}', '\u{1ea3}',
+ SC_Lower), ('\u{1ea4}', '\u{1ea4}', SC_Upper), ('\u{1ea5}', '\u{1ea5}', SC_Lower),
+ ('\u{1ea6}', '\u{1ea6}', SC_Upper), ('\u{1ea7}', '\u{1ea7}', SC_Lower), ('\u{1ea8}',
+ '\u{1ea8}', SC_Upper), ('\u{1ea9}', '\u{1ea9}', SC_Lower), ('\u{1eaa}', '\u{1eaa}',
+ SC_Upper), ('\u{1eab}', '\u{1eab}', SC_Lower), ('\u{1eac}', '\u{1eac}', SC_Upper),
+ ('\u{1ead}', '\u{1ead}', SC_Lower), ('\u{1eae}', '\u{1eae}', SC_Upper), ('\u{1eaf}',
+ '\u{1eaf}', SC_Lower), ('\u{1eb0}', '\u{1eb0}', SC_Upper), ('\u{1eb1}', '\u{1eb1}',
+ SC_Lower), ('\u{1eb2}', '\u{1eb2}', SC_Upper), ('\u{1eb3}', '\u{1eb3}', SC_Lower),
+ ('\u{1eb4}', '\u{1eb4}', SC_Upper), ('\u{1eb5}', '\u{1eb5}', SC_Lower), ('\u{1eb6}',
+ '\u{1eb6}', SC_Upper), ('\u{1eb7}', '\u{1eb7}', SC_Lower), ('\u{1eb8}', '\u{1eb8}',
+ SC_Upper), ('\u{1eb9}', '\u{1eb9}', SC_Lower), ('\u{1eba}', '\u{1eba}', SC_Upper),
+ ('\u{1ebb}', '\u{1ebb}', SC_Lower), ('\u{1ebc}', '\u{1ebc}', SC_Upper), ('\u{1ebd}',
+ '\u{1ebd}', SC_Lower), ('\u{1ebe}', '\u{1ebe}', SC_Upper), ('\u{1ebf}', '\u{1ebf}',
+ SC_Lower), ('\u{1ec0}', '\u{1ec0}', SC_Upper), ('\u{1ec1}', '\u{1ec1}', SC_Lower),
+ ('\u{1ec2}', '\u{1ec2}', SC_Upper), ('\u{1ec3}', '\u{1ec3}', SC_Lower), ('\u{1ec4}',
+ '\u{1ec4}', SC_Upper), ('\u{1ec5}', '\u{1ec5}', SC_Lower), ('\u{1ec6}', '\u{1ec6}',
+ SC_Upper), ('\u{1ec7}', '\u{1ec7}', SC_Lower), ('\u{1ec8}', '\u{1ec8}', SC_Upper),
+ ('\u{1ec9}', '\u{1ec9}', SC_Lower), ('\u{1eca}', '\u{1eca}', SC_Upper), ('\u{1ecb}',
+ '\u{1ecb}', SC_Lower), ('\u{1ecc}', '\u{1ecc}', SC_Upper), ('\u{1ecd}', '\u{1ecd}',
+ SC_Lower), ('\u{1ece}', '\u{1ece}', SC_Upper), ('\u{1ecf}', '\u{1ecf}', SC_Lower),
+ ('\u{1ed0}', '\u{1ed0}', SC_Upper), ('\u{1ed1}', '\u{1ed1}', SC_Lower), ('\u{1ed2}',
+ '\u{1ed2}', SC_Upper), ('\u{1ed3}', '\u{1ed3}', SC_Lower), ('\u{1ed4}', '\u{1ed4}',
+ SC_Upper), ('\u{1ed5}', '\u{1ed5}', SC_Lower), ('\u{1ed6}', '\u{1ed6}', SC_Upper),
+ ('\u{1ed7}', '\u{1ed7}', SC_Lower), ('\u{1ed8}', '\u{1ed8}', SC_Upper), ('\u{1ed9}',
+ '\u{1ed9}', SC_Lower), ('\u{1eda}', '\u{1eda}', SC_Upper), ('\u{1edb}', '\u{1edb}',
+ SC_Lower), ('\u{1edc}', '\u{1edc}', SC_Upper), ('\u{1edd}', '\u{1edd}', SC_Lower),
+ ('\u{1ede}', '\u{1ede}', SC_Upper), ('\u{1edf}', '\u{1edf}', SC_Lower), ('\u{1ee0}',
+ '\u{1ee0}', SC_Upper), ('\u{1ee1}', '\u{1ee1}', SC_Lower), ('\u{1ee2}', '\u{1ee2}',
+ SC_Upper), ('\u{1ee3}', '\u{1ee3}', SC_Lower), ('\u{1ee4}', '\u{1ee4}', SC_Upper),
+ ('\u{1ee5}', '\u{1ee5}', SC_Lower), ('\u{1ee6}', '\u{1ee6}', SC_Upper), ('\u{1ee7}',
+ '\u{1ee7}', SC_Lower), ('\u{1ee8}', '\u{1ee8}', SC_Upper), ('\u{1ee9}', '\u{1ee9}',
+ SC_Lower), ('\u{1eea}', '\u{1eea}', SC_Upper), ('\u{1eeb}', '\u{1eeb}', SC_Lower),
+ ('\u{1eec}', '\u{1eec}', SC_Upper), ('\u{1eed}', '\u{1eed}', SC_Lower), ('\u{1eee}',
+ '\u{1eee}', SC_Upper), ('\u{1eef}', '\u{1eef}', SC_Lower), ('\u{1ef0}', '\u{1ef0}',
+ SC_Upper), ('\u{1ef1}', '\u{1ef1}', SC_Lower), ('\u{1ef2}', '\u{1ef2}', SC_Upper),
+ ('\u{1ef3}', '\u{1ef3}', SC_Lower), ('\u{1ef4}', '\u{1ef4}', SC_Upper), ('\u{1ef5}',
+ '\u{1ef5}', SC_Lower), ('\u{1ef6}', '\u{1ef6}', SC_Upper), ('\u{1ef7}', '\u{1ef7}',
+ SC_Lower), ('\u{1ef8}', '\u{1ef8}', SC_Upper), ('\u{1ef9}', '\u{1ef9}', SC_Lower),
+ ('\u{1efa}', '\u{1efa}', SC_Upper), ('\u{1efb}', '\u{1efb}', SC_Lower), ('\u{1efc}',
+ '\u{1efc}', SC_Upper), ('\u{1efd}', '\u{1efd}', SC_Lower), ('\u{1efe}', '\u{1efe}',
+ SC_Upper), ('\u{1eff}', '\u{1f07}', SC_Lower), ('\u{1f08}', '\u{1f0f}', SC_Upper),
+ ('\u{1f10}', '\u{1f15}', SC_Lower), ('\u{1f18}', '\u{1f1d}', SC_Upper), ('\u{1f20}',
+ '\u{1f27}', SC_Lower), ('\u{1f28}', '\u{1f2f}', SC_Upper), ('\u{1f30}', '\u{1f37}',
+ SC_Lower), ('\u{1f38}', '\u{1f3f}', SC_Upper), ('\u{1f40}', '\u{1f45}', SC_Lower),
+ ('\u{1f48}', '\u{1f4d}', SC_Upper), ('\u{1f50}', '\u{1f57}', SC_Lower), ('\u{1f59}',
+ '\u{1f59}', SC_Upper), ('\u{1f5b}', '\u{1f5b}', SC_Upper), ('\u{1f5d}', '\u{1f5d}',
+ SC_Upper), ('\u{1f5f}', '\u{1f5f}', SC_Upper), ('\u{1f60}', '\u{1f67}', SC_Lower),
+ ('\u{1f68}', '\u{1f6f}', SC_Upper), ('\u{1f70}', '\u{1f7d}', SC_Lower), ('\u{1f80}',
+ '\u{1f87}', SC_Lower), ('\u{1f88}', '\u{1f8f}', SC_Upper), ('\u{1f90}', '\u{1f97}',
+ SC_Lower), ('\u{1f98}', '\u{1f9f}', SC_Upper), ('\u{1fa0}', '\u{1fa7}', SC_Lower),
+ ('\u{1fa8}', '\u{1faf}', SC_Upper), ('\u{1fb0}', '\u{1fb4}', SC_Lower), ('\u{1fb6}',
+ '\u{1fb7}', SC_Lower), ('\u{1fb8}', '\u{1fbc}', SC_Upper), ('\u{1fbe}', '\u{1fbe}',
+ SC_Lower), ('\u{1fc2}', '\u{1fc4}', SC_Lower), ('\u{1fc6}', '\u{1fc7}', SC_Lower),
+ ('\u{1fc8}', '\u{1fcc}', SC_Upper), ('\u{1fd0}', '\u{1fd3}', SC_Lower), ('\u{1fd6}',
+ '\u{1fd7}', SC_Lower), ('\u{1fd8}', '\u{1fdb}', SC_Upper), ('\u{1fe0}', '\u{1fe7}',
+ SC_Lower), ('\u{1fe8}', '\u{1fec}', SC_Upper), ('\u{1ff2}', '\u{1ff4}', SC_Lower),
+ ('\u{1ff6}', '\u{1ff7}', SC_Lower), ('\u{1ff8}', '\u{1ffc}', SC_Upper), ('\u{2000}',
+ '\u{200a}', SC_Sp), ('\u{200b}', '\u{200b}', SC_Format), ('\u{200c}', '\u{200d}',
+ SC_Extend), ('\u{200e}', '\u{200f}', SC_Format), ('\u{2013}', '\u{2014}', SC_SContinue),
+ ('\u{2018}', '\u{201f}', SC_Close), ('\u{2024}', '\u{2024}', SC_ATerm), ('\u{2028}',
+ '\u{2029}', SC_Sep), ('\u{202a}', '\u{202e}', SC_Format), ('\u{202f}', '\u{202f}', SC_Sp),
+ ('\u{2039}', '\u{203a}', SC_Close), ('\u{203c}', '\u{203d}', SC_STerm), ('\u{2045}',
+ '\u{2046}', SC_Close), ('\u{2047}', '\u{2049}', SC_STerm), ('\u{205f}', '\u{205f}', SC_Sp),
+ ('\u{2060}', '\u{2064}', SC_Format), ('\u{2066}', '\u{206f}', SC_Format), ('\u{2071}',
+ '\u{2071}', SC_Lower), ('\u{207d}', '\u{207e}', SC_Close), ('\u{207f}', '\u{207f}',
+ SC_Lower), ('\u{208d}', '\u{208e}', SC_Close), ('\u{2090}', '\u{209c}', SC_Lower),
+ ('\u{20d0}', '\u{20f0}', SC_Extend), ('\u{2102}', '\u{2102}', SC_Upper), ('\u{2107}',
+ '\u{2107}', SC_Upper), ('\u{210a}', '\u{210a}', SC_Lower), ('\u{210b}', '\u{210d}',
+ SC_Upper), ('\u{210e}', '\u{210f}', SC_Lower), ('\u{2110}', '\u{2112}', SC_Upper),
+ ('\u{2113}', '\u{2113}', SC_Lower), ('\u{2115}', '\u{2115}', SC_Upper), ('\u{2119}',
+ '\u{211d}', SC_Upper), ('\u{2124}', '\u{2124}', SC_Upper), ('\u{2126}', '\u{2126}',
+ SC_Upper), ('\u{2128}', '\u{2128}', SC_Upper), ('\u{212a}', '\u{212d}', SC_Upper),
+ ('\u{212f}', '\u{212f}', SC_Lower), ('\u{2130}', '\u{2133}', SC_Upper), ('\u{2134}',
+ '\u{2134}', SC_Lower), ('\u{2135}', '\u{2138}', SC_OLetter), ('\u{2139}', '\u{2139}',
+ SC_Lower), ('\u{213c}', '\u{213d}', SC_Lower), ('\u{213e}', '\u{213f}', SC_Upper),
+ ('\u{2145}', '\u{2145}', SC_Upper), ('\u{2146}', '\u{2149}', SC_Lower), ('\u{214e}',
+ '\u{214e}', SC_Lower), ('\u{2160}', '\u{216f}', SC_Upper), ('\u{2170}', '\u{217f}',
+ SC_Lower), ('\u{2180}', '\u{2182}', SC_OLetter), ('\u{2183}', '\u{2183}', SC_Upper),
+ ('\u{2184}', '\u{2184}', SC_Lower), ('\u{2185}', '\u{2188}', SC_OLetter), ('\u{2308}',
+ '\u{230b}', SC_Close), ('\u{2329}', '\u{232a}', SC_Close), ('\u{24b6}', '\u{24cf}',
+ SC_Upper), ('\u{24d0}', '\u{24e9}', SC_Lower), ('\u{275b}', '\u{2760}', SC_Close),
+ ('\u{2768}', '\u{2775}', SC_Close), ('\u{27c5}', '\u{27c6}', SC_Close), ('\u{27e6}',
+ '\u{27ef}', SC_Close), ('\u{2983}', '\u{2998}', SC_Close), ('\u{29d8}', '\u{29db}',
+ SC_Close), ('\u{29fc}', '\u{29fd}', SC_Close), ('\u{2c00}', '\u{2c2f}', SC_Upper),
+ ('\u{2c30}', '\u{2c5f}', SC_Lower), ('\u{2c60}', '\u{2c60}', SC_Upper), ('\u{2c61}',
+ '\u{2c61}', SC_Lower), ('\u{2c62}', '\u{2c64}', SC_Upper), ('\u{2c65}', '\u{2c66}',
+ SC_Lower), ('\u{2c67}', '\u{2c67}', SC_Upper), ('\u{2c68}', '\u{2c68}', SC_Lower),
+ ('\u{2c69}', '\u{2c69}', SC_Upper), ('\u{2c6a}', '\u{2c6a}', SC_Lower), ('\u{2c6b}',
+ '\u{2c6b}', SC_Upper), ('\u{2c6c}', '\u{2c6c}', SC_Lower), ('\u{2c6d}', '\u{2c70}',
+ SC_Upper), ('\u{2c71}', '\u{2c71}', SC_Lower), ('\u{2c72}', '\u{2c72}', SC_Upper),
+ ('\u{2c73}', '\u{2c74}', SC_Lower), ('\u{2c75}', '\u{2c75}', SC_Upper), ('\u{2c76}',
+ '\u{2c7d}', SC_Lower), ('\u{2c7e}', '\u{2c80}', SC_Upper), ('\u{2c81}', '\u{2c81}',
+ SC_Lower), ('\u{2c82}', '\u{2c82}', SC_Upper), ('\u{2c83}', '\u{2c83}', SC_Lower),
+ ('\u{2c84}', '\u{2c84}', SC_Upper), ('\u{2c85}', '\u{2c85}', SC_Lower), ('\u{2c86}',
+ '\u{2c86}', SC_Upper), ('\u{2c87}', '\u{2c87}', SC_Lower), ('\u{2c88}', '\u{2c88}',
+ SC_Upper), ('\u{2c89}', '\u{2c89}', SC_Lower), ('\u{2c8a}', '\u{2c8a}', SC_Upper),
+ ('\u{2c8b}', '\u{2c8b}', SC_Lower), ('\u{2c8c}', '\u{2c8c}', SC_Upper), ('\u{2c8d}',
+ '\u{2c8d}', SC_Lower), ('\u{2c8e}', '\u{2c8e}', SC_Upper), ('\u{2c8f}', '\u{2c8f}',
+ SC_Lower), ('\u{2c90}', '\u{2c90}', SC_Upper), ('\u{2c91}', '\u{2c91}', SC_Lower),
+ ('\u{2c92}', '\u{2c92}', SC_Upper), ('\u{2c93}', '\u{2c93}', SC_Lower), ('\u{2c94}',
+ '\u{2c94}', SC_Upper), ('\u{2c95}', '\u{2c95}', SC_Lower), ('\u{2c96}', '\u{2c96}',
+ SC_Upper), ('\u{2c97}', '\u{2c97}', SC_Lower), ('\u{2c98}', '\u{2c98}', SC_Upper),
+ ('\u{2c99}', '\u{2c99}', SC_Lower), ('\u{2c9a}', '\u{2c9a}', SC_Upper), ('\u{2c9b}',
+ '\u{2c9b}', SC_Lower), ('\u{2c9c}', '\u{2c9c}', SC_Upper), ('\u{2c9d}', '\u{2c9d}',
+ SC_Lower), ('\u{2c9e}', '\u{2c9e}', SC_Upper), ('\u{2c9f}', '\u{2c9f}', SC_Lower),
+ ('\u{2ca0}', '\u{2ca0}', SC_Upper), ('\u{2ca1}', '\u{2ca1}', SC_Lower), ('\u{2ca2}',
+ '\u{2ca2}', SC_Upper), ('\u{2ca3}', '\u{2ca3}', SC_Lower), ('\u{2ca4}', '\u{2ca4}',
+ SC_Upper), ('\u{2ca5}', '\u{2ca5}', SC_Lower), ('\u{2ca6}', '\u{2ca6}', SC_Upper),
+ ('\u{2ca7}', '\u{2ca7}', SC_Lower), ('\u{2ca8}', '\u{2ca8}', SC_Upper), ('\u{2ca9}',
+ '\u{2ca9}', SC_Lower), ('\u{2caa}', '\u{2caa}', SC_Upper), ('\u{2cab}', '\u{2cab}',
+ SC_Lower), ('\u{2cac}', '\u{2cac}', SC_Upper), ('\u{2cad}', '\u{2cad}', SC_Lower),
+ ('\u{2cae}', '\u{2cae}', SC_Upper), ('\u{2caf}', '\u{2caf}', SC_Lower), ('\u{2cb0}',
+ '\u{2cb0}', SC_Upper), ('\u{2cb1}', '\u{2cb1}', SC_Lower), ('\u{2cb2}', '\u{2cb2}',
+ SC_Upper), ('\u{2cb3}', '\u{2cb3}', SC_Lower), ('\u{2cb4}', '\u{2cb4}', SC_Upper),
+ ('\u{2cb5}', '\u{2cb5}', SC_Lower), ('\u{2cb6}', '\u{2cb6}', SC_Upper), ('\u{2cb7}',
+ '\u{2cb7}', SC_Lower), ('\u{2cb8}', '\u{2cb8}', SC_Upper), ('\u{2cb9}', '\u{2cb9}',
+ SC_Lower), ('\u{2cba}', '\u{2cba}', SC_Upper), ('\u{2cbb}', '\u{2cbb}', SC_Lower),
+ ('\u{2cbc}', '\u{2cbc}', SC_Upper), ('\u{2cbd}', '\u{2cbd}', SC_Lower), ('\u{2cbe}',
+ '\u{2cbe}', SC_Upper), ('\u{2cbf}', '\u{2cbf}', SC_Lower), ('\u{2cc0}', '\u{2cc0}',
+ SC_Upper), ('\u{2cc1}', '\u{2cc1}', SC_Lower), ('\u{2cc2}', '\u{2cc2}', SC_Upper),
+ ('\u{2cc3}', '\u{2cc3}', SC_Lower), ('\u{2cc4}', '\u{2cc4}', SC_Upper), ('\u{2cc5}',
+ '\u{2cc5}', SC_Lower), ('\u{2cc6}', '\u{2cc6}', SC_Upper), ('\u{2cc7}', '\u{2cc7}',
+ SC_Lower), ('\u{2cc8}', '\u{2cc8}', SC_Upper), ('\u{2cc9}', '\u{2cc9}', SC_Lower),
+ ('\u{2cca}', '\u{2cca}', SC_Upper), ('\u{2ccb}', '\u{2ccb}', SC_Lower), ('\u{2ccc}',
+ '\u{2ccc}', SC_Upper), ('\u{2ccd}', '\u{2ccd}', SC_Lower), ('\u{2cce}', '\u{2cce}',
+ SC_Upper), ('\u{2ccf}', '\u{2ccf}', SC_Lower), ('\u{2cd0}', '\u{2cd0}', SC_Upper),
+ ('\u{2cd1}', '\u{2cd1}', SC_Lower), ('\u{2cd2}', '\u{2cd2}', SC_Upper), ('\u{2cd3}',
+ '\u{2cd3}', SC_Lower), ('\u{2cd4}', '\u{2cd4}', SC_Upper), ('\u{2cd5}', '\u{2cd5}',
+ SC_Lower), ('\u{2cd6}', '\u{2cd6}', SC_Upper), ('\u{2cd7}', '\u{2cd7}', SC_Lower),
+ ('\u{2cd8}', '\u{2cd8}', SC_Upper), ('\u{2cd9}', '\u{2cd9}', SC_Lower), ('\u{2cda}',
+ '\u{2cda}', SC_Upper), ('\u{2cdb}', '\u{2cdb}', SC_Lower), ('\u{2cdc}', '\u{2cdc}',
+ SC_Upper), ('\u{2cdd}', '\u{2cdd}', SC_Lower), ('\u{2cde}', '\u{2cde}', SC_Upper),
+ ('\u{2cdf}', '\u{2cdf}', SC_Lower), ('\u{2ce0}', '\u{2ce0}', SC_Upper), ('\u{2ce1}',
+ '\u{2ce1}', SC_Lower), ('\u{2ce2}', '\u{2ce2}', SC_Upper), ('\u{2ce3}', '\u{2ce4}',
+ SC_Lower), ('\u{2ceb}', '\u{2ceb}', SC_Upper), ('\u{2cec}', '\u{2cec}', SC_Lower),
+ ('\u{2ced}', '\u{2ced}', SC_Upper), ('\u{2cee}', '\u{2cee}', SC_Lower), ('\u{2cef}',
+ '\u{2cf1}', SC_Extend), ('\u{2cf2}', '\u{2cf2}', SC_Upper), ('\u{2cf3}', '\u{2cf3}',
+ SC_Lower), ('\u{2d00}', '\u{2d25}', SC_Lower), ('\u{2d27}', '\u{2d27}', SC_Lower),
+ ('\u{2d2d}', '\u{2d2d}', SC_Lower), ('\u{2d30}', '\u{2d67}', SC_OLetter), ('\u{2d6f}',
+ '\u{2d6f}', SC_OLetter), ('\u{2d7f}', '\u{2d7f}', SC_Extend), ('\u{2d80}', '\u{2d96}',
+ SC_OLetter), ('\u{2da0}', '\u{2da6}', SC_OLetter), ('\u{2da8}', '\u{2dae}', SC_OLetter),
+ ('\u{2db0}', '\u{2db6}', SC_OLetter), ('\u{2db8}', '\u{2dbe}', SC_OLetter), ('\u{2dc0}',
+ '\u{2dc6}', SC_OLetter), ('\u{2dc8}', '\u{2dce}', SC_OLetter), ('\u{2dd0}', '\u{2dd6}',
+ SC_OLetter), ('\u{2dd8}', '\u{2dde}', SC_OLetter), ('\u{2de0}', '\u{2dff}', SC_Extend),
+ ('\u{2e00}', '\u{2e0d}', SC_Close), ('\u{2e1c}', '\u{2e1d}', SC_Close), ('\u{2e20}',
+ '\u{2e29}', SC_Close), ('\u{2e2e}', '\u{2e2e}', SC_STerm), ('\u{2e2f}', '\u{2e2f}',
+ SC_OLetter), ('\u{2e3c}', '\u{2e3c}', SC_STerm), ('\u{2e42}', '\u{2e42}', SC_Close),
+ ('\u{2e53}', '\u{2e54}', SC_STerm), ('\u{2e55}', '\u{2e5c}', SC_Close), ('\u{3000}',
+ '\u{3000}', SC_Sp), ('\u{3001}', '\u{3001}', SC_SContinue), ('\u{3002}', '\u{3002}',
+ SC_STerm), ('\u{3005}', '\u{3007}', SC_OLetter), ('\u{3008}', '\u{3011}', SC_Close),
+ ('\u{3014}', '\u{301b}', SC_Close), ('\u{301d}', '\u{301f}', SC_Close), ('\u{3021}',
+ '\u{3029}', SC_OLetter), ('\u{302a}', '\u{302f}', SC_Extend), ('\u{3031}', '\u{3035}',
+ SC_OLetter), ('\u{3038}', '\u{303c}', SC_OLetter), ('\u{3041}', '\u{3096}', SC_OLetter),
+ ('\u{3099}', '\u{309a}', SC_Extend), ('\u{309d}', '\u{309f}', SC_OLetter), ('\u{30a1}',
+ '\u{30fa}', SC_OLetter), ('\u{30fc}', '\u{30ff}', SC_OLetter), ('\u{3105}', '\u{312f}',
+ SC_OLetter), ('\u{3131}', '\u{318e}', SC_OLetter), ('\u{31a0}', '\u{31bf}', SC_OLetter),
+ ('\u{31f0}', '\u{31ff}', SC_OLetter), ('\u{3400}', '\u{4dbf}', SC_OLetter), ('\u{4e00}',
+ '\u{a48c}', SC_OLetter), ('\u{a4d0}', '\u{a4fd}', SC_OLetter), ('\u{a4ff}', '\u{a4ff}',
+ SC_STerm), ('\u{a500}', '\u{a60c}', SC_OLetter), ('\u{a60e}', '\u{a60f}', SC_STerm),
+ ('\u{a610}', '\u{a61f}', SC_OLetter), ('\u{a620}', '\u{a629}', SC_Numeric), ('\u{a62a}',
+ '\u{a62b}', SC_OLetter), ('\u{a640}', '\u{a640}', SC_Upper), ('\u{a641}', '\u{a641}',
+ SC_Lower), ('\u{a642}', '\u{a642}', SC_Upper), ('\u{a643}', '\u{a643}', SC_Lower),
+ ('\u{a644}', '\u{a644}', SC_Upper), ('\u{a645}', '\u{a645}', SC_Lower), ('\u{a646}',
+ '\u{a646}', SC_Upper), ('\u{a647}', '\u{a647}', SC_Lower), ('\u{a648}', '\u{a648}',
+ SC_Upper), ('\u{a649}', '\u{a649}', SC_Lower), ('\u{a64a}', '\u{a64a}', SC_Upper),
+ ('\u{a64b}', '\u{a64b}', SC_Lower), ('\u{a64c}', '\u{a64c}', SC_Upper), ('\u{a64d}',
+ '\u{a64d}', SC_Lower), ('\u{a64e}', '\u{a64e}', SC_Upper), ('\u{a64f}', '\u{a64f}',
+ SC_Lower), ('\u{a650}', '\u{a650}', SC_Upper), ('\u{a651}', '\u{a651}', SC_Lower),
+ ('\u{a652}', '\u{a652}', SC_Upper), ('\u{a653}', '\u{a653}', SC_Lower), ('\u{a654}',
+ '\u{a654}', SC_Upper), ('\u{a655}', '\u{a655}', SC_Lower), ('\u{a656}', '\u{a656}',
+ SC_Upper), ('\u{a657}', '\u{a657}', SC_Lower), ('\u{a658}', '\u{a658}', SC_Upper),
+ ('\u{a659}', '\u{a659}', SC_Lower), ('\u{a65a}', '\u{a65a}', SC_Upper), ('\u{a65b}',
+ '\u{a65b}', SC_Lower), ('\u{a65c}', '\u{a65c}', SC_Upper), ('\u{a65d}', '\u{a65d}',
+ SC_Lower), ('\u{a65e}', '\u{a65e}', SC_Upper), ('\u{a65f}', '\u{a65f}', SC_Lower),
+ ('\u{a660}', '\u{a660}', SC_Upper), ('\u{a661}', '\u{a661}', SC_Lower), ('\u{a662}',
+ '\u{a662}', SC_Upper), ('\u{a663}', '\u{a663}', SC_Lower), ('\u{a664}', '\u{a664}',
+ SC_Upper), ('\u{a665}', '\u{a665}', SC_Lower), ('\u{a666}', '\u{a666}', SC_Upper),
+ ('\u{a667}', '\u{a667}', SC_Lower), ('\u{a668}', '\u{a668}', SC_Upper), ('\u{a669}',
+ '\u{a669}', SC_Lower), ('\u{a66a}', '\u{a66a}', SC_Upper), ('\u{a66b}', '\u{a66b}',
+ SC_Lower), ('\u{a66c}', '\u{a66c}', SC_Upper), ('\u{a66d}', '\u{a66d}', SC_Lower),
+ ('\u{a66e}', '\u{a66e}', SC_OLetter), ('\u{a66f}', '\u{a672}', SC_Extend), ('\u{a674}',
+ '\u{a67d}', SC_Extend), ('\u{a67f}', '\u{a67f}', SC_OLetter), ('\u{a680}', '\u{a680}',
+ SC_Upper), ('\u{a681}', '\u{a681}', SC_Lower), ('\u{a682}', '\u{a682}', SC_Upper),
+ ('\u{a683}', '\u{a683}', SC_Lower), ('\u{a684}', '\u{a684}', SC_Upper), ('\u{a685}',
+ '\u{a685}', SC_Lower), ('\u{a686}', '\u{a686}', SC_Upper), ('\u{a687}', '\u{a687}',
+ SC_Lower), ('\u{a688}', '\u{a688}', SC_Upper), ('\u{a689}', '\u{a689}', SC_Lower),
+ ('\u{a68a}', '\u{a68a}', SC_Upper), ('\u{a68b}', '\u{a68b}', SC_Lower), ('\u{a68c}',
+ '\u{a68c}', SC_Upper), ('\u{a68d}', '\u{a68d}', SC_Lower), ('\u{a68e}', '\u{a68e}',
+ SC_Upper), ('\u{a68f}', '\u{a68f}', SC_Lower), ('\u{a690}', '\u{a690}', SC_Upper),
+ ('\u{a691}', '\u{a691}', SC_Lower), ('\u{a692}', '\u{a692}', SC_Upper), ('\u{a693}',
+ '\u{a693}', SC_Lower), ('\u{a694}', '\u{a694}', SC_Upper), ('\u{a695}', '\u{a695}',
+ SC_Lower), ('\u{a696}', '\u{a696}', SC_Upper), ('\u{a697}', '\u{a697}', SC_Lower),
+ ('\u{a698}', '\u{a698}', SC_Upper), ('\u{a699}', '\u{a699}', SC_Lower), ('\u{a69a}',
+ '\u{a69a}', SC_Upper), ('\u{a69b}', '\u{a69d}', SC_Lower), ('\u{a69e}', '\u{a69f}',
+ SC_Extend), ('\u{a6a0}', '\u{a6ef}', SC_OLetter), ('\u{a6f0}', '\u{a6f1}', SC_Extend),
+ ('\u{a6f3}', '\u{a6f3}', SC_STerm), ('\u{a6f7}', '\u{a6f7}', SC_STerm), ('\u{a717}',
+ '\u{a71f}', SC_OLetter), ('\u{a722}', '\u{a722}', SC_Upper), ('\u{a723}', '\u{a723}',
+ SC_Lower), ('\u{a724}', '\u{a724}', SC_Upper), ('\u{a725}', '\u{a725}', SC_Lower),
+ ('\u{a726}', '\u{a726}', SC_Upper), ('\u{a727}', '\u{a727}', SC_Lower), ('\u{a728}',
+ '\u{a728}', SC_Upper), ('\u{a729}', '\u{a729}', SC_Lower), ('\u{a72a}', '\u{a72a}',
+ SC_Upper), ('\u{a72b}', '\u{a72b}', SC_Lower), ('\u{a72c}', '\u{a72c}', SC_Upper),
+ ('\u{a72d}', '\u{a72d}', SC_Lower), ('\u{a72e}', '\u{a72e}', SC_Upper), ('\u{a72f}',
+ '\u{a731}', SC_Lower), ('\u{a732}', '\u{a732}', SC_Upper), ('\u{a733}', '\u{a733}',
+ SC_Lower), ('\u{a734}', '\u{a734}', SC_Upper), ('\u{a735}', '\u{a735}', SC_Lower),
+ ('\u{a736}', '\u{a736}', SC_Upper), ('\u{a737}', '\u{a737}', SC_Lower), ('\u{a738}',
+ '\u{a738}', SC_Upper), ('\u{a739}', '\u{a739}', SC_Lower), ('\u{a73a}', '\u{a73a}',
+ SC_Upper), ('\u{a73b}', '\u{a73b}', SC_Lower), ('\u{a73c}', '\u{a73c}', SC_Upper),
+ ('\u{a73d}', '\u{a73d}', SC_Lower), ('\u{a73e}', '\u{a73e}', SC_Upper), ('\u{a73f}',
+ '\u{a73f}', SC_Lower), ('\u{a740}', '\u{a740}', SC_Upper), ('\u{a741}', '\u{a741}',
+ SC_Lower), ('\u{a742}', '\u{a742}', SC_Upper), ('\u{a743}', '\u{a743}', SC_Lower),
+ ('\u{a744}', '\u{a744}', SC_Upper), ('\u{a745}', '\u{a745}', SC_Lower), ('\u{a746}',
+ '\u{a746}', SC_Upper), ('\u{a747}', '\u{a747}', SC_Lower), ('\u{a748}', '\u{a748}',
+ SC_Upper), ('\u{a749}', '\u{a749}', SC_Lower), ('\u{a74a}', '\u{a74a}', SC_Upper),
+ ('\u{a74b}', '\u{a74b}', SC_Lower), ('\u{a74c}', '\u{a74c}', SC_Upper), ('\u{a74d}',
+ '\u{a74d}', SC_Lower), ('\u{a74e}', '\u{a74e}', SC_Upper), ('\u{a74f}', '\u{a74f}',
+ SC_Lower), ('\u{a750}', '\u{a750}', SC_Upper), ('\u{a751}', '\u{a751}', SC_Lower),
+ ('\u{a752}', '\u{a752}', SC_Upper), ('\u{a753}', '\u{a753}', SC_Lower), ('\u{a754}',
+ '\u{a754}', SC_Upper), ('\u{a755}', '\u{a755}', SC_Lower), ('\u{a756}', '\u{a756}',
+ SC_Upper), ('\u{a757}', '\u{a757}', SC_Lower), ('\u{a758}', '\u{a758}', SC_Upper),
+ ('\u{a759}', '\u{a759}', SC_Lower), ('\u{a75a}', '\u{a75a}', SC_Upper), ('\u{a75b}',
+ '\u{a75b}', SC_Lower), ('\u{a75c}', '\u{a75c}', SC_Upper), ('\u{a75d}', '\u{a75d}',
+ SC_Lower), ('\u{a75e}', '\u{a75e}', SC_Upper), ('\u{a75f}', '\u{a75f}', SC_Lower),
+ ('\u{a760}', '\u{a760}', SC_Upper), ('\u{a761}', '\u{a761}', SC_Lower), ('\u{a762}',
+ '\u{a762}', SC_Upper), ('\u{a763}', '\u{a763}', SC_Lower), ('\u{a764}', '\u{a764}',
+ SC_Upper), ('\u{a765}', '\u{a765}', SC_Lower), ('\u{a766}', '\u{a766}', SC_Upper),
+ ('\u{a767}', '\u{a767}', SC_Lower), ('\u{a768}', '\u{a768}', SC_Upper), ('\u{a769}',
+ '\u{a769}', SC_Lower), ('\u{a76a}', '\u{a76a}', SC_Upper), ('\u{a76b}', '\u{a76b}',
+ SC_Lower), ('\u{a76c}', '\u{a76c}', SC_Upper), ('\u{a76d}', '\u{a76d}', SC_Lower),
+ ('\u{a76e}', '\u{a76e}', SC_Upper), ('\u{a76f}', '\u{a778}', SC_Lower), ('\u{a779}',
+ '\u{a779}', SC_Upper), ('\u{a77a}', '\u{a77a}', SC_Lower), ('\u{a77b}', '\u{a77b}',
+ SC_Upper), ('\u{a77c}', '\u{a77c}', SC_Lower), ('\u{a77d}', '\u{a77e}', SC_Upper),
+ ('\u{a77f}', '\u{a77f}', SC_Lower), ('\u{a780}', '\u{a780}', SC_Upper), ('\u{a781}',
+ '\u{a781}', SC_Lower), ('\u{a782}', '\u{a782}', SC_Upper), ('\u{a783}', '\u{a783}',
+ SC_Lower), ('\u{a784}', '\u{a784}', SC_Upper), ('\u{a785}', '\u{a785}', SC_Lower),
+ ('\u{a786}', '\u{a786}', SC_Upper), ('\u{a787}', '\u{a787}', SC_Lower), ('\u{a788}',
+ '\u{a788}', SC_OLetter), ('\u{a78b}', '\u{a78b}', SC_Upper), ('\u{a78c}', '\u{a78c}',
+ SC_Lower), ('\u{a78d}', '\u{a78d}', SC_Upper), ('\u{a78e}', '\u{a78e}', SC_Lower),
+ ('\u{a78f}', '\u{a78f}', SC_OLetter), ('\u{a790}', '\u{a790}', SC_Upper), ('\u{a791}',
+ '\u{a791}', SC_Lower), ('\u{a792}', '\u{a792}', SC_Upper), ('\u{a793}', '\u{a795}',
+ SC_Lower), ('\u{a796}', '\u{a796}', SC_Upper), ('\u{a797}', '\u{a797}', SC_Lower),
+ ('\u{a798}', '\u{a798}', SC_Upper), ('\u{a799}', '\u{a799}', SC_Lower), ('\u{a79a}',
+ '\u{a79a}', SC_Upper), ('\u{a79b}', '\u{a79b}', SC_Lower), ('\u{a79c}', '\u{a79c}',
+ SC_Upper), ('\u{a79d}', '\u{a79d}', SC_Lower), ('\u{a79e}', '\u{a79e}', SC_Upper),
+ ('\u{a79f}', '\u{a79f}', SC_Lower), ('\u{a7a0}', '\u{a7a0}', SC_Upper), ('\u{a7a1}',
+ '\u{a7a1}', SC_Lower), ('\u{a7a2}', '\u{a7a2}', SC_Upper), ('\u{a7a3}', '\u{a7a3}',
+ SC_Lower), ('\u{a7a4}', '\u{a7a4}', SC_Upper), ('\u{a7a5}', '\u{a7a5}', SC_Lower),
+ ('\u{a7a6}', '\u{a7a6}', SC_Upper), ('\u{a7a7}', '\u{a7a7}', SC_Lower), ('\u{a7a8}',
+ '\u{a7a8}', SC_Upper), ('\u{a7a9}', '\u{a7a9}', SC_Lower), ('\u{a7aa}', '\u{a7ae}',
+ SC_Upper), ('\u{a7af}', '\u{a7af}', SC_Lower), ('\u{a7b0}', '\u{a7b4}', SC_Upper),
+ ('\u{a7b5}', '\u{a7b5}', SC_Lower), ('\u{a7b6}', '\u{a7b6}', SC_Upper), ('\u{a7b7}',
+ '\u{a7b7}', SC_Lower), ('\u{a7b8}', '\u{a7b8}', SC_Upper), ('\u{a7b9}', '\u{a7b9}',
+ SC_Lower), ('\u{a7ba}', '\u{a7ba}', SC_Upper), ('\u{a7bb}', '\u{a7bb}', SC_Lower),
+ ('\u{a7bc}', '\u{a7bc}', SC_Upper), ('\u{a7bd}', '\u{a7bd}', SC_Lower), ('\u{a7be}',
+ '\u{a7be}', SC_Upper), ('\u{a7bf}', '\u{a7bf}', SC_Lower), ('\u{a7c0}', '\u{a7c0}',
+ SC_Upper), ('\u{a7c1}', '\u{a7c1}', SC_Lower), ('\u{a7c2}', '\u{a7c2}', SC_Upper),
+ ('\u{a7c3}', '\u{a7c3}', SC_Lower), ('\u{a7c4}', '\u{a7c7}', SC_Upper), ('\u{a7c8}',
+ '\u{a7c8}', SC_Lower), ('\u{a7c9}', '\u{a7c9}', SC_Upper), ('\u{a7ca}', '\u{a7ca}',
+ SC_Lower), ('\u{a7d0}', '\u{a7d0}', SC_Upper), ('\u{a7d1}', '\u{a7d1}', SC_Lower),
+ ('\u{a7d3}', '\u{a7d3}', SC_Lower), ('\u{a7d5}', '\u{a7d5}', SC_Lower), ('\u{a7d6}',
+ '\u{a7d6}', SC_Upper), ('\u{a7d7}', '\u{a7d7}', SC_Lower), ('\u{a7d8}', '\u{a7d8}',
+ SC_Upper), ('\u{a7d9}', '\u{a7d9}', SC_Lower), ('\u{a7f2}', '\u{a7f4}', SC_Lower),
+ ('\u{a7f5}', '\u{a7f5}', SC_Upper), ('\u{a7f6}', '\u{a7f6}', SC_Lower), ('\u{a7f7}',
+ '\u{a7f7}', SC_OLetter), ('\u{a7f8}', '\u{a7fa}', SC_Lower), ('\u{a7fb}', '\u{a801}',
+ SC_OLetter), ('\u{a802}', '\u{a802}', SC_Extend), ('\u{a803}', '\u{a805}', SC_OLetter),
+ ('\u{a806}', '\u{a806}', SC_Extend), ('\u{a807}', '\u{a80a}', SC_OLetter), ('\u{a80b}',
+ '\u{a80b}', SC_Extend), ('\u{a80c}', '\u{a822}', SC_OLetter), ('\u{a823}', '\u{a827}',
+ SC_Extend), ('\u{a82c}', '\u{a82c}', SC_Extend), ('\u{a840}', '\u{a873}', SC_OLetter),
+ ('\u{a876}', '\u{a877}', SC_STerm), ('\u{a880}', '\u{a881}', SC_Extend), ('\u{a882}',
+ '\u{a8b3}', SC_OLetter), ('\u{a8b4}', '\u{a8c5}', SC_Extend), ('\u{a8ce}', '\u{a8cf}',
+ SC_STerm), ('\u{a8d0}', '\u{a8d9}', SC_Numeric), ('\u{a8e0}', '\u{a8f1}', SC_Extend),
+ ('\u{a8f2}', '\u{a8f7}', SC_OLetter), ('\u{a8fb}', '\u{a8fb}', SC_OLetter), ('\u{a8fd}',
+ '\u{a8fe}', SC_OLetter), ('\u{a8ff}', '\u{a8ff}', SC_Extend), ('\u{a900}', '\u{a909}',
+ SC_Numeric), ('\u{a90a}', '\u{a925}', SC_OLetter), ('\u{a926}', '\u{a92d}', SC_Extend),
+ ('\u{a92f}', '\u{a92f}', SC_STerm), ('\u{a930}', '\u{a946}', SC_OLetter), ('\u{a947}',
+ '\u{a953}', SC_Extend), ('\u{a960}', '\u{a97c}', SC_OLetter), ('\u{a980}', '\u{a983}',
+ SC_Extend), ('\u{a984}', '\u{a9b2}', SC_OLetter), ('\u{a9b3}', '\u{a9c0}', SC_Extend),
+ ('\u{a9c8}', '\u{a9c9}', SC_STerm), ('\u{a9cf}', '\u{a9cf}', SC_OLetter), ('\u{a9d0}',
+ '\u{a9d9}', SC_Numeric), ('\u{a9e0}', '\u{a9e4}', SC_OLetter), ('\u{a9e5}', '\u{a9e5}',
+ SC_Extend), ('\u{a9e6}', '\u{a9ef}', SC_OLetter), ('\u{a9f0}', '\u{a9f9}', SC_Numeric),
+ ('\u{a9fa}', '\u{a9fe}', SC_OLetter), ('\u{aa00}', '\u{aa28}', SC_OLetter), ('\u{aa29}',
+ '\u{aa36}', SC_Extend), ('\u{aa40}', '\u{aa42}', SC_OLetter), ('\u{aa43}', '\u{aa43}',
+ SC_Extend), ('\u{aa44}', '\u{aa4b}', SC_OLetter), ('\u{aa4c}', '\u{aa4d}', SC_Extend),
+ ('\u{aa50}', '\u{aa59}', SC_Numeric), ('\u{aa5d}', '\u{aa5f}', SC_STerm), ('\u{aa60}',
+ '\u{aa76}', SC_OLetter), ('\u{aa7a}', '\u{aa7a}', SC_OLetter), ('\u{aa7b}', '\u{aa7d}',
+ SC_Extend), ('\u{aa7e}', '\u{aaaf}', SC_OLetter), ('\u{aab0}', '\u{aab0}', SC_Extend),
+ ('\u{aab1}', '\u{aab1}', SC_OLetter), ('\u{aab2}', '\u{aab4}', SC_Extend), ('\u{aab5}',
+ '\u{aab6}', SC_OLetter), ('\u{aab7}', '\u{aab8}', SC_Extend), ('\u{aab9}', '\u{aabd}',
+ SC_OLetter), ('\u{aabe}', '\u{aabf}', SC_Extend), ('\u{aac0}', '\u{aac0}', SC_OLetter),
+ ('\u{aac1}', '\u{aac1}', SC_Extend), ('\u{aac2}', '\u{aac2}', SC_OLetter), ('\u{aadb}',
+ '\u{aadd}', SC_OLetter), ('\u{aae0}', '\u{aaea}', SC_OLetter), ('\u{aaeb}', '\u{aaef}',
+ SC_Extend), ('\u{aaf0}', '\u{aaf1}', SC_STerm), ('\u{aaf2}', '\u{aaf4}', SC_OLetter),
+ ('\u{aaf5}', '\u{aaf6}', SC_Extend), ('\u{ab01}', '\u{ab06}', SC_OLetter), ('\u{ab09}',
+ '\u{ab0e}', SC_OLetter), ('\u{ab11}', '\u{ab16}', SC_OLetter), ('\u{ab20}', '\u{ab26}',
+ SC_OLetter), ('\u{ab28}', '\u{ab2e}', SC_OLetter), ('\u{ab30}', '\u{ab5a}', SC_Lower),
+ ('\u{ab5c}', '\u{ab69}', SC_Lower), ('\u{ab70}', '\u{abbf}', SC_Lower), ('\u{abc0}',
+ '\u{abe2}', SC_OLetter), ('\u{abe3}', '\u{abea}', SC_Extend), ('\u{abeb}', '\u{abeb}',
+ SC_STerm), ('\u{abec}', '\u{abed}', SC_Extend), ('\u{abf0}', '\u{abf9}', SC_Numeric),
+ ('\u{ac00}', '\u{d7a3}', SC_OLetter), ('\u{d7b0}', '\u{d7c6}', SC_OLetter), ('\u{d7cb}',
+ '\u{d7fb}', SC_OLetter), ('\u{f900}', '\u{fa6d}', SC_OLetter), ('\u{fa70}', '\u{fad9}',
+ SC_OLetter), ('\u{fb00}', '\u{fb06}', SC_Lower), ('\u{fb13}', '\u{fb17}', SC_Lower),
+ ('\u{fb1d}', '\u{fb1d}', SC_OLetter), ('\u{fb1e}', '\u{fb1e}', SC_Extend), ('\u{fb1f}',
+ '\u{fb28}', SC_OLetter), ('\u{fb2a}', '\u{fb36}', SC_OLetter), ('\u{fb38}', '\u{fb3c}',
+ SC_OLetter), ('\u{fb3e}', '\u{fb3e}', SC_OLetter), ('\u{fb40}', '\u{fb41}', SC_OLetter),
+ ('\u{fb43}', '\u{fb44}', SC_OLetter), ('\u{fb46}', '\u{fbb1}', SC_OLetter), ('\u{fbd3}',
+ '\u{fd3d}', SC_OLetter), ('\u{fd3e}', '\u{fd3f}', SC_Close), ('\u{fd50}', '\u{fd8f}',
+ SC_OLetter), ('\u{fd92}', '\u{fdc7}', SC_OLetter), ('\u{fdf0}', '\u{fdfb}', SC_OLetter),
+ ('\u{fe00}', '\u{fe0f}', SC_Extend), ('\u{fe10}', '\u{fe11}', SC_SContinue), ('\u{fe13}',
+ '\u{fe13}', SC_SContinue), ('\u{fe17}', '\u{fe18}', SC_Close), ('\u{fe20}', '\u{fe2f}',
+ SC_Extend), ('\u{fe31}', '\u{fe32}', SC_SContinue), ('\u{fe35}', '\u{fe44}', SC_Close),
+ ('\u{fe47}', '\u{fe48}', SC_Close), ('\u{fe50}', '\u{fe51}', SC_SContinue), ('\u{fe52}',
+ '\u{fe52}', SC_ATerm), ('\u{fe55}', '\u{fe55}', SC_SContinue), ('\u{fe56}', '\u{fe57}',
+ SC_STerm), ('\u{fe58}', '\u{fe58}', SC_SContinue), ('\u{fe59}', '\u{fe5e}', SC_Close),
+ ('\u{fe63}', '\u{fe63}', SC_SContinue), ('\u{fe70}', '\u{fe74}', SC_OLetter), ('\u{fe76}',
+ '\u{fefc}', SC_OLetter), ('\u{feff}', '\u{feff}', SC_Format), ('\u{ff01}', '\u{ff01}',
+ SC_STerm), ('\u{ff08}', '\u{ff09}', SC_Close), ('\u{ff0c}', '\u{ff0d}', SC_SContinue),
+ ('\u{ff0e}', '\u{ff0e}', SC_ATerm), ('\u{ff10}', '\u{ff19}', SC_Numeric), ('\u{ff1a}',
+ '\u{ff1a}', SC_SContinue), ('\u{ff1f}', '\u{ff1f}', SC_STerm), ('\u{ff21}', '\u{ff3a}',
+ SC_Upper), ('\u{ff3b}', '\u{ff3b}', SC_Close), ('\u{ff3d}', '\u{ff3d}', SC_Close),
+ ('\u{ff41}', '\u{ff5a}', SC_Lower), ('\u{ff5b}', '\u{ff5b}', SC_Close), ('\u{ff5d}',
+ '\u{ff5d}', SC_Close), ('\u{ff5f}', '\u{ff60}', SC_Close), ('\u{ff61}', '\u{ff61}',
+ SC_STerm), ('\u{ff62}', '\u{ff63}', SC_Close), ('\u{ff64}', '\u{ff64}', SC_SContinue),
+ ('\u{ff66}', '\u{ff9d}', SC_OLetter), ('\u{ff9e}', '\u{ff9f}', SC_Extend), ('\u{ffa0}',
+ '\u{ffbe}', SC_OLetter), ('\u{ffc2}', '\u{ffc7}', SC_OLetter), ('\u{ffca}', '\u{ffcf}',
+ SC_OLetter), ('\u{ffd2}', '\u{ffd7}', SC_OLetter), ('\u{ffda}', '\u{ffdc}', SC_OLetter),
+ ('\u{fff9}', '\u{fffb}', SC_Format), ('\u{10000}', '\u{1000b}', SC_OLetter), ('\u{1000d}',
+ '\u{10026}', SC_OLetter), ('\u{10028}', '\u{1003a}', SC_OLetter), ('\u{1003c}', '\u{1003d}',
+ SC_OLetter), ('\u{1003f}', '\u{1004d}', SC_OLetter), ('\u{10050}', '\u{1005d}', SC_OLetter),
+ ('\u{10080}', '\u{100fa}', SC_OLetter), ('\u{10140}', '\u{10174}', SC_OLetter),
+ ('\u{101fd}', '\u{101fd}', SC_Extend), ('\u{10280}', '\u{1029c}', SC_OLetter), ('\u{102a0}',
+ '\u{102d0}', SC_OLetter), ('\u{102e0}', '\u{102e0}', SC_Extend), ('\u{10300}', '\u{1031f}',
+ SC_OLetter), ('\u{1032d}', '\u{1034a}', SC_OLetter), ('\u{10350}', '\u{10375}', SC_OLetter),
+ ('\u{10376}', '\u{1037a}', SC_Extend), ('\u{10380}', '\u{1039d}', SC_OLetter), ('\u{103a0}',
+ '\u{103c3}', SC_OLetter), ('\u{103c8}', '\u{103cf}', SC_OLetter), ('\u{103d1}', '\u{103d5}',
+ SC_OLetter), ('\u{10400}', '\u{10427}', SC_Upper), ('\u{10428}', '\u{1044f}', SC_Lower),
+ ('\u{10450}', '\u{1049d}', SC_OLetter), ('\u{104a0}', '\u{104a9}', SC_Numeric),
+ ('\u{104b0}', '\u{104d3}', SC_Upper), ('\u{104d8}', '\u{104fb}', SC_Lower), ('\u{10500}',
+ '\u{10527}', SC_OLetter), ('\u{10530}', '\u{10563}', SC_OLetter), ('\u{10570}', '\u{1057a}',
+ SC_Upper), ('\u{1057c}', '\u{1058a}', SC_Upper), ('\u{1058c}', '\u{10592}', SC_Upper),
+ ('\u{10594}', '\u{10595}', SC_Upper), ('\u{10597}', '\u{105a1}', SC_Lower), ('\u{105a3}',
+ '\u{105b1}', SC_Lower), ('\u{105b3}', '\u{105b9}', SC_Lower), ('\u{105bb}', '\u{105bc}',
+ SC_Lower), ('\u{10600}', '\u{10736}', SC_OLetter), ('\u{10740}', '\u{10755}', SC_OLetter),
+ ('\u{10760}', '\u{10767}', SC_OLetter), ('\u{10780}', '\u{10780}', SC_Lower), ('\u{10781}',
+ '\u{10782}', SC_OLetter), ('\u{10783}', '\u{10785}', SC_Lower), ('\u{10787}', '\u{107b0}',
+ SC_Lower), ('\u{107b2}', '\u{107ba}', SC_Lower), ('\u{10800}', '\u{10805}', SC_OLetter),
+ ('\u{10808}', '\u{10808}', SC_OLetter), ('\u{1080a}', '\u{10835}', SC_OLetter),
+ ('\u{10837}', '\u{10838}', SC_OLetter), ('\u{1083c}', '\u{1083c}', SC_OLetter),
+ ('\u{1083f}', '\u{10855}', SC_OLetter), ('\u{10860}', '\u{10876}', SC_OLetter),
+ ('\u{10880}', '\u{1089e}', SC_OLetter), ('\u{108e0}', '\u{108f2}', SC_OLetter),
+ ('\u{108f4}', '\u{108f5}', SC_OLetter), ('\u{10900}', '\u{10915}', SC_OLetter),
+ ('\u{10920}', '\u{10939}', SC_OLetter), ('\u{10980}', '\u{109b7}', SC_OLetter),
+ ('\u{109be}', '\u{109bf}', SC_OLetter), ('\u{10a00}', '\u{10a00}', SC_OLetter),
+ ('\u{10a01}', '\u{10a03}', SC_Extend), ('\u{10a05}', '\u{10a06}', SC_Extend), ('\u{10a0c}',
+ '\u{10a0f}', SC_Extend), ('\u{10a10}', '\u{10a13}', SC_OLetter), ('\u{10a15}', '\u{10a17}',
+ SC_OLetter), ('\u{10a19}', '\u{10a35}', SC_OLetter), ('\u{10a38}', '\u{10a3a}', SC_Extend),
+ ('\u{10a3f}', '\u{10a3f}', SC_Extend), ('\u{10a56}', '\u{10a57}', SC_STerm), ('\u{10a60}',
+ '\u{10a7c}', SC_OLetter), ('\u{10a80}', '\u{10a9c}', SC_OLetter), ('\u{10ac0}', '\u{10ac7}',
+ SC_OLetter), ('\u{10ac9}', '\u{10ae4}', SC_OLetter), ('\u{10ae5}', '\u{10ae6}', SC_Extend),
+ ('\u{10b00}', '\u{10b35}', SC_OLetter), ('\u{10b40}', '\u{10b55}', SC_OLetter),
+ ('\u{10b60}', '\u{10b72}', SC_OLetter), ('\u{10b80}', '\u{10b91}', SC_OLetter),
+ ('\u{10c00}', '\u{10c48}', SC_OLetter), ('\u{10c80}', '\u{10cb2}', SC_Upper), ('\u{10cc0}',
+ '\u{10cf2}', SC_Lower), ('\u{10d00}', '\u{10d23}', SC_OLetter), ('\u{10d24}', '\u{10d27}',
+ SC_Extend), ('\u{10d30}', '\u{10d39}', SC_Numeric), ('\u{10e80}', '\u{10ea9}', SC_OLetter),
+ ('\u{10eab}', '\u{10eac}', SC_Extend), ('\u{10eb0}', '\u{10eb1}', SC_OLetter), ('\u{10efd}',
+ '\u{10eff}', SC_Extend), ('\u{10f00}', '\u{10f1c}', SC_OLetter), ('\u{10f27}', '\u{10f27}',
+ SC_OLetter), ('\u{10f30}', '\u{10f45}', SC_OLetter), ('\u{10f46}', '\u{10f50}', SC_Extend),
+ ('\u{10f55}', '\u{10f59}', SC_STerm), ('\u{10f70}', '\u{10f81}', SC_OLetter), ('\u{10f82}',
+ '\u{10f85}', SC_Extend), ('\u{10f86}', '\u{10f89}', SC_STerm), ('\u{10fb0}', '\u{10fc4}',
+ SC_OLetter), ('\u{10fe0}', '\u{10ff6}', SC_OLetter), ('\u{11000}', '\u{11002}', SC_Extend),
+ ('\u{11003}', '\u{11037}', SC_OLetter), ('\u{11038}', '\u{11046}', SC_Extend), ('\u{11047}',
+ '\u{11048}', SC_STerm), ('\u{11066}', '\u{1106f}', SC_Numeric), ('\u{11070}', '\u{11070}',
+ SC_Extend), ('\u{11071}', '\u{11072}', SC_OLetter), ('\u{11073}', '\u{11074}', SC_Extend),
+ ('\u{11075}', '\u{11075}', SC_OLetter), ('\u{1107f}', '\u{11082}', SC_Extend), ('\u{11083}',
+ '\u{110af}', SC_OLetter), ('\u{110b0}', '\u{110ba}', SC_Extend), ('\u{110bd}', '\u{110bd}',
+ SC_Format), ('\u{110be}', '\u{110c1}', SC_STerm), ('\u{110c2}', '\u{110c2}', SC_Extend),
+ ('\u{110cd}', '\u{110cd}', SC_Format), ('\u{110d0}', '\u{110e8}', SC_OLetter), ('\u{110f0}',
+ '\u{110f9}', SC_Numeric), ('\u{11100}', '\u{11102}', SC_Extend), ('\u{11103}', '\u{11126}',
+ SC_OLetter), ('\u{11127}', '\u{11134}', SC_Extend), ('\u{11136}', '\u{1113f}', SC_Numeric),
+ ('\u{11141}', '\u{11143}', SC_STerm), ('\u{11144}', '\u{11144}', SC_OLetter), ('\u{11145}',
+ '\u{11146}', SC_Extend), ('\u{11147}', '\u{11147}', SC_OLetter), ('\u{11150}', '\u{11172}',
+ SC_OLetter), ('\u{11173}', '\u{11173}', SC_Extend), ('\u{11176}', '\u{11176}', SC_OLetter),
+ ('\u{11180}', '\u{11182}', SC_Extend), ('\u{11183}', '\u{111b2}', SC_OLetter), ('\u{111b3}',
+ '\u{111c0}', SC_Extend), ('\u{111c1}', '\u{111c4}', SC_OLetter), ('\u{111c5}', '\u{111c6}',
+ SC_STerm), ('\u{111c9}', '\u{111cc}', SC_Extend), ('\u{111cd}', '\u{111cd}', SC_STerm),
+ ('\u{111ce}', '\u{111cf}', SC_Extend), ('\u{111d0}', '\u{111d9}', SC_Numeric), ('\u{111da}',
+ '\u{111da}', SC_OLetter), ('\u{111dc}', '\u{111dc}', SC_OLetter), ('\u{111de}', '\u{111df}',
+ SC_STerm), ('\u{11200}', '\u{11211}', SC_OLetter), ('\u{11213}', '\u{1122b}', SC_OLetter),
+ ('\u{1122c}', '\u{11237}', SC_Extend), ('\u{11238}', '\u{11239}', SC_STerm), ('\u{1123b}',
+ '\u{1123c}', SC_STerm), ('\u{1123e}', '\u{1123e}', SC_Extend), ('\u{1123f}', '\u{11240}',
+ SC_OLetter), ('\u{11241}', '\u{11241}', SC_Extend), ('\u{11280}', '\u{11286}', SC_OLetter),
+ ('\u{11288}', '\u{11288}', SC_OLetter), ('\u{1128a}', '\u{1128d}', SC_OLetter),
+ ('\u{1128f}', '\u{1129d}', SC_OLetter), ('\u{1129f}', '\u{112a8}', SC_OLetter),
+ ('\u{112a9}', '\u{112a9}', SC_STerm), ('\u{112b0}', '\u{112de}', SC_OLetter), ('\u{112df}',
+ '\u{112ea}', SC_Extend), ('\u{112f0}', '\u{112f9}', SC_Numeric), ('\u{11300}', '\u{11303}',
+ SC_Extend), ('\u{11305}', '\u{1130c}', SC_OLetter), ('\u{1130f}', '\u{11310}', SC_OLetter),
+ ('\u{11313}', '\u{11328}', SC_OLetter), ('\u{1132a}', '\u{11330}', SC_OLetter),
+ ('\u{11332}', '\u{11333}', SC_OLetter), ('\u{11335}', '\u{11339}', SC_OLetter),
+ ('\u{1133b}', '\u{1133c}', SC_Extend), ('\u{1133d}', '\u{1133d}', SC_OLetter), ('\u{1133e}',
+ '\u{11344}', SC_Extend), ('\u{11347}', '\u{11348}', SC_Extend), ('\u{1134b}', '\u{1134d}',
+ SC_Extend), ('\u{11350}', '\u{11350}', SC_OLetter), ('\u{11357}', '\u{11357}', SC_Extend),
+ ('\u{1135d}', '\u{11361}', SC_OLetter), ('\u{11362}', '\u{11363}', SC_Extend), ('\u{11366}',
+ '\u{1136c}', SC_Extend), ('\u{11370}', '\u{11374}', SC_Extend), ('\u{11400}', '\u{11434}',
+ SC_OLetter), ('\u{11435}', '\u{11446}', SC_Extend), ('\u{11447}', '\u{1144a}', SC_OLetter),
+ ('\u{1144b}', '\u{1144c}', SC_STerm), ('\u{11450}', '\u{11459}', SC_Numeric), ('\u{1145e}',
+ '\u{1145e}', SC_Extend), ('\u{1145f}', '\u{11461}', SC_OLetter), ('\u{11480}', '\u{114af}',
+ SC_OLetter), ('\u{114b0}', '\u{114c3}', SC_Extend), ('\u{114c4}', '\u{114c5}', SC_OLetter),
+ ('\u{114c7}', '\u{114c7}', SC_OLetter), ('\u{114d0}', '\u{114d9}', SC_Numeric),
+ ('\u{11580}', '\u{115ae}', SC_OLetter), ('\u{115af}', '\u{115b5}', SC_Extend), ('\u{115b8}',
+ '\u{115c0}', SC_Extend), ('\u{115c2}', '\u{115c3}', SC_STerm), ('\u{115c9}', '\u{115d7}',
+ SC_STerm), ('\u{115d8}', '\u{115db}', SC_OLetter), ('\u{115dc}', '\u{115dd}', SC_Extend),
+ ('\u{11600}', '\u{1162f}', SC_OLetter), ('\u{11630}', '\u{11640}', SC_Extend), ('\u{11641}',
+ '\u{11642}', SC_STerm), ('\u{11644}', '\u{11644}', SC_OLetter), ('\u{11650}', '\u{11659}',
+ SC_Numeric), ('\u{11680}', '\u{116aa}', SC_OLetter), ('\u{116ab}', '\u{116b7}', SC_Extend),
+ ('\u{116b8}', '\u{116b8}', SC_OLetter), ('\u{116c0}', '\u{116c9}', SC_Numeric),
+ ('\u{11700}', '\u{1171a}', SC_OLetter), ('\u{1171d}', '\u{1172b}', SC_Extend), ('\u{11730}',
+ '\u{11739}', SC_Numeric), ('\u{1173c}', '\u{1173e}', SC_STerm), ('\u{11740}', '\u{11746}',
+ SC_OLetter), ('\u{11800}', '\u{1182b}', SC_OLetter), ('\u{1182c}', '\u{1183a}', SC_Extend),
+ ('\u{118a0}', '\u{118bf}', SC_Upper), ('\u{118c0}', '\u{118df}', SC_Lower), ('\u{118e0}',
+ '\u{118e9}', SC_Numeric), ('\u{118ff}', '\u{11906}', SC_OLetter), ('\u{11909}', '\u{11909}',
+ SC_OLetter), ('\u{1190c}', '\u{11913}', SC_OLetter), ('\u{11915}', '\u{11916}', SC_OLetter),
+ ('\u{11918}', '\u{1192f}', SC_OLetter), ('\u{11930}', '\u{11935}', SC_Extend), ('\u{11937}',
+ '\u{11938}', SC_Extend), ('\u{1193b}', '\u{1193e}', SC_Extend), ('\u{1193f}', '\u{1193f}',
+ SC_OLetter), ('\u{11940}', '\u{11940}', SC_Extend), ('\u{11941}', '\u{11941}', SC_OLetter),
+ ('\u{11942}', '\u{11943}', SC_Extend), ('\u{11944}', '\u{11944}', SC_STerm), ('\u{11946}',
+ '\u{11946}', SC_STerm), ('\u{11950}', '\u{11959}', SC_Numeric), ('\u{119a0}', '\u{119a7}',
+ SC_OLetter), ('\u{119aa}', '\u{119d0}', SC_OLetter), ('\u{119d1}', '\u{119d7}', SC_Extend),
+ ('\u{119da}', '\u{119e0}', SC_Extend), ('\u{119e1}', '\u{119e1}', SC_OLetter), ('\u{119e3}',
+ '\u{119e3}', SC_OLetter), ('\u{119e4}', '\u{119e4}', SC_Extend), ('\u{11a00}', '\u{11a00}',
+ SC_OLetter), ('\u{11a01}', '\u{11a0a}', SC_Extend), ('\u{11a0b}', '\u{11a32}', SC_OLetter),
+ ('\u{11a33}', '\u{11a39}', SC_Extend), ('\u{11a3a}', '\u{11a3a}', SC_OLetter), ('\u{11a3b}',
+ '\u{11a3e}', SC_Extend), ('\u{11a42}', '\u{11a43}', SC_STerm), ('\u{11a47}', '\u{11a47}',
+ SC_Extend), ('\u{11a50}', '\u{11a50}', SC_OLetter), ('\u{11a51}', '\u{11a5b}', SC_Extend),
+ ('\u{11a5c}', '\u{11a89}', SC_OLetter), ('\u{11a8a}', '\u{11a99}', SC_Extend), ('\u{11a9b}',
+ '\u{11a9c}', SC_STerm), ('\u{11a9d}', '\u{11a9d}', SC_OLetter), ('\u{11ab0}', '\u{11af8}',
+ SC_OLetter), ('\u{11c00}', '\u{11c08}', SC_OLetter), ('\u{11c0a}', '\u{11c2e}', SC_OLetter),
+ ('\u{11c2f}', '\u{11c36}', SC_Extend), ('\u{11c38}', '\u{11c3f}', SC_Extend), ('\u{11c40}',
+ '\u{11c40}', SC_OLetter), ('\u{11c41}', '\u{11c42}', SC_STerm), ('\u{11c50}', '\u{11c59}',
+ SC_Numeric), ('\u{11c72}', '\u{11c8f}', SC_OLetter), ('\u{11c92}', '\u{11ca7}', SC_Extend),
+ ('\u{11ca9}', '\u{11cb6}', SC_Extend), ('\u{11d00}', '\u{11d06}', SC_OLetter), ('\u{11d08}',
+ '\u{11d09}', SC_OLetter), ('\u{11d0b}', '\u{11d30}', SC_OLetter), ('\u{11d31}', '\u{11d36}',
+ SC_Extend), ('\u{11d3a}', '\u{11d3a}', SC_Extend), ('\u{11d3c}', '\u{11d3d}', SC_Extend),
+ ('\u{11d3f}', '\u{11d45}', SC_Extend), ('\u{11d46}', '\u{11d46}', SC_OLetter), ('\u{11d47}',
+ '\u{11d47}', SC_Extend), ('\u{11d50}', '\u{11d59}', SC_Numeric), ('\u{11d60}', '\u{11d65}',
+ SC_OLetter), ('\u{11d67}', '\u{11d68}', SC_OLetter), ('\u{11d6a}', '\u{11d89}', SC_OLetter),
+ ('\u{11d8a}', '\u{11d8e}', SC_Extend), ('\u{11d90}', '\u{11d91}', SC_Extend), ('\u{11d93}',
+ '\u{11d97}', SC_Extend), ('\u{11d98}', '\u{11d98}', SC_OLetter), ('\u{11da0}', '\u{11da9}',
+ SC_Numeric), ('\u{11ee0}', '\u{11ef2}', SC_OLetter), ('\u{11ef3}', '\u{11ef6}', SC_Extend),
+ ('\u{11ef7}', '\u{11ef8}', SC_STerm), ('\u{11f00}', '\u{11f01}', SC_Extend), ('\u{11f02}',
+ '\u{11f02}', SC_OLetter), ('\u{11f03}', '\u{11f03}', SC_Extend), ('\u{11f04}', '\u{11f10}',
+ SC_OLetter), ('\u{11f12}', '\u{11f33}', SC_OLetter), ('\u{11f34}', '\u{11f3a}', SC_Extend),
+ ('\u{11f3e}', '\u{11f42}', SC_Extend), ('\u{11f43}', '\u{11f44}', SC_STerm), ('\u{11f50}',
+ '\u{11f59}', SC_Numeric), ('\u{11fb0}', '\u{11fb0}', SC_OLetter), ('\u{12000}', '\u{12399}',
+ SC_OLetter), ('\u{12400}', '\u{1246e}', SC_OLetter), ('\u{12480}', '\u{12543}', SC_OLetter),
+ ('\u{12f90}', '\u{12ff0}', SC_OLetter), ('\u{13000}', '\u{1342f}', SC_OLetter),
+ ('\u{13430}', '\u{1343f}', SC_Format), ('\u{13440}', '\u{13440}', SC_Extend), ('\u{13441}',
+ '\u{13446}', SC_OLetter), ('\u{13447}', '\u{13455}', SC_Extend), ('\u{14400}', '\u{14646}',
+ SC_OLetter), ('\u{16800}', '\u{16a38}', SC_OLetter), ('\u{16a40}', '\u{16a5e}', SC_OLetter),
+ ('\u{16a60}', '\u{16a69}', SC_Numeric), ('\u{16a6e}', '\u{16a6f}', SC_STerm), ('\u{16a70}',
+ '\u{16abe}', SC_OLetter), ('\u{16ac0}', '\u{16ac9}', SC_Numeric), ('\u{16ad0}', '\u{16aed}',
+ SC_OLetter), ('\u{16af0}', '\u{16af4}', SC_Extend), ('\u{16af5}', '\u{16af5}', SC_STerm),
+ ('\u{16b00}', '\u{16b2f}', SC_OLetter), ('\u{16b30}', '\u{16b36}', SC_Extend), ('\u{16b37}',
+ '\u{16b38}', SC_STerm), ('\u{16b40}', '\u{16b43}', SC_OLetter), ('\u{16b44}', '\u{16b44}',
+ SC_STerm), ('\u{16b50}', '\u{16b59}', SC_Numeric), ('\u{16b63}', '\u{16b77}', SC_OLetter),
+ ('\u{16b7d}', '\u{16b8f}', SC_OLetter), ('\u{16e40}', '\u{16e5f}', SC_Upper), ('\u{16e60}',
+ '\u{16e7f}', SC_Lower), ('\u{16e98}', '\u{16e98}', SC_STerm), ('\u{16f00}', '\u{16f4a}',
+ SC_OLetter), ('\u{16f4f}', '\u{16f4f}', SC_Extend), ('\u{16f50}', '\u{16f50}', SC_OLetter),
+ ('\u{16f51}', '\u{16f87}', SC_Extend), ('\u{16f8f}', '\u{16f92}', SC_Extend), ('\u{16f93}',
+ '\u{16f9f}', SC_OLetter), ('\u{16fe0}', '\u{16fe1}', SC_OLetter), ('\u{16fe3}', '\u{16fe3}',
+ SC_OLetter), ('\u{16fe4}', '\u{16fe4}', SC_Extend), ('\u{16ff0}', '\u{16ff1}', SC_Extend),
+ ('\u{17000}', '\u{187f7}', SC_OLetter), ('\u{18800}', '\u{18cd5}', SC_OLetter),
+ ('\u{18d00}', '\u{18d08}', SC_OLetter), ('\u{1aff0}', '\u{1aff3}', SC_OLetter),
+ ('\u{1aff5}', '\u{1affb}', SC_OLetter), ('\u{1affd}', '\u{1affe}', SC_OLetter),
+ ('\u{1b000}', '\u{1b122}', SC_OLetter), ('\u{1b132}', '\u{1b132}', SC_OLetter),
+ ('\u{1b150}', '\u{1b152}', SC_OLetter), ('\u{1b155}', '\u{1b155}', SC_OLetter),
+ ('\u{1b164}', '\u{1b167}', SC_OLetter), ('\u{1b170}', '\u{1b2fb}', SC_OLetter),
+ ('\u{1bc00}', '\u{1bc6a}', SC_OLetter), ('\u{1bc70}', '\u{1bc7c}', SC_OLetter),
+ ('\u{1bc80}', '\u{1bc88}', SC_OLetter), ('\u{1bc90}', '\u{1bc99}', SC_OLetter),
+ ('\u{1bc9d}', '\u{1bc9e}', SC_Extend), ('\u{1bc9f}', '\u{1bc9f}', SC_STerm), ('\u{1bca0}',
+ '\u{1bca3}', SC_Format), ('\u{1cf00}', '\u{1cf2d}', SC_Extend), ('\u{1cf30}', '\u{1cf46}',
+ SC_Extend), ('\u{1d165}', '\u{1d169}', SC_Extend), ('\u{1d16d}', '\u{1d172}', SC_Extend),
+ ('\u{1d173}', '\u{1d17a}', SC_Format), ('\u{1d17b}', '\u{1d182}', SC_Extend), ('\u{1d185}',
+ '\u{1d18b}', SC_Extend), ('\u{1d1aa}', '\u{1d1ad}', SC_Extend), ('\u{1d242}', '\u{1d244}',
+ SC_Extend), ('\u{1d400}', '\u{1d419}', SC_Upper), ('\u{1d41a}', '\u{1d433}', SC_Lower),
+ ('\u{1d434}', '\u{1d44d}', SC_Upper), ('\u{1d44e}', '\u{1d454}', SC_Lower), ('\u{1d456}',
+ '\u{1d467}', SC_Lower), ('\u{1d468}', '\u{1d481}', SC_Upper), ('\u{1d482}', '\u{1d49b}',
+ SC_Lower), ('\u{1d49c}', '\u{1d49c}', SC_Upper), ('\u{1d49e}', '\u{1d49f}', SC_Upper),
+ ('\u{1d4a2}', '\u{1d4a2}', SC_Upper), ('\u{1d4a5}', '\u{1d4a6}', SC_Upper), ('\u{1d4a9}',
+ '\u{1d4ac}', SC_Upper), ('\u{1d4ae}', '\u{1d4b5}', SC_Upper), ('\u{1d4b6}', '\u{1d4b9}',
+ SC_Lower), ('\u{1d4bb}', '\u{1d4bb}', SC_Lower), ('\u{1d4bd}', '\u{1d4c3}', SC_Lower),
+ ('\u{1d4c5}', '\u{1d4cf}', SC_Lower), ('\u{1d4d0}', '\u{1d4e9}', SC_Upper), ('\u{1d4ea}',
+ '\u{1d503}', SC_Lower), ('\u{1d504}', '\u{1d505}', SC_Upper), ('\u{1d507}', '\u{1d50a}',
+ SC_Upper), ('\u{1d50d}', '\u{1d514}', SC_Upper), ('\u{1d516}', '\u{1d51c}', SC_Upper),
+ ('\u{1d51e}', '\u{1d537}', SC_Lower), ('\u{1d538}', '\u{1d539}', SC_Upper), ('\u{1d53b}',
+ '\u{1d53e}', SC_Upper), ('\u{1d540}', '\u{1d544}', SC_Upper), ('\u{1d546}', '\u{1d546}',
+ SC_Upper), ('\u{1d54a}', '\u{1d550}', SC_Upper), ('\u{1d552}', '\u{1d56b}', SC_Lower),
+ ('\u{1d56c}', '\u{1d585}', SC_Upper), ('\u{1d586}', '\u{1d59f}', SC_Lower), ('\u{1d5a0}',
+ '\u{1d5b9}', SC_Upper), ('\u{1d5ba}', '\u{1d5d3}', SC_Lower), ('\u{1d5d4}', '\u{1d5ed}',
+ SC_Upper), ('\u{1d5ee}', '\u{1d607}', SC_Lower), ('\u{1d608}', '\u{1d621}', SC_Upper),
+ ('\u{1d622}', '\u{1d63b}', SC_Lower), ('\u{1d63c}', '\u{1d655}', SC_Upper), ('\u{1d656}',
+ '\u{1d66f}', SC_Lower), ('\u{1d670}', '\u{1d689}', SC_Upper), ('\u{1d68a}', '\u{1d6a5}',
+ SC_Lower), ('\u{1d6a8}', '\u{1d6c0}', SC_Upper), ('\u{1d6c2}', '\u{1d6da}', SC_Lower),
+ ('\u{1d6dc}', '\u{1d6e1}', SC_Lower), ('\u{1d6e2}', '\u{1d6fa}', SC_Upper), ('\u{1d6fc}',
+ '\u{1d714}', SC_Lower), ('\u{1d716}', '\u{1d71b}', SC_Lower), ('\u{1d71c}', '\u{1d734}',
+ SC_Upper), ('\u{1d736}', '\u{1d74e}', SC_Lower), ('\u{1d750}', '\u{1d755}', SC_Lower),
+ ('\u{1d756}', '\u{1d76e}', SC_Upper), ('\u{1d770}', '\u{1d788}', SC_Lower), ('\u{1d78a}',
+ '\u{1d78f}', SC_Lower), ('\u{1d790}', '\u{1d7a8}', SC_Upper), ('\u{1d7aa}', '\u{1d7c2}',
+ SC_Lower), ('\u{1d7c4}', '\u{1d7c9}', SC_Lower), ('\u{1d7ca}', '\u{1d7ca}', SC_Upper),
+ ('\u{1d7cb}', '\u{1d7cb}', SC_Lower), ('\u{1d7ce}', '\u{1d7ff}', SC_Numeric), ('\u{1da00}',
+ '\u{1da36}', SC_Extend), ('\u{1da3b}', '\u{1da6c}', SC_Extend), ('\u{1da75}', '\u{1da75}',
+ SC_Extend), ('\u{1da84}', '\u{1da84}', SC_Extend), ('\u{1da88}', '\u{1da88}', SC_STerm),
+ ('\u{1da9b}', '\u{1da9f}', SC_Extend), ('\u{1daa1}', '\u{1daaf}', SC_Extend), ('\u{1df00}',
+ '\u{1df09}', SC_Lower), ('\u{1df0a}', '\u{1df0a}', SC_OLetter), ('\u{1df0b}', '\u{1df1e}',
+ SC_Lower), ('\u{1df25}', '\u{1df2a}', SC_Lower), ('\u{1e000}', '\u{1e006}', SC_Extend),
+ ('\u{1e008}', '\u{1e018}', SC_Extend), ('\u{1e01b}', '\u{1e021}', SC_Extend), ('\u{1e023}',
+ '\u{1e024}', SC_Extend), ('\u{1e026}', '\u{1e02a}', SC_Extend), ('\u{1e030}', '\u{1e06d}',
+ SC_Lower), ('\u{1e08f}', '\u{1e08f}', SC_Extend), ('\u{1e100}', '\u{1e12c}', SC_OLetter),
+ ('\u{1e130}', '\u{1e136}', SC_Extend), ('\u{1e137}', '\u{1e13d}', SC_OLetter), ('\u{1e140}',
+ '\u{1e149}', SC_Numeric), ('\u{1e14e}', '\u{1e14e}', SC_OLetter), ('\u{1e290}', '\u{1e2ad}',
+ SC_OLetter), ('\u{1e2ae}', '\u{1e2ae}', SC_Extend), ('\u{1e2c0}', '\u{1e2eb}', SC_OLetter),
+ ('\u{1e2ec}', '\u{1e2ef}', SC_Extend), ('\u{1e2f0}', '\u{1e2f9}', SC_Numeric), ('\u{1e4d0}',
+ '\u{1e4eb}', SC_OLetter), ('\u{1e4ec}', '\u{1e4ef}', SC_Extend), ('\u{1e4f0}', '\u{1e4f9}',
+ SC_Numeric), ('\u{1e7e0}', '\u{1e7e6}', SC_OLetter), ('\u{1e7e8}', '\u{1e7eb}', SC_OLetter),
+ ('\u{1e7ed}', '\u{1e7ee}', SC_OLetter), ('\u{1e7f0}', '\u{1e7fe}', SC_OLetter),
+ ('\u{1e800}', '\u{1e8c4}', SC_OLetter), ('\u{1e8d0}', '\u{1e8d6}', SC_Extend), ('\u{1e900}',
+ '\u{1e921}', SC_Upper), ('\u{1e922}', '\u{1e943}', SC_Lower), ('\u{1e944}', '\u{1e94a}',
+ SC_Extend), ('\u{1e94b}', '\u{1e94b}', SC_OLetter), ('\u{1e950}', '\u{1e959}', SC_Numeric),
+ ('\u{1ee00}', '\u{1ee03}', SC_OLetter), ('\u{1ee05}', '\u{1ee1f}', SC_OLetter),
+ ('\u{1ee21}', '\u{1ee22}', SC_OLetter), ('\u{1ee24}', '\u{1ee24}', SC_OLetter),
+ ('\u{1ee27}', '\u{1ee27}', SC_OLetter), ('\u{1ee29}', '\u{1ee32}', SC_OLetter),
+ ('\u{1ee34}', '\u{1ee37}', SC_OLetter), ('\u{1ee39}', '\u{1ee39}', SC_OLetter),
+ ('\u{1ee3b}', '\u{1ee3b}', SC_OLetter), ('\u{1ee42}', '\u{1ee42}', SC_OLetter),
+ ('\u{1ee47}', '\u{1ee47}', SC_OLetter), ('\u{1ee49}', '\u{1ee49}', SC_OLetter),
+ ('\u{1ee4b}', '\u{1ee4b}', SC_OLetter), ('\u{1ee4d}', '\u{1ee4f}', SC_OLetter),
+ ('\u{1ee51}', '\u{1ee52}', SC_OLetter), ('\u{1ee54}', '\u{1ee54}', SC_OLetter),
+ ('\u{1ee57}', '\u{1ee57}', SC_OLetter), ('\u{1ee59}', '\u{1ee59}', SC_OLetter),
+ ('\u{1ee5b}', '\u{1ee5b}', SC_OLetter), ('\u{1ee5d}', '\u{1ee5d}', SC_OLetter),
+ ('\u{1ee5f}', '\u{1ee5f}', SC_OLetter), ('\u{1ee61}', '\u{1ee62}', SC_OLetter),
+ ('\u{1ee64}', '\u{1ee64}', SC_OLetter), ('\u{1ee67}', '\u{1ee6a}', SC_OLetter),
+ ('\u{1ee6c}', '\u{1ee72}', SC_OLetter), ('\u{1ee74}', '\u{1ee77}', SC_OLetter),
+ ('\u{1ee79}', '\u{1ee7c}', SC_OLetter), ('\u{1ee7e}', '\u{1ee7e}', SC_OLetter),
+ ('\u{1ee80}', '\u{1ee89}', SC_OLetter), ('\u{1ee8b}', '\u{1ee9b}', SC_OLetter),
+ ('\u{1eea1}', '\u{1eea3}', SC_OLetter), ('\u{1eea5}', '\u{1eea9}', SC_OLetter),
+ ('\u{1eeab}', '\u{1eebb}', SC_OLetter), ('\u{1f130}', '\u{1f149}', SC_Upper), ('\u{1f150}',
+ '\u{1f169}', SC_Upper), ('\u{1f170}', '\u{1f189}', SC_Upper), ('\u{1f676}', '\u{1f678}',
+ SC_Close), ('\u{1fbf0}', '\u{1fbf9}', SC_Numeric), ('\u{20000}', '\u{2a6df}', SC_OLetter),
+ ('\u{2a700}', '\u{2b739}', SC_OLetter), ('\u{2b740}', '\u{2b81d}', SC_OLetter),
+ ('\u{2b820}', '\u{2cea1}', SC_OLetter), ('\u{2ceb0}', '\u{2ebe0}', SC_OLetter),
+ ('\u{2f800}', '\u{2fa1d}', SC_OLetter), ('\u{30000}', '\u{3134a}', SC_OLetter),
+ ('\u{31350}', '\u{323af}', SC_OLetter), ('\u{e0001}', '\u{e0001}', SC_Format), ('\u{e0020}',
+ '\u{e007f}', SC_Extend), ('\u{e0100}', '\u{e01ef}', SC_Extend)
+ ];
+
+}
diff --git a/third_party/rust/unicode-segmentation/src/test.rs b/third_party/rust/unicode-segmentation/src/test.rs
new file mode 100644
index 0000000000..bc642a50da
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/test.rs
@@ -0,0 +1,247 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use super::UnicodeSegmentation;
+
+use std::prelude::v1::*;
+
+#[test]
+fn test_graphemes() {
+    use crate::testdata::{TEST_DIFF, TEST_SAME};
+
+    // Extra cases checked with separate expectations for the two modes.
+    // Each entry is (input, expected clusters with `is_extended = true`,
+    // expected clusters with `is_extended = false`).
+    const EXTRA_DIFF: &[(&str, &[&str], &[&str])] = &[
+        // Official test suite doesn't include two Prepend chars between two other chars.
+        (
+            "\u{20}\u{600}\u{600}\u{20}",
+            &["\u{20}", "\u{600}\u{600}\u{20}"],
+            &["\u{20}", "\u{600}", "\u{600}", "\u{20}"],
+        ),
+        // Test for Prepend followed by two Any chars
+        (
+            "\u{600}\u{20}\u{20}",
+            &["\u{600}\u{20}", "\u{20}"],
+            &["\u{600}", "\u{20}", "\u{20}"],
+        ),
+    ];
+
+    // Extra cases whose expected clusters are the same in both modes.
+    // Each entry is (input, expected clusters).
+    const EXTRA_SAME: &[(&str, &[&str])] = &[
+        // family emoji (more than two emoji joined by ZWJ)
+        (
+            "\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
+            &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"],
+        ),
+        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
+        // (test case from issue #19)
+        (
+            "\u{1F938}\u{1F3FE}\u{1F3FE}",
+            &["\u{1F938}\u{1F3FE}\u{1F3FE}"],
+        ),
+    ];
+
+    // Segments `s` with the forward and the reverse iterator and requires both to
+    // produce `expected`. The results are collected first so that a failure prints
+    // the input, the mode and both sequences instead of a bare `false`.
+    fn check(s: &str, expected: &[&str], is_extended: bool) {
+        // test forward iterator
+        let forward: Vec<&str> = UnicodeSegmentation::graphemes(s, is_extended).collect();
+        assert_eq!(
+            forward, expected,
+            "Forward graphemes (is_extended = {}) test for {:?} failed.",
+            is_extended, s
+        );
+
+        // test reverse iterator; flip the result back so it can be compared
+        // against the same expected slice
+        let mut backward: Vec<&str> = UnicodeSegmentation::graphemes(s, is_extended)
+            .rev()
+            .collect();
+        backward.reverse();
+        assert_eq!(
+            backward, expected,
+            "Reverse graphemes (is_extended = {}) test for {:?} failed.",
+            is_extended, s
+        );
+    }
+
+    for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
+        check(s, g, true);
+        check(s, g, false);
+    }
+
+    for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
+        check(s, gt, true);
+        check(s, gf, false);
+    }
+
+    // test the indices iterators
+    //
+    // The input is: a + U+0310, e + U+0301, o + U+0308 + U+0332, CR LF.
+    // The combining marks are spelled as escapes because the byte offsets asserted
+    // below (0, 3, 6, 11) only hold for this decomposed form; a raw literal could be
+    // silently changed by an editor or tool that normalizes the source to NFC.
+    let s = "a\u{310}e\u{301}o\u{308}\u{332}\r\n";
+    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
+    let b: &[_] = &[
+        (0, "a\u{310}"),
+        (3, "e\u{301}"),
+        (6, "o\u{308}\u{332}"),
+        (11, "\r\n"),
+    ];
+    assert_eq!(gr_inds, b);
+    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true)
+        .rev()
+        .collect::<Vec<(usize, &str)>>();
+    let b: &[_] = &[
+        (11, "\r\n"),
+        (6, "o\u{308}\u{332}"),
+        (3, "e\u{301}"),
+        (0, "a\u{310}"),
+    ];
+    assert_eq!(gr_inds, b);
+    let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
+    {
+        // `by_ref` lets `count` drain the iterator while keeping it usable afterwards.
+        let gr_inds = gr_inds_iter.by_ref();
+        let e1 = gr_inds.size_hint();
+        // 13 is the byte length of `s`.
+        assert_eq!(e1, (1, Some(13)));
+        let c = gr_inds.count();
+        assert_eq!(c, 4);
+    }
+    // once exhausted, the iterator must report that nothing is left
+    let e2 = gr_inds_iter.size_hint();
+    assert_eq!(e2, (0, Some(0)));
+
+    // make sure the reverse iterator does the right thing with "\n" at beginning of string
+    let s = "\n\r\n\r";
+    let gr = UnicodeSegmentation::graphemes(s, true)
+        .rev()
+        .collect::<Vec<&str>>();
+    let b: &[_] = &["\r", "\r\n", "\n"];
+    assert_eq!(gr, b);
+}
+
+#[test]
+fn test_words() {
+    use crate::testdata::TEST_WORD;
+
+    // Unicode's official tests don't really test longer chains of flag emoji
+    // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
+    const EXTRA_TESTS: &[(&str, &[&str])] = &[
+        (
+            "🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦🇴",
+            &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦🇴"],
+        ),
+        ("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]),
+        (
+            "🇦a🇫🇦🇽a🇦🇱🇩🇿🇦🇸🇦🇩🇦",
+            &["🇦", "a", "🇫🇦", "🇽", "a", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"],
+        ),
+        (
+            "\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",
+            &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"],
+        ),
+        ("😌👎🏼", &["😌", "👎🏼"]),
+        // perhaps wrong, spaces should not be included?
+        ("hello world", &["hello", " ", "world"]),
+        ("🇨🇦🇨🇭🇿🇲🇿 hi", &["🇨🇦", "🇨🇭", "🇿🇲", "🇿", " ", "hi"]),
+    ];
+    for &(text, pieces) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
+        // Compares two iterators after collecting both into vectors, so that a
+        // failure prints the complete sequences along with the current test case.
+        macro_rules! check_eq {
+            ($actual:expr, $wanted:expr, $label:expr) => {{
+                let actual: Vec<_> = $actual.collect();
+                let wanted: Vec<_> = $wanted.collect();
+                assert_eq!(
+                    actual, wanted,
+                    "{} test for testcase ({:?}, {:?}) failed.",
+                    $label, text, pieces
+                );
+            }};
+        }
+
+        // the forward iterator must yield the expected pieces in order
+        check_eq!(
+            text.split_word_bounds(),
+            pieces.iter().cloned(),
+            "Forward word boundaries"
+        );
+
+        // the reverse iterator must yield them back to front
+        check_eq!(
+            text.split_word_bounds().rev(),
+            pieces.iter().rev().cloned(),
+            "Reverse word boundaries"
+        );
+
+        // byte offset at which each expected piece starts, derived from the
+        // lengths of the pieces that precede it
+        let starts: Vec<usize> = pieces
+            .iter()
+            .scan(0, |next, piece| {
+                let begin = *next;
+                *next += piece.len();
+                Some(begin)
+            })
+            .collect();
+
+        // the forward indices iterator must report those offsets
+        check_eq!(
+            text.split_word_bound_indices().map(|(offset, _)| offset),
+            starts.iter().cloned(),
+            "Forward word indices"
+        );
+
+        // and the backward indices iterator the same offsets in reverse
+        check_eq!(
+            text.split_word_bound_indices().rev().map(|(offset, _)| offset),
+            starts.iter().rev().cloned(),
+            "Reverse word indices"
+        );
+    }
+}
+
+#[test]
+fn test_sentences() {
+    use crate::testdata::TEST_SENTENCE;
+
+    // Each entry pairs an input string with the sentences it must be split into.
+    for &(s, w) in TEST_SENTENCE.iter() {
+        // collect into vectors for better diagnostics in the failure case
+        let testing: Vec<_> = s.split_sentence_bounds().collect();
+        let expected: Vec<_> = w.iter().cloned().collect();
+        assert_eq!(
+            testing, expected,
+            "{} test for testcase ({:?}, {:?}) failed.",
+            "Forward sentence boundaries", s, w
+        );
+    }
+}
+
+quickcheck! {
+    // Extended grapheme clusters: iterating backwards must visit the same
+    // clusters as iterating forwards, just in the opposite order.
+    fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
+        let forward: Vec<_> = s.graphemes(true).collect();
+        let backward: Vec<_> = s.graphemes(true).rev().collect();
+        forward.iter().eq(backward.iter().rev())
+    }
+
+    // Same forward/backward agreement for legacy grapheme clusters.
+    fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
+        let forward: Vec<_> = s.graphemes(false).collect();
+        let backward: Vec<_> = s.graphemes(false).rev().collect();
+        forward.iter().eq(backward.iter().rev())
+    }
+
+    // Concatenating the clusters must reproduce the input in both modes.
+    fn quickcheck_join_graphemes(s: String) -> bool {
+        let extended: String = s.graphemes(true).collect();
+        let legacy: String = s.graphemes(false).collect();
+        extended == s && legacy == s
+    }
+
+    // Word-boundary segments: backward iteration mirrors forward iteration.
+    fn quickcheck_forward_reverse_words(s: String) -> bool {
+        let forward: Vec<_> = s.split_word_bounds().collect();
+        let backward: Vec<_> = s.split_word_bounds().rev().collect();
+        forward.iter().eq(backward.iter().rev())
+    }
+
+    // Concatenating the word-boundary segments must reproduce the input.
+    fn quickcheck_join_words(s: String) -> bool {
+        let joined: String = s.split_word_bounds().collect();
+        joined == s
+    }
+}
diff --git a/third_party/rust/unicode-segmentation/src/testdata.rs b/third_party/rust/unicode-segmentation/src/testdata.rs
new file mode 100644
index 0000000000..8a675aafe9
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/testdata.rs
@@ -0,0 +1,5250 @@
+// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+
+#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
+// official Unicode test data
+// http://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakTest.txt
+pub const TEST_SAME: &'static [(&'static str, &'static [&'static str])] = &[
+ ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
+ ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
+ ("\u{20}\u{d}", &["\u{20}", "\u{d}"]),
+ ("\u{20}\u{308}\u{d}", &["\u{20}\u{308}", "\u{d}"]),
+ ("\u{20}\u{a}", &["\u{20}", "\u{a}"]),
+ ("\u{20}\u{308}\u{a}", &["\u{20}\u{308}", "\u{a}"]),
+ ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
+ ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
+ ("\u{20}\u{34f}", &["\u{20}\u{34f}"]),
+ ("\u{20}\u{308}\u{34f}", &["\u{20}\u{308}\u{34f}"]),
+ ("\u{20}\u{1f1e6}", &["\u{20}", "\u{1f1e6}"]),
+ ("\u{20}\u{308}\u{1f1e6}", &["\u{20}\u{308}", "\u{1f1e6}"]),
+ ("\u{20}\u{600}", &["\u{20}", "\u{600}"]),
+ ("\u{20}\u{308}\u{600}", &["\u{20}\u{308}", "\u{600}"]),
+ ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
+ ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
+ ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
+ ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
+ ("\u{20}\u{11a8}", &["\u{20}", "\u{11a8}"]),
+ ("\u{20}\u{308}\u{11a8}", &["\u{20}\u{308}", "\u{11a8}"]),
+ ("\u{20}\u{ac00}", &["\u{20}", "\u{ac00}"]),
+ ("\u{20}\u{308}\u{ac00}", &["\u{20}\u{308}", "\u{ac00}"]),
+ ("\u{20}\u{ac01}", &["\u{20}", "\u{ac01}"]),
+ ("\u{20}\u{308}\u{ac01}", &["\u{20}\u{308}", "\u{ac01}"]),
+ ("\u{20}\u{231a}", &["\u{20}", "\u{231a}"]),
+ ("\u{20}\u{308}\u{231a}", &["\u{20}\u{308}", "\u{231a}"]),
+ ("\u{20}\u{300}", &["\u{20}\u{300}"]),
+ ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
+ ("\u{20}\u{200d}", &["\u{20}\u{200d}"]),
+ ("\u{20}\u{308}\u{200d}", &["\u{20}\u{308}\u{200d}"]),
+ ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
+ ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
+ ("\u{d}\u{20}", &["\u{d}", "\u{20}"]),
+ ("\u{d}\u{308}\u{20}", &["\u{d}", "\u{308}", "\u{20}"]),
+ ("\u{d}\u{d}", &["\u{d}", "\u{d}"]),
+ ("\u{d}\u{308}\u{d}", &["\u{d}", "\u{308}", "\u{d}"]),
+ ("\u{d}\u{a}", &["\u{d}\u{a}"]),
+ ("\u{d}\u{308}\u{a}", &["\u{d}", "\u{308}", "\u{a}"]),
+ ("\u{d}\u{1}", &["\u{d}", "\u{1}"]),
+ ("\u{d}\u{308}\u{1}", &["\u{d}", "\u{308}", "\u{1}"]),
+ ("\u{d}\u{34f}", &["\u{d}", "\u{34f}"]),
+ ("\u{d}\u{308}\u{34f}", &["\u{d}", "\u{308}\u{34f}"]),
+ ("\u{d}\u{1f1e6}", &["\u{d}", "\u{1f1e6}"]),
+ ("\u{d}\u{308}\u{1f1e6}", &["\u{d}", "\u{308}", "\u{1f1e6}"]),
+ ("\u{d}\u{600}", &["\u{d}", "\u{600}"]),
+ ("\u{d}\u{308}\u{600}", &["\u{d}", "\u{308}", "\u{600}"]),
+ ("\u{d}\u{903}", &["\u{d}", "\u{903}"]),
+ ("\u{d}\u{1100}", &["\u{d}", "\u{1100}"]),
+ ("\u{d}\u{308}\u{1100}", &["\u{d}", "\u{308}", "\u{1100}"]),
+ ("\u{d}\u{1160}", &["\u{d}", "\u{1160}"]),
+ ("\u{d}\u{308}\u{1160}", &["\u{d}", "\u{308}", "\u{1160}"]),
+ ("\u{d}\u{11a8}", &["\u{d}", "\u{11a8}"]),
+ ("\u{d}\u{308}\u{11a8}", &["\u{d}", "\u{308}", "\u{11a8}"]),
+ ("\u{d}\u{ac00}", &["\u{d}", "\u{ac00}"]),
+ ("\u{d}\u{308}\u{ac00}", &["\u{d}", "\u{308}", "\u{ac00}"]),
+ ("\u{d}\u{ac01}", &["\u{d}", "\u{ac01}"]),
+ ("\u{d}\u{308}\u{ac01}", &["\u{d}", "\u{308}", "\u{ac01}"]),
+ ("\u{d}\u{231a}", &["\u{d}", "\u{231a}"]),
+ ("\u{d}\u{308}\u{231a}", &["\u{d}", "\u{308}", "\u{231a}"]),
+ ("\u{d}\u{300}", &["\u{d}", "\u{300}"]),
+ ("\u{d}\u{308}\u{300}", &["\u{d}", "\u{308}\u{300}"]),
+ ("\u{d}\u{200d}", &["\u{d}", "\u{200d}"]),
+ ("\u{d}\u{308}\u{200d}", &["\u{d}", "\u{308}\u{200d}"]),
+ ("\u{d}\u{378}", &["\u{d}", "\u{378}"]),
+ ("\u{d}\u{308}\u{378}", &["\u{d}", "\u{308}", "\u{378}"]),
+ ("\u{a}\u{20}", &["\u{a}", "\u{20}"]),
+ ("\u{a}\u{308}\u{20}", &["\u{a}", "\u{308}", "\u{20}"]),
+ ("\u{a}\u{d}", &["\u{a}", "\u{d}"]),
+ ("\u{a}\u{308}\u{d}", &["\u{a}", "\u{308}", "\u{d}"]),
+ ("\u{a}\u{a}", &["\u{a}", "\u{a}"]),
+ ("\u{a}\u{308}\u{a}", &["\u{a}", "\u{308}", "\u{a}"]),
+ ("\u{a}\u{1}", &["\u{a}", "\u{1}"]),
+ ("\u{a}\u{308}\u{1}", &["\u{a}", "\u{308}", "\u{1}"]),
+ ("\u{a}\u{34f}", &["\u{a}", "\u{34f}"]),
+ ("\u{a}\u{308}\u{34f}", &["\u{a}", "\u{308}\u{34f}"]),
+ ("\u{a}\u{1f1e6}", &["\u{a}", "\u{1f1e6}"]),
+ ("\u{a}\u{308}\u{1f1e6}", &["\u{a}", "\u{308}", "\u{1f1e6}"]),
+ ("\u{a}\u{600}", &["\u{a}", "\u{600}"]),
+ ("\u{a}\u{308}\u{600}", &["\u{a}", "\u{308}", "\u{600}"]),
+ ("\u{a}\u{903}", &["\u{a}", "\u{903}"]),
+ ("\u{a}\u{1100}", &["\u{a}", "\u{1100}"]),
+ ("\u{a}\u{308}\u{1100}", &["\u{a}", "\u{308}", "\u{1100}"]),
+ ("\u{a}\u{1160}", &["\u{a}", "\u{1160}"]),
+ ("\u{a}\u{308}\u{1160}", &["\u{a}", "\u{308}", "\u{1160}"]),
+ ("\u{a}\u{11a8}", &["\u{a}", "\u{11a8}"]),
+ ("\u{a}\u{308}\u{11a8}", &["\u{a}", "\u{308}", "\u{11a8}"]),
+ ("\u{a}\u{ac00}", &["\u{a}", "\u{ac00}"]),
+ ("\u{a}\u{308}\u{ac00}", &["\u{a}", "\u{308}", "\u{ac00}"]),
+ ("\u{a}\u{ac01}", &["\u{a}", "\u{ac01}"]),
+ ("\u{a}\u{308}\u{ac01}", &["\u{a}", "\u{308}", "\u{ac01}"]),
+ ("\u{a}\u{231a}", &["\u{a}", "\u{231a}"]),
+ ("\u{a}\u{308}\u{231a}", &["\u{a}", "\u{308}", "\u{231a}"]),
+ ("\u{a}\u{300}", &["\u{a}", "\u{300}"]),
+ ("\u{a}\u{308}\u{300}", &["\u{a}", "\u{308}\u{300}"]),
+ ("\u{a}\u{200d}", &["\u{a}", "\u{200d}"]),
+ ("\u{a}\u{308}\u{200d}", &["\u{a}", "\u{308}\u{200d}"]),
+ ("\u{a}\u{378}", &["\u{a}", "\u{378}"]),
+ ("\u{a}\u{308}\u{378}", &["\u{a}", "\u{308}", "\u{378}"]),
+ ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
+ ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
+ ("\u{1}\u{d}", &["\u{1}", "\u{d}"]),
+ ("\u{1}\u{308}\u{d}", &["\u{1}", "\u{308}", "\u{d}"]),
+ ("\u{1}\u{a}", &["\u{1}", "\u{a}"]),
+ ("\u{1}\u{308}\u{a}", &["\u{1}", "\u{308}", "\u{a}"]),
+ ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
+ ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
+ ("\u{1}\u{34f}", &["\u{1}", "\u{34f}"]),
+ ("\u{1}\u{308}\u{34f}", &["\u{1}", "\u{308}\u{34f}"]),
+ ("\u{1}\u{1f1e6}", &["\u{1}", "\u{1f1e6}"]),
+ ("\u{1}\u{308}\u{1f1e6}", &["\u{1}", "\u{308}", "\u{1f1e6}"]),
+ ("\u{1}\u{600}", &["\u{1}", "\u{600}"]),
+ ("\u{1}\u{308}\u{600}", &["\u{1}", "\u{308}", "\u{600}"]),
+ ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
+ ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
+ ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
+ ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
+ ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
+ ("\u{1}\u{11a8}", &["\u{1}", "\u{11a8}"]),
+ ("\u{1}\u{308}\u{11a8}", &["\u{1}", "\u{308}", "\u{11a8}"]),
+ ("\u{1}\u{ac00}", &["\u{1}", "\u{ac00}"]),
+ ("\u{1}\u{308}\u{ac00}", &["\u{1}", "\u{308}", "\u{ac00}"]),
+ ("\u{1}\u{ac01}", &["\u{1}", "\u{ac01}"]),
+ ("\u{1}\u{308}\u{ac01}", &["\u{1}", "\u{308}", "\u{ac01}"]),
+ ("\u{1}\u{231a}", &["\u{1}", "\u{231a}"]),
+ ("\u{1}\u{308}\u{231a}", &["\u{1}", "\u{308}", "\u{231a}"]),
+ ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
+ ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
+ ("\u{1}\u{200d}", &["\u{1}", "\u{200d}"]),
+ ("\u{1}\u{308}\u{200d}", &["\u{1}", "\u{308}\u{200d}"]),
+ ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
+ ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
+ ("\u{34f}\u{20}", &["\u{34f}", "\u{20}"]),
+ ("\u{34f}\u{308}\u{20}", &["\u{34f}\u{308}", "\u{20}"]),
+ ("\u{34f}\u{d}", &["\u{34f}", "\u{d}"]),
+ ("\u{34f}\u{308}\u{d}", &["\u{34f}\u{308}", "\u{d}"]),
+ ("\u{34f}\u{a}", &["\u{34f}", "\u{a}"]),
+ ("\u{34f}\u{308}\u{a}", &["\u{34f}\u{308}", "\u{a}"]),
+ ("\u{34f}\u{1}", &["\u{34f}", "\u{1}"]),
+ ("\u{34f}\u{308}\u{1}", &["\u{34f}\u{308}", "\u{1}"]),
+ ("\u{34f}\u{34f}", &["\u{34f}\u{34f}"]),
+ ("\u{34f}\u{308}\u{34f}", &["\u{34f}\u{308}\u{34f}"]),
+ ("\u{34f}\u{1f1e6}", &["\u{34f}", "\u{1f1e6}"]),
+ ("\u{34f}\u{308}\u{1f1e6}", &["\u{34f}\u{308}", "\u{1f1e6}"]),
+ ("\u{34f}\u{600}", &["\u{34f}", "\u{600}"]),
+ ("\u{34f}\u{308}\u{600}", &["\u{34f}\u{308}", "\u{600}"]),
+ ("\u{34f}\u{1100}", &["\u{34f}", "\u{1100}"]),
+ ("\u{34f}\u{308}\u{1100}", &["\u{34f}\u{308}", "\u{1100}"]),
+ ("\u{34f}\u{1160}", &["\u{34f}", "\u{1160}"]),
+ ("\u{34f}\u{308}\u{1160}", &["\u{34f}\u{308}", "\u{1160}"]),
+ ("\u{34f}\u{11a8}", &["\u{34f}", "\u{11a8}"]),
+ ("\u{34f}\u{308}\u{11a8}", &["\u{34f}\u{308}", "\u{11a8}"]),
+ ("\u{34f}\u{ac00}", &["\u{34f}", "\u{ac00}"]),
+ ("\u{34f}\u{308}\u{ac00}", &["\u{34f}\u{308}", "\u{ac00}"]),
+ ("\u{34f}\u{ac01}", &["\u{34f}", "\u{ac01}"]),
+ ("\u{34f}\u{308}\u{ac01}", &["\u{34f}\u{308}", "\u{ac01}"]),
+ ("\u{34f}\u{231a}", &["\u{34f}", "\u{231a}"]),
+ ("\u{34f}\u{308}\u{231a}", &["\u{34f}\u{308}", "\u{231a}"]),
+ ("\u{34f}\u{300}", &["\u{34f}\u{300}"]),
+ ("\u{34f}\u{308}\u{300}", &["\u{34f}\u{308}\u{300}"]),
+ ("\u{34f}\u{200d}", &["\u{34f}\u{200d}"]),
+ ("\u{34f}\u{308}\u{200d}", &["\u{34f}\u{308}\u{200d}"]),
+ ("\u{34f}\u{378}", &["\u{34f}", "\u{378}"]),
+ ("\u{34f}\u{308}\u{378}", &["\u{34f}\u{308}", "\u{378}"]),
+ ("\u{1f1e6}\u{20}", &["\u{1f1e6}", "\u{20}"]),
+ ("\u{1f1e6}\u{308}\u{20}", &["\u{1f1e6}\u{308}", "\u{20}"]),
+ ("\u{1f1e6}\u{d}", &["\u{1f1e6}", "\u{d}"]),
+ ("\u{1f1e6}\u{308}\u{d}", &["\u{1f1e6}\u{308}", "\u{d}"]),
+ ("\u{1f1e6}\u{a}", &["\u{1f1e6}", "\u{a}"]),
+ ("\u{1f1e6}\u{308}\u{a}", &["\u{1f1e6}\u{308}", "\u{a}"]),
+ ("\u{1f1e6}\u{1}", &["\u{1f1e6}", "\u{1}"]),
+ ("\u{1f1e6}\u{308}\u{1}", &["\u{1f1e6}\u{308}", "\u{1}"]),
+ ("\u{1f1e6}\u{34f}", &["\u{1f1e6}\u{34f}"]),
+ ("\u{1f1e6}\u{308}\u{34f}", &["\u{1f1e6}\u{308}\u{34f}"]),
+ ("\u{1f1e6}\u{1f1e6}", &["\u{1f1e6}\u{1f1e6}"]),
+ (
+ "\u{1f1e6}\u{308}\u{1f1e6}",
+ &["\u{1f1e6}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{1f1e6}\u{600}", &["\u{1f1e6}", "\u{600}"]),
+ ("\u{1f1e6}\u{308}\u{600}", &["\u{1f1e6}\u{308}", "\u{600}"]),
+ ("\u{1f1e6}\u{1100}", &["\u{1f1e6}", "\u{1100}"]),
+ (
+ "\u{1f1e6}\u{308}\u{1100}",
+ &["\u{1f1e6}\u{308}", "\u{1100}"],
+ ),
+ ("\u{1f1e6}\u{1160}", &["\u{1f1e6}", "\u{1160}"]),
+ (
+ "\u{1f1e6}\u{308}\u{1160}",
+ &["\u{1f1e6}\u{308}", "\u{1160}"],
+ ),
+ ("\u{1f1e6}\u{11a8}", &["\u{1f1e6}", "\u{11a8}"]),
+ (
+ "\u{1f1e6}\u{308}\u{11a8}",
+ &["\u{1f1e6}\u{308}", "\u{11a8}"],
+ ),
+ ("\u{1f1e6}\u{ac00}", &["\u{1f1e6}", "\u{ac00}"]),
+ (
+ "\u{1f1e6}\u{308}\u{ac00}",
+ &["\u{1f1e6}\u{308}", "\u{ac00}"],
+ ),
+ ("\u{1f1e6}\u{ac01}", &["\u{1f1e6}", "\u{ac01}"]),
+ (
+ "\u{1f1e6}\u{308}\u{ac01}",
+ &["\u{1f1e6}\u{308}", "\u{ac01}"],
+ ),
+ ("\u{1f1e6}\u{231a}", &["\u{1f1e6}", "\u{231a}"]),
+ (
+ "\u{1f1e6}\u{308}\u{231a}",
+ &["\u{1f1e6}\u{308}", "\u{231a}"],
+ ),
+ ("\u{1f1e6}\u{300}", &["\u{1f1e6}\u{300}"]),
+ ("\u{1f1e6}\u{308}\u{300}", &["\u{1f1e6}\u{308}\u{300}"]),
+ ("\u{1f1e6}\u{200d}", &["\u{1f1e6}\u{200d}"]),
+ ("\u{1f1e6}\u{308}\u{200d}", &["\u{1f1e6}\u{308}\u{200d}"]),
+ ("\u{1f1e6}\u{378}", &["\u{1f1e6}", "\u{378}"]),
+ ("\u{1f1e6}\u{308}\u{378}", &["\u{1f1e6}\u{308}", "\u{378}"]),
+ ("\u{600}\u{308}\u{20}", &["\u{600}\u{308}", "\u{20}"]),
+ ("\u{600}\u{d}", &["\u{600}", "\u{d}"]),
+ ("\u{600}\u{308}\u{d}", &["\u{600}\u{308}", "\u{d}"]),
+ ("\u{600}\u{a}", &["\u{600}", "\u{a}"]),
+ ("\u{600}\u{308}\u{a}", &["\u{600}\u{308}", "\u{a}"]),
+ ("\u{600}\u{1}", &["\u{600}", "\u{1}"]),
+ ("\u{600}\u{308}\u{1}", &["\u{600}\u{308}", "\u{1}"]),
+ ("\u{600}\u{34f}", &["\u{600}\u{34f}"]),
+ ("\u{600}\u{308}\u{34f}", &["\u{600}\u{308}\u{34f}"]),
+ ("\u{600}\u{308}\u{1f1e6}", &["\u{600}\u{308}", "\u{1f1e6}"]),
+ ("\u{600}\u{308}\u{600}", &["\u{600}\u{308}", "\u{600}"]),
+ ("\u{600}\u{308}\u{1100}", &["\u{600}\u{308}", "\u{1100}"]),
+ ("\u{600}\u{308}\u{1160}", &["\u{600}\u{308}", "\u{1160}"]),
+ ("\u{600}\u{308}\u{11a8}", &["\u{600}\u{308}", "\u{11a8}"]),
+ ("\u{600}\u{308}\u{ac00}", &["\u{600}\u{308}", "\u{ac00}"]),
+ ("\u{600}\u{308}\u{ac01}", &["\u{600}\u{308}", "\u{ac01}"]),
+ ("\u{600}\u{308}\u{231a}", &["\u{600}\u{308}", "\u{231a}"]),
+ ("\u{600}\u{300}", &["\u{600}\u{300}"]),
+ ("\u{600}\u{308}\u{300}", &["\u{600}\u{308}\u{300}"]),
+ ("\u{600}\u{200d}", &["\u{600}\u{200d}"]),
+ ("\u{600}\u{308}\u{200d}", &["\u{600}\u{308}\u{200d}"]),
+ ("\u{600}\u{308}\u{378}", &["\u{600}\u{308}", "\u{378}"]),
+ ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
+ ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
+ ("\u{903}\u{d}", &["\u{903}", "\u{d}"]),
+ ("\u{903}\u{308}\u{d}", &["\u{903}\u{308}", "\u{d}"]),
+ ("\u{903}\u{a}", &["\u{903}", "\u{a}"]),
+ ("\u{903}\u{308}\u{a}", &["\u{903}\u{308}", "\u{a}"]),
+ ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
+ ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
+ ("\u{903}\u{34f}", &["\u{903}\u{34f}"]),
+ ("\u{903}\u{308}\u{34f}", &["\u{903}\u{308}\u{34f}"]),
+ ("\u{903}\u{1f1e6}", &["\u{903}", "\u{1f1e6}"]),
+ ("\u{903}\u{308}\u{1f1e6}", &["\u{903}\u{308}", "\u{1f1e6}"]),
+ ("\u{903}\u{600}", &["\u{903}", "\u{600}"]),
+ ("\u{903}\u{308}\u{600}", &["\u{903}\u{308}", "\u{600}"]),
+ ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
+ ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
+ ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
+ ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
+ ("\u{903}\u{11a8}", &["\u{903}", "\u{11a8}"]),
+ ("\u{903}\u{308}\u{11a8}", &["\u{903}\u{308}", "\u{11a8}"]),
+ ("\u{903}\u{ac00}", &["\u{903}", "\u{ac00}"]),
+ ("\u{903}\u{308}\u{ac00}", &["\u{903}\u{308}", "\u{ac00}"]),
+ ("\u{903}\u{ac01}", &["\u{903}", "\u{ac01}"]),
+ ("\u{903}\u{308}\u{ac01}", &["\u{903}\u{308}", "\u{ac01}"]),
+ ("\u{903}\u{231a}", &["\u{903}", "\u{231a}"]),
+ ("\u{903}\u{308}\u{231a}", &["\u{903}\u{308}", "\u{231a}"]),
+ ("\u{903}\u{300}", &["\u{903}\u{300}"]),
+ ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
+ ("\u{903}\u{200d}", &["\u{903}\u{200d}"]),
+ ("\u{903}\u{308}\u{200d}", &["\u{903}\u{308}\u{200d}"]),
+ ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
+ ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
+ ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
+ ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
+ ("\u{1100}\u{d}", &["\u{1100}", "\u{d}"]),
+ ("\u{1100}\u{308}\u{d}", &["\u{1100}\u{308}", "\u{d}"]),
+ ("\u{1100}\u{a}", &["\u{1100}", "\u{a}"]),
+ ("\u{1100}\u{308}\u{a}", &["\u{1100}\u{308}", "\u{a}"]),
+ ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
+ ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
+ ("\u{1100}\u{34f}", &["\u{1100}\u{34f}"]),
+ ("\u{1100}\u{308}\u{34f}", &["\u{1100}\u{308}\u{34f}"]),
+ ("\u{1100}\u{1f1e6}", &["\u{1100}", "\u{1f1e6}"]),
+ (
+ "\u{1100}\u{308}\u{1f1e6}",
+ &["\u{1100}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{1100}\u{600}", &["\u{1100}", "\u{600}"]),
+ ("\u{1100}\u{308}\u{600}", &["\u{1100}\u{308}", "\u{600}"]),
+ ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
+ ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
+ ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
+ ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
+ ("\u{1100}\u{11a8}", &["\u{1100}", "\u{11a8}"]),
+ ("\u{1100}\u{308}\u{11a8}", &["\u{1100}\u{308}", "\u{11a8}"]),
+ ("\u{1100}\u{ac00}", &["\u{1100}\u{ac00}"]),
+ ("\u{1100}\u{308}\u{ac00}", &["\u{1100}\u{308}", "\u{ac00}"]),
+ ("\u{1100}\u{ac01}", &["\u{1100}\u{ac01}"]),
+ ("\u{1100}\u{308}\u{ac01}", &["\u{1100}\u{308}", "\u{ac01}"]),
+ ("\u{1100}\u{231a}", &["\u{1100}", "\u{231a}"]),
+ ("\u{1100}\u{308}\u{231a}", &["\u{1100}\u{308}", "\u{231a}"]),
+ ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
+ ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
+ ("\u{1100}\u{200d}", &["\u{1100}\u{200d}"]),
+ ("\u{1100}\u{308}\u{200d}", &["\u{1100}\u{308}\u{200d}"]),
+ ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
+ ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
+ ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
+ ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
+ ("\u{1160}\u{d}", &["\u{1160}", "\u{d}"]),
+ ("\u{1160}\u{308}\u{d}", &["\u{1160}\u{308}", "\u{d}"]),
+ ("\u{1160}\u{a}", &["\u{1160}", "\u{a}"]),
+ ("\u{1160}\u{308}\u{a}", &["\u{1160}\u{308}", "\u{a}"]),
+ ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
+ ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
+ ("\u{1160}\u{34f}", &["\u{1160}\u{34f}"]),
+ ("\u{1160}\u{308}\u{34f}", &["\u{1160}\u{308}\u{34f}"]),
+ ("\u{1160}\u{1f1e6}", &["\u{1160}", "\u{1f1e6}"]),
+ (
+ "\u{1160}\u{308}\u{1f1e6}",
+ &["\u{1160}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{1160}\u{600}", &["\u{1160}", "\u{600}"]),
+ ("\u{1160}\u{308}\u{600}", &["\u{1160}\u{308}", "\u{600}"]),
+ ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
+ ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
+ ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
+ ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
+ ("\u{1160}\u{11a8}", &["\u{1160}\u{11a8}"]),
+ ("\u{1160}\u{308}\u{11a8}", &["\u{1160}\u{308}", "\u{11a8}"]),
+ ("\u{1160}\u{ac00}", &["\u{1160}", "\u{ac00}"]),
+ ("\u{1160}\u{308}\u{ac00}", &["\u{1160}\u{308}", "\u{ac00}"]),
+ ("\u{1160}\u{ac01}", &["\u{1160}", "\u{ac01}"]),
+ ("\u{1160}\u{308}\u{ac01}", &["\u{1160}\u{308}", "\u{ac01}"]),
+ ("\u{1160}\u{231a}", &["\u{1160}", "\u{231a}"]),
+ ("\u{1160}\u{308}\u{231a}", &["\u{1160}\u{308}", "\u{231a}"]),
+ ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
+ ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
+ ("\u{1160}\u{200d}", &["\u{1160}\u{200d}"]),
+ ("\u{1160}\u{308}\u{200d}", &["\u{1160}\u{308}\u{200d}"]),
+ ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
+ ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
+ ("\u{11a8}\u{20}", &["\u{11a8}", "\u{20}"]),
+ ("\u{11a8}\u{308}\u{20}", &["\u{11a8}\u{308}", "\u{20}"]),
+ ("\u{11a8}\u{d}", &["\u{11a8}", "\u{d}"]),
+ ("\u{11a8}\u{308}\u{d}", &["\u{11a8}\u{308}", "\u{d}"]),
+ ("\u{11a8}\u{a}", &["\u{11a8}", "\u{a}"]),
+ ("\u{11a8}\u{308}\u{a}", &["\u{11a8}\u{308}", "\u{a}"]),
+ ("\u{11a8}\u{1}", &["\u{11a8}", "\u{1}"]),
+ ("\u{11a8}\u{308}\u{1}", &["\u{11a8}\u{308}", "\u{1}"]),
+ ("\u{11a8}\u{34f}", &["\u{11a8}\u{34f}"]),
+ ("\u{11a8}\u{308}\u{34f}", &["\u{11a8}\u{308}\u{34f}"]),
+ ("\u{11a8}\u{1f1e6}", &["\u{11a8}", "\u{1f1e6}"]),
+ (
+ "\u{11a8}\u{308}\u{1f1e6}",
+ &["\u{11a8}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{11a8}\u{600}", &["\u{11a8}", "\u{600}"]),
+ ("\u{11a8}\u{308}\u{600}", &["\u{11a8}\u{308}", "\u{600}"]),
+ ("\u{11a8}\u{1100}", &["\u{11a8}", "\u{1100}"]),
+ ("\u{11a8}\u{308}\u{1100}", &["\u{11a8}\u{308}", "\u{1100}"]),
+ ("\u{11a8}\u{1160}", &["\u{11a8}", "\u{1160}"]),
+ ("\u{11a8}\u{308}\u{1160}", &["\u{11a8}\u{308}", "\u{1160}"]),
+ ("\u{11a8}\u{11a8}", &["\u{11a8}\u{11a8}"]),
+ ("\u{11a8}\u{308}\u{11a8}", &["\u{11a8}\u{308}", "\u{11a8}"]),
+ ("\u{11a8}\u{ac00}", &["\u{11a8}", "\u{ac00}"]),
+ ("\u{11a8}\u{308}\u{ac00}", &["\u{11a8}\u{308}", "\u{ac00}"]),
+ ("\u{11a8}\u{ac01}", &["\u{11a8}", "\u{ac01}"]),
+ ("\u{11a8}\u{308}\u{ac01}", &["\u{11a8}\u{308}", "\u{ac01}"]),
+ ("\u{11a8}\u{231a}", &["\u{11a8}", "\u{231a}"]),
+ ("\u{11a8}\u{308}\u{231a}", &["\u{11a8}\u{308}", "\u{231a}"]),
+ ("\u{11a8}\u{300}", &["\u{11a8}\u{300}"]),
+ ("\u{11a8}\u{308}\u{300}", &["\u{11a8}\u{308}\u{300}"]),
+ ("\u{11a8}\u{200d}", &["\u{11a8}\u{200d}"]),
+ ("\u{11a8}\u{308}\u{200d}", &["\u{11a8}\u{308}\u{200d}"]),
+ ("\u{11a8}\u{378}", &["\u{11a8}", "\u{378}"]),
+ ("\u{11a8}\u{308}\u{378}", &["\u{11a8}\u{308}", "\u{378}"]),
+ ("\u{ac00}\u{20}", &["\u{ac00}", "\u{20}"]),
+ ("\u{ac00}\u{308}\u{20}", &["\u{ac00}\u{308}", "\u{20}"]),
+ ("\u{ac00}\u{d}", &["\u{ac00}", "\u{d}"]),
+ ("\u{ac00}\u{308}\u{d}", &["\u{ac00}\u{308}", "\u{d}"]),
+ ("\u{ac00}\u{a}", &["\u{ac00}", "\u{a}"]),
+ ("\u{ac00}\u{308}\u{a}", &["\u{ac00}\u{308}", "\u{a}"]),
+ ("\u{ac00}\u{1}", &["\u{ac00}", "\u{1}"]),
+ ("\u{ac00}\u{308}\u{1}", &["\u{ac00}\u{308}", "\u{1}"]),
+ ("\u{ac00}\u{34f}", &["\u{ac00}\u{34f}"]),
+ ("\u{ac00}\u{308}\u{34f}", &["\u{ac00}\u{308}\u{34f}"]),
+ ("\u{ac00}\u{1f1e6}", &["\u{ac00}", "\u{1f1e6}"]),
+ (
+ "\u{ac00}\u{308}\u{1f1e6}",
+ &["\u{ac00}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{ac00}\u{600}", &["\u{ac00}", "\u{600}"]),
+ ("\u{ac00}\u{308}\u{600}", &["\u{ac00}\u{308}", "\u{600}"]),
+ ("\u{ac00}\u{1100}", &["\u{ac00}", "\u{1100}"]),
+ ("\u{ac00}\u{308}\u{1100}", &["\u{ac00}\u{308}", "\u{1100}"]),
+ ("\u{ac00}\u{1160}", &["\u{ac00}\u{1160}"]),
+ ("\u{ac00}\u{308}\u{1160}", &["\u{ac00}\u{308}", "\u{1160}"]),
+ ("\u{ac00}\u{11a8}", &["\u{ac00}\u{11a8}"]),
+ ("\u{ac00}\u{308}\u{11a8}", &["\u{ac00}\u{308}", "\u{11a8}"]),
+ ("\u{ac00}\u{ac00}", &["\u{ac00}", "\u{ac00}"]),
+ ("\u{ac00}\u{308}\u{ac00}", &["\u{ac00}\u{308}", "\u{ac00}"]),
+ ("\u{ac00}\u{ac01}", &["\u{ac00}", "\u{ac01}"]),
+ ("\u{ac00}\u{308}\u{ac01}", &["\u{ac00}\u{308}", "\u{ac01}"]),
+ ("\u{ac00}\u{231a}", &["\u{ac00}", "\u{231a}"]),
+ ("\u{ac00}\u{308}\u{231a}", &["\u{ac00}\u{308}", "\u{231a}"]),
+ ("\u{ac00}\u{300}", &["\u{ac00}\u{300}"]),
+ ("\u{ac00}\u{308}\u{300}", &["\u{ac00}\u{308}\u{300}"]),
+ ("\u{ac00}\u{200d}", &["\u{ac00}\u{200d}"]),
+ ("\u{ac00}\u{308}\u{200d}", &["\u{ac00}\u{308}\u{200d}"]),
+ ("\u{ac00}\u{378}", &["\u{ac00}", "\u{378}"]),
+ ("\u{ac00}\u{308}\u{378}", &["\u{ac00}\u{308}", "\u{378}"]),
+ ("\u{ac01}\u{20}", &["\u{ac01}", "\u{20}"]),
+ ("\u{ac01}\u{308}\u{20}", &["\u{ac01}\u{308}", "\u{20}"]),
+ ("\u{ac01}\u{d}", &["\u{ac01}", "\u{d}"]),
+ ("\u{ac01}\u{308}\u{d}", &["\u{ac01}\u{308}", "\u{d}"]),
+ ("\u{ac01}\u{a}", &["\u{ac01}", "\u{a}"]),
+ ("\u{ac01}\u{308}\u{a}", &["\u{ac01}\u{308}", "\u{a}"]),
+ ("\u{ac01}\u{1}", &["\u{ac01}", "\u{1}"]),
+ ("\u{ac01}\u{308}\u{1}", &["\u{ac01}\u{308}", "\u{1}"]),
+ ("\u{ac01}\u{34f}", &["\u{ac01}\u{34f}"]),
+ ("\u{ac01}\u{308}\u{34f}", &["\u{ac01}\u{308}\u{34f}"]),
+ ("\u{ac01}\u{1f1e6}", &["\u{ac01}", "\u{1f1e6}"]),
+ (
+ "\u{ac01}\u{308}\u{1f1e6}",
+ &["\u{ac01}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{ac01}\u{600}", &["\u{ac01}", "\u{600}"]),
+ ("\u{ac01}\u{308}\u{600}", &["\u{ac01}\u{308}", "\u{600}"]),
+ ("\u{ac01}\u{1100}", &["\u{ac01}", "\u{1100}"]),
+ ("\u{ac01}\u{308}\u{1100}", &["\u{ac01}\u{308}", "\u{1100}"]),
+ ("\u{ac01}\u{1160}", &["\u{ac01}", "\u{1160}"]),
+ ("\u{ac01}\u{308}\u{1160}", &["\u{ac01}\u{308}", "\u{1160}"]),
+ ("\u{ac01}\u{11a8}", &["\u{ac01}\u{11a8}"]),
+ ("\u{ac01}\u{308}\u{11a8}", &["\u{ac01}\u{308}", "\u{11a8}"]),
+ ("\u{ac01}\u{ac00}", &["\u{ac01}", "\u{ac00}"]),
+ ("\u{ac01}\u{308}\u{ac00}", &["\u{ac01}\u{308}", "\u{ac00}"]),
+ ("\u{ac01}\u{ac01}", &["\u{ac01}", "\u{ac01}"]),
+ ("\u{ac01}\u{308}\u{ac01}", &["\u{ac01}\u{308}", "\u{ac01}"]),
+ ("\u{ac01}\u{231a}", &["\u{ac01}", "\u{231a}"]),
+ ("\u{ac01}\u{308}\u{231a}", &["\u{ac01}\u{308}", "\u{231a}"]),
+ ("\u{ac01}\u{300}", &["\u{ac01}\u{300}"]),
+ ("\u{ac01}\u{308}\u{300}", &["\u{ac01}\u{308}\u{300}"]),
+ ("\u{ac01}\u{200d}", &["\u{ac01}\u{200d}"]),
+ ("\u{ac01}\u{308}\u{200d}", &["\u{ac01}\u{308}\u{200d}"]),
+ ("\u{ac01}\u{378}", &["\u{ac01}", "\u{378}"]),
+ ("\u{ac01}\u{308}\u{378}", &["\u{ac01}\u{308}", "\u{378}"]),
+ ("\u{231a}\u{20}", &["\u{231a}", "\u{20}"]),
+ ("\u{231a}\u{308}\u{20}", &["\u{231a}\u{308}", "\u{20}"]),
+ ("\u{231a}\u{d}", &["\u{231a}", "\u{d}"]),
+ ("\u{231a}\u{308}\u{d}", &["\u{231a}\u{308}", "\u{d}"]),
+ ("\u{231a}\u{a}", &["\u{231a}", "\u{a}"]),
+ ("\u{231a}\u{308}\u{a}", &["\u{231a}\u{308}", "\u{a}"]),
+ ("\u{231a}\u{1}", &["\u{231a}", "\u{1}"]),
+ ("\u{231a}\u{308}\u{1}", &["\u{231a}\u{308}", "\u{1}"]),
+ ("\u{231a}\u{34f}", &["\u{231a}\u{34f}"]),
+ ("\u{231a}\u{308}\u{34f}", &["\u{231a}\u{308}\u{34f}"]),
+ ("\u{231a}\u{1f1e6}", &["\u{231a}", "\u{1f1e6}"]),
+ (
+ "\u{231a}\u{308}\u{1f1e6}",
+ &["\u{231a}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{231a}\u{600}", &["\u{231a}", "\u{600}"]),
+ ("\u{231a}\u{308}\u{600}", &["\u{231a}\u{308}", "\u{600}"]),
+ ("\u{231a}\u{1100}", &["\u{231a}", "\u{1100}"]),
+ ("\u{231a}\u{308}\u{1100}", &["\u{231a}\u{308}", "\u{1100}"]),
+ ("\u{231a}\u{1160}", &["\u{231a}", "\u{1160}"]),
+ ("\u{231a}\u{308}\u{1160}", &["\u{231a}\u{308}", "\u{1160}"]),
+ ("\u{231a}\u{11a8}", &["\u{231a}", "\u{11a8}"]),
+ ("\u{231a}\u{308}\u{11a8}", &["\u{231a}\u{308}", "\u{11a8}"]),
+ ("\u{231a}\u{ac00}", &["\u{231a}", "\u{ac00}"]),
+ ("\u{231a}\u{308}\u{ac00}", &["\u{231a}\u{308}", "\u{ac00}"]),
+ ("\u{231a}\u{ac01}", &["\u{231a}", "\u{ac01}"]),
+ ("\u{231a}\u{308}\u{ac01}", &["\u{231a}\u{308}", "\u{ac01}"]),
+ ("\u{231a}\u{231a}", &["\u{231a}", "\u{231a}"]),
+ ("\u{231a}\u{308}\u{231a}", &["\u{231a}\u{308}", "\u{231a}"]),
+ ("\u{231a}\u{300}", &["\u{231a}\u{300}"]),
+ ("\u{231a}\u{308}\u{300}", &["\u{231a}\u{308}\u{300}"]),
+ ("\u{231a}\u{200d}", &["\u{231a}\u{200d}"]),
+ ("\u{231a}\u{308}\u{200d}", &["\u{231a}\u{308}\u{200d}"]),
+ ("\u{231a}\u{378}", &["\u{231a}", "\u{378}"]),
+ ("\u{231a}\u{308}\u{378}", &["\u{231a}\u{308}", "\u{378}"]),
+ ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
+ ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
+ ("\u{300}\u{d}", &["\u{300}", "\u{d}"]),
+ ("\u{300}\u{308}\u{d}", &["\u{300}\u{308}", "\u{d}"]),
+ ("\u{300}\u{a}", &["\u{300}", "\u{a}"]),
+ ("\u{300}\u{308}\u{a}", &["\u{300}\u{308}", "\u{a}"]),
+ ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
+ ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
+ ("\u{300}\u{34f}", &["\u{300}\u{34f}"]),
+ ("\u{300}\u{308}\u{34f}", &["\u{300}\u{308}\u{34f}"]),
+ ("\u{300}\u{1f1e6}", &["\u{300}", "\u{1f1e6}"]),
+ ("\u{300}\u{308}\u{1f1e6}", &["\u{300}\u{308}", "\u{1f1e6}"]),
+ ("\u{300}\u{600}", &["\u{300}", "\u{600}"]),
+ ("\u{300}\u{308}\u{600}", &["\u{300}\u{308}", "\u{600}"]),
+ ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
+ ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
+ ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
+ ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
+ ("\u{300}\u{11a8}", &["\u{300}", "\u{11a8}"]),
+ ("\u{300}\u{308}\u{11a8}", &["\u{300}\u{308}", "\u{11a8}"]),
+ ("\u{300}\u{ac00}", &["\u{300}", "\u{ac00}"]),
+ ("\u{300}\u{308}\u{ac00}", &["\u{300}\u{308}", "\u{ac00}"]),
+ ("\u{300}\u{ac01}", &["\u{300}", "\u{ac01}"]),
+ ("\u{300}\u{308}\u{ac01}", &["\u{300}\u{308}", "\u{ac01}"]),
+ ("\u{300}\u{231a}", &["\u{300}", "\u{231a}"]),
+ ("\u{300}\u{308}\u{231a}", &["\u{300}\u{308}", "\u{231a}"]),
+ ("\u{300}\u{300}", &["\u{300}\u{300}"]),
+ ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
+ ("\u{300}\u{200d}", &["\u{300}\u{200d}"]),
+ ("\u{300}\u{308}\u{200d}", &["\u{300}\u{308}\u{200d}"]),
+ ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
+ ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
+ ("\u{200d}\u{20}", &["\u{200d}", "\u{20}"]),
+ ("\u{200d}\u{308}\u{20}", &["\u{200d}\u{308}", "\u{20}"]),
+ ("\u{200d}\u{d}", &["\u{200d}", "\u{d}"]),
+ ("\u{200d}\u{308}\u{d}", &["\u{200d}\u{308}", "\u{d}"]),
+ ("\u{200d}\u{a}", &["\u{200d}", "\u{a}"]),
+ ("\u{200d}\u{308}\u{a}", &["\u{200d}\u{308}", "\u{a}"]),
+ ("\u{200d}\u{1}", &["\u{200d}", "\u{1}"]),
+ ("\u{200d}\u{308}\u{1}", &["\u{200d}\u{308}", "\u{1}"]),
+ ("\u{200d}\u{34f}", &["\u{200d}\u{34f}"]),
+ ("\u{200d}\u{308}\u{34f}", &["\u{200d}\u{308}\u{34f}"]),
+ ("\u{200d}\u{1f1e6}", &["\u{200d}", "\u{1f1e6}"]),
+ (
+ "\u{200d}\u{308}\u{1f1e6}",
+ &["\u{200d}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{200d}\u{600}", &["\u{200d}", "\u{600}"]),
+ ("\u{200d}\u{308}\u{600}", &["\u{200d}\u{308}", "\u{600}"]),
+ ("\u{200d}\u{1100}", &["\u{200d}", "\u{1100}"]),
+ ("\u{200d}\u{308}\u{1100}", &["\u{200d}\u{308}", "\u{1100}"]),
+ ("\u{200d}\u{1160}", &["\u{200d}", "\u{1160}"]),
+ ("\u{200d}\u{308}\u{1160}", &["\u{200d}\u{308}", "\u{1160}"]),
+ ("\u{200d}\u{11a8}", &["\u{200d}", "\u{11a8}"]),
+ ("\u{200d}\u{308}\u{11a8}", &["\u{200d}\u{308}", "\u{11a8}"]),
+ ("\u{200d}\u{ac00}", &["\u{200d}", "\u{ac00}"]),
+ ("\u{200d}\u{308}\u{ac00}", &["\u{200d}\u{308}", "\u{ac00}"]),
+ ("\u{200d}\u{ac01}", &["\u{200d}", "\u{ac01}"]),
+ ("\u{200d}\u{308}\u{ac01}", &["\u{200d}\u{308}", "\u{ac01}"]),
+ ("\u{200d}\u{231a}", &["\u{200d}", "\u{231a}"]),
+ ("\u{200d}\u{308}\u{231a}", &["\u{200d}\u{308}", "\u{231a}"]),
+ ("\u{200d}\u{300}", &["\u{200d}\u{300}"]),
+ ("\u{200d}\u{308}\u{300}", &["\u{200d}\u{308}\u{300}"]),
+ ("\u{200d}\u{200d}", &["\u{200d}\u{200d}"]),
+ ("\u{200d}\u{308}\u{200d}", &["\u{200d}\u{308}\u{200d}"]),
+ ("\u{200d}\u{378}", &["\u{200d}", "\u{378}"]),
+ ("\u{200d}\u{308}\u{378}", &["\u{200d}\u{308}", "\u{378}"]),
+ ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
+ ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
+ ("\u{378}\u{d}", &["\u{378}", "\u{d}"]),
+ ("\u{378}\u{308}\u{d}", &["\u{378}\u{308}", "\u{d}"]),
+ ("\u{378}\u{a}", &["\u{378}", "\u{a}"]),
+ ("\u{378}\u{308}\u{a}", &["\u{378}\u{308}", "\u{a}"]),
+ ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
+ ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
+ ("\u{378}\u{34f}", &["\u{378}\u{34f}"]),
+ ("\u{378}\u{308}\u{34f}", &["\u{378}\u{308}\u{34f}"]),
+ ("\u{378}\u{1f1e6}", &["\u{378}", "\u{1f1e6}"]),
+ ("\u{378}\u{308}\u{1f1e6}", &["\u{378}\u{308}", "\u{1f1e6}"]),
+ ("\u{378}\u{600}", &["\u{378}", "\u{600}"]),
+ ("\u{378}\u{308}\u{600}", &["\u{378}\u{308}", "\u{600}"]),
+ ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
+ ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
+ ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
+ ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
+ ("\u{378}\u{11a8}", &["\u{378}", "\u{11a8}"]),
+ ("\u{378}\u{308}\u{11a8}", &["\u{378}\u{308}", "\u{11a8}"]),
+ ("\u{378}\u{ac00}", &["\u{378}", "\u{ac00}"]),
+ ("\u{378}\u{308}\u{ac00}", &["\u{378}\u{308}", "\u{ac00}"]),
+ ("\u{378}\u{ac01}", &["\u{378}", "\u{ac01}"]),
+ ("\u{378}\u{308}\u{ac01}", &["\u{378}\u{308}", "\u{ac01}"]),
+ ("\u{378}\u{231a}", &["\u{378}", "\u{231a}"]),
+ ("\u{378}\u{308}\u{231a}", &["\u{378}\u{308}", "\u{231a}"]),
+ ("\u{378}\u{300}", &["\u{378}\u{300}"]),
+ ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
+ ("\u{378}\u{200d}", &["\u{378}\u{200d}"]),
+ ("\u{378}\u{308}\u{200d}", &["\u{378}\u{308}\u{200d}"]),
+ ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
+ ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
+ (
+ "\u{d}\u{a}\u{61}\u{a}\u{308}",
+ &["\u{d}\u{a}", "\u{61}", "\u{a}", "\u{308}"],
+ ),
+ ("\u{61}\u{308}", &["\u{61}\u{308}"]),
+ ("\u{20}\u{200d}\u{646}", &["\u{20}\u{200d}", "\u{646}"]),
+ ("\u{646}\u{200d}\u{20}", &["\u{646}\u{200d}", "\u{20}"]),
+ ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
+ (
+ "\u{ac00}\u{11a8}\u{1100}",
+ &["\u{ac00}\u{11a8}", "\u{1100}"],
+ ),
+ (
+ "\u{ac01}\u{11a8}\u{1100}",
+ &["\u{ac01}\u{11a8}", "\u{1100}"],
+ ),
+ (
+ "\u{1f1e6}\u{1f1e7}\u{1f1e8}\u{62}",
+ &["\u{1f1e6}\u{1f1e7}", "\u{1f1e8}", "\u{62}"],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{1f1e7}\u{1f1e8}\u{62}",
+ &["\u{61}", "\u{1f1e6}\u{1f1e7}", "\u{1f1e8}", "\u{62}"],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{1f1e7}\u{200d}\u{1f1e8}\u{62}",
+ &[
+ "\u{61}",
+ "\u{1f1e6}\u{1f1e7}\u{200d}",
+ "\u{1f1e8}",
+ "\u{62}",
+ ],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{200d}\u{1f1e7}\u{1f1e8}\u{62}",
+ &[
+ "\u{61}",
+ "\u{1f1e6}\u{200d}",
+ "\u{1f1e7}\u{1f1e8}",
+ "\u{62}",
+ ],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{1f1e7}\u{1f1e8}\u{1f1e9}\u{62}",
+ &[
+ "\u{61}",
+ "\u{1f1e6}\u{1f1e7}",
+ "\u{1f1e8}\u{1f1e9}",
+ "\u{62}",
+ ],
+ ),
+ ("\u{61}\u{200d}", &["\u{61}\u{200d}"]),
+ ("\u{61}\u{308}\u{62}", &["\u{61}\u{308}", "\u{62}"]),
+ (
+ "\u{1f476}\u{1f3ff}\u{1f476}",
+ &["\u{1f476}\u{1f3ff}", "\u{1f476}"],
+ ),
+ (
+ "\u{61}\u{1f3ff}\u{1f476}",
+ &["\u{61}\u{1f3ff}", "\u{1f476}"],
+ ),
+ (
+ "\u{61}\u{1f3ff}\u{1f476}\u{200d}\u{1f6d1}",
+ &["\u{61}\u{1f3ff}", "\u{1f476}\u{200d}\u{1f6d1}"],
+ ),
+ (
+ "\u{1f476}\u{1f3ff}\u{308}\u{200d}\u{1f476}\u{1f3ff}",
+ &["\u{1f476}\u{1f3ff}\u{308}\u{200d}\u{1f476}\u{1f3ff}"],
+ ),
+ (
+ "\u{1f6d1}\u{200d}\u{1f6d1}",
+ &["\u{1f6d1}\u{200d}\u{1f6d1}"],
+ ),
+ ("\u{61}\u{200d}\u{1f6d1}", &["\u{61}\u{200d}", "\u{1f6d1}"]),
+ ("\u{2701}\u{200d}\u{2701}", &["\u{2701}\u{200d}\u{2701}"]),
+ ("\u{61}\u{200d}\u{2701}", &["\u{61}\u{200d}", "\u{2701}"]),
+];
+
+pub const TEST_DIFF: &'static [(
+ &'static str,
+ &'static [&'static str],
+ &'static [&'static str],
+)] = &[
+ ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]),
+ (
+ "\u{20}\u{308}\u{903}",
+ &["\u{20}\u{308}\u{903}"],
+ &["\u{20}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{d}\u{308}\u{903}",
+ &["\u{d}", "\u{308}\u{903}"],
+ &["\u{d}", "\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{a}\u{308}\u{903}",
+ &["\u{a}", "\u{308}\u{903}"],
+ &["\u{a}", "\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{1}\u{308}\u{903}",
+ &["\u{1}", "\u{308}\u{903}"],
+ &["\u{1}", "\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{34f}\u{903}",
+ &["\u{34f}\u{903}"],
+ &["\u{34f}", "\u{903}"],
+ ),
+ (
+ "\u{34f}\u{308}\u{903}",
+ &["\u{34f}\u{308}\u{903}"],
+ &["\u{34f}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{1f1e6}\u{903}",
+ &["\u{1f1e6}\u{903}"],
+ &["\u{1f1e6}", "\u{903}"],
+ ),
+ (
+ "\u{1f1e6}\u{308}\u{903}",
+ &["\u{1f1e6}\u{308}\u{903}"],
+ &["\u{1f1e6}\u{308}", "\u{903}"],
+ ),
+ ("\u{600}\u{20}", &["\u{600}\u{20}"], &["\u{600}", "\u{20}"]),
+ (
+ "\u{600}\u{1f1e6}",
+ &["\u{600}\u{1f1e6}"],
+ &["\u{600}", "\u{1f1e6}"],
+ ),
+ (
+ "\u{600}\u{600}",
+ &["\u{600}\u{600}"],
+ &["\u{600}", "\u{600}"],
+ ),
+ (
+ "\u{600}\u{903}",
+ &["\u{600}\u{903}"],
+ &["\u{600}", "\u{903}"],
+ ),
+ (
+ "\u{600}\u{308}\u{903}",
+ &["\u{600}\u{308}\u{903}"],
+ &["\u{600}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{600}\u{1100}",
+ &["\u{600}\u{1100}"],
+ &["\u{600}", "\u{1100}"],
+ ),
+ (
+ "\u{600}\u{1160}",
+ &["\u{600}\u{1160}"],
+ &["\u{600}", "\u{1160}"],
+ ),
+ (
+ "\u{600}\u{11a8}",
+ &["\u{600}\u{11a8}"],
+ &["\u{600}", "\u{11a8}"],
+ ),
+ (
+ "\u{600}\u{ac00}",
+ &["\u{600}\u{ac00}"],
+ &["\u{600}", "\u{ac00}"],
+ ),
+ (
+ "\u{600}\u{ac01}",
+ &["\u{600}\u{ac01}"],
+ &["\u{600}", "\u{ac01}"],
+ ),
+ (
+ "\u{600}\u{231a}",
+ &["\u{600}\u{231a}"],
+ &["\u{600}", "\u{231a}"],
+ ),
+ (
+ "\u{600}\u{378}",
+ &["\u{600}\u{378}"],
+ &["\u{600}", "\u{378}"],
+ ),
+ (
+ "\u{903}\u{903}",
+ &["\u{903}\u{903}"],
+ &["\u{903}", "\u{903}"],
+ ),
+ (
+ "\u{903}\u{308}\u{903}",
+ &["\u{903}\u{308}\u{903}"],
+ &["\u{903}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{1100}\u{903}",
+ &["\u{1100}\u{903}"],
+ &["\u{1100}", "\u{903}"],
+ ),
+ (
+ "\u{1100}\u{308}\u{903}",
+ &["\u{1100}\u{308}\u{903}"],
+ &["\u{1100}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{1160}\u{903}",
+ &["\u{1160}\u{903}"],
+ &["\u{1160}", "\u{903}"],
+ ),
+ (
+ "\u{1160}\u{308}\u{903}",
+ &["\u{1160}\u{308}\u{903}"],
+ &["\u{1160}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{11a8}\u{903}",
+ &["\u{11a8}\u{903}"],
+ &["\u{11a8}", "\u{903}"],
+ ),
+ (
+ "\u{11a8}\u{308}\u{903}",
+ &["\u{11a8}\u{308}\u{903}"],
+ &["\u{11a8}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{ac00}\u{903}",
+ &["\u{ac00}\u{903}"],
+ &["\u{ac00}", "\u{903}"],
+ ),
+ (
+ "\u{ac00}\u{308}\u{903}",
+ &["\u{ac00}\u{308}\u{903}"],
+ &["\u{ac00}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{ac01}\u{903}",
+ &["\u{ac01}\u{903}"],
+ &["\u{ac01}", "\u{903}"],
+ ),
+ (
+ "\u{ac01}\u{308}\u{903}",
+ &["\u{ac01}\u{308}\u{903}"],
+ &["\u{ac01}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{231a}\u{903}",
+ &["\u{231a}\u{903}"],
+ &["\u{231a}", "\u{903}"],
+ ),
+ (
+ "\u{231a}\u{308}\u{903}",
+ &["\u{231a}\u{308}\u{903}"],
+ &["\u{231a}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{300}\u{903}",
+ &["\u{300}\u{903}"],
+ &["\u{300}", "\u{903}"],
+ ),
+ (
+ "\u{300}\u{308}\u{903}",
+ &["\u{300}\u{308}\u{903}"],
+ &["\u{300}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{200d}\u{903}",
+ &["\u{200d}\u{903}"],
+ &["\u{200d}", "\u{903}"],
+ ),
+ (
+ "\u{200d}\u{308}\u{903}",
+ &["\u{200d}\u{308}\u{903}"],
+ &["\u{200d}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{378}\u{903}",
+ &["\u{378}\u{903}"],
+ &["\u{378}", "\u{903}"],
+ ),
+ (
+ "\u{378}\u{308}\u{903}",
+ &["\u{378}\u{308}\u{903}"],
+ &["\u{378}\u{308}", "\u{903}"],
+ ),
+ (
+ "\u{61}\u{903}\u{62}",
+ &["\u{61}\u{903}", "\u{62}"],
+ &["\u{61}", "\u{903}", "\u{62}"],
+ ),
+ (
+ "\u{61}\u{600}\u{62}",
+ &["\u{61}", "\u{600}\u{62}"],
+ &["\u{61}", "\u{600}", "\u{62}"],
+ ),
+];
+
+// official Unicode test data
+// http://www.unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakTest.txt
+pub const TEST_WORD: &'static [(&'static str, &'static [&'static str])] = &[
+ ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
+ ("\u{1}\u{308}\u{1}", &["\u{1}\u{308}", "\u{1}"]),
+ ("\u{1}\u{d}", &["\u{1}", "\u{d}"]),
+ ("\u{1}\u{308}\u{d}", &["\u{1}\u{308}", "\u{d}"]),
+ ("\u{1}\u{a}", &["\u{1}", "\u{a}"]),
+ ("\u{1}\u{308}\u{a}", &["\u{1}\u{308}", "\u{a}"]),
+ ("\u{1}\u{b}", &["\u{1}", "\u{b}"]),
+ ("\u{1}\u{308}\u{b}", &["\u{1}\u{308}", "\u{b}"]),
+ ("\u{1}\u{3031}", &["\u{1}", "\u{3031}"]),
+ ("\u{1}\u{308}\u{3031}", &["\u{1}\u{308}", "\u{3031}"]),
+ ("\u{1}\u{41}", &["\u{1}", "\u{41}"]),
+ ("\u{1}\u{308}\u{41}", &["\u{1}\u{308}", "\u{41}"]),
+ ("\u{1}\u{3a}", &["\u{1}", "\u{3a}"]),
+ ("\u{1}\u{308}\u{3a}", &["\u{1}\u{308}", "\u{3a}"]),
+ ("\u{1}\u{2c}", &["\u{1}", "\u{2c}"]),
+ ("\u{1}\u{308}\u{2c}", &["\u{1}\u{308}", "\u{2c}"]),
+ ("\u{1}\u{2e}", &["\u{1}", "\u{2e}"]),
+ ("\u{1}\u{308}\u{2e}", &["\u{1}\u{308}", "\u{2e}"]),
+ ("\u{1}\u{30}", &["\u{1}", "\u{30}"]),
+ ("\u{1}\u{308}\u{30}", &["\u{1}\u{308}", "\u{30}"]),
+ ("\u{1}\u{5f}", &["\u{1}", "\u{5f}"]),
+ ("\u{1}\u{308}\u{5f}", &["\u{1}\u{308}", "\u{5f}"]),
+ ("\u{1}\u{1f1e6}", &["\u{1}", "\u{1f1e6}"]),
+ ("\u{1}\u{308}\u{1f1e6}", &["\u{1}\u{308}", "\u{1f1e6}"]),
+ ("\u{1}\u{5d0}", &["\u{1}", "\u{5d0}"]),
+ ("\u{1}\u{308}\u{5d0}", &["\u{1}\u{308}", "\u{5d0}"]),
+ ("\u{1}\u{22}", &["\u{1}", "\u{22}"]),
+ ("\u{1}\u{308}\u{22}", &["\u{1}\u{308}", "\u{22}"]),
+ ("\u{1}\u{27}", &["\u{1}", "\u{27}"]),
+ ("\u{1}\u{308}\u{27}", &["\u{1}\u{308}", "\u{27}"]),
+ ("\u{1}\u{231a}", &["\u{1}", "\u{231a}"]),
+ ("\u{1}\u{308}\u{231a}", &["\u{1}\u{308}", "\u{231a}"]),
+ ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
+ ("\u{1}\u{308}\u{20}", &["\u{1}\u{308}", "\u{20}"]),
+ ("\u{1}\u{ad}", &["\u{1}\u{ad}"]),
+ ("\u{1}\u{308}\u{ad}", &["\u{1}\u{308}\u{ad}"]),
+ ("\u{1}\u{300}", &["\u{1}\u{300}"]),
+ ("\u{1}\u{308}\u{300}", &["\u{1}\u{308}\u{300}"]),
+ ("\u{1}\u{200d}", &["\u{1}\u{200d}"]),
+ ("\u{1}\u{308}\u{200d}", &["\u{1}\u{308}\u{200d}"]),
+ ("\u{1}\u{61}\u{2060}", &["\u{1}", "\u{61}\u{2060}"]),
+ (
+ "\u{1}\u{308}\u{61}\u{2060}",
+ &["\u{1}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{1}\u{61}\u{3a}", &["\u{1}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{1}\u{308}\u{61}\u{3a}",
+ &["\u{1}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{1}\u{61}\u{27}", &["\u{1}", "\u{61}", "\u{27}"]),
+ (
+ "\u{1}\u{308}\u{61}\u{27}",
+ &["\u{1}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{1}\u{61}\u{27}\u{2060}",
+ &["\u{1}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{1}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{1}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{1}\u{61}\u{2c}", &["\u{1}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{1}\u{308}\u{61}\u{2c}",
+ &["\u{1}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{1}\u{31}\u{3a}", &["\u{1}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{1}\u{308}\u{31}\u{3a}",
+ &["\u{1}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{1}\u{31}\u{27}", &["\u{1}", "\u{31}", "\u{27}"]),
+ (
+ "\u{1}\u{308}\u{31}\u{27}",
+ &["\u{1}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{1}\u{31}\u{2c}", &["\u{1}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{1}\u{308}\u{31}\u{2c}",
+ &["\u{1}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{1}\u{31}\u{2e}\u{2060}",
+ &["\u{1}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{1}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{1}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{d}\u{1}", &["\u{d}", "\u{1}"]),
+ ("\u{d}\u{308}\u{1}", &["\u{d}", "\u{308}", "\u{1}"]),
+ ("\u{d}\u{d}", &["\u{d}", "\u{d}"]),
+ ("\u{d}\u{308}\u{d}", &["\u{d}", "\u{308}", "\u{d}"]),
+ ("\u{d}\u{a}", &["\u{d}\u{a}"]),
+ ("\u{d}\u{308}\u{a}", &["\u{d}", "\u{308}", "\u{a}"]),
+ ("\u{d}\u{b}", &["\u{d}", "\u{b}"]),
+ ("\u{d}\u{308}\u{b}", &["\u{d}", "\u{308}", "\u{b}"]),
+ ("\u{d}\u{3031}", &["\u{d}", "\u{3031}"]),
+ ("\u{d}\u{308}\u{3031}", &["\u{d}", "\u{308}", "\u{3031}"]),
+ ("\u{d}\u{41}", &["\u{d}", "\u{41}"]),
+ ("\u{d}\u{308}\u{41}", &["\u{d}", "\u{308}", "\u{41}"]),
+ ("\u{d}\u{3a}", &["\u{d}", "\u{3a}"]),
+ ("\u{d}\u{308}\u{3a}", &["\u{d}", "\u{308}", "\u{3a}"]),
+ ("\u{d}\u{2c}", &["\u{d}", "\u{2c}"]),
+ ("\u{d}\u{308}\u{2c}", &["\u{d}", "\u{308}", "\u{2c}"]),
+ ("\u{d}\u{2e}", &["\u{d}", "\u{2e}"]),
+ ("\u{d}\u{308}\u{2e}", &["\u{d}", "\u{308}", "\u{2e}"]),
+ ("\u{d}\u{30}", &["\u{d}", "\u{30}"]),
+ ("\u{d}\u{308}\u{30}", &["\u{d}", "\u{308}", "\u{30}"]),
+ ("\u{d}\u{5f}", &["\u{d}", "\u{5f}"]),
+ ("\u{d}\u{308}\u{5f}", &["\u{d}", "\u{308}", "\u{5f}"]),
+ ("\u{d}\u{1f1e6}", &["\u{d}", "\u{1f1e6}"]),
+ ("\u{d}\u{308}\u{1f1e6}", &["\u{d}", "\u{308}", "\u{1f1e6}"]),
+ ("\u{d}\u{5d0}", &["\u{d}", "\u{5d0}"]),
+ ("\u{d}\u{308}\u{5d0}", &["\u{d}", "\u{308}", "\u{5d0}"]),
+ ("\u{d}\u{22}", &["\u{d}", "\u{22}"]),
+ ("\u{d}\u{308}\u{22}", &["\u{d}", "\u{308}", "\u{22}"]),
+ ("\u{d}\u{27}", &["\u{d}", "\u{27}"]),
+ ("\u{d}\u{308}\u{27}", &["\u{d}", "\u{308}", "\u{27}"]),
+ ("\u{d}\u{231a}", &["\u{d}", "\u{231a}"]),
+ ("\u{d}\u{308}\u{231a}", &["\u{d}", "\u{308}", "\u{231a}"]),
+ ("\u{d}\u{20}", &["\u{d}", "\u{20}"]),
+ ("\u{d}\u{308}\u{20}", &["\u{d}", "\u{308}", "\u{20}"]),
+ ("\u{d}\u{ad}", &["\u{d}", "\u{ad}"]),
+ ("\u{d}\u{308}\u{ad}", &["\u{d}", "\u{308}\u{ad}"]),
+ ("\u{d}\u{300}", &["\u{d}", "\u{300}"]),
+ ("\u{d}\u{308}\u{300}", &["\u{d}", "\u{308}\u{300}"]),
+ ("\u{d}\u{200d}", &["\u{d}", "\u{200d}"]),
+ ("\u{d}\u{308}\u{200d}", &["\u{d}", "\u{308}\u{200d}"]),
+ ("\u{d}\u{61}\u{2060}", &["\u{d}", "\u{61}\u{2060}"]),
+ (
+ "\u{d}\u{308}\u{61}\u{2060}",
+ &["\u{d}", "\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{d}\u{61}\u{3a}", &["\u{d}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{d}\u{308}\u{61}\u{3a}",
+ &["\u{d}", "\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{d}\u{61}\u{27}", &["\u{d}", "\u{61}", "\u{27}"]),
+ (
+ "\u{d}\u{308}\u{61}\u{27}",
+ &["\u{d}", "\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{d}\u{61}\u{27}\u{2060}",
+ &["\u{d}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{d}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{d}", "\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{d}\u{61}\u{2c}", &["\u{d}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{d}\u{308}\u{61}\u{2c}",
+ &["\u{d}", "\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{d}\u{31}\u{3a}", &["\u{d}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{d}\u{308}\u{31}\u{3a}",
+ &["\u{d}", "\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{d}\u{31}\u{27}", &["\u{d}", "\u{31}", "\u{27}"]),
+ (
+ "\u{d}\u{308}\u{31}\u{27}",
+ &["\u{d}", "\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{d}\u{31}\u{2c}", &["\u{d}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{d}\u{308}\u{31}\u{2c}",
+ &["\u{d}", "\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{d}\u{31}\u{2e}\u{2060}",
+ &["\u{d}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{d}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{d}", "\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{a}\u{1}", &["\u{a}", "\u{1}"]),
+ ("\u{a}\u{308}\u{1}", &["\u{a}", "\u{308}", "\u{1}"]),
+ ("\u{a}\u{d}", &["\u{a}", "\u{d}"]),
+ ("\u{a}\u{308}\u{d}", &["\u{a}", "\u{308}", "\u{d}"]),
+ ("\u{a}\u{a}", &["\u{a}", "\u{a}"]),
+ ("\u{a}\u{308}\u{a}", &["\u{a}", "\u{308}", "\u{a}"]),
+ ("\u{a}\u{b}", &["\u{a}", "\u{b}"]),
+ ("\u{a}\u{308}\u{b}", &["\u{a}", "\u{308}", "\u{b}"]),
+ ("\u{a}\u{3031}", &["\u{a}", "\u{3031}"]),
+ ("\u{a}\u{308}\u{3031}", &["\u{a}", "\u{308}", "\u{3031}"]),
+ ("\u{a}\u{41}", &["\u{a}", "\u{41}"]),
+ ("\u{a}\u{308}\u{41}", &["\u{a}", "\u{308}", "\u{41}"]),
+ ("\u{a}\u{3a}", &["\u{a}", "\u{3a}"]),
+ ("\u{a}\u{308}\u{3a}", &["\u{a}", "\u{308}", "\u{3a}"]),
+ ("\u{a}\u{2c}", &["\u{a}", "\u{2c}"]),
+ ("\u{a}\u{308}\u{2c}", &["\u{a}", "\u{308}", "\u{2c}"]),
+ ("\u{a}\u{2e}", &["\u{a}", "\u{2e}"]),
+ ("\u{a}\u{308}\u{2e}", &["\u{a}", "\u{308}", "\u{2e}"]),
+ ("\u{a}\u{30}", &["\u{a}", "\u{30}"]),
+ ("\u{a}\u{308}\u{30}", &["\u{a}", "\u{308}", "\u{30}"]),
+ ("\u{a}\u{5f}", &["\u{a}", "\u{5f}"]),
+ ("\u{a}\u{308}\u{5f}", &["\u{a}", "\u{308}", "\u{5f}"]),
+ ("\u{a}\u{1f1e6}", &["\u{a}", "\u{1f1e6}"]),
+ ("\u{a}\u{308}\u{1f1e6}", &["\u{a}", "\u{308}", "\u{1f1e6}"]),
+ ("\u{a}\u{5d0}", &["\u{a}", "\u{5d0}"]),
+ ("\u{a}\u{308}\u{5d0}", &["\u{a}", "\u{308}", "\u{5d0}"]),
+ ("\u{a}\u{22}", &["\u{a}", "\u{22}"]),
+ ("\u{a}\u{308}\u{22}", &["\u{a}", "\u{308}", "\u{22}"]),
+ ("\u{a}\u{27}", &["\u{a}", "\u{27}"]),
+ ("\u{a}\u{308}\u{27}", &["\u{a}", "\u{308}", "\u{27}"]),
+ ("\u{a}\u{231a}", &["\u{a}", "\u{231a}"]),
+ ("\u{a}\u{308}\u{231a}", &["\u{a}", "\u{308}", "\u{231a}"]),
+ ("\u{a}\u{20}", &["\u{a}", "\u{20}"]),
+ ("\u{a}\u{308}\u{20}", &["\u{a}", "\u{308}", "\u{20}"]),
+ ("\u{a}\u{ad}", &["\u{a}", "\u{ad}"]),
+ ("\u{a}\u{308}\u{ad}", &["\u{a}", "\u{308}\u{ad}"]),
+ ("\u{a}\u{300}", &["\u{a}", "\u{300}"]),
+ ("\u{a}\u{308}\u{300}", &["\u{a}", "\u{308}\u{300}"]),
+ ("\u{a}\u{200d}", &["\u{a}", "\u{200d}"]),
+ ("\u{a}\u{308}\u{200d}", &["\u{a}", "\u{308}\u{200d}"]),
+ ("\u{a}\u{61}\u{2060}", &["\u{a}", "\u{61}\u{2060}"]),
+ (
+ "\u{a}\u{308}\u{61}\u{2060}",
+ &["\u{a}", "\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{a}\u{61}\u{3a}", &["\u{a}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{a}\u{308}\u{61}\u{3a}",
+ &["\u{a}", "\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{a}\u{61}\u{27}", &["\u{a}", "\u{61}", "\u{27}"]),
+ (
+ "\u{a}\u{308}\u{61}\u{27}",
+ &["\u{a}", "\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{a}\u{61}\u{27}\u{2060}",
+ &["\u{a}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{a}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{a}", "\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{a}\u{61}\u{2c}", &["\u{a}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{a}\u{308}\u{61}\u{2c}",
+ &["\u{a}", "\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{a}\u{31}\u{3a}", &["\u{a}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{a}\u{308}\u{31}\u{3a}",
+ &["\u{a}", "\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{a}\u{31}\u{27}", &["\u{a}", "\u{31}", "\u{27}"]),
+ (
+ "\u{a}\u{308}\u{31}\u{27}",
+ &["\u{a}", "\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{a}\u{31}\u{2c}", &["\u{a}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{a}\u{308}\u{31}\u{2c}",
+ &["\u{a}", "\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{a}\u{31}\u{2e}\u{2060}",
+ &["\u{a}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{a}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{a}", "\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{b}\u{1}", &["\u{b}", "\u{1}"]),
+ ("\u{b}\u{308}\u{1}", &["\u{b}", "\u{308}", "\u{1}"]),
+ ("\u{b}\u{d}", &["\u{b}", "\u{d}"]),
+ ("\u{b}\u{308}\u{d}", &["\u{b}", "\u{308}", "\u{d}"]),
+ ("\u{b}\u{a}", &["\u{b}", "\u{a}"]),
+ ("\u{b}\u{308}\u{a}", &["\u{b}", "\u{308}", "\u{a}"]),
+ ("\u{b}\u{b}", &["\u{b}", "\u{b}"]),
+ ("\u{b}\u{308}\u{b}", &["\u{b}", "\u{308}", "\u{b}"]),
+ ("\u{b}\u{3031}", &["\u{b}", "\u{3031}"]),
+ ("\u{b}\u{308}\u{3031}", &["\u{b}", "\u{308}", "\u{3031}"]),
+ ("\u{b}\u{41}", &["\u{b}", "\u{41}"]),
+ ("\u{b}\u{308}\u{41}", &["\u{b}", "\u{308}", "\u{41}"]),
+ ("\u{b}\u{3a}", &["\u{b}", "\u{3a}"]),
+ ("\u{b}\u{308}\u{3a}", &["\u{b}", "\u{308}", "\u{3a}"]),
+ ("\u{b}\u{2c}", &["\u{b}", "\u{2c}"]),
+ ("\u{b}\u{308}\u{2c}", &["\u{b}", "\u{308}", "\u{2c}"]),
+ ("\u{b}\u{2e}", &["\u{b}", "\u{2e}"]),
+ ("\u{b}\u{308}\u{2e}", &["\u{b}", "\u{308}", "\u{2e}"]),
+ ("\u{b}\u{30}", &["\u{b}", "\u{30}"]),
+ ("\u{b}\u{308}\u{30}", &["\u{b}", "\u{308}", "\u{30}"]),
+ ("\u{b}\u{5f}", &["\u{b}", "\u{5f}"]),
+ ("\u{b}\u{308}\u{5f}", &["\u{b}", "\u{308}", "\u{5f}"]),
+ ("\u{b}\u{1f1e6}", &["\u{b}", "\u{1f1e6}"]),
+ ("\u{b}\u{308}\u{1f1e6}", &["\u{b}", "\u{308}", "\u{1f1e6}"]),
+ ("\u{b}\u{5d0}", &["\u{b}", "\u{5d0}"]),
+ ("\u{b}\u{308}\u{5d0}", &["\u{b}", "\u{308}", "\u{5d0}"]),
+ ("\u{b}\u{22}", &["\u{b}", "\u{22}"]),
+ ("\u{b}\u{308}\u{22}", &["\u{b}", "\u{308}", "\u{22}"]),
+ ("\u{b}\u{27}", &["\u{b}", "\u{27}"]),
+ ("\u{b}\u{308}\u{27}", &["\u{b}", "\u{308}", "\u{27}"]),
+ ("\u{b}\u{231a}", &["\u{b}", "\u{231a}"]),
+ ("\u{b}\u{308}\u{231a}", &["\u{b}", "\u{308}", "\u{231a}"]),
+ ("\u{b}\u{20}", &["\u{b}", "\u{20}"]),
+ ("\u{b}\u{308}\u{20}", &["\u{b}", "\u{308}", "\u{20}"]),
+ ("\u{b}\u{ad}", &["\u{b}", "\u{ad}"]),
+ ("\u{b}\u{308}\u{ad}", &["\u{b}", "\u{308}\u{ad}"]),
+ ("\u{b}\u{300}", &["\u{b}", "\u{300}"]),
+ ("\u{b}\u{308}\u{300}", &["\u{b}", "\u{308}\u{300}"]),
+ ("\u{b}\u{200d}", &["\u{b}", "\u{200d}"]),
+ ("\u{b}\u{308}\u{200d}", &["\u{b}", "\u{308}\u{200d}"]),
+ ("\u{b}\u{61}\u{2060}", &["\u{b}", "\u{61}\u{2060}"]),
+ (
+ "\u{b}\u{308}\u{61}\u{2060}",
+ &["\u{b}", "\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{b}\u{61}\u{3a}", &["\u{b}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{b}\u{308}\u{61}\u{3a}",
+ &["\u{b}", "\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{b}\u{61}\u{27}", &["\u{b}", "\u{61}", "\u{27}"]),
+ (
+ "\u{b}\u{308}\u{61}\u{27}",
+ &["\u{b}", "\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{b}\u{61}\u{27}\u{2060}",
+ &["\u{b}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{b}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{b}", "\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{b}\u{61}\u{2c}", &["\u{b}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{b}\u{308}\u{61}\u{2c}",
+ &["\u{b}", "\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{b}\u{31}\u{3a}", &["\u{b}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{b}\u{308}\u{31}\u{3a}",
+ &["\u{b}", "\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{b}\u{31}\u{27}", &["\u{b}", "\u{31}", "\u{27}"]),
+ (
+ "\u{b}\u{308}\u{31}\u{27}",
+ &["\u{b}", "\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{b}\u{31}\u{2c}", &["\u{b}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{b}\u{308}\u{31}\u{2c}",
+ &["\u{b}", "\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{b}\u{31}\u{2e}\u{2060}",
+ &["\u{b}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{b}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{b}", "\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{3031}\u{1}", &["\u{3031}", "\u{1}"]),
+ ("\u{3031}\u{308}\u{1}", &["\u{3031}\u{308}", "\u{1}"]),
+ ("\u{3031}\u{d}", &["\u{3031}", "\u{d}"]),
+ ("\u{3031}\u{308}\u{d}", &["\u{3031}\u{308}", "\u{d}"]),
+ ("\u{3031}\u{a}", &["\u{3031}", "\u{a}"]),
+ ("\u{3031}\u{308}\u{a}", &["\u{3031}\u{308}", "\u{a}"]),
+ ("\u{3031}\u{b}", &["\u{3031}", "\u{b}"]),
+ ("\u{3031}\u{308}\u{b}", &["\u{3031}\u{308}", "\u{b}"]),
+ ("\u{3031}\u{3031}", &["\u{3031}\u{3031}"]),
+ ("\u{3031}\u{308}\u{3031}", &["\u{3031}\u{308}\u{3031}"]),
+ ("\u{3031}\u{41}", &["\u{3031}", "\u{41}"]),
+ ("\u{3031}\u{308}\u{41}", &["\u{3031}\u{308}", "\u{41}"]),
+ ("\u{3031}\u{3a}", &["\u{3031}", "\u{3a}"]),
+ ("\u{3031}\u{308}\u{3a}", &["\u{3031}\u{308}", "\u{3a}"]),
+ ("\u{3031}\u{2c}", &["\u{3031}", "\u{2c}"]),
+ ("\u{3031}\u{308}\u{2c}", &["\u{3031}\u{308}", "\u{2c}"]),
+ ("\u{3031}\u{2e}", &["\u{3031}", "\u{2e}"]),
+ ("\u{3031}\u{308}\u{2e}", &["\u{3031}\u{308}", "\u{2e}"]),
+ ("\u{3031}\u{30}", &["\u{3031}", "\u{30}"]),
+ ("\u{3031}\u{308}\u{30}", &["\u{3031}\u{308}", "\u{30}"]),
+ ("\u{3031}\u{5f}", &["\u{3031}\u{5f}"]),
+ ("\u{3031}\u{308}\u{5f}", &["\u{3031}\u{308}\u{5f}"]),
+ ("\u{3031}\u{1f1e6}", &["\u{3031}", "\u{1f1e6}"]),
+ (
+ "\u{3031}\u{308}\u{1f1e6}",
+ &["\u{3031}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{3031}\u{5d0}", &["\u{3031}", "\u{5d0}"]),
+ ("\u{3031}\u{308}\u{5d0}", &["\u{3031}\u{308}", "\u{5d0}"]),
+ ("\u{3031}\u{22}", &["\u{3031}", "\u{22}"]),
+ ("\u{3031}\u{308}\u{22}", &["\u{3031}\u{308}", "\u{22}"]),
+ ("\u{3031}\u{27}", &["\u{3031}", "\u{27}"]),
+ ("\u{3031}\u{308}\u{27}", &["\u{3031}\u{308}", "\u{27}"]),
+ ("\u{3031}\u{231a}", &["\u{3031}", "\u{231a}"]),
+ ("\u{3031}\u{308}\u{231a}", &["\u{3031}\u{308}", "\u{231a}"]),
+ ("\u{3031}\u{20}", &["\u{3031}", "\u{20}"]),
+ ("\u{3031}\u{308}\u{20}", &["\u{3031}\u{308}", "\u{20}"]),
+ ("\u{3031}\u{ad}", &["\u{3031}\u{ad}"]),
+ ("\u{3031}\u{308}\u{ad}", &["\u{3031}\u{308}\u{ad}"]),
+ ("\u{3031}\u{300}", &["\u{3031}\u{300}"]),
+ ("\u{3031}\u{308}\u{300}", &["\u{3031}\u{308}\u{300}"]),
+ ("\u{3031}\u{200d}", &["\u{3031}\u{200d}"]),
+ ("\u{3031}\u{308}\u{200d}", &["\u{3031}\u{308}\u{200d}"]),
+ ("\u{3031}\u{61}\u{2060}", &["\u{3031}", "\u{61}\u{2060}"]),
+ (
+ "\u{3031}\u{308}\u{61}\u{2060}",
+ &["\u{3031}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{3031}\u{61}\u{3a}", &["\u{3031}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{3031}\u{308}\u{61}\u{3a}",
+ &["\u{3031}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{3031}\u{61}\u{27}", &["\u{3031}", "\u{61}", "\u{27}"]),
+ (
+ "\u{3031}\u{308}\u{61}\u{27}",
+ &["\u{3031}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{3031}\u{61}\u{27}\u{2060}",
+ &["\u{3031}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{3031}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{3031}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{3031}\u{61}\u{2c}", &["\u{3031}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{3031}\u{308}\u{61}\u{2c}",
+ &["\u{3031}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{3031}\u{31}\u{3a}", &["\u{3031}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{3031}\u{308}\u{31}\u{3a}",
+ &["\u{3031}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{3031}\u{31}\u{27}", &["\u{3031}", "\u{31}", "\u{27}"]),
+ (
+ "\u{3031}\u{308}\u{31}\u{27}",
+ &["\u{3031}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{3031}\u{31}\u{2c}", &["\u{3031}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{3031}\u{308}\u{31}\u{2c}",
+ &["\u{3031}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{3031}\u{31}\u{2e}\u{2060}",
+ &["\u{3031}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{3031}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{3031}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{41}\u{1}", &["\u{41}", "\u{1}"]),
+ ("\u{41}\u{308}\u{1}", &["\u{41}\u{308}", "\u{1}"]),
+ ("\u{41}\u{d}", &["\u{41}", "\u{d}"]),
+ ("\u{41}\u{308}\u{d}", &["\u{41}\u{308}", "\u{d}"]),
+ ("\u{41}\u{a}", &["\u{41}", "\u{a}"]),
+ ("\u{41}\u{308}\u{a}", &["\u{41}\u{308}", "\u{a}"]),
+ ("\u{41}\u{b}", &["\u{41}", "\u{b}"]),
+ ("\u{41}\u{308}\u{b}", &["\u{41}\u{308}", "\u{b}"]),
+ ("\u{41}\u{3031}", &["\u{41}", "\u{3031}"]),
+ ("\u{41}\u{308}\u{3031}", &["\u{41}\u{308}", "\u{3031}"]),
+ ("\u{41}\u{41}", &["\u{41}\u{41}"]),
+ ("\u{41}\u{308}\u{41}", &["\u{41}\u{308}\u{41}"]),
+ ("\u{41}\u{3a}", &["\u{41}", "\u{3a}"]),
+ ("\u{41}\u{308}\u{3a}", &["\u{41}\u{308}", "\u{3a}"]),
+ ("\u{41}\u{2c}", &["\u{41}", "\u{2c}"]),
+ ("\u{41}\u{308}\u{2c}", &["\u{41}\u{308}", "\u{2c}"]),
+ ("\u{41}\u{2e}", &["\u{41}", "\u{2e}"]),
+ ("\u{41}\u{308}\u{2e}", &["\u{41}\u{308}", "\u{2e}"]),
+ ("\u{41}\u{30}", &["\u{41}\u{30}"]),
+ ("\u{41}\u{308}\u{30}", &["\u{41}\u{308}\u{30}"]),
+ ("\u{41}\u{5f}", &["\u{41}\u{5f}"]),
+ ("\u{41}\u{308}\u{5f}", &["\u{41}\u{308}\u{5f}"]),
+ ("\u{41}\u{1f1e6}", &["\u{41}", "\u{1f1e6}"]),
+ ("\u{41}\u{308}\u{1f1e6}", &["\u{41}\u{308}", "\u{1f1e6}"]),
+ ("\u{41}\u{5d0}", &["\u{41}\u{5d0}"]),
+ ("\u{41}\u{308}\u{5d0}", &["\u{41}\u{308}\u{5d0}"]),
+ ("\u{41}\u{22}", &["\u{41}", "\u{22}"]),
+ ("\u{41}\u{308}\u{22}", &["\u{41}\u{308}", "\u{22}"]),
+ ("\u{41}\u{27}", &["\u{41}", "\u{27}"]),
+ ("\u{41}\u{308}\u{27}", &["\u{41}\u{308}", "\u{27}"]),
+ ("\u{41}\u{231a}", &["\u{41}", "\u{231a}"]),
+ ("\u{41}\u{308}\u{231a}", &["\u{41}\u{308}", "\u{231a}"]),
+ ("\u{41}\u{20}", &["\u{41}", "\u{20}"]),
+ ("\u{41}\u{308}\u{20}", &["\u{41}\u{308}", "\u{20}"]),
+ ("\u{41}\u{ad}", &["\u{41}\u{ad}"]),
+ ("\u{41}\u{308}\u{ad}", &["\u{41}\u{308}\u{ad}"]),
+ ("\u{41}\u{300}", &["\u{41}\u{300}"]),
+ ("\u{41}\u{308}\u{300}", &["\u{41}\u{308}\u{300}"]),
+ ("\u{41}\u{200d}", &["\u{41}\u{200d}"]),
+ ("\u{41}\u{308}\u{200d}", &["\u{41}\u{308}\u{200d}"]),
+ ("\u{41}\u{61}\u{2060}", &["\u{41}\u{61}\u{2060}"]),
+ (
+ "\u{41}\u{308}\u{61}\u{2060}",
+ &["\u{41}\u{308}\u{61}\u{2060}"],
+ ),
+ ("\u{41}\u{61}\u{3a}", &["\u{41}\u{61}", "\u{3a}"]),
+ (
+ "\u{41}\u{308}\u{61}\u{3a}",
+ &["\u{41}\u{308}\u{61}", "\u{3a}"],
+ ),
+ ("\u{41}\u{61}\u{27}", &["\u{41}\u{61}", "\u{27}"]),
+ (
+ "\u{41}\u{308}\u{61}\u{27}",
+ &["\u{41}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{41}\u{61}\u{27}\u{2060}",
+ &["\u{41}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{41}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{41}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{41}\u{61}\u{2c}", &["\u{41}\u{61}", "\u{2c}"]),
+ (
+ "\u{41}\u{308}\u{61}\u{2c}",
+ &["\u{41}\u{308}\u{61}", "\u{2c}"],
+ ),
+ ("\u{41}\u{31}\u{3a}", &["\u{41}\u{31}", "\u{3a}"]),
+ (
+ "\u{41}\u{308}\u{31}\u{3a}",
+ &["\u{41}\u{308}\u{31}", "\u{3a}"],
+ ),
+ ("\u{41}\u{31}\u{27}", &["\u{41}\u{31}", "\u{27}"]),
+ (
+ "\u{41}\u{308}\u{31}\u{27}",
+ &["\u{41}\u{308}\u{31}", "\u{27}"],
+ ),
+ ("\u{41}\u{31}\u{2c}", &["\u{41}\u{31}", "\u{2c}"]),
+ (
+ "\u{41}\u{308}\u{31}\u{2c}",
+ &["\u{41}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{41}\u{31}\u{2e}\u{2060}",
+ &["\u{41}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{41}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{41}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{3a}\u{1}", &["\u{3a}", "\u{1}"]),
+ ("\u{3a}\u{308}\u{1}", &["\u{3a}\u{308}", "\u{1}"]),
+ ("\u{3a}\u{d}", &["\u{3a}", "\u{d}"]),
+ ("\u{3a}\u{308}\u{d}", &["\u{3a}\u{308}", "\u{d}"]),
+ ("\u{3a}\u{a}", &["\u{3a}", "\u{a}"]),
+ ("\u{3a}\u{308}\u{a}", &["\u{3a}\u{308}", "\u{a}"]),
+ ("\u{3a}\u{b}", &["\u{3a}", "\u{b}"]),
+ ("\u{3a}\u{308}\u{b}", &["\u{3a}\u{308}", "\u{b}"]),
+ ("\u{3a}\u{3031}", &["\u{3a}", "\u{3031}"]),
+ ("\u{3a}\u{308}\u{3031}", &["\u{3a}\u{308}", "\u{3031}"]),
+ ("\u{3a}\u{41}", &["\u{3a}", "\u{41}"]),
+ ("\u{3a}\u{308}\u{41}", &["\u{3a}\u{308}", "\u{41}"]),
+ ("\u{3a}\u{3a}", &["\u{3a}", "\u{3a}"]),
+ ("\u{3a}\u{308}\u{3a}", &["\u{3a}\u{308}", "\u{3a}"]),
+ ("\u{3a}\u{2c}", &["\u{3a}", "\u{2c}"]),
+ ("\u{3a}\u{308}\u{2c}", &["\u{3a}\u{308}", "\u{2c}"]),
+ ("\u{3a}\u{2e}", &["\u{3a}", "\u{2e}"]),
+ ("\u{3a}\u{308}\u{2e}", &["\u{3a}\u{308}", "\u{2e}"]),
+ ("\u{3a}\u{30}", &["\u{3a}", "\u{30}"]),
+ ("\u{3a}\u{308}\u{30}", &["\u{3a}\u{308}", "\u{30}"]),
+ ("\u{3a}\u{5f}", &["\u{3a}", "\u{5f}"]),
+ ("\u{3a}\u{308}\u{5f}", &["\u{3a}\u{308}", "\u{5f}"]),
+ ("\u{3a}\u{1f1e6}", &["\u{3a}", "\u{1f1e6}"]),
+ ("\u{3a}\u{308}\u{1f1e6}", &["\u{3a}\u{308}", "\u{1f1e6}"]),
+ ("\u{3a}\u{5d0}", &["\u{3a}", "\u{5d0}"]),
+ ("\u{3a}\u{308}\u{5d0}", &["\u{3a}\u{308}", "\u{5d0}"]),
+ ("\u{3a}\u{22}", &["\u{3a}", "\u{22}"]),
+ ("\u{3a}\u{308}\u{22}", &["\u{3a}\u{308}", "\u{22}"]),
+ ("\u{3a}\u{27}", &["\u{3a}", "\u{27}"]),
+ ("\u{3a}\u{308}\u{27}", &["\u{3a}\u{308}", "\u{27}"]),
+ ("\u{3a}\u{231a}", &["\u{3a}", "\u{231a}"]),
+ ("\u{3a}\u{308}\u{231a}", &["\u{3a}\u{308}", "\u{231a}"]),
+ ("\u{3a}\u{20}", &["\u{3a}", "\u{20}"]),
+ ("\u{3a}\u{308}\u{20}", &["\u{3a}\u{308}", "\u{20}"]),
+ ("\u{3a}\u{ad}", &["\u{3a}\u{ad}"]),
+ ("\u{3a}\u{308}\u{ad}", &["\u{3a}\u{308}\u{ad}"]),
+ ("\u{3a}\u{300}", &["\u{3a}\u{300}"]),
+ ("\u{3a}\u{308}\u{300}", &["\u{3a}\u{308}\u{300}"]),
+ ("\u{3a}\u{200d}", &["\u{3a}\u{200d}"]),
+ ("\u{3a}\u{308}\u{200d}", &["\u{3a}\u{308}\u{200d}"]),
+ ("\u{3a}\u{61}\u{2060}", &["\u{3a}", "\u{61}\u{2060}"]),
+ (
+ "\u{3a}\u{308}\u{61}\u{2060}",
+ &["\u{3a}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{3a}\u{61}\u{3a}", &["\u{3a}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{3a}\u{308}\u{61}\u{3a}",
+ &["\u{3a}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{3a}\u{61}\u{27}", &["\u{3a}", "\u{61}", "\u{27}"]),
+ (
+ "\u{3a}\u{308}\u{61}\u{27}",
+ &["\u{3a}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{3a}\u{61}\u{27}\u{2060}",
+ &["\u{3a}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{3a}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{3a}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{3a}\u{61}\u{2c}", &["\u{3a}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{3a}\u{308}\u{61}\u{2c}",
+ &["\u{3a}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{3a}\u{31}\u{3a}", &["\u{3a}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{3a}\u{308}\u{31}\u{3a}",
+ &["\u{3a}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{3a}\u{31}\u{27}", &["\u{3a}", "\u{31}", "\u{27}"]),
+ (
+ "\u{3a}\u{308}\u{31}\u{27}",
+ &["\u{3a}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{3a}\u{31}\u{2c}", &["\u{3a}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{3a}\u{308}\u{31}\u{2c}",
+ &["\u{3a}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{3a}\u{31}\u{2e}\u{2060}",
+ &["\u{3a}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{3a}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{3a}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{2c}\u{1}", &["\u{2c}", "\u{1}"]),
+ ("\u{2c}\u{308}\u{1}", &["\u{2c}\u{308}", "\u{1}"]),
+ ("\u{2c}\u{d}", &["\u{2c}", "\u{d}"]),
+ ("\u{2c}\u{308}\u{d}", &["\u{2c}\u{308}", "\u{d}"]),
+ ("\u{2c}\u{a}", &["\u{2c}", "\u{a}"]),
+ ("\u{2c}\u{308}\u{a}", &["\u{2c}\u{308}", "\u{a}"]),
+ ("\u{2c}\u{b}", &["\u{2c}", "\u{b}"]),
+ ("\u{2c}\u{308}\u{b}", &["\u{2c}\u{308}", "\u{b}"]),
+ ("\u{2c}\u{3031}", &["\u{2c}", "\u{3031}"]),
+ ("\u{2c}\u{308}\u{3031}", &["\u{2c}\u{308}", "\u{3031}"]),
+ ("\u{2c}\u{41}", &["\u{2c}", "\u{41}"]),
+ ("\u{2c}\u{308}\u{41}", &["\u{2c}\u{308}", "\u{41}"]),
+ ("\u{2c}\u{3a}", &["\u{2c}", "\u{3a}"]),
+ ("\u{2c}\u{308}\u{3a}", &["\u{2c}\u{308}", "\u{3a}"]),
+ ("\u{2c}\u{2c}", &["\u{2c}", "\u{2c}"]),
+ ("\u{2c}\u{308}\u{2c}", &["\u{2c}\u{308}", "\u{2c}"]),
+ ("\u{2c}\u{2e}", &["\u{2c}", "\u{2e}"]),
+ ("\u{2c}\u{308}\u{2e}", &["\u{2c}\u{308}", "\u{2e}"]),
+ ("\u{2c}\u{30}", &["\u{2c}", "\u{30}"]),
+ ("\u{2c}\u{308}\u{30}", &["\u{2c}\u{308}", "\u{30}"]),
+ ("\u{2c}\u{5f}", &["\u{2c}", "\u{5f}"]),
+ ("\u{2c}\u{308}\u{5f}", &["\u{2c}\u{308}", "\u{5f}"]),
+ ("\u{2c}\u{1f1e6}", &["\u{2c}", "\u{1f1e6}"]),
+ ("\u{2c}\u{308}\u{1f1e6}", &["\u{2c}\u{308}", "\u{1f1e6}"]),
+ ("\u{2c}\u{5d0}", &["\u{2c}", "\u{5d0}"]),
+ ("\u{2c}\u{308}\u{5d0}", &["\u{2c}\u{308}", "\u{5d0}"]),
+ ("\u{2c}\u{22}", &["\u{2c}", "\u{22}"]),
+ ("\u{2c}\u{308}\u{22}", &["\u{2c}\u{308}", "\u{22}"]),
+ ("\u{2c}\u{27}", &["\u{2c}", "\u{27}"]),
+ ("\u{2c}\u{308}\u{27}", &["\u{2c}\u{308}", "\u{27}"]),
+ ("\u{2c}\u{231a}", &["\u{2c}", "\u{231a}"]),
+ ("\u{2c}\u{308}\u{231a}", &["\u{2c}\u{308}", "\u{231a}"]),
+ ("\u{2c}\u{20}", &["\u{2c}", "\u{20}"]),
+ ("\u{2c}\u{308}\u{20}", &["\u{2c}\u{308}", "\u{20}"]),
+ ("\u{2c}\u{ad}", &["\u{2c}\u{ad}"]),
+ ("\u{2c}\u{308}\u{ad}", &["\u{2c}\u{308}\u{ad}"]),
+ ("\u{2c}\u{300}", &["\u{2c}\u{300}"]),
+ ("\u{2c}\u{308}\u{300}", &["\u{2c}\u{308}\u{300}"]),
+ ("\u{2c}\u{200d}", &["\u{2c}\u{200d}"]),
+ ("\u{2c}\u{308}\u{200d}", &["\u{2c}\u{308}\u{200d}"]),
+ ("\u{2c}\u{61}\u{2060}", &["\u{2c}", "\u{61}\u{2060}"]),
+ (
+ "\u{2c}\u{308}\u{61}\u{2060}",
+ &["\u{2c}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{2c}\u{61}\u{3a}", &["\u{2c}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{2c}\u{308}\u{61}\u{3a}",
+ &["\u{2c}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{2c}\u{61}\u{27}", &["\u{2c}", "\u{61}", "\u{27}"]),
+ (
+ "\u{2c}\u{308}\u{61}\u{27}",
+ &["\u{2c}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{2c}\u{61}\u{27}\u{2060}",
+ &["\u{2c}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{2c}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{2c}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{2c}\u{61}\u{2c}", &["\u{2c}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{2c}\u{308}\u{61}\u{2c}",
+ &["\u{2c}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{2c}\u{31}\u{3a}", &["\u{2c}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{2c}\u{308}\u{31}\u{3a}",
+ &["\u{2c}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{2c}\u{31}\u{27}", &["\u{2c}", "\u{31}", "\u{27}"]),
+ (
+ "\u{2c}\u{308}\u{31}\u{27}",
+ &["\u{2c}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{2c}\u{31}\u{2c}", &["\u{2c}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{2c}\u{308}\u{31}\u{2c}",
+ &["\u{2c}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{2c}\u{31}\u{2e}\u{2060}",
+ &["\u{2c}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{2c}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{2c}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{2e}\u{1}", &["\u{2e}", "\u{1}"]),
+ ("\u{2e}\u{308}\u{1}", &["\u{2e}\u{308}", "\u{1}"]),
+ ("\u{2e}\u{d}", &["\u{2e}", "\u{d}"]),
+ ("\u{2e}\u{308}\u{d}", &["\u{2e}\u{308}", "\u{d}"]),
+ ("\u{2e}\u{a}", &["\u{2e}", "\u{a}"]),
+ ("\u{2e}\u{308}\u{a}", &["\u{2e}\u{308}", "\u{a}"]),
+ ("\u{2e}\u{b}", &["\u{2e}", "\u{b}"]),
+ ("\u{2e}\u{308}\u{b}", &["\u{2e}\u{308}", "\u{b}"]),
+ ("\u{2e}\u{3031}", &["\u{2e}", "\u{3031}"]),
+ ("\u{2e}\u{308}\u{3031}", &["\u{2e}\u{308}", "\u{3031}"]),
+ ("\u{2e}\u{41}", &["\u{2e}", "\u{41}"]),
+ ("\u{2e}\u{308}\u{41}", &["\u{2e}\u{308}", "\u{41}"]),
+ ("\u{2e}\u{3a}", &["\u{2e}", "\u{3a}"]),
+ ("\u{2e}\u{308}\u{3a}", &["\u{2e}\u{308}", "\u{3a}"]),
+ ("\u{2e}\u{2c}", &["\u{2e}", "\u{2c}"]),
+ ("\u{2e}\u{308}\u{2c}", &["\u{2e}\u{308}", "\u{2c}"]),
+ ("\u{2e}\u{2e}", &["\u{2e}", "\u{2e}"]),
+ ("\u{2e}\u{308}\u{2e}", &["\u{2e}\u{308}", "\u{2e}"]),
+ ("\u{2e}\u{30}", &["\u{2e}", "\u{30}"]),
+ ("\u{2e}\u{308}\u{30}", &["\u{2e}\u{308}", "\u{30}"]),
+ ("\u{2e}\u{5f}", &["\u{2e}", "\u{5f}"]),
+ ("\u{2e}\u{308}\u{5f}", &["\u{2e}\u{308}", "\u{5f}"]),
+ ("\u{2e}\u{1f1e6}", &["\u{2e}", "\u{1f1e6}"]),
+ ("\u{2e}\u{308}\u{1f1e6}", &["\u{2e}\u{308}", "\u{1f1e6}"]),
+ ("\u{2e}\u{5d0}", &["\u{2e}", "\u{5d0}"]),
+ ("\u{2e}\u{308}\u{5d0}", &["\u{2e}\u{308}", "\u{5d0}"]),
+ ("\u{2e}\u{22}", &["\u{2e}", "\u{22}"]),
+ ("\u{2e}\u{308}\u{22}", &["\u{2e}\u{308}", "\u{22}"]),
+ ("\u{2e}\u{27}", &["\u{2e}", "\u{27}"]),
+ ("\u{2e}\u{308}\u{27}", &["\u{2e}\u{308}", "\u{27}"]),
+ ("\u{2e}\u{231a}", &["\u{2e}", "\u{231a}"]),
+ ("\u{2e}\u{308}\u{231a}", &["\u{2e}\u{308}", "\u{231a}"]),
+ ("\u{2e}\u{20}", &["\u{2e}", "\u{20}"]),
+ ("\u{2e}\u{308}\u{20}", &["\u{2e}\u{308}", "\u{20}"]),
+ ("\u{2e}\u{ad}", &["\u{2e}\u{ad}"]),
+ ("\u{2e}\u{308}\u{ad}", &["\u{2e}\u{308}\u{ad}"]),
+ ("\u{2e}\u{300}", &["\u{2e}\u{300}"]),
+ ("\u{2e}\u{308}\u{300}", &["\u{2e}\u{308}\u{300}"]),
+ ("\u{2e}\u{200d}", &["\u{2e}\u{200d}"]),
+ ("\u{2e}\u{308}\u{200d}", &["\u{2e}\u{308}\u{200d}"]),
+ ("\u{2e}\u{61}\u{2060}", &["\u{2e}", "\u{61}\u{2060}"]),
+ (
+ "\u{2e}\u{308}\u{61}\u{2060}",
+ &["\u{2e}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{2e}\u{61}\u{3a}", &["\u{2e}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{2e}\u{308}\u{61}\u{3a}",
+ &["\u{2e}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{2e}\u{61}\u{27}", &["\u{2e}", "\u{61}", "\u{27}"]),
+ (
+ "\u{2e}\u{308}\u{61}\u{27}",
+ &["\u{2e}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{2e}\u{61}\u{27}\u{2060}",
+ &["\u{2e}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{2e}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{2e}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{2e}\u{61}\u{2c}", &["\u{2e}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{2e}\u{308}\u{61}\u{2c}",
+ &["\u{2e}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{2e}\u{31}\u{3a}", &["\u{2e}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{2e}\u{308}\u{31}\u{3a}",
+ &["\u{2e}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{2e}\u{31}\u{27}", &["\u{2e}", "\u{31}", "\u{27}"]),
+ (
+ "\u{2e}\u{308}\u{31}\u{27}",
+ &["\u{2e}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{2e}\u{31}\u{2c}", &["\u{2e}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{2e}\u{308}\u{31}\u{2c}",
+ &["\u{2e}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{2e}\u{31}\u{2e}\u{2060}",
+ &["\u{2e}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{2e}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{2e}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{30}\u{1}", &["\u{30}", "\u{1}"]),
+ ("\u{30}\u{308}\u{1}", &["\u{30}\u{308}", "\u{1}"]),
+ ("\u{30}\u{d}", &["\u{30}", "\u{d}"]),
+ ("\u{30}\u{308}\u{d}", &["\u{30}\u{308}", "\u{d}"]),
+ ("\u{30}\u{a}", &["\u{30}", "\u{a}"]),
+ ("\u{30}\u{308}\u{a}", &["\u{30}\u{308}", "\u{a}"]),
+ ("\u{30}\u{b}", &["\u{30}", "\u{b}"]),
+ ("\u{30}\u{308}\u{b}", &["\u{30}\u{308}", "\u{b}"]),
+ ("\u{30}\u{3031}", &["\u{30}", "\u{3031}"]),
+ ("\u{30}\u{308}\u{3031}", &["\u{30}\u{308}", "\u{3031}"]),
+ ("\u{30}\u{41}", &["\u{30}\u{41}"]),
+ ("\u{30}\u{308}\u{41}", &["\u{30}\u{308}\u{41}"]),
+ ("\u{30}\u{3a}", &["\u{30}", "\u{3a}"]),
+ ("\u{30}\u{308}\u{3a}", &["\u{30}\u{308}", "\u{3a}"]),
+ ("\u{30}\u{2c}", &["\u{30}", "\u{2c}"]),
+ ("\u{30}\u{308}\u{2c}", &["\u{30}\u{308}", "\u{2c}"]),
+ ("\u{30}\u{2e}", &["\u{30}", "\u{2e}"]),
+ ("\u{30}\u{308}\u{2e}", &["\u{30}\u{308}", "\u{2e}"]),
+ ("\u{30}\u{30}", &["\u{30}\u{30}"]),
+ ("\u{30}\u{308}\u{30}", &["\u{30}\u{308}\u{30}"]),
+ ("\u{30}\u{5f}", &["\u{30}\u{5f}"]),
+ ("\u{30}\u{308}\u{5f}", &["\u{30}\u{308}\u{5f}"]),
+ ("\u{30}\u{1f1e6}", &["\u{30}", "\u{1f1e6}"]),
+ ("\u{30}\u{308}\u{1f1e6}", &["\u{30}\u{308}", "\u{1f1e6}"]),
+ ("\u{30}\u{5d0}", &["\u{30}\u{5d0}"]),
+ ("\u{30}\u{308}\u{5d0}", &["\u{30}\u{308}\u{5d0}"]),
+ ("\u{30}\u{22}", &["\u{30}", "\u{22}"]),
+ ("\u{30}\u{308}\u{22}", &["\u{30}\u{308}", "\u{22}"]),
+ ("\u{30}\u{27}", &["\u{30}", "\u{27}"]),
+ ("\u{30}\u{308}\u{27}", &["\u{30}\u{308}", "\u{27}"]),
+ ("\u{30}\u{231a}", &["\u{30}", "\u{231a}"]),
+ ("\u{30}\u{308}\u{231a}", &["\u{30}\u{308}", "\u{231a}"]),
+ ("\u{30}\u{20}", &["\u{30}", "\u{20}"]),
+ ("\u{30}\u{308}\u{20}", &["\u{30}\u{308}", "\u{20}"]),
+ ("\u{30}\u{ad}", &["\u{30}\u{ad}"]),
+ ("\u{30}\u{308}\u{ad}", &["\u{30}\u{308}\u{ad}"]),
+ ("\u{30}\u{300}", &["\u{30}\u{300}"]),
+ ("\u{30}\u{308}\u{300}", &["\u{30}\u{308}\u{300}"]),
+ ("\u{30}\u{200d}", &["\u{30}\u{200d}"]),
+ ("\u{30}\u{308}\u{200d}", &["\u{30}\u{308}\u{200d}"]),
+ ("\u{30}\u{61}\u{2060}", &["\u{30}\u{61}\u{2060}"]),
+ (
+ "\u{30}\u{308}\u{61}\u{2060}",
+ &["\u{30}\u{308}\u{61}\u{2060}"],
+ ),
+ ("\u{30}\u{61}\u{3a}", &["\u{30}\u{61}", "\u{3a}"]),
+ (
+ "\u{30}\u{308}\u{61}\u{3a}",
+ &["\u{30}\u{308}\u{61}", "\u{3a}"],
+ ),
+ ("\u{30}\u{61}\u{27}", &["\u{30}\u{61}", "\u{27}"]),
+ (
+ "\u{30}\u{308}\u{61}\u{27}",
+ &["\u{30}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{30}\u{61}\u{27}\u{2060}",
+ &["\u{30}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{30}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{30}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{30}\u{61}\u{2c}", &["\u{30}\u{61}", "\u{2c}"]),
+ (
+ "\u{30}\u{308}\u{61}\u{2c}",
+ &["\u{30}\u{308}\u{61}", "\u{2c}"],
+ ),
+ ("\u{30}\u{31}\u{3a}", &["\u{30}\u{31}", "\u{3a}"]),
+ (
+ "\u{30}\u{308}\u{31}\u{3a}",
+ &["\u{30}\u{308}\u{31}", "\u{3a}"],
+ ),
+ ("\u{30}\u{31}\u{27}", &["\u{30}\u{31}", "\u{27}"]),
+ (
+ "\u{30}\u{308}\u{31}\u{27}",
+ &["\u{30}\u{308}\u{31}", "\u{27}"],
+ ),
+ ("\u{30}\u{31}\u{2c}", &["\u{30}\u{31}", "\u{2c}"]),
+ (
+ "\u{30}\u{308}\u{31}\u{2c}",
+ &["\u{30}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{30}\u{31}\u{2e}\u{2060}",
+ &["\u{30}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{30}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{30}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{5f}\u{1}", &["\u{5f}", "\u{1}"]),
+ ("\u{5f}\u{308}\u{1}", &["\u{5f}\u{308}", "\u{1}"]),
+ ("\u{5f}\u{d}", &["\u{5f}", "\u{d}"]),
+ ("\u{5f}\u{308}\u{d}", &["\u{5f}\u{308}", "\u{d}"]),
+ ("\u{5f}\u{a}", &["\u{5f}", "\u{a}"]),
+ ("\u{5f}\u{308}\u{a}", &["\u{5f}\u{308}", "\u{a}"]),
+ ("\u{5f}\u{b}", &["\u{5f}", "\u{b}"]),
+ ("\u{5f}\u{308}\u{b}", &["\u{5f}\u{308}", "\u{b}"]),
+ ("\u{5f}\u{3031}", &["\u{5f}\u{3031}"]),
+ ("\u{5f}\u{308}\u{3031}", &["\u{5f}\u{308}\u{3031}"]),
+ ("\u{5f}\u{41}", &["\u{5f}\u{41}"]),
+ ("\u{5f}\u{308}\u{41}", &["\u{5f}\u{308}\u{41}"]),
+ ("\u{5f}\u{3a}", &["\u{5f}", "\u{3a}"]),
+ ("\u{5f}\u{308}\u{3a}", &["\u{5f}\u{308}", "\u{3a}"]),
+ ("\u{5f}\u{2c}", &["\u{5f}", "\u{2c}"]),
+ ("\u{5f}\u{308}\u{2c}", &["\u{5f}\u{308}", "\u{2c}"]),
+ ("\u{5f}\u{2e}", &["\u{5f}", "\u{2e}"]),
+ ("\u{5f}\u{308}\u{2e}", &["\u{5f}\u{308}", "\u{2e}"]),
+ ("\u{5f}\u{30}", &["\u{5f}\u{30}"]),
+ ("\u{5f}\u{308}\u{30}", &["\u{5f}\u{308}\u{30}"]),
+ ("\u{5f}\u{5f}", &["\u{5f}\u{5f}"]),
+ ("\u{5f}\u{308}\u{5f}", &["\u{5f}\u{308}\u{5f}"]),
+ ("\u{5f}\u{1f1e6}", &["\u{5f}", "\u{1f1e6}"]),
+ ("\u{5f}\u{308}\u{1f1e6}", &["\u{5f}\u{308}", "\u{1f1e6}"]),
+ ("\u{5f}\u{5d0}", &["\u{5f}\u{5d0}"]),
+ ("\u{5f}\u{308}\u{5d0}", &["\u{5f}\u{308}\u{5d0}"]),
+ ("\u{5f}\u{22}", &["\u{5f}", "\u{22}"]),
+ ("\u{5f}\u{308}\u{22}", &["\u{5f}\u{308}", "\u{22}"]),
+ ("\u{5f}\u{27}", &["\u{5f}", "\u{27}"]),
+ ("\u{5f}\u{308}\u{27}", &["\u{5f}\u{308}", "\u{27}"]),
+ ("\u{5f}\u{231a}", &["\u{5f}", "\u{231a}"]),
+ ("\u{5f}\u{308}\u{231a}", &["\u{5f}\u{308}", "\u{231a}"]),
+ ("\u{5f}\u{20}", &["\u{5f}", "\u{20}"]),
+ ("\u{5f}\u{308}\u{20}", &["\u{5f}\u{308}", "\u{20}"]),
+ ("\u{5f}\u{ad}", &["\u{5f}\u{ad}"]),
+ ("\u{5f}\u{308}\u{ad}", &["\u{5f}\u{308}\u{ad}"]),
+ ("\u{5f}\u{300}", &["\u{5f}\u{300}"]),
+ ("\u{5f}\u{308}\u{300}", &["\u{5f}\u{308}\u{300}"]),
+ ("\u{5f}\u{200d}", &["\u{5f}\u{200d}"]),
+ ("\u{5f}\u{308}\u{200d}", &["\u{5f}\u{308}\u{200d}"]),
+ ("\u{5f}\u{61}\u{2060}", &["\u{5f}\u{61}\u{2060}"]),
+ (
+ "\u{5f}\u{308}\u{61}\u{2060}",
+ &["\u{5f}\u{308}\u{61}\u{2060}"],
+ ),
+ ("\u{5f}\u{61}\u{3a}", &["\u{5f}\u{61}", "\u{3a}"]),
+ (
+ "\u{5f}\u{308}\u{61}\u{3a}",
+ &["\u{5f}\u{308}\u{61}", "\u{3a}"],
+ ),
+ ("\u{5f}\u{61}\u{27}", &["\u{5f}\u{61}", "\u{27}"]),
+ (
+ "\u{5f}\u{308}\u{61}\u{27}",
+ &["\u{5f}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{5f}\u{61}\u{27}\u{2060}",
+ &["\u{5f}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{5f}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{5f}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{5f}\u{61}\u{2c}", &["\u{5f}\u{61}", "\u{2c}"]),
+ (
+ "\u{5f}\u{308}\u{61}\u{2c}",
+ &["\u{5f}\u{308}\u{61}", "\u{2c}"],
+ ),
+ ("\u{5f}\u{31}\u{3a}", &["\u{5f}\u{31}", "\u{3a}"]),
+ (
+ "\u{5f}\u{308}\u{31}\u{3a}",
+ &["\u{5f}\u{308}\u{31}", "\u{3a}"],
+ ),
+ ("\u{5f}\u{31}\u{27}", &["\u{5f}\u{31}", "\u{27}"]),
+ (
+ "\u{5f}\u{308}\u{31}\u{27}",
+ &["\u{5f}\u{308}\u{31}", "\u{27}"],
+ ),
+ ("\u{5f}\u{31}\u{2c}", &["\u{5f}\u{31}", "\u{2c}"]),
+ (
+ "\u{5f}\u{308}\u{31}\u{2c}",
+ &["\u{5f}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{5f}\u{31}\u{2e}\u{2060}",
+ &["\u{5f}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{5f}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{5f}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{1f1e6}\u{1}", &["\u{1f1e6}", "\u{1}"]),
+ ("\u{1f1e6}\u{308}\u{1}", &["\u{1f1e6}\u{308}", "\u{1}"]),
+ ("\u{1f1e6}\u{d}", &["\u{1f1e6}", "\u{d}"]),
+ ("\u{1f1e6}\u{308}\u{d}", &["\u{1f1e6}\u{308}", "\u{d}"]),
+ ("\u{1f1e6}\u{a}", &["\u{1f1e6}", "\u{a}"]),
+ ("\u{1f1e6}\u{308}\u{a}", &["\u{1f1e6}\u{308}", "\u{a}"]),
+ ("\u{1f1e6}\u{b}", &["\u{1f1e6}", "\u{b}"]),
+ ("\u{1f1e6}\u{308}\u{b}", &["\u{1f1e6}\u{308}", "\u{b}"]),
+ ("\u{1f1e6}\u{3031}", &["\u{1f1e6}", "\u{3031}"]),
+ (
+ "\u{1f1e6}\u{308}\u{3031}",
+ &["\u{1f1e6}\u{308}", "\u{3031}"],
+ ),
+ ("\u{1f1e6}\u{41}", &["\u{1f1e6}", "\u{41}"]),
+ ("\u{1f1e6}\u{308}\u{41}", &["\u{1f1e6}\u{308}", "\u{41}"]),
+ ("\u{1f1e6}\u{3a}", &["\u{1f1e6}", "\u{3a}"]),
+ ("\u{1f1e6}\u{308}\u{3a}", &["\u{1f1e6}\u{308}", "\u{3a}"]),
+ ("\u{1f1e6}\u{2c}", &["\u{1f1e6}", "\u{2c}"]),
+ ("\u{1f1e6}\u{308}\u{2c}", &["\u{1f1e6}\u{308}", "\u{2c}"]),
+ ("\u{1f1e6}\u{2e}", &["\u{1f1e6}", "\u{2e}"]),
+ ("\u{1f1e6}\u{308}\u{2e}", &["\u{1f1e6}\u{308}", "\u{2e}"]),
+ ("\u{1f1e6}\u{30}", &["\u{1f1e6}", "\u{30}"]),
+ ("\u{1f1e6}\u{308}\u{30}", &["\u{1f1e6}\u{308}", "\u{30}"]),
+ ("\u{1f1e6}\u{5f}", &["\u{1f1e6}", "\u{5f}"]),
+ ("\u{1f1e6}\u{308}\u{5f}", &["\u{1f1e6}\u{308}", "\u{5f}"]),
+ ("\u{1f1e6}\u{1f1e6}", &["\u{1f1e6}\u{1f1e6}"]),
+ ("\u{1f1e6}\u{308}\u{1f1e6}", &["\u{1f1e6}\u{308}\u{1f1e6}"]),
+ ("\u{1f1e6}\u{5d0}", &["\u{1f1e6}", "\u{5d0}"]),
+ ("\u{1f1e6}\u{308}\u{5d0}", &["\u{1f1e6}\u{308}", "\u{5d0}"]),
+ ("\u{1f1e6}\u{22}", &["\u{1f1e6}", "\u{22}"]),
+ ("\u{1f1e6}\u{308}\u{22}", &["\u{1f1e6}\u{308}", "\u{22}"]),
+ ("\u{1f1e6}\u{27}", &["\u{1f1e6}", "\u{27}"]),
+ ("\u{1f1e6}\u{308}\u{27}", &["\u{1f1e6}\u{308}", "\u{27}"]),
+ ("\u{1f1e6}\u{231a}", &["\u{1f1e6}", "\u{231a}"]),
+ (
+ "\u{1f1e6}\u{308}\u{231a}",
+ &["\u{1f1e6}\u{308}", "\u{231a}"],
+ ),
+ ("\u{1f1e6}\u{20}", &["\u{1f1e6}", "\u{20}"]),
+ ("\u{1f1e6}\u{308}\u{20}", &["\u{1f1e6}\u{308}", "\u{20}"]),
+ ("\u{1f1e6}\u{ad}", &["\u{1f1e6}\u{ad}"]),
+ ("\u{1f1e6}\u{308}\u{ad}", &["\u{1f1e6}\u{308}\u{ad}"]),
+ ("\u{1f1e6}\u{300}", &["\u{1f1e6}\u{300}"]),
+ ("\u{1f1e6}\u{308}\u{300}", &["\u{1f1e6}\u{308}\u{300}"]),
+ ("\u{1f1e6}\u{200d}", &["\u{1f1e6}\u{200d}"]),
+ ("\u{1f1e6}\u{308}\u{200d}", &["\u{1f1e6}\u{308}\u{200d}"]),
+ ("\u{1f1e6}\u{61}\u{2060}", &["\u{1f1e6}", "\u{61}\u{2060}"]),
+ (
+ "\u{1f1e6}\u{308}\u{61}\u{2060}",
+ &["\u{1f1e6}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{1f1e6}\u{61}\u{3a}", &["\u{1f1e6}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{1f1e6}\u{308}\u{61}\u{3a}",
+ &["\u{1f1e6}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{1f1e6}\u{61}\u{27}", &["\u{1f1e6}", "\u{61}", "\u{27}"]),
+ (
+ "\u{1f1e6}\u{308}\u{61}\u{27}",
+ &["\u{1f1e6}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{1f1e6}\u{61}\u{27}\u{2060}",
+ &["\u{1f1e6}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{1f1e6}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{1f1e6}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{1f1e6}\u{61}\u{2c}", &["\u{1f1e6}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{1f1e6}\u{308}\u{61}\u{2c}",
+ &["\u{1f1e6}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{1f1e6}\u{31}\u{3a}", &["\u{1f1e6}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{1f1e6}\u{308}\u{31}\u{3a}",
+ &["\u{1f1e6}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{1f1e6}\u{31}\u{27}", &["\u{1f1e6}", "\u{31}", "\u{27}"]),
+ (
+ "\u{1f1e6}\u{308}\u{31}\u{27}",
+ &["\u{1f1e6}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{1f1e6}\u{31}\u{2c}", &["\u{1f1e6}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{1f1e6}\u{308}\u{31}\u{2c}",
+ &["\u{1f1e6}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{1f1e6}\u{31}\u{2e}\u{2060}",
+ &["\u{1f1e6}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{1f1e6}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{1f1e6}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{5d0}\u{1}", &["\u{5d0}", "\u{1}"]),
+ ("\u{5d0}\u{308}\u{1}", &["\u{5d0}\u{308}", "\u{1}"]),
+ ("\u{5d0}\u{d}", &["\u{5d0}", "\u{d}"]),
+ ("\u{5d0}\u{308}\u{d}", &["\u{5d0}\u{308}", "\u{d}"]),
+ ("\u{5d0}\u{a}", &["\u{5d0}", "\u{a}"]),
+ ("\u{5d0}\u{308}\u{a}", &["\u{5d0}\u{308}", "\u{a}"]),
+ ("\u{5d0}\u{b}", &["\u{5d0}", "\u{b}"]),
+ ("\u{5d0}\u{308}\u{b}", &["\u{5d0}\u{308}", "\u{b}"]),
+ ("\u{5d0}\u{3031}", &["\u{5d0}", "\u{3031}"]),
+ ("\u{5d0}\u{308}\u{3031}", &["\u{5d0}\u{308}", "\u{3031}"]),
+ ("\u{5d0}\u{41}", &["\u{5d0}\u{41}"]),
+ ("\u{5d0}\u{308}\u{41}", &["\u{5d0}\u{308}\u{41}"]),
+ ("\u{5d0}\u{3a}", &["\u{5d0}", "\u{3a}"]),
+ ("\u{5d0}\u{308}\u{3a}", &["\u{5d0}\u{308}", "\u{3a}"]),
+ ("\u{5d0}\u{2c}", &["\u{5d0}", "\u{2c}"]),
+ ("\u{5d0}\u{308}\u{2c}", &["\u{5d0}\u{308}", "\u{2c}"]),
+ ("\u{5d0}\u{2e}", &["\u{5d0}", "\u{2e}"]),
+ ("\u{5d0}\u{308}\u{2e}", &["\u{5d0}\u{308}", "\u{2e}"]),
+ ("\u{5d0}\u{30}", &["\u{5d0}\u{30}"]),
+ ("\u{5d0}\u{308}\u{30}", &["\u{5d0}\u{308}\u{30}"]),
+ ("\u{5d0}\u{5f}", &["\u{5d0}\u{5f}"]),
+ ("\u{5d0}\u{308}\u{5f}", &["\u{5d0}\u{308}\u{5f}"]),
+ ("\u{5d0}\u{1f1e6}", &["\u{5d0}", "\u{1f1e6}"]),
+ ("\u{5d0}\u{308}\u{1f1e6}", &["\u{5d0}\u{308}", "\u{1f1e6}"]),
+ ("\u{5d0}\u{5d0}", &["\u{5d0}\u{5d0}"]),
+ ("\u{5d0}\u{308}\u{5d0}", &["\u{5d0}\u{308}\u{5d0}"]),
+ ("\u{5d0}\u{22}", &["\u{5d0}", "\u{22}"]),
+ ("\u{5d0}\u{308}\u{22}", &["\u{5d0}\u{308}", "\u{22}"]),
+ ("\u{5d0}\u{27}", &["\u{5d0}\u{27}"]),
+ ("\u{5d0}\u{308}\u{27}", &["\u{5d0}\u{308}\u{27}"]),
+ ("\u{5d0}\u{231a}", &["\u{5d0}", "\u{231a}"]),
+ ("\u{5d0}\u{308}\u{231a}", &["\u{5d0}\u{308}", "\u{231a}"]),
+ ("\u{5d0}\u{20}", &["\u{5d0}", "\u{20}"]),
+ ("\u{5d0}\u{308}\u{20}", &["\u{5d0}\u{308}", "\u{20}"]),
+ ("\u{5d0}\u{ad}", &["\u{5d0}\u{ad}"]),
+ ("\u{5d0}\u{308}\u{ad}", &["\u{5d0}\u{308}\u{ad}"]),
+ ("\u{5d0}\u{300}", &["\u{5d0}\u{300}"]),
+ ("\u{5d0}\u{308}\u{300}", &["\u{5d0}\u{308}\u{300}"]),
+ ("\u{5d0}\u{200d}", &["\u{5d0}\u{200d}"]),
+ ("\u{5d0}\u{308}\u{200d}", &["\u{5d0}\u{308}\u{200d}"]),
+ ("\u{5d0}\u{61}\u{2060}", &["\u{5d0}\u{61}\u{2060}"]),
+ (
+ "\u{5d0}\u{308}\u{61}\u{2060}",
+ &["\u{5d0}\u{308}\u{61}\u{2060}"],
+ ),
+ ("\u{5d0}\u{61}\u{3a}", &["\u{5d0}\u{61}", "\u{3a}"]),
+ (
+ "\u{5d0}\u{308}\u{61}\u{3a}",
+ &["\u{5d0}\u{308}\u{61}", "\u{3a}"],
+ ),
+ ("\u{5d0}\u{61}\u{27}", &["\u{5d0}\u{61}", "\u{27}"]),
+ (
+ "\u{5d0}\u{308}\u{61}\u{27}",
+ &["\u{5d0}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{5d0}\u{61}\u{27}\u{2060}",
+ &["\u{5d0}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{5d0}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{5d0}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{5d0}\u{61}\u{2c}", &["\u{5d0}\u{61}", "\u{2c}"]),
+ (
+ "\u{5d0}\u{308}\u{61}\u{2c}",
+ &["\u{5d0}\u{308}\u{61}", "\u{2c}"],
+ ),
+ ("\u{5d0}\u{31}\u{3a}", &["\u{5d0}\u{31}", "\u{3a}"]),
+ (
+ "\u{5d0}\u{308}\u{31}\u{3a}",
+ &["\u{5d0}\u{308}\u{31}", "\u{3a}"],
+ ),
+ ("\u{5d0}\u{31}\u{27}", &["\u{5d0}\u{31}", "\u{27}"]),
+ (
+ "\u{5d0}\u{308}\u{31}\u{27}",
+ &["\u{5d0}\u{308}\u{31}", "\u{27}"],
+ ),
+ ("\u{5d0}\u{31}\u{2c}", &["\u{5d0}\u{31}", "\u{2c}"]),
+ (
+ "\u{5d0}\u{308}\u{31}\u{2c}",
+ &["\u{5d0}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{5d0}\u{31}\u{2e}\u{2060}",
+ &["\u{5d0}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{5d0}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{5d0}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{22}\u{1}", &["\u{22}", "\u{1}"]),
+ ("\u{22}\u{308}\u{1}", &["\u{22}\u{308}", "\u{1}"]),
+ ("\u{22}\u{d}", &["\u{22}", "\u{d}"]),
+ ("\u{22}\u{308}\u{d}", &["\u{22}\u{308}", "\u{d}"]),
+ ("\u{22}\u{a}", &["\u{22}", "\u{a}"]),
+ ("\u{22}\u{308}\u{a}", &["\u{22}\u{308}", "\u{a}"]),
+ ("\u{22}\u{b}", &["\u{22}", "\u{b}"]),
+ ("\u{22}\u{308}\u{b}", &["\u{22}\u{308}", "\u{b}"]),
+ ("\u{22}\u{3031}", &["\u{22}", "\u{3031}"]),
+ ("\u{22}\u{308}\u{3031}", &["\u{22}\u{308}", "\u{3031}"]),
+ ("\u{22}\u{41}", &["\u{22}", "\u{41}"]),
+ ("\u{22}\u{308}\u{41}", &["\u{22}\u{308}", "\u{41}"]),
+ ("\u{22}\u{3a}", &["\u{22}", "\u{3a}"]),
+ ("\u{22}\u{308}\u{3a}", &["\u{22}\u{308}", "\u{3a}"]),
+ ("\u{22}\u{2c}", &["\u{22}", "\u{2c}"]),
+ ("\u{22}\u{308}\u{2c}", &["\u{22}\u{308}", "\u{2c}"]),
+ ("\u{22}\u{2e}", &["\u{22}", "\u{2e}"]),
+ ("\u{22}\u{308}\u{2e}", &["\u{22}\u{308}", "\u{2e}"]),
+ ("\u{22}\u{30}", &["\u{22}", "\u{30}"]),
+ ("\u{22}\u{308}\u{30}", &["\u{22}\u{308}", "\u{30}"]),
+ ("\u{22}\u{5f}", &["\u{22}", "\u{5f}"]),
+ ("\u{22}\u{308}\u{5f}", &["\u{22}\u{308}", "\u{5f}"]),
+ ("\u{22}\u{1f1e6}", &["\u{22}", "\u{1f1e6}"]),
+ ("\u{22}\u{308}\u{1f1e6}", &["\u{22}\u{308}", "\u{1f1e6}"]),
+ ("\u{22}\u{5d0}", &["\u{22}", "\u{5d0}"]),
+ ("\u{22}\u{308}\u{5d0}", &["\u{22}\u{308}", "\u{5d0}"]),
+ ("\u{22}\u{22}", &["\u{22}", "\u{22}"]),
+ ("\u{22}\u{308}\u{22}", &["\u{22}\u{308}", "\u{22}"]),
+ ("\u{22}\u{27}", &["\u{22}", "\u{27}"]),
+ ("\u{22}\u{308}\u{27}", &["\u{22}\u{308}", "\u{27}"]),
+ ("\u{22}\u{231a}", &["\u{22}", "\u{231a}"]),
+ ("\u{22}\u{308}\u{231a}", &["\u{22}\u{308}", "\u{231a}"]),
+ ("\u{22}\u{20}", &["\u{22}", "\u{20}"]),
+ ("\u{22}\u{308}\u{20}", &["\u{22}\u{308}", "\u{20}"]),
+ ("\u{22}\u{ad}", &["\u{22}\u{ad}"]),
+ ("\u{22}\u{308}\u{ad}", &["\u{22}\u{308}\u{ad}"]),
+ ("\u{22}\u{300}", &["\u{22}\u{300}"]),
+ ("\u{22}\u{308}\u{300}", &["\u{22}\u{308}\u{300}"]),
+ ("\u{22}\u{200d}", &["\u{22}\u{200d}"]),
+ ("\u{22}\u{308}\u{200d}", &["\u{22}\u{308}\u{200d}"]),
+ ("\u{22}\u{61}\u{2060}", &["\u{22}", "\u{61}\u{2060}"]),
+ (
+ "\u{22}\u{308}\u{61}\u{2060}",
+ &["\u{22}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{22}\u{61}\u{3a}", &["\u{22}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{22}\u{308}\u{61}\u{3a}",
+ &["\u{22}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{22}\u{61}\u{27}", &["\u{22}", "\u{61}", "\u{27}"]),
+ (
+ "\u{22}\u{308}\u{61}\u{27}",
+ &["\u{22}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{22}\u{61}\u{27}\u{2060}",
+ &["\u{22}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{22}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{22}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{22}\u{61}\u{2c}", &["\u{22}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{22}\u{308}\u{61}\u{2c}",
+ &["\u{22}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{22}\u{31}\u{3a}", &["\u{22}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{22}\u{308}\u{31}\u{3a}",
+ &["\u{22}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{22}\u{31}\u{27}", &["\u{22}", "\u{31}", "\u{27}"]),
+ (
+ "\u{22}\u{308}\u{31}\u{27}",
+ &["\u{22}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{22}\u{31}\u{2c}", &["\u{22}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{22}\u{308}\u{31}\u{2c}",
+ &["\u{22}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{22}\u{31}\u{2e}\u{2060}",
+ &["\u{22}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{22}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{22}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{27}\u{1}", &["\u{27}", "\u{1}"]),
+ ("\u{27}\u{308}\u{1}", &["\u{27}\u{308}", "\u{1}"]),
+ ("\u{27}\u{d}", &["\u{27}", "\u{d}"]),
+ ("\u{27}\u{308}\u{d}", &["\u{27}\u{308}", "\u{d}"]),
+ ("\u{27}\u{a}", &["\u{27}", "\u{a}"]),
+ ("\u{27}\u{308}\u{a}", &["\u{27}\u{308}", "\u{a}"]),
+ ("\u{27}\u{b}", &["\u{27}", "\u{b}"]),
+ ("\u{27}\u{308}\u{b}", &["\u{27}\u{308}", "\u{b}"]),
+ ("\u{27}\u{3031}", &["\u{27}", "\u{3031}"]),
+ ("\u{27}\u{308}\u{3031}", &["\u{27}\u{308}", "\u{3031}"]),
+ ("\u{27}\u{41}", &["\u{27}", "\u{41}"]),
+ ("\u{27}\u{308}\u{41}", &["\u{27}\u{308}", "\u{41}"]),
+ ("\u{27}\u{3a}", &["\u{27}", "\u{3a}"]),
+ ("\u{27}\u{308}\u{3a}", &["\u{27}\u{308}", "\u{3a}"]),
+ ("\u{27}\u{2c}", &["\u{27}", "\u{2c}"]),
+ ("\u{27}\u{308}\u{2c}", &["\u{27}\u{308}", "\u{2c}"]),
+ ("\u{27}\u{2e}", &["\u{27}", "\u{2e}"]),
+ ("\u{27}\u{308}\u{2e}", &["\u{27}\u{308}", "\u{2e}"]),
+ ("\u{27}\u{30}", &["\u{27}", "\u{30}"]),
+ ("\u{27}\u{308}\u{30}", &["\u{27}\u{308}", "\u{30}"]),
+ ("\u{27}\u{5f}", &["\u{27}", "\u{5f}"]),
+ ("\u{27}\u{308}\u{5f}", &["\u{27}\u{308}", "\u{5f}"]),
+ ("\u{27}\u{1f1e6}", &["\u{27}", "\u{1f1e6}"]),
+ ("\u{27}\u{308}\u{1f1e6}", &["\u{27}\u{308}", "\u{1f1e6}"]),
+ ("\u{27}\u{5d0}", &["\u{27}", "\u{5d0}"]),
+ ("\u{27}\u{308}\u{5d0}", &["\u{27}\u{308}", "\u{5d0}"]),
+ ("\u{27}\u{22}", &["\u{27}", "\u{22}"]),
+ ("\u{27}\u{308}\u{22}", &["\u{27}\u{308}", "\u{22}"]),
+ ("\u{27}\u{27}", &["\u{27}", "\u{27}"]),
+ ("\u{27}\u{308}\u{27}", &["\u{27}\u{308}", "\u{27}"]),
+ ("\u{27}\u{231a}", &["\u{27}", "\u{231a}"]),
+ ("\u{27}\u{308}\u{231a}", &["\u{27}\u{308}", "\u{231a}"]),
+ ("\u{27}\u{20}", &["\u{27}", "\u{20}"]),
+ ("\u{27}\u{308}\u{20}", &["\u{27}\u{308}", "\u{20}"]),
+ ("\u{27}\u{ad}", &["\u{27}\u{ad}"]),
+ ("\u{27}\u{308}\u{ad}", &["\u{27}\u{308}\u{ad}"]),
+ ("\u{27}\u{300}", &["\u{27}\u{300}"]),
+ ("\u{27}\u{308}\u{300}", &["\u{27}\u{308}\u{300}"]),
+ ("\u{27}\u{200d}", &["\u{27}\u{200d}"]),
+ ("\u{27}\u{308}\u{200d}", &["\u{27}\u{308}\u{200d}"]),
+ ("\u{27}\u{61}\u{2060}", &["\u{27}", "\u{61}\u{2060}"]),
+ (
+ "\u{27}\u{308}\u{61}\u{2060}",
+ &["\u{27}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{27}\u{61}\u{3a}", &["\u{27}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{27}\u{308}\u{61}\u{3a}",
+ &["\u{27}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{27}\u{61}\u{27}", &["\u{27}", "\u{61}", "\u{27}"]),
+ (
+ "\u{27}\u{308}\u{61}\u{27}",
+ &["\u{27}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{27}\u{61}\u{27}\u{2060}",
+ &["\u{27}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{27}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{27}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{27}\u{61}\u{2c}", &["\u{27}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{27}\u{308}\u{61}\u{2c}",
+ &["\u{27}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{27}\u{31}\u{3a}", &["\u{27}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{27}\u{308}\u{31}\u{3a}",
+ &["\u{27}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{27}\u{31}\u{27}", &["\u{27}", "\u{31}", "\u{27}"]),
+ (
+ "\u{27}\u{308}\u{31}\u{27}",
+ &["\u{27}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{27}\u{31}\u{2c}", &["\u{27}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{27}\u{308}\u{31}\u{2c}",
+ &["\u{27}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{27}\u{31}\u{2e}\u{2060}",
+ &["\u{27}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{27}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{27}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{231a}\u{1}", &["\u{231a}", "\u{1}"]),
+ ("\u{231a}\u{308}\u{1}", &["\u{231a}\u{308}", "\u{1}"]),
+ ("\u{231a}\u{d}", &["\u{231a}", "\u{d}"]),
+ ("\u{231a}\u{308}\u{d}", &["\u{231a}\u{308}", "\u{d}"]),
+ ("\u{231a}\u{a}", &["\u{231a}", "\u{a}"]),
+ ("\u{231a}\u{308}\u{a}", &["\u{231a}\u{308}", "\u{a}"]),
+ ("\u{231a}\u{b}", &["\u{231a}", "\u{b}"]),
+ ("\u{231a}\u{308}\u{b}", &["\u{231a}\u{308}", "\u{b}"]),
+ ("\u{231a}\u{3031}", &["\u{231a}", "\u{3031}"]),
+ ("\u{231a}\u{308}\u{3031}", &["\u{231a}\u{308}", "\u{3031}"]),
+ ("\u{231a}\u{41}", &["\u{231a}", "\u{41}"]),
+ ("\u{231a}\u{308}\u{41}", &["\u{231a}\u{308}", "\u{41}"]),
+ ("\u{231a}\u{3a}", &["\u{231a}", "\u{3a}"]),
+ ("\u{231a}\u{308}\u{3a}", &["\u{231a}\u{308}", "\u{3a}"]),
+ ("\u{231a}\u{2c}", &["\u{231a}", "\u{2c}"]),
+ ("\u{231a}\u{308}\u{2c}", &["\u{231a}\u{308}", "\u{2c}"]),
+ ("\u{231a}\u{2e}", &["\u{231a}", "\u{2e}"]),
+ ("\u{231a}\u{308}\u{2e}", &["\u{231a}\u{308}", "\u{2e}"]),
+ ("\u{231a}\u{30}", &["\u{231a}", "\u{30}"]),
+ ("\u{231a}\u{308}\u{30}", &["\u{231a}\u{308}", "\u{30}"]),
+ ("\u{231a}\u{5f}", &["\u{231a}", "\u{5f}"]),
+ ("\u{231a}\u{308}\u{5f}", &["\u{231a}\u{308}", "\u{5f}"]),
+ ("\u{231a}\u{1f1e6}", &["\u{231a}", "\u{1f1e6}"]),
+ (
+ "\u{231a}\u{308}\u{1f1e6}",
+ &["\u{231a}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{231a}\u{5d0}", &["\u{231a}", "\u{5d0}"]),
+ ("\u{231a}\u{308}\u{5d0}", &["\u{231a}\u{308}", "\u{5d0}"]),
+ ("\u{231a}\u{22}", &["\u{231a}", "\u{22}"]),
+ ("\u{231a}\u{308}\u{22}", &["\u{231a}\u{308}", "\u{22}"]),
+ ("\u{231a}\u{27}", &["\u{231a}", "\u{27}"]),
+ ("\u{231a}\u{308}\u{27}", &["\u{231a}\u{308}", "\u{27}"]),
+ ("\u{231a}\u{231a}", &["\u{231a}", "\u{231a}"]),
+ ("\u{231a}\u{308}\u{231a}", &["\u{231a}\u{308}", "\u{231a}"]),
+ ("\u{231a}\u{20}", &["\u{231a}", "\u{20}"]),
+ ("\u{231a}\u{308}\u{20}", &["\u{231a}\u{308}", "\u{20}"]),
+ ("\u{231a}\u{ad}", &["\u{231a}\u{ad}"]),
+ ("\u{231a}\u{308}\u{ad}", &["\u{231a}\u{308}\u{ad}"]),
+ ("\u{231a}\u{300}", &["\u{231a}\u{300}"]),
+ ("\u{231a}\u{308}\u{300}", &["\u{231a}\u{308}\u{300}"]),
+ ("\u{231a}\u{200d}", &["\u{231a}\u{200d}"]),
+ ("\u{231a}\u{308}\u{200d}", &["\u{231a}\u{308}\u{200d}"]),
+ ("\u{231a}\u{61}\u{2060}", &["\u{231a}", "\u{61}\u{2060}"]),
+ (
+ "\u{231a}\u{308}\u{61}\u{2060}",
+ &["\u{231a}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{231a}\u{61}\u{3a}", &["\u{231a}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{231a}\u{308}\u{61}\u{3a}",
+ &["\u{231a}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{231a}\u{61}\u{27}", &["\u{231a}", "\u{61}", "\u{27}"]),
+ (
+ "\u{231a}\u{308}\u{61}\u{27}",
+ &["\u{231a}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{231a}\u{61}\u{27}\u{2060}",
+ &["\u{231a}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{231a}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{231a}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{231a}\u{61}\u{2c}", &["\u{231a}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{231a}\u{308}\u{61}\u{2c}",
+ &["\u{231a}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{231a}\u{31}\u{3a}", &["\u{231a}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{231a}\u{308}\u{31}\u{3a}",
+ &["\u{231a}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{231a}\u{31}\u{27}", &["\u{231a}", "\u{31}", "\u{27}"]),
+ (
+ "\u{231a}\u{308}\u{31}\u{27}",
+ &["\u{231a}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{231a}\u{31}\u{2c}", &["\u{231a}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{231a}\u{308}\u{31}\u{2c}",
+ &["\u{231a}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{231a}\u{31}\u{2e}\u{2060}",
+ &["\u{231a}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{231a}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{231a}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
+ ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
+ ("\u{20}\u{d}", &["\u{20}", "\u{d}"]),
+ ("\u{20}\u{308}\u{d}", &["\u{20}\u{308}", "\u{d}"]),
+ ("\u{20}\u{a}", &["\u{20}", "\u{a}"]),
+ ("\u{20}\u{308}\u{a}", &["\u{20}\u{308}", "\u{a}"]),
+ ("\u{20}\u{b}", &["\u{20}", "\u{b}"]),
+ ("\u{20}\u{308}\u{b}", &["\u{20}\u{308}", "\u{b}"]),
+ ("\u{20}\u{3031}", &["\u{20}", "\u{3031}"]),
+ ("\u{20}\u{308}\u{3031}", &["\u{20}\u{308}", "\u{3031}"]),
+ ("\u{20}\u{41}", &["\u{20}", "\u{41}"]),
+ ("\u{20}\u{308}\u{41}", &["\u{20}\u{308}", "\u{41}"]),
+ ("\u{20}\u{3a}", &["\u{20}", "\u{3a}"]),
+ ("\u{20}\u{308}\u{3a}", &["\u{20}\u{308}", "\u{3a}"]),
+ ("\u{20}\u{2c}", &["\u{20}", "\u{2c}"]),
+ ("\u{20}\u{308}\u{2c}", &["\u{20}\u{308}", "\u{2c}"]),
+ ("\u{20}\u{2e}", &["\u{20}", "\u{2e}"]),
+ ("\u{20}\u{308}\u{2e}", &["\u{20}\u{308}", "\u{2e}"]),
+ ("\u{20}\u{30}", &["\u{20}", "\u{30}"]),
+ ("\u{20}\u{308}\u{30}", &["\u{20}\u{308}", "\u{30}"]),
+ ("\u{20}\u{5f}", &["\u{20}", "\u{5f}"]),
+ ("\u{20}\u{308}\u{5f}", &["\u{20}\u{308}", "\u{5f}"]),
+ ("\u{20}\u{1f1e6}", &["\u{20}", "\u{1f1e6}"]),
+ ("\u{20}\u{308}\u{1f1e6}", &["\u{20}\u{308}", "\u{1f1e6}"]),
+ ("\u{20}\u{5d0}", &["\u{20}", "\u{5d0}"]),
+ ("\u{20}\u{308}\u{5d0}", &["\u{20}\u{308}", "\u{5d0}"]),
+ ("\u{20}\u{22}", &["\u{20}", "\u{22}"]),
+ ("\u{20}\u{308}\u{22}", &["\u{20}\u{308}", "\u{22}"]),
+ ("\u{20}\u{27}", &["\u{20}", "\u{27}"]),
+ ("\u{20}\u{308}\u{27}", &["\u{20}\u{308}", "\u{27}"]),
+ ("\u{20}\u{231a}", &["\u{20}", "\u{231a}"]),
+ ("\u{20}\u{308}\u{231a}", &["\u{20}\u{308}", "\u{231a}"]),
+ ("\u{20}\u{20}", &["\u{20}\u{20}"]),
+ ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
+ ("\u{20}\u{ad}", &["\u{20}\u{ad}"]),
+ ("\u{20}\u{308}\u{ad}", &["\u{20}\u{308}\u{ad}"]),
+ ("\u{20}\u{300}", &["\u{20}\u{300}"]),
+ ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
+ ("\u{20}\u{200d}", &["\u{20}\u{200d}"]),
+ ("\u{20}\u{308}\u{200d}", &["\u{20}\u{308}\u{200d}"]),
+ ("\u{20}\u{61}\u{2060}", &["\u{20}", "\u{61}\u{2060}"]),
+ (
+ "\u{20}\u{308}\u{61}\u{2060}",
+ &["\u{20}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{20}\u{61}\u{3a}", &["\u{20}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{20}\u{308}\u{61}\u{3a}",
+ &["\u{20}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{20}\u{61}\u{27}", &["\u{20}", "\u{61}", "\u{27}"]),
+ (
+ "\u{20}\u{308}\u{61}\u{27}",
+ &["\u{20}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{20}\u{61}\u{27}\u{2060}",
+ &["\u{20}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{20}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{20}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{20}\u{61}\u{2c}", &["\u{20}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{20}\u{308}\u{61}\u{2c}",
+ &["\u{20}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{20}\u{31}\u{3a}", &["\u{20}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{20}\u{308}\u{31}\u{3a}",
+ &["\u{20}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{20}\u{31}\u{27}", &["\u{20}", "\u{31}", "\u{27}"]),
+ (
+ "\u{20}\u{308}\u{31}\u{27}",
+ &["\u{20}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{20}\u{31}\u{2c}", &["\u{20}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{20}\u{308}\u{31}\u{2c}",
+ &["\u{20}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{20}\u{31}\u{2e}\u{2060}",
+ &["\u{20}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{20}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{20}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{ad}\u{1}", &["\u{ad}", "\u{1}"]),
+ ("\u{ad}\u{308}\u{1}", &["\u{ad}\u{308}", "\u{1}"]),
+ ("\u{ad}\u{d}", &["\u{ad}", "\u{d}"]),
+ ("\u{ad}\u{308}\u{d}", &["\u{ad}\u{308}", "\u{d}"]),
+ ("\u{ad}\u{a}", &["\u{ad}", "\u{a}"]),
+ ("\u{ad}\u{308}\u{a}", &["\u{ad}\u{308}", "\u{a}"]),
+ ("\u{ad}\u{b}", &["\u{ad}", "\u{b}"]),
+ ("\u{ad}\u{308}\u{b}", &["\u{ad}\u{308}", "\u{b}"]),
+ ("\u{ad}\u{3031}", &["\u{ad}", "\u{3031}"]),
+ ("\u{ad}\u{308}\u{3031}", &["\u{ad}\u{308}", "\u{3031}"]),
+ ("\u{ad}\u{41}", &["\u{ad}", "\u{41}"]),
+ ("\u{ad}\u{308}\u{41}", &["\u{ad}\u{308}", "\u{41}"]),
+ ("\u{ad}\u{3a}", &["\u{ad}", "\u{3a}"]),
+ ("\u{ad}\u{308}\u{3a}", &["\u{ad}\u{308}", "\u{3a}"]),
+ ("\u{ad}\u{2c}", &["\u{ad}", "\u{2c}"]),
+ ("\u{ad}\u{308}\u{2c}", &["\u{ad}\u{308}", "\u{2c}"]),
+ ("\u{ad}\u{2e}", &["\u{ad}", "\u{2e}"]),
+ ("\u{ad}\u{308}\u{2e}", &["\u{ad}\u{308}", "\u{2e}"]),
+ ("\u{ad}\u{30}", &["\u{ad}", "\u{30}"]),
+ ("\u{ad}\u{308}\u{30}", &["\u{ad}\u{308}", "\u{30}"]),
+ ("\u{ad}\u{5f}", &["\u{ad}", "\u{5f}"]),
+ ("\u{ad}\u{308}\u{5f}", &["\u{ad}\u{308}", "\u{5f}"]),
+ ("\u{ad}\u{1f1e6}", &["\u{ad}", "\u{1f1e6}"]),
+ ("\u{ad}\u{308}\u{1f1e6}", &["\u{ad}\u{308}", "\u{1f1e6}"]),
+ ("\u{ad}\u{5d0}", &["\u{ad}", "\u{5d0}"]),
+ ("\u{ad}\u{308}\u{5d0}", &["\u{ad}\u{308}", "\u{5d0}"]),
+ ("\u{ad}\u{22}", &["\u{ad}", "\u{22}"]),
+ ("\u{ad}\u{308}\u{22}", &["\u{ad}\u{308}", "\u{22}"]),
+ ("\u{ad}\u{27}", &["\u{ad}", "\u{27}"]),
+ ("\u{ad}\u{308}\u{27}", &["\u{ad}\u{308}", "\u{27}"]),
+ ("\u{ad}\u{231a}", &["\u{ad}", "\u{231a}"]),
+ ("\u{ad}\u{308}\u{231a}", &["\u{ad}\u{308}", "\u{231a}"]),
+ ("\u{ad}\u{20}", &["\u{ad}", "\u{20}"]),
+ ("\u{ad}\u{308}\u{20}", &["\u{ad}\u{308}", "\u{20}"]),
+ ("\u{ad}\u{ad}", &["\u{ad}\u{ad}"]),
+ ("\u{ad}\u{308}\u{ad}", &["\u{ad}\u{308}\u{ad}"]),
+ ("\u{ad}\u{300}", &["\u{ad}\u{300}"]),
+ ("\u{ad}\u{308}\u{300}", &["\u{ad}\u{308}\u{300}"]),
+ ("\u{ad}\u{200d}", &["\u{ad}\u{200d}"]),
+ ("\u{ad}\u{308}\u{200d}", &["\u{ad}\u{308}\u{200d}"]),
+ ("\u{ad}\u{61}\u{2060}", &["\u{ad}", "\u{61}\u{2060}"]),
+ (
+ "\u{ad}\u{308}\u{61}\u{2060}",
+ &["\u{ad}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{ad}\u{61}\u{3a}", &["\u{ad}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{ad}\u{308}\u{61}\u{3a}",
+ &["\u{ad}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{ad}\u{61}\u{27}", &["\u{ad}", "\u{61}", "\u{27}"]),
+ (
+ "\u{ad}\u{308}\u{61}\u{27}",
+ &["\u{ad}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{ad}\u{61}\u{27}\u{2060}",
+ &["\u{ad}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{ad}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{ad}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{ad}\u{61}\u{2c}", &["\u{ad}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{ad}\u{308}\u{61}\u{2c}",
+ &["\u{ad}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{ad}\u{31}\u{3a}", &["\u{ad}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{ad}\u{308}\u{31}\u{3a}",
+ &["\u{ad}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{ad}\u{31}\u{27}", &["\u{ad}", "\u{31}", "\u{27}"]),
+ (
+ "\u{ad}\u{308}\u{31}\u{27}",
+ &["\u{ad}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{ad}\u{31}\u{2c}", &["\u{ad}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{ad}\u{308}\u{31}\u{2c}",
+ &["\u{ad}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{ad}\u{31}\u{2e}\u{2060}",
+ &["\u{ad}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{ad}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{ad}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
+ ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
+ ("\u{300}\u{d}", &["\u{300}", "\u{d}"]),
+ ("\u{300}\u{308}\u{d}", &["\u{300}\u{308}", "\u{d}"]),
+ ("\u{300}\u{a}", &["\u{300}", "\u{a}"]),
+ ("\u{300}\u{308}\u{a}", &["\u{300}\u{308}", "\u{a}"]),
+ ("\u{300}\u{b}", &["\u{300}", "\u{b}"]),
+ ("\u{300}\u{308}\u{b}", &["\u{300}\u{308}", "\u{b}"]),
+ ("\u{300}\u{3031}", &["\u{300}", "\u{3031}"]),
+ ("\u{300}\u{308}\u{3031}", &["\u{300}\u{308}", "\u{3031}"]),
+ ("\u{300}\u{41}", &["\u{300}", "\u{41}"]),
+ ("\u{300}\u{308}\u{41}", &["\u{300}\u{308}", "\u{41}"]),
+ ("\u{300}\u{3a}", &["\u{300}", "\u{3a}"]),
+ ("\u{300}\u{308}\u{3a}", &["\u{300}\u{308}", "\u{3a}"]),
+ ("\u{300}\u{2c}", &["\u{300}", "\u{2c}"]),
+ ("\u{300}\u{308}\u{2c}", &["\u{300}\u{308}", "\u{2c}"]),
+ ("\u{300}\u{2e}", &["\u{300}", "\u{2e}"]),
+ ("\u{300}\u{308}\u{2e}", &["\u{300}\u{308}", "\u{2e}"]),
+ ("\u{300}\u{30}", &["\u{300}", "\u{30}"]),
+ ("\u{300}\u{308}\u{30}", &["\u{300}\u{308}", "\u{30}"]),
+ ("\u{300}\u{5f}", &["\u{300}", "\u{5f}"]),
+ ("\u{300}\u{308}\u{5f}", &["\u{300}\u{308}", "\u{5f}"]),
+ ("\u{300}\u{1f1e6}", &["\u{300}", "\u{1f1e6}"]),
+ ("\u{300}\u{308}\u{1f1e6}", &["\u{300}\u{308}", "\u{1f1e6}"]),
+ ("\u{300}\u{5d0}", &["\u{300}", "\u{5d0}"]),
+ ("\u{300}\u{308}\u{5d0}", &["\u{300}\u{308}", "\u{5d0}"]),
+ ("\u{300}\u{22}", &["\u{300}", "\u{22}"]),
+ ("\u{300}\u{308}\u{22}", &["\u{300}\u{308}", "\u{22}"]),
+ ("\u{300}\u{27}", &["\u{300}", "\u{27}"]),
+ ("\u{300}\u{308}\u{27}", &["\u{300}\u{308}", "\u{27}"]),
+ ("\u{300}\u{231a}", &["\u{300}", "\u{231a}"]),
+ ("\u{300}\u{308}\u{231a}", &["\u{300}\u{308}", "\u{231a}"]),
+ ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
+ ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
+ ("\u{300}\u{ad}", &["\u{300}\u{ad}"]),
+ ("\u{300}\u{308}\u{ad}", &["\u{300}\u{308}\u{ad}"]),
+ ("\u{300}\u{300}", &["\u{300}\u{300}"]),
+ ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
+ ("\u{300}\u{200d}", &["\u{300}\u{200d}"]),
+ ("\u{300}\u{308}\u{200d}", &["\u{300}\u{308}\u{200d}"]),
+ ("\u{300}\u{61}\u{2060}", &["\u{300}", "\u{61}\u{2060}"]),
+ (
+ "\u{300}\u{308}\u{61}\u{2060}",
+ &["\u{300}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{300}\u{61}\u{3a}", &["\u{300}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{300}\u{308}\u{61}\u{3a}",
+ &["\u{300}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{300}\u{61}\u{27}", &["\u{300}", "\u{61}", "\u{27}"]),
+ (
+ "\u{300}\u{308}\u{61}\u{27}",
+ &["\u{300}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{300}\u{61}\u{27}\u{2060}",
+ &["\u{300}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{300}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{300}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{300}\u{61}\u{2c}", &["\u{300}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{300}\u{308}\u{61}\u{2c}",
+ &["\u{300}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{300}\u{31}\u{3a}", &["\u{300}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{300}\u{308}\u{31}\u{3a}",
+ &["\u{300}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{300}\u{31}\u{27}", &["\u{300}", "\u{31}", "\u{27}"]),
+ (
+ "\u{300}\u{308}\u{31}\u{27}",
+ &["\u{300}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{300}\u{31}\u{2c}", &["\u{300}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{300}\u{308}\u{31}\u{2c}",
+ &["\u{300}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{300}\u{31}\u{2e}\u{2060}",
+ &["\u{300}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{300}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{300}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{200d}\u{1}", &["\u{200d}", "\u{1}"]),
+ ("\u{200d}\u{308}\u{1}", &["\u{200d}\u{308}", "\u{1}"]),
+ ("\u{200d}\u{d}", &["\u{200d}", "\u{d}"]),
+ ("\u{200d}\u{308}\u{d}", &["\u{200d}\u{308}", "\u{d}"]),
+ ("\u{200d}\u{a}", &["\u{200d}", "\u{a}"]),
+ ("\u{200d}\u{308}\u{a}", &["\u{200d}\u{308}", "\u{a}"]),
+ ("\u{200d}\u{b}", &["\u{200d}", "\u{b}"]),
+ ("\u{200d}\u{308}\u{b}", &["\u{200d}\u{308}", "\u{b}"]),
+ ("\u{200d}\u{3031}", &["\u{200d}", "\u{3031}"]),
+ ("\u{200d}\u{308}\u{3031}", &["\u{200d}\u{308}", "\u{3031}"]),
+ ("\u{200d}\u{41}", &["\u{200d}", "\u{41}"]),
+ ("\u{200d}\u{308}\u{41}", &["\u{200d}\u{308}", "\u{41}"]),
+ ("\u{200d}\u{3a}", &["\u{200d}", "\u{3a}"]),
+ ("\u{200d}\u{308}\u{3a}", &["\u{200d}\u{308}", "\u{3a}"]),
+ ("\u{200d}\u{2c}", &["\u{200d}", "\u{2c}"]),
+ ("\u{200d}\u{308}\u{2c}", &["\u{200d}\u{308}", "\u{2c}"]),
+ ("\u{200d}\u{2e}", &["\u{200d}", "\u{2e}"]),
+ ("\u{200d}\u{308}\u{2e}", &["\u{200d}\u{308}", "\u{2e}"]),
+ ("\u{200d}\u{30}", &["\u{200d}", "\u{30}"]),
+ ("\u{200d}\u{308}\u{30}", &["\u{200d}\u{308}", "\u{30}"]),
+ ("\u{200d}\u{5f}", &["\u{200d}", "\u{5f}"]),
+ ("\u{200d}\u{308}\u{5f}", &["\u{200d}\u{308}", "\u{5f}"]),
+ ("\u{200d}\u{1f1e6}", &["\u{200d}", "\u{1f1e6}"]),
+ (
+ "\u{200d}\u{308}\u{1f1e6}",
+ &["\u{200d}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{200d}\u{5d0}", &["\u{200d}", "\u{5d0}"]),
+ ("\u{200d}\u{308}\u{5d0}", &["\u{200d}\u{308}", "\u{5d0}"]),
+ ("\u{200d}\u{22}", &["\u{200d}", "\u{22}"]),
+ ("\u{200d}\u{308}\u{22}", &["\u{200d}\u{308}", "\u{22}"]),
+ ("\u{200d}\u{27}", &["\u{200d}", "\u{27}"]),
+ ("\u{200d}\u{308}\u{27}", &["\u{200d}\u{308}", "\u{27}"]),
+ ("\u{200d}\u{231a}", &["\u{200d}\u{231a}"]),
+ ("\u{200d}\u{308}\u{231a}", &["\u{200d}\u{308}", "\u{231a}"]),
+ ("\u{200d}\u{20}", &["\u{200d}", "\u{20}"]),
+ ("\u{200d}\u{308}\u{20}", &["\u{200d}\u{308}", "\u{20}"]),
+ ("\u{200d}\u{ad}", &["\u{200d}\u{ad}"]),
+ ("\u{200d}\u{308}\u{ad}", &["\u{200d}\u{308}\u{ad}"]),
+ ("\u{200d}\u{300}", &["\u{200d}\u{300}"]),
+ ("\u{200d}\u{308}\u{300}", &["\u{200d}\u{308}\u{300}"]),
+ ("\u{200d}\u{200d}", &["\u{200d}\u{200d}"]),
+ ("\u{200d}\u{308}\u{200d}", &["\u{200d}\u{308}\u{200d}"]),
+ ("\u{200d}\u{61}\u{2060}", &["\u{200d}", "\u{61}\u{2060}"]),
+ (
+ "\u{200d}\u{308}\u{61}\u{2060}",
+ &["\u{200d}\u{308}", "\u{61}\u{2060}"],
+ ),
+ ("\u{200d}\u{61}\u{3a}", &["\u{200d}", "\u{61}", "\u{3a}"]),
+ (
+ "\u{200d}\u{308}\u{61}\u{3a}",
+ &["\u{200d}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ ("\u{200d}\u{61}\u{27}", &["\u{200d}", "\u{61}", "\u{27}"]),
+ (
+ "\u{200d}\u{308}\u{61}\u{27}",
+ &["\u{200d}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{200d}\u{61}\u{27}\u{2060}",
+ &["\u{200d}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{200d}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{200d}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ ("\u{200d}\u{61}\u{2c}", &["\u{200d}", "\u{61}", "\u{2c}"]),
+ (
+ "\u{200d}\u{308}\u{61}\u{2c}",
+ &["\u{200d}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ ("\u{200d}\u{31}\u{3a}", &["\u{200d}", "\u{31}", "\u{3a}"]),
+ (
+ "\u{200d}\u{308}\u{31}\u{3a}",
+ &["\u{200d}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ ("\u{200d}\u{31}\u{27}", &["\u{200d}", "\u{31}", "\u{27}"]),
+ (
+ "\u{200d}\u{308}\u{31}\u{27}",
+ &["\u{200d}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ ("\u{200d}\u{31}\u{2c}", &["\u{200d}", "\u{31}", "\u{2c}"]),
+ (
+ "\u{200d}\u{308}\u{31}\u{2c}",
+ &["\u{200d}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{200d}\u{31}\u{2e}\u{2060}",
+ &["\u{200d}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{200d}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{200d}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{61}\u{2060}\u{1}", &["\u{61}\u{2060}", "\u{1}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{1}",
+ &["\u{61}\u{2060}\u{308}", "\u{1}"],
+ ),
+ ("\u{61}\u{2060}\u{d}", &["\u{61}\u{2060}", "\u{d}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{d}",
+ &["\u{61}\u{2060}\u{308}", "\u{d}"],
+ ),
+ ("\u{61}\u{2060}\u{a}", &["\u{61}\u{2060}", "\u{a}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{a}",
+ &["\u{61}\u{2060}\u{308}", "\u{a}"],
+ ),
+ ("\u{61}\u{2060}\u{b}", &["\u{61}\u{2060}", "\u{b}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{b}",
+ &["\u{61}\u{2060}\u{308}", "\u{b}"],
+ ),
+ ("\u{61}\u{2060}\u{3031}", &["\u{61}\u{2060}", "\u{3031}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{3031}",
+ &["\u{61}\u{2060}\u{308}", "\u{3031}"],
+ ),
+ ("\u{61}\u{2060}\u{41}", &["\u{61}\u{2060}\u{41}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{41}",
+ &["\u{61}\u{2060}\u{308}\u{41}"],
+ ),
+ ("\u{61}\u{2060}\u{3a}", &["\u{61}\u{2060}", "\u{3a}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{3a}",
+ &["\u{61}\u{2060}\u{308}", "\u{3a}"],
+ ),
+ ("\u{61}\u{2060}\u{2c}", &["\u{61}\u{2060}", "\u{2c}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{2c}",
+ &["\u{61}\u{2060}\u{308}", "\u{2c}"],
+ ),
+ ("\u{61}\u{2060}\u{2e}", &["\u{61}\u{2060}", "\u{2e}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{2e}",
+ &["\u{61}\u{2060}\u{308}", "\u{2e}"],
+ ),
+ ("\u{61}\u{2060}\u{30}", &["\u{61}\u{2060}\u{30}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{30}",
+ &["\u{61}\u{2060}\u{308}\u{30}"],
+ ),
+ ("\u{61}\u{2060}\u{5f}", &["\u{61}\u{2060}\u{5f}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{5f}",
+ &["\u{61}\u{2060}\u{308}\u{5f}"],
+ ),
+ ("\u{61}\u{2060}\u{1f1e6}", &["\u{61}\u{2060}", "\u{1f1e6}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{1f1e6}",
+ &["\u{61}\u{2060}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{61}\u{2060}\u{5d0}", &["\u{61}\u{2060}\u{5d0}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{5d0}",
+ &["\u{61}\u{2060}\u{308}\u{5d0}"],
+ ),
+ ("\u{61}\u{2060}\u{22}", &["\u{61}\u{2060}", "\u{22}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{22}",
+ &["\u{61}\u{2060}\u{308}", "\u{22}"],
+ ),
+ ("\u{61}\u{2060}\u{27}", &["\u{61}\u{2060}", "\u{27}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{27}",
+ &["\u{61}\u{2060}\u{308}", "\u{27}"],
+ ),
+ ("\u{61}\u{2060}\u{231a}", &["\u{61}\u{2060}", "\u{231a}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{231a}",
+ &["\u{61}\u{2060}\u{308}", "\u{231a}"],
+ ),
+ ("\u{61}\u{2060}\u{20}", &["\u{61}\u{2060}", "\u{20}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{20}",
+ &["\u{61}\u{2060}\u{308}", "\u{20}"],
+ ),
+ ("\u{61}\u{2060}\u{ad}", &["\u{61}\u{2060}\u{ad}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{ad}",
+ &["\u{61}\u{2060}\u{308}\u{ad}"],
+ ),
+ ("\u{61}\u{2060}\u{300}", &["\u{61}\u{2060}\u{300}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{300}",
+ &["\u{61}\u{2060}\u{308}\u{300}"],
+ ),
+ ("\u{61}\u{2060}\u{200d}", &["\u{61}\u{2060}\u{200d}"]),
+ (
+ "\u{61}\u{2060}\u{308}\u{200d}",
+ &["\u{61}\u{2060}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{61}\u{2060}",
+ &["\u{61}\u{2060}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{61}\u{2060}",
+ &["\u{61}\u{2060}\u{308}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{61}\u{3a}",
+ &["\u{61}\u{2060}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{61}\u{3a}",
+ &["\u{61}\u{2060}\u{308}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{61}\u{27}",
+ &["\u{61}\u{2060}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{61}\u{27}",
+ &["\u{61}\u{2060}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{2060}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{2060}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{61}\u{2c}",
+ &["\u{61}\u{2060}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{61}\u{2c}",
+ &["\u{61}\u{2060}\u{308}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{31}\u{3a}",
+ &["\u{61}\u{2060}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{31}\u{3a}",
+ &["\u{61}\u{2060}\u{308}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{31}\u{27}",
+ &["\u{61}\u{2060}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{31}\u{27}",
+ &["\u{61}\u{2060}\u{308}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{31}\u{2c}",
+ &["\u{61}\u{2060}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{31}\u{2c}",
+ &["\u{61}\u{2060}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{31}\u{2e}\u{2060}",
+ &["\u{61}\u{2060}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2060}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{61}\u{2060}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{61}\u{3a}\u{1}", &["\u{61}", "\u{3a}", "\u{1}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{1}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{1}"],
+ ),
+ ("\u{61}\u{3a}\u{d}", &["\u{61}", "\u{3a}", "\u{d}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{d}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{d}"],
+ ),
+ ("\u{61}\u{3a}\u{a}", &["\u{61}", "\u{3a}", "\u{a}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{a}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{a}"],
+ ),
+ ("\u{61}\u{3a}\u{b}", &["\u{61}", "\u{3a}", "\u{b}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{b}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{b}"],
+ ),
+ ("\u{61}\u{3a}\u{3031}", &["\u{61}", "\u{3a}", "\u{3031}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{3031}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{3031}"],
+ ),
+ ("\u{61}\u{3a}\u{41}", &["\u{61}\u{3a}\u{41}"]),
+ ("\u{61}\u{3a}\u{308}\u{41}", &["\u{61}\u{3a}\u{308}\u{41}"]),
+ ("\u{61}\u{3a}\u{3a}", &["\u{61}", "\u{3a}", "\u{3a}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{3a}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{3a}"],
+ ),
+ ("\u{61}\u{3a}\u{2c}", &["\u{61}", "\u{3a}", "\u{2c}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{2c}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{2c}"],
+ ),
+ ("\u{61}\u{3a}\u{2e}", &["\u{61}", "\u{3a}", "\u{2e}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{2e}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{2e}"],
+ ),
+ ("\u{61}\u{3a}\u{30}", &["\u{61}", "\u{3a}", "\u{30}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{30}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{30}"],
+ ),
+ ("\u{61}\u{3a}\u{5f}", &["\u{61}", "\u{3a}", "\u{5f}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{5f}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{5f}"],
+ ),
+ ("\u{61}\u{3a}\u{1f1e6}", &["\u{61}", "\u{3a}", "\u{1f1e6}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{1f1e6}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{61}\u{3a}\u{5d0}", &["\u{61}\u{3a}\u{5d0}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{5d0}",
+ &["\u{61}\u{3a}\u{308}\u{5d0}"],
+ ),
+ ("\u{61}\u{3a}\u{22}", &["\u{61}", "\u{3a}", "\u{22}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{22}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{22}"],
+ ),
+ ("\u{61}\u{3a}\u{27}", &["\u{61}", "\u{3a}", "\u{27}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{27}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{27}"],
+ ),
+ ("\u{61}\u{3a}\u{231a}", &["\u{61}", "\u{3a}", "\u{231a}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{231a}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{231a}"],
+ ),
+ ("\u{61}\u{3a}\u{20}", &["\u{61}", "\u{3a}", "\u{20}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{20}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{20}"],
+ ),
+ ("\u{61}\u{3a}\u{ad}", &["\u{61}", "\u{3a}\u{ad}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{ad}",
+ &["\u{61}", "\u{3a}\u{308}\u{ad}"],
+ ),
+ ("\u{61}\u{3a}\u{300}", &["\u{61}", "\u{3a}\u{300}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{300}",
+ &["\u{61}", "\u{3a}\u{308}\u{300}"],
+ ),
+ ("\u{61}\u{3a}\u{200d}", &["\u{61}", "\u{3a}\u{200d}"]),
+ (
+ "\u{61}\u{3a}\u{308}\u{200d}",
+ &["\u{61}", "\u{3a}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{61}\u{2060}",
+ &["\u{61}\u{3a}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{61}\u{2060}",
+ &["\u{61}\u{3a}\u{308}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{61}\u{3a}",
+ &["\u{61}\u{3a}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{61}\u{3a}",
+ &["\u{61}\u{3a}\u{308}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{61}\u{27}",
+ &["\u{61}\u{3a}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{61}\u{27}",
+ &["\u{61}\u{3a}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{3a}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{3a}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{61}\u{2c}",
+ &["\u{61}\u{3a}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{61}\u{2c}",
+ &["\u{61}\u{3a}\u{308}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{31}\u{3a}",
+ &["\u{61}", "\u{3a}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{31}\u{3a}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{31}\u{27}",
+ &["\u{61}", "\u{3a}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{31}\u{27}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{31}\u{2c}",
+ &["\u{61}", "\u{3a}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{31}\u{2c}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{3a}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{3a}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{61}\u{27}\u{1}", &["\u{61}", "\u{27}", "\u{1}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{1}",
+ &["\u{61}", "\u{27}\u{308}", "\u{1}"],
+ ),
+ ("\u{61}\u{27}\u{d}", &["\u{61}", "\u{27}", "\u{d}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{d}",
+ &["\u{61}", "\u{27}\u{308}", "\u{d}"],
+ ),
+ ("\u{61}\u{27}\u{a}", &["\u{61}", "\u{27}", "\u{a}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{a}",
+ &["\u{61}", "\u{27}\u{308}", "\u{a}"],
+ ),
+ ("\u{61}\u{27}\u{b}", &["\u{61}", "\u{27}", "\u{b}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{b}",
+ &["\u{61}", "\u{27}\u{308}", "\u{b}"],
+ ),
+ ("\u{61}\u{27}\u{3031}", &["\u{61}", "\u{27}", "\u{3031}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{3031}",
+ &["\u{61}", "\u{27}\u{308}", "\u{3031}"],
+ ),
+ ("\u{61}\u{27}\u{41}", &["\u{61}\u{27}\u{41}"]),
+ ("\u{61}\u{27}\u{308}\u{41}", &["\u{61}\u{27}\u{308}\u{41}"]),
+ ("\u{61}\u{27}\u{3a}", &["\u{61}", "\u{27}", "\u{3a}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{3a}",
+ &["\u{61}", "\u{27}\u{308}", "\u{3a}"],
+ ),
+ ("\u{61}\u{27}\u{2c}", &["\u{61}", "\u{27}", "\u{2c}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{2c}",
+ &["\u{61}", "\u{27}\u{308}", "\u{2c}"],
+ ),
+ ("\u{61}\u{27}\u{2e}", &["\u{61}", "\u{27}", "\u{2e}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{2e}",
+ &["\u{61}", "\u{27}\u{308}", "\u{2e}"],
+ ),
+ ("\u{61}\u{27}\u{30}", &["\u{61}", "\u{27}", "\u{30}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{30}",
+ &["\u{61}", "\u{27}\u{308}", "\u{30}"],
+ ),
+ ("\u{61}\u{27}\u{5f}", &["\u{61}", "\u{27}", "\u{5f}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{5f}",
+ &["\u{61}", "\u{27}\u{308}", "\u{5f}"],
+ ),
+ ("\u{61}\u{27}\u{1f1e6}", &["\u{61}", "\u{27}", "\u{1f1e6}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{1f1e6}",
+ &["\u{61}", "\u{27}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{61}\u{27}\u{5d0}", &["\u{61}\u{27}\u{5d0}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{5d0}",
+ &["\u{61}\u{27}\u{308}\u{5d0}"],
+ ),
+ ("\u{61}\u{27}\u{22}", &["\u{61}", "\u{27}", "\u{22}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{22}",
+ &["\u{61}", "\u{27}\u{308}", "\u{22}"],
+ ),
+ ("\u{61}\u{27}\u{27}", &["\u{61}", "\u{27}", "\u{27}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{27}",
+ &["\u{61}", "\u{27}\u{308}", "\u{27}"],
+ ),
+ ("\u{61}\u{27}\u{231a}", &["\u{61}", "\u{27}", "\u{231a}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{231a}",
+ &["\u{61}", "\u{27}\u{308}", "\u{231a}"],
+ ),
+ ("\u{61}\u{27}\u{20}", &["\u{61}", "\u{27}", "\u{20}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{20}",
+ &["\u{61}", "\u{27}\u{308}", "\u{20}"],
+ ),
+ ("\u{61}\u{27}\u{ad}", &["\u{61}", "\u{27}\u{ad}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{ad}",
+ &["\u{61}", "\u{27}\u{308}\u{ad}"],
+ ),
+ ("\u{61}\u{27}\u{300}", &["\u{61}", "\u{27}\u{300}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{300}",
+ &["\u{61}", "\u{27}\u{308}\u{300}"],
+ ),
+ ("\u{61}\u{27}\u{200d}", &["\u{61}", "\u{27}\u{200d}"]),
+ (
+ "\u{61}\u{27}\u{308}\u{200d}",
+ &["\u{61}", "\u{27}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{61}\u{27}\u{61}\u{2060}",
+ &["\u{61}\u{27}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{61}\u{2060}",
+ &["\u{61}\u{27}\u{308}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{61}\u{3a}",
+ &["\u{61}\u{27}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{61}\u{3a}",
+ &["\u{61}\u{27}\u{308}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{61}\u{27}",
+ &["\u{61}\u{27}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{61}\u{27}",
+ &["\u{61}\u{27}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{27}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{27}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{61}\u{2c}",
+ &["\u{61}\u{27}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{61}\u{2c}",
+ &["\u{61}\u{27}\u{308}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{31}\u{3a}",
+ &["\u{61}", "\u{27}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{31}\u{3a}",
+ &["\u{61}", "\u{27}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{31}\u{27}",
+ &["\u{61}", "\u{27}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{31}\u{27}",
+ &["\u{61}", "\u{27}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{31}\u{2c}",
+ &["\u{61}", "\u{27}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{31}\u{2c}",
+ &["\u{61}", "\u{27}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{27}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{27}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{1}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{1}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{1}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{1}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{d}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{d}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{d}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{d}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{a}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{a}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{b}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{b}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{b}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{b}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{3031}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{3031}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{3031}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{3031}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{41}",
+ &["\u{61}\u{27}\u{2060}\u{41}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{41}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{41}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{3a}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{3a}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{2c}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{2c}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{2e}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{2e}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{2e}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{2e}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{30}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{30}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{30}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{30}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{5f}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{5f}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{5f}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{5f}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{1f1e6}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{1f1e6}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{1f1e6}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{1f1e6}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{5d0}",
+ &["\u{61}\u{27}\u{2060}\u{5d0}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{5d0}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{5d0}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{22}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{22}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{22}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{22}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{27}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{27}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{231a}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{231a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{231a}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{231a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{20}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{20}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{20}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{20}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{ad}",
+ &["\u{61}", "\u{27}\u{2060}\u{ad}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{ad}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}\u{ad}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{300}",
+ &["\u{61}", "\u{27}\u{2060}\u{300}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{300}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}\u{300}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{200d}",
+ &["\u{61}", "\u{27}\u{2060}\u{200d}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{200d}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{61}\u{2060}",
+ &["\u{61}\u{27}\u{2060}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{61}\u{2060}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{61}\u{3a}",
+ &["\u{61}\u{27}\u{2060}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{61}\u{3a}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{61}\u{27}",
+ &["\u{61}\u{27}\u{2060}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{61}\u{27}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{27}\u{2060}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{61}\u{2c}",
+ &["\u{61}\u{27}\u{2060}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{61}\u{2c}",
+ &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{31}\u{3a}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{31}\u{3a}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{31}\u{27}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{31}\u{27}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{31}\u{2c}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{31}\u{2c}",
+ &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{27}\u{2060}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{27}\u{2060}\u{308}\u{31}\u{2e}\u{2060}",
+ &[
+ "\u{61}",
+ "\u{27}\u{2060}\u{308}",
+ "\u{31}",
+ "\u{2e}\u{2060}",
+ ],
+ ),
+ ("\u{61}\u{2c}\u{1}", &["\u{61}", "\u{2c}", "\u{1}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{1}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{1}"],
+ ),
+ ("\u{61}\u{2c}\u{d}", &["\u{61}", "\u{2c}", "\u{d}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{d}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{d}"],
+ ),
+ ("\u{61}\u{2c}\u{a}", &["\u{61}", "\u{2c}", "\u{a}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{a}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{a}"],
+ ),
+ ("\u{61}\u{2c}\u{b}", &["\u{61}", "\u{2c}", "\u{b}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{b}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{b}"],
+ ),
+ ("\u{61}\u{2c}\u{3031}", &["\u{61}", "\u{2c}", "\u{3031}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{3031}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{3031}"],
+ ),
+ ("\u{61}\u{2c}\u{41}", &["\u{61}", "\u{2c}", "\u{41}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{41}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{41}"],
+ ),
+ ("\u{61}\u{2c}\u{3a}", &["\u{61}", "\u{2c}", "\u{3a}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{3a}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{3a}"],
+ ),
+ ("\u{61}\u{2c}\u{2c}", &["\u{61}", "\u{2c}", "\u{2c}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{2c}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{2c}"],
+ ),
+ ("\u{61}\u{2c}\u{2e}", &["\u{61}", "\u{2c}", "\u{2e}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{2e}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{2e}"],
+ ),
+ ("\u{61}\u{2c}\u{30}", &["\u{61}", "\u{2c}", "\u{30}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{30}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{30}"],
+ ),
+ ("\u{61}\u{2c}\u{5f}", &["\u{61}", "\u{2c}", "\u{5f}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{5f}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{5f}"],
+ ),
+ ("\u{61}\u{2c}\u{1f1e6}", &["\u{61}", "\u{2c}", "\u{1f1e6}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{1f1e6}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{61}\u{2c}\u{5d0}", &["\u{61}", "\u{2c}", "\u{5d0}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{5d0}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{5d0}"],
+ ),
+ ("\u{61}\u{2c}\u{22}", &["\u{61}", "\u{2c}", "\u{22}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{22}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{22}"],
+ ),
+ ("\u{61}\u{2c}\u{27}", &["\u{61}", "\u{2c}", "\u{27}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{27}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{27}"],
+ ),
+ ("\u{61}\u{2c}\u{231a}", &["\u{61}", "\u{2c}", "\u{231a}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{231a}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{231a}"],
+ ),
+ ("\u{61}\u{2c}\u{20}", &["\u{61}", "\u{2c}", "\u{20}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{20}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{20}"],
+ ),
+ ("\u{61}\u{2c}\u{ad}", &["\u{61}", "\u{2c}\u{ad}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{ad}",
+ &["\u{61}", "\u{2c}\u{308}\u{ad}"],
+ ),
+ ("\u{61}\u{2c}\u{300}", &["\u{61}", "\u{2c}\u{300}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{300}",
+ &["\u{61}", "\u{2c}\u{308}\u{300}"],
+ ),
+ ("\u{61}\u{2c}\u{200d}", &["\u{61}", "\u{2c}\u{200d}"]),
+ (
+ "\u{61}\u{2c}\u{308}\u{200d}",
+ &["\u{61}", "\u{2c}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{61}\u{2060}",
+ &["\u{61}", "\u{2c}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{61}\u{2060}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{61}\u{3a}",
+ &["\u{61}", "\u{2c}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{61}\u{3a}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{61}\u{27}",
+ &["\u{61}", "\u{2c}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{61}\u{27}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{61}\u{27}\u{2060}",
+ &["\u{61}", "\u{2c}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{61}\u{2c}",
+ &["\u{61}", "\u{2c}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{61}\u{2c}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{31}\u{3a}",
+ &["\u{61}", "\u{2c}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{31}\u{3a}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{31}\u{27}",
+ &["\u{61}", "\u{2c}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{31}\u{27}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{31}\u{2c}",
+ &["\u{61}", "\u{2c}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{31}\u{2c}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{2c}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{31}\u{3a}\u{1}", &["\u{31}", "\u{3a}", "\u{1}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{1}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{1}"],
+ ),
+ ("\u{31}\u{3a}\u{d}", &["\u{31}", "\u{3a}", "\u{d}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{d}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{d}"],
+ ),
+ ("\u{31}\u{3a}\u{a}", &["\u{31}", "\u{3a}", "\u{a}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{a}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{a}"],
+ ),
+ ("\u{31}\u{3a}\u{b}", &["\u{31}", "\u{3a}", "\u{b}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{b}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{b}"],
+ ),
+ ("\u{31}\u{3a}\u{3031}", &["\u{31}", "\u{3a}", "\u{3031}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{3031}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{3031}"],
+ ),
+ ("\u{31}\u{3a}\u{41}", &["\u{31}", "\u{3a}", "\u{41}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{41}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{41}"],
+ ),
+ ("\u{31}\u{3a}\u{3a}", &["\u{31}", "\u{3a}", "\u{3a}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{3a}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{3a}"],
+ ),
+ ("\u{31}\u{3a}\u{2c}", &["\u{31}", "\u{3a}", "\u{2c}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{2c}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{2c}"],
+ ),
+ ("\u{31}\u{3a}\u{2e}", &["\u{31}", "\u{3a}", "\u{2e}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{2e}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{2e}"],
+ ),
+ ("\u{31}\u{3a}\u{30}", &["\u{31}", "\u{3a}", "\u{30}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{30}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{30}"],
+ ),
+ ("\u{31}\u{3a}\u{5f}", &["\u{31}", "\u{3a}", "\u{5f}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{5f}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{5f}"],
+ ),
+ ("\u{31}\u{3a}\u{1f1e6}", &["\u{31}", "\u{3a}", "\u{1f1e6}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{1f1e6}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{31}\u{3a}\u{5d0}", &["\u{31}", "\u{3a}", "\u{5d0}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{5d0}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{5d0}"],
+ ),
+ ("\u{31}\u{3a}\u{22}", &["\u{31}", "\u{3a}", "\u{22}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{22}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{22}"],
+ ),
+ ("\u{31}\u{3a}\u{27}", &["\u{31}", "\u{3a}", "\u{27}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{27}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{27}"],
+ ),
+ ("\u{31}\u{3a}\u{231a}", &["\u{31}", "\u{3a}", "\u{231a}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{231a}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{231a}"],
+ ),
+ ("\u{31}\u{3a}\u{20}", &["\u{31}", "\u{3a}", "\u{20}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{20}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{20}"],
+ ),
+ ("\u{31}\u{3a}\u{ad}", &["\u{31}", "\u{3a}\u{ad}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{ad}",
+ &["\u{31}", "\u{3a}\u{308}\u{ad}"],
+ ),
+ ("\u{31}\u{3a}\u{300}", &["\u{31}", "\u{3a}\u{300}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{300}",
+ &["\u{31}", "\u{3a}\u{308}\u{300}"],
+ ),
+ ("\u{31}\u{3a}\u{200d}", &["\u{31}", "\u{3a}\u{200d}"]),
+ (
+ "\u{31}\u{3a}\u{308}\u{200d}",
+ &["\u{31}", "\u{3a}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{61}\u{2060}",
+ &["\u{31}", "\u{3a}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{61}\u{2060}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{61}\u{3a}",
+ &["\u{31}", "\u{3a}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{61}\u{3a}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{61}\u{27}",
+ &["\u{31}", "\u{3a}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{61}\u{27}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{3a}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{61}\u{2c}",
+ &["\u{31}", "\u{3a}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{61}\u{2c}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{31}\u{3a}",
+ &["\u{31}", "\u{3a}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{31}\u{3a}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{31}\u{27}",
+ &["\u{31}", "\u{3a}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{31}\u{27}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{31}\u{2c}",
+ &["\u{31}", "\u{3a}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{31}\u{2c}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{31}\u{2e}\u{2060}",
+ &["\u{31}", "\u{3a}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{31}\u{27}\u{1}", &["\u{31}", "\u{27}", "\u{1}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{1}",
+ &["\u{31}", "\u{27}\u{308}", "\u{1}"],
+ ),
+ ("\u{31}\u{27}\u{d}", &["\u{31}", "\u{27}", "\u{d}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{d}",
+ &["\u{31}", "\u{27}\u{308}", "\u{d}"],
+ ),
+ ("\u{31}\u{27}\u{a}", &["\u{31}", "\u{27}", "\u{a}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{a}",
+ &["\u{31}", "\u{27}\u{308}", "\u{a}"],
+ ),
+ ("\u{31}\u{27}\u{b}", &["\u{31}", "\u{27}", "\u{b}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{b}",
+ &["\u{31}", "\u{27}\u{308}", "\u{b}"],
+ ),
+ ("\u{31}\u{27}\u{3031}", &["\u{31}", "\u{27}", "\u{3031}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{3031}",
+ &["\u{31}", "\u{27}\u{308}", "\u{3031}"],
+ ),
+ ("\u{31}\u{27}\u{41}", &["\u{31}", "\u{27}", "\u{41}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{41}",
+ &["\u{31}", "\u{27}\u{308}", "\u{41}"],
+ ),
+ ("\u{31}\u{27}\u{3a}", &["\u{31}", "\u{27}", "\u{3a}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{3a}",
+ &["\u{31}", "\u{27}\u{308}", "\u{3a}"],
+ ),
+ ("\u{31}\u{27}\u{2c}", &["\u{31}", "\u{27}", "\u{2c}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{2c}",
+ &["\u{31}", "\u{27}\u{308}", "\u{2c}"],
+ ),
+ ("\u{31}\u{27}\u{2e}", &["\u{31}", "\u{27}", "\u{2e}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{2e}",
+ &["\u{31}", "\u{27}\u{308}", "\u{2e}"],
+ ),
+ ("\u{31}\u{27}\u{30}", &["\u{31}\u{27}\u{30}"]),
+ ("\u{31}\u{27}\u{308}\u{30}", &["\u{31}\u{27}\u{308}\u{30}"]),
+ ("\u{31}\u{27}\u{5f}", &["\u{31}", "\u{27}", "\u{5f}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{5f}",
+ &["\u{31}", "\u{27}\u{308}", "\u{5f}"],
+ ),
+ ("\u{31}\u{27}\u{1f1e6}", &["\u{31}", "\u{27}", "\u{1f1e6}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{1f1e6}",
+ &["\u{31}", "\u{27}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{31}\u{27}\u{5d0}", &["\u{31}", "\u{27}", "\u{5d0}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{5d0}",
+ &["\u{31}", "\u{27}\u{308}", "\u{5d0}"],
+ ),
+ ("\u{31}\u{27}\u{22}", &["\u{31}", "\u{27}", "\u{22}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{22}",
+ &["\u{31}", "\u{27}\u{308}", "\u{22}"],
+ ),
+ ("\u{31}\u{27}\u{27}", &["\u{31}", "\u{27}", "\u{27}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{27}",
+ &["\u{31}", "\u{27}\u{308}", "\u{27}"],
+ ),
+ ("\u{31}\u{27}\u{231a}", &["\u{31}", "\u{27}", "\u{231a}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{231a}",
+ &["\u{31}", "\u{27}\u{308}", "\u{231a}"],
+ ),
+ ("\u{31}\u{27}\u{20}", &["\u{31}", "\u{27}", "\u{20}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{20}",
+ &["\u{31}", "\u{27}\u{308}", "\u{20}"],
+ ),
+ ("\u{31}\u{27}\u{ad}", &["\u{31}", "\u{27}\u{ad}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{ad}",
+ &["\u{31}", "\u{27}\u{308}\u{ad}"],
+ ),
+ ("\u{31}\u{27}\u{300}", &["\u{31}", "\u{27}\u{300}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{300}",
+ &["\u{31}", "\u{27}\u{308}\u{300}"],
+ ),
+ ("\u{31}\u{27}\u{200d}", &["\u{31}", "\u{27}\u{200d}"]),
+ (
+ "\u{31}\u{27}\u{308}\u{200d}",
+ &["\u{31}", "\u{27}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{31}\u{27}\u{61}\u{2060}",
+ &["\u{31}", "\u{27}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{61}\u{2060}",
+ &["\u{31}", "\u{27}\u{308}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{27}\u{61}\u{3a}",
+ &["\u{31}", "\u{27}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{61}\u{3a}",
+ &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{27}\u{61}\u{27}",
+ &["\u{31}", "\u{27}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{61}\u{27}",
+ &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{27}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{27}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{27}\u{61}\u{2c}",
+ &["\u{31}", "\u{27}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{61}\u{2c}",
+ &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{27}\u{31}\u{3a}",
+ &["\u{31}\u{27}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{31}\u{3a}",
+ &["\u{31}\u{27}\u{308}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{27}\u{31}\u{27}",
+ &["\u{31}\u{27}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{31}\u{27}",
+ &["\u{31}\u{27}\u{308}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{27}\u{31}\u{2c}",
+ &["\u{31}\u{27}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{31}\u{2c}",
+ &["\u{31}\u{27}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{27}\u{31}\u{2e}\u{2060}",
+ &["\u{31}\u{27}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{27}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{31}\u{27}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ ("\u{31}\u{2c}\u{1}", &["\u{31}", "\u{2c}", "\u{1}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{1}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{1}"],
+ ),
+ ("\u{31}\u{2c}\u{d}", &["\u{31}", "\u{2c}", "\u{d}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{d}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{d}"],
+ ),
+ ("\u{31}\u{2c}\u{a}", &["\u{31}", "\u{2c}", "\u{a}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{a}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{a}"],
+ ),
+ ("\u{31}\u{2c}\u{b}", &["\u{31}", "\u{2c}", "\u{b}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{b}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{b}"],
+ ),
+ ("\u{31}\u{2c}\u{3031}", &["\u{31}", "\u{2c}", "\u{3031}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{3031}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{3031}"],
+ ),
+ ("\u{31}\u{2c}\u{41}", &["\u{31}", "\u{2c}", "\u{41}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{41}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{41}"],
+ ),
+ ("\u{31}\u{2c}\u{3a}", &["\u{31}", "\u{2c}", "\u{3a}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{3a}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{3a}"],
+ ),
+ ("\u{31}\u{2c}\u{2c}", &["\u{31}", "\u{2c}", "\u{2c}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{2c}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{2c}"],
+ ),
+ ("\u{31}\u{2c}\u{2e}", &["\u{31}", "\u{2c}", "\u{2e}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{2e}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{2e}"],
+ ),
+ ("\u{31}\u{2c}\u{30}", &["\u{31}\u{2c}\u{30}"]),
+ ("\u{31}\u{2c}\u{308}\u{30}", &["\u{31}\u{2c}\u{308}\u{30}"]),
+ ("\u{31}\u{2c}\u{5f}", &["\u{31}", "\u{2c}", "\u{5f}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{5f}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{5f}"],
+ ),
+ ("\u{31}\u{2c}\u{1f1e6}", &["\u{31}", "\u{2c}", "\u{1f1e6}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{1f1e6}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{1f1e6}"],
+ ),
+ ("\u{31}\u{2c}\u{5d0}", &["\u{31}", "\u{2c}", "\u{5d0}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{5d0}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{5d0}"],
+ ),
+ ("\u{31}\u{2c}\u{22}", &["\u{31}", "\u{2c}", "\u{22}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{22}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{22}"],
+ ),
+ ("\u{31}\u{2c}\u{27}", &["\u{31}", "\u{2c}", "\u{27}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{27}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{27}"],
+ ),
+ ("\u{31}\u{2c}\u{231a}", &["\u{31}", "\u{2c}", "\u{231a}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{231a}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{231a}"],
+ ),
+ ("\u{31}\u{2c}\u{20}", &["\u{31}", "\u{2c}", "\u{20}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{20}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{20}"],
+ ),
+ ("\u{31}\u{2c}\u{ad}", &["\u{31}", "\u{2c}\u{ad}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{ad}",
+ &["\u{31}", "\u{2c}\u{308}\u{ad}"],
+ ),
+ ("\u{31}\u{2c}\u{300}", &["\u{31}", "\u{2c}\u{300}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{300}",
+ &["\u{31}", "\u{2c}\u{308}\u{300}"],
+ ),
+ ("\u{31}\u{2c}\u{200d}", &["\u{31}", "\u{2c}\u{200d}"]),
+ (
+ "\u{31}\u{2c}\u{308}\u{200d}",
+ &["\u{31}", "\u{2c}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{61}\u{2060}",
+ &["\u{31}", "\u{2c}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{61}\u{2060}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{61}\u{3a}",
+ &["\u{31}", "\u{2c}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{61}\u{3a}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{61}\u{27}",
+ &["\u{31}", "\u{2c}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{61}\u{27}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{2c}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{61}\u{2c}",
+ &["\u{31}", "\u{2c}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{61}\u{2c}",
+ &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{31}\u{3a}",
+ &["\u{31}\u{2c}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{31}\u{3a}",
+ &["\u{31}\u{2c}\u{308}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{31}\u{27}",
+ &["\u{31}\u{2c}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{31}\u{27}",
+ &["\u{31}\u{2c}\u{308}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{31}\u{2c}",
+ &["\u{31}\u{2c}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{31}\u{2c}",
+ &["\u{31}\u{2c}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{31}\u{2e}\u{2060}",
+ &["\u{31}\u{2c}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{31}\u{2c}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{1}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{1}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{1}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{1}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{d}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{d}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{d}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{d}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{a}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{a}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{b}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{b}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{b}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{b}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{3031}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{3031}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{3031}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{3031}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{41}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{41}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{41}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{41}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{3a}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{3a}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{2c}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{2c}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{2e}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{2e}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{2e}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{2e}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{30}",
+ &["\u{31}\u{2e}\u{2060}\u{30}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{30}",
+ &["\u{31}\u{2e}\u{2060}\u{308}\u{30}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{5f}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{5f}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{5f}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{5f}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{1f1e6}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{1f1e6}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{1f1e6}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{1f1e6}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{5d0}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{5d0}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{5d0}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{5d0}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{22}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{22}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{22}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{22}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{27}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{27}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{231a}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{231a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{231a}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{231a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{20}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{20}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{20}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{20}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{ad}",
+ &["\u{31}", "\u{2e}\u{2060}\u{ad}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{ad}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}\u{ad}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{300}",
+ &["\u{31}", "\u{2e}\u{2060}\u{300}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{300}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}\u{300}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{200d}",
+ &["\u{31}", "\u{2e}\u{2060}\u{200d}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{200d}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}\u{200d}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{61}\u{2060}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{2060}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{61}\u{3a}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{3a}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{61}\u{27}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{27}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{61}\u{27}\u{2060}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{61}", "\u{27}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{27}\u{2060}",
+ &[
+ "\u{31}",
+ "\u{2e}\u{2060}\u{308}",
+ "\u{61}",
+ "\u{27}\u{2060}",
+ ],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{61}\u{2c}",
+ &["\u{31}", "\u{2e}\u{2060}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{2c}",
+ &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{31}\u{3a}",
+ &["\u{31}\u{2e}\u{2060}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{3a}",
+ &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", "\u{3a}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{31}\u{27}",
+ &["\u{31}\u{2e}\u{2060}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{27}",
+ &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", "\u{27}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{31}\u{2c}",
+ &["\u{31}\u{2e}\u{2060}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{2c}",
+ &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", "\u{2c}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{31}\u{2e}\u{2060}",
+ &["\u{31}\u{2e}\u{2060}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{2e}\u{2060}",
+ &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", "\u{2e}\u{2060}"],
+ ),
+ (
+ "\u{d}\u{a}\u{61}\u{a}\u{308}",
+ &["\u{d}\u{a}", "\u{61}", "\u{a}", "\u{308}"],
+ ),
+ ("\u{61}\u{308}", &["\u{61}\u{308}"]),
+ ("\u{20}\u{200d}\u{646}", &["\u{20}\u{200d}", "\u{646}"]),
+ ("\u{646}\u{200d}\u{20}", &["\u{646}\u{200d}", "\u{20}"]),
+ ("\u{41}\u{41}\u{41}", &["\u{41}\u{41}\u{41}"]),
+ ("\u{41}\u{3a}\u{41}", &["\u{41}\u{3a}\u{41}"]),
+ (
+ "\u{41}\u{3a}\u{3a}\u{41}",
+ &["\u{41}", "\u{3a}", "\u{3a}", "\u{41}"],
+ ),
+ ("\u{5d0}\u{27}", &["\u{5d0}\u{27}"]),
+ ("\u{5d0}\u{22}\u{5d0}", &["\u{5d0}\u{22}\u{5d0}"]),
+ ("\u{41}\u{30}\u{30}\u{41}", &["\u{41}\u{30}\u{30}\u{41}"]),
+ ("\u{30}\u{2c}\u{30}", &["\u{30}\u{2c}\u{30}"]),
+ (
+ "\u{30}\u{2c}\u{2c}\u{30}",
+ &["\u{30}", "\u{2c}", "\u{2c}", "\u{30}"],
+ ),
+ ("\u{3031}\u{3031}", &["\u{3031}\u{3031}"]),
+ (
+ "\u{41}\u{5f}\u{30}\u{5f}\u{3031}\u{5f}",
+ &["\u{41}\u{5f}\u{30}\u{5f}\u{3031}\u{5f}"],
+ ),
+ ("\u{41}\u{5f}\u{5f}\u{41}", &["\u{41}\u{5f}\u{5f}\u{41}"]),
+ (
+ "\u{1f1e6}\u{1f1e7}\u{1f1e8}\u{62}",
+ &["\u{1f1e6}\u{1f1e7}", "\u{1f1e8}", "\u{62}"],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{1f1e7}\u{1f1e8}\u{62}",
+ &["\u{61}", "\u{1f1e6}\u{1f1e7}", "\u{1f1e8}", "\u{62}"],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{1f1e7}\u{200d}\u{1f1e8}\u{62}",
+ &[
+ "\u{61}",
+ "\u{1f1e6}\u{1f1e7}\u{200d}",
+ "\u{1f1e8}",
+ "\u{62}",
+ ],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{200d}\u{1f1e7}\u{1f1e8}\u{62}",
+ &[
+ "\u{61}",
+ "\u{1f1e6}\u{200d}\u{1f1e7}",
+ "\u{1f1e8}",
+ "\u{62}",
+ ],
+ ),
+ (
+ "\u{61}\u{1f1e6}\u{1f1e7}\u{1f1e8}\u{1f1e9}\u{62}",
+ &[
+ "\u{61}",
+ "\u{1f1e6}\u{1f1e7}",
+ "\u{1f1e8}\u{1f1e9}",
+ "\u{62}",
+ ],
+ ),
+ (
+ "\u{1f476}\u{1f3ff}\u{1f476}",
+ &["\u{1f476}\u{1f3ff}", "\u{1f476}"],
+ ),
+ (
+ "\u{1f6d1}\u{200d}\u{1f6d1}",
+ &["\u{1f6d1}\u{200d}\u{1f6d1}"],
+ ),
+ ("\u{61}\u{200d}\u{1f6d1}", &["\u{61}\u{200d}\u{1f6d1}"]),
+ ("\u{2701}\u{200d}\u{2701}", &["\u{2701}\u{200d}\u{2701}"]),
+ ("\u{61}\u{200d}\u{2701}", &["\u{61}\u{200d}\u{2701}"]),
+ (
+ "\u{1f476}\u{1f3ff}\u{308}\u{200d}\u{1f476}\u{1f3ff}",
+ &["\u{1f476}\u{1f3ff}\u{308}\u{200d}\u{1f476}\u{1f3ff}"],
+ ),
+ ("\u{1f6d1}\u{1f3ff}", &["\u{1f6d1}\u{1f3ff}"]),
+ (
+ "\u{200d}\u{1f6d1}\u{1f3ff}",
+ &["\u{200d}\u{1f6d1}\u{1f3ff}"],
+ ),
+ ("\u{200d}\u{1f6d1}", &["\u{200d}\u{1f6d1}"]),
+ ("\u{200d}\u{1f6d1}", &["\u{200d}\u{1f6d1}"]),
+ ("\u{1f6d1}\u{1f6d1}", &["\u{1f6d1}", "\u{1f6d1}"]),
+ (
+ "\u{61}\u{308}\u{200d}\u{308}\u{62}",
+ &["\u{61}\u{308}\u{200d}\u{308}\u{62}"],
+ ),
+ (
+ "\u{61}\u{20}\u{20}\u{62}",
+ &["\u{61}", "\u{20}\u{20}", "\u{62}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{3a}\u{31}",
+ &["\u{31}", "\u{3a}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{3a}\u{3a}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{3a}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{3a}\u{3a}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{3a}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{3a}\u{61}",
+ &["\u{31}", "\u{3a}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{3a}\u{3a}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{3a}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{3a}\u{3a}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{3a}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{2e}\u{31}",
+ &["\u{31}", "\u{3a}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{3a}\u{2e}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{3a}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{3a}\u{2e}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{3a}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{2e}\u{61}",
+ &["\u{31}", "\u{3a}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{3a}\u{2e}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{3a}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{3a}\u{2e}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{3a}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{2c}\u{31}",
+ &["\u{31}", "\u{3a}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{3a}\u{2c}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{3a}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{3a}\u{2c}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{3a}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{3a}\u{2c}\u{61}",
+ &["\u{31}", "\u{3a}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{3a}\u{2c}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{3a}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{3a}\u{2c}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{3a}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{3a}\u{31}",
+ &["\u{31}", "\u{2e}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2e}\u{3a}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{2e}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2e}\u{3a}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{2e}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{3a}\u{61}",
+ &["\u{31}", "\u{2e}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2e}\u{3a}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{2e}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2e}\u{3a}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{2e}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2e}\u{31}",
+ &["\u{31}", "\u{2e}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2e}\u{2e}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{2e}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2e}\u{2e}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{2e}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2e}\u{61}",
+ &["\u{31}", "\u{2e}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2e}\u{2e}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{2e}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2e}\u{2e}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{2e}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2c}\u{31}",
+ &["\u{31}", "\u{2e}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2e}\u{2c}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{2e}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2e}\u{2c}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{2e}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{2e}\u{2c}\u{61}",
+ &["\u{31}", "\u{2e}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2e}\u{2c}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{2e}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2e}\u{2c}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{2e}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{3a}\u{31}",
+ &["\u{31}", "\u{2c}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2c}\u{3a}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{2c}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2c}\u{3a}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{2c}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{3a}\u{61}",
+ &["\u{31}", "\u{2c}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2c}\u{3a}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{2c}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2c}\u{3a}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{2c}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{2e}\u{31}",
+ &["\u{31}", "\u{2c}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2c}\u{2e}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{2c}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2c}\u{2e}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{2c}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{2e}\u{61}",
+ &["\u{31}", "\u{2c}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2c}\u{2e}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{2c}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2c}\u{2e}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{2c}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{2c}\u{31}",
+ &["\u{31}", "\u{2c}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2c}\u{2c}\u{31}",
+ &["\u{31}\u{5f}\u{31}", "\u{2c}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2c}\u{2c}\u{31}",
+ &["\u{31}\u{5f}\u{61}", "\u{2c}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{31}\u{2c}\u{2c}\u{61}",
+ &["\u{31}", "\u{2c}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{31}\u{2c}\u{2c}\u{61}",
+ &["\u{31}\u{5f}\u{31}", "\u{2c}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{31}\u{5f}\u{61}\u{2c}\u{2c}\u{61}",
+ &["\u{31}\u{5f}\u{61}", "\u{2c}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{3a}\u{31}",
+ &["\u{61}", "\u{3a}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{3a}\u{3a}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{3a}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{3a}\u{3a}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{3a}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{3a}\u{61}",
+ &["\u{61}", "\u{3a}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{3a}\u{3a}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{3a}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{3a}\u{3a}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{3a}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{2e}\u{31}",
+ &["\u{61}", "\u{3a}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{3a}\u{2e}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{3a}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{3a}\u{2e}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{3a}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{2e}\u{61}",
+ &["\u{61}", "\u{3a}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{3a}\u{2e}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{3a}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{3a}\u{2e}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{3a}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{2c}\u{31}",
+ &["\u{61}", "\u{3a}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{3a}\u{2c}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{3a}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{3a}\u{2c}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{3a}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{3a}\u{2c}\u{61}",
+ &["\u{61}", "\u{3a}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{3a}\u{2c}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{3a}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{3a}\u{2c}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{3a}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{2e}\u{3a}\u{31}",
+ &["\u{61}", "\u{2e}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2e}\u{3a}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{2e}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2e}\u{3a}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{2e}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{2e}\u{3a}\u{61}",
+ &["\u{61}", "\u{2e}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2e}\u{3a}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{2e}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2e}\u{3a}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{2e}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{2e}\u{2e}\u{31}",
+ &["\u{61}", "\u{2e}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2e}\u{2e}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{2e}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2e}\u{2e}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{2e}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{2e}\u{2e}\u{61}",
+ &["\u{61}", "\u{2e}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2e}\u{2e}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{2e}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2e}\u{2e}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{2e}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{2e}\u{2c}\u{31}",
+ &["\u{61}", "\u{2e}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2e}\u{2c}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{2e}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2e}\u{2c}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{2e}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{2e}\u{2c}\u{61}",
+ &["\u{61}", "\u{2e}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2e}\u{2c}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{2e}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2e}\u{2c}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{2e}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{3a}\u{31}",
+ &["\u{61}", "\u{2c}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2c}\u{3a}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{2c}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2c}\u{3a}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{2c}", "\u{3a}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{3a}\u{61}",
+ &["\u{61}", "\u{2c}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2c}\u{3a}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{2c}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2c}\u{3a}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{2c}", "\u{3a}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{2e}\u{31}",
+ &["\u{61}", "\u{2c}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2c}\u{2e}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{2c}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2c}\u{2e}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{2c}", "\u{2e}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{2e}\u{61}",
+ &["\u{61}", "\u{2c}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2c}\u{2e}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{2c}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2c}\u{2e}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{2c}", "\u{2e}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{2c}\u{31}",
+ &["\u{61}", "\u{2c}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2c}\u{2c}\u{31}",
+ &["\u{61}\u{5f}\u{31}", "\u{2c}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2c}\u{2c}\u{31}",
+ &["\u{61}\u{5f}\u{61}", "\u{2c}", "\u{2c}", "\u{31}"],
+ ),
+ (
+ "\u{61}\u{2c}\u{2c}\u{61}",
+ &["\u{61}", "\u{2c}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{31}\u{2c}\u{2c}\u{61}",
+ &["\u{61}\u{5f}\u{31}", "\u{2c}", "\u{2c}", "\u{61}"],
+ ),
+ (
+ "\u{61}\u{5f}\u{61}\u{2c}\u{2c}\u{61}",
+ &["\u{61}\u{5f}\u{61}", "\u{2c}", "\u{2c}", "\u{61}"],
+ ),
+];
+
+// official Unicode test data
+// http://www.unicode.org/Public/15.0.0/ucd/auxiliary/SentenceBreakTest.txt
+pub const TEST_SENTENCE: &'static [(&'static str, &'static [&'static str])] = &[
+ ("\u{1}\u{1}", &["\u{1}\u{1}"]), ("\u{1}\u{308}\u{1}", &["\u{1}\u{308}\u{1}"]),
+ ("\u{1}\u{d}", &["\u{1}\u{d}"]), ("\u{1}\u{308}\u{d}", &["\u{1}\u{308}\u{d}"]),
+ ("\u{1}\u{a}", &["\u{1}\u{a}"]), ("\u{1}\u{308}\u{a}", &["\u{1}\u{308}\u{a}"]),
+ ("\u{1}\u{85}", &["\u{1}\u{85}"]), ("\u{1}\u{308}\u{85}", &["\u{1}\u{308}\u{85}"]),
+ ("\u{1}\u{9}", &["\u{1}\u{9}"]), ("\u{1}\u{308}\u{9}", &["\u{1}\u{308}\u{9}"]),
+ ("\u{1}\u{61}", &["\u{1}\u{61}"]), ("\u{1}\u{308}\u{61}", &["\u{1}\u{308}\u{61}"]),
+ ("\u{1}\u{41}", &["\u{1}\u{41}"]), ("\u{1}\u{308}\u{41}", &["\u{1}\u{308}\u{41}"]),
+ ("\u{1}\u{1bb}", &["\u{1}\u{1bb}"]), ("\u{1}\u{308}\u{1bb}", &["\u{1}\u{308}\u{1bb}"]),
+ ("\u{1}\u{30}", &["\u{1}\u{30}"]), ("\u{1}\u{308}\u{30}", &["\u{1}\u{308}\u{30}"]),
+ ("\u{1}\u{2e}", &["\u{1}\u{2e}"]), ("\u{1}\u{308}\u{2e}", &["\u{1}\u{308}\u{2e}"]),
+ ("\u{1}\u{21}", &["\u{1}\u{21}"]), ("\u{1}\u{308}\u{21}", &["\u{1}\u{308}\u{21}"]),
+ ("\u{1}\u{22}", &["\u{1}\u{22}"]), ("\u{1}\u{308}\u{22}", &["\u{1}\u{308}\u{22}"]),
+ ("\u{1}\u{2c}", &["\u{1}\u{2c}"]), ("\u{1}\u{308}\u{2c}", &["\u{1}\u{308}\u{2c}"]),
+ ("\u{1}\u{ad}", &["\u{1}\u{ad}"]), ("\u{1}\u{308}\u{ad}", &["\u{1}\u{308}\u{ad}"]),
+ ("\u{1}\u{300}", &["\u{1}\u{300}"]), ("\u{1}\u{308}\u{300}", &["\u{1}\u{308}\u{300}"]),
+ ("\u{d}\u{1}", &["\u{d}", "\u{1}"]), ("\u{d}\u{308}\u{1}", &["\u{d}", "\u{308}\u{1}"]),
+ ("\u{d}\u{d}", &["\u{d}", "\u{d}"]), ("\u{d}\u{308}\u{d}", &["\u{d}", "\u{308}\u{d}"]),
+ ("\u{d}\u{a}", &["\u{d}\u{a}"]), ("\u{d}\u{308}\u{a}", &["\u{d}", "\u{308}\u{a}"]),
+ ("\u{d}\u{85}", &["\u{d}", "\u{85}"]), ("\u{d}\u{308}\u{85}", &["\u{d}", "\u{308}\u{85}"]),
+ ("\u{d}\u{9}", &["\u{d}", "\u{9}"]), ("\u{d}\u{308}\u{9}", &["\u{d}", "\u{308}\u{9}"]),
+ ("\u{d}\u{61}", &["\u{d}", "\u{61}"]), ("\u{d}\u{308}\u{61}", &["\u{d}", "\u{308}\u{61}"]),
+ ("\u{d}\u{41}", &["\u{d}", "\u{41}"]), ("\u{d}\u{308}\u{41}", &["\u{d}", "\u{308}\u{41}"]),
+ ("\u{d}\u{1bb}", &["\u{d}", "\u{1bb}"]), ("\u{d}\u{308}\u{1bb}", &["\u{d}",
+ "\u{308}\u{1bb}"]), ("\u{d}\u{30}", &["\u{d}", "\u{30}"]), ("\u{d}\u{308}\u{30}", &["\u{d}",
+ "\u{308}\u{30}"]), ("\u{d}\u{2e}", &["\u{d}", "\u{2e}"]), ("\u{d}\u{308}\u{2e}", &["\u{d}",
+ "\u{308}\u{2e}"]), ("\u{d}\u{21}", &["\u{d}", "\u{21}"]), ("\u{d}\u{308}\u{21}", &["\u{d}",
+ "\u{308}\u{21}"]), ("\u{d}\u{22}", &["\u{d}", "\u{22}"]), ("\u{d}\u{308}\u{22}", &["\u{d}",
+ "\u{308}\u{22}"]), ("\u{d}\u{2c}", &["\u{d}", "\u{2c}"]), ("\u{d}\u{308}\u{2c}", &["\u{d}",
+ "\u{308}\u{2c}"]), ("\u{d}\u{ad}", &["\u{d}", "\u{ad}"]), ("\u{d}\u{308}\u{ad}", &["\u{d}",
+ "\u{308}\u{ad}"]), ("\u{d}\u{300}", &["\u{d}", "\u{300}"]), ("\u{d}\u{308}\u{300}",
+ &["\u{d}", "\u{308}\u{300}"]), ("\u{a}\u{1}", &["\u{a}", "\u{1}"]), ("\u{a}\u{308}\u{1}",
+ &["\u{a}", "\u{308}\u{1}"]), ("\u{a}\u{d}", &["\u{a}", "\u{d}"]), ("\u{a}\u{308}\u{d}",
+ &["\u{a}", "\u{308}\u{d}"]), ("\u{a}\u{a}", &["\u{a}", "\u{a}"]), ("\u{a}\u{308}\u{a}",
+ &["\u{a}", "\u{308}\u{a}"]), ("\u{a}\u{85}", &["\u{a}", "\u{85}"]), ("\u{a}\u{308}\u{85}",
+ &["\u{a}", "\u{308}\u{85}"]), ("\u{a}\u{9}", &["\u{a}", "\u{9}"]), ("\u{a}\u{308}\u{9}",
+ &["\u{a}", "\u{308}\u{9}"]), ("\u{a}\u{61}", &["\u{a}", "\u{61}"]), ("\u{a}\u{308}\u{61}",
+ &["\u{a}", "\u{308}\u{61}"]), ("\u{a}\u{41}", &["\u{a}", "\u{41}"]), ("\u{a}\u{308}\u{41}",
+ &["\u{a}", "\u{308}\u{41}"]), ("\u{a}\u{1bb}", &["\u{a}", "\u{1bb}"]),
+ ("\u{a}\u{308}\u{1bb}", &["\u{a}", "\u{308}\u{1bb}"]), ("\u{a}\u{30}", &["\u{a}",
+ "\u{30}"]), ("\u{a}\u{308}\u{30}", &["\u{a}", "\u{308}\u{30}"]), ("\u{a}\u{2e}", &["\u{a}",
+ "\u{2e}"]), ("\u{a}\u{308}\u{2e}", &["\u{a}", "\u{308}\u{2e}"]), ("\u{a}\u{21}", &["\u{a}",
+ "\u{21}"]), ("\u{a}\u{308}\u{21}", &["\u{a}", "\u{308}\u{21}"]), ("\u{a}\u{22}", &["\u{a}",
+ "\u{22}"]), ("\u{a}\u{308}\u{22}", &["\u{a}", "\u{308}\u{22}"]), ("\u{a}\u{2c}", &["\u{a}",
+ "\u{2c}"]), ("\u{a}\u{308}\u{2c}", &["\u{a}", "\u{308}\u{2c}"]), ("\u{a}\u{ad}", &["\u{a}",
+ "\u{ad}"]), ("\u{a}\u{308}\u{ad}", &["\u{a}", "\u{308}\u{ad}"]), ("\u{a}\u{300}", &["\u{a}",
+ "\u{300}"]), ("\u{a}\u{308}\u{300}", &["\u{a}", "\u{308}\u{300}"]), ("\u{85}\u{1}",
+ &["\u{85}", "\u{1}"]), ("\u{85}\u{308}\u{1}", &["\u{85}", "\u{308}\u{1}"]), ("\u{85}\u{d}",
+ &["\u{85}", "\u{d}"]), ("\u{85}\u{308}\u{d}", &["\u{85}", "\u{308}\u{d}"]), ("\u{85}\u{a}",
+ &["\u{85}", "\u{a}"]), ("\u{85}\u{308}\u{a}", &["\u{85}", "\u{308}\u{a}"]), ("\u{85}\u{85}",
+ &["\u{85}", "\u{85}"]), ("\u{85}\u{308}\u{85}", &["\u{85}", "\u{308}\u{85}"]),
+ ("\u{85}\u{9}", &["\u{85}", "\u{9}"]), ("\u{85}\u{308}\u{9}", &["\u{85}", "\u{308}\u{9}"]),
+ ("\u{85}\u{61}", &["\u{85}", "\u{61}"]), ("\u{85}\u{308}\u{61}", &["\u{85}",
+ "\u{308}\u{61}"]), ("\u{85}\u{41}", &["\u{85}", "\u{41}"]), ("\u{85}\u{308}\u{41}",
+ &["\u{85}", "\u{308}\u{41}"]), ("\u{85}\u{1bb}", &["\u{85}", "\u{1bb}"]),
+ ("\u{85}\u{308}\u{1bb}", &["\u{85}", "\u{308}\u{1bb}"]), ("\u{85}\u{30}", &["\u{85}",
+ "\u{30}"]), ("\u{85}\u{308}\u{30}", &["\u{85}", "\u{308}\u{30}"]), ("\u{85}\u{2e}",
+ &["\u{85}", "\u{2e}"]), ("\u{85}\u{308}\u{2e}", &["\u{85}", "\u{308}\u{2e}"]),
+ ("\u{85}\u{21}", &["\u{85}", "\u{21}"]), ("\u{85}\u{308}\u{21}", &["\u{85}",
+ "\u{308}\u{21}"]), ("\u{85}\u{22}", &["\u{85}", "\u{22}"]), ("\u{85}\u{308}\u{22}",
+ &["\u{85}", "\u{308}\u{22}"]), ("\u{85}\u{2c}", &["\u{85}", "\u{2c}"]),
+ ("\u{85}\u{308}\u{2c}", &["\u{85}", "\u{308}\u{2c}"]), ("\u{85}\u{ad}", &["\u{85}",
+ "\u{ad}"]), ("\u{85}\u{308}\u{ad}", &["\u{85}", "\u{308}\u{ad}"]), ("\u{85}\u{300}",
+ &["\u{85}", "\u{300}"]), ("\u{85}\u{308}\u{300}", &["\u{85}", "\u{308}\u{300}"]),
+ ("\u{9}\u{1}", &["\u{9}\u{1}"]), ("\u{9}\u{308}\u{1}", &["\u{9}\u{308}\u{1}"]),
+ ("\u{9}\u{d}", &["\u{9}\u{d}"]), ("\u{9}\u{308}\u{d}", &["\u{9}\u{308}\u{d}"]),
+ ("\u{9}\u{a}", &["\u{9}\u{a}"]), ("\u{9}\u{308}\u{a}", &["\u{9}\u{308}\u{a}"]),
+ ("\u{9}\u{85}", &["\u{9}\u{85}"]), ("\u{9}\u{308}\u{85}", &["\u{9}\u{308}\u{85}"]),
+ ("\u{9}\u{9}", &["\u{9}\u{9}"]), ("\u{9}\u{308}\u{9}", &["\u{9}\u{308}\u{9}"]),
+ ("\u{9}\u{61}", &["\u{9}\u{61}"]), ("\u{9}\u{308}\u{61}", &["\u{9}\u{308}\u{61}"]),
+ ("\u{9}\u{41}", &["\u{9}\u{41}"]), ("\u{9}\u{308}\u{41}", &["\u{9}\u{308}\u{41}"]),
+ ("\u{9}\u{1bb}", &["\u{9}\u{1bb}"]), ("\u{9}\u{308}\u{1bb}", &["\u{9}\u{308}\u{1bb}"]),
+ ("\u{9}\u{30}", &["\u{9}\u{30}"]), ("\u{9}\u{308}\u{30}", &["\u{9}\u{308}\u{30}"]),
+ ("\u{9}\u{2e}", &["\u{9}\u{2e}"]), ("\u{9}\u{308}\u{2e}", &["\u{9}\u{308}\u{2e}"]),
+ ("\u{9}\u{21}", &["\u{9}\u{21}"]), ("\u{9}\u{308}\u{21}", &["\u{9}\u{308}\u{21}"]),
+ ("\u{9}\u{22}", &["\u{9}\u{22}"]), ("\u{9}\u{308}\u{22}", &["\u{9}\u{308}\u{22}"]),
+ ("\u{9}\u{2c}", &["\u{9}\u{2c}"]), ("\u{9}\u{308}\u{2c}", &["\u{9}\u{308}\u{2c}"]),
+ ("\u{9}\u{ad}", &["\u{9}\u{ad}"]), ("\u{9}\u{308}\u{ad}", &["\u{9}\u{308}\u{ad}"]),
+ ("\u{9}\u{300}", &["\u{9}\u{300}"]), ("\u{9}\u{308}\u{300}", &["\u{9}\u{308}\u{300}"]),
+ ("\u{61}\u{1}", &["\u{61}\u{1}"]), ("\u{61}\u{308}\u{1}", &["\u{61}\u{308}\u{1}"]),
+ ("\u{61}\u{d}", &["\u{61}\u{d}"]), ("\u{61}\u{308}\u{d}", &["\u{61}\u{308}\u{d}"]),
+ ("\u{61}\u{a}", &["\u{61}\u{a}"]), ("\u{61}\u{308}\u{a}", &["\u{61}\u{308}\u{a}"]),
+ ("\u{61}\u{85}", &["\u{61}\u{85}"]), ("\u{61}\u{308}\u{85}", &["\u{61}\u{308}\u{85}"]),
+ ("\u{61}\u{9}", &["\u{61}\u{9}"]), ("\u{61}\u{308}\u{9}", &["\u{61}\u{308}\u{9}"]),
+ ("\u{61}\u{61}", &["\u{61}\u{61}"]), ("\u{61}\u{308}\u{61}", &["\u{61}\u{308}\u{61}"]),
+ ("\u{61}\u{41}", &["\u{61}\u{41}"]), ("\u{61}\u{308}\u{41}", &["\u{61}\u{308}\u{41}"]),
+ ("\u{61}\u{1bb}", &["\u{61}\u{1bb}"]), ("\u{61}\u{308}\u{1bb}", &["\u{61}\u{308}\u{1bb}"]),
+ ("\u{61}\u{30}", &["\u{61}\u{30}"]), ("\u{61}\u{308}\u{30}", &["\u{61}\u{308}\u{30}"]),
+ ("\u{61}\u{2e}", &["\u{61}\u{2e}"]), ("\u{61}\u{308}\u{2e}", &["\u{61}\u{308}\u{2e}"]),
+ ("\u{61}\u{21}", &["\u{61}\u{21}"]), ("\u{61}\u{308}\u{21}", &["\u{61}\u{308}\u{21}"]),
+ ("\u{61}\u{22}", &["\u{61}\u{22}"]), ("\u{61}\u{308}\u{22}", &["\u{61}\u{308}\u{22}"]),
+ ("\u{61}\u{2c}", &["\u{61}\u{2c}"]), ("\u{61}\u{308}\u{2c}", &["\u{61}\u{308}\u{2c}"]),
+ ("\u{61}\u{ad}", &["\u{61}\u{ad}"]), ("\u{61}\u{308}\u{ad}", &["\u{61}\u{308}\u{ad}"]),
+ ("\u{61}\u{300}", &["\u{61}\u{300}"]), ("\u{61}\u{308}\u{300}", &["\u{61}\u{308}\u{300}"]),
+ ("\u{41}\u{1}", &["\u{41}\u{1}"]), ("\u{41}\u{308}\u{1}", &["\u{41}\u{308}\u{1}"]),
+ ("\u{41}\u{d}", &["\u{41}\u{d}"]), ("\u{41}\u{308}\u{d}", &["\u{41}\u{308}\u{d}"]),
+ ("\u{41}\u{a}", &["\u{41}\u{a}"]), ("\u{41}\u{308}\u{a}", &["\u{41}\u{308}\u{a}"]),
+ ("\u{41}\u{85}", &["\u{41}\u{85}"]), ("\u{41}\u{308}\u{85}", &["\u{41}\u{308}\u{85}"]),
+ ("\u{41}\u{9}", &["\u{41}\u{9}"]), ("\u{41}\u{308}\u{9}", &["\u{41}\u{308}\u{9}"]),
+ ("\u{41}\u{61}", &["\u{41}\u{61}"]), ("\u{41}\u{308}\u{61}", &["\u{41}\u{308}\u{61}"]),
+ ("\u{41}\u{41}", &["\u{41}\u{41}"]), ("\u{41}\u{308}\u{41}", &["\u{41}\u{308}\u{41}"]),
+ ("\u{41}\u{1bb}", &["\u{41}\u{1bb}"]), ("\u{41}\u{308}\u{1bb}", &["\u{41}\u{308}\u{1bb}"]),
+ ("\u{41}\u{30}", &["\u{41}\u{30}"]), ("\u{41}\u{308}\u{30}", &["\u{41}\u{308}\u{30}"]),
+ ("\u{41}\u{2e}", &["\u{41}\u{2e}"]), ("\u{41}\u{308}\u{2e}", &["\u{41}\u{308}\u{2e}"]),
+ ("\u{41}\u{21}", &["\u{41}\u{21}"]), ("\u{41}\u{308}\u{21}", &["\u{41}\u{308}\u{21}"]),
+ ("\u{41}\u{22}", &["\u{41}\u{22}"]), ("\u{41}\u{308}\u{22}", &["\u{41}\u{308}\u{22}"]),
+ ("\u{41}\u{2c}", &["\u{41}\u{2c}"]), ("\u{41}\u{308}\u{2c}", &["\u{41}\u{308}\u{2c}"]),
+ ("\u{41}\u{ad}", &["\u{41}\u{ad}"]), ("\u{41}\u{308}\u{ad}", &["\u{41}\u{308}\u{ad}"]),
+ ("\u{41}\u{300}", &["\u{41}\u{300}"]), ("\u{41}\u{308}\u{300}", &["\u{41}\u{308}\u{300}"]),
+ ("\u{1bb}\u{1}", &["\u{1bb}\u{1}"]), ("\u{1bb}\u{308}\u{1}", &["\u{1bb}\u{308}\u{1}"]),
+ ("\u{1bb}\u{d}", &["\u{1bb}\u{d}"]), ("\u{1bb}\u{308}\u{d}", &["\u{1bb}\u{308}\u{d}"]),
+ ("\u{1bb}\u{a}", &["\u{1bb}\u{a}"]), ("\u{1bb}\u{308}\u{a}", &["\u{1bb}\u{308}\u{a}"]),
+ ("\u{1bb}\u{85}", &["\u{1bb}\u{85}"]), ("\u{1bb}\u{308}\u{85}", &["\u{1bb}\u{308}\u{85}"]),
+ ("\u{1bb}\u{9}", &["\u{1bb}\u{9}"]), ("\u{1bb}\u{308}\u{9}", &["\u{1bb}\u{308}\u{9}"]),
+ ("\u{1bb}\u{61}", &["\u{1bb}\u{61}"]), ("\u{1bb}\u{308}\u{61}", &["\u{1bb}\u{308}\u{61}"]),
+ ("\u{1bb}\u{41}", &["\u{1bb}\u{41}"]), ("\u{1bb}\u{308}\u{41}", &["\u{1bb}\u{308}\u{41}"]),
+ ("\u{1bb}\u{1bb}", &["\u{1bb}\u{1bb}"]), ("\u{1bb}\u{308}\u{1bb}",
+ &["\u{1bb}\u{308}\u{1bb}"]), ("\u{1bb}\u{30}", &["\u{1bb}\u{30}"]), ("\u{1bb}\u{308}\u{30}",
+ &["\u{1bb}\u{308}\u{30}"]), ("\u{1bb}\u{2e}", &["\u{1bb}\u{2e}"]), ("\u{1bb}\u{308}\u{2e}",
+ &["\u{1bb}\u{308}\u{2e}"]), ("\u{1bb}\u{21}", &["\u{1bb}\u{21}"]), ("\u{1bb}\u{308}\u{21}",
+ &["\u{1bb}\u{308}\u{21}"]), ("\u{1bb}\u{22}", &["\u{1bb}\u{22}"]), ("\u{1bb}\u{308}\u{22}",
+ &["\u{1bb}\u{308}\u{22}"]), ("\u{1bb}\u{2c}", &["\u{1bb}\u{2c}"]), ("\u{1bb}\u{308}\u{2c}",
+ &["\u{1bb}\u{308}\u{2c}"]), ("\u{1bb}\u{ad}", &["\u{1bb}\u{ad}"]), ("\u{1bb}\u{308}\u{ad}",
+ &["\u{1bb}\u{308}\u{ad}"]), ("\u{1bb}\u{300}", &["\u{1bb}\u{300}"]),
+ ("\u{1bb}\u{308}\u{300}", &["\u{1bb}\u{308}\u{300}"]), ("\u{30}\u{1}", &["\u{30}\u{1}"]),
+ ("\u{30}\u{308}\u{1}", &["\u{30}\u{308}\u{1}"]), ("\u{30}\u{d}", &["\u{30}\u{d}"]),
+ ("\u{30}\u{308}\u{d}", &["\u{30}\u{308}\u{d}"]), ("\u{30}\u{a}", &["\u{30}\u{a}"]),
+ ("\u{30}\u{308}\u{a}", &["\u{30}\u{308}\u{a}"]), ("\u{30}\u{85}", &["\u{30}\u{85}"]),
+ ("\u{30}\u{308}\u{85}", &["\u{30}\u{308}\u{85}"]), ("\u{30}\u{9}", &["\u{30}\u{9}"]),
+ ("\u{30}\u{308}\u{9}", &["\u{30}\u{308}\u{9}"]), ("\u{30}\u{61}", &["\u{30}\u{61}"]),
+ ("\u{30}\u{308}\u{61}", &["\u{30}\u{308}\u{61}"]), ("\u{30}\u{41}", &["\u{30}\u{41}"]),
+ ("\u{30}\u{308}\u{41}", &["\u{30}\u{308}\u{41}"]), ("\u{30}\u{1bb}", &["\u{30}\u{1bb}"]),
+ ("\u{30}\u{308}\u{1bb}", &["\u{30}\u{308}\u{1bb}"]), ("\u{30}\u{30}", &["\u{30}\u{30}"]),
+ ("\u{30}\u{308}\u{30}", &["\u{30}\u{308}\u{30}"]), ("\u{30}\u{2e}", &["\u{30}\u{2e}"]),
+ ("\u{30}\u{308}\u{2e}", &["\u{30}\u{308}\u{2e}"]), ("\u{30}\u{21}", &["\u{30}\u{21}"]),
+ ("\u{30}\u{308}\u{21}", &["\u{30}\u{308}\u{21}"]), ("\u{30}\u{22}", &["\u{30}\u{22}"]),
+ ("\u{30}\u{308}\u{22}", &["\u{30}\u{308}\u{22}"]), ("\u{30}\u{2c}", &["\u{30}\u{2c}"]),
+ ("\u{30}\u{308}\u{2c}", &["\u{30}\u{308}\u{2c}"]), ("\u{30}\u{ad}", &["\u{30}\u{ad}"]),
+ ("\u{30}\u{308}\u{ad}", &["\u{30}\u{308}\u{ad}"]), ("\u{30}\u{300}", &["\u{30}\u{300}"]),
+ ("\u{30}\u{308}\u{300}", &["\u{30}\u{308}\u{300}"]), ("\u{2e}\u{1}", &["\u{2e}", "\u{1}"]),
+ ("\u{2e}\u{308}\u{1}", &["\u{2e}\u{308}", "\u{1}"]), ("\u{2e}\u{d}", &["\u{2e}\u{d}"]),
+ ("\u{2e}\u{308}\u{d}", &["\u{2e}\u{308}\u{d}"]), ("\u{2e}\u{a}", &["\u{2e}\u{a}"]),
+ ("\u{2e}\u{308}\u{a}", &["\u{2e}\u{308}\u{a}"]), ("\u{2e}\u{85}", &["\u{2e}\u{85}"]),
+ ("\u{2e}\u{308}\u{85}", &["\u{2e}\u{308}\u{85}"]), ("\u{2e}\u{9}", &["\u{2e}\u{9}"]),
+ ("\u{2e}\u{308}\u{9}", &["\u{2e}\u{308}\u{9}"]), ("\u{2e}\u{61}", &["\u{2e}\u{61}"]),
+ ("\u{2e}\u{308}\u{61}", &["\u{2e}\u{308}\u{61}"]), ("\u{2e}\u{41}", &["\u{2e}", "\u{41}"]),
+ ("\u{2e}\u{308}\u{41}", &["\u{2e}\u{308}", "\u{41}"]), ("\u{2e}\u{1bb}", &["\u{2e}",
+ "\u{1bb}"]), ("\u{2e}\u{308}\u{1bb}", &["\u{2e}\u{308}", "\u{1bb}"]), ("\u{2e}\u{30}",
+ &["\u{2e}\u{30}"]), ("\u{2e}\u{308}\u{30}", &["\u{2e}\u{308}\u{30}"]), ("\u{2e}\u{2e}",
+ &["\u{2e}\u{2e}"]), ("\u{2e}\u{308}\u{2e}", &["\u{2e}\u{308}\u{2e}"]), ("\u{2e}\u{21}",
+ &["\u{2e}\u{21}"]), ("\u{2e}\u{308}\u{21}", &["\u{2e}\u{308}\u{21}"]), ("\u{2e}\u{22}",
+ &["\u{2e}\u{22}"]), ("\u{2e}\u{308}\u{22}", &["\u{2e}\u{308}\u{22}"]), ("\u{2e}\u{2c}",
+ &["\u{2e}\u{2c}"]), ("\u{2e}\u{308}\u{2c}", &["\u{2e}\u{308}\u{2c}"]), ("\u{2e}\u{ad}",
+ &["\u{2e}\u{ad}"]), ("\u{2e}\u{308}\u{ad}", &["\u{2e}\u{308}\u{ad}"]), ("\u{2e}\u{300}",
+ &["\u{2e}\u{300}"]), ("\u{2e}\u{308}\u{300}", &["\u{2e}\u{308}\u{300}"]), ("\u{21}\u{1}",
+ &["\u{21}", "\u{1}"]), ("\u{21}\u{308}\u{1}", &["\u{21}\u{308}", "\u{1}"]), ("\u{21}\u{d}",
+ &["\u{21}\u{d}"]), ("\u{21}\u{308}\u{d}", &["\u{21}\u{308}\u{d}"]), ("\u{21}\u{a}",
+ &["\u{21}\u{a}"]), ("\u{21}\u{308}\u{a}", &["\u{21}\u{308}\u{a}"]), ("\u{21}\u{85}",
+ &["\u{21}\u{85}"]), ("\u{21}\u{308}\u{85}", &["\u{21}\u{308}\u{85}"]), ("\u{21}\u{9}",
+ &["\u{21}\u{9}"]), ("\u{21}\u{308}\u{9}", &["\u{21}\u{308}\u{9}"]), ("\u{21}\u{61}",
+ &["\u{21}", "\u{61}"]), ("\u{21}\u{308}\u{61}", &["\u{21}\u{308}", "\u{61}"]),
+ ("\u{21}\u{41}", &["\u{21}", "\u{41}"]), ("\u{21}\u{308}\u{41}", &["\u{21}\u{308}",
+ "\u{41}"]), ("\u{21}\u{1bb}", &["\u{21}", "\u{1bb}"]), ("\u{21}\u{308}\u{1bb}",
+ &["\u{21}\u{308}", "\u{1bb}"]), ("\u{21}\u{30}", &["\u{21}", "\u{30}"]),
+ ("\u{21}\u{308}\u{30}", &["\u{21}\u{308}", "\u{30}"]), ("\u{21}\u{2e}", &["\u{21}\u{2e}"]),
+ ("\u{21}\u{308}\u{2e}", &["\u{21}\u{308}\u{2e}"]), ("\u{21}\u{21}", &["\u{21}\u{21}"]),
+ ("\u{21}\u{308}\u{21}", &["\u{21}\u{308}\u{21}"]), ("\u{21}\u{22}", &["\u{21}\u{22}"]),
+ ("\u{21}\u{308}\u{22}", &["\u{21}\u{308}\u{22}"]), ("\u{21}\u{2c}", &["\u{21}\u{2c}"]),
+ ("\u{21}\u{308}\u{2c}", &["\u{21}\u{308}\u{2c}"]), ("\u{21}\u{ad}", &["\u{21}\u{ad}"]),
+ ("\u{21}\u{308}\u{ad}", &["\u{21}\u{308}\u{ad}"]), ("\u{21}\u{300}", &["\u{21}\u{300}"]),
+ ("\u{21}\u{308}\u{300}", &["\u{21}\u{308}\u{300}"]), ("\u{22}\u{1}", &["\u{22}\u{1}"]),
+ ("\u{22}\u{308}\u{1}", &["\u{22}\u{308}\u{1}"]), ("\u{22}\u{d}", &["\u{22}\u{d}"]),
+ ("\u{22}\u{308}\u{d}", &["\u{22}\u{308}\u{d}"]), ("\u{22}\u{a}", &["\u{22}\u{a}"]),
+ ("\u{22}\u{308}\u{a}", &["\u{22}\u{308}\u{a}"]), ("\u{22}\u{85}", &["\u{22}\u{85}"]),
+ ("\u{22}\u{308}\u{85}", &["\u{22}\u{308}\u{85}"]), ("\u{22}\u{9}", &["\u{22}\u{9}"]),
+ ("\u{22}\u{308}\u{9}", &["\u{22}\u{308}\u{9}"]), ("\u{22}\u{61}", &["\u{22}\u{61}"]),
+ ("\u{22}\u{308}\u{61}", &["\u{22}\u{308}\u{61}"]), ("\u{22}\u{41}", &["\u{22}\u{41}"]),
+ ("\u{22}\u{308}\u{41}", &["\u{22}\u{308}\u{41}"]), ("\u{22}\u{1bb}", &["\u{22}\u{1bb}"]),
+ ("\u{22}\u{308}\u{1bb}", &["\u{22}\u{308}\u{1bb}"]), ("\u{22}\u{30}", &["\u{22}\u{30}"]),
+ ("\u{22}\u{308}\u{30}", &["\u{22}\u{308}\u{30}"]), ("\u{22}\u{2e}", &["\u{22}\u{2e}"]),
+ ("\u{22}\u{308}\u{2e}", &["\u{22}\u{308}\u{2e}"]), ("\u{22}\u{21}", &["\u{22}\u{21}"]),
+ ("\u{22}\u{308}\u{21}", &["\u{22}\u{308}\u{21}"]), ("\u{22}\u{22}", &["\u{22}\u{22}"]),
+ ("\u{22}\u{308}\u{22}", &["\u{22}\u{308}\u{22}"]), ("\u{22}\u{2c}", &["\u{22}\u{2c}"]),
+ ("\u{22}\u{308}\u{2c}", &["\u{22}\u{308}\u{2c}"]), ("\u{22}\u{ad}", &["\u{22}\u{ad}"]),
+ ("\u{22}\u{308}\u{ad}", &["\u{22}\u{308}\u{ad}"]), ("\u{22}\u{300}", &["\u{22}\u{300}"]),
+ ("\u{22}\u{308}\u{300}", &["\u{22}\u{308}\u{300}"]), ("\u{2c}\u{1}", &["\u{2c}\u{1}"]),
+ ("\u{2c}\u{308}\u{1}", &["\u{2c}\u{308}\u{1}"]), ("\u{2c}\u{d}", &["\u{2c}\u{d}"]),
+ ("\u{2c}\u{308}\u{d}", &["\u{2c}\u{308}\u{d}"]), ("\u{2c}\u{a}", &["\u{2c}\u{a}"]),
+ ("\u{2c}\u{308}\u{a}", &["\u{2c}\u{308}\u{a}"]), ("\u{2c}\u{85}", &["\u{2c}\u{85}"]),
+ ("\u{2c}\u{308}\u{85}", &["\u{2c}\u{308}\u{85}"]), ("\u{2c}\u{9}", &["\u{2c}\u{9}"]),
+ ("\u{2c}\u{308}\u{9}", &["\u{2c}\u{308}\u{9}"]), ("\u{2c}\u{61}", &["\u{2c}\u{61}"]),
+ ("\u{2c}\u{308}\u{61}", &["\u{2c}\u{308}\u{61}"]), ("\u{2c}\u{41}", &["\u{2c}\u{41}"]),
+ ("\u{2c}\u{308}\u{41}", &["\u{2c}\u{308}\u{41}"]), ("\u{2c}\u{1bb}", &["\u{2c}\u{1bb}"]),
+ ("\u{2c}\u{308}\u{1bb}", &["\u{2c}\u{308}\u{1bb}"]), ("\u{2c}\u{30}", &["\u{2c}\u{30}"]),
+ ("\u{2c}\u{308}\u{30}", &["\u{2c}\u{308}\u{30}"]), ("\u{2c}\u{2e}", &["\u{2c}\u{2e}"]),
+ ("\u{2c}\u{308}\u{2e}", &["\u{2c}\u{308}\u{2e}"]), ("\u{2c}\u{21}", &["\u{2c}\u{21}"]),
+ ("\u{2c}\u{308}\u{21}", &["\u{2c}\u{308}\u{21}"]), ("\u{2c}\u{22}", &["\u{2c}\u{22}"]),
+ ("\u{2c}\u{308}\u{22}", &["\u{2c}\u{308}\u{22}"]), ("\u{2c}\u{2c}", &["\u{2c}\u{2c}"]),
+ ("\u{2c}\u{308}\u{2c}", &["\u{2c}\u{308}\u{2c}"]), ("\u{2c}\u{ad}", &["\u{2c}\u{ad}"]),
+ ("\u{2c}\u{308}\u{ad}", &["\u{2c}\u{308}\u{ad}"]), ("\u{2c}\u{300}", &["\u{2c}\u{300}"]),
+ ("\u{2c}\u{308}\u{300}", &["\u{2c}\u{308}\u{300}"]), ("\u{ad}\u{1}", &["\u{ad}\u{1}"]),
+ ("\u{ad}\u{308}\u{1}", &["\u{ad}\u{308}\u{1}"]), ("\u{ad}\u{d}", &["\u{ad}\u{d}"]),
+ ("\u{ad}\u{308}\u{d}", &["\u{ad}\u{308}\u{d}"]), ("\u{ad}\u{a}", &["\u{ad}\u{a}"]),
+ ("\u{ad}\u{308}\u{a}", &["\u{ad}\u{308}\u{a}"]), ("\u{ad}\u{85}", &["\u{ad}\u{85}"]),
+ ("\u{ad}\u{308}\u{85}", &["\u{ad}\u{308}\u{85}"]), ("\u{ad}\u{9}", &["\u{ad}\u{9}"]),
+ ("\u{ad}\u{308}\u{9}", &["\u{ad}\u{308}\u{9}"]), ("\u{ad}\u{61}", &["\u{ad}\u{61}"]),
+ ("\u{ad}\u{308}\u{61}", &["\u{ad}\u{308}\u{61}"]), ("\u{ad}\u{41}", &["\u{ad}\u{41}"]),
+ ("\u{ad}\u{308}\u{41}", &["\u{ad}\u{308}\u{41}"]), ("\u{ad}\u{1bb}", &["\u{ad}\u{1bb}"]),
+ ("\u{ad}\u{308}\u{1bb}", &["\u{ad}\u{308}\u{1bb}"]), ("\u{ad}\u{30}", &["\u{ad}\u{30}"]),
+ ("\u{ad}\u{308}\u{30}", &["\u{ad}\u{308}\u{30}"]), ("\u{ad}\u{2e}", &["\u{ad}\u{2e}"]),
+ ("\u{ad}\u{308}\u{2e}", &["\u{ad}\u{308}\u{2e}"]), ("\u{ad}\u{21}", &["\u{ad}\u{21}"]),
+ ("\u{ad}\u{308}\u{21}", &["\u{ad}\u{308}\u{21}"]), ("\u{ad}\u{22}", &["\u{ad}\u{22}"]),
+ ("\u{ad}\u{308}\u{22}", &["\u{ad}\u{308}\u{22}"]), ("\u{ad}\u{2c}", &["\u{ad}\u{2c}"]),
+ ("\u{ad}\u{308}\u{2c}", &["\u{ad}\u{308}\u{2c}"]), ("\u{ad}\u{ad}", &["\u{ad}\u{ad}"]),
+ ("\u{ad}\u{308}\u{ad}", &["\u{ad}\u{308}\u{ad}"]), ("\u{ad}\u{300}", &["\u{ad}\u{300}"]),
+ ("\u{ad}\u{308}\u{300}", &["\u{ad}\u{308}\u{300}"]), ("\u{300}\u{1}", &["\u{300}\u{1}"]),
+ ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}\u{1}"]), ("\u{300}\u{d}", &["\u{300}\u{d}"]),
+ ("\u{300}\u{308}\u{d}", &["\u{300}\u{308}\u{d}"]), ("\u{300}\u{a}", &["\u{300}\u{a}"]),
+ ("\u{300}\u{308}\u{a}", &["\u{300}\u{308}\u{a}"]), ("\u{300}\u{85}", &["\u{300}\u{85}"]),
+ ("\u{300}\u{308}\u{85}", &["\u{300}\u{308}\u{85}"]), ("\u{300}\u{9}", &["\u{300}\u{9}"]),
+ ("\u{300}\u{308}\u{9}", &["\u{300}\u{308}\u{9}"]), ("\u{300}\u{61}", &["\u{300}\u{61}"]),
+ ("\u{300}\u{308}\u{61}", &["\u{300}\u{308}\u{61}"]), ("\u{300}\u{41}", &["\u{300}\u{41}"]),
+ ("\u{300}\u{308}\u{41}", &["\u{300}\u{308}\u{41}"]), ("\u{300}\u{1bb}",
+ &["\u{300}\u{1bb}"]), ("\u{300}\u{308}\u{1bb}", &["\u{300}\u{308}\u{1bb}"]),
+ ("\u{300}\u{30}", &["\u{300}\u{30}"]), ("\u{300}\u{308}\u{30}", &["\u{300}\u{308}\u{30}"]),
+ ("\u{300}\u{2e}", &["\u{300}\u{2e}"]), ("\u{300}\u{308}\u{2e}", &["\u{300}\u{308}\u{2e}"]),
+ ("\u{300}\u{21}", &["\u{300}\u{21}"]), ("\u{300}\u{308}\u{21}", &["\u{300}\u{308}\u{21}"]),
+ ("\u{300}\u{22}", &["\u{300}\u{22}"]), ("\u{300}\u{308}\u{22}", &["\u{300}\u{308}\u{22}"]),
+ ("\u{300}\u{2c}", &["\u{300}\u{2c}"]), ("\u{300}\u{308}\u{2c}", &["\u{300}\u{308}\u{2c}"]),
+ ("\u{300}\u{ad}", &["\u{300}\u{ad}"]), ("\u{300}\u{308}\u{ad}", &["\u{300}\u{308}\u{ad}"]),
+ ("\u{300}\u{300}", &["\u{300}\u{300}"]), ("\u{300}\u{308}\u{300}",
+ &["\u{300}\u{308}\u{300}"]), ("\u{d}\u{a}\u{61}\u{a}\u{308}", &["\u{d}\u{a}", "\u{61}\u{a}",
+ "\u{308}"]), ("\u{61}\u{308}", &["\u{61}\u{308}"]), ("\u{20}\u{200d}\u{646}",
+ &["\u{20}\u{200d}\u{646}"]), ("\u{646}\u{200d}\u{20}", &["\u{646}\u{200d}\u{20}"]),
+ ("\u{28}\u{22}\u{47}\u{6f}\u{2e}\u{22}\u{29}\u{20}\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}",
+ &["\u{28}\u{22}\u{47}\u{6f}\u{2e}\u{22}\u{29}\u{20}",
+ "\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}"]),
+ ("\u{28}\u{201c}\u{47}\u{6f}\u{3f}\u{201d}\u{29}\u{20}\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}",
+ &["\u{28}\u{201c}\u{47}\u{6f}\u{3f}\u{201d}\u{29}\u{20}",
+ "\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}"]),
+ ("\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}\u{20}\u{69}\u{73}",
+ &["\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}\u{20}\u{69}\u{73}"]),
+ ("\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{3f}\u{20}\u{48}\u{65}",
+ &["\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{3f}\u{20}", "\u{48}\u{65}"]),
+ ("\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}",
+ &["\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}"]), ("\u{33}\u{2e}\u{34}",
+ &["\u{33}\u{2e}\u{34}"]), ("\u{63}\u{2e}\u{64}", &["\u{63}\u{2e}\u{64}"]),
+ ("\u{43}\u{2e}\u{64}", &["\u{43}\u{2e}\u{64}"]), ("\u{63}\u{2e}\u{44}",
+ &["\u{63}\u{2e}\u{44}"]), ("\u{43}\u{2e}\u{44}", &["\u{43}\u{2e}\u{44}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{74}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{74}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{54}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}", "\u{54}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{2018}\u{28}\u{74}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{2018}\u{28}\u{74}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{2018}\u{28}\u{54}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}", "\u{2018}\u{28}\u{54}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}\u{74}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}\u{74}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}\u{54}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}", "\u{54}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{308}\u{54}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{308}", "\u{54}\u{68}\u{65}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{a}\u{308}\u{54}\u{68}\u{65}",
+ &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{a}", "\u{308}\u{54}\u{68}\u{65}"]),
+ ("\u{74}\u{68}\u{65}\u{20}\u{72}\u{65}\u{73}\u{70}\u{2e}\u{20}\u{6c}\u{65}\u{61}\u{64}\u{65}\u{72}\u{73}\u{20}\u{61}\u{72}\u{65}",
+ &["\u{74}\u{68}\u{65}\u{20}\u{72}\u{65}\u{73}\u{70}\u{2e}\u{20}\u{6c}\u{65}\u{61}\u{64}\u{65}\u{72}\u{73}\u{20}\u{61}\u{72}\u{65}"]),
+ ("\u{5b57}\u{2e}\u{5b57}", &["\u{5b57}\u{2e}", "\u{5b57}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{5b83}", &["\u{65}\u{74}\u{63}\u{2e}", "\u{5b83}"]),
+ ("\u{65}\u{74}\u{63}\u{2e}\u{3002}", &["\u{65}\u{74}\u{63}\u{2e}\u{3002}"]),
+ ("\u{5b57}\u{3002}\u{5b83}", &["\u{5b57}\u{3002}", "\u{5b83}"]), ("\u{21}\u{20}\u{20}",
+ &["\u{21}\u{20}\u{20}"]),
+ ("\u{2060}\u{28}\u{2060}\u{22}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{2e}\u{2060}\u{22}\u{2060}\u{29}\u{2060}\u{20}\u{2060}\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}",
+ &["\u{2060}\u{28}\u{2060}\u{22}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{2e}\u{2060}\u{22}\u{2060}\u{29}\u{2060}\u{20}\u{2060}",
+ "\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{28}\u{2060}\u{201c}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{3f}\u{2060}\u{201d}\u{2060}\u{29}\u{2060}\u{20}\u{2060}\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}",
+ &["\u{2060}\u{28}\u{2060}\u{201c}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{3f}\u{2060}\u{201d}\u{2060}\u{29}\u{2060}\u{20}\u{2060}",
+ "\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{20}\u{2060}\u{69}\u{2060}\u{73}\u{2060}\u{2060}",
+ &["\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{20}\u{2060}\u{69}\u{2060}\u{73}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{3f}\u{2060}\u{20}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{3f}\u{2060}\u{20}\u{2060}",
+ "\u{48}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{2060}",
+ &["\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{33}\u{2060}\u{2e}\u{2060}\u{34}\u{2060}\u{2060}",
+ &["\u{2060}\u{33}\u{2060}\u{2e}\u{2060}\u{34}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}",
+ &["\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}",
+ &["\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}",
+ &["\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}",
+ &["\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}",
+ "\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{2018}\u{2060}\u{28}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{2018}\u{2060}\u{28}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{2018}\u{2060}\u{28}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}",
+ "\u{2018}\u{2060}\u{28}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}",
+ "\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{308}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{308}",
+ "\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{a}\u{2060}\u{308}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{a}",
+ "\u{2060}\u{308}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{73}\u{2060}\u{70}\u{2060}\u{2e}\u{2060}\u{20}\u{2060}\u{6c}\u{2060}\u{65}\u{2060}\u{61}\u{2060}\u{64}\u{2060}\u{65}\u{2060}\u{72}\u{2060}\u{73}\u{2060}\u{20}\u{2060}\u{61}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{2060}",
+ &["\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{73}\u{2060}\u{70}\u{2060}\u{2e}\u{2060}\u{20}\u{2060}\u{6c}\u{2060}\u{65}\u{2060}\u{61}\u{2060}\u{64}\u{2060}\u{65}\u{2060}\u{72}\u{2060}\u{73}\u{2060}\u{20}\u{2060}\u{61}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{5b57}\u{2060}\u{2e}\u{2060}\u{5b57}\u{2060}\u{2060}",
+ &["\u{2060}\u{5b57}\u{2060}\u{2e}\u{2060}", "\u{5b57}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{5b83}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}",
+ "\u{5b83}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{3002}\u{2060}\u{2060}",
+ &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{3002}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{5b57}\u{2060}\u{3002}\u{2060}\u{5b83}\u{2060}\u{2060}",
+ &["\u{2060}\u{5b57}\u{2060}\u{3002}\u{2060}", "\u{5b83}\u{2060}\u{2060}"]),
+ ("\u{2060}\u{21}\u{2060}\u{20}\u{2060}\u{20}\u{2060}\u{2060}",
+ &["\u{2060}\u{21}\u{2060}\u{20}\u{2060}\u{20}\u{2060}\u{2060}"])
+ ];
diff --git a/third_party/rust/unicode-segmentation/src/word.rs b/third_party/rust/unicode-segmentation/src/word.rs
new file mode 100644
index 0000000000..16dfafd0b4
--- /dev/null
+++ b/third_party/rust/unicode-segmentation/src/word.rs
@@ -0,0 +1,754 @@
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use core::cmp;
+use core::iter::Filter;
+
+use crate::tables::word::WordCat;
+
+/// An iterator over the substrings of a string which, after splitting the string on
+/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries),
+/// contain any characters with the
+/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+/// property, or with
+/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+///
+/// This struct is created by the [`unicode_words`] method on the [`UnicodeSegmentation`] trait. See
+/// its documentation for more.
+///
+/// [`unicode_words`]: trait.UnicodeSegmentation.html#tymethod.unicode_words
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+pub struct UnicodeWords<'a> {
+ inner: Filter<UWordBounds<'a>, fn(&&str) -> bool>, // word-bound segments passed through a plain `fn` predicate
+}
+
+impl<'a> Iterator for UnicodeWords<'a> {
+    type Item = &'a str;
+
+    /// Returns the next segment accepted by the inner filter, or `None`
+    /// once the underlying word-bound iterator is exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        self.inner.next()
+    }
+
+    /// Forwards the hint of the wrapped `Filter` rather than falling back
+    /// to the trait default of `(0, None)`, so callers get the upper bound
+    /// that the inner iterator already knows.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+impl<'a> DoubleEndedIterator for UnicodeWords<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> {
+ self.inner.next_back() // delegate to the filtered word-bound iterator, scanning from the end
+ }
+}
+
+/// An iterator over the substrings of a string which, after splitting the string on
+/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries),
+/// contain any characters with the
+/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+/// property, or with
+/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+/// This iterator also provides the byte offsets for each substring.
+///
+/// This struct is created by the [`unicode_word_indices`] method on the [`UnicodeSegmentation`] trait. See
+/// its documentation for more.
+///
+/// [`unicode_word_indices`]: trait.UnicodeSegmentation.html#tymethod.unicode_word_indices
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+pub struct UnicodeWordIndices<'a> {
+ inner: Filter<UWordBoundIndices<'a>, fn(&(usize, &str)) -> bool>, // (byte offset, segment) pairs passed through a plain `fn` predicate
+}
+
+impl<'a> Iterator for UnicodeWordIndices<'a> {
+    type Item = (usize, &'a str);
+
+    /// Returns the next `(byte offset, segment)` pair accepted by the inner
+    /// filter, or `None` once the underlying iterator is exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, &'a str)> {
+        self.inner.next()
+    }
+
+    /// Forwards the hint of the wrapped `Filter` rather than falling back
+    /// to the trait default of `(0, None)`, so callers get the upper bound
+    /// that the inner iterator already knows.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+impl<'a> DoubleEndedIterator for UnicodeWordIndices<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<(usize, &'a str)> {
+ self.inner.next_back() // delegate to the filtered index iterator, scanning from the end
+ }
+}
+
+/// External iterator for a string's
+/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
+///
+/// This struct is created by the [`split_word_bounds`] method on the [`UnicodeSegmentation`]
+/// trait. See its documentation for more.
+///
+/// [`split_word_bounds`]: trait.UnicodeSegmentation.html#tymethod.split_word_bounds
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+#[derive(Clone)]
+pub struct UWordBounds<'a> {
+ string: &'a str, // the part not yet yielded; `next` trims the front, `next_back` trims the back
+ cat: Option<WordCat>, // category cached by `next` for the first char of `string`, if already computed
+ catb: Option<WordCat>, // category cached by `next_back` for the last char of `string`, if already computed
+}
+
+/// External iterator for word boundaries and byte offsets.
+///
+/// This struct is created by the [`split_word_bound_indices`] method on the
+/// [`UnicodeSegmentation`] trait. See its documentation for more.
+///
+/// [`split_word_bound_indices`]: trait.UnicodeSegmentation.html#tymethod.split_word_bound_indices
+/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
+#[derive(Clone)]
+pub struct UWordBoundIndices<'a> {
+ start_offset: usize, // address of the start of the original string (`s.as_ptr() as usize` in `new_word_bound_indices`)
+ iter: UWordBounds<'a>, // underlying segment iterator; offsets are derived from the yielded slices' addresses
+}
+
+impl<'a> UWordBoundIndices<'a> {
+ #[inline]
+ /// View the underlying data (the part yet to be iterated) as a slice of the original string.
+ ///
+ /// ```rust
+ /// # use unicode_segmentation::UnicodeSegmentation;
+ /// let mut iter = "Hello world".split_word_bound_indices();
+ /// assert_eq!(iter.as_str(), "Hello world");
+ /// iter.next();
+ /// assert_eq!(iter.as_str(), " world");
+ /// iter.next();
+ /// assert_eq!(iter.as_str(), "world");
+ /// ```
+ pub fn as_str(&self) -> &'a str {
+ self.iter.as_str() // the wrapped `UWordBounds` owns the remaining slice
+ }
+}
+
+impl<'a> Iterator for UWordBoundIndices<'a> {
+    type Item = (usize, &'a str);
+
+    /// Yields the next segment together with its byte offset in the
+    /// original string.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, &'a str)> {
+        let segment = self.iter.next()?;
+        // The segment is a sub-slice of the original string, so the distance
+        // between the two start addresses is its byte offset.
+        let offset = segment.as_ptr() as usize - self.start_offset;
+        Some((offset, segment))
+    }
+
+    /// Same bounds as the wrapped segment iterator.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for UWordBoundIndices<'a> {
+    /// Yields the last remaining segment together with its byte offset in
+    /// the original string.
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, &'a str)> {
+        let segment = self.iter.next_back()?;
+        // Offset = address of the sub-slice minus address of the original start.
+        let offset = segment.as_ptr() as usize - self.start_offset;
+        Some((offset, segment))
+    }
+}
+
+// State machine for the word boundary rules; each `next`/`next_back` call starts over in `Start`.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+enum UWordBoundsState {
+ Start, // initial state at the beginning of every `next`/`next_back` call
+ Letter, // last significant char was an ALetter
+ HLetter, // last significant char was a Hebrew_Letter
+ Numeric, // last significant char was Numeric
+ Katakana, // last significant char was Katakana
+ ExtendNumLet, // last significant char was ExtendNumLet
+ Regional(RegionalState), // inside a run of Regional_Indicator chars
+ FormatExtend(FormatExtendType), // lookahead / Format-Extend handling; the payload says what may follow
+ Zwj, // forward: segment began with a ZWJ; backward: an emoji was seen and a ZWJ before it may join (WB3c)
+ Emoji, // forward: an Extended_Pictographic directly after a ZWJ was absorbed (WB3c)
+ WSegSpace, // inside a run of WSegSpace chars (WB3d)
+}
+
+// Subtypes for the FormatExtend state in UWordBoundsState: what the scanner expects to see next.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+enum FormatExtendType {
+ AcceptAny, // backward scan only: handled exactly like `Start`
+ AcceptNone, // a run of Format/Extend/ZWJ chars is being collected; nothing else may join it
+ RequireLetter, // after MidLetter/MidNumLet/Single_Quote: a letter must follow, otherwise rewind (WB6/WB7)
+ RequireHLetter, // after Double_Quote next to a Hebrew_Letter: another Hebrew_Letter must follow, otherwise rewind
+ AcceptQLetter, // Single_Quote next to a Hebrew_Letter: a letter may follow but is not required (WB7a)
+ RequireNumeric, // after MidNum/MidNumLet/Single_Quote: a Numeric must follow, otherwise rewind (WB11/WB12)
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+enum RegionalState {
+ Half, // one Regional_Indicator seen; a second one may still pair with it
+ Full, // a pair is complete; a further Regional_Indicator starts a new segment
+ Unknown, // backward scan only: pairing not yet determined (resolved by counting preceding indicators)
+}
+
+/// Returns `true` when the emoji table classifies `ch` as
+/// `EC_Extended_Pictographic`.
+fn is_emoji(ch: char) -> bool {
+    use crate::tables::emoji::{emoji_category, EmojiCat};
+    // Only the third tuple field (the category) is of interest here.
+    let category = emoji_category(ch).2;
+    category == EmojiCat::EC_Extended_Pictographic
+}
+
+impl<'a> Iterator for UWordBounds<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let slen = self.string.len();
+ (cmp::min(slen, 1), Some(slen)) // at least one segment if non-empty, at most one per byte
+ }
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ use self::FormatExtendType::*;
+ use self::UWordBoundsState::*;
+ use crate::tables::word as wd;
+ if self.string.len() == 0 {
+ return None;
+ }
+
+ let mut take_curr = true; // whether the char at `idx` belongs to the segment being returned
+ let mut take_cat = true; // whether `cat` should be cached in `self.cat` for the next call
+ let mut idx = 0; // byte offset of the char currently being examined
+ let mut saveidx = 0; // rewind position used when a Require* lookahead fails
+ let mut state = Start;
+ let mut cat = wd::WC_Any;
+ let mut savecat = wd::WC_Any; // category of the char at `saveidx`, restored on rewind
+
+ // If extend/format/zwj were skipped. Handles precedence of WB3d over WB4
+ let mut skipped_format_extend = false;
+ for (curr, ch) in self.string.char_indices() {
+ idx = curr;
+ // Whether or not the previous category was ZWJ
+ // ZWJs get collapsed, so this handles precedence of WB3c over WB4
+ let prev_zwj = cat == wd::WC_ZWJ;
+ // if there's a category cached, grab it
+ cat = match self.cat {
+ None => wd::word_category(ch).2,
+ _ => self.cat.take().unwrap(), // consume the cached category so it is used only once
+ };
+ take_cat = true;
+
+ // handle rule WB4
+ // just skip all format, extend, and zwj chars
+ // note that Start is a special case: if there's a bunch of Format | Extend
+ // characters at the beginning of a block of text, dump them out as one unit.
+ //
+ // (This is not obvious from the wording of UAX#29, but if you look at the
+ // test cases http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt
+ // then the "correct" interpretation of WB4 becomes apparent.)
+ if state != Start {
+ match cat {
+ wd::WC_Extend | wd::WC_Format | wd::WC_ZWJ => {
+ skipped_format_extend = true;
+ continue;
+ }
+ _ => {}
+ }
+ }
+
+ // rule WB3c
+ // WB4 makes all ZWJs collapse into the previous state
+ // but you can still be in a Zwj state if you started with Zwj
+ //
+ // This means that an EP + Zwj will collapse into EP, which is wrong,
+ // since EP+EP is not a boundary but EP+ZWJ+EP is
+ //
+ // Thus, we separately keep track of whether or not the last character
+ // was a ZWJ. This is an additional bit of state tracked outside of the
+ // state enum; the state enum represents the last non-zwj state encountered.
+ // When prev_zwj is true, for the purposes of WB3c, we are in the Zwj state,
+ // however we are in the previous state for the purposes of all other rules.
+ if prev_zwj {
+ if is_emoji(ch) {
+ state = Emoji;
+ continue;
+ }
+ }
+ // Don't use `continue` in this match without updating `cat`
+ state = match state {
+ Start if cat == wd::WC_CR => {
+ idx += match self.get_next_cat(idx) {
+ Some(ncat) if ncat == wd::WC_LF => 1, // rule WB3
+ _ => 0,
+ };
+ break; // rule WB3a
+ }
+ Start => match cat {
+ wd::WC_ALetter => Letter, // rule WB5, WB6, WB9, WB13a
+ wd::WC_Hebrew_Letter => HLetter, // rule WB5, WB6, WB7a, WB7b, WB9, WB13a
+ wd::WC_Numeric => Numeric, // rule WB8, WB10, WB12, WB13a
+ wd::WC_Katakana => Katakana, // rule WB13, WB13a
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a, WB13b
+ wd::WC_Regional_Indicator => Regional(RegionalState::Half), // rule WB13c
+ wd::WC_LF | wd::WC_Newline => break, // rule WB3a
+ wd::WC_ZWJ => Zwj, // rule WB3c
+ wd::WC_WSegSpace => WSegSpace, // rule WB3d
+ _ => {
+ if let Some(ncat) = self.get_next_cat(idx) {
+ // rule WB4
+ if ncat == wd::WC_Format || ncat == wd::WC_Extend || ncat == wd::WC_ZWJ
+ {
+ state = FormatExtend(AcceptNone);
+ self.cat = Some(ncat); // cache the lookahead so the next loop iteration reuses it
+ continue;
+ }
+ }
+ break; // rule WB999
+ }
+ },
+ WSegSpace => match cat {
+ wd::WC_WSegSpace if !skipped_format_extend => WSegSpace,
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Zwj => {
+ // We already handle WB3c above.
+ take_curr = false;
+ break;
+ }
+ Letter | HLetter => match cat {
+ wd::WC_ALetter => Letter, // rule WB5
+ wd::WC_Hebrew_Letter => HLetter, // rule WB5
+ wd::WC_Numeric => Numeric, // rule WB9
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
+ wd::WC_Double_Quote if state == HLetter => {
+ savecat = cat;
+ saveidx = idx;
+ FormatExtend(RequireHLetter) // rule WB7b
+ }
+ wd::WC_Single_Quote if state == HLetter => {
+ FormatExtend(AcceptQLetter) // rule WB7a
+ }
+ wd::WC_MidLetter | wd::WC_MidNumLet | wd::WC_Single_Quote => {
+ savecat = cat;
+ saveidx = idx;
+ FormatExtend(RequireLetter) // rule WB6
+ }
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Numeric => match cat {
+ wd::WC_Numeric => Numeric, // rule WB8
+ wd::WC_ALetter => Letter, // rule WB10
+ wd::WC_Hebrew_Letter => HLetter, // rule WB10
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
+ wd::WC_MidNum | wd::WC_MidNumLet | wd::WC_Single_Quote => {
+ savecat = cat;
+ saveidx = idx;
+ FormatExtend(RequireNumeric) // rule WB12
+ }
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Katakana => match cat {
+ wd::WC_Katakana => Katakana, // rule WB13
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ ExtendNumLet => match cat {
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
+ wd::WC_ALetter => Letter, // rule WB13b
+ wd::WC_Hebrew_Letter => HLetter, // rule WB13b
+ wd::WC_Numeric => Numeric, // rule WB13b
+ wd::WC_Katakana => Katakana, // rule WB13b
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Regional(RegionalState::Full) => {
+ // if it reaches here we've gone too far,
+ // a full flag can only compose with ZWJ/Extend/Format
+ // following it (those are skipped by the WB4 check above).
+ take_curr = false;
+ break;
+ }
+ Regional(RegionalState::Half) => match cat {
+ wd::WC_Regional_Indicator => Regional(RegionalState::Full), // rule WB13c
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Regional(_) => {
+ unreachable!("RegionalState::Unknown should not occur on forward iteration")
+ }
+ Emoji => {
+ // We already handle WB3c above. If you've reached this point, the emoji sequence is over.
+ take_curr = false;
+ break;
+ }
+ FormatExtend(t) => match t {
+ // handle FormatExtends depending on what type
+ RequireNumeric if cat == wd::WC_Numeric => Numeric, // rule WB11
+ RequireLetter | AcceptQLetter if cat == wd::WC_ALetter => Letter, // rule WB7
+ RequireLetter | AcceptQLetter if cat == wd::WC_Hebrew_Letter => HLetter, // WB7a
+ RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b
+ AcceptNone | AcceptQLetter => {
+ take_curr = false; // emit all the Format|Extend characters
+ take_cat = false;
+ break;
+ }
+ _ => break, // rewind (in if statement below)
+ },
+ }
+ }
+
+ if let FormatExtend(t) = state {
+ // we were looking for something and didn't find it; we have to back up
+ if t == RequireLetter || t == RequireHLetter || t == RequireNumeric {
+ idx = saveidx;
+ cat = savecat;
+ take_curr = false;
+ }
+ }
+
+ self.cat = if take_curr {
+ idx = idx + self.string[idx..].chars().next().unwrap().len_utf8(); // step past the current char so it is included
+ None
+ } else if take_cat {
+ Some(cat) // the char at `idx` starts the next segment; remember its category
+ } else {
+ None
+ };
+
+ let retstr = &self.string[..idx]; // the segment handed back to the caller
+ self.string = &self.string[idx..]; // everything after it remains to be iterated
+ Some(retstr)
+ }
+}
+
+impl<'a> DoubleEndedIterator for UWordBounds<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> {
+ use self::FormatExtendType::*;
+ use self::UWordBoundsState::*;
+ use crate::tables::word as wd;
+ if self.string.len() == 0 {
+ return None;
+ }
+
+ let mut take_curr = true; // whether the char at `idx` belongs to the segment being returned
+ let mut take_cat = true; // whether `cat` should be cached in `self.catb` for the next call
+ let mut idx = self.string.len();
+ idx -= self.string.chars().next_back().unwrap().len_utf8(); // start at the byte offset of the last char
+ let mut previdx = idx; // offset of the char examined just before the current one (i.e. the one after it in the string)
+ let mut saveidx = idx; // position to fall back to when backtracking
+ let mut state = Start;
+ let mut savestate = Start; // state to restore once a run of Format/Extend chars has been scanned
+ let mut cat = wd::WC_Any;
+
+ let mut skipped_format_extend = false;
+
+ for (curr, ch) in self.string.char_indices().rev() {
+ previdx = idx;
+ idx = curr;
+
+ // if there's a category cached, grab it
+ cat = match self.catb {
+ None => wd::word_category(ch).2,
+ _ => self.catb.take().unwrap(), // consume the cached category so it is used only once
+ };
+ take_cat = true;
+
+ // backward iterator over word boundaries. Mostly the same as the forward
+ // iterator, with two weirdnesses:
+ // (1) If we encounter a single quote in the Start state, we have to check for a
+ // Hebrew Letter immediately before it.
+ // (2) Format and Extend char handling takes some gymnastics.
+
+ if cat == wd::WC_Extend || cat == wd::WC_Format || (cat == wd::WC_ZWJ && state != Zwj) {
+ // WB3c has more priority so we should not
+ // fold in that case
+ if match state {
+ FormatExtend(_) | Start => false,
+ _ => true,
+ } {
+ saveidx = previdx;
+ savestate = state;
+ state = FormatExtend(AcceptNone);
+ }
+
+ if state != Start {
+ continue;
+ }
+ } else if state == FormatExtend(AcceptNone) {
+ // finished a scan of some Format|Extend chars, restore previous state
+ state = savestate;
+ previdx = saveidx;
+ take_cat = false;
+ skipped_format_extend = true;
+ }
+
+ // Don't use `continue` in this match without updating `catb`
+ state = match state {
+ Start | FormatExtend(AcceptAny) => match cat {
+ _ if is_emoji(ch) => Zwj,
+ wd::WC_ALetter => Letter, // rule WB5, WB7, WB10, WB13b
+ wd::WC_Hebrew_Letter => HLetter, // rule WB5, WB7, WB7c, WB10, WB13b
+ wd::WC_Numeric => Numeric, // rule WB8, WB9, WB11, WB13b
+ wd::WC_Katakana => Katakana, // rule WB13, WB13b
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
+ wd::WC_Regional_Indicator => Regional(RegionalState::Unknown), // rule WB13c
+ // rule WB4:
+ wd::WC_Extend | wd::WC_Format | wd::WC_ZWJ => FormatExtend(AcceptAny),
+ wd::WC_Single_Quote => {
+ saveidx = idx;
+ FormatExtend(AcceptQLetter) // rule WB7a
+ }
+ wd::WC_WSegSpace => WSegSpace,
+ wd::WC_CR | wd::WC_LF | wd::WC_Newline => {
+ if state == Start {
+ if cat == wd::WC_LF {
+ idx -= match self.get_prev_cat(idx) {
+ Some(pcat) if pcat == wd::WC_CR => 1, // rule WB3
+ _ => 0,
+ };
+ }
+ } else {
+ take_curr = false;
+ }
+ break; // rule WB3a
+ }
+ _ => break, // rule WB999
+ },
+ Zwj => match cat {
+ // rule WB3c
+ wd::WC_ZWJ => FormatExtend(AcceptAny),
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ WSegSpace => match cat {
+ // rule WB3d
+ wd::WC_WSegSpace if !skipped_format_extend => WSegSpace,
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Letter | HLetter => match cat {
+ wd::WC_ALetter => Letter, // rule WB5
+ wd::WC_Hebrew_Letter => HLetter, // rule WB5
+ wd::WC_Numeric => Numeric, // rule WB10
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13b
+ wd::WC_Double_Quote if state == HLetter => {
+ saveidx = previdx;
+ FormatExtend(RequireHLetter) // rule WB7c
+ }
+ wd::WC_MidLetter | wd::WC_MidNumLet | wd::WC_Single_Quote => {
+ saveidx = previdx;
+ FormatExtend(RequireLetter) // rule WB7
+ }
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Numeric => match cat {
+ wd::WC_Numeric => Numeric, // rule WB8
+ wd::WC_ALetter => Letter, // rule WB9
+ wd::WC_Hebrew_Letter => HLetter, // rule WB9
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13b
+ wd::WC_MidNum | wd::WC_MidNumLet | wd::WC_Single_Quote => {
+ saveidx = previdx;
+ FormatExtend(RequireNumeric) // rule WB11
+ }
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Katakana => match cat {
+ wd::WC_Katakana => Katakana, // rule WB13
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13b
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ ExtendNumLet => match cat {
+ wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
+ wd::WC_ALetter => Letter, // rule WB13a
+ wd::WC_Hebrew_Letter => HLetter, // rule WB13a
+ wd::WC_Numeric => Numeric, // rule WB13a
+ wd::WC_Katakana => Katakana, // rule WB13a
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Regional(mut regional_state) => match cat {
+ // rule WB13c
+ wd::WC_Regional_Indicator => {
+ if regional_state == RegionalState::Unknown {
+ let count = self.string[..previdx] // count the Regional_Indicators before `previdx`, ignoring ZWJ/Extend/Format
+ .chars()
+ .rev()
+ .map(|c| wd::word_category(c).2)
+ .filter(|&c| {
+ !(c == wd::WC_ZWJ || c == wd::WC_Extend || c == wd::WC_Format)
+ })
+ .take_while(|&c| c == wd::WC_Regional_Indicator)
+ .count();
+ regional_state = if count % 2 == 0 {
+ RegionalState::Full
+ } else {
+ RegionalState::Half
+ };
+ }
+ if regional_state == RegionalState::Full {
+ take_curr = false;
+ break;
+ } else {
+ Regional(RegionalState::Full)
+ }
+ }
+ _ => {
+ take_curr = false;
+ break;
+ }
+ },
+ Emoji => {
+ if is_emoji(ch) {
+ // rule WB3c
+ Zwj
+ } else {
+ take_curr = false;
+ break;
+ }
+ }
+ FormatExtend(t) => match t {
+ RequireNumeric if cat == wd::WC_Numeric => Numeric, // rule WB12
+ RequireLetter if cat == wd::WC_ALetter => Letter, // rule WB6
+ RequireLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB6
+ AcceptQLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7a
+ RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b
+ _ => break, // backtracking happens below
+ },
+ }
+ }
+
+ if let FormatExtend(t) = state {
+ // if we required something but didn't find it, backtrack
+ if t == RequireLetter
+ || t == RequireHLetter
+ || t == RequireNumeric
+ || t == AcceptNone
+ || t == AcceptQLetter
+ {
+ previdx = saveidx;
+ take_cat = false;
+ take_curr = false;
+ }
+ }
+
+ self.catb = if take_curr {
+ None
+ } else {
+ idx = previdx; // the current char is excluded: the segment starts at the char examined before it
+ if take_cat {
+ Some(cat) // remember the excluded char's category for the next call
+ } else {
+ None
+ }
+ };
+
+ let retstr = &self.string[idx..]; // the segment handed back to the caller
+ self.string = &self.string[..idx]; // everything before it remains to be iterated
+ Some(retstr)
+ }
+}
+
+impl<'a> UWordBounds<'a> {
+    #[inline]
+    /// Returns the portion of the original string that has not been yielded yet.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::UnicodeSegmentation;
+    /// let mut iter = "Hello world".split_word_bounds();
+    /// assert_eq!(iter.as_str(), "Hello world");
+    /// iter.next();
+    /// assert_eq!(iter.as_str(), " world");
+    /// iter.next();
+    /// assert_eq!(iter.as_str(), "world");
+    /// ```
+    pub fn as_str(&self) -> &'a str {
+        self.string
+    }
+
+    /// Word category of the character that follows the one starting at byte
+    /// offset `idx`, or `None` if that character is the last one.
+    ///
+    /// Panics if there is no character at `idx`.
+    #[inline]
+    fn get_next_cat(&self, idx: usize) -> Option<WordCat> {
+        use crate::tables::word::word_category;
+        let mut rest = self.string[idx..].chars();
+        // The character at `idx` itself has to exist; step over it.
+        let _current = rest.next().unwrap();
+        rest.next().map(|following| word_category(following).2)
+    }
+
+    /// Word category of the character that ends right before byte offset
+    /// `idx`, or `None` when `idx` is the start of the remaining string.
+    #[inline]
+    fn get_prev_cat(&self, idx: usize) -> Option<WordCat> {
+        use crate::tables::word::word_category;
+        self.string[..idx]
+            .chars()
+            .next_back()
+            .map(|preceding| word_category(preceding).2)
+    }
+}
+
+#[inline]
+pub fn new_word_bounds<'b>(s: &'b str) -> UWordBounds<'b> {
+ UWordBounds {
+ string: s, // nothing consumed yet: the whole input remains
+ cat: None, // no category cached for the front
+ catb: None, // no category cached for the back
+ }
+}
+
+#[inline]
+pub fn new_word_bound_indices<'b>(s: &'b str) -> UWordBoundIndices<'b> {
+ UWordBoundIndices {
+ start_offset: s.as_ptr() as usize, // base address; segment offsets are computed relative to it
+ iter: new_word_bounds(s),
+ }
+}
+
+/// Filter predicate: `true` when at least one character of the segment is
+/// alphanumeric according to `tables::util::is_alphanumeric`.
+#[inline]
+fn has_alphanumeric(s: &&str) -> bool {
+    use crate::tables::util::is_alphanumeric;
+
+    s.chars().any(is_alphanumeric)
+}
+
+#[inline]
+pub fn new_unicode_words<'b>(s: &'b str) -> UnicodeWords<'b> {
+ use super::UnicodeSegmentation;
+
+ UnicodeWords {
+ inner: s.split_word_bounds().filter(has_alphanumeric), // keep only segments containing an alphanumeric char
+ }
+}
+
+#[inline]
+pub fn new_unicode_word_indices<'b>(s: &'b str) -> UnicodeWordIndices<'b> {
+ use super::UnicodeSegmentation;
+
+ UnicodeWordIndices {
+ inner: s
+ .split_word_bound_indices()
+ .filter(|(_, c)| has_alphanumeric(c)), // non-capturing closure, stored as the `fn` pointer the field type requires
+ }
+}