summaryrefslogtreecommitdiffstats
path: root/vendor/bstr
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bstr')
-rw-r--r--vendor/bstr/.cargo-checksum.json1
-rw-r--r--vendor/bstr/COPYING8
-rw-r--r--vendor/bstr/Cargo.lock136
-rw-r--r--vendor/bstr/Cargo.toml63
-rw-r--r--vendor/bstr/LICENSE-APACHE201
-rw-r--r--vendor/bstr/LICENSE-MIT21
-rw-r--r--vendor/bstr/README.md251
-rw-r--r--vendor/bstr/examples/graphemes-std.rs25
-rw-r--r--vendor/bstr/examples/graphemes.rs22
-rw-r--r--vendor/bstr/examples/lines-std.rs17
-rw-r--r--vendor/bstr/examples/lines.rs17
-rw-r--r--vendor/bstr/examples/uppercase-std.rs15
-rw-r--r--vendor/bstr/examples/uppercase.rs18
-rw-r--r--vendor/bstr/examples/words-std.rs18
-rw-r--r--vendor/bstr/examples/words.rs15
-rw-r--r--vendor/bstr/rustfmt.toml2
-rwxr-xr-xvendor/bstr/scripts/generate-unicode-data149
-rw-r--r--vendor/bstr/scripts/regex/grapheme.sh50
-rw-r--r--vendor/bstr/scripts/regex/sentence.sh176
-rw-r--r--vendor/bstr/scripts/regex/word.sh111
-rw-r--r--vendor/bstr/src/ascii.rs336
-rw-r--r--vendor/bstr/src/bstr.rs74
-rw-r--r--vendor/bstr/src/bstring.rs59
-rw-r--r--vendor/bstr/src/byteset/mod.rs114
-rw-r--r--vendor/bstr/src/byteset/scalar.rs295
-rw-r--r--vendor/bstr/src/ext_slice.rs3655
-rw-r--r--vendor/bstr/src/ext_vec.rs1105
-rw-r--r--vendor/bstr/src/impls.rs987
-rw-r--r--vendor/bstr/src/io.rs514
-rw-r--r--vendor/bstr/src/lib.rs437
-rw-r--r--vendor/bstr/src/tests.rs32
-rw-r--r--vendor/bstr/src/unicode/data/GraphemeBreakTest.txt630
-rw-r--r--vendor/bstr/src/unicode/data/LICENSE-UNICODE45
-rw-r--r--vendor/bstr/src/unicode/data/SentenceBreakTest.txt530
-rw-r--r--vendor/bstr/src/unicode/data/WordBreakTest.txt1851
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfabin0 -> 10589 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfabin0 -> 10589 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfabin0 -> 53905 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfabin0 -> 53905 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/mod.rs8
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfabin0 -> 366 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfabin0 -> 366 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfabin0 -> 149903 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfabin0 -> 149903 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfabin0 -> 8975 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfabin0 -> 8975 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfabin0 -> 572 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfabin0 -> 572 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfabin0 -> 884 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfabin0 -> 884 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs45
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfabin0 -> 229739 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfabin0 -> 229739 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.rs45
-rw-r--r--vendor/bstr/src/unicode/grapheme.rs355
-rw-r--r--vendor/bstr/src/unicode/mod.rs12
-rw-r--r--vendor/bstr/src/unicode/sentence.rs220
-rw-r--r--vendor/bstr/src/unicode/whitespace.rs14
-rw-r--r--vendor/bstr/src/unicode/word.rs406
-rw-r--r--vendor/bstr/src/utf8.rs1370
66 files changed, 14725 insertions, 0 deletions
diff --git a/vendor/bstr/.cargo-checksum.json b/vendor/bstr/.cargo-checksum.json
new file mode 100644
index 000000000..a38a78f37
--- /dev/null
+++ b/vendor/bstr/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"COPYING":"13361f93b3db60ddb43f8fdc97ba469461019bbf7589f6bde833eb3d995d2497","Cargo.lock":"0098420b2c41749835875ecc77cf074781b2448945fe5e7518fdb928ae560b3d","Cargo.toml":"c6c4148fcfd276773164c1966a5d5c9bd45818f00ffc1b7173e61703461e41b6","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6b7374c39a57e57fc2c38eb529c4c88340152b10f51dd5ae2d819dfa67f61715","README.md":"2edf49d9a95fae4fcf1ec37c25c56e1493928daafeb83dc90e486c925d5ae74d","examples/graphemes-std.rs":"100264f623ff973be76831fb1d4519e6f371b21972d6577bb49bf7bbff4d0d5e","examples/graphemes.rs":"401c5fac813f78e4029ece9c98bccb3128637c507d8667b73e069bfbc9d7f2f4","examples/lines-std.rs":"094a48bfd483ec01f80f9c937ddfe6f0bdbf09f960ba822215ec8ed9862624df","examples/lines.rs":"65ae4edbdb0ccff8ff40cdc70b4e7a70824f5028daff2e1b2a3247f884589db8","examples/uppercase-std.rs":"33aed88e38483aa303625757304a974594476a3a659d8bdd4877aceb90ff8be3","examples/uppercase.rs":"2cdf7f173cb6a5d4c16a967e3f733bc40331f5167da519c5194ceee187ff814f","examples/words-std.rs":"ffde2fccd361890fab0e0b051915a749d5d51e95b9be700b76fada231d002f00","examples/words.rs":"aa805faa5012714428ef895596947c333417c2b16a7e0155d4a128be7428fc17","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","scripts/generate-unicode-data":"75d68617b5acf564cc681ddfbf77a6097eaa9a9f8f54af4e16905dda3dc6df77","scripts/regex/grapheme.sh":"d796bca73278f6ab04d65f285b2dc72efcad76874f98f4bfa22bf38f2eaeece7","scripts/regex/sentence.sh":"7892e07ac9e450967bd79279558cbef2f0fc0b0aefab24217910937ed6330f06","scripts/regex/word.sh":"b3f53e2331e9e50a1a7232b7d278aaecace6a42ef6c16dd0b8d0ec59fd2aaf4f","src/ascii.rs":"19bdd85083467382c5bcc31345aa7ce731cc0dbfad3db21cbf09ee86a4ef8327","src/bstr.rs":"10e8af4cf7f39226da45bfb4836bd9da3694edf963d20f34b19574efc9bdbd58","src/bstring.rs":"00dabd0b94dca1b21324a0d6c7caf5a18b0231d4126a617d272c0c12ea30995d","src/byteset/mod.rs":"2ac3f6685fa396fc0fb59ea279d36c59556afea0a7df4d8f0c5855312b24b331","src/byteset/scalar.rs":"81aed13fbbe24274b22794a14767026e3f31d63d6e8f0eaf48f88b3c4d6ee94e","src/ext_slice.rs":"f2e1e4c9b67df88751005bdc186187a49341920ef013196450b125cfddf34b38","src/ext_vec.rs":"02ae97c421f8dabfb5402652ec0de99d8560fb949c105f856253cdb85b1a9187","src/impls.rs":"0da8b528009878b48a5925a0fcdc7b7fe639a66161ac34a8af20bd75d73981d7","src/io.rs":"b9e6718605b24057af0016d969c52b82b0ce19e740b167b600b6a0f0cfa6961b","src/lib.rs":"eace27e8708288fc71450b4ab0e553443412fe6102b2730f0eb53b211f4ffff3","src/tests.rs":"9e50757473b50273ae3a8aeaed924282ffab82ded7134c15745f3b798b16e574","src/unicode/data/GraphemeBreakTest.txt":"95bd55cf803b93eb0b4990cf0e1d70ede263b36412d9c6921fd6f15cef058961","src/unicode/data/LICENSE-UNICODE":"8b9babb256418ec15761d635a49f973424939affba7a0a88de2fc2690e454a23","src/unicode/data/SentenceBreakTest.txt":"7d6c909af97d0ab545a132d412f6e4e65c7eb5158514a7feb9bf00bcd05875f9","src/unicode/data/WordBreakTest.txt":"3e3320bbbe775de7f1a0b9a30021eb949116a9b05cb461c90596c5ecf1743831","src/unicode/fsm/grapheme_break_fwd.bigendian.dfa":"6daed7530f9e0798b51fbd758ea4626ce53d889bdc7d64dd7d2124b3d0c065db","src/unicode/fsm/grapheme_break_fwd.littleendian.dfa":"f24e7d3fd1fe702811e315611fbe5be24746f7f30e5fd30cf578ade55ae2ac15","src/unicode/fsm/grapheme_break_fwd.rs":"68239e41a0702c2e4a4f207b14f0c7b55f26309d02c6e4402bf723d5e100975b","src/unicode/fsm/grapheme_break_rev.bigendian.dfa":"58544096b5209e438d65000d481ad8bdab4c1ee21f1af70247fafd2a0638b712","src/unicode/fsm/grapheme_break_rev.littleendian.dfa":"9895471633472d48ad3280f436c63b5ae08989c1816cd511428c0dc01d32e7f2","src/unicode/fsm/grapheme_break_rev.rs":"5852dd8064828c050a99a95b3ca958e489de752dbcb60aec9f337a9f631ff7b0","src/unicode/fsm/mod.rs":"50b8baa692e83f909a0fe62eced9666b712a68b6c7bf42976c8cc37e49dd9b64","src/unicode/fsm/regional_indicator_rev.bigendian.dfa":"db9db4c86bced5f4aaf68d5e475e13e5d4976c237deec13c192111a399aa5858","src/unicode/fsm/regional_indicator_rev.littleendian.dfa":"0905f70acddd423c1b53bfbeb73299009f724400029d7f9a987d63c32d36e36c","src/unicode/fsm/regional_indicator_rev.rs":"6241065da1a2dcea4416a4915a3e4e8c40b700ebc55f6f983cee2cd213d1af25","src/unicode/fsm/sentence_break_fwd.bigendian.dfa":"3c1966fb1edd143264dc98e59b5c907b383dc20a957ae3e3982c38fbfac7d5a2","src/unicode/fsm/sentence_break_fwd.littleendian.dfa":"855249a618cb57d3403a449883a925e6d6cd4dfc5d316f473038f258ea890b3f","src/unicode/fsm/sentence_break_fwd.rs":"b0bf5dfb9d29d1529034242c4f2a7954b3be2410598b9ace7f170d24703b313f","src/unicode/fsm/simple_word_fwd.bigendian.dfa":"8092da9a59d903bdee43fd73a95f32bb61e611e46528c676aeb42b3810bce085","src/unicode/fsm/simple_word_fwd.littleendian.dfa":"c79d90294aa00f4915295d13d13eb3743cb202066088c9407a6a3690f4b172d4","src/unicode/fsm/simple_word_fwd.rs":"7c2c1d37390c970e9d8b33d3be45cc4fff92d719324737e8a1abfb715942dd81","src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa":"593c8ad059ab0bee60a2ea25f4c1fc89c105cb19a9bda3fa98d1464b8e87cfc0","src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa":"a04ed70d5dbd969c0af0e12bec5033ca910161c486f741dd0a792d2e5b0cc6f6","src/unicode/fsm/whitespace_anchored_fwd.rs":"09d402fad14ffc81d88318808a10c763f356edd42ed6ec1f80df09e5f02700a3","src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa":"9ab09359ce73058d22e5bfa857e040831d49f4a53dd25da804136e9db9e7f5fb","src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa":"cb5804786bd98bfe0726f3f7733d72bc1d69130c8a8b026465c03c78f8c8ac79","src/unicode/fsm/whitespace_anchored_rev.rs":"680cc4e81c94aab5f017bb43efff03221cc3ebcae7bd1fd9284e0c4fc86ebd9e","src/unicode/fsm/word_break_fwd.bigendian.dfa":"5ec979ebe17f0d0b85805625c5b9206476a518d89c707c869dcbe24f422367c8","src/unicode/fsm/word_break_fwd.littleendian.dfa":"28e1b3e13a4ff4b0c7660b611f9e5dd0a087bb3b9a9b63b27cd2eb69a8412a31","src/unicode/fsm/word_break_fwd.rs":"a2581e5e298cd6b748071bc3b637a2a9d3b503c027c3c47b25f47dfd05e8b6db","src/unicode/grapheme.rs":"e0e65949fda3d41dfbf34a03dda3f403c3e8e5e95112b982a358392a37c51b50","src/unicode/mod.rs":"93340dcadca5a4f01a0a993d40e283e98015065098a7b1d076ab966ee591736d","src/unicode/sentence.rs":"ab09a096ab8474bbafb832a8e9905ebb1491a08d11848d9d73587b8361bf6297","src/unicode/whitespace.rs":"0188d053d1aa283d63d1583497d62502ff8c6c7af9f99d42cc9ac800b1438a9c","src/unicode/word.rs":"ce5791b0dc34a2033b99d48bbceb335806cee684913327578f35bac9bf918f06","src/utf8.rs":"6c94ac997924d54cf8f1866e6e62723c780d63b3302adc5285a100c7968b94a9"},"package":"ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"} \ No newline at end of file
diff --git a/vendor/bstr/COPYING b/vendor/bstr/COPYING
new file mode 100644
index 000000000..d5a7d7ec8
--- /dev/null
+++ b/vendor/bstr/COPYING
@@ -0,0 +1,8 @@
+This project is licensed under either of
+
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+ http://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+ http://opensource.org/licenses/MIT)
+
+at your option.
diff --git a/vendor/bstr/Cargo.lock b/vendor/bstr/Cargo.lock
new file mode 100644
index 000000000..b99837c67
--- /dev/null
+++ b/vendor/bstr/Cargo.lock
@@ -0,0 +1,136 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "bstr"
+version = "0.2.17"
+dependencies = [
+ "lazy_static",
+ "memchr",
+ "quickcheck",
+ "regex-automata",
+ "serde",
+ "ucd-parse",
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "byteorder"
+version = "1.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "getrandom"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.79"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743"
+
+[[package]]
+name = "memchr"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+
+[[package]]
+name = "quickcheck"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
+dependencies = [
+ "rand",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "regex"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8963b85b8ce3074fecffde43b4b0dded83ce2f367dc8d363afc56679f3ee820b"
+dependencies = [
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
+dependencies = [
+ "byteorder",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c"
+
+[[package]]
+name = "serde"
+version = "1.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
+
+[[package]]
+name = "ucd-parse"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5269f8d35df6b8b60758343a6d742ecf09e4bca13faee32af5503aebd1e11b7c"
+dependencies = [
+ "lazy_static",
+ "regex",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
+
+[[package]]
+name = "wasi"
+version = "0.10.2+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
diff --git a/vendor/bstr/Cargo.toml b/vendor/bstr/Cargo.toml
new file mode 100644
index 000000000..0f206ba5a
--- /dev/null
+++ b/vendor/bstr/Cargo.toml
@@ -0,0 +1,63 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "bstr"
+version = "0.2.17"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+exclude = ["/.github"]
+description = "A string type that is not required to be valid UTF-8."
+homepage = "https://github.com/BurntSushi/bstr"
+documentation = "https://docs.rs/bstr"
+readme = "README.md"
+keywords = ["string", "str", "byte", "bytes", "text"]
+categories = ["text-processing", "encoding"]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/BurntSushi/bstr"
+[profile.release]
+debug = true
+
+[lib]
+bench = false
+[dependencies.lazy_static]
+version = "1.2.0"
+optional = true
+
+[dependencies.memchr]
+version = "2.4.0"
+default-features = false
+
+[dependencies.regex-automata]
+version = "0.1.5"
+optional = true
+default-features = false
+
+[dependencies.serde]
+version = "1.0.85"
+optional = true
+default-features = false
+[dev-dependencies.quickcheck]
+version = "1"
+default-features = false
+
+[dev-dependencies.ucd-parse]
+version = "0.1.3"
+
+[dev-dependencies.unicode-segmentation]
+version = "1.2.1"
+
+[features]
+default = ["std", "unicode"]
+serde1 = ["std", "serde1-nostd", "serde/std"]
+serde1-nostd = ["serde"]
+std = ["memchr/std"]
+unicode = ["lazy_static", "regex-automata"]
diff --git a/vendor/bstr/LICENSE-APACHE b/vendor/bstr/LICENSE-APACHE
new file mode 100644
index 000000000..16fe87b06
--- /dev/null
+++ b/vendor/bstr/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/vendor/bstr/LICENSE-MIT b/vendor/bstr/LICENSE-MIT
new file mode 100644
index 000000000..17d687378
--- /dev/null
+++ b/vendor/bstr/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018-2019 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/bstr/README.md b/vendor/bstr/README.md
new file mode 100644
index 000000000..13bf0fc71
--- /dev/null
+++ b/vendor/bstr/README.md
@@ -0,0 +1,251 @@
+bstr
+====
+This crate provides extension traits for `&[u8]` and `Vec<u8>` that enable
+their use as byte strings, where byte strings are _conventionally_ UTF-8. This
+differs from the standard library's `String` and `str` types in that they are
+not required to be valid UTF-8, but may be fully or partially valid UTF-8.
+
+[![Build status](https://github.com/BurntSushi/bstr/workflows/ci/badge.svg)](https://github.com/BurntSushi/bstr/actions)
+[![](https://meritbadge.herokuapp.com/bstr)](https://crates.io/crates/bstr)
+
+
+### Documentation
+
+https://docs.rs/bstr
+
+
+### When should I use byte strings?
+
+See this part of the documentation for more details:
+https://docs.rs/bstr/0.2.*/bstr/#when-should-i-use-byte-strings.
+
+The short story is that byte strings are useful when it is inconvenient or
+incorrect to require valid UTF-8.
+
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+bstr = "0.2"
+```
+
+
+### Examples
+
+The following two examples exhibit both the API features of byte strings and
+the I/O convenience functions provided for reading line-by-line quickly.
+
+This first example simply shows how to efficiently iterate over lines in
+stdin, and print out lines containing a particular substring:
+
+```rust
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ if line.contains_str("Dimension") {
+ stdout.write_all(line)?;
+ }
+ Ok(true)
+ })?;
+ Ok(())
+}
+```
+
+This example shows how to count all of the words (Unicode-aware) in stdin,
+line-by-line:
+
+```rust
+use std::error::Error;
+use std::io;
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut words = 0;
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ words += line.words().count();
+ Ok(true)
+ })?;
+ println!("{}", words);
+ Ok(())
+}
+```
+
+This example shows how to convert a stream on stdin to uppercase without
+performing UTF-8 validation _and_ amortizing allocation. On standard ASCII
+text, this is quite a bit faster than what you can (easily) do with standard
+library APIs. (N.B. Any invalid UTF-8 bytes are passed through unchanged.)
+
+```rust
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut upper = vec![];
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ upper.clear();
+ line.to_uppercase_into(&mut upper);
+ stdout.write_all(&upper)?;
+ Ok(true)
+ })?;
+ Ok(())
+}
+```
+
+This example shows how to extract the first 10 visual characters (as grapheme
+clusters) from each line, where invalid UTF-8 sequences are generally treated
+as a single character and are passed through correctly:
+
+```rust
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ let end = line
+ .grapheme_indices()
+ .map(|(_, end, _)| end)
+ .take(10)
+ .last()
+ .unwrap_or(line.len());
+ stdout.write_all(line[..end].trim_end())?;
+ stdout.write_all(b"\n")?;
+ Ok(true)
+ })?;
+ Ok(())
+}
+```
+
+
+### Cargo features
+
+This crates comes with a few features that control standard library, serde
+and Unicode support.
+
+* `std` - **Enabled** by default. This provides APIs that require the standard
+ library, such as `Vec<u8>`.
+* `unicode` - **Enabled** by default. This provides APIs that require sizable
+ Unicode data compiled into the binary. This includes, but is not limited to,
+ grapheme/word/sentence segmenters. When this is disabled, basic support such
+ as UTF-8 decoding is still included.
+* `serde1` - **Disabled** by default. Enables implementations of serde traits
+ for the `BStr` and `BString` types.
+* `serde1-nostd` - **Disabled** by default. Enables implementations of serde
+ traits for the `BStr` type only, intended for use without the standard
+ library. Generally, you either want `serde1` or `serde1-nostd`, not both.
+
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version (MSRV) is `1.41.1`.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust. MSRV may be bumped in minor version releases.
+
+
+### Future work
+
+Since this is meant to be a core crate, getting a `1.0` release is a priority.
+My hope is to move to `1.0` within the next year and commit to its API so that
+`bstr` can be used as a public dependency.
+
+A large part of the API surface area was taken from the standard library, so
+from an API design perspective, a good portion of this crate should be on solid
+ground already. The main differences from the standard library are in how the
+various substring search routines work. The standard library provides generic
+infrastructure for supporting different types of searches with a single method,
+where as this library prefers to define new methods for each type of search and
+drop the generic infrastructure.
+
+Some _probable_ future considerations for APIs include, but are not limited to:
+
+* A convenience layer on top of the `aho-corasick` crate.
+* Unicode normalization.
+* More sophisticated support for dealing with Unicode case, perhaps by
+ combining the use cases supported by [`caseless`](https://docs.rs/caseless)
+ and [`unicase`](https://docs.rs/unicase).
+* Add facilities for dealing with OS strings and file paths, probably via
+ simple conversion routines.
+
+Here are some examples that are _probably_ out of scope for this crate:
+
+* Regular expressions.
+* Unicode collation.
+
+The exact scope isn't quite clear, but I expect we can iterate on it.
+
+In general, as stated below, this crate brings lots of related APIs together
+into a single crate while simultaneously attempting to keep the total number of
+dependencies low. Indeed, every dependency of `bstr`, except for `memchr`, is
+optional.
+
+
+### High level motivation
+
+Strictly speaking, the `bstr` crate provides very little that can't already be
+achieved with the standard library `Vec<u8>`/`&[u8]` APIs and the ecosystem of
+library crates. For example:
+
+* The standard library's
+ [`Utf8Error`](https://doc.rust-lang.org/std/str/struct.Utf8Error.html)
+ can be used for incremental lossy decoding of `&[u8]`.
+* The
+ [`unicode-segmentation`](https://unicode-rs.github.io/unicode-segmentation/unicode_segmentation/index.html)
+ crate can be used for iterating over graphemes (or words), but is only
+ implemented for `&str` types. One could use `Utf8Error` above to implement
+ grapheme iteration with the same semantics as what `bstr` provides (automatic
+ Unicode replacement codepoint substitution).
+* The [`twoway`](https://docs.rs/twoway) crate can be used for
+ fast substring searching on `&[u8]`.
+
+So why create `bstr`? Part of the point of the `bstr` crate is to provide a
+uniform API of coupled components instead of relying on users to piece together
+loosely coupled components from the crate ecosystem. For example, if you wanted
+to perform a search and replace in a `Vec<u8>`, then writing the code to do
+that with the `twoway` crate is not that difficult, but it's still additional
+glue code you have to write. This work adds up depending on what you're doing.
+Consider, for example, trimming and splitting, along with their different
+variants.
+
+In other words, `bstr` is partially a way of pushing back against the
+micro-crate ecosystem that appears to be evolving. Namely, it is a goal of
+`bstr` to keep its dependency list lightweight. For example, `serde` is an
+optional dependency because there is no feasible alternative. In service of
+this philosophy, currently, the only required dependency of `bstr` is `memchr`.
+
+
+### License
+
+This project is licensed under either of
+
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+ https://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+ https://opensource.org/licenses/MIT)
+
+at your option.
+
+The data in `src/unicode/data/` is licensed under the Unicode License Agreement
+([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)), although
+this data is only used in tests.
diff --git a/vendor/bstr/examples/graphemes-std.rs b/vendor/bstr/examples/graphemes-std.rs
new file mode 100644
index 000000000..647739d4a
--- /dev/null
+++ b/vendor/bstr/examples/graphemes-std.rs
@@ -0,0 +1,25 @@
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ let end = line
+ .grapheme_indices(true)
+ .map(|(start, g)| start + g.len())
+ .take(10)
+ .last()
+ .unwrap_or(line.len());
+ stdout.write_all(line[..end].trim_end().as_bytes())?;
+ stdout.write_all(b"\n")?;
+
+ line.clear();
+ }
+ Ok(())
+}
diff --git a/vendor/bstr/examples/graphemes.rs b/vendor/bstr/examples/graphemes.rs
new file mode 100644
index 000000000..6adc7019d
--- /dev/null
+++ b/vendor/bstr/examples/graphemes.rs
@@ -0,0 +1,22 @@
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ let end = line
+ .grapheme_indices()
+ .map(|(_, end, _)| end)
+ .take(10)
+ .last()
+ .unwrap_or(line.len());
+ stdout.write_all(line[..end].trim_end())?;
+ stdout.write_all(b"\n")?;
+ Ok(true)
+ })?;
+ Ok(())
+}
diff --git a/vendor/bstr/examples/lines-std.rs b/vendor/bstr/examples/lines-std.rs
new file mode 100644
index 000000000..69fc6a586
--- /dev/null
+++ b/vendor/bstr/examples/lines-std.rs
@@ -0,0 +1,17 @@
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ if line.contains("Dimension") {
+ stdout.write_all(line.as_bytes())?;
+ }
+ line.clear();
+ }
+ Ok(())
+}
diff --git a/vendor/bstr/examples/lines.rs b/vendor/bstr/examples/lines.rs
new file mode 100644
index 000000000..c392a2229
--- /dev/null
+++ b/vendor/bstr/examples/lines.rs
@@ -0,0 +1,17 @@
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ if line.contains_str("Dimension") {
+ stdout.write_all(line)?;
+ }
+ Ok(true)
+ })?;
+ Ok(())
+}
diff --git a/vendor/bstr/examples/uppercase-std.rs b/vendor/bstr/examples/uppercase-std.rs
new file mode 100644
index 000000000..672bd7171
--- /dev/null
+++ b/vendor/bstr/examples/uppercase-std.rs
@@ -0,0 +1,15 @@
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ stdout.write_all(line.to_uppercase().as_bytes())?;
+ line.clear();
+ }
+ Ok(())
+}
diff --git a/vendor/bstr/examples/uppercase.rs b/vendor/bstr/examples/uppercase.rs
new file mode 100644
index 000000000..1eb798a34
--- /dev/null
+++ b/vendor/bstr/examples/uppercase.rs
@@ -0,0 +1,18 @@
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut upper = vec![];
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ upper.clear();
+ line.to_uppercase_into(&mut upper);
+ stdout.write_all(&upper)?;
+ Ok(true)
+ })?;
+ Ok(())
+}
diff --git a/vendor/bstr/examples/words-std.rs b/vendor/bstr/examples/words-std.rs
new file mode 100644
index 000000000..aeeeb26ff
--- /dev/null
+++ b/vendor/bstr/examples/words-std.rs
@@ -0,0 +1,18 @@
+use std::error::Error;
+use std::io::{self, BufRead};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+
+ let mut words = 0;
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ words += line.unicode_words().count();
+ line.clear();
+ }
+ println!("{}", words);
+ Ok(())
+}
diff --git a/vendor/bstr/examples/words.rs b/vendor/bstr/examples/words.rs
new file mode 100644
index 000000000..db305da0d
--- /dev/null
+++ b/vendor/bstr/examples/words.rs
@@ -0,0 +1,15 @@
+use std::error::Error;
+use std::io;
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut words = 0;
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ words += line.words().count();
+ Ok(true)
+ })?;
+ println!("{}", words);
+ Ok(())
+}
diff --git a/vendor/bstr/rustfmt.toml b/vendor/bstr/rustfmt.toml
new file mode 100644
index 000000000..aa37a218b
--- /dev/null
+++ b/vendor/bstr/rustfmt.toml
@@ -0,0 +1,2 @@
+max_width = 79
+use_small_heuristics = "max"
diff --git a/vendor/bstr/scripts/generate-unicode-data b/vendor/bstr/scripts/generate-unicode-data
new file mode 100755
index 000000000..b8341c5a6
--- /dev/null
+++ b/vendor/bstr/scripts/generate-unicode-data
@@ -0,0 +1,149 @@
+#!/bin/sh
+
+set -e
+D="$(dirname "$0")"
+
+# Convenience function for checking that a command exists.
+requires() {
+ cmd="$1"
+ if ! command -v "$cmd" > /dev/null 2>&1; then
+ echo "DEPENDENCY MISSING: $cmd must be installed" >&2
+ exit 1
+ fi
+}
+
+# Test if an array ($2) contains a particular element ($1).
+array_exists() {
+ needle="$1"
+ shift
+
+ for el in "$@"; do
+ if [ "$el" = "$needle" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+graphemes() {
+ regex="$(sh "$D/regex/grapheme.sh")"
+
+ echo "generating forward grapheme DFA"
+ ucd-generate dfa \
+ --name GRAPHEME_BREAK_FWD \
+ --sparse --minimize --anchored --state-size 2 \
+ src/unicode/fsm/ \
+ "$regex"
+
+ echo "generating reverse grapheme DFA"
+ ucd-generate dfa \
+ --name GRAPHEME_BREAK_REV \
+ --reverse --longest \
+ --sparse --minimize --anchored --state-size 2 \
+ src/unicode/fsm/ \
+ "$regex"
+}
+
+words() {
+ regex="$(sh "$D/regex/word.sh")"
+
+ echo "generating forward word DFA (this can take a while)"
+ ucd-generate dfa \
+ --name WORD_BREAK_FWD \
+ --sparse --minimize --anchored --state-size 4 \
+ src/unicode/fsm/ \
+ "$regex"
+}
+
+sentences() {
+ regex="$(sh "$D/regex/sentence.sh")"
+
+ echo "generating forward sentence DFA (this can take a while)"
+ ucd-generate dfa \
+ --name SENTENCE_BREAK_FWD \
+ --minimize \
+ --sparse --anchored --state-size 4 \
+ src/unicode/fsm/ \
+ "$regex"
+}
+
+regional_indicator() {
+ # For finding all occurrences of region indicators. This is used to handle
+ # regional indicators as a special case for the reverse grapheme iterator
+ # and the reverse word iterator.
+ echo "generating regional indicator DFA"
+ ucd-generate dfa \
+ --name REGIONAL_INDICATOR_REV \
+ --reverse \
+ --classes --minimize --anchored --premultiply --state-size 1 \
+ src/unicode/fsm/ \
+ "\p{gcb=Regional_Indicator}"
+}
+
+simple_word() {
+ echo "generating forward simple word DFA"
+ ucd-generate dfa \
+ --name SIMPLE_WORD_FWD \
+ --sparse --minimize --state-size 2 \
+ src/unicode/fsm/ \
+ "\w"
+}
+
+whitespace() {
+ echo "generating forward whitespace DFA"
+ ucd-generate dfa \
+ --name WHITESPACE_ANCHORED_FWD \
+ --anchored --classes --premultiply --minimize --state-size 1 \
+ src/unicode/fsm/ \
+ "\s+"
+
+ echo "generating reverse whitespace DFA"
+ ucd-generate dfa \
+ --name WHITESPACE_ANCHORED_REV \
+ --reverse \
+ --anchored --classes --premultiply --minimize --state-size 2 \
+ src/unicode/fsm/ \
+ "\s+"
+}
+
+main() {
+ if array_exists "-h" "$@" || array_exists "--help" "$@"; then
+ echo "Usage: $(basename "$0") [--list-commands] [<command>] ..." >&2
+ exit
+ fi
+
+ commands="
+ graphemes
+ sentences
+ words
+ regional-indicator
+ simple-word
+ whitespace
+ "
+ if array_exists "--list-commands" "$@"; then
+ for cmd in $commands; do
+ echo "$cmd"
+ done
+ exit
+ fi
+
+ # ucd-generate is used to compile regexes into DFAs.
+ requires ucd-generate
+
+ mkdir -p src/unicode/fsm/
+
+ cmds=$*
+ if [ $# -eq 0 ] || array_exists "all" "$@"; then
+ cmds=$commands
+ fi
+ for cmd in $cmds; do
+ if array_exists "$cmd" $commands; then
+ fun="$(echo "$cmd" | sed 's/-/_/g')"
+ eval "$fun"
+ else
+ echo "unrecognized command: $cmd" >&2
+ fi
+ done
+}
+
+main "$@"
diff --git a/vendor/bstr/scripts/regex/grapheme.sh b/vendor/bstr/scripts/regex/grapheme.sh
new file mode 100644
index 000000000..0b2b54daa
--- /dev/null
+++ b/vendor/bstr/scripts/regex/grapheme.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# This regex was manually written, derived from the rules in UAX #29.
+# Particularly, from Table 1c, which lays out a regex for grapheme clusters.
+
+CR="\p{gcb=CR}"
+LF="\p{gcb=LF}"
+Control="\p{gcb=Control}"
+Prepend="\p{gcb=Prepend}"
+L="\p{gcb=L}"
+V="\p{gcb=V}"
+LV="\p{gcb=LV}"
+LVT="\p{gcb=LVT}"
+T="\p{gcb=T}"
+RI="\p{gcb=RI}"
+Extend="\p{gcb=Extend}"
+ZWJ="\p{gcb=ZWJ}"
+SpacingMark="\p{gcb=SpacingMark}"
+
+Any="\p{any}"
+ExtendPict="\p{Extended_Pictographic}"
+
+echo "(?x)
+$CR $LF
+|
+$Control
+|
+$Prepend*
+(
+ (
+ ($L* ($V+ | $LV $V* | $LVT) $T*)
+ |
+ $L+
+ |
+ $T+
+ )
+ |
+ $RI $RI
+ |
+ $ExtendPict ($Extend* $ZWJ $ExtendPict)*
+ |
+ [^$Control $CR $LF]
+)
+[$Extend $ZWJ $SpacingMark]*
+|
+$Any
+"
diff --git a/vendor/bstr/scripts/regex/sentence.sh b/vendor/bstr/scripts/regex/sentence.sh
new file mode 100644
index 000000000..689d1849f
--- /dev/null
+++ b/vendor/bstr/scripts/regex/sentence.sh
@@ -0,0 +1,176 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# This is a regex that I reverse engineered from the sentence boundary chain
+# rules in UAX #29. Unlike the grapheme regex, which is essentially provided
+# for us in UAX #29, no such sentence regex exists.
+#
+# I looked into how ICU achieves this, since UAX #29 hints that producing
+# finite state machines for grapheme/sentence/word/line breaking is possible,
+# but only easy to do for graphemes. ICU does this by implementing their own
+# DSL for describing the break algorithms in terms of the chaining rules
+# directly. You can see an example for sentences in
+# icu4c/source/data/brkitr/rules/sent.txt. ICU then builds a finite state
+# machine from those rules in a mostly standard way, but implements the
+# "chaining" aspect of the rules by connecting overlapping end and start
+# states. For example, given SB7:
+#
+# (Upper | Lower) ATerm x Upper
+#
+# Then the naive way to convert this into a regex would be something like
+#
+# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}
+#
+# Unfortunately, this is incorrect. Why? Well, consider an example like so:
+#
+# U.S.A.
+#
+# A correct implementation of the sentence breaking algorithm should not insert
+# any breaks here, exactly in accordance with repeatedly applying rule SB7 as
+# given above. Our regex fails to do this because it will first match `U.S`
+# without breaking them---which is correct---but will then start looking for
+# its next rule beginning with a full stop (in ATerm) and followed by an
+# uppercase letter (A). This will wind up triggering rule SB11 (without
+# matching `A`), which inserts a break.
+#
+# The reason why this happens is because our initial application of rule SB7
+# "consumes" the next uppercase letter (S), which we want to reuse as a prefix
+# in the next rule application. A natural way to express this would be with
+# look-around, although it's not clear that works in every case since you
+# ultimately might want to consume that ending uppercase letter. In any case,
+# we can't use look-around in our truly regular regexes, so we must fix this.
+# The approach we take is to explicitly repeat rules when a suffix of a rule
+# is a prefix of another rule. In the case of SB7, the end of the rule, an
+# uppercase letter, also happens to match the beginning of the rule. This can
+# in turn be repeated indefinitely. Thus, our actual translation to a regex is:
+#
+# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}(\p{sb=ATerm}\p{sb=Upper}*
+#
+# It turns out that this is exactly what ICU does, but in their case, they do
+# it automatically. In our case, we connect the chaining rules manually. It's
+# tedious. With that said, we do no implement Unicode line breaking with this
+# approach, which is a far scarier beast. In that case, it would probably be
+# worth writing the code to do what ICU does.
+#
+# In the case of sentence breaks, there aren't *too* many overlaps of this
+# nature. We list them out exhaustively to make this clear, because it's
+# essentially impossible to easily observe this in the regex. (It took me a
+# full day to figure all of this out.) Rules marked with N/A mean that they
+# specify a break, and this strategy only really applies to stringing together
+# non-breaks.
+#
+# SB1 - N/A
+# SB2 - N/A
+# SB3 - None
+# SB4 - N/A
+# SB5 - None
+# SB6 - None
+# SB7 - End overlaps with beginning of SB7
+# SB8 - End overlaps with beginning of SB7
+# SB8a - End overlaps with beginning of SB6, SB8, SB8a, SB9, SB10, SB11
+# SB9 - None
+# SB10 - None
+# SB11 - None
+# SB998 - N/A
+#
+# SB8a is in particular quite tricky to get right without look-ahead, since it
+# allows ping-ponging between match rules SB8a and SB9-11, where SB9-11
+# otherwise indicate that a break has been found. In the regex below, we tackle
+# this by only permitting part of SB8a to match inside our core non-breaking
+# repetition. In particular, we only allow the parts of SB8a to match that
+# permit the non-breaking components to continue. If a part of SB8a matches
+# that guarantees a pop out to SB9-11, (like `STerm STerm`), then we let it
+# happen. This still isn't correct because an SContinue might be seen which
+# would allow moving back into SB998 and thus the non-breaking repetition, so
+# we handle that case as well.
+#
+# Finally, the last complication here is the sprinkling of $Ex* everywhere.
+# This essentially corresponds to the implementation of SB5 by following
+# UAX #29's recommendation in S6.2. Essentially, we use it avoid ever breaking
+# in the middle of a grapheme cluster.
+
+CR="\p{sb=CR}"
+LF="\p{sb=LF}"
+Sep="\p{sb=Sep}"
+Close="\p{sb=Close}"
+Sp="\p{sb=Sp}"
+STerm="\p{sb=STerm}"
+ATerm="\p{sb=ATerm}"
+SContinue="\p{sb=SContinue}"
+Numeric="\p{sb=Numeric}"
+Upper="\p{sb=Upper}"
+Lower="\p{sb=Lower}"
+OLetter="\p{sb=OLetter}"
+
+Ex="[\p{sb=Extend}\p{sb=Format}]"
+ParaSep="[$Sep $CR $LF]"
+SATerm="[$STerm $ATerm]"
+
+LetterSepTerm="[$OLetter $Upper $Lower $ParaSep $SATerm]"
+
+echo "(?x)
+(
+ # SB6
+ $ATerm $Ex*
+ $Numeric
+ |
+ # SB7
+ [$Upper $Lower] $Ex* $ATerm $Ex*
+ $Upper $Ex*
+ # overlap with SB7
+ ($ATerm $Ex* $Upper $Ex*)*
+ |
+ # SB8
+ $ATerm $Ex* $Close* $Ex* $Sp* $Ex*
+ ([^$LetterSepTerm] $Ex*)* $Lower $Ex*
+ # overlap with SB7
+ ($ATerm $Ex* $Upper $Ex*)*
+ |
+ # SB8a
+ $SATerm $Ex* $Close* $Ex* $Sp* $Ex*
+ (
+ $SContinue
+ |
+ $ATerm $Ex*
+ # Permit repetition of SB8a
+ (($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
+ # In order to continue non-breaking matching, we now must observe
+ # a match with a rule that keeps us in SB6-8a. Otherwise, we've entered
+ # one of SB9-11 and know that a break must follow.
+ (
+ # overlap with SB6
+ $Numeric
+ |
+ # overlap with SB8
+ ($Close $Ex*)* ($Sp $Ex*)*
+ ([^$LetterSepTerm] $Ex*)* $Lower $Ex*
+ # overlap with SB7
+ ($ATerm $Ex* $Upper $Ex*)*
+ |
+ # overlap with SB8a
+ ($Close $Ex*)* ($Sp $Ex*)* $SContinue
+ )
+ |
+ $STerm $Ex*
+ # Permit repetition of SB8a
+ (($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
+ # As with ATerm above, in order to continue non-breaking matching, we
+ # must now observe a match with a rule that keeps us out of SB9-11.
+ # For STerm, the only such possibility is to see an SContinue. Anything
+ # else will result in a break.
+ ($Close $Ex*)* ($Sp $Ex*)* $SContinue
+ )
+ |
+ # SB998
+ # The logic behind this catch-all is that if we get to this point and
+ # see a Sep, CR, LF, STerm or ATerm, then it has to fall into one of
+ # SB9, SB10 or SB11. In the cases of SB9-11, we always find a break since
+ # SB11 acts as a catch-all to induce a break following a SATerm that isn't
+ # handled by rules SB6-SB8a.
+ [^$ParaSep $SATerm]
+)*
+# The following collapses rules SB3, SB4, part of SB8a, SB9, SB10 and SB11.
+($SATerm $Ex* ($Close $Ex*)* ($Sp $Ex*)*)* ($CR $LF | $ParaSep)?
+"
diff --git a/vendor/bstr/scripts/regex/word.sh b/vendor/bstr/scripts/regex/word.sh
new file mode 100644
index 000000000..78c7a05cf
--- /dev/null
+++ b/vendor/bstr/scripts/regex/word.sh
@@ -0,0 +1,111 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# See the comments in regex/sentence.sh for the general approach to how this
+# regex was written.
+#
+# Writing the regex for this was *hard*. It took me two days of hacking to get
+# this far, and that was after I had finished the sentence regex, so my brain
+# was fully cached on this. Unlike the sentence regex, the rules in the regex
+# below don't correspond as nicely to the rules in UAX #29. In particular, the
+# UAX #29 rules have a ton of overlap with each other, which requires crazy
+# stuff in the regex. I'm not even sure the regex below is 100% correct or even
+# minimal, however, I did compare this with the ICU word segmenter on a few
+# different corpora, and it produces identical results. (In addition to of
+# course passing the UCD tests.)
+#
+# In general, I consider this approach to be a failure. Firstly, this is
+# clearly a write-only regex. Secondly, building the minimized DFA for this is
+# incredibly slow. Thirdly, the DFA is itself very large (~240KB). Fourthly,
+# reversing this regex (for reverse word iteration) results in a >19MB DFA.
+# Yes. That's MB. Wat. And it took 5 minutes to build.
+#
+# I think we might consider changing our approach to this problem. The normal
+# path I've seen, I think, is to decode codepoints one at a time, and then
+# thread them through a state machine in the code itself. We could take this
+# approach, or possibly combine it with a DFA that tells us which Word_Break
+# value a codepoint has. I'd prefer the latter approach, but it requires adding
+# RegexSet support to regex-automata. Something that should definitely be done,
+# but is a fair amount of work.
+#
+# Gah.
+
+CR="\p{wb=CR}"
+LF="\p{wb=LF}"
+Newline="\p{wb=Newline}"
+ZWJ="\p{wb=ZWJ}"
+RI="\p{wb=Regional_Indicator}"
+Katakana="\p{wb=Katakana}"
+HebrewLet="\p{wb=HebrewLetter}"
+ALetter="\p{wb=ALetter}"
+SingleQuote="\p{wb=SingleQuote}"
+DoubleQuote="\p{wb=DoubleQuote}"
+MidNumLet="\p{wb=MidNumLet}"
+MidLetter="\p{wb=MidLetter}"
+MidNum="\p{wb=MidNum}"
+Numeric="\p{wb=Numeric}"
+ExtendNumLet="\p{wb=ExtendNumLet}"
+WSegSpace="\p{wb=WSegSpace}"
+
+Any="\p{any}"
+Ex="[\p{wb=Extend} \p{wb=Format} $ZWJ]"
+ExtendPict="\p{Extended_Pictographic}"
+AHLetter="[$ALetter $HebrewLet]"
+MidNumLetQ="[$MidNumLet $SingleQuote]"
+
+AHLetterRepeat="$AHLetter $Ex* ([$MidLetter $MidNumLetQ] $Ex* $AHLetter $Ex*)*"
+NumericRepeat="$Numeric $Ex* ([$MidNum $MidNumLetQ] $Ex* $Numeric $Ex*)*"
+
+echo "(?x)
+$CR $LF
+|
+[$Newline $CR $LF]
+|
+$WSegSpace $WSegSpace+
+|
+(
+ ([^$Newline $CR $LF]? $Ex* $ZWJ $ExtendPict $Ex*)+
+ |
+ ($ExtendNumLet $Ex*)* $AHLetter $Ex*
+ (
+ (
+ ($NumericRepeat | $ExtendNumLet $Ex*)*
+ |
+ [$MidLetter $MidNumLetQ] $Ex*
+ )
+ $AHLetter $Ex*
+ )+
+ ($NumericRepeat | $ExtendNumLet $Ex*)*
+ |
+ ($ExtendNumLet $Ex*)* $AHLetter $Ex* ($NumericRepeat | $ExtendNumLet $Ex*)+
+ |
+ ($ExtendNumLet $Ex*)* $Numeric $Ex*
+ (
+ (
+ ($AHLetterRepeat | $ExtendNumLet $Ex*)*
+ |
+ [$MidNum $MidNumLetQ] $Ex*
+ )
+ $Numeric $Ex*
+ )+
+ ($AHLetterRepeat | $ExtendNumLet $Ex*)*
+ |
+ ($ExtendNumLet $Ex*)* $Numeric $Ex* ($AHLetterRepeat | $ExtendNumLet $Ex*)+
+ |
+ $Katakana $Ex*
+ (($Katakana | $ExtendNumLet) $Ex*)+
+ |
+ $ExtendNumLet $Ex*
+ (($ExtendNumLet | $AHLetter | $Numeric | $Katakana) $Ex*)+
+)+
+|
+$HebrewLet $Ex* $SingleQuote $Ex*
+|
+($HebrewLet $Ex* $DoubleQuote $Ex*)+ $HebrewLet $Ex*
+|
+$RI $Ex* $RI $Ex*
+|
+$Any $Ex*
+"
diff --git a/vendor/bstr/src/ascii.rs b/vendor/bstr/src/ascii.rs
new file mode 100644
index 000000000..bb2b67949
--- /dev/null
+++ b/vendor/bstr/src/ascii.rs
@@ -0,0 +1,336 @@
+use core::mem;
+
+// The following ~400 lines of code exists for exactly one purpose, which is
+// to optimize this code:
+//
+// byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
+//
+// Yes... Overengineered is a word that comes to mind, but this is effectively
+// a very similar problem to memchr, and virtually nobody has been able to
+// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
+// folks). In particular, this routine makes a very common case (ASCII) very
+// fast, which seems worth it. We do stop short of adding AVX variants of the
+// code below in order to retain our sanity and also to avoid needing to deal
+// with runtime target feature detection. RESIST!
+//
+// In order to understand the SIMD version below, it would be good to read this
+// comment describing how my memchr routine works:
+// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
+//
+// The primary difference with memchr is that for ASCII, we can do a bit less
+// work. In particular, we don't need to detect the presence of a specific
+// byte, but rather, whether any byte has its most significant bit set. That
+// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
+// _mm_movemask_epi8.
+
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const USIZE_BYTES: usize = mem::size_of::<usize>();
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+// This is a mask where the most significant bit of each byte in the usize
+// is set. We test this bit to determine whether a character is ASCII or not.
+// Namely, a single byte is regarded as an ASCII codepoint if and only if it's
+// most significant bit is not set.
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const ASCII_MASK_U64: u64 = 0x8080808080808080;
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
+
+/// Returns the index of the first non ASCII byte in the given slice.
+///
+/// If slice only contains ASCII bytes, then the length of the slice is
+/// returned.
+pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
+ #[cfg(not(target_arch = "x86_64"))]
+ {
+ first_non_ascii_byte_fallback(slice)
+ }
+
+ #[cfg(target_arch = "x86_64")]
+ {
+ first_non_ascii_byte_sse2(slice)
+ }
+}
+
+#[cfg(any(test, not(target_arch = "x86_64")))]
+fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
+ let align = USIZE_BYTES - 1;
+ let start_ptr = slice.as_ptr();
+ let end_ptr = slice[slice.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if slice.len() < USIZE_BYTES {
+ return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
+ }
+
+ let chunk = read_unaligned_usize(ptr);
+ let mask = chunk & ASCII_MASK;
+ if mask != 0 {
+ return first_non_ascii_byte_mask(mask);
+ }
+
+ ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
+ if slice.len() >= FALLBACK_LOOP_SIZE {
+ while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
+ if (a | b) & ASCII_MASK != 0 {
+ // What a kludge. We wrap the position finding code into
+ // a non-inlineable function, which makes the codegen in
+ // the tight loop above a bit better by avoiding a
+ // couple extra movs. We pay for it by two additional
+ // stores, but only in the case of finding a non-ASCII
+ // byte.
+ #[inline(never)]
+ unsafe fn findpos(
+ start_ptr: *const u8,
+ ptr: *const u8,
+ ) -> usize {
+ let a = *(ptr as *const usize);
+ let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
+
+ let mut at = sub(ptr, start_ptr);
+ let maska = a & ASCII_MASK;
+ if maska != 0 {
+ return at + first_non_ascii_byte_mask(maska);
+ }
+
+ at += USIZE_BYTES;
+ let maskb = b & ASCII_MASK;
+ debug_assert!(maskb != 0);
+ return at + first_non_ascii_byte_mask(maskb);
+ }
+ return findpos(start_ptr, ptr);
+ }
+ ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
+ }
+ }
+ first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
+ }
+}
+
+#[cfg(target_arch = "x86_64")]
+fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
+ use core::arch::x86_64::*;
+
+ const VECTOR_SIZE: usize = mem::size_of::<__m128i>();
+ const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+ const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+ let start_ptr = slice.as_ptr();
+ let end_ptr = slice[slice.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if slice.len() < VECTOR_SIZE {
+ return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
+ }
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(chunk);
+ if mask != 0 {
+ return mask.trailing_zeros() as usize;
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ if slice.len() >= VECTOR_LOOP_SIZE {
+ while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c =
+ _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d =
+ _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+
+ let or1 = _mm_or_si128(a, b);
+ let or2 = _mm_or_si128(c, d);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm_movemask_epi8(a);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(b);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(c);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(d);
+ debug_assert!(mask != 0);
+ return at + mask.trailing_zeros() as usize;
+ }
+ ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
+ }
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(chunk);
+ if mask != 0 {
+ return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
+ }
+}
+
+#[inline(always)]
+unsafe fn first_non_ascii_byte_slow(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+) -> usize {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if *ptr > 0x7F {
+ return sub(ptr, start_ptr);
+ }
+ ptr = ptr.offset(1);
+ }
+ sub(end_ptr, start_ptr)
+}
+
+/// Compute the position of the first ASCII byte in the given mask.
+///
+/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is
+/// 8 contiguous bytes of the slice being checked where *at least* one of those
+/// bytes is not an ASCII byte.
+///
+/// The position returned is always in the inclusive range [0, 7].
+#[cfg(any(test, not(target_arch = "x86_64")))]
+fn first_non_ascii_byte_mask(mask: usize) -> usize {
+ #[cfg(target_endian = "little")]
+ {
+ mask.trailing_zeros() as usize / 8
+ }
+ #[cfg(target_endian = "big")]
+ {
+ mask.leading_zeros() as usize / 8
+ }
+}
+
+/// Increment the given pointer by the given amount.
+unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
+ debug_assert!(amt < ::core::isize::MAX as usize);
+ ptr.offset(amt as isize)
+}
+
+/// Decrement the given pointer by the given amount.
+unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
+ debug_assert!(amt < ::core::isize::MAX as usize);
+ ptr.offset((amt as isize).wrapping_neg())
+}
+
+#[cfg(any(test, not(target_arch = "x86_64")))]
+unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
+ use core::ptr;
+
+ let mut n: usize = 0;
+ ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES);
+ n
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Our testing approach here is to try and exhaustively test every case.
+ // This includes the position at which a non-ASCII byte occurs in addition
+ // to the alignment of the slice that we're searching.
+
+ #[test]
+ fn positive_fallback_forward() {
+ for i in 0..517 {
+ let s = "a".repeat(i);
+ assert_eq!(
+ i,
+ first_non_ascii_byte_fallback(s.as_bytes()),
+ "i: {:?}, len: {:?}, s: {:?}",
+ i,
+ s.len(),
+ s
+ );
+ }
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn positive_sse2_forward() {
+ for i in 0..517 {
+ let b = "a".repeat(i).into_bytes();
+ assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
+ }
+ }
+
+ #[test]
+ fn negative_fallback_forward() {
+ for i in 0..517 {
+ for align in 0..65 {
+ let mut s = "a".repeat(i);
+ s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
+ let s = s.get(align..).unwrap_or("");
+ assert_eq!(
+ i.saturating_sub(align),
+ first_non_ascii_byte_fallback(s.as_bytes()),
+ "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
+ i,
+ align,
+ s.len(),
+ s
+ );
+ }
+ }
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn negative_sse2_forward() {
+ for i in 0..517 {
+ for align in 0..65 {
+ let mut s = "a".repeat(i);
+ s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
+ let s = s.get(align..).unwrap_or("");
+ assert_eq!(
+ i.saturating_sub(align),
+ first_non_ascii_byte_sse2(s.as_bytes()),
+ "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
+ i,
+ align,
+ s.len(),
+ s
+ );
+ }
+ }
+ }
+}
diff --git a/vendor/bstr/src/bstr.rs b/vendor/bstr/src/bstr.rs
new file mode 100644
index 000000000..1e3c91b9a
--- /dev/null
+++ b/vendor/bstr/src/bstr.rs
@@ -0,0 +1,74 @@
+use core::mem;
+
+/// A wrapper for `&[u8]` that provides convenient string oriented trait impls.
+///
+/// If you need ownership or a growable byte string buffer, then use
+/// [`BString`](struct.BString.html).
+///
+/// Using a `&BStr` is just like using a `&[u8]`, since `BStr`
+/// implements `Deref` to `[u8]`. So all methods available on `[u8]`
+/// are also available on `BStr`.
+///
+/// # Representation
+///
+/// A `&BStr` has the same representation as a `&str`. That is, a `&BStr` is
+/// a fat pointer which consists of a pointer to some bytes and a length.
+///
+/// # Trait implementations
+///
+/// The `BStr` type has a number of trait implementations, and in particular,
+/// defines equality and ordinal comparisons between `&BStr`, `&str` and
+/// `&[u8]` for convenience.
+///
+/// The `Debug` implementation for `BStr` shows its bytes as a normal string.
+/// For invalid UTF-8, hex escape sequences are used.
+///
+/// The `Display` implementation behaves as if `BStr` were first lossily
+/// converted to a `str`. Invalid UTF-8 bytes are substituted with the Unicode
+/// replacement codepoint, which looks like this: �.
+#[derive(Hash)]
+#[repr(transparent)]
+pub struct BStr {
+ pub(crate) bytes: [u8],
+}
+
+impl BStr {
+ #[inline]
+ pub(crate) fn new<B: ?Sized + AsRef<[u8]>>(bytes: &B) -> &BStr {
+ BStr::from_bytes(bytes.as_ref())
+ }
+
+ #[inline]
+ pub(crate) fn new_mut<B: ?Sized + AsMut<[u8]>>(
+ bytes: &mut B,
+ ) -> &mut BStr {
+ BStr::from_bytes_mut(bytes.as_mut())
+ }
+
+ #[inline]
+ pub(crate) fn from_bytes(slice: &[u8]) -> &BStr {
+ unsafe { mem::transmute(slice) }
+ }
+
+ #[inline]
+ pub(crate) fn from_bytes_mut(slice: &mut [u8]) -> &mut BStr {
+ unsafe { mem::transmute(slice) }
+ }
+
+ #[inline]
+ #[cfg(feature = "std")]
+ pub(crate) fn from_boxed_bytes(slice: Box<[u8]>) -> Box<BStr> {
+ unsafe { Box::from_raw(Box::into_raw(slice) as _) }
+ }
+
+ #[inline]
+ #[cfg(feature = "std")]
+ pub(crate) fn into_boxed_bytes(slice: Box<BStr>) -> Box<[u8]> {
+ unsafe { Box::from_raw(Box::into_raw(slice) as _) }
+ }
+
+ #[inline]
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+}
diff --git a/vendor/bstr/src/bstring.rs b/vendor/bstr/src/bstring.rs
new file mode 100644
index 000000000..30093ba64
--- /dev/null
+++ b/vendor/bstr/src/bstring.rs
@@ -0,0 +1,59 @@
+use crate::bstr::BStr;
+
+/// A wrapper for `Vec<u8>` that provides convenient string oriented trait
+/// impls.
+///
+/// A `BString` has ownership over its contents and corresponds to
+/// a growable or shrinkable buffer. Its borrowed counterpart is a
+/// [`BStr`](struct.BStr.html), called a byte string slice.
+///
+/// Using a `BString` is just like using a `Vec<u8>`, since `BString`
+/// implements `Deref` to `Vec<u8>`. So all methods available on `Vec<u8>`
+/// are also available on `BString`.
+///
+/// # Examples
+///
+/// You can create a new `BString` from a `Vec<u8>` via a `From` impl:
+///
+/// ```
+/// use bstr::BString;
+///
+/// let s = BString::from("Hello, world!");
+/// ```
+///
+/// # Deref
+///
+/// The `BString` type implements `Deref` and `DerefMut`, where the target
+/// types are `&Vec<u8>` and `&mut Vec<u8>`, respectively. `Deref` permits all of the
+/// methods defined on `Vec<u8>` to be implicitly callable on any `BString`.
+///
+/// For more information about how deref works, see the documentation for the
+/// [`std::ops::Deref`](https://doc.rust-lang.org/std/ops/trait.Deref.html)
+/// trait.
+///
+/// # Representation
+///
+/// A `BString` has the same representation as a `Vec<u8>` and a `String`.
+/// That is, it is made up of three word sized components: a pointer to a
+/// region of memory containing the bytes, a length and a capacity.
+#[derive(Clone, Hash)]
+pub struct BString {
+ pub(crate) bytes: Vec<u8>,
+}
+
+impl BString {
+ #[inline]
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn as_bstr(&self) -> &BStr {
+ BStr::new(&self.bytes)
+ }
+
+ #[inline]
+ pub(crate) fn as_mut_bstr(&mut self) -> &mut BStr {
+ BStr::new_mut(&mut self.bytes)
+ }
+}
diff --git a/vendor/bstr/src/byteset/mod.rs b/vendor/bstr/src/byteset/mod.rs
new file mode 100644
index 000000000..043d30986
--- /dev/null
+++ b/vendor/bstr/src/byteset/mod.rs
@@ -0,0 +1,114 @@
+use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+mod scalar;
+
+#[inline]
+fn build_table(byteset: &[u8]) -> [u8; 256] {
+ let mut table = [0u8; 256];
+ for &b in byteset {
+ table[b as usize] = 1;
+ }
+ table
+}
+
+#[inline]
+pub(crate) fn find(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ match byteset.len() {
+ 0 => return None,
+ 1 => memchr(byteset[0], haystack),
+ 2 => memchr2(byteset[0], byteset[1], haystack),
+ 3 => memchr3(byteset[0], byteset[1], byteset[2], haystack),
+ _ => {
+ let table = build_table(byteset);
+ scalar::forward_search_bytes(haystack, |b| table[b as usize] != 0)
+ }
+ }
+}
+
+#[inline]
+pub(crate) fn rfind(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ match byteset.len() {
+ 0 => return None,
+ 1 => memrchr(byteset[0], haystack),
+ 2 => memrchr2(byteset[0], byteset[1], haystack),
+ 3 => memrchr3(byteset[0], byteset[1], byteset[2], haystack),
+ _ => {
+ let table = build_table(byteset);
+ scalar::reverse_search_bytes(haystack, |b| table[b as usize] != 0)
+ }
+ }
+}
+
+#[inline]
+pub(crate) fn find_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ if haystack.is_empty() {
+ return None;
+ }
+ match byteset.len() {
+ 0 => return Some(0),
+ 1 => scalar::inv_memchr(byteset[0], haystack),
+ 2 => scalar::forward_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1]
+ }),
+ 3 => scalar::forward_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1] && b != byteset[2]
+ }),
+ _ => {
+ let table = build_table(byteset);
+ scalar::forward_search_bytes(haystack, |b| table[b as usize] == 0)
+ }
+ }
+}
+#[inline]
+pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ if haystack.is_empty() {
+ return None;
+ }
+ match byteset.len() {
+ 0 => return Some(haystack.len() - 1),
+ 1 => scalar::inv_memrchr(byteset[0], haystack),
+ 2 => scalar::reverse_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1]
+ }),
+ 3 => scalar::reverse_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1] && b != byteset[2]
+ }),
+ _ => {
+ let table = build_table(byteset);
+ scalar::reverse_search_bytes(haystack, |b| table[b as usize] == 0)
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ quickcheck::quickcheck! {
+ fn qc_byteset_forward_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::find(&haystack, &needles)
+ == haystack.iter().position(|b| needles.contains(b))
+ }
+ fn qc_byteset_backwards_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::rfind(&haystack, &needles)
+ == haystack.iter().rposition(|b| needles.contains(b))
+ }
+ fn qc_byteset_forward_not_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::find_not(&haystack, &needles)
+ == haystack.iter().position(|b| !needles.contains(b))
+ }
+ fn qc_byteset_backwards_not_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::rfind_not(&haystack, &needles)
+ == haystack.iter().rposition(|b| !needles.contains(b))
+ }
+ }
+}
diff --git a/vendor/bstr/src/byteset/scalar.rs b/vendor/bstr/src/byteset/scalar.rs
new file mode 100644
index 000000000..9bd34a8f9
--- /dev/null
+++ b/vendor/bstr/src/byteset/scalar.rs
@@ -0,0 +1,295 @@
+// This is adapted from `fallback.rs` from rust-memchr. It's modified to return
+// the 'inverse' query of memchr, e.g. finding the first byte not in the provided
+// set. This is simple for the 1-byte case.
+
+use core::cmp;
+use core::usize;
+
+#[cfg(target_pointer_width = "32")]
+const USIZE_BYTES: usize = 4;
+
+#[cfg(target_pointer_width = "64")]
+const USIZE_BYTES: usize = 8;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+fn repeat_byte(b: u8) -> usize {
+ (b as usize) * (usize::MAX / 255)
+}
+
+pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte != n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = read_unaligned_usize(ptr);
+ if (chunk ^ vn1) != 0 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr.add(USIZE_BYTES) as *const usize);
+ let eqa = (a ^ vn1) != 0;
+ let eqb = (b ^ vn1) != 0;
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.add(LOOP_SIZE);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Return the last index not matching the byte `x` in `text`.
+pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte != n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES));
+ if (chunk ^ vn1) != 0 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
+ let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
+ let eqa = (a ^ vn1) != 0;
+ let eqb = (b ^ vn1) != 0;
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.sub(loop_size);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+#[inline(always)]
+unsafe fn forward_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ None
+}
+
+#[inline(always)]
+unsafe fn reverse_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ None
+}
+
+unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
+ (ptr as *const usize).read_unaligned()
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
+
+/// Safe wrapper around `forward_search`
+#[inline]
+pub(crate) fn forward_search_bytes<F: Fn(u8) -> bool>(
+ s: &[u8],
+ confirm: F,
+) -> Option<usize> {
+ unsafe {
+ let start = s.as_ptr();
+ let end = start.add(s.len());
+ forward_search(start, end, start, confirm)
+ }
+}
+
+/// Safe wrapper around `reverse_search`
+#[inline]
+pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>(
+ s: &[u8],
+ confirm: F,
+) -> Option<usize> {
+ unsafe {
+ let start = s.as_ptr();
+ let end = start.add(s.len());
+ reverse_search(start, end, end, confirm)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{inv_memchr, inv_memrchr};
+ // search string, search byte, inv_memchr result, inv_memrchr result.
+ // these are expanded into a much larger set of tests in build_tests
+ const TESTS: &[(&[u8], u8, usize, usize)] = &[
+ (b"z", b'a', 0, 0),
+ (b"zz", b'a', 0, 1),
+ (b"aza", b'a', 1, 1),
+ (b"zaz", b'a', 0, 2),
+ (b"zza", b'a', 0, 1),
+ (b"zaa", b'a', 0, 0),
+ (b"zzz", b'a', 0, 2),
+ ];
+
+ type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);
+
+ fn build_tests() -> Vec<TestCase> {
+ let mut result = vec![];
+ for &(search, byte, fwd_pos, rev_pos) in TESTS {
+ result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
+ for i in 1..515 {
+ // add a bunch of copies of the search byte to the end.
+ let mut suffixed: Vec<u8> = search.into();
+ suffixed.extend(std::iter::repeat(byte).take(i));
+ result.push((suffixed, byte, Some((fwd_pos, rev_pos))));
+
+ // add a bunch of copies of the search byte to the start.
+ let mut prefixed: Vec<u8> =
+ std::iter::repeat(byte).take(i).collect();
+ prefixed.extend(search);
+ result.push((
+ prefixed,
+ byte,
+ Some((fwd_pos + i, rev_pos + i)),
+ ));
+
+ // add a bunch of copies of the search byte to both ends.
+ let mut surrounded: Vec<u8> =
+ std::iter::repeat(byte).take(i).collect();
+ surrounded.extend(search);
+ surrounded.extend(std::iter::repeat(byte).take(i));
+ result.push((
+ surrounded,
+ byte,
+ Some((fwd_pos + i, rev_pos + i)),
+ ));
+ }
+ }
+
+ // build non-matching tests for several sizes
+ for i in 0..515 {
+ result.push((
+ std::iter::repeat(b'\0').take(i).collect(),
+ b'\0',
+ None,
+ ));
+ }
+
+ result
+ }
+
+ #[test]
+ fn test_inv_memchr() {
+ use crate::{ByteSlice, B};
+ for (search, byte, matching) in build_tests() {
+ assert_eq!(
+ inv_memchr(byte, &search),
+ matching.map(|m| m.0),
+ "inv_memchr when searching for {:?} in {:?}",
+ byte as char,
+ // better printing
+ B(&search).as_bstr(),
+ );
+ assert_eq!(
+ inv_memrchr(byte, &search),
+ matching.map(|m| m.1),
+ "inv_memrchr when searching for {:?} in {:?}",
+ byte as char,
+ // better printing
+ B(&search).as_bstr(),
+ );
+ // Test a rather large number off offsets for potential alignment issues
+ for offset in 1..130 {
+ if offset >= search.len() {
+ break;
+ }
+ // If this would cause us to shift the results off the end, skip
+ // it so that we don't have to recompute them.
+ if let Some((f, r)) = matching {
+ if offset > f || offset > r {
+ break;
+ }
+ }
+ let realigned = &search[offset..];
+
+ let forward_pos = matching.map(|m| m.0 - offset);
+ let reverse_pos = matching.map(|m| m.1 - offset);
+
+ assert_eq!(
+ inv_memchr(byte, &realigned),
+ forward_pos,
+ "inv_memchr when searching (realigned by {}) for {:?} in {:?}",
+ offset,
+ byte as char,
+ realigned.as_bstr(),
+ );
+ assert_eq!(
+ inv_memrchr(byte, &realigned),
+ reverse_pos,
+ "inv_memrchr when searching (realigned by {}) for {:?} in {:?}",
+ offset,
+ byte as char,
+ realigned.as_bstr(),
+ );
+ }
+ }
+ }
+}
diff --git a/vendor/bstr/src/ext_slice.rs b/vendor/bstr/src/ext_slice.rs
new file mode 100644
index 000000000..0cc73affc
--- /dev/null
+++ b/vendor/bstr/src/ext_slice.rs
@@ -0,0 +1,3655 @@
+#[cfg(feature = "std")]
+use std::borrow::Cow;
+#[cfg(feature = "std")]
+use std::ffi::OsStr;
+#[cfg(feature = "std")]
+use std::path::Path;
+
+use core::{iter, ops, ptr, slice, str};
+use memchr::{memchr, memmem, memrchr};
+
+use crate::ascii;
+use crate::bstr::BStr;
+use crate::byteset;
+#[cfg(feature = "std")]
+use crate::ext_vec::ByteVec;
+#[cfg(feature = "unicode")]
+use crate::unicode::{
+ whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
+ SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
+ WordsWithBreaks,
+};
+use crate::utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error};
+
+/// A short-hand constructor for building a `&[u8]`.
+///
+/// This idiosyncratic constructor is useful for concisely building byte string
+/// slices. Its primary utility is in conveniently writing byte string literals
+/// in a uniform way. For example, consider this code that does not compile:
+///
+/// ```ignore
+/// let strs = vec![b"a", b"xy"];
+/// ```
+///
+/// The above code doesn't compile because the type of the byte string literal
+/// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
+/// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
+/// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
+/// where both `"a"` and `"xy"` have the same type of `&'static str`.)
+///
+/// One way of getting the above code to compile is to convert byte strings to
+/// slices. You might try this:
+///
+/// ```ignore
+/// let strs = vec![&b"a", &b"xy"];
+/// ```
+///
+/// But this just creates values with type `& &'static [u8; 1]` and
+/// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
+///
+/// ```
+/// let strs = vec![&b"a"[..], &b"xy"[..]];
+/// // or
+/// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
+/// ```
+///
+/// But neither of these are particularly convenient to type, especially when
+/// it's something as common as a string literal. Thus, this constructor
+/// permits writing the following instead:
+///
+/// ```
+/// use bstr::B;
+///
+/// let strs = vec![B("a"), B(b"xy")];
+/// ```
+///
+/// Notice that this also lets you mix and match both string literals and byte
+/// string literals. This can be quite convenient!
+#[allow(non_snake_case)]
+#[inline]
+pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
+ bytes.as_ref()
+}
+
+impl ByteSlice for [u8] {
+ #[inline]
+ fn as_bytes(&self) -> &[u8] {
+ self
+ }
+
+ #[inline]
+ fn as_bytes_mut(&mut self) -> &mut [u8] {
+ self
+ }
+}
+
+/// Ensure that callers cannot implement `ByteSlice` by making an
+/// umplementable trait its super trait.
+pub trait Sealed {}
+impl Sealed for [u8] {}
+
+/// A trait that extends `&[u8]` with string oriented methods.
+pub trait ByteSlice: Sealed {
+ /// A method for accessing the raw bytes of this type. This is always a
+ /// no-op and callers shouldn't care about it. This only exists for making
+ /// the extension trait work.
+ #[doc(hidden)]
+ fn as_bytes(&self) -> &[u8];
+
+ /// A method for accessing the raw bytes of this type, mutably. This is
+ /// always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_bytes_mut(&mut self) -> &mut [u8];
+
+ /// Return this byte slice as a `&BStr`.
+ ///
+ /// Use `&BStr` is useful because of its `fmt::Debug` representation
+ /// and various other trait implementations (such as `PartialEq` and
+ /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
+ /// shows its bytes as a normal string. For invalid UTF-8, hex escape
+ /// sequences are used.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// println!("{:?}", b"foo\xFFbar".as_bstr());
+ /// ```
+ #[inline]
+ fn as_bstr(&self) -> &BStr {
+ BStr::new(self.as_bytes())
+ }
+
+ /// Return this byte slice as a `&mut BStr`.
+ ///
+ /// Use `&mut BStr` is useful because of its `fmt::Debug` representation
+ /// and various other trait implementations (such as `PartialEq` and
+ /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
+ /// shows its bytes as a normal string. For invalid UTF-8, hex escape
+ /// sequences are used.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bytes = *b"foo\xFFbar";
+ /// println!("{:?}", &mut bytes.as_bstr_mut());
+ /// ```
+ #[inline]
+ fn as_bstr_mut(&mut self) -> &mut BStr {
+ BStr::new_mut(self.as_bytes_mut())
+ }
+
+ /// Create an immutable byte string from an OS string slice.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns `None` if the given OS string is not valid UTF-8. (For
+ /// example, on Windows, file paths are allowed to be a sequence of
+ /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
+ /// valid UTF-8.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(os_str: &OsStr) -> Option<&[u8]> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Some(os_str.as_bytes())
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(os_str: &OsStr) -> Option<&[u8]> {
+ os_str.to_str().map(|s| s.as_bytes())
+ }
+
+ imp(os_str)
+ }
+
+ /// Create an immutable byte string from a file path.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns `None` if the given path is not valid UTF-8. (For example,
+ /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
+ /// integers. Not all such sequences can be transcoded to valid UTF-8.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::Path;
+ ///
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let path = Path::new("foo");
+ /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn from_path(path: &Path) -> Option<&[u8]> {
+ Self::from_os_str(path.as_os_str())
+ }
+
+ /// Safely convert this byte string into a `&str` if it's valid UTF-8.
+ ///
+ /// If this byte string is not valid UTF-8, then an error is returned. The
+ /// error returned indicates the first invalid byte found and the length
+ /// of the error.
+ ///
+ /// In cases where a lossy conversion to `&str` is acceptable, then use one
+ /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
+ /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
+ /// methods.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// # fn example() -> Result<(), bstr::Utf8Error> {
+ /// let s = B("☃βツ").to_str()?;
+ /// assert_eq!("☃βツ", s);
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// bstring.push(b'\xFF');
+ /// let err = bstring.to_str().unwrap_err();
+ /// assert_eq!(8, err.valid_up_to());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ #[inline]
+ fn to_str(&self) -> Result<&str, Utf8Error> {
+ utf8::validate(self.as_bytes()).map(|_| {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe { str::from_utf8_unchecked(self.as_bytes()) }
+ })
+ }
+
+ /// Unsafely convert this byte string into a `&str`, without checking for
+ /// valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// Callers *must* ensure that this byte string is valid UTF-8 before
+ /// calling this method. Converting a byte string into a `&str` that is
+ /// not valid UTF-8 is considered undefined behavior.
+ ///
+ /// This routine is useful in performance sensitive contexts where the
+ /// UTF-8 validity of the byte string is already known and it is
+ /// undesirable to pay the cost of an additional UTF-8 validation check
+ /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// // SAFETY: This is safe because string literals are guaranteed to be
+ /// // valid UTF-8 by the Rust compiler.
+ /// let s = unsafe { B("☃βツ").to_str_unchecked() };
+ /// assert_eq!("☃βツ", s);
+ /// ```
+ #[inline]
+ unsafe fn to_str_unchecked(&self) -> &str {
+ str::from_utf8_unchecked(self.as_bytes())
+ }
+
+ /// Convert this byte string to a valid UTF-8 string by replacing invalid
+ /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
+ ///
+ /// If the byte string is already valid UTF-8, then no copying or
+ /// allocation is performed and a borrrowed string slice is returned. If
+ /// the byte string is not valid UTF-8, then an owned string buffer is
+ /// returned with invalid bytes replaced by the replacement codepoint.
+ ///
+ /// This method uses the "substitution of maximal subparts" (Unicode
+ /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
+ /// codepoint. Specifically, a replacement codepoint is inserted whenever a
+ /// byte is found that cannot possibly lead to a valid code unit sequence.
+ /// If there were previous bytes that represented a prefix of a well-formed
+ /// code unit sequence, then all of those bytes are substituted with a
+ /// single replacement codepoint. The "substitution of maximal subparts"
+ /// strategy is the same strategy used by
+ /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
+ /// For a more precise description of the maximal subpart strategy, see
+ /// the Unicode Standard, Chapter 3, Section 9. See also
+ /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
+ ///
+ /// N.B. Rust's standard library also appears to use the same strategy,
+ /// but it does not appear to be an API guarantee.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
+ ///
+ /// // Add a byte that makes the sequence invalid.
+ /// bstring.push(b'\xFF');
+ /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
+ /// ```
+ ///
+ /// This demonstrates the "maximal subpart" substitution logic.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// // \x61 is the ASCII codepoint for 'a'.
+ /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
+ /// // \xE1\x80 is a valid 2-byte code unit prefix.
+ /// // \xC2 is a valid 1-byte code unit prefix.
+ /// // \x62 is the ASCII codepoint for 'b'.
+ /// //
+ /// // In sum, each of the prefixes is replaced by a single replacement
+ /// // codepoint since none of the prefixes are properly completed. This
+ /// // is in contrast to other strategies that might insert a replacement
+ /// // codepoint for every single byte.
+ /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
+ /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_str_lossy(&self) -> Cow<'_, str> {
+ match utf8::validate(self.as_bytes()) {
+ Ok(()) => {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe {
+ Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
+ }
+ }
+ Err(err) => {
+ let mut lossy = String::with_capacity(self.as_bytes().len());
+ let (valid, after) =
+ self.as_bytes().split_at(err.valid_up_to());
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `valid` is valid UTF-8.
+ lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
+ lossy.push_str("\u{FFFD}");
+ if let Some(len) = err.error_len() {
+ after[len..].to_str_lossy_into(&mut lossy);
+ }
+ Cow::Owned(lossy)
+ }
+ }
+ }
+
+ /// Copy the contents of this byte string into the given owned string
+ /// buffer, while replacing invalid UTF-8 code unit sequences with the
+ /// Unicode replacement codepoint (`U+FFFD`).
+ ///
+ /// This method uses the same "substitution of maximal subparts" strategy
+ /// for inserting the replacement codepoint as the
+ /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
+ ///
+ /// This routine is useful for amortizing allocation. However, unlike
+ /// `to_str_lossy`, this routine will _always_ copy the contents of this
+ /// byte string into the destination buffer, even if this byte string is
+ /// valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// // Add a byte that makes the sequence invalid.
+ /// bstring.push(b'\xFF');
+ ///
+ /// let mut dest = String::new();
+ /// bstring.to_str_lossy_into(&mut dest);
+ /// assert_eq!("☃βツ\u{FFFD}", dest);
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_str_lossy_into(&self, dest: &mut String) {
+ let mut bytes = self.as_bytes();
+ dest.reserve(bytes.len());
+ loop {
+ match utf8::validate(bytes) {
+ Ok(()) => {
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `bytes` is valid UTF-8.
+ dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
+ break;
+ }
+ Err(err) => {
+ let (valid, after) = bytes.split_at(err.valid_up_to());
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `valid` is valid UTF-8.
+ dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
+ dest.push_str("\u{FFFD}");
+ match err.error_len() {
+ None => break,
+ Some(len) => bytes = &after[len..],
+ }
+ }
+ }
+ }
+ }
+
+ /// Create an OS string slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns a UTF-8 decoding error if this byte string is not valid
+ /// UTF-8. (For example, on Windows, file paths are allowed to be a
+ /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
+ /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
+ /// 16-bit integers.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
+ /// assert_eq!(os_str, "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Ok(OsStr::from_bytes(bytes))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
+ bytes.to_str().map(OsStr::new)
+ }
+
+ imp(self.as_bytes())
+ }
+
+ /// Lossily create an OS string slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let os_str = b"foo\xFFbar".to_os_str_lossy();
+ /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Cow::Borrowed(OsStr::from_bytes(bytes))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Cow<OsStr> {
+ use std::ffi::OsString;
+
+ match bytes.to_str_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
+ Cow::Owned(x) => Cow::Owned(OsString::from(x)),
+ }
+ }
+
+ imp(self.as_bytes())
+ }
+
+ /// Create a path slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns a UTF-8 decoding error if this byte string is not valid
+ /// UTF-8. (For example, on Windows, file paths are allowed to be a
+ /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
+ /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
+ /// 16-bit integers.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let path = b"foo".to_path().expect("should be valid UTF-8");
+ /// assert_eq!(path.as_os_str(), "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_path(&self) -> Result<&Path, Utf8Error> {
+ self.to_os_str().map(Path::new)
+ }
+
+ /// Lossily create a path slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"foo\xFFbar";
+ /// let path = bs.to_path_lossy();
+ /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_path_lossy(&self) -> Cow<'_, Path> {
+ use std::path::PathBuf;
+
+ match self.to_os_str_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
+ Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
+ }
+ }
+
+ /// Create a new byte string by repeating this byte string `n` times.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if the capacity of the new byte string would
+ /// overflow.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
+ /// assert_eq!(b"foo".repeatn(0), B(""));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn repeatn(&self, n: usize) -> Vec<u8> {
+ let bs = self.as_bytes();
+ let mut dst = vec![0; bs.len() * n];
+ for i in 0..n {
+ dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs);
+ }
+ dst
+ }
+
+ /// Returns true if and only if this byte string contains the given needle.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".contains_str("foo"));
+ /// assert!(b"foo bar".contains_str("bar"));
+ /// assert!(!b"foo".contains_str("foobar"));
+ /// ```
+ #[inline]
+ fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
+ self.find(needle).is_some()
+ }
+
+ /// Returns true if and only if this byte string has the given prefix.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".starts_with_str("foo"));
+ /// assert!(!b"foo bar".starts_with_str("bar"));
+ /// assert!(!b"foo".starts_with_str("foobar"));
+ /// ```
+ #[inline]
+ fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
+ self.as_bytes().starts_with(prefix.as_ref())
+ }
+
+ /// Returns true if and only if this byte string has the given suffix.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".ends_with_str("bar"));
+ /// assert!(!b"foo bar".ends_with_str("foo"));
+ /// assert!(!b"bar".ends_with_str("foobar"));
+ /// ```
+ #[inline]
+ fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
+ self.as_bytes().ends_with(suffix.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of the given needle.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Note that if you're are searching for the same needle in many
+ /// different small haystacks, it may be faster to initialize a
+ /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar baz";
+ /// assert_eq!(Some(0), s.find("foo"));
+ /// assert_eq!(Some(4), s.find("bar"));
+ /// assert_eq!(None, s.find("quux"));
+ /// ```
+ #[inline]
+ fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
+ Finder::new(needle.as_ref()).find(self.as_bytes())
+ }
+
+ /// Returns the index of the last occurrence of the given needle.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Note that if you're are searching for the same needle in many
+ /// different small haystacks, it may be faster to initialize a
+ /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
+ /// each search.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar baz";
+ /// assert_eq!(Some(0), s.rfind("foo"));
+ /// assert_eq!(Some(4), s.rfind("bar"));
+ /// assert_eq!(Some(8), s.rfind("ba"));
+ /// assert_eq!(None, s.rfind("quux"));
+ /// ```
+ #[inline]
+ fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
+ FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
+ }
+
+ /// Returns an iterator of the non-overlapping occurrences of the given
+ /// needle. The iterator yields byte offset positions indicating the start
+ /// of each match.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar foo foo quux foo";
+ /// let matches: Vec<usize> = s.find_iter("foo").collect();
+ /// assert_eq!(matches, vec![0, 8, 12, 21]);
+ /// ```
+ ///
+ /// An empty string matches at every position, including the position
+ /// immediately following the last byte:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let matches: Vec<usize> = b"foo".find_iter("").collect();
+ /// assert_eq!(matches, vec![0, 1, 2, 3]);
+ ///
+ /// let matches: Vec<usize> = b"".find_iter("").collect();
+ /// assert_eq!(matches, vec![0]);
+ /// ```
+ #[inline]
+ fn find_iter<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ needle: &'a B,
+ ) -> Find<'a> {
+ Find::new(self.as_bytes(), needle.as_ref())
+ }
+
+ /// Returns an iterator of the non-overlapping occurrences of the given
+ /// needle in reverse. The iterator yields byte offset positions indicating
+ /// the start of each match.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar foo foo quux foo";
+ /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
+ /// assert_eq!(matches, vec![21, 12, 8, 0]);
+ /// ```
+ ///
+ /// An empty string matches at every position, including the position
+ /// immediately following the last byte:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
+ /// assert_eq!(matches, vec![3, 2, 1, 0]);
+ ///
+ /// let matches: Vec<usize> = b"".rfind_iter("").collect();
+ /// assert_eq!(matches, vec![0]);
+ /// ```
+ #[inline]
+ fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ needle: &'a B,
+ ) -> FindReverse<'a> {
+ FindReverse::new(self.as_bytes(), needle.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of the given byte. If the
+ /// byte does not occur in this byte string, then `None` is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
+ /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
+ /// ```
+ #[inline]
+ fn find_byte(&self, byte: u8) -> Option<usize> {
+ memchr(byte, self.as_bytes())
+ }
+
+ /// Returns the index of the last occurrence of the given byte. If the
+ /// byte does not occur in this byte string, then `None` is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
+ /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
+ /// ```
+ #[inline]
+ fn rfind_byte(&self, byte: u8) -> Option<usize> {
+ memrchr(byte, self.as_bytes())
+ }
+
+ /// Returns the index of the first occurrence of the given codepoint.
+ /// If the codepoint does not occur in this byte string, then `None` is
+ /// returned.
+ ///
+ /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
+ /// then only explicit occurrences of that encoding will be found. Invalid
+ /// UTF-8 sequences will not be matched.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
+ /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
+ /// assert_eq!(None, b"foo bar baz".find_char('y'));
+ /// ```
+ #[inline]
+ fn find_char(&self, ch: char) -> Option<usize> {
+ self.find(ch.encode_utf8(&mut [0; 4]))
+ }
+
+ /// Returns the index of the last occurrence of the given codepoint.
+ /// If the codepoint does not occur in this byte string, then `None` is
+ /// returned.
+ ///
+ /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
+ /// then only explicit occurrences of that encoding will be found. Invalid
+ /// UTF-8 sequences will not be matched.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
+ /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
+ /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
+ /// ```
+ #[inline]
+ fn rfind_char(&self, ch: char) -> Option<usize> {
+ self.rfind(ch.encode_utf8(&mut [0; 4]))
+ }
+
+ /// Returns the index of the first occurrence of any of the bytes in the
+ /// provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
+ /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
+ /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
+ /// ```
+ #[inline]
+ fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::find(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of a byte that is not a member
+ /// of the provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
+ /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
+ /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
+ /// ```
+ #[inline]
+ fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::find_not(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the last occurrence of any of the bytes in the
+ /// provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and duplicate
+ /// bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
+ /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
+ /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
+ /// ```
+ #[inline]
+ fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::rfind(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the last occurrence of a byte that is not a member
+ /// of the provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
+ /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
+ /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
+ /// ```
+ #[inline]
+ fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::rfind_not(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns an iterator over the fields in a byte string, separated by
+ /// contiguous whitespace.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\nquux \n");
+ /// let fields: Vec<&[u8]> = s.fields().collect();
+ /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
+ /// ```
+ ///
+ /// A byte string consisting of just whitespace yields no elements:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
+ /// ```
+ #[inline]
+ fn fields(&self) -> Fields<'_> {
+ Fields::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the fields in a byte string, separated by
+ /// contiguous codepoints satisfying the given predicate.
+ ///
+ /// If this byte string is not valid UTF-8, then the given closure will
+ /// be called with a Unicode replacement codepoint when invalid UTF-8
+ /// bytes are seen.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo999999bar1quux123456";
+ /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
+ /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
+ /// ```
+ ///
+ /// A byte string consisting of all codepoints satisfying the predicate
+ /// yields no elements:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
+ /// ```
+ #[inline]
+ fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
+ FieldsWith::new(self.as_bytes(), f)
+ }
+
+ /// Returns an iterator over substrings of this byte string, separated
+ /// by the given byte string. Each element yielded is guaranteed not to
+ /// include the splitter substring.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"".split_str("X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
+ /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
+ ///
+ /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
+ /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings yielded by the iterator:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
+ /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored by empty
+ /// strings.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
+ /// assert_eq!(x, vec![B(""), B("1"), B("")]);
+ /// ```
+ ///
+ /// When the empty string is used as a separator, it splits every **byte**
+ /// in the byte string, along with the beginning and end of the byte
+ /// string.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B("r"), B("u"), B("s"), B("t"), B(""),
+ /// ]);
+ ///
+ /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
+ /// // may not be valid UTF-8!
+ /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
+ /// ]);
+ /// ```
+ ///
+ /// Contiguous separators, especially whitespace, can lead to possibly
+ /// surprising behavior. For example, this code is correct:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
+ /// ]);
+ /// ```
+ ///
+ /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
+ /// [`fields`](#method.fields) instead.
+ #[inline]
+ fn split_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ splitter: &'a B,
+ ) -> Split<'a> {
+ Split::new(self.as_bytes(), splitter.as_ref())
+ }
+
+ /// Returns an iterator over substrings of this byte string, separated by
+ /// the given byte string, in reverse. Each element yielded is guaranteed
+ /// not to include the splitter substring.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> =
+ /// b"Mary had a little lamb".rsplit_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
+ ///
+ /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings yielded by the iterator:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
+ /// assert_eq!(x, vec![
+ /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
+ /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored by empty
+ /// strings.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
+ /// assert_eq!(x, vec![B(""), B("1"), B("")]);
+ /// ```
+ ///
+ /// When the empty string is used as a separator, it splits every **byte**
+ /// in the byte string, along with the beginning and end of the byte
+ /// string.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B("t"), B("s"), B("u"), B("r"), B(""),
+ /// ]);
+ ///
+ /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
+ /// // may not be valid UTF-8!
+ /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
+ /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
+ /// ```
+ ///
+ /// Contiguous separators, especially whitespace, can lead to possibly
+ /// surprising behavior. For example, this code is correct:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
+ /// ]);
+ /// ```
+ ///
+ /// It does *not* give you `["a", "b", "c"]`.
+ #[inline]
+ fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ splitter: &'a B,
+ ) -> SplitReverse<'a> {
+ SplitReverse::new(self.as_bytes(), splitter.as_ref())
+ }
+
+ /// Returns an iterator of at most `limit` substrings of this byte string,
+ /// separated by the given byte string. If `limit` substrings are yielded,
+ /// then the last substring will contain the remainder of this byte string.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
+ /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
+ ///
+ /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
+ ///
+ /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
+ /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
+ /// assert_eq!(x, vec![B("abcXdef")]);
+ ///
+ /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
+ /// assert_eq!(x, vec![B("abcdef")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
+ /// assert!(x.is_empty());
+ /// ```
+ #[inline]
+ fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ limit: usize,
+ splitter: &'a B,
+ ) -> SplitN<'a> {
+ SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
+ }
+
+ /// Returns an iterator of at most `limit` substrings of this byte string,
+ /// separated by the given byte string, in reverse. If `limit` substrings
+ /// are yielded, then the last substring will contain the remainder of this
+ /// byte string.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<_> =
+ /// b"Mary had a little lamb".rsplitn_str(3, " ").collect();
+ /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
+ ///
+ /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
+ ///
+ /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
+ /// assert_eq!(x, vec![B("abcXdef")]);
+ ///
+ /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
+ /// assert_eq!(x, vec![B("abcdef")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
+ /// assert!(x.is_empty());
+ /// ```
+ #[inline]
+ fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ limit: usize,
+ splitter: &'a B,
+ ) -> SplitNReverse<'a> {
+ SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
+ }
+
+ /// Replace all matches of the given needle with the given replacement, and
+ /// the result as a new `Vec<u8>`.
+ ///
+ /// This routine is useful as a convenience. If you need to reuse an
+ /// allocation, use [`replace_into`](#method.replace_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old".replace("old", "new");
+ /// assert_eq!(s, "this is new".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old".replace("nada nada", "limonada");
+ /// assert_eq!(s, "this is old".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo".replace("", "Z");
+ /// assert_eq!(s, "ZfZoZoZ".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ ) -> Vec<u8> {
+ let mut dest = Vec::with_capacity(self.as_bytes().len());
+ self.replace_into(needle, replacement, &mut dest);
+ dest
+ }
+
+ /// Replace up to `limit` matches of the given needle with the given
+ /// replacement, and the result as a new `Vec<u8>`.
+ ///
+ /// This routine is useful as a convenience. If you need to reuse an
+ /// allocation, use [`replacen_into`](#method.replacen_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo".replacen("o", "z", 2);
+ /// assert_eq!(s, "fzzfoo".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo".replacen("a", "z", 2);
+ /// assert_eq!(s, "foofoo".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo".replacen("", "Z", 2);
+ /// assert_eq!(s, "ZfZoo".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ limit: usize,
+ ) -> Vec<u8> {
+ let mut dest = Vec::with_capacity(self.as_bytes().len());
+ self.replacen_into(needle, replacement, limit, &mut dest);
+ dest
+ }
+
+ /// Replace all matches of the given needle with the given replacement,
+ /// and write the result into the provided `Vec<u8>`.
+ ///
+ /// This does **not** clear `dest` before writing to it.
+ ///
+ /// This routine is useful for reusing allocation. For a more convenient
+ /// API, use [`replace`](#method.replace) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("old", "new", &mut dest);
+ /// assert_eq!(dest, "this is new".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("nada nada", "limonada", &mut dest);
+ /// assert_eq!(dest, "this is old".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("", "Z", &mut dest);
+ /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ dest: &mut Vec<u8>,
+ ) {
+ let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
+
+ let mut last = 0;
+ for start in self.find_iter(needle) {
+ dest.push_str(&self.as_bytes()[last..start]);
+ dest.push_str(replacement);
+ last = start + needle.len();
+ }
+ dest.push_str(&self.as_bytes()[last..]);
+ }
+
+ /// Replace up to `limit` matches of the given needle with the given
+ /// replacement, and write the result into the provided `Vec<u8>`.
+ ///
+ /// This does **not** clear `dest` before writing to it.
+ ///
+ /// This routine is useful for reusing allocation. For a more convenient
+ /// API, use [`replacen`](#method.replacen) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("o", "z", 2, &mut dest);
+ /// assert_eq!(dest, "fzzfoo".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("a", "z", 2, &mut dest);
+ /// assert_eq!(dest, "foofoo".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("", "Z", 2, &mut dest);
+ /// assert_eq!(dest, "ZfZoo".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ limit: usize,
+ dest: &mut Vec<u8>,
+ ) {
+ let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
+
+ let mut last = 0;
+ for start in self.find_iter(needle).take(limit) {
+ dest.push_str(&self.as_bytes()[last..start]);
+ dest.push_str(replacement);
+ last = start + needle.len();
+ }
+ dest.push_str(&self.as_bytes()[last..]);
+ }
+
+ /// Returns an iterator over the bytes in this byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"foobar";
+ /// let bytes: Vec<u8> = bs.bytes().collect();
+ /// assert_eq!(bytes, bs);
+ /// ```
+ #[inline]
+ fn bytes(&self) -> Bytes<'_> {
+ Bytes { it: self.as_bytes().iter() }
+ }
+
+ /// Returns an iterator over the Unicode scalar values in this byte string.
+ /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
+ /// is yielded instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<char> = bs.chars().collect();
+ /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+ /// ```
+ ///
+ /// Codepoints can also be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<char> = bs.chars().rev().collect();
+ /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+ /// ```
+ #[inline]
+ fn chars(&self) -> Chars<'_> {
+ Chars::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the Unicode scalar values in this byte string
+ /// along with their starting and ending byte index positions. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// Note that this is slightly different from the `CharIndices` iterator
+ /// provided by the standard library. Aside from working on possibly
+ /// invalid UTF-8, this iterator provides both the corresponding starting
+ /// and ending byte indices of each codepoint yielded. The ending position
+ /// is necessary to slice the original byte string when invalid UTF-8 bytes
+ /// are converted into a Unicode replacement codepoint, since a single
+ /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
+ /// (inclusive).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
+ /// assert_eq!(chars, vec![
+ /// (0, 3, '☃'),
+ /// (3, 4, '\u{FFFD}'),
+ /// (4, 8, '𝞃'),
+ /// (8, 10, '\u{FFFD}'),
+ /// (10, 11, 'a'),
+ /// ]);
+ /// ```
+ ///
+ /// Codepoints can also be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<(usize, usize, char)> = bs
+ /// .char_indices()
+ /// .rev()
+ /// .collect();
+ /// assert_eq!(chars, vec![
+ /// (10, 11, 'a'),
+ /// (8, 10, '\u{FFFD}'),
+ /// (4, 8, '𝞃'),
+ /// (3, 4, '\u{FFFD}'),
+ /// (0, 3, '☃'),
+ /// ]);
+ /// ```
+ #[inline]
+ fn char_indices(&self) -> CharIndices<'_> {
+ CharIndices::new(self.as_bytes())
+ }
+
+ /// Iterate over chunks of valid UTF-8.
+ ///
+ /// The iterator returned yields chunks of valid UTF-8 separated by invalid
+ /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
+ /// which are determined via the "substitution of maximal subparts"
+ /// strategy described in the docs for the
+ /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
+ /// method.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to gather all valid and invalid chunks from a
+ /// byte slice:
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, Utf8Chunk};
+ ///
+ /// let bytes = b"foo\xFD\xFEbar\xFF";
+ ///
+ /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
+ /// for chunk in bytes.utf8_chunks() {
+ /// if !chunk.valid().is_empty() {
+ /// valid_chunks.push(chunk.valid());
+ /// }
+ /// if !chunk.invalid().is_empty() {
+ /// invalid_chunks.push(chunk.invalid());
+ /// }
+ /// }
+ ///
+ /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
+ /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
+ /// ```
+ #[inline]
+ fn utf8_chunks(&self) -> Utf8Chunks<'_> {
+ Utf8Chunks { bytes: self.as_bytes() }
+ }
+
+ /// Returns an iterator over the grapheme clusters in this byte string.
+ /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
+ /// is yielded instead.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how multiple codepoints can combine to form a
+ /// single grapheme cluster:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<&str> = bs.graphemes().collect();
+ /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
+ /// ```
+ ///
+ /// This shows that graphemes can be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
+ /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn graphemes(&self) -> Graphemes<'_> {
+ Graphemes::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the grapheme clusters in this byte string
+ /// along with their starting and ending byte index positions. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// grapheme cluster:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<(usize, usize, &str)> =
+ /// bs.grapheme_indices().collect();
+ /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
+ /// ```
+ ///
+ /// This example shows what happens when invalid UTF-8 is enountered. Note
+ /// that the offsets are valid indices into the original string, and do
+ /// not necessarily correspond to the length of the `&str` returned!
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, ByteVec};
+ ///
+ /// let mut bytes = vec![];
+ /// bytes.push_str("a\u{0300}\u{0316}");
+ /// bytes.push(b'\xFF');
+ /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
+ ///
+ /// let graphemes: Vec<(usize, usize, &str)> =
+ /// bytes.grapheme_indices().collect();
+ /// assert_eq!(
+ /// graphemes,
+ /// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
+ /// );
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn grapheme_indices(&self) -> GraphemeIndices<'_> {
+ GraphemeIndices::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words in this byte string. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// This is similar to
+ /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
+ /// except it only returns elements that contain a "word" character. A word
+ /// character is defined by UTS #18 (Annex C) to be the combination of the
+ /// `Alphabetic` and `Join_Control` properties, along with the
+ /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
+ /// categories.
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
+ /// let words: Vec<&str> = bs.words().collect();
+ /// assert_eq!(words, vec![
+ /// "The", "quick", "brown", "fox", "can't",
+ /// "jump", "32.3", "feet", "right",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words(&self) -> Words<'_> {
+ Words::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words in this byte string along with
+ /// their starting and ending byte index positions.
+ ///
+ /// This is similar to
+ /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
+ /// except it only returns elements that contain a "word" character. A word
+ /// character is defined by UTS #18 (Annex C) to be the combination of the
+ /// `Alphabetic` and `Join_Control` properties, along with the
+ /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
+ /// categories.
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// word:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"can't jump 32.3 feet";
+ /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
+ /// assert_eq!(words, vec![
+ /// (0, 5, "can't"),
+ /// (6, 10, "jump"),
+ /// (11, 15, "32.3"),
+ /// (16, 20, "feet"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn word_indices(&self) -> WordIndices<'_> {
+ WordIndices::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words in this byte string, along with
+ /// all breaks between the words. Concatenating all elements yielded by
+ /// the iterator results in the original string (modulo Unicode replacement
+ /// codepoint substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
+ /// let words: Vec<&str> = bs.words_with_breaks().collect();
+ /// assert_eq!(words, vec![
+ /// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
+ /// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
+ /// ",", " ", "right", "?",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
+ WordsWithBreaks::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words and their byte offsets in this
+ /// byte string, along with all breaks between the words. Concatenating
+ /// all elements yielded by the iterator results in the original string
+ /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
+ /// encountered).
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// word:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"can't jump 32.3 feet";
+ /// let words: Vec<(usize, usize, &str)> =
+ /// bs.words_with_break_indices().collect();
+ /// assert_eq!(words, vec![
+ /// (0, 5, "can't"),
+ /// (5, 6, " "),
+ /// (6, 10, "jump"),
+ /// (10, 11, " "),
+ /// (11, 15, "32.3"),
+ /// (15, 16, " "),
+ /// (16, 20, "feet"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
+ WordsWithBreakIndices::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the sentences in this byte string.
+ ///
+ /// Typically, a sentence will include its trailing punctuation and
+ /// whitespace. Concatenating all elements yielded by the iterator
+ /// results in the original string (modulo Unicode replacement codepoint
+ /// substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since sentences are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"I want this. Not that. Right now.";
+ /// let sentences: Vec<&str> = bs.sentences().collect();
+ /// assert_eq!(sentences, vec![
+ /// "I want this. ",
+ /// "Not that. ",
+ /// "Right now.",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn sentences(&self) -> Sentences<'_> {
+ Sentences::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the sentences in this byte string along with
+ /// their starting and ending byte index positions.
+ ///
+ /// Typically, a sentence will include its trailing punctuation and
+ /// whitespace. Concatenating all elements yielded by the iterator
+ /// results in the original string (modulo Unicode replacement codepoint
+ /// substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since sentences are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"I want this. Not that. Right now.";
+ /// let sentences: Vec<(usize, usize, &str)> =
+ /// bs.sentence_indices().collect();
+ /// assert_eq!(sentences, vec![
+ /// (0, 13, "I want this. "),
+ /// (13, 23, "Not that. "),
+ /// (23, 33, "Right now."),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn sentence_indices(&self) -> SentenceIndices<'_> {
+ SentenceIndices::new(self.as_bytes())
+ }
+
+ /// An iterator over all lines in a byte string, without their
+ /// terminators.
+ ///
+ /// For this iterator, the only line terminators recognized are `\r\n` and
+ /// `\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ ///
+ /// bar\r
+ /// baz
+ ///
+ ///
+ /// quux";
+ /// let lines: Vec<&[u8]> = s.lines().collect();
+ /// assert_eq!(lines, vec![
+ /// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
+ /// ]);
+ /// ```
+ #[inline]
+ fn lines(&self) -> Lines<'_> {
+ Lines::new(self.as_bytes())
+ }
+
+ /// An iterator over all lines in a byte string, including their
+ /// terminators.
+ ///
+ /// For this iterator, the only line terminator recognized is `\n`. (Since
+ /// line terminators are included, this also handles `\r\n` line endings.)
+ ///
+ /// Line terminators are only included if they are present in the original
+ /// byte string. For example, the last line in a byte string may not end
+ /// with a line terminator.
+ ///
+ /// Concatenating all elements yielded by this iterator is guaranteed to
+ /// yield the original byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ ///
+ /// bar\r
+ /// baz
+ ///
+ ///
+ /// quux";
+ /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
+ /// assert_eq!(lines, vec![
+ /// B("foo\n"),
+ /// B("\n"),
+ /// B("bar\r\n"),
+ /// B("baz\n"),
+ /// B("\n"),
+ /// B("\n"),
+ /// B("quux"),
+ /// ]);
+ /// ```
+ #[inline]
+ fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
+ LinesWithTerminator::new(self.as_bytes())
+ }
+
+ /// Return a byte string slice with leading and trailing whitespace
+ /// removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim(), B("foo\tbar"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim(&self) -> &[u8] {
+ self.trim_start().trim_end()
+ }
+
+ /// Return a byte string slice with leading whitespace removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim_start(&self) -> &[u8] {
+ let start = whitespace_len_fwd(self.as_bytes());
+ &self.as_bytes()[start..]
+ }
+
+ /// Return a byte string slice with trailing whitespace removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim_end(), B(" foo\tbar"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim_end(&self) -> &[u8] {
+ let end = whitespace_len_rev(self.as_bytes());
+ &self.as_bytes()[..end]
+ }
+
+ /// Return a byte string slice with leading and trailing characters
+ /// satisfying the given predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
+ /// ```
+ #[inline]
+ fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+ self.trim_start_with(&mut trim).trim_end_with(&mut trim)
+ }
+
+ /// Return a byte string slice with leading characters satisfying the given
+ /// predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
+ /// ```
+ #[inline]
+ fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+ for (s, _, ch) in self.char_indices() {
+ if !trim(ch) {
+ return &self.as_bytes()[s..];
+ }
+ }
+ b""
+ }
+
+ /// Return a byte string slice with trailing characters satisfying the
+ /// given predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
+ /// ```
+ #[inline]
+ fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+ for (_, e, ch) in self.char_indices().rev() {
+ if !trim(ch) {
+ return &self.as_bytes()[..e];
+ }
+ }
+ b""
+ }
+
+ /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
+ /// byte string.
+ ///
+ /// In this case, lowercase is defined according to the `Lowercase` Unicode
+ /// property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_lowercase(&self) -> Vec<u8> {
+ let mut buf = vec![];
+ self.to_lowercase_into(&mut buf);
+ buf
+ }
+
+ /// Writes the lowercase equivalent of this byte string into the given
+ /// buffer. The buffer is not cleared before written to.
+ ///
+ /// In this case, lowercase is defined according to the `Lowercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you don't need to amortize allocation and instead prefer
+ /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
+ // TODO: This is the best we can do given what std exposes I think.
+ // If we roll our own case handling, then we might be able to do this
+ // a bit faster. We shouldn't roll our own case handling unless we
+ // need to, e.g., for doing caseless matching or case folding.
+
+ // TODO(BUG): This doesn't handle any special casing rules.
+
+ buf.reserve(self.as_bytes().len());
+ for (s, e, ch) in self.char_indices() {
+ if ch == '\u{FFFD}' {
+ buf.push_str(&self.as_bytes()[s..e]);
+ } else if ch.is_ascii() {
+ buf.push_char(ch.to_ascii_lowercase());
+ } else {
+ for upper in ch.to_lowercase() {
+ buf.push_char(upper);
+ }
+ }
+ }
+ }
+
+ /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
+ /// this byte string.
+ ///
+ /// In this case, lowercase is only defined in ASCII letters. Namely, the
+ /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
+ /// In particular, the length of the byte string returned is always
+ /// equivalent to the length of this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
+ /// the conversion in place.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_ascii_lowercase(&self) -> Vec<u8> {
+ self.as_bytes().to_ascii_lowercase()
+ }
+
+ /// Convert this byte string to its lowercase ASCII equivalent in place.
+ ///
+ /// In this case, lowercase is only defined in ASCII letters. Namely, the
+ /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
+ ///
+ /// If you don't need to do the conversion in
+ /// place and instead prefer convenience, then use
+ /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("HELLO Β");
+ /// s.make_ascii_lowercase();
+ /// assert_eq!(s, "hello Β".as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// s.make_ascii_lowercase();
+ /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
+ /// ```
+ #[inline]
+ fn make_ascii_lowercase(&mut self) {
+ self.as_bytes_mut().make_ascii_lowercase();
+ }
+
+ /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
+ /// byte string.
+ ///
+ /// In this case, uppercase is defined according to the `Uppercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ /// assert_eq!(s.to_uppercase(), B("农历新年"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_uppercase(&self) -> Vec<u8> {
+ let mut buf = vec![];
+ self.to_uppercase_into(&mut buf);
+ buf
+ }
+
+ /// Writes the uppercase equivalent of this byte string into the given
+ /// buffer. The buffer is not cleared before written to.
+ ///
+ /// In this case, uppercase is defined according to the `Uppercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you don't need to amortize allocation and instead prefer
+ /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B("HELLO Β"));
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B("农历新年"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
+ // TODO: This is the best we can do given what std exposes I think.
+ // If we roll our own case handling, then we might be able to do this
+ // a bit faster. We shouldn't roll our own case handling unless we
+ // need to, e.g., for doing caseless matching or case folding.
+ buf.reserve(self.as_bytes().len());
+ for (s, e, ch) in self.char_indices() {
+ if ch == '\u{FFFD}' {
+ buf.push_str(&self.as_bytes()[s..e]);
+ } else if ch.is_ascii() {
+ buf.push_char(ch.to_ascii_uppercase());
+ } else {
+ for upper in ch.to_uppercase() {
+ buf.push_char(upper);
+ }
+ }
+ }
+ }
+
+ /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
+ /// this byte string.
+ ///
+ /// In this case, uppercase is only defined in ASCII letters. Namely, the
+ /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
+ /// In particular, the length of the byte string returned is always
+ /// equivalent to the length of this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
+ /// the conversion in place.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_ascii_uppercase(&self) -> Vec<u8> {
+ self.as_bytes().to_ascii_uppercase()
+ }
+
+ /// Convert this byte string to its uppercase ASCII equivalent in place.
+ ///
+ /// In this case, uppercase is only defined in ASCII letters. Namely, the
+ /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
+ ///
+ /// If you don't need to do the conversion in
+ /// place and instead prefer convenience, then use
+ /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("hello β");
+ /// s.make_ascii_uppercase();
+ /// assert_eq!(s, B("HELLO β"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
+ /// s.make_ascii_uppercase();
+ /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[inline]
+ fn make_ascii_uppercase(&mut self) {
+ self.as_bytes_mut().make_ascii_uppercase();
+ }
+
+ /// Reverse the bytes in this string, in place.
+ ///
+ /// This is not necessarily a well formed operation! For example, if this
+ /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
+ /// string will likely result in invalid UTF-8 and otherwise non-sensical
+ /// content.
+ ///
+ /// Note that this is equivalent to the generic `[u8]::reverse` method.
+ /// This method is provided to permit callers to explicitly differentiate
+ /// between reversing bytes, codepoints and graphemes.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("hello");
+ /// s.reverse_bytes();
+ /// assert_eq!(s, "olleh".as_bytes());
+ /// ```
+ #[inline]
+ fn reverse_bytes(&mut self) {
+ self.as_bytes_mut().reverse();
+ }
+
+ /// Reverse the codepoints in this string, in place.
+ ///
+ /// If this byte string is valid UTF-8, then its reversal by codepoint
+ /// is also guaranteed to be valid UTF-8.
+ ///
+ /// This operation is equivalent to the following, but without allocating:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ ///
+ /// let mut chars: Vec<char> = s.chars().collect();
+ /// chars.reverse();
+ ///
+ /// let reversed: String = chars.into_iter().collect();
+ /// assert_eq!(reversed, "rab☃oof");
+ /// ```
+ ///
+ /// Note that this is not necessarily a well formed operation. For example,
+ /// if this byte string contains grapheme clusters with more than one
+ /// codepoint, then those grapheme clusters will not necessarily be
+ /// preserved. If you'd like to preserve grapheme clusters, then use
+ /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ /// s.reverse_chars();
+ /// assert_eq!(s, "rab☃oof".as_bytes());
+ /// ```
+ ///
+ /// This example shows that not all reversals lead to a well formed string.
+ /// For example, in this case, combining marks are used to put accents over
+ /// some letters, and those accent marks must appear after the codepoints
+ /// they modify.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_chars();
+ /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
+ /// ```
+ ///
+ /// A word of warning: the above example relies on the fact that
+ /// `résumé` is in decomposed normal form, which means there are separate
+ /// codepoints for the accents above `e`. If it is instead in composed
+ /// normal form, then the example works:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_chars();
+ /// assert_eq!(s, B("émusér"));
+ /// ```
+ ///
+ /// The point here is to be cautious and not assume that just because
+ /// `reverse_chars` works in one case, that it therefore works in all
+ /// cases.
+ #[inline]
+ fn reverse_chars(&mut self) {
+ let mut i = 0;
+ loop {
+ let (_, size) = utf8::decode(&self.as_bytes()[i..]);
+ if size == 0 {
+ break;
+ }
+ if size > 1 {
+ self.as_bytes_mut()[i..i + size].reverse_bytes();
+ }
+ i += size;
+ }
+ self.reverse_bytes();
+ }
+
+ /// Reverse the graphemes in this string, in place.
+ ///
+ /// If this byte string is valid UTF-8, then its reversal by grapheme
+ /// is also guaranteed to be valid UTF-8.
+ ///
+ /// This operation is equivalent to the following, but without allocating:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ ///
+ /// let mut graphemes: Vec<&str> = s.graphemes().collect();
+ /// graphemes.reverse();
+ ///
+ /// let reversed = graphemes.concat();
+ /// assert_eq!(reversed, "rab☃oof");
+ /// ```
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ /// s.reverse_graphemes();
+ /// assert_eq!(s, "rab☃oof".as_bytes());
+ /// ```
+ ///
+ /// This example shows how this correctly handles grapheme clusters,
+ /// unlike `reverse_chars`.
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_graphemes();
+ /// assert_eq!(s, "émusér".as_bytes());
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn reverse_graphemes(&mut self) {
+ use crate::unicode::decode_grapheme;
+
+ let mut i = 0;
+ loop {
+ let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
+ if size == 0 {
+ break;
+ }
+ if size > 1 {
+ self.as_bytes_mut()[i..i + size].reverse_bytes();
+ }
+ i += size;
+ }
+ self.reverse_bytes();
+ }
+
+ /// Returns true if and only if every byte in this byte string is ASCII.
+ ///
+ /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
+ /// an ASCII codepoint if and only if it is in the inclusive range
+ /// `[0, 127]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert!(B("abc").is_ascii());
+ /// assert!(!B("☃βツ").is_ascii());
+ /// assert!(!B(b"\xFF").is_ascii());
+ /// ```
+ #[inline]
+ fn is_ascii(&self) -> bool {
+ ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
+ }
+
+ /// Returns true if and only if the entire byte string is valid UTF-8.
+ ///
+ /// If you need location information about where a byte string's first
+ /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert!(B("abc").is_utf8());
+ /// assert!(B("☃βツ").is_utf8());
+ /// // invalid bytes
+ /// assert!(!B(b"abc\xFF").is_utf8());
+ /// // surrogate encoding
+ /// assert!(!B(b"\xED\xA0\x80").is_utf8());
+ /// // incomplete sequence
+ /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
+ /// // overlong sequence
+ /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
+ /// ```
+ #[inline]
+ fn is_utf8(&self) -> bool {
+ utf8::validate(self.as_bytes()).is_ok()
+ }
+
+ /// Returns the last byte in this byte string, if it's non-empty. If this
+ /// byte string is empty, this returns `None`.
+ ///
+ /// Note that this is like the generic `[u8]::last`, except this returns
+ /// the byte by value instead of a reference to the byte.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(b'z'), b"baz".last_byte());
+ /// assert_eq!(None, b"".last_byte());
+ /// ```
+ #[inline]
+ fn last_byte(&self) -> Option<u8> {
+ let bytes = self.as_bytes();
+ bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
+ }
+
+ /// Returns the index of the first non-ASCII byte in this byte string (if
+ /// any such indices exist). Specifically, it returns the index of the
+ /// first byte with a value greater than or equal to `0x80`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, B};
+ ///
+ /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
+ /// assert_eq!(None, b"abcde".find_non_ascii_byte());
+ /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
+ /// ```
+ #[inline]
+ fn find_non_ascii_byte(&self) -> Option<usize> {
+ let index = ascii::first_non_ascii_byte(self.as_bytes());
+ if index == self.as_bytes().len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+
+ /// Copies elements from one part of the slice to another part of itself,
+ /// where the parts may be overlapping.
+ ///
+ /// `src` is the range within this byte string to copy from, while `dest`
+ /// is the starting index of the range within this byte string to copy to.
+ /// The length indicated by `src` must be less than or equal to the number
+ /// of bytes from `dest` to the end of the byte string.
+ ///
+ /// # Panics
+ ///
+ /// Panics if either range is out of bounds, or if `src` is too big to fit
+ /// into `dest`, or if the end of `src` is before the start.
+ ///
+ /// # Examples
+ ///
+ /// Copying four bytes within a byte string:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut buf = *b"Hello, World!";
+ /// let s = &mut buf;
+ /// s.copy_within_str(1..5, 8);
+ /// assert_eq!(s, B("Hello, Wello!"));
+ /// ```
+ #[inline]
+ fn copy_within_str<R>(&mut self, src: R, dest: usize)
+ where
+ R: ops::RangeBounds<usize>,
+ {
+ // TODO: Deprecate this once slice::copy_within stabilizes.
+ let src_start = match src.start_bound() {
+ ops::Bound::Included(&n) => n,
+ ops::Bound::Excluded(&n) => {
+ n.checked_add(1).expect("attempted to index slice beyond max")
+ }
+ ops::Bound::Unbounded => 0,
+ };
+ let src_end = match src.end_bound() {
+ ops::Bound::Included(&n) => {
+ n.checked_add(1).expect("attempted to index slice beyond max")
+ }
+ ops::Bound::Excluded(&n) => n,
+ ops::Bound::Unbounded => self.as_bytes().len(),
+ };
+ assert!(src_start <= src_end, "src end is before src start");
+ assert!(src_end <= self.as_bytes().len(), "src is out of bounds");
+ let count = src_end - src_start;
+ assert!(
+ dest <= self.as_bytes().len() - count,
+ "dest is out of bounds",
+ );
+
+ // SAFETY: This is safe because we use ptr::copy to handle overlapping
+ // copies, and is also safe because we've checked all the bounds above.
+ // Finally, we are only dealing with u8 data, which is Copy, which
+ // means we can copy without worrying about ownership/destructors.
+ unsafe {
+ ptr::copy(
+ self.as_bytes().get_unchecked(src_start),
+ self.as_bytes_mut().get_unchecked_mut(dest),
+ count,
+ );
+ }
+ }
+}
+
+/// A single substring searcher fixed to a particular needle.
+///
+/// The purpose of this type is to permit callers to construct a substring
+/// searcher that can be used to search haystacks without the overhead of
+/// constructing the searcher in the first place. This is a somewhat niche
+/// concern when it's necessary to re-use the same needle to search multiple
+/// different haystacks with as little overhead as possible. In general, using
+/// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
+/// or
+/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
+/// is good enough, but `Finder` is useful when you can meaningfully observe
+/// searcher construction time in a profile.
+///
+/// When the `std` feature is enabled, then this type has an `into_owned`
+/// version which permits building a `Finder` that is not connected to the
+/// lifetime of its needle.
+#[derive(Clone, Debug)]
+pub struct Finder<'a>(memmem::Finder<'a>);
+
+impl<'a> Finder<'a> {
+ /// Create a new finder for the given needle.
+ #[inline]
+ pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
+ Finder(memmem::Finder::new(needle.as_ref()))
+ }
+
+ /// Convert this finder into its owned variant, such that it no longer
+ /// borrows the needle.
+ ///
+ /// If this is already an owned finder, then this is a no-op. Otherwise,
+ /// this copies the needle.
+ ///
+ /// This is only available when the `std` feature is enabled.
+ #[cfg(feature = "std")]
+ #[inline]
+ pub fn into_owned(self) -> Finder<'static> {
+ Finder(self.0.into_owned())
+ }
+
+ /// Returns the needle that this finder searches for.
+ ///
+ /// Note that the lifetime of the needle returned is tied to the lifetime
+ /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
+ /// finder's needle can be either borrowed or owned, so the lifetime of the
+ /// needle returned must necessarily be the shorter of the two.
+ #[inline]
+ pub fn needle(&self) -> &[u8] {
+ self.0.needle()
+ }
+
+ /// Returns the index of the first occurrence of this needle in the given
+ /// haystack.
+ ///
+ /// The haystack may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::Finder;
+ ///
+ /// let haystack = "foo bar baz";
+ /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
+ /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
+ /// assert_eq!(None, Finder::new("quux").find(haystack));
+ /// ```
+ #[inline]
+ pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
+ self.0.find(haystack.as_ref())
+ }
+}
+
+/// A single substring reverse searcher fixed to a particular needle.
+///
+/// The purpose of this type is to permit callers to construct a substring
+/// searcher that can be used to search haystacks without the overhead of
+/// constructing the searcher in the first place. This is a somewhat niche
+/// concern when it's necessary to re-use the same needle to search multiple
+/// different haystacks with as little overhead as possible. In general, using
+/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
+/// or
+/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
+/// is good enough, but `FinderReverse` is useful when you can meaningfully
+/// observe searcher construction time in a profile.
+///
+/// When the `std` feature is enabled, then this type has an `into_owned`
+/// version which permits building a `FinderReverse` that is not connected to
+/// the lifetime of its needle.
+#[derive(Clone, Debug)]
+pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
+
+impl<'a> FinderReverse<'a> {
+ /// Create a new reverse finder for the given needle.
+ #[inline]
+ pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
+ FinderReverse(memmem::FinderRev::new(needle.as_ref()))
+ }
+
+ /// Convert this finder into its owned variant, such that it no longer
+ /// borrows the needle.
+ ///
+ /// If this is already an owned finder, then this is a no-op. Otherwise,
+ /// this copies the needle.
+ ///
+ /// This is only available when the `std` feature is enabled.
+ #[cfg(feature = "std")]
+ #[inline]
+ pub fn into_owned(self) -> FinderReverse<'static> {
+ FinderReverse(self.0.into_owned())
+ }
+
+ /// Returns the needle that this finder searches for.
+ ///
+ /// Note that the lifetime of the needle returned is tied to the lifetime
+ /// of this finder, and may be shorter than the `'a` lifetime. Namely,
+ /// a finder's needle can be either borrowed or owned, so the lifetime of
+ /// the needle returned must necessarily be the shorter of the two.
+ #[inline]
+ pub fn needle(&self) -> &[u8] {
+ self.0.needle()
+ }
+
+ /// Returns the index of the last occurrence of this needle in the given
+ /// haystack.
+ ///
+ /// The haystack may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::FinderReverse;
+ ///
+ /// let haystack = "foo bar baz";
+ /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
+ /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
+ /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
+ /// ```
+ #[inline]
+ pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
+ self.0.rfind(haystack.as_ref())
+ }
+}
+
+/// An iterator over non-overlapping substring matches.
+///
+/// Matches are reported by the byte offset at which they begin.
+///
+/// `'a` is the shorter of two lifetimes: the byte string being searched or the
+/// byte string being looked for.
+#[derive(Debug)]
+pub struct Find<'a> {
+ it: memmem::FindIter<'a, 'a>,
+ haystack: &'a [u8],
+ needle: &'a [u8],
+}
+
+impl<'a> Find<'a> {
+ fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
+ Find { it: memmem::find_iter(haystack, needle), haystack, needle }
+ }
+}
+
+impl<'a> Iterator for Find<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ self.it.next()
+ }
+}
+
+/// An iterator over non-overlapping substring matches in reverse.
+///
+/// Matches are reported by the byte offset at which they begin.
+///
+/// `'a` is the shorter of two lifetimes: the byte string being searched or the
+/// byte string being looked for.
+#[derive(Debug)]
+pub struct FindReverse<'a> {
+ it: memmem::FindRevIter<'a, 'a>,
+ haystack: &'a [u8],
+ needle: &'a [u8],
+}
+
+impl<'a> FindReverse<'a> {
+ fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
+ FindReverse {
+ it: memmem::rfind_iter(haystack, needle),
+ haystack,
+ needle,
+ }
+ }
+
+ fn haystack(&self) -> &'a [u8] {
+ self.haystack
+ }
+
+ fn needle(&self) -> &[u8] {
+ self.needle
+ }
+}
+
+impl<'a> Iterator for FindReverse<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ self.it.next()
+ }
+}
+
+/// An iterator over the bytes in a byte string.
+///
+/// `'a` is the lifetime of the byte string being traversed.
+#[derive(Clone, Debug)]
+pub struct Bytes<'a> {
+ it: slice::Iter<'a, u8>,
+}
+
+impl<'a> Bytes<'a> {
+ /// Views the remaining underlying data as a subslice of the original data.
+ /// This has the same lifetime as the original slice,
+ /// and so the iterator can continue to be used while this exists.
+ #[inline]
+ pub fn as_slice(&self) -> &'a [u8] {
+ self.it.as_slice()
+ }
+}
+
+impl<'a> Iterator for Bytes<'a> {
+ type Item = u8;
+
+ #[inline]
+ fn next(&mut self) -> Option<u8> {
+ self.it.next().map(|&b| b)
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.it.size_hint()
+ }
+}
+
+impl<'a> DoubleEndedIterator for Bytes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<u8> {
+ self.it.next_back().map(|&b| b)
+ }
+}
+
+impl<'a> ExactSizeIterator for Bytes<'a> {
+ #[inline]
+ fn len(&self) -> usize {
+ self.it.len()
+ }
+}
+
+impl<'a> iter::FusedIterator for Bytes<'a> {}
+
+/// An iterator over the fields in a byte string, separated by whitespace.
+///
+/// This iterator splits on contiguous runs of whitespace, such that the fields
+/// in `foo\t\t\n \nbar` are `foo` and `bar`.
+///
+/// `'a` is the lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct Fields<'a> {
+ it: FieldsWith<'a, fn(char) -> bool>,
+}
+
+impl<'a> Fields<'a> {
+ fn new(bytes: &'a [u8]) -> Fields<'a> {
+ Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
+ }
+}
+
+impl<'a> Iterator for Fields<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ self.it.next()
+ }
+}
+
+/// An iterator over fields in the byte string, separated by a predicate over
+/// codepoints.
+///
+/// This iterator splits a byte string based on its predicate function such
+/// that the elements returned are separated by contiguous runs of codepoints
+/// for which the predicate returns true.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct FieldsWith<'a, F> {
+ f: F,
+ bytes: &'a [u8],
+ chars: CharIndices<'a>,
+}
+
+impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
+ fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
+ FieldsWith { f, bytes, chars: bytes.char_indices() }
+ }
+}
+
+impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let (start, mut end);
+ loop {
+ match self.chars.next() {
+ None => return None,
+ Some((s, e, ch)) => {
+ if !(self.f)(ch) {
+ start = s;
+ end = e;
+ break;
+ }
+ }
+ }
+ }
+ while let Some((_, e, ch)) = self.chars.next() {
+ if (self.f)(ch) {
+ break;
+ }
+ end = e;
+ }
+ Some(&self.bytes[start..end])
+ }
+}
+
+/// An iterator over substrings in a byte string, split by a separator.
+///
+/// `'a` is the lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct Split<'a> {
+ finder: Find<'a>,
+ /// The end position of the previous match of our splitter. The element
+ /// we yield corresponds to the substring starting at `last` up to the
+ /// beginning of the next match of the splitter.
+ last: usize,
+ /// Only set when iteration is complete. A corner case here is when a
+ /// splitter is matched at the end of the haystack. At that point, we still
+ /// need to yield an empty string following it.
+ done: bool,
+}
+
+impl<'a> Split<'a> {
+ fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> {
+ let finder = haystack.find_iter(splitter);
+ Split { finder, last: 0, done: false }
+ }
+}
+
+impl<'a> Iterator for Split<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let haystack = self.finder.haystack;
+ match self.finder.next() {
+ Some(start) => {
+ let next = &haystack[self.last..start];
+ self.last = start + self.finder.needle.len();
+ Some(next)
+ }
+ None => {
+ if self.last >= haystack.len() {
+ if !self.done {
+ self.done = true;
+ Some(b"")
+ } else {
+ None
+ }
+ } else {
+ let s = &haystack[self.last..];
+ self.last = haystack.len();
+ self.done = true;
+ Some(s)
+ }
+ }
+ }
+ }
+}
+
+/// An iterator over substrings in a byte string, split by a separator, in
+/// reverse.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct SplitReverse<'a> {
+ finder: FindReverse<'a>,
+ /// The end position of the previous match of our splitter. The element
+ /// we yield corresponds to the substring starting at `last` up to the
+ /// beginning of the next match of the splitter.
+ last: usize,
+ /// Only set when iteration is complete. A corner case here is when a
+ /// splitter is matched at the end of the haystack. At that point, we still
+ /// need to yield an empty string following it.
+ done: bool,
+}
+
+impl<'a> SplitReverse<'a> {
+ fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> {
+ let finder = haystack.rfind_iter(splitter);
+ SplitReverse { finder, last: haystack.len(), done: false }
+ }
+}
+
+impl<'a> Iterator for SplitReverse<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let haystack = self.finder.haystack();
+ match self.finder.next() {
+ Some(start) => {
+ let nlen = self.finder.needle().len();
+ let next = &haystack[start + nlen..self.last];
+ self.last = start;
+ Some(next)
+ }
+ None => {
+ if self.last == 0 {
+ if !self.done {
+ self.done = true;
+ Some(b"")
+ } else {
+ None
+ }
+ } else {
+ let s = &haystack[..self.last];
+ self.last = 0;
+ self.done = true;
+ Some(s)
+ }
+ }
+ }
+ }
+}
+
+/// An iterator over at most `n` substrings in a byte string, split by a
+/// separator.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct SplitN<'a> {
+ split: Split<'a>,
+ limit: usize,
+ count: usize,
+}
+
+impl<'a> SplitN<'a> {
+ fn new(
+ haystack: &'a [u8],
+ splitter: &'a [u8],
+ limit: usize,
+ ) -> SplitN<'a> {
+ let split = haystack.split_str(splitter);
+ SplitN { split, limit, count: 0 }
+ }
+}
+
+impl<'a> Iterator for SplitN<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ self.count += 1;
+ if self.count > self.limit || self.split.done {
+ None
+ } else if self.count == self.limit {
+ Some(&self.split.finder.haystack[self.split.last..])
+ } else {
+ self.split.next()
+ }
+ }
+}
+
+/// An iterator over at most `n` substrings in a byte string, split by a
+/// separator, in reverse.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct SplitNReverse<'a> {
+ split: SplitReverse<'a>,
+ limit: usize,
+ count: usize,
+}
+
+impl<'a> SplitNReverse<'a> {
+ fn new(
+ haystack: &'a [u8],
+ splitter: &'a [u8],
+ limit: usize,
+ ) -> SplitNReverse<'a> {
+ let split = haystack.rsplit_str(splitter);
+ SplitNReverse { split, limit, count: 0 }
+ }
+}
+
+impl<'a> Iterator for SplitNReverse<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ self.count += 1;
+ if self.count > self.limit || self.split.done {
+ None
+ } else if self.count == self.limit {
+ Some(&self.split.finder.haystack()[..self.split.last])
+ } else {
+ self.split.next()
+ }
+ }
+}
+
+/// An iterator over all lines in a byte string, without their terminators.
+///
+/// For this iterator, the only line terminators recognized are `\r\n` and
+/// `\n`.
+///
+/// `'a` is the lifetime of the byte string being iterated over.
+pub struct Lines<'a> {
+ it: LinesWithTerminator<'a>,
+}
+
+impl<'a> Lines<'a> {
+ fn new(bytes: &'a [u8]) -> Lines<'a> {
+ Lines { it: LinesWithTerminator::new(bytes) }
+ }
+}
+
+impl<'a> Iterator for Lines<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let mut line = self.it.next()?;
+ if line.last_byte() == Some(b'\n') {
+ line = &line[..line.len() - 1];
+ if line.last_byte() == Some(b'\r') {
+ line = &line[..line.len() - 1];
+ }
+ }
+ Some(line)
+ }
+}
+
+/// An iterator over all lines in a byte string, including their terminators.
+///
+/// For this iterator, the only line terminator recognized is `\n`. (Since
+/// line terminators are included, this also handles `\r\n` line endings.)
+///
+/// Line terminators are only included if they are present in the original
+/// byte string. For example, the last line in a byte string may not end with
+/// a line terminator.
+///
+/// Concatenating all elements yielded by this iterator is guaranteed to yield
+/// the original byte string.
+///
+/// `'a` is the lifetime of the byte string being iterated over.
+pub struct LinesWithTerminator<'a> {
+ bytes: &'a [u8],
+}
+
+impl<'a> LinesWithTerminator<'a> {
+ fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
+ LinesWithTerminator { bytes }
+ }
+}
+
+impl<'a> Iterator for LinesWithTerminator<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ match self.bytes.find_byte(b'\n') {
+ None if self.bytes.is_empty() => None,
+ None => {
+ let line = self.bytes;
+ self.bytes = b"";
+ Some(line)
+ }
+ Some(end) => {
+ let line = &self.bytes[..end + 1];
+ self.bytes = &self.bytes[end + 1..];
+ Some(line)
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::ext_slice::{ByteSlice, B};
+ use crate::tests::LOSSY_TESTS;
+
+ #[test]
+ fn to_str_lossy() {
+ for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
+ let got = B(input).to_str_lossy();
+ assert_eq!(
+ expected.as_bytes(),
+ got.as_bytes(),
+ "to_str_lossy(ith: {:?}, given: {:?})",
+ i,
+ input,
+ );
+
+ let mut got = String::new();
+ B(input).to_str_lossy_into(&mut got);
+ assert_eq!(
+ expected.as_bytes(),
+ got.as_bytes(),
+ "to_str_lossy_into",
+ );
+
+ let got = String::from_utf8_lossy(input);
+ assert_eq!(expected.as_bytes(), got.as_bytes(), "std");
+ }
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail1() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(0..2, 5);
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail2() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(3..2, 0);
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail3() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(5..7, 0);
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail4() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(0..1, 6);
+ }
+}
diff --git a/vendor/bstr/src/ext_vec.rs b/vendor/bstr/src/ext_vec.rs
new file mode 100644
index 000000000..5beb0e16a
--- /dev/null
+++ b/vendor/bstr/src/ext_vec.rs
@@ -0,0 +1,1105 @@
+use std::borrow::Cow;
+use std::error;
+use std::ffi::{OsStr, OsString};
+use std::fmt;
+use std::iter;
+use std::ops;
+use std::path::{Path, PathBuf};
+use std::ptr;
+use std::str;
+use std::vec;
+
+use crate::ext_slice::ByteSlice;
+use crate::utf8::{self, Utf8Error};
+
+/// Concatenate the elements given by the iterator together into a single
+/// `Vec<u8>`.
+///
+/// The elements may be any type that can be cheaply converted into an `&[u8]`.
+/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr;
+///
+/// let s = bstr::concat(&["foo", "bar", "baz"]);
+/// assert_eq!(s, "foobarbaz".as_bytes());
+/// ```
+#[inline]
+pub fn concat<T, I>(elements: I) -> Vec<u8>
+where
+ T: AsRef<[u8]>,
+ I: IntoIterator<Item = T>,
+{
+ let mut dest = vec![];
+ for element in elements {
+ dest.push_str(element);
+ }
+ dest
+}
+
+/// Join the elements given by the iterator with the given separator into a
+/// single `Vec<u8>`.
+///
+/// Both the separator and the elements may be any type that can be cheaply
+/// converted into an `&[u8]`. This includes, but is not limited to,
+/// `&str`, `&BStr` and `&[u8]` itself.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr;
+///
+/// let s = bstr::join(",", &["foo", "bar", "baz"]);
+/// assert_eq!(s, "foo,bar,baz".as_bytes());
+/// ```
+#[inline]
+pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
+where
+ B: AsRef<[u8]>,
+ T: AsRef<[u8]>,
+ I: IntoIterator<Item = T>,
+{
+ let mut it = elements.into_iter();
+ let mut dest = vec![];
+ match it.next() {
+ None => return dest,
+ Some(first) => {
+ dest.push_str(first);
+ }
+ }
+ for element in it {
+ dest.push_str(&separator);
+ dest.push_str(element);
+ }
+ dest
+}
+
+impl ByteVec for Vec<u8> {
+ #[inline]
+ fn as_vec(&self) -> &Vec<u8> {
+ self
+ }
+
+ #[inline]
+ fn as_vec_mut(&mut self) -> &mut Vec<u8> {
+ self
+ }
+
+ #[inline]
+ fn into_vec(self) -> Vec<u8> {
+ self
+ }
+}
+
+/// Ensure that callers cannot implement `ByteSlice` by making an
+/// umplementable trait its super trait.
+pub trait Sealed {}
+impl Sealed for Vec<u8> {}
+
+/// A trait that extends `Vec<u8>` with string oriented methods.
+///
+/// Note that when using the constructor methods, such as
+/// `ByteVec::from_slice`, one should actually call them using the concrete
+/// type. For example:
+///
+/// ```
+/// use bstr::{B, ByteVec};
+///
+/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
+/// assert_eq!(s, B("abc"));
+/// ```
+pub trait ByteVec: Sealed {
+ /// A method for accessing the raw vector bytes of this type. This is
+ /// always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_vec(&self) -> &Vec<u8>;
+
+ /// A method for accessing the raw vector bytes of this type, mutably. This
+ /// is always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_vec_mut(&mut self) -> &mut Vec<u8>;
+
+ /// A method for consuming ownership of this vector. This is always a no-op
+ /// and callers shouldn't care about it. This only exists for making the
+ /// extension trait work.
+ #[doc(hidden)]
+ fn into_vec(self) -> Vec<u8>
+ where
+ Self: Sized;
+
+ /// Create a new owned byte string from the given byte slice.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let s = Vec::from_slice(b"abc");
+ /// assert_eq!(s, B("abc"));
+ /// ```
+ #[inline]
+ fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
+ bytes.as_ref().to_vec()
+ }
+
+ /// Create a new byte string from an owned OS string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original OS string if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let os_str = OsString::from("foo");
+ /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
+ use std::os::unix::ffi::OsStringExt;
+
+ Ok(Vec::from(os_str.into_vec()))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
+ os_str.into_string().map(Vec::from)
+ }
+
+ imp(os_str)
+ }
+
+ /// Lossily create a new byte string from an OS string slice.
+ ///
+ /// On Unix, this always succeeds, is zero cost and always returns a slice.
+ /// On non-Unix systems, this does a UTF-8 check. If the given OS string
+ /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8
+ /// (with invalid bytes replaced by the Unicode replacement codepoint).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let bs = Vec::from_os_str_lossy(os_str);
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Cow::Borrowed(os_str.as_bytes())
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+ match os_str.to_string_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
+ Cow::Owned(x) => Cow::Owned(Vec::from(x)),
+ }
+ }
+
+ imp(os_str)
+ }
+
+ /// Create a new byte string from an owned file path.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original path if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::PathBuf;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let path = PathBuf::from("foo");
+ /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
+ Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
+ }
+
+ /// Lossily create a new byte string from a file path.
+ ///
+ /// On Unix, this always succeeds, is zero cost and always returns a slice.
+ /// On non-Unix systems, this does a UTF-8 check. If the given path is not
+ /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid
+ /// bytes replaced by the Unicode replacement codepoint).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::Path;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let path = Path::new("foo");
+ /// let bs = Vec::from_path_lossy(path);
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
+ Vec::from_os_str_lossy(path.as_os_str())
+ }
+
+ /// Appends the given byte to the end of this byte string.
+ ///
+ /// Note that this is equivalent to the generic `Vec::push` method. This
+ /// method is provided to permit callers to explicitly differentiate
+ /// between pushing bytes, codepoints and strings.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_byte(b'\xE2');
+ /// s.push_byte(b'\x98');
+ /// s.push_byte(b'\x83');
+ /// assert_eq!(s, "abc☃".as_bytes());
+ /// ```
+ #[inline]
+ fn push_byte(&mut self, byte: u8) {
+ self.as_vec_mut().push(byte);
+ }
+
+ /// Appends the given `char` to the end of this byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_char('1');
+ /// s.push_char('2');
+ /// s.push_char('3');
+ /// assert_eq!(s, "abc123".as_bytes());
+ /// ```
+ #[inline]
+ fn push_char(&mut self, ch: char) {
+ if ch.len_utf8() == 1 {
+ self.push_byte(ch as u8);
+ return;
+ }
+ self.as_vec_mut()
+ .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
+ }
+
+ /// Appends the given slice to the end of this byte string. This accepts
+ /// any type that be converted to a `&[u8]`. This includes, but is not
+ /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_str(b"123");
+ /// assert_eq!(s, "abc123".as_bytes());
+ /// ```
+ #[inline]
+ fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
+ self.as_vec_mut().extend_from_slice(bytes.as_ref());
+ }
+
+ /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
+ /// valid UTF-8.
+ ///
+ /// If it is not valid UTF-8, then a
+ /// [`FromUtf8Error`](struct.FromUtf8Error.html)
+ /// is returned. (This error can be used to examine why UTF-8 validation
+ /// failed, or to regain the original byte string.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+ /// let bytes = Vec::from("hello");
+ /// let string = bytes.into_string()?;
+ ///
+ /// assert_eq!("hello", string);
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ ///
+ /// If this byte string is not valid UTF-8, then an error will be returned.
+ /// That error can then be used to inspect the location at which invalid
+ /// UTF-8 was found, or to regain the original byte string:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+ /// assert_eq!(err.utf8_error().error_len(), Some(1));
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// let bytes = Vec::from(err.into_vec());
+ /// assert_eq!(bytes, B(b"foo\xFFbar"));
+ /// ```
+ #[inline]
+ fn into_string(self) -> Result<String, FromUtf8Error>
+ where
+ Self: Sized,
+ {
+ match utf8::validate(self.as_vec()) {
+ Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
+ Ok(()) => {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe { Ok(self.into_string_unchecked()) }
+ }
+ }
+ }
+
+ /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
+ /// contains invalid UTF-8, then the invalid bytes are replaced with the
+ /// Unicode replacement codepoint.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let string = bytes.into_string_lossy();
+ /// assert_eq!(string, "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ fn into_string_lossy(self) -> String
+ where
+ Self: Sized,
+ {
+ match self.as_vec().to_str_lossy() {
+ Cow::Borrowed(_) => {
+ // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
+ // the entire string is valid utf8.
+ unsafe { self.into_string_unchecked() }
+ }
+ Cow::Owned(s) => s,
+ }
+ }
+
+ /// Unsafely convert this byte string into a `String`, without checking for
+ /// valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// Callers *must* ensure that this byte string is valid UTF-8 before
+ /// calling this method. Converting a byte string into a `String` that is
+ /// not valid UTF-8 is considered undefined behavior.
+ ///
+ /// This routine is useful in performance sensitive contexts where the
+ /// UTF-8 validity of the byte string is already known and it is
+ /// undesirable to pay the cost of an additional UTF-8 validation check
+ /// that [`into_string`](#method.into_string) performs.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// // SAFETY: This is safe because string literals are guaranteed to be
+ /// // valid UTF-8 by the Rust compiler.
+ /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
+ /// assert_eq!("☃βツ", s);
+ /// ```
+ #[inline]
+ unsafe fn into_string_unchecked(self) -> String
+ where
+ Self: Sized,
+ {
+ String::from_utf8_unchecked(self.into_vec())
+ }
+
+ /// Converts this byte string into an OS string, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original byte string if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from("foo");
+ /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
+ /// assert_eq!(os_str, OsStr::new("foo"));
+ /// ```
+ #[inline]
+ fn into_os_string(self) -> Result<OsString, Vec<u8>>
+ where
+ Self: Sized,
+ {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
+ use std::os::unix::ffi::OsStringExt;
+
+ Ok(OsString::from_vec(v))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
+ match v.into_string() {
+ Ok(s) => Ok(OsString::from(s)),
+ Err(err) => Err(err.into_vec()),
+ }
+ }
+
+ imp(self.into_vec())
+ }
+
+ /// Lossily converts this byte string into an OS string, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from_slice(b"foo\xFFbar");
+ /// let os_str = bs.into_os_string_lossy();
+ /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ fn into_os_string_lossy(self) -> OsString
+ where
+ Self: Sized,
+ {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(v: Vec<u8>) -> OsString {
+ use std::os::unix::ffi::OsStringExt;
+
+ OsString::from_vec(v)
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(v: Vec<u8>) -> OsString {
+ OsString::from(v.into_string_lossy())
+ }
+
+ imp(self.into_vec())
+ }
+
+ /// Converts this byte string into an owned file path, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original byte string if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from("foo");
+ /// let path = bs.into_path_buf().expect("should be valid UTF-8");
+ /// assert_eq!(path.as_os_str(), "foo");
+ /// ```
+ #[inline]
+ fn into_path_buf(self) -> Result<PathBuf, Vec<u8>>
+ where
+ Self: Sized,
+ {
+ self.into_os_string().map(PathBuf::from)
+ }
+
+ /// Lossily converts this byte string into an owned file path, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from_slice(b"foo\xFFbar");
+ /// let path = bs.into_path_buf_lossy();
+ /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ fn into_path_buf_lossy(self) -> PathBuf
+ where
+ Self: Sized,
+ {
+ PathBuf::from(self.into_os_string_lossy())
+ }
+
+ /// Removes the last byte from this `Vec<u8>` and returns it.
+ ///
+ /// If this byte string is empty, then `None` is returned.
+ ///
+ /// If the last codepoint in this byte string is not ASCII, then removing
+ /// the last byte could make this byte string contain invalid UTF-8.
+ ///
+ /// Note that this is equivalent to the generic `Vec::pop` method. This
+ /// method is provided to permit callers to explicitly differentiate
+ /// between popping bytes and codepoints.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo");
+ /// assert_eq!(s.pop_byte(), Some(b'o'));
+ /// assert_eq!(s.pop_byte(), Some(b'o'));
+ /// assert_eq!(s.pop_byte(), Some(b'f'));
+ /// assert_eq!(s.pop_byte(), None);
+ /// ```
+ #[inline]
+ fn pop_byte(&mut self) -> Option<u8> {
+ self.as_vec_mut().pop()
+ }
+
+ /// Removes the last codepoint from this `Vec<u8>` and returns it.
+ ///
+ /// If this byte string is empty, then `None` is returned. If the last
+ /// bytes of this byte string do not correspond to a valid UTF-8 code unit
+ /// sequence, then the Unicode replacement codepoint is yielded instead in
+ /// accordance with the
+ /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo");
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('f'));
+ /// assert_eq!(s.pop_char(), None);
+ /// ```
+ ///
+ /// This shows the replacement codepoint substitution policy. Note that
+ /// the first pop yields a replacement codepoint but actually removes two
+ /// bytes. This is in contrast with subsequent pops when encountering
+ /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
+ /// code unit sequence.
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('f'));
+ /// assert_eq!(s.pop_char(), None);
+ /// ```
+ #[inline]
+ fn pop_char(&mut self) -> Option<char> {
+ let (ch, size) = utf8::decode_last_lossy(self.as_vec());
+ if size == 0 {
+ return None;
+ }
+ let new_len = self.as_vec().len() - size;
+ self.as_vec_mut().truncate(new_len);
+ Some(ch)
+ }
+
+ /// Removes a `char` from this `Vec<u8>` at the given byte position and
+ /// returns it.
+ ///
+ /// If the bytes at the given position do not lead to a valid UTF-8 code
+ /// unit sequence, then a
+ /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than or equal to this byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo☃bar");
+ /// assert_eq!(s.remove_char(3), '☃');
+ /// assert_eq!(s, b"foobar");
+ /// ```
+ ///
+ /// This example shows how the Unicode replacement codepoint policy is
+ /// used:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from_slice(b"foo\xFFbar");
+ /// assert_eq!(s.remove_char(3), '\u{FFFD}');
+ /// assert_eq!(s, b"foobar");
+ /// ```
+ #[inline]
+ fn remove_char(&mut self, at: usize) -> char {
+ let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
+ assert!(
+ size > 0,
+ "expected {} to be less than {}",
+ at,
+ self.as_vec().len(),
+ );
+ self.as_vec_mut().drain(at..at + size);
+ ch
+ }
+
+ /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
+ /// position.
+ ///
+ /// This is an `O(n)` operation as it may copy a number of elements in this
+ /// byte string proportional to its length.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than the byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.insert_char(3, '☃');
+ /// assert_eq!(s, "foo☃bar".as_bytes());
+ /// ```
+ #[inline]
+ fn insert_char(&mut self, at: usize, ch: char) {
+ self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
+ }
+
+ /// Inserts the given byte string into this byte string at a particular
+ /// byte position.
+ ///
+ /// This is an `O(n)` operation as it may copy a number of elements in this
+ /// byte string proportional to its length.
+ ///
+ /// The given byte string may be any type that can be cheaply converted
+ /// into a `&[u8]`. This includes, but is not limited to, `&str` and
+ /// `&[u8]`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than the byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.insert_str(3, "☃☃☃");
+ /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
+ /// ```
+ #[inline]
+ fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
+ let bytes = bytes.as_ref();
+ let len = self.as_vec().len();
+ assert!(at <= len, "expected {} to be <= {}", at, len);
+
+ // SAFETY: We'd like to efficiently splice in the given bytes into
+ // this byte string. Since we are only working with `u8` elements here,
+ // we only need to consider whether our bounds are correct and whether
+ // our byte string has enough space.
+ self.as_vec_mut().reserve(bytes.len());
+ unsafe {
+ // Shift bytes after `at` over by the length of `bytes` to make
+ // room for it. This requires referencing two regions of memory
+ // that may overlap, so we use ptr::copy.
+ ptr::copy(
+ self.as_vec().as_ptr().add(at),
+ self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
+ len - at,
+ );
+ // Now copy the bytes given into the room we made above. In this
+ // case, we know that the given bytes cannot possibly overlap
+ // with this byte string since we have a mutable borrow of the
+ // latter. Thus, we can use a nonoverlapping copy.
+ ptr::copy_nonoverlapping(
+ bytes.as_ptr(),
+ self.as_vec_mut().as_mut_ptr().add(at),
+ bytes.len(),
+ );
+ self.as_vec_mut().set_len(len + bytes.len());
+ }
+ }
+
+ /// Removes the specified range in this byte string and replaces it with
+ /// the given bytes. The given bytes do not need to have the same length
+ /// as the range provided.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given range is invalid.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.replace_range(2..4, "xxxxx");
+ /// assert_eq!(s, "foxxxxxar".as_bytes());
+ /// ```
+ #[inline]
+ fn replace_range<R, B>(&mut self, range: R, replace_with: B)
+ where
+ R: ops::RangeBounds<usize>,
+ B: AsRef<[u8]>,
+ {
+ self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
+ }
+
+ /// Creates a draining iterator that removes the specified range in this
+ /// `Vec<u8>` and yields each of the removed bytes.
+ ///
+ /// Note that the elements specified by the given range are removed
+ /// regardless of whether the returned iterator is fully exhausted.
+ ///
+ /// Also note that is is unspecified how many bytes are removed from the
+ /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given range is not valid.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// {
+ /// let mut drainer = s.drain_bytes(2..4);
+ /// assert_eq!(drainer.next(), Some(b'o'));
+ /// assert_eq!(drainer.next(), Some(b'b'));
+ /// assert_eq!(drainer.next(), None);
+ /// }
+ /// assert_eq!(s, "foar".as_bytes());
+ /// ```
+ #[inline]
+ fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
+ where
+ R: ops::RangeBounds<usize>,
+ {
+ DrainBytes { it: self.as_vec_mut().drain(range) }
+ }
+}
+
+/// A draining byte oriented iterator for `Vec<u8>`.
+///
+/// This iterator is created by
+/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::ByteVec;
+///
+/// let mut s = Vec::from("foobar");
+/// {
+/// let mut drainer = s.drain_bytes(2..4);
+/// assert_eq!(drainer.next(), Some(b'o'));
+/// assert_eq!(drainer.next(), Some(b'b'));
+/// assert_eq!(drainer.next(), None);
+/// }
+/// assert_eq!(s, "foar".as_bytes());
+/// ```
+#[derive(Debug)]
+pub struct DrainBytes<'a> {
+ it: vec::Drain<'a, u8>,
+}
+
+impl<'a> iter::FusedIterator for DrainBytes<'a> {}
+
+impl<'a> Iterator for DrainBytes<'a> {
+ type Item = u8;
+
+ #[inline]
+ fn next(&mut self) -> Option<u8> {
+ self.it.next()
+ }
+}
+
+impl<'a> DoubleEndedIterator for DrainBytes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<u8> {
+ self.it.next_back()
+ }
+}
+
+impl<'a> ExactSizeIterator for DrainBytes<'a> {
+ #[inline]
+ fn len(&self) -> usize {
+ self.it.len()
+ }
+}
+
+/// An error that may occur when converting a `Vec<u8>` to a `String`.
+///
+/// This error includes the original `Vec<u8>` that failed to convert to a
+/// `String`. This permits callers to recover the allocation used even if it
+/// it not valid UTF-8.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::{B, ByteVec};
+///
+/// let bytes = Vec::from_slice(b"foo\xFFbar");
+/// let err = bytes.into_string().unwrap_err();
+///
+/// assert_eq!(err.utf8_error().valid_up_to(), 3);
+/// assert_eq!(err.utf8_error().error_len(), Some(1));
+///
+/// // At no point in this example is an allocation performed.
+/// let bytes = Vec::from(err.into_vec());
+/// assert_eq!(bytes, B(b"foo\xFFbar"));
+/// ```
+#[derive(Debug, Eq, PartialEq)]
+pub struct FromUtf8Error {
+ original: Vec<u8>,
+ err: Utf8Error,
+}
+
+impl FromUtf8Error {
+ /// Return the original bytes as a slice that failed to convert to a
+ /// `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.original
+ }
+
+ /// Consume this error and return the original byte string that failed to
+ /// convert to a `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ /// let original = err.into_vec();
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// assert_eq!(original, B(b"foo\xFFbar"));
+ /// ```
+ #[inline]
+ pub fn into_vec(self) -> Vec<u8> {
+ self.original
+ }
+
+ /// Return the underlying UTF-8 error that occurred. This error provides
+ /// information on the nature and location of the invalid UTF-8 detected.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+ /// assert_eq!(err.utf8_error().error_len(), Some(1));
+ /// ```
+ #[inline]
+ pub fn utf8_error(&self) -> &Utf8Error {
+ &self.err
+ }
+}
+
+impl error::Error for FromUtf8Error {
+ #[inline]
+ fn description(&self) -> &str {
+ "invalid UTF-8 vector"
+ }
+}
+
+impl fmt::Display for FromUtf8Error {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "{}", self.err)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::ext_vec::ByteVec;
+
+ #[test]
+ fn insert() {
+ let mut s = vec![];
+ s.insert_str(0, "foo");
+ assert_eq!(s, "foo".as_bytes());
+
+ let mut s = Vec::from("a");
+ s.insert_str(0, "foo");
+ assert_eq!(s, "fooa".as_bytes());
+
+ let mut s = Vec::from("a");
+ s.insert_str(1, "foo");
+ assert_eq!(s, "afoo".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(3, "quux");
+ assert_eq!(s, "fooquuxbar".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(3, "x");
+ assert_eq!(s, "fooxbar".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(0, "x");
+ assert_eq!(s, "xfoobar".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(6, "x");
+ assert_eq!(s, "foobarx".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(3, "quuxbazquux");
+ assert_eq!(s, "fooquuxbazquuxbar".as_bytes());
+ }
+
+ #[test]
+ #[should_panic]
+ fn insert_fail1() {
+ let mut s = vec![];
+ s.insert_str(1, "foo");
+ }
+
+ #[test]
+ #[should_panic]
+ fn insert_fail2() {
+ let mut s = Vec::from("a");
+ s.insert_str(2, "foo");
+ }
+
+ #[test]
+ #[should_panic]
+ fn insert_fail3() {
+ let mut s = Vec::from("foobar");
+ s.insert_str(7, "foo");
+ }
+}
diff --git a/vendor/bstr/src/impls.rs b/vendor/bstr/src/impls.rs
new file mode 100644
index 000000000..85a27ba77
--- /dev/null
+++ b/vendor/bstr/src/impls.rs
@@ -0,0 +1,987 @@
+macro_rules! impl_partial_eq {
+ ($lhs:ty, $rhs:ty) => {
+ impl<'a, 'b> PartialEq<$rhs> for $lhs {
+ #[inline]
+ fn eq(&self, other: &$rhs) -> bool {
+ let other: &[u8] = other.as_ref();
+ PartialEq::eq(self.as_bytes(), other)
+ }
+ }
+
+ impl<'a, 'b> PartialEq<$lhs> for $rhs {
+ #[inline]
+ fn eq(&self, other: &$lhs) -> bool {
+ let this: &[u8] = self.as_ref();
+ PartialEq::eq(this, other.as_bytes())
+ }
+ }
+ };
+}
+
+#[cfg(feature = "std")]
+macro_rules! impl_partial_eq_cow {
+ ($lhs:ty, $rhs:ty) => {
+ impl<'a, 'b> PartialEq<$rhs> for $lhs {
+ #[inline]
+ fn eq(&self, other: &$rhs) -> bool {
+ let other: &[u8] = (&**other).as_ref();
+ PartialEq::eq(self.as_bytes(), other)
+ }
+ }
+
+ impl<'a, 'b> PartialEq<$lhs> for $rhs {
+ #[inline]
+ fn eq(&self, other: &$lhs) -> bool {
+ let this: &[u8] = (&**other).as_ref();
+ PartialEq::eq(this, self.as_bytes())
+ }
+ }
+ };
+}
+
+macro_rules! impl_partial_ord {
+ ($lhs:ty, $rhs:ty) => {
+ impl<'a, 'b> PartialOrd<$rhs> for $lhs {
+ #[inline]
+ fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
+ let other: &[u8] = other.as_ref();
+ PartialOrd::partial_cmp(self.as_bytes(), other)
+ }
+ }
+
+ impl<'a, 'b> PartialOrd<$lhs> for $rhs {
+ #[inline]
+ fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
+ let this: &[u8] = self.as_ref();
+ PartialOrd::partial_cmp(this, other.as_bytes())
+ }
+ }
+ };
+}
+
+#[cfg(feature = "std")]
+mod bstring {
+ use std::borrow::{Borrow, Cow, ToOwned};
+ use std::cmp::Ordering;
+ use std::fmt;
+ use std::iter::FromIterator;
+ use std::ops;
+
+ use crate::bstr::BStr;
+ use crate::bstring::BString;
+ use crate::ext_vec::ByteVec;
+
+ impl fmt::Display for BString {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(self.as_bstr(), f)
+ }
+ }
+
+ impl fmt::Debug for BString {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(self.as_bstr(), f)
+ }
+ }
+
+ impl ops::Deref for BString {
+ type Target = Vec<u8>;
+
+ #[inline]
+ fn deref(&self) -> &Vec<u8> {
+ &self.bytes
+ }
+ }
+
+ impl ops::DerefMut for BString {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut Vec<u8> {
+ &mut self.bytes
+ }
+ }
+
+ impl AsRef<[u8]> for BString {
+ #[inline]
+ fn as_ref(&self) -> &[u8] {
+ &self.bytes
+ }
+ }
+
+ impl AsRef<BStr> for BString {
+ #[inline]
+ fn as_ref(&self) -> &BStr {
+ self.as_bstr()
+ }
+ }
+
+ impl AsMut<[u8]> for BString {
+ #[inline]
+ fn as_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+ }
+
+ impl AsMut<BStr> for BString {
+ #[inline]
+ fn as_mut(&mut self) -> &mut BStr {
+ self.as_mut_bstr()
+ }
+ }
+
+ impl Borrow<BStr> for BString {
+ #[inline]
+ fn borrow(&self) -> &BStr {
+ self.as_bstr()
+ }
+ }
+
+ impl ToOwned for BStr {
+ type Owned = BString;
+
+ #[inline]
+ fn to_owned(&self) -> BString {
+ BString::from(self)
+ }
+ }
+
+ impl Default for BString {
+ fn default() -> BString {
+ BString::from(vec![])
+ }
+ }
+
+ impl<'a> From<&'a [u8]> for BString {
+ #[inline]
+ fn from(s: &'a [u8]) -> BString {
+ BString::from(s.to_vec())
+ }
+ }
+
+ impl From<Vec<u8>> for BString {
+ #[inline]
+ fn from(s: Vec<u8>) -> BString {
+ BString { bytes: s }
+ }
+ }
+
+ impl From<BString> for Vec<u8> {
+ #[inline]
+ fn from(s: BString) -> Vec<u8> {
+ s.bytes
+ }
+ }
+
+ impl<'a> From<&'a str> for BString {
+ #[inline]
+ fn from(s: &'a str) -> BString {
+ BString::from(s.as_bytes().to_vec())
+ }
+ }
+
+ impl From<String> for BString {
+ #[inline]
+ fn from(s: String) -> BString {
+ BString::from(s.into_bytes())
+ }
+ }
+
+ impl<'a> From<&'a BStr> for BString {
+ #[inline]
+ fn from(s: &'a BStr) -> BString {
+ BString::from(s.bytes.to_vec())
+ }
+ }
+
+ impl<'a> From<BString> for Cow<'a, BStr> {
+ #[inline]
+ fn from(s: BString) -> Cow<'a, BStr> {
+ Cow::Owned(s)
+ }
+ }
+
+ impl FromIterator<char> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> BString {
+ BString::from(iter.into_iter().collect::<String>())
+ }
+ }
+
+ impl FromIterator<u8> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> BString {
+ BString::from(iter.into_iter().collect::<Vec<u8>>())
+ }
+ }
+
+ impl<'a> FromIterator<&'a str> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl<'a> FromIterator<&'a [u8]> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = &'a [u8]>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl<'a> FromIterator<&'a BStr> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = &'a BStr>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl FromIterator<BString> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = BString>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl Eq for BString {}
+
+ impl PartialEq for BString {
+ #[inline]
+ fn eq(&self, other: &BString) -> bool {
+ &self[..] == &other[..]
+ }
+ }
+
+ impl_partial_eq!(BString, Vec<u8>);
+ impl_partial_eq!(BString, [u8]);
+ impl_partial_eq!(BString, &'a [u8]);
+ impl_partial_eq!(BString, String);
+ impl_partial_eq!(BString, str);
+ impl_partial_eq!(BString, &'a str);
+ impl_partial_eq!(BString, BStr);
+ impl_partial_eq!(BString, &'a BStr);
+
+ impl PartialOrd for BString {
+ #[inline]
+ fn partial_cmp(&self, other: &BString) -> Option<Ordering> {
+ PartialOrd::partial_cmp(&self.bytes, &other.bytes)
+ }
+ }
+
+ impl Ord for BString {
+ #[inline]
+ fn cmp(&self, other: &BString) -> Ordering {
+ self.partial_cmp(other).unwrap()
+ }
+ }
+
+ impl_partial_ord!(BString, Vec<u8>);
+ impl_partial_ord!(BString, [u8]);
+ impl_partial_ord!(BString, &'a [u8]);
+ impl_partial_ord!(BString, String);
+ impl_partial_ord!(BString, str);
+ impl_partial_ord!(BString, &'a str);
+ impl_partial_ord!(BString, BStr);
+ impl_partial_ord!(BString, &'a BStr);
+}
+
+mod bstr {
+ #[cfg(feature = "std")]
+ use std::borrow::Cow;
+
+ use core::cmp::Ordering;
+ use core::fmt;
+ use core::ops;
+
+ use crate::bstr::BStr;
+ use crate::ext_slice::ByteSlice;
+
+ impl fmt::Display for BStr {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ /// Write the given bstr (lossily) to the given formatter.
+ fn write_bstr(
+ f: &mut fmt::Formatter<'_>,
+ bstr: &BStr,
+ ) -> Result<(), fmt::Error> {
+ for chunk in bstr.utf8_chunks() {
+ f.write_str(chunk.valid())?;
+ if !chunk.invalid().is_empty() {
+ f.write_str("\u{FFFD}")?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Write 'num' fill characters to the given formatter.
+ fn write_pads(
+ f: &mut fmt::Formatter<'_>,
+ num: usize,
+ ) -> fmt::Result {
+ let fill = f.fill();
+ for _ in 0..num {
+ f.write_fmt(format_args!("{}", fill))?;
+ }
+ Ok(())
+ }
+
+ if let Some(align) = f.align() {
+ let width = f.width().unwrap_or(0);
+ let nchars = self.chars().count();
+ let remaining_pads = width.saturating_sub(nchars);
+ match align {
+ fmt::Alignment::Left => {
+ write_bstr(f, self)?;
+ write_pads(f, remaining_pads)?;
+ }
+ fmt::Alignment::Right => {
+ write_pads(f, remaining_pads)?;
+ write_bstr(f, self)?;
+ }
+ fmt::Alignment::Center => {
+ let half = remaining_pads / 2;
+ let second_half = if remaining_pads % 2 == 0 {
+ half
+ } else {
+ half + 1
+ };
+ write_pads(f, half)?;
+ write_bstr(f, self)?;
+ write_pads(f, second_half)?;
+ }
+ }
+ Ok(())
+ } else {
+ write_bstr(f, self)?;
+ Ok(())
+ }
+ }
+ }
+
+ impl fmt::Debug for BStr {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "\"")?;
+ for (s, e, ch) in self.char_indices() {
+ match ch {
+ '\0' => write!(f, "\\0")?,
+ '\u{FFFD}' => {
+ let bytes = self[s..e].as_bytes();
+ if bytes == b"\xEF\xBF\xBD" {
+ write!(f, "{}", ch.escape_debug())?;
+ } else {
+ for &b in self[s..e].as_bytes() {
+ write!(f, r"\x{:02X}", b)?;
+ }
+ }
+ }
+ // ASCII control characters except \0, \n, \r, \t
+ '\x01'..='\x08'
+ | '\x0b'
+ | '\x0c'
+ | '\x0e'..='\x19'
+ | '\x7f' => {
+ write!(f, "\\x{:02x}", ch as u32)?;
+ }
+ '\n' | '\r' | '\t' | _ => {
+ write!(f, "{}", ch.escape_debug())?;
+ }
+ }
+ }
+ write!(f, "\"")?;
+ Ok(())
+ }
+ }
+
+ impl ops::Deref for BStr {
+ type Target = [u8];
+
+ #[inline]
+ fn deref(&self) -> &[u8] {
+ &self.bytes
+ }
+ }
+
+ impl ops::DerefMut for BStr {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+ }
+
+ impl ops::Index<usize> for BStr {
+ type Output = u8;
+
+ #[inline]
+ fn index(&self, idx: usize) -> &u8 {
+ &self.as_bytes()[idx]
+ }
+ }
+
+ impl ops::Index<ops::RangeFull> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, _: ops::RangeFull) -> &BStr {
+ self
+ }
+ }
+
+ impl ops::Index<ops::Range<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::Range<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[r.start..r.end])
+ }
+ }
+
+ impl ops::Index<ops::RangeInclusive<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeInclusive<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[*r.start()..=*r.end()])
+ }
+ }
+
+ impl ops::Index<ops::RangeFrom<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeFrom<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[r.start..])
+ }
+ }
+
+ impl ops::Index<ops::RangeTo<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeTo<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[..r.end])
+ }
+ }
+
+ impl ops::Index<ops::RangeToInclusive<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeToInclusive<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[..=r.end])
+ }
+ }
+
+ impl ops::IndexMut<usize> for BStr {
+ #[inline]
+ fn index_mut(&mut self, idx: usize) -> &mut u8 {
+ &mut self.bytes[idx]
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeFull> for BStr {
+ #[inline]
+ fn index_mut(&mut self, _: ops::RangeFull) -> &mut BStr {
+ self
+ }
+ }
+
+ impl ops::IndexMut<ops::Range<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::Range<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[r.start..r.end])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeInclusive<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeInclusive<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[*r.start()..=*r.end()])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeFrom<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeFrom<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[r.start..])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeTo<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeTo<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[..r.end])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeToInclusive<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeToInclusive<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[..=r.end])
+ }
+ }
+
+ impl AsRef<[u8]> for BStr {
+ #[inline]
+ fn as_ref(&self) -> &[u8] {
+ self.as_bytes()
+ }
+ }
+
+ impl AsRef<BStr> for [u8] {
+ #[inline]
+ fn as_ref(&self) -> &BStr {
+ BStr::new(self)
+ }
+ }
+
+ impl AsRef<BStr> for str {
+ #[inline]
+ fn as_ref(&self) -> &BStr {
+ BStr::new(self)
+ }
+ }
+
+ impl AsMut<[u8]> for BStr {
+ #[inline]
+ fn as_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+ }
+
+ impl AsMut<BStr> for [u8] {
+ #[inline]
+ fn as_mut(&mut self) -> &mut BStr {
+ BStr::new_mut(self)
+ }
+ }
+
+ impl<'a> Default for &'a BStr {
+ fn default() -> &'a BStr {
+ BStr::from_bytes(b"")
+ }
+ }
+
+ impl<'a> Default for &'a mut BStr {
+ fn default() -> &'a mut BStr {
+ BStr::from_bytes_mut(&mut [])
+ }
+ }
+
+ impl<'a> From<&'a [u8]> for &'a BStr {
+ #[inline]
+ fn from(s: &'a [u8]) -> &'a BStr {
+ BStr::from_bytes(s)
+ }
+ }
+
+ impl<'a> From<&'a str> for &'a BStr {
+ #[inline]
+ fn from(s: &'a str) -> &'a BStr {
+ BStr::from_bytes(s.as_bytes())
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl<'a> From<&'a BStr> for Cow<'a, BStr> {
+ #[inline]
+ fn from(s: &'a BStr) -> Cow<'a, BStr> {
+ Cow::Borrowed(s)
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl From<Box<[u8]>> for Box<BStr> {
+ #[inline]
+ fn from(s: Box<[u8]>) -> Box<BStr> {
+ BStr::from_boxed_bytes(s)
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl From<Box<BStr>> for Box<[u8]> {
+ #[inline]
+ fn from(s: Box<BStr>) -> Box<[u8]> {
+ BStr::into_boxed_bytes(s)
+ }
+ }
+
+ impl Eq for BStr {}
+
+ impl PartialEq<BStr> for BStr {
+ #[inline]
+ fn eq(&self, other: &BStr) -> bool {
+ self.as_bytes() == other.as_bytes()
+ }
+ }
+
+ impl_partial_eq!(BStr, [u8]);
+ impl_partial_eq!(BStr, &'a [u8]);
+ impl_partial_eq!(BStr, str);
+ impl_partial_eq!(BStr, &'a str);
+
+ #[cfg(feature = "std")]
+ impl_partial_eq!(BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_eq!(&'a BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_eq!(BStr, String);
+ #[cfg(feature = "std")]
+ impl_partial_eq!(&'a BStr, String);
+ #[cfg(feature = "std")]
+ impl_partial_eq_cow!(&'a BStr, Cow<'a, BStr>);
+ #[cfg(feature = "std")]
+ impl_partial_eq_cow!(&'a BStr, Cow<'a, str>);
+ #[cfg(feature = "std")]
+ impl_partial_eq_cow!(&'a BStr, Cow<'a, [u8]>);
+
+ impl PartialOrd for BStr {
+ #[inline]
+ fn partial_cmp(&self, other: &BStr) -> Option<Ordering> {
+ PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes())
+ }
+ }
+
+ impl Ord for BStr {
+ #[inline]
+ fn cmp(&self, other: &BStr) -> Ordering {
+ self.partial_cmp(other).unwrap()
+ }
+ }
+
+ impl_partial_ord!(BStr, [u8]);
+ impl_partial_ord!(BStr, &'a [u8]);
+ impl_partial_ord!(BStr, str);
+ impl_partial_ord!(BStr, &'a str);
+
+ #[cfg(feature = "std")]
+ impl_partial_ord!(BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_ord!(&'a BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_ord!(BStr, String);
+ #[cfg(feature = "std")]
+ impl_partial_ord!(&'a BStr, String);
+}
+
+#[cfg(feature = "serde1-nostd")]
+mod bstr_serde {
+ use core::fmt;
+
+ use serde::{
+ de::Error, de::Visitor, Deserialize, Deserializer, Serialize,
+ Serializer,
+ };
+
+ use crate::bstr::BStr;
+
+ impl Serialize for BStr {
+ #[inline]
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ serializer.serialize_bytes(self.as_bytes())
+ }
+ }
+
+ impl<'a, 'de: 'a> Deserialize<'de> for &'a BStr {
+ #[inline]
+ fn deserialize<D>(deserializer: D) -> Result<&'a BStr, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ struct BStrVisitor;
+
+ impl<'de> Visitor<'de> for BStrVisitor {
+ type Value = &'de BStr;
+
+ fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str("a borrowed byte string")
+ }
+
+ #[inline]
+ fn visit_borrowed_bytes<E: Error>(
+ self,
+ value: &'de [u8],
+ ) -> Result<&'de BStr, E> {
+ Ok(BStr::new(value))
+ }
+
+ #[inline]
+ fn visit_borrowed_str<E: Error>(
+ self,
+ value: &'de str,
+ ) -> Result<&'de BStr, E> {
+ Ok(BStr::new(value))
+ }
+ }
+
+ deserializer.deserialize_bytes(BStrVisitor)
+ }
+ }
+}
+
+#[cfg(feature = "serde1")]
+mod bstring_serde {
+ use std::cmp;
+ use std::fmt;
+
+ use serde::{
+ de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer,
+ Serialize, Serializer,
+ };
+
+ use crate::bstring::BString;
+
+ impl Serialize for BString {
+ #[inline]
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ serializer.serialize_bytes(self.as_bytes())
+ }
+ }
+
+ impl<'de> Deserialize<'de> for BString {
+ #[inline]
+ fn deserialize<D>(deserializer: D) -> Result<BString, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ struct BStringVisitor;
+
+ impl<'de> Visitor<'de> for BStringVisitor {
+ type Value = BString;
+
+ fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str("a byte string")
+ }
+
+ #[inline]
+ fn visit_seq<V: SeqAccess<'de>>(
+ self,
+ mut visitor: V,
+ ) -> Result<BString, V::Error> {
+ let len = cmp::min(visitor.size_hint().unwrap_or(0), 256);
+ let mut bytes = Vec::with_capacity(len);
+ while let Some(v) = visitor.next_element()? {
+ bytes.push(v);
+ }
+ Ok(BString::from(bytes))
+ }
+
+ #[inline]
+ fn visit_bytes<E: Error>(
+ self,
+ value: &[u8],
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+
+ #[inline]
+ fn visit_byte_buf<E: Error>(
+ self,
+ value: Vec<u8>,
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+
+ #[inline]
+ fn visit_str<E: Error>(
+ self,
+ value: &str,
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+
+ #[inline]
+ fn visit_string<E: Error>(
+ self,
+ value: String,
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+ }
+
+ deserializer.deserialize_byte_buf(BStringVisitor)
+ }
+ }
+}
+
+#[cfg(test)]
+mod display {
+ use crate::bstring::BString;
+ use crate::ByteSlice;
+
+ #[test]
+ fn clean() {
+ assert_eq!(&format!("{}", &b"abc".as_bstr()), "abc");
+ assert_eq!(&format!("{}", &b"\xf0\x28\x8c\xbc".as_bstr()), "�(��");
+ }
+
+ #[test]
+ fn width_bigger_than_bstr() {
+ assert_eq!(&format!("{:<7}!", &b"abc".as_bstr()), "abc !");
+ assert_eq!(&format!("{:>7}!", &b"abc".as_bstr()), " abc!");
+ assert_eq!(&format!("{:^7}!", &b"abc".as_bstr()), " abc !");
+ assert_eq!(&format!("{:^6}!", &b"abc".as_bstr()), " abc !");
+ assert_eq!(&format!("{:-<7}!", &b"abc".as_bstr()), "abc----!");
+ assert_eq!(&format!("{:->7}!", &b"abc".as_bstr()), "----abc!");
+ assert_eq!(&format!("{:-^7}!", &b"abc".as_bstr()), "--abc--!");
+ assert_eq!(&format!("{:-^6}!", &b"abc".as_bstr()), "-abc--!");
+
+ assert_eq!(
+ &format!("{:<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(�� !"
+ );
+ assert_eq!(
+ &format!("{:>7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ " �(��!"
+ );
+ assert_eq!(
+ &format!("{:^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ " �(�� !"
+ );
+ assert_eq!(
+ &format!("{:^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ " �(�� !"
+ );
+
+ assert_eq!(
+ &format!("{:-<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��---!"
+ );
+ assert_eq!(
+ &format!("{:->7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "---�(��!"
+ );
+ assert_eq!(
+ &format!("{:-^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "-�(��--!"
+ );
+ assert_eq!(
+ &format!("{:-^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "-�(��-!"
+ );
+ }
+
+ #[test]
+ fn width_lesser_than_bstr() {
+ assert_eq!(&format!("{:<2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:>2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:^2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:-<2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:->2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:-^2}!", &b"abc".as_bstr()), "abc!");
+
+ assert_eq!(
+ &format!("{:<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:>3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+
+ assert_eq!(
+ &format!("{:-<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:->3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:-^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:-^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ }
+
+ quickcheck::quickcheck! {
+ fn total_length(bstr: BString) -> bool {
+ let size = bstr.chars().count();
+ format!("{:<1$}", bstr.as_bstr(), size).chars().count() >= size
+ }
+ }
+}
+
+#[cfg(test)]
+mod bstring_arbitrary {
+ use crate::bstring::BString;
+
+ use quickcheck::{Arbitrary, Gen};
+
+ impl Arbitrary for BString {
+ fn arbitrary(g: &mut Gen) -> BString {
+ BString::from(Vec::<u8>::arbitrary(g))
+ }
+
+ fn shrink(&self) -> Box<dyn Iterator<Item = BString>> {
+ Box::new(self.bytes.shrink().map(BString::from))
+ }
+ }
+}
+
+#[test]
+fn test_debug() {
+ use crate::{ByteSlice, B};
+
+ assert_eq!(
+ r#""\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp""#,
+ format!("{:?}", b"\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp".as_bstr()),
+ );
+
+ // Tests that if the underlying bytes contain the UTF-8 encoding of the
+ // replacement codepoint, then we emit the codepoint just like other
+ // non-printable Unicode characters.
+ assert_eq!(
+ b"\"\\xFF\xEF\xBF\xBD\\xFF\"".as_bstr(),
+ // Before fixing #72, the output here would be:
+ // \\xFF\\xEF\\xBF\\xBD\\xFF
+ B(&format!("{:?}", b"\xFF\xEF\xBF\xBD\xFF".as_bstr())).as_bstr(),
+ );
+}
+
+// See: https://github.com/BurntSushi/bstr/issues/82
+#[test]
+fn test_cows_regression() {
+ use crate::ByteSlice;
+ use std::borrow::Cow;
+
+ let c1 = Cow::from(b"hello bstr".as_bstr());
+ let c2 = b"goodbye bstr".as_bstr();
+ assert_ne!(c1, c2);
+
+ let c3 = Cow::from("hello str");
+ let c4 = "goodbye str";
+ assert_ne!(c3, c4);
+}
diff --git a/vendor/bstr/src/io.rs b/vendor/bstr/src/io.rs
new file mode 100644
index 000000000..ad6f3c1bb
--- /dev/null
+++ b/vendor/bstr/src/io.rs
@@ -0,0 +1,514 @@
+/*!
+Utilities for working with I/O using byte strings.
+
+This module currently only exports a single trait, `BufReadExt`, which provides
+facilities for conveniently and efficiently working with lines as byte strings.
+
+More APIs may be added in the future.
+*/
+
+use std::io;
+
+use crate::ext_slice::ByteSlice;
+use crate::ext_vec::ByteVec;
+
+/// An extention trait for
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
+/// which provides convenience APIs for dealing with byte strings.
+pub trait BufReadExt: io::BufRead {
+ /// Returns an iterator over the lines of this reader, where each line
+ /// is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next line in the iterator is returned. The line does
+ /// *not* contain a trailing `\n` or `\r\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// for result in cursor.byte_lines() {
+ /// let line = result?;
+ /// lines.push(line);
+ /// }
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_lines(self) -> ByteLines<Self>
+ where
+ Self: Sized,
+ {
+ ByteLines { buf: self }
+ }
+
+ /// Returns an iterator over byte-terminated records of this reader, where
+ /// each record is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next record in the iterator is returned. The record
+ /// does *not* contain its trailing terminator.
+ ///
+ /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
+ /// that it has no special handling for `\r`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// for result in cursor.byte_records(b'\x00') {
+ /// let record = result?;
+ /// records.push(record);
+ /// }
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_records(self, terminator: u8) -> ByteRecords<Self>
+ where
+ Self: Sized,
+ {
+ ByteRecords { terminator, buf: self }
+ }
+
+ /// Executes the given closure on each line in the underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
+ /// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes.
+ ///
+ /// This routine is useful for iterating over lines as quickly as
+ /// possible. Namely, a single allocation is reused for each line.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// cursor.for_byte_line(|line| {
+ /// lines.push(line.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_line_with_terminator(|line| {
+ for_each_line(&trim_line_slice(&line))
+ })
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
+ /// iterator. Namely, records do _not_ contain a trailing terminator byte.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record<F>(
+ self,
+ terminator: u8,
+ mut for_each_record: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_record_with_terminator(terminator, |chunk| {
+ for_each_record(&trim_record_slice(&chunk, terminator))
+ })
+ }
+
+ /// Executes the given closure on each line in the underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
+ /// the lines given to the closure *do* include the line terminator, if one
+ /// exists.
+ ///
+ /// This routine is useful for iterating over lines as quickly as
+ /// possible. Namely, a single allocation is reused for each line.
+ ///
+ /// This is identical to `for_byte_record_with_terminator` with a
+ /// terminator of `\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// cursor.for_byte_line_with_terminator(|line| {
+ /// lines.push(line.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem\n".as_bytes());
+ /// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_line_with_terminator<F>(
+ self,
+ for_each_line: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_record_with_terminator(b'\n', for_each_line)
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
+ /// the lines given to the closure *do* include the record terminator, if
+ /// one exists.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::B;
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record_with_terminator(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], B(b"lorem\x00"));
+ /// assert_eq!(records[1], B("ipsum\x00"));
+ /// assert_eq!(records[2], B("dolor"));
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record_with_terminator<F>(
+ mut self,
+ terminator: u8,
+ mut for_each_record: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ let mut bytes = vec![];
+ let mut res = Ok(());
+ let mut consumed = 0;
+ 'outer: loop {
+ // Lend out complete record slices from our buffer
+ {
+ let mut buf = self.fill_buf()?;
+ while let Some(index) = buf.find_byte(terminator) {
+ let (record, rest) = buf.split_at(index + 1);
+ buf = rest;
+ consumed += record.len();
+ match for_each_record(&record) {
+ Ok(false) => break 'outer,
+ Err(err) => {
+ res = Err(err);
+ break 'outer;
+ }
+ _ => (),
+ }
+ }
+
+ // Copy the final record fragment to our local buffer. This
+ // saves read_until() from re-scanning a buffer we know
+ // contains no remaining terminators.
+ bytes.extend_from_slice(&buf);
+ consumed += buf.len();
+ }
+
+ self.consume(consumed);
+ consumed = 0;
+
+ // N.B. read_until uses a different version of memchr that may
+ // be slower than the memchr crate that bstr uses. However, this
+ // should only run for a fairly small number of records, assuming a
+ // decent buffer size.
+ self.read_until(terminator, &mut bytes)?;
+ if bytes.is_empty() || !for_each_record(&bytes)? {
+ break;
+ }
+ bytes.clear();
+ }
+ self.consume(consumed);
+ res
+ }
+}
+
+impl<B: io::BufRead> BufReadExt for B {}
+
+/// An iterator over lines from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// This iterator is generally created by calling the
+/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteLines<B> {
+ buf: B,
+}
+
+/// An iterator over records from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// A byte record is any sequence of bytes terminated by a particular byte
+/// chosen by the caller. For example, NUL separated byte strings are said to
+/// be NUL-terminated byte records.
+///
+/// This iterator is generally created by calling the
+/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteRecords<B> {
+ buf: B,
+ terminator: u8,
+}
+
+impl<B: io::BufRead> Iterator for ByteLines<B> {
+ type Item = io::Result<Vec<u8>>;
+
+ fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
+ let mut bytes = vec![];
+ match self.buf.read_until(b'\n', &mut bytes) {
+ Err(e) => Some(Err(e)),
+ Ok(0) => None,
+ Ok(_) => {
+ trim_line(&mut bytes);
+ Some(Ok(bytes))
+ }
+ }
+ }
+}
+
+impl<B: io::BufRead> Iterator for ByteRecords<B> {
+ type Item = io::Result<Vec<u8>>;
+
+ fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
+ let mut bytes = vec![];
+ match self.buf.read_until(self.terminator, &mut bytes) {
+ Err(e) => Some(Err(e)),
+ Ok(0) => None,
+ Ok(_) => {
+ trim_record(&mut bytes, self.terminator);
+ Some(Ok(bytes))
+ }
+ }
+ }
+}
+
+fn trim_line(line: &mut Vec<u8>) {
+ if line.last_byte() == Some(b'\n') {
+ line.pop_byte();
+ if line.last_byte() == Some(b'\r') {
+ line.pop_byte();
+ }
+ }
+}
+
+fn trim_line_slice(mut line: &[u8]) -> &[u8] {
+ if line.last_byte() == Some(b'\n') {
+ line = &line[..line.len() - 1];
+ if line.last_byte() == Some(b'\r') {
+ line = &line[..line.len() - 1];
+ }
+ }
+ line
+}
+
+fn trim_record(record: &mut Vec<u8>, terminator: u8) {
+ if record.last_byte() == Some(terminator) {
+ record.pop_byte();
+ }
+}
+
+fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] {
+ if record.last_byte() == Some(terminator) {
+ record = &record[..record.len() - 1];
+ }
+ record
+}
+
+#[cfg(test)]
+mod tests {
+ use super::BufReadExt;
+ use crate::bstring::BString;
+
+ fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+ let mut lines = vec![];
+ slice
+ .as_ref()
+ .for_byte_line(|line| {
+ lines.push(BString::from(line.to_vec()));
+ Ok(true)
+ })
+ .unwrap();
+ lines
+ }
+
+ fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+ let mut lines = vec![];
+ slice
+ .as_ref()
+ .for_byte_line_with_terminator(|line| {
+ lines.push(BString::from(line.to_vec()));
+ Ok(true)
+ })
+ .unwrap();
+ lines
+ }
+
+ #[test]
+ fn lines_without_terminator() {
+ assert_eq!(collect_lines(""), Vec::<BString>::new());
+
+ assert_eq!(collect_lines("\n"), vec![""]);
+ assert_eq!(collect_lines("\n\n"), vec!["", ""]);
+ assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]);
+ assert_eq!(collect_lines("a\nb"), vec!["a", "b"]);
+ assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]);
+ assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]);
+
+ assert_eq!(collect_lines("\r\n"), vec![""]);
+ assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]);
+ assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]);
+ assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]);
+ assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]);
+ assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]);
+
+ assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]);
+ }
+
+ #[test]
+ fn lines_with_terminator() {
+ assert_eq!(collect_lines_term(""), Vec::<BString>::new());
+
+ assert_eq!(collect_lines_term("\n"), vec!["\n"]);
+ assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]);
+ assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]);
+ assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]);
+ assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
+ assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]);
+
+ assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]);
+ assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]);
+ assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]);
+ assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]);
+ assert_eq!(
+ collect_lines_term("abc\r\nxyz\r\n"),
+ vec!["abc\r\n", "xyz\r\n"]
+ );
+ assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]);
+
+ assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]);
+ }
+}
diff --git a/vendor/bstr/src/lib.rs b/vendor/bstr/src/lib.rs
new file mode 100644
index 000000000..41142c9c2
--- /dev/null
+++ b/vendor/bstr/src/lib.rs
@@ -0,0 +1,437 @@
+/*!
+A byte string library.
+
+Byte strings are just like standard Unicode strings with one very important
+difference: byte strings are only *conventionally* UTF-8 while Rust's standard
+Unicode strings are *guaranteed* to be valid UTF-8. The primary motivation for
+byte strings is for handling arbitrary bytes that are mostly UTF-8.
+
+# Overview
+
+This crate provides two important traits that provide string oriented methods
+on `&[u8]` and `Vec<u8>` types:
+
+* [`ByteSlice`](trait.ByteSlice.html) extends the `[u8]` type with additional
+ string oriented methods.
+* [`ByteVec`](trait.ByteVec.html) extends the `Vec<u8>` type with additional
+ string oriented methods.
+
+Additionally, this crate provides two concrete byte string types that deref to
+`[u8]` and `Vec<u8>`. These are useful for storing byte string types, and come
+with convenient `std::fmt::Debug` implementations:
+
+* [`BStr`](struct.BStr.html) is a byte string slice, analogous to `str`.
+* [`BString`](struct.BString.html) is an owned growable byte string buffer,
+ analogous to `String`.
+
+Additionally, the free function [`B`](fn.B.html) serves as a convenient short
+hand for writing byte string literals.
+
+# Quick examples
+
+Byte strings build on the existing APIs for `Vec<u8>` and `&[u8]`, with
+additional string oriented methods. Operations such as iterating over
+graphemes, searching for substrings, replacing substrings, trimming and case
+conversion are examples of things not provided on the standard library `&[u8]`
+APIs but are provided by this crate. For example, this code iterates over all
+of occurrences of a subtring:
+
+```
+use bstr::ByteSlice;
+
+let s = b"foo bar foo foo quux foo";
+
+let mut matches = vec![];
+for start in s.find_iter("foo") {
+ matches.push(start);
+}
+assert_eq!(matches, [0, 8, 12, 21]);
+```
+
+Here's another example showing how to do a search and replace (and also showing
+use of the `B` function):
+
+```
+use bstr::{B, ByteSlice};
+
+let old = B("foo ☃☃☃ foo foo quux foo");
+let new = old.replace("foo", "hello");
+assert_eq!(new, B("hello ☃☃☃ hello hello quux hello"));
+```
+
+And here's an example that shows case conversion, even in the presence of
+invalid UTF-8:
+
+```
+use bstr::{ByteSlice, ByteVec};
+
+let mut lower = Vec::from("hello β");
+lower[0] = b'\xFF';
+// lowercase β is uppercased to Β
+assert_eq!(lower.to_uppercase(), b"\xFFELLO \xCE\x92");
+```
+
+# Convenient debug representation
+
+When working with byte strings, it is often useful to be able to print them
+as if they were byte strings and not sequences of integers. While this crate
+cannot affect the `std::fmt::Debug` implementations for `[u8]` and `Vec<u8>`,
+this crate does provide the `BStr` and `BString` types which have convenient
+`std::fmt::Debug` implementations.
+
+For example, this
+
+```
+use bstr::ByteSlice;
+
+let mut bytes = Vec::from("hello β");
+bytes[0] = b'\xFF';
+
+println!("{:?}", bytes.as_bstr());
+```
+
+will output `"\xFFello β"`.
+
+This example works because the
+[`ByteSlice::as_bstr`](trait.ByteSlice.html#method.as_bstr)
+method converts any `&[u8]` to a `&BStr`.
+
+# When should I use byte strings?
+
+This library reflects my hypothesis that UTF-8 by convention is a better trade
+off in some circumstances than guaranteed UTF-8. It's possible, perhaps even
+likely, that this is a niche concern for folks working closely with core text
+primitives.
+
+The first time this idea hit me was in the implementation of Rust's regex
+engine. In particular, very little of the internal implementation cares at all
+about searching valid UTF-8 encoded strings. Indeed, internally, the
+implementation converts `&str` from the API to `&[u8]` fairly quickly and
+just deals with raw bytes. UTF-8 match boundaries are then guaranteed by the
+finite state machine itself rather than any specific string type. This makes it
+possible to not only run regexes on `&str` values, but also on `&[u8]` values.
+
+Why would you ever want to run a regex on a `&[u8]` though? Well, `&[u8]` is
+the fundamental way at which one reads data from all sorts of streams, via the
+standard library's [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html)
+trait. In particular, there is no platform independent way to determine whether
+what you're reading from is some binary file or a human readable text file.
+Therefore, if you're writing a program to search files, you probably need to
+deal with `&[u8]` directly unless you're okay with first converting it to a
+`&str` and dropping any bytes that aren't valid UTF-8. (Or otherwise determine
+the encoding---which is often impractical---and perform a transcoding step.)
+Often, the simplest and most robust way to approach this is to simply treat the
+contents of a file as if it were mostly valid UTF-8 and pass through invalid
+UTF-8 untouched. This may not be the most correct approach though!
+
+One case in particular exacerbates these issues, and that's memory mapping
+a file. When you memory map a file, that file may be gigabytes big, but all
+you get is a `&[u8]`. Converting that to a `&str` all in one go is generally
+not a good idea because of the costs associated with doing so, and also
+because it generally causes one to do two passes over the data instead of
+one, which is quite undesirable. It is of course usually possible to do it an
+incremental way by only parsing chunks at a time, but this is often complex to
+do or impractical. For example, many regex engines only accept one contiguous
+sequence of bytes at a time with no way to perform incremental matching.
+
+In summary, conventional UTF-8 byte strings provided by this library are
+definitely useful in some limited circumstances, but how useful they are more
+broadly isn't clear yet.
+
+# `bstr` in public APIs
+
+Since this library is not yet `1.0`, you should not use it in the public API of
+your crates until it hits `1.0` (unless you're OK with with tracking breaking
+releases of `bstr`). It is expected that `bstr 1.0` will be released before
+2022.
+
+In general, it should be possible to avoid putting anything in this crate into
+your public APIs. Namely, you should never need to use the `ByteSlice` or
+`ByteVec` traits as bounds on public APIs, since their only purpose is to
+extend the methods on the concrete types `[u8]` and `Vec<u8>`, respectively.
+Similarly, it should not be necessary to put either the `BStr` or `BString`
+types into public APIs. If you want to use them internally, then they can
+be converted to/from `[u8]`/`Vec<u8>` as needed.
+
+# Differences with standard strings
+
+The primary difference between `[u8]` and `str` is that the former is
+conventionally UTF-8 while the latter is guaranteed to be UTF-8. The phrase
+"conventionally UTF-8" means that a `[u8]` may contain bytes that do not form
+a valid UTF-8 sequence, but operations defined on the type in this crate are
+generally most useful on valid UTF-8 sequences. For example, iterating over
+Unicode codepoints or grapheme clusters is an operation that is only defined
+on valid UTF-8. Therefore, when invalid UTF-8 is encountered, the Unicode
+replacement codepoint is substituted. Thus, a byte string that is not UTF-8 at
+all is of limited utility when using these crate.
+
+However, not all operations on byte strings are specifically Unicode aware. For
+example, substring search has no specific Unicode semantics ascribed to it. It
+works just as well for byte strings that are completely valid UTF-8 as for byte
+strings that contain no valid UTF-8 at all. Similarly for replacements and
+various other operations that do not need any Unicode specific tailoring.
+
+Aside from the difference in how UTF-8 is handled, the APIs between `[u8]` and
+`str` (and `Vec<u8>` and `String`) are intentionally very similar, including
+maintaining the same behavior for corner cases in things like substring
+splitting. There are, however, some differences:
+
+* Substring search is not done with `matches`, but instead, `find_iter`.
+ In general, this crate does not define any generic
+ [`Pattern`](https://doc.rust-lang.org/std/str/pattern/trait.Pattern.html)
+ infrastructure, and instead prefers adding new methods for different
+ argument types. For example, `matches` can search by a `char` or a `&str`,
+ where as `find_iter` can only search by a byte string. `find_char` can be
+ used for searching by a `char`.
+* Since `SliceConcatExt` in the standard library is unstable, it is not
+ possible to reuse that to implement `join` and `concat` methods. Instead,
+ [`join`](fn.join.html) and [`concat`](fn.concat.html) are provided as free
+ functions that perform a similar task.
+* This library bundles in a few more Unicode operations, such as grapheme,
+ word and sentence iterators. More operations, such as normalization and
+ case folding, may be provided in the future.
+* Some `String`/`str` APIs will panic if a particular index was not on a valid
+ UTF-8 code unit sequence boundary. Conversely, no such checking is performed
+ in this crate, as is consistent with treating byte strings as a sequence of
+ bytes. This means callers are responsible for maintaining a UTF-8 invariant
+ if that's important.
+* Some routines provided by this crate, such as `starts_with_str`, have a
+ `_str` suffix to differentiate them from similar routines already defined
+ on the `[u8]` type. The difference is that `starts_with` requires its
+ parameter to be a `&[u8]`, where as `starts_with_str` permits its parameter
+ to by anything that implements `AsRef<[u8]>`, which is more flexible. This
+ means you can write `bytes.starts_with_str("☃")` instead of
+ `bytes.starts_with("☃".as_bytes())`.
+
+Otherwise, you should find most of the APIs between this crate and the standard
+library string APIs to be very similar, if not identical.
+
+# Handling of invalid UTF-8
+
+Since byte strings are only *conventionally* UTF-8, there is no guarantee
+that byte strings contain valid UTF-8. Indeed, it is perfectly legal for a
+byte string to contain arbitrary bytes. However, since this library defines
+a *string* type, it provides many operations specified by Unicode. These
+operations are typically only defined over codepoints, and thus have no real
+meaning on bytes that are invalid UTF-8 because they do not map to a particular
+codepoint.
+
+For this reason, whenever operations defined only on codepoints are used, this
+library will automatically convert invalid UTF-8 to the Unicode replacement
+codepoint, `U+FFFD`, which looks like this: `�`. For example, an
+[iterator over codepoints](struct.Chars.html) will yield a Unicode
+replacement codepoint whenever it comes across bytes that are not valid UTF-8:
+
+```
+use bstr::ByteSlice;
+
+let bs = b"a\xFF\xFFz";
+let chars: Vec<char> = bs.chars().collect();
+assert_eq!(vec!['a', '\u{FFFD}', '\u{FFFD}', 'z'], chars);
+```
+
+There are a few ways in which invalid bytes can be substituted with a Unicode
+replacement codepoint. One way, not used by this crate, is to replace every
+individual invalid byte with a single replacement codepoint. In contrast, the
+approach this crate uses is called the "substitution of maximal subparts," as
+specified by the Unicode Standard (Chapter 3, Section 9). (This approach is
+also used by [W3C's Encoding Standard](https://www.w3.org/TR/encoding/).) In
+this strategy, a replacement codepoint is inserted whenever a byte is found
+that cannot possibly lead to a valid UTF-8 code unit sequence. If there were
+previous bytes that represented a *prefix* of a well-formed UTF-8 code unit
+sequence, then all of those bytes (up to 3) are substituted with a single
+replacement codepoint. For example:
+
+```
+use bstr::ByteSlice;
+
+let bs = b"a\xF0\x9F\x87z";
+let chars: Vec<char> = bs.chars().collect();
+// The bytes \xF0\x9F\x87 could lead to a valid UTF-8 sequence, but 3 of them
+// on their own are invalid. Only one replacement codepoint is substituted,
+// which demonstrates the "substitution of maximal subparts" strategy.
+assert_eq!(vec!['a', '\u{FFFD}', 'z'], chars);
+```
+
+If you do need to access the raw bytes for some reason in an iterator like
+`Chars`, then you should use the iterator's "indices" variant, which gives
+the byte offsets containing the invalid UTF-8 bytes that were substituted with
+the replacement codepoint. For example:
+
+```
+use bstr::{B, ByteSlice};
+
+let bs = b"a\xE2\x98z";
+let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
+// Even though the replacement codepoint is encoded as 3 bytes itself, the
+// byte range given here is only two bytes, corresponding to the original
+// raw bytes.
+assert_eq!(vec![(0, 1, 'a'), (1, 3, '\u{FFFD}'), (3, 4, 'z')], chars);
+
+// Thus, getting the original raw bytes is as simple as slicing the original
+// byte string:
+let chars: Vec<&[u8]> = bs.char_indices().map(|(s, e, _)| &bs[s..e]).collect();
+assert_eq!(vec![B("a"), B(b"\xE2\x98"), B("z")], chars);
+```
+
+# File paths and OS strings
+
+One of the premiere features of Rust's standard library is how it handles file
+paths. In particular, it makes it very hard to write incorrect code while
+simultaneously providing a correct cross platform abstraction for manipulating
+file paths. The key challenge that one faces with file paths across platforms
+is derived from the following observations:
+
+* On most Unix-like systems, file paths are an arbitrary sequence of bytes.
+* On Windows, file paths are an arbitrary sequence of 16-bit integers.
+
+(In both cases, certain sequences aren't allowed. For example a `NUL` byte is
+not allowed in either case. But we can ignore this for the purposes of this
+section.)
+
+Byte strings, like the ones provided in this crate, line up really well with
+file paths on Unix like systems, which are themselves just arbitrary sequences
+of bytes. It turns out that if you treat them as "mostly UTF-8," then things
+work out pretty well. On the contrary, byte strings _don't_ really work
+that well on Windows because it's not possible to correctly roundtrip file
+paths between 16-bit integers and something that looks like UTF-8 _without_
+explicitly defining an encoding to do this for you, which is anathema to byte
+strings, which are just bytes.
+
+Rust's standard library elegantly solves this problem by specifying an
+internal encoding for file paths that's only used on Windows called
+[WTF-8](https://simonsapin.github.io/wtf-8/). Its key properties are that they
+permit losslessly roundtripping file paths on Windows by extending UTF-8 to
+support an encoding of surrogate codepoints, while simultaneously supporting
+zero-cost conversion from Rust's Unicode strings to file paths. (Since UTF-8 is
+a proper subset of WTF-8.)
+
+The fundamental point at which the above strategy fails is when you want to
+treat file paths as things that look like strings in a zero cost way. In most
+cases, this is actually the wrong thing to do, but some cases call for it,
+for example, glob or regex matching on file paths. This is because WTF-8 is
+treated as an internal implementation detail, and there is no way to access
+those bytes via a public API. Therefore, such consumers are limited in what
+they can do:
+
+1. One could re-implement WTF-8 and re-encode file paths on Windows to WTF-8
+ by accessing their underlying 16-bit integer representation. Unfortunately,
+ this isn't zero cost (it introduces a second WTF-8 decoding step) and it's
+ not clear this is a good thing to do, since WTF-8 should ideally remain an
+ internal implementation detail.
+2. One could instead declare that they will not handle paths on Windows that
+ are not valid UTF-16, and return an error when one is encountered.
+3. Like (2), but instead of returning an error, lossily decode the file path
+ on Windows that isn't valid UTF-16 into UTF-16 by replacing invalid bytes
+ with the Unicode replacement codepoint.
+
+While this library may provide facilities for (1) in the future, currently,
+this library only provides facilities for (2) and (3). In particular, a suite
+of conversion functions are provided that permit converting between byte
+strings, OS strings and file paths. For owned byte strings, they are:
+
+* [`ByteVec::from_os_string`](trait.ByteVec.html#method.from_os_string)
+* [`ByteVec::from_os_str_lossy`](trait.ByteVec.html#method.from_os_str_lossy)
+* [`ByteVec::from_path_buf`](trait.ByteVec.html#method.from_path_buf)
+* [`ByteVec::from_path_lossy`](trait.ByteVec.html#method.from_path_lossy)
+* [`ByteVec::into_os_string`](trait.ByteVec.html#method.into_os_string)
+* [`ByteVec::into_os_string_lossy`](trait.ByteVec.html#method.into_os_string_lossy)
+* [`ByteVec::into_path_buf`](trait.ByteVec.html#method.into_path_buf)
+* [`ByteVec::into_path_buf_lossy`](trait.ByteVec.html#method.into_path_buf_lossy)
+
+For byte string slices, they are:
+
+* [`ByteSlice::from_os_str`](trait.ByteSlice.html#method.from_os_str)
+* [`ByteSlice::from_path`](trait.ByteSlice.html#method.from_path)
+* [`ByteSlice::to_os_str`](trait.ByteSlice.html#method.to_os_str)
+* [`ByteSlice::to_os_str_lossy`](trait.ByteSlice.html#method.to_os_str_lossy)
+* [`ByteSlice::to_path`](trait.ByteSlice.html#method.to_path)
+* [`ByteSlice::to_path_lossy`](trait.ByteSlice.html#method.to_path_lossy)
+
+On Unix, all of these conversions are rigorously zero cost, which gives one
+a way to ergonomically deal with raw file paths exactly as they are using
+normal string-related functions. On Windows, these conversion routines perform
+a UTF-8 check and either return an error or lossily decode the file path
+into valid UTF-8, depending on which function you use. This means that you
+cannot roundtrip all file paths on Windows correctly using these conversion
+routines. However, this may be an acceptable downside since such file paths
+are exceptionally rare. Moreover, roundtripping isn't always necessary, for
+example, if all you're doing is filtering based on file paths.
+
+The reason why using byte strings for this is potentially superior than the
+standard library's approach is that a lot of Rust code is already lossily
+converting file paths to Rust's Unicode strings, which are required to be valid
+UTF-8, and thus contain latent bugs on Unix where paths with invalid UTF-8 are
+not terribly uncommon. If you instead use byte strings, then you're guaranteed
+to write correct code for Unix, at the cost of getting a corner case wrong on
+Windows.
+*/
+
+#![cfg_attr(not(feature = "std"), no_std)]
+
+pub use crate::bstr::BStr;
+#[cfg(feature = "std")]
+pub use crate::bstring::BString;
+pub use crate::ext_slice::{
+ ByteSlice, Bytes, Fields, FieldsWith, Find, FindReverse, Finder,
+ FinderReverse, Lines, LinesWithTerminator, Split, SplitN, SplitNReverse,
+ SplitReverse, B,
+};
+#[cfg(feature = "std")]
+pub use crate::ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error};
+#[cfg(feature = "unicode")]
+pub use crate::unicode::{
+ GraphemeIndices, Graphemes, SentenceIndices, Sentences, WordIndices,
+ Words, WordsWithBreakIndices, WordsWithBreaks,
+};
+pub use crate::utf8::{
+ decode as decode_utf8, decode_last as decode_last_utf8, CharIndices,
+ Chars, Utf8Chunk, Utf8Chunks, Utf8Error,
+};
+
+mod ascii;
+mod bstr;
+#[cfg(feature = "std")]
+mod bstring;
+mod byteset;
+mod ext_slice;
+#[cfg(feature = "std")]
+mod ext_vec;
+mod impls;
+#[cfg(feature = "std")]
+pub mod io;
+#[cfg(test)]
+mod tests;
+#[cfg(feature = "unicode")]
+mod unicode;
+mod utf8;
+
+#[cfg(test)]
+mod apitests {
+ use crate::bstr::BStr;
+ use crate::bstring::BString;
+ use crate::ext_slice::{Finder, FinderReverse};
+
+ #[test]
+ fn oibits() {
+ use std::panic::{RefUnwindSafe, UnwindSafe};
+
+ fn assert_send<T: Send>() {}
+ fn assert_sync<T: Sync>() {}
+ fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
+
+ assert_send::<&BStr>();
+ assert_sync::<&BStr>();
+ assert_unwind_safe::<&BStr>();
+ assert_send::<BString>();
+ assert_sync::<BString>();
+ assert_unwind_safe::<BString>();
+
+ assert_send::<Finder<'_>>();
+ assert_sync::<Finder<'_>>();
+ assert_unwind_safe::<Finder<'_>>();
+ assert_send::<FinderReverse<'_>>();
+ assert_sync::<FinderReverse<'_>>();
+ assert_unwind_safe::<FinderReverse<'_>>();
+ }
+}
diff --git a/vendor/bstr/src/tests.rs b/vendor/bstr/src/tests.rs
new file mode 100644
index 000000000..f4179fd1d
--- /dev/null
+++ b/vendor/bstr/src/tests.rs
@@ -0,0 +1,32 @@
+/// A sequence of tests for checking whether lossy decoding uses the maximal
+/// subpart strategy correctly. Namely, if a sequence of otherwise invalid
+/// UTF-8 bytes is a valid prefix of a valid UTF-8 sequence, then the entire
+/// prefix is replaced by a single replacement codepoint. In all other cases,
+/// each invalid byte is replaced by a single replacement codepoint.
+///
+/// The first element in each tuple is the expected result of lossy decoding,
+/// while the second element is the input given.
+pub const LOSSY_TESTS: &[(&str, &[u8])] = &[
+ ("a", b"a"),
+ ("\u{FFFD}", b"\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xFF\xFF"),
+ ("β\u{FFFD}", b"\xCE\xB2\xFF"),
+ ("☃\u{FFFD}", b"\xE2\x98\x83\xFF"),
+ ("𝝱\u{FFFD}", b"\xF0\x9D\x9D\xB1\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xCE\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xCE\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xFF"),
+ ("\u{FFFD}", b"\xF0\x9D\x9D"),
+ ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xFF"),
+ ("\u{FFFD}", b"\xCE"),
+ ("a\u{FFFD}", b"a\xCE"),
+ ("\u{FFFD}", b"\xE2\x98"),
+ ("a\u{FFFD}", b"a\xE2\x98"),
+ ("\u{FFFD}", b"\xF0\x9D\x9C"),
+ ("a\u{FFFD}", b"a\xF0\x9D\x9C"),
+ ("a\u{FFFD}\u{FFFD}\u{FFFD}z", b"a\xED\xA0\x80z"),
+ ("☃βツ\u{FFFD}", b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF"),
+ ("a\u{FFFD}\u{FFFD}\u{FFFD}b", b"\x61\xF1\x80\x80\xE1\x80\xC2\x62"),
+];
diff --git a/vendor/bstr/src/unicode/data/GraphemeBreakTest.txt b/vendor/bstr/src/unicode/data/GraphemeBreakTest.txt
new file mode 100644
index 000000000..fb4fec9ff
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/GraphemeBreakTest.txt
@@ -0,0 +1,630 @@
+# GraphemeBreakTest-12.1.0.txt
+# Date: 2019-03-10, 10:53:12 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Grapheme_Cluster_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Grapheme_Cluster_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of GraphemeBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000D ÷ 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000A ÷ 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0001 ÷ 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
+÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC00 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0378 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+#
+# Lines: 602
+#
+# EOF
diff --git a/vendor/bstr/src/unicode/data/LICENSE-UNICODE b/vendor/bstr/src/unicode/data/LICENSE-UNICODE
new file mode 100644
index 000000000..ad0693577
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/LICENSE-UNICODE
@@ -0,0 +1,45 @@
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+See Terms of Use for definitions of Unicode Inc.'s
+Data Files and Software.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2019 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
diff --git a/vendor/bstr/src/unicode/data/SentenceBreakTest.txt b/vendor/bstr/src/unicode/data/SentenceBreakTest.txt
new file mode 100644
index 000000000..7c1c34afb
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/SentenceBreakTest.txt
@@ -0,0 +1,530 @@
+# SentenceBreakTest-12.1.0.txt
+# Date: 2019-03-10, 10:53:28 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Sentence_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Sentence_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of SentenceBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0001 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 0308 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 0308 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0308 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0001 × 0308 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0001 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0001 × 0308 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0001 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0001 × 0308 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0001 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0001 × 0308 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0001 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0001 × 0308 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0001 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 0308 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0001 × 0308 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0001 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0001 × 0308 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0001 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0001 × 0308 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0001 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0001 × 0308 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 × 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 × 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000D ÷ 0308 × 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000D ÷ 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000D ÷ 0308 × 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000D ÷ 0308 × 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000D ÷ 0308 × 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000D ÷ 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000D ÷ 0308 × 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 0308 × 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000D ÷ 0308 × 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000D ÷ 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000D ÷ 0308 × 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000D ÷ 0308 × 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 000D ÷ 0308 × 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 × 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 × 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 × 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000A ÷ 0308 × 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000A ÷ 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000A ÷ 0308 × 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000A ÷ 0308 × 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000A ÷ 0308 × 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000A ÷ 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000A ÷ 0308 × 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 0308 × 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000A ÷ 0308 × 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000A ÷ 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000A ÷ 0308 × 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000A ÷ 0308 × 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 000A ÷ 0308 × 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0085 ÷ 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0085 ÷ 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0085 ÷ 0308 × 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0085 ÷ 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0085 ÷ 0308 × 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0085 ÷ 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0085 ÷ 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0085 ÷ 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0085 ÷ 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0085 ÷ 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0085 ÷ 0308 × 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0085 ÷ 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0085 ÷ 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0085 ÷ 0308 × 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0085 ÷ 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0085 ÷ 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0085 ÷ 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 0085 ÷ 0308 × 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0085 ÷ 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0085 ÷ 0308 × 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0085 ÷ 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0009 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0009 × 0308 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0009 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0009 × 0308 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0009 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0009 × 0308 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0009 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0009 × 0308 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0009 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0009 × 0308 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0009 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0009 × 0308 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0009 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0009 × 0308 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0009 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0009 × 0308 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0009 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0009 × 0308 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0009 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0009 × 0308 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0009 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0009 × 0308 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0009 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0009 × 0308 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0009 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0009 × 0308 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0009 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0009 × 0308 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0009 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0009 × 0308 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 0308 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 0308 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 0308 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0061 × 0308 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0061 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0061 × 0308 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0061 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0061 × 0308 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0061 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0061 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0061 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0061 × 0308 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0061 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0061 × 0308 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0061 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0061 × 0308 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0061 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0061 × 0308 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0061 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0061 × 0308 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0061 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 0308 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 0308 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0308 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0041 × 0308 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0041 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0041 × 0308 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0041 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0041 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0041 × 0308 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0041 × 0308 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0041 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0041 × 0308 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0041 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0041 × 0308 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0041 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0041 × 0308 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 01BB × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 01BB × 0308 × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 01BB × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 01BB × 0308 × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 01BB × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 01BB × 0308 × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 01BB × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 01BB × 0308 × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 01BB × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 01BB × 0308 × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 01BB × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 01BB × 0308 × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 01BB × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 01BB × 0308 × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 01BB × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 01BB × 0308 × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 01BB × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 01BB × 0308 × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 01BB × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 01BB × 0308 × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 01BB × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 01BB × 0308 × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 01BB × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 01BB × 0308 × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 01BB × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 01BB × 0308 × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 01BB × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 01BB × 0308 × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 01BB × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 01BB × 0308 × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 0308 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 0308 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0308 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0030 × 0308 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0030 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0030 × 0308 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0030 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0030 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0030 × 0308 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0030 × 0308 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0030 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0030 × 0308 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0030 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0030 × 0308 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0030 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0030 × 0308 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 0308 × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0308 × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002E × 0308 × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002E × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002E × 0308 × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002E × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002E × 0308 × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002E ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002E × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 0308 × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 002E × 0308 × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 002E × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002E × 0308 × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002E × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002E × 0308 × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002E × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 002E × 0308 × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0021 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0021 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0021 × 0308 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0021 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0021 × 0308 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0021 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0021 × 0308 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0021 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0021 × 0308 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0021 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0021 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0021 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0021 × 0308 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0021 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0021 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 0021 × 0308 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 0021 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0021 × 0308 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0021 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0021 × 0308 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0021 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 0021 × 0308 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 0021 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0021 × 0308 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0021 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0021 × 0308 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 0308 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 0308 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0308 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0022 × 0308 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0022 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0022 × 0308 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0022 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0022 × 0308 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0022 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0022 × 0308 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0022 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0022 × 0308 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0022 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 0308 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0022 × 0308 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0022 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0022 × 0308 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0022 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0022 × 0308 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0022 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0022 × 0308 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 0308 × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 0308 × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0308 × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002C × 0308 × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002C × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002C × 0308 × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002C × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002C × 0308 × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002C × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002C × 0308 × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002C × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002C × 0308 × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002C × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 0308 × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 002C × 0308 × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 002C × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002C × 0308 × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002C × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002C × 0308 × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002C × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 002C × 0308 × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 0308 × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 0308 × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0308 × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 00AD × 0308 × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 00AD × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 00AD × 0308 × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 00AD × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 00AD × 0308 × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 00AD × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 00AD × 0308 × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 00AD × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 00AD × 0308 × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 00AD × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 0308 × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 00AD × 0308 × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 00AD × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 00AD × 0308 × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 00AD × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 00AD × 0308 × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 00AD × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 00AD × 0308 × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 0308 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0300 × 0308 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0300 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0300 × 0308 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0300 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0300 × 0308 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0300 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0300 × 0308 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0300 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0300 × 0308 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0300 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 0308 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0300 × 0308 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0300 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0300 × 0308 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0300 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0300 × 0308 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0300 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0300 × 0308 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 × 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] ARABIC LETTER NOON (OLetter) ÷ [0.3]
+÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] SPACE (Sp) ÷ [0.3]
+÷ 0028 × 0022 × 0047 × 006F × 002E × 0022 × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
+÷ 0028 × 201C × 0047 × 006F × 003F × 201D × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] QUESTION MARK (STerm) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E × 0020 × 0069 × 0073 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER S (Lower) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 003F × 0020 ÷ 0048 × 0065 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [9.0] SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0033 × 002E × 0034 ÷ # ÷ [0.2] DIGIT THREE (Numeric) × [998.0] FULL STOP (ATerm) × [6.0] DIGIT FOUR (Numeric) ÷ [0.3]
+÷ 0063 × 002E × 0064 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
+÷ 0043 × 002E × 0064 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
+÷ 0063 × 002E × 0044 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
+÷ 0043 × 002E × 0044 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 2018 × 0028 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 2018 × 0028 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 000A ÷ 0308 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0074 × 0068 × 0065 × 0020 × 0072 × 0065 × 0073 × 0070 × 002E × 0020 × 006C × 0065 × 0061 × 0064 × 0065 × 0072 × 0073 × 0020 × 0061 × 0072 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] LATIN SMALL LETTER P (Lower) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER L (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 5B57 × 002E ÷ 5B57 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E ÷ 5B83 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 3002 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.1] IDEOGRAPHIC FULL STOP (STerm) ÷ [0.3]
+÷ 5B57 × 3002 ÷ 5B83 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] IDEOGRAPHIC FULL STOP (STerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
+÷ 0021 × 0020 × 0020 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] SPACE (Sp) × [10.0] SPACE (Sp) ÷ [0.3]
+÷ 2060 × 0028 × 2060 × 0022 × 2060 × 0047 × 2060 × 006F × 2060 × 002E × 2060 × 0022 × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0028 × 2060 × 201C × 2060 × 0047 × 2060 × 006F × 2060 × 003F × 2060 × 201D × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 0020 × 2060 × 0069 × 2060 × 0073 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 003F × 2060 × 0020 × 2060 ÷ 0048 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0033 × 2060 × 002E × 2060 × 0034 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] DIGIT THREE (Numeric) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [6.0] DIGIT FOUR (Numeric) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0063 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0043 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0063 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0043 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 2018 × 2060 × 0028 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 2018 × 2060 × 0028 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 000A ÷ 2060 × 0308 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 0020 × 2060 × 0072 × 2060 × 0065 × 2060 × 0073 × 2060 × 0070 × 2060 × 002E × 2060 × 0020 × 2060 × 006C × 2060 × 0065 × 2060 × 0061 × 2060 × 0064 × 2060 × 0065 × 2060 × 0072 × 2060 × 0073 × 2060 × 0020 × 2060 × 0061 × 2060 × 0072 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER P (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER L (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 5B57 × 2060 × 002E × 2060 ÷ 5B57 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0021 × 2060 × 0020 × 2060 × 0020 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] EXCLAMATION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [10.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+#
+# Lines: 502
+#
+# EOF
diff --git a/vendor/bstr/src/unicode/data/WordBreakTest.txt b/vendor/bstr/src/unicode/data/WordBreakTest.txt
new file mode 100644
index 000000000..facd8920e
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/WordBreakTest.txt
@@ -0,0 +1,1851 @@
+# WordBreakTest-12.1.0.txt
+# Date: 2019-03-10, 10:53:29 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Word_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Word_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of WordBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0001 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0001 × 0308 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0001 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0001 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0001 × 0308 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0001 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 × 0308 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0001 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0001 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 × 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0001 × 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000D ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000D ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000D ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000D ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000A ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000A ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000A ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000A ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000B ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000B ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000B ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000B ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000B ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000B ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000B ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000B ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000B ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000B ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000B ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000B ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000B ÷ 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000B ÷ 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000B ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 3031 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 3031 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 3031 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 3031 × 0308 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 3031 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 3031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 3031 × 0308 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 3031 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 × 0308 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 3031 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 3031 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 3031 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 3031 × 0308 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 3031 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 3031 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 3031 × 0308 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 3031 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 3031 × 0308 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0041 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0041 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 × 0308 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0041 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0041 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0041 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0041 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0041 × 0308 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0041 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 003A ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 003A ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 003A ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 003A ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 003A ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 003A ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 003A ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 003A ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 003A ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 003A ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 003A ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 003A × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 003A × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 003A × 0308 × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002C ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002C ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002C ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002C ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002C ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002C ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002C ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002C × 0308 × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 0308 ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0308 ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002E × 0308 ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002E ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002E × 0308 ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002E ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002E × 0308 ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002E ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 0308 ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002E × 0308 ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002E ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002E × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002E ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002E × 0308 ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002E ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002E ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002E × 0308 ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002E ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002E × 0308 ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002E × 0308 × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002E ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0030 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0030 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0030 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 × 0308 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0030 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0030 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0030 × 0308 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0030 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0030 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0030 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0030 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0030 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 005F × 0308 ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 005F ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 005F × 0308 ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 005F ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 005F × 0308 ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 005F ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 005F × 0308 ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 005F × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 005F × 0308 × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 005F × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 005F × 0308 × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 005F ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 005F × 0308 ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 005F × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 005F × 0308 × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 005F × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F × 0308 × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 005F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 005F × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 005F × 0308 × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 005F ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 005F × 0308 ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 005F × 0308 ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 005F ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 005F × 0308 ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 005F × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 005F × 0308 × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 005F × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 005F × 0308 × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 005F × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 1F1E6 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 1F1E6 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 1F1E6 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 05D0 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 05D0 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 05D0 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 05D0 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 05D0 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 05D0 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 05D0 × 0308 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 05D0 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 05D0 × 0308 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 05D0 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 05D0 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 05D0 × 0308 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 05D0 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 05D0 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 05D0 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 05D0 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 05D0 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 05D0 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0022 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0022 × 0308 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0022 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0022 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0022 × 0308 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0022 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0022 × 0308 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0022 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0022 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0022 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0022 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0022 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0022 × 0308 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0022 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0022 × 0308 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0027 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0027 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0027 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 231A × 0308 ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 231A ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 231A × 0308 ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 231A ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 231A ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 231A × 0308 ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 231A ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 231A × 0308 ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 231A ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 231A × 0308 ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 231A × 0308 ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 231A ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 231A ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 231A × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 231A × 0308 × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 231A ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0020 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0020 × 0308 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0020 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0020 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0020 × 0308 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0020 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0020 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0020 × 0308 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0020 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0020 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0020 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 00AD ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 00AD × 0308 ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 00AD ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 00AD ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 00AD × 0308 ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 00AD ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD × 0308 ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00AD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00AD ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 00AD ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 00AD × 0308 ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 00AD ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 00AD × 0308 × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0300 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0300 × 0308 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0300 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0300 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0300 × 0308 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0300 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 × 0308 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0300 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 200D × 0308 ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 200D ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 200D × 0308 ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 200D ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 200D ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 200D × 0308 ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 200D ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 200D × 0308 ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 200D ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 200D × 0308 ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 200D × 0308 ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 200D ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 200D ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 200D × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 200D × 0308 × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 200D ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 2060 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 003A × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 003A × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 0027 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 0027 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] ARABIC LETTER NOON (ALetter) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 × 003A × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 ÷ 003A ÷ 003A ÷ 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0022 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.2] QUOTATION MARK (Double_Quote) × [7.3] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 × 0030 × 0030 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 ÷ 002C ÷ 002C ÷ 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 005F × 0030 × 005F × 3031 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 × 005F × 005F × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 0061 × 200D × 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 1F6D1 × 1F3FF ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 200D × 1F6D1 × 1F3FF ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F6D1 ÷ 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 0308 × 200D × 0308 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0020 × 0020 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+#
+# Lines: 1823
+#
+# EOF
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
new file mode 100644
index 000000000..0efaaf290
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
new file mode 100644
index 000000000..eb240256c
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
new file mode 100644
index 000000000..b53b1d7d9
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
new file mode 100644
index 000000000..d42cd3652
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
new file mode 100644
index 000000000..c75ea5fd6
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
new file mode 100644
index 000000000..93e888c64
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/mod.rs b/vendor/bstr/src/unicode/fsm/mod.rs
new file mode 100644
index 000000000..ae6c499fc
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/mod.rs
@@ -0,0 +1,8 @@
+pub mod grapheme_break_fwd;
+pub mod grapheme_break_rev;
+pub mod regional_indicator_rev;
+pub mod sentence_break_fwd;
+pub mod simple_word_fwd;
+pub mod whitespace_anchored_fwd;
+pub mod whitespace_anchored_rev;
+pub mod word_break_fwd;
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
new file mode 100644
index 000000000..1a3357f71
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
new file mode 100644
index 000000000..e437aae3a
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
new file mode 100644
index 000000000..2bf7e4c91
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
new file mode 100644
index 000000000..a1813d7c8
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
new file mode 100644
index 000000000..2763583be
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
new file mode 100644
index 000000000..cc937a4ba
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
new file mode 100644
index 000000000..adc64c192
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
new file mode 100644
index 000000000..dd4838619
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
new file mode 100644
index 000000000..f1f3da5f8
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
new file mode 100644
index 000000000..bcfc4e9a1
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
new file mode 100644
index 000000000..d534a464a
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
new file mode 100644
index 000000000..419b5d4f6
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
new file mode 100644
index 000000000..427d3a922
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
new file mode 100644
index 000000000..7cc3a0a99
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
new file mode 100644
index 000000000..301b03cd9
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u16], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u16; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u16], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u16; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
new file mode 100644
index 000000000..1e75db68a
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
new file mode 100644
index 000000000..e3093a3f0
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
new file mode 100644
index 000000000..fb041b7a3
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
@@ -0,0 +1,45 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.9 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static::lazy_static! {
+ pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("word_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static::lazy_static! {
+ pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("word_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe {
+ ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
+ }
+ };
+}
diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs
new file mode 100644
index 000000000..ad31cf158
--- /dev/null
+++ b/vendor/bstr/src/unicode/grapheme.rs
@@ -0,0 +1,355 @@
+use regex_automata::DFA;
+
+use crate::ext_slice::ByteSlice;
+use crate::unicode::fsm::grapheme_break_fwd::GRAPHEME_BREAK_FWD;
+use crate::unicode::fsm::grapheme_break_rev::GRAPHEME_BREAK_REV;
+use crate::unicode::fsm::regional_indicator_rev::REGIONAL_INDICATOR_REV;
+use crate::utf8;
+
+/// An iterator over grapheme clusters in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::graphemes`](trait.ByteSlice.html#method.graphemes).
+///
+/// Unicode defines a grapheme cluster as an *approximation* to a single user
+/// visible character. A grapheme cluster, or just "grapheme," is made up of
+/// one or more codepoints. For end user oriented tasks, one should generally
+/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
+/// always yields one codepoint at a time.
+///
+/// Since graphemes are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator can be used in reverse. When reversed, exactly the same
+/// set of grapheme clusters are yielded, but in reverse order.
+///
+/// This iterator only yields *extended* grapheme clusters, in accordance with
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
+#[derive(Clone, Debug)]
+pub struct Graphemes<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Graphemes<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Graphemes<'a> {
+ Graphemes { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".graphemes();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Graphemes<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let (grapheme, size) = decode_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(grapheme)
+ }
+}
+
+impl<'a> DoubleEndedIterator for Graphemes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> {
+ let (grapheme, size) = decode_last_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ Some(grapheme)
+ }
+}
+
+/// An iterator over grapheme clusters in a byte string and their byte index
+/// positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::grapheme_indices`](trait.ByteSlice.html#method.grapheme_indices).
+///
+/// Unicode defines a grapheme cluster as an *approximation* to a single user
+/// visible character. A grapheme cluster, or just "grapheme," is made up of
+/// one or more codepoints. For end user oriented tasks, one should generally
+/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
+/// always yields one codepoint at a time.
+///
+/// Since graphemes are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// grapheme cluster yielded with those indices. For example, when this
+/// iterator encounters `\xFF` in the byte string, then it will yield a pair
+/// of indices ranging over a single byte, but will provide an `&str`
+/// equivalent to `"\u{FFFD}"`, which is three bytes in length. However, when
+/// given only valid UTF-8, then all indices are in exact correspondence with
+/// their paired grapheme cluster.
+///
+/// This iterator can be used in reverse. When reversed, exactly the same
+/// set of grapheme clusters are yielded, but in reverse order.
+///
+/// This iterator only yields *extended* grapheme clusters, in accordance with
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
+#[derive(Clone, Debug)]
+pub struct GraphemeIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+ reverse_index: usize,
+}
+
+impl<'a> GraphemeIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> GraphemeIndices<'a> {
+ GraphemeIndices { bs: bs, forward_index: 0, reverse_index: bs.len() }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".grapheme_indices();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for GraphemeIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ let index = self.forward_index;
+ let (grapheme, size) = decode_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, grapheme))
+ }
+}
+
+impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<(usize, usize, &'a str)> {
+ let (grapheme, size) = decode_last_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ self.reverse_index -= size;
+ Some((self.reverse_index, self.reverse_index + size, grapheme))
+ }
+}
+
+/// Decode a grapheme from the given byte string.
+///
+/// This returns the resulting grapheme (which may be a Unicode replacement
+/// codepoint if invalid UTF-8 was found), along with the number of bytes
+/// decoded in the byte string. The number of bytes decoded may not be the
+/// same as the length of grapheme in the case where invalid UTF-8 is found.
+pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) {
+ // Safe because a match can only occur for valid UTF-8.
+ let grapheme = unsafe { bs[..end].to_str_unchecked() };
+ (grapheme, grapheme.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) {
+ start = adjust_rev_for_regional_indicator(bs, start);
+ // Safe because a match can only occur for valid UTF-8.
+ let grapheme = unsafe { bs[start..].to_str_unchecked() };
+ (grapheme, grapheme.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_last_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+/// Return the correct offset for the next grapheme decoded at the end of the
+/// given byte string, where `i` is the initial guess. In particular,
+/// `&bs[i..]` represents the candidate grapheme.
+///
+/// `i` is returned by this function in all cases except when `&bs[i..]` is
+/// a pair of regional indicator codepoints. In that case, if an odd number of
+/// additional regional indicator codepoints precedes `i`, then `i` is
+/// adjusted such that it points to only a single regional indicator.
+///
+/// This "fixing" is necessary to handle the requirement that a break cannot
+/// occur between regional indicators where it would cause an odd number of
+/// regional indicators to exist before the break from the *start* of the
+/// string. A reverse regex cannot detect this case easily without look-around.
+fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
+ // All regional indicators use a 4 byte encoding, and we only care about
+ // the case where we found a pair of regional indicators.
+ if bs.len() - i != 8 {
+ return i;
+ }
+ // Count all contiguous occurrences of regional indicators. If there's an
+ // even number of them, then we can accept the pair we found. Otherwise,
+ // we can only take one of them.
+ //
+ // FIXME: This is quadratic in the worst case, e.g., a string of just
+ // regional indicator codepoints. A fix probably requires refactoring this
+ // code a bit such that we don't rescan regional indicators.
+ let mut count = 0;
+ while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) {
+ bs = &bs[..start];
+ count += 1;
+ }
+ if count % 2 == 0 {
+ i
+ } else {
+ i + 4
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ucd_parse::GraphemeClusterBreakTest;
+
+ use super::*;
+ use crate::ext_slice::ByteSlice;
+ use crate::tests::LOSSY_TESTS;
+
+ #[test]
+ fn forward_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.grapheme_clusters.concat();
+ let got: Vec<String> = Graphemes::new(given.as_bytes())
+ .map(|cluster| cluster.to_string())
+ .collect();
+ assert_eq!(
+ test.grapheme_clusters,
+ got,
+ "\ngrapheme forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ uniescape(&given),
+ uniescape_vec(&test.grapheme_clusters),
+ uniescape_vec(&got),
+ );
+ }
+ }
+
+ #[test]
+ fn reverse_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.grapheme_clusters.concat();
+ let mut got: Vec<String> = Graphemes::new(given.as_bytes())
+ .rev()
+ .map(|cluster| cluster.to_string())
+ .collect();
+ got.reverse();
+ assert_eq!(
+ test.grapheme_clusters,
+ got,
+ "\n\ngrapheme reverse break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ uniescape(&given),
+ uniescape_vec(&test.grapheme_clusters),
+ uniescape_vec(&got),
+ );
+ }
+ }
+
+ #[test]
+ fn forward_lossy() {
+ for &(expected, input) in LOSSY_TESTS {
+ let got = Graphemes::new(input.as_bytes()).collect::<String>();
+ assert_eq!(expected, got);
+ }
+ }
+
+ #[test]
+ fn reverse_lossy() {
+ for &(expected, input) in LOSSY_TESTS {
+ let expected: String = expected.chars().rev().collect();
+ let got =
+ Graphemes::new(input.as_bytes()).rev().collect::<String>();
+ assert_eq!(expected, got);
+ }
+ }
+
+ fn uniescape(s: &str) -> String {
+ s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
+ }
+
+ fn uniescape_vec(strs: &[String]) -> Vec<String> {
+ strs.iter().map(|s| uniescape(s)).collect()
+ }
+
+ /// Return all of the UCD for grapheme breaks.
+ fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
+ const TESTDATA: &'static str =
+ include_str!("data/GraphemeBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap());
+ }
+ tests
+ }
+}
diff --git a/vendor/bstr/src/unicode/mod.rs b/vendor/bstr/src/unicode/mod.rs
new file mode 100644
index 000000000..60318f437
--- /dev/null
+++ b/vendor/bstr/src/unicode/mod.rs
@@ -0,0 +1,12 @@
+pub use self::grapheme::{decode_grapheme, GraphemeIndices, Graphemes};
+pub use self::sentence::{SentenceIndices, Sentences};
+pub use self::whitespace::{whitespace_len_fwd, whitespace_len_rev};
+pub use self::word::{
+ WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks,
+};
+
+mod fsm;
+mod grapheme;
+mod sentence;
+mod whitespace;
+mod word;
diff --git a/vendor/bstr/src/unicode/sentence.rs b/vendor/bstr/src/unicode/sentence.rs
new file mode 100644
index 000000000..063f342e2
--- /dev/null
+++ b/vendor/bstr/src/unicode/sentence.rs
@@ -0,0 +1,220 @@
+use regex_automata::DFA;
+
+use crate::ext_slice::ByteSlice;
+use crate::unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD;
+use crate::utf8;
+
+/// An iterator over sentences in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::sentences`](trait.ByteSlice.html#method.sentences).
+///
+/// Sentences typically include their trailing punctuation and whitespace.
+///
+/// Since sentences are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default sentence boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
+#[derive(Clone, Debug)]
+pub struct Sentences<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Sentences<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Sentences<'a> {
+ Sentences { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"I want this. Not that. Right now.".sentences();
+ ///
+ /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"Not that. Right now.", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Sentences<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let (sentence, size) = decode_sentence(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(sentence)
+ }
+}
+
+/// An iterator over sentences in a byte string, along with their byte offsets.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::sentence_indices`](trait.ByteSlice.html#method.sentence_indices).
+///
+/// Sentences typically include their trailing punctuation and whitespace.
+///
+/// Since sentences are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// sentence yielded with those indices. For example, when this iterator
+/// encounters `\xFF` in the byte string, then it will yield a pair of indices
+/// ranging over a single byte, but will provide an `&str` equivalent to
+/// `"\u{FFFD}"`, which is three bytes in length. However, when given only
+/// valid UTF-8, then all indices are in exact correspondence with their paired
+/// word.
+///
+/// This iterator yields words in accordance with the default sentence boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
+#[derive(Clone, Debug)]
+pub struct SentenceIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+}
+
+impl<'a> SentenceIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> SentenceIndices<'a> {
+ SentenceIndices { bs: bs, forward_index: 0 }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"I want this. Not that. Right now.".sentence_indices();
+ ///
+ /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"Not that. Right now.", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for SentenceIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ let index = self.forward_index;
+ let (word, size) = decode_sentence(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, word))
+ }
+}
+
+fn decode_sentence(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) {
+ // Safe because a match can only occur for valid UTF-8.
+ let sentence = unsafe { bs[..end].to_str_unchecked() };
+ (sentence, sentence.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ucd_parse::SentenceBreakTest;
+
+ use crate::ext_slice::ByteSlice;
+
+ #[test]
+ fn forward_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.sentences.concat();
+ let got = sentences(given.as_bytes());
+ assert_eq!(
+ test.sentences,
+ got,
+ "\n\nsentence forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ given,
+ strs_to_bstrs(&test.sentences),
+ strs_to_bstrs(&got),
+ );
+ }
+ }
+
+ // Some additional tests that don't seem to be covered by the UCD tests.
+ #[test]
+ fn forward_additional() {
+ assert_eq!(vec!["a.. ", "A"], sentences(b"a.. A"));
+ assert_eq!(vec!["a.. a"], sentences(b"a.. a"));
+
+ assert_eq!(vec!["a... ", "A"], sentences(b"a... A"));
+ assert_eq!(vec!["a... a"], sentences(b"a... a"));
+
+ assert_eq!(vec!["a...,..., a"], sentences(b"a...,..., a"));
+ }
+
+ fn sentences(bytes: &[u8]) -> Vec<&str> {
+ bytes.sentences().collect()
+ }
+
+ fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
+ strs.iter().map(|s| s.as_ref().as_bytes()).collect()
+ }
+
+ /// Return all of the UCD for sentence breaks.
+ fn ucdtests() -> Vec<SentenceBreakTest> {
+ const TESTDATA: &'static str =
+ include_str!("data/SentenceBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap());
+ }
+ tests
+ }
+}
diff --git a/vendor/bstr/src/unicode/whitespace.rs b/vendor/bstr/src/unicode/whitespace.rs
new file mode 100644
index 000000000..949a83faf
--- /dev/null
+++ b/vendor/bstr/src/unicode/whitespace.rs
@@ -0,0 +1,14 @@
+use regex_automata::DFA;
+
+use crate::unicode::fsm::whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD;
+use crate::unicode::fsm::whitespace_anchored_rev::WHITESPACE_ANCHORED_REV;
+
+/// Return the first position of a non-whitespace character.
+pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
+ WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0)
+}
+
+/// Return the last position of a non-whitespace character.
+pub fn whitespace_len_rev(slice: &[u8]) -> usize {
+ WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len())
+}
diff --git a/vendor/bstr/src/unicode/word.rs b/vendor/bstr/src/unicode/word.rs
new file mode 100644
index 000000000..e0a570144
--- /dev/null
+++ b/vendor/bstr/src/unicode/word.rs
@@ -0,0 +1,406 @@
+use regex_automata::DFA;
+
+use crate::ext_slice::ByteSlice;
+use crate::unicode::fsm::simple_word_fwd::SIMPLE_WORD_FWD;
+use crate::unicode::fsm::word_break_fwd::WORD_BREAK_FWD;
+use crate::utf8;
+
+/// An iterator over words in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words`](trait.ByteSlice.html#method.words).
+///
+/// This is similar to the [`WordsWithBreaks`](struct.WordsWithBreaks.html)
+/// iterator, except it only returns elements that contain a "word" character.
+/// A word character is defined by UTS #18 (Annex C) to be the combination
+/// of the `Alphabetic` and `Join_Control` properties, along with the
+/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
+///
+/// Since words are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct Words<'a>(WordsWithBreaks<'a>);
+
+impl<'a> Words<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Words<'a> {
+ Words(WordsWithBreaks::new(bs))
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.0.as_bytes()
+ }
+}
+
+impl<'a> Iterator for Words<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ while let Some(word) = self.0.next() {
+ if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ return Some(word);
+ }
+ }
+ None
+ }
+}
+
+/// An iterator over words in a byte string and their byte index positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::word_indices`](trait.ByteSlice.html#method.word_indices).
+///
+/// This is similar to the
+/// [`WordsWithBreakIndices`](struct.WordsWithBreakIndices.html) iterator,
+/// except it only returns elements that contain a "word" character. A
+/// word character is defined by UTS #18 (Annex C) to be the combination
+/// of the `Alphabetic` and `Join_Control` properties, along with the
+/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
+///
+/// Since words are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// word yielded with those indices. For example, when this iterator encounters
+/// `\xFF` in the byte string, then it will yield a pair of indices ranging
+/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`,
+/// which is three bytes in length. However, when given only valid UTF-8, then
+/// all indices are in exact correspondence with their paired word.
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordIndices<'a>(WordsWithBreakIndices<'a>);
+
+impl<'a> WordIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordIndices<'a> {
+ WordIndices(WordsWithBreakIndices::new(bs))
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".word_indices();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.0.as_bytes()
+ }
+}
+
+impl<'a> Iterator for WordIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ while let Some((start, end, word)) = self.0.next() {
+ if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ return Some((start, end, word));
+ }
+ }
+ None
+ }
+}
+
+/// An iterator over all word breaks in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks).
+///
+/// This iterator yields not only all words, but the content that comes between
+/// words. In particular, if all elements yielded by this iterator are
+/// concatenated, then the result is the original string (subject to Unicode
+/// replacement codepoint substitutions).
+///
+/// Since words are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordsWithBreaks<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> WordsWithBreaks<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreaks<'a> {
+ WordsWithBreaks { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words_with_breaks();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b" bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for WordsWithBreaks<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let (word, size) = decode_word(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(word)
+ }
+}
+
+/// An iterator over all word breaks in a byte string, along with their byte
+/// index positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices).
+///
+/// This iterator yields not only all words, but the content that comes between
+/// words. In particular, if all elements yielded by this iterator are
+/// concatenated, then the result is the original string (subject to Unicode
+/// replacement codepoint substitutions).
+///
+/// Since words are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// word yielded with those indices. For example, when this iterator encounters
+/// `\xFF` in the byte string, then it will yield a pair of indices ranging
+/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`,
+/// which is three bytes in length. However, when given only valid UTF-8, then
+/// all indices are in exact correspondence with their paired word.
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordsWithBreakIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+}
+
+impl<'a> WordsWithBreakIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreakIndices<'a> {
+ WordsWithBreakIndices { bs: bs, forward_index: 0 }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words_with_break_indices();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b" bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for WordsWithBreakIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ let index = self.forward_index;
+ let (word, size) = decode_word(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, word))
+ }
+}
+
+fn decode_word(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(end) = WORD_BREAK_FWD.find(bs) {
+ // Safe because a match can only occur for valid UTF-8.
+ let word = unsafe { bs[..end].to_str_unchecked() };
+ (word, word.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ucd_parse::WordBreakTest;
+
+ use crate::ext_slice::ByteSlice;
+
+ #[test]
+ fn forward_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.words.concat();
+ let got = words(given.as_bytes());
+ assert_eq!(
+ test.words,
+ got,
+ "\n\nword forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ given,
+ strs_to_bstrs(&test.words),
+ strs_to_bstrs(&got),
+ );
+ }
+ }
+
+ // Some additional tests that don't seem to be covered by the UCD tests.
+ //
+ // It's pretty amazing that the UCD tests miss these cases. I only found
+ // them by running this crate's segmenter and ICU's segmenter on the same
+ // text and comparing the output.
+ #[test]
+ fn forward_additional() {
+ assert_eq!(vec!["a", ".", " ", "Y"], words(b"a. Y"));
+ assert_eq!(vec!["r", ".", " ", "Yo"], words(b"r. Yo"));
+ assert_eq!(
+ vec!["whatsoever", ".", " ", "You", " ", "may"],
+ words(b"whatsoever. You may")
+ );
+ assert_eq!(
+ vec!["21stcentury'syesterday"],
+ words(b"21stcentury'syesterday")
+ );
+
+ assert_eq!(vec!["Bonta_", "'", "s"], words(b"Bonta_'s"));
+ assert_eq!(vec!["_vhat's"], words(b"_vhat's"));
+ assert_eq!(vec!["__on'anima"], words(b"__on'anima"));
+ assert_eq!(vec!["123_", "'", "4"], words(b"123_'4"));
+ assert_eq!(vec!["_123'4"], words(b"_123'4"));
+ assert_eq!(vec!["__12'345"], words(b"__12'345"));
+
+ assert_eq!(
+ vec!["tomorrowat4", ":", "00", ","],
+ words(b"tomorrowat4:00,")
+ );
+ assert_eq!(vec!["RS1", "'", "s"], words(b"RS1's"));
+ assert_eq!(vec!["X38"], words(b"X38"));
+
+ assert_eq!(vec!["4abc", ":", "00", ","], words(b"4abc:00,"));
+ assert_eq!(vec!["12S", "'", "1"], words(b"12S'1"));
+ assert_eq!(vec!["1XY"], words(b"1XY"));
+
+ assert_eq!(vec!["\u{FEFF}", "Ты"], words("\u{FEFF}Ты".as_bytes()));
+ }
+
+ fn words(bytes: &[u8]) -> Vec<&str> {
+ bytes.words_with_breaks().collect()
+ }
+
+ fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
+ strs.iter().map(|s| s.as_ref().as_bytes()).collect()
+ }
+
+ /// Return all of the UCD for word breaks.
+ fn ucdtests() -> Vec<WordBreakTest> {
+ const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap());
+ }
+ tests
+ }
+}
diff --git a/vendor/bstr/src/utf8.rs b/vendor/bstr/src/utf8.rs
new file mode 100644
index 000000000..5c7de363d
--- /dev/null
+++ b/vendor/bstr/src/utf8.rs
@@ -0,0 +1,1370 @@
+use core::char;
+use core::cmp;
+use core::fmt;
+use core::str;
+#[cfg(feature = "std")]
+use std::error;
+
+use crate::ascii;
+use crate::bstr::BStr;
+use crate::ext_slice::ByteSlice;
+
+// The UTF-8 decoder provided here is based on the one presented here:
+// https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+//
+// We *could* have done UTF-8 decoding by using a DFA generated by `\p{any}`
+// using regex-automata that is roughly the same size. The real benefit of
+// Hoehrmann's formulation is that the byte class mapping below is manually
+// tailored such that each byte's class doubles as a shift to mask out the
+// bits necessary for constructing the leading bits of each codepoint value
+// from the initial byte.
+//
+// There are some minor differences between this implementation and Hoehrmann's
+// formulation.
+//
+// Firstly, we make REJECT have state ID 0, since it makes the state table
+// itself a little easier to read and is consistent with the notion that 0
+// means "false" or "bad."
+//
+// Secondly, when doing bulk decoding, we add a SIMD accelerated ASCII fast
+// path.
+//
+// Thirdly, we pre-multiply the state IDs to avoid a multiplication instruction
+// in the core decoding loop. (Which is what regex-automata would do by
+// default.)
+//
+// Fourthly, we split the byte class mapping and transition table into two
+// arrays because it's clearer.
+//
+// It is unlikely that this is the fastest way to do UTF-8 decoding, however,
+// it is fairly simple.
+
+const ACCEPT: usize = 12;
+const REJECT: usize = 0;
+
+/// SAFETY: The decode below function relies on the correctness of these
+/// equivalence classes.
+#[cfg_attr(rustfmt, rustfmt::skip)]
+const CLASSES: [u8; 256] = [
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+];
+
+/// SAFETY: The decode below function relies on the correctness of this state
+/// machine.
+#[cfg_attr(rustfmt, rustfmt::skip)]
+const STATES_FORWARD: &'static [u8] = &[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 12, 0, 24, 36, 60, 96, 84, 0, 0, 0, 48, 72,
+ 0, 12, 0, 0, 0, 0, 0, 12, 0, 12, 0, 0,
+ 0, 24, 0, 0, 0, 0, 0, 24, 0, 24, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0,
+ 0, 24, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+ 0, 36, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+ 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+/// An iterator over Unicode scalar values in a byte string.
+///
+/// When invalid UTF-8 byte sequences are found, they are substituted with the
+/// Unicode replacement codepoint (`U+FFFD`) using the
+/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html).
+///
+/// This iterator is created by the
+/// [`chars`](trait.ByteSlice.html#method.chars) method provided by the
+/// [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`.
+#[derive(Clone, Debug)]
+pub struct Chars<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Chars<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Chars<'a> {
+ Chars { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut chars = b"abc".chars();
+ ///
+ /// assert_eq!(b"abc", chars.as_bytes());
+ /// chars.next();
+ /// assert_eq!(b"bc", chars.as_bytes());
+ /// chars.next();
+ /// chars.next();
+ /// assert_eq!(b"", chars.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Chars<'a> {
+ type Item = char;
+
+ #[inline]
+ fn next(&mut self) -> Option<char> {
+ let (ch, size) = decode_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(ch)
+ }
+}
+
+impl<'a> DoubleEndedIterator for Chars<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<char> {
+ let (ch, size) = decode_last_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ Some(ch)
+ }
+}
+
+/// An iterator over Unicode scalar values in a byte string and their
+/// byte index positions.
+///
+/// When invalid UTF-8 byte sequences are found, they are substituted with the
+/// Unicode replacement codepoint (`U+FFFD`) using the
+/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html).
+///
+/// Note that this is slightly different from the `CharIndices` iterator
+/// provided by the standard library. Aside from working on possibly invalid
+/// UTF-8, this iterator provides both the corresponding starting and ending
+/// byte indices of each codepoint yielded. The ending position is necessary to
+/// slice the original byte string when invalid UTF-8 bytes are converted into
+/// a Unicode replacement codepoint, since a single replacement codepoint can
+/// substitute anywhere from 1 to 3 invalid bytes (inclusive).
+///
+/// This iterator is created by the
+/// [`char_indices`](trait.ByteSlice.html#method.char_indices) method provided
+/// by the [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`.
+#[derive(Clone, Debug)]
+pub struct CharIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+ reverse_index: usize,
+}
+
+impl<'a> CharIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> CharIndices<'a> {
+ CharIndices { bs: bs, forward_index: 0, reverse_index: bs.len() }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".char_indices();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for CharIndices<'a> {
+ type Item = (usize, usize, char);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, char)> {
+ let index = self.forward_index;
+ let (ch, size) = decode_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, ch))
+ }
+}
+
+impl<'a> DoubleEndedIterator for CharIndices<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<(usize, usize, char)> {
+ let (ch, size) = decode_last_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ self.reverse_index -= size;
+ Some((self.reverse_index, self.reverse_index + size, ch))
+ }
+}
+
+impl<'a> ::core::iter::FusedIterator for CharIndices<'a> {}
+
+/// An iterator over chunks of valid UTF-8 in a byte slice.
+///
+/// See [`utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks).
+#[derive(Clone, Debug)]
+pub struct Utf8Chunks<'a> {
+ pub(super) bytes: &'a [u8],
+}
+
+/// A chunk of valid UTF-8, possibly followed by invalid UTF-8 bytes.
+///
+/// This is yielded by the
+/// [`Utf8Chunks`](struct.Utf8Chunks.html)
+/// iterator, which can be created via the
+/// [`ByteSlice::utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks)
+/// method.
+///
+/// The `'a` lifetime parameter corresponds to the lifetime of the bytes that
+/// are being iterated over.
+#[cfg_attr(test, derive(Debug, PartialEq))]
+pub struct Utf8Chunk<'a> {
+ /// A valid UTF-8 piece, at the start, end, or between invalid UTF-8 bytes.
+ ///
+ /// This is empty between adjacent invalid UTF-8 byte sequences.
+ valid: &'a str,
+ /// A sequence of invalid UTF-8 bytes.
+ ///
+ /// Can only be empty in the last chunk.
+ ///
+ /// Should be replaced by a single unicode replacement character, if not
+ /// empty.
+ invalid: &'a BStr,
+ /// Indicates whether the invalid sequence could've been valid if there
+ /// were more bytes.
+ ///
+ /// Can only be true in the last chunk.
+ incomplete: bool,
+}
+
+impl<'a> Utf8Chunk<'a> {
+ /// Returns the (possibly empty) valid UTF-8 bytes in this chunk.
+ ///
+ /// This may be empty if there are consecutive sequences of invalid UTF-8
+ /// bytes.
+ #[inline]
+ pub fn valid(&self) -> &'a str {
+ self.valid
+ }
+
+ /// Returns the (possibly empty) invalid UTF-8 bytes in this chunk that
+ /// immediately follow the valid UTF-8 bytes in this chunk.
+ ///
+ /// This is only empty when this chunk corresponds to the last chunk in
+ /// the original bytes.
+ ///
+ /// The maximum length of this slice is 3. That is, invalid UTF-8 byte
+ /// sequences greater than 1 always correspond to a valid _prefix_ of
+ /// a valid UTF-8 encoded codepoint. This corresponds to the "substitution
+ /// of maximal subparts" strategy that is described in more detail in the
+ /// docs for the
+ /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
+ /// method.
+ #[inline]
+ pub fn invalid(&self) -> &'a [u8] {
+ self.invalid.as_bytes()
+ }
+
+ /// Returns whether the invalid sequence might still become valid if more
+ /// bytes are added.
+ ///
+ /// Returns true if the end of the input was reached unexpectedly,
+ /// without encountering an unexpected byte.
+ ///
+ /// This can only be the case for the last chunk.
+ #[inline]
+ pub fn incomplete(&self) -> bool {
+ self.incomplete
+ }
+}
+
+impl<'a> Iterator for Utf8Chunks<'a> {
+ type Item = Utf8Chunk<'a>;
+
+ #[inline]
+ fn next(&mut self) -> Option<Utf8Chunk<'a>> {
+ if self.bytes.is_empty() {
+ return None;
+ }
+ match validate(self.bytes) {
+ Ok(()) => {
+ let valid = self.bytes;
+ self.bytes = &[];
+ Some(Utf8Chunk {
+ // SAFETY: This is safe because of the guarantees provided
+ // by utf8::validate.
+ valid: unsafe { str::from_utf8_unchecked(valid) },
+ invalid: [].as_bstr(),
+ incomplete: false,
+ })
+ }
+ Err(e) => {
+ let (valid, rest) = self.bytes.split_at(e.valid_up_to());
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ let valid = unsafe { str::from_utf8_unchecked(valid) };
+ let (invalid_len, incomplete) = match e.error_len() {
+ Some(n) => (n, false),
+ None => (rest.len(), true),
+ };
+ let (invalid, rest) = rest.split_at(invalid_len);
+ self.bytes = rest;
+ Some(Utf8Chunk {
+ valid,
+ invalid: invalid.as_bstr(),
+ incomplete,
+ })
+ }
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ if self.bytes.is_empty() {
+ (0, Some(0))
+ } else {
+ (1, Some(self.bytes.len()))
+ }
+ }
+}
+
+impl<'a> ::core::iter::FusedIterator for Utf8Chunks<'a> {}
+
+/// An error that occurs when UTF-8 decoding fails.
+///
+/// This error occurs when attempting to convert a non-UTF-8 byte
+/// string to a Rust string that must be valid UTF-8. For example,
+/// [`to_str`](trait.ByteSlice.html#method.to_str) is one such method.
+///
+/// # Example
+///
+/// This example shows what happens when a given byte sequence is invalid,
+/// but ends with a sequence that is a possible prefix of valid UTF-8.
+///
+/// ```
+/// use bstr::{B, ByteSlice};
+///
+/// let s = B(b"foobar\xF1\x80\x80");
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), None);
+/// ```
+///
+/// This example shows what happens when a given byte sequence contains
+/// invalid UTF-8.
+///
+/// ```
+/// use bstr::ByteSlice;
+///
+/// let s = b"foobar\xF1\x80\x80quux";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// // The error length reports the maximum number of bytes that correspond to
+/// // a valid prefix of a UTF-8 encoded codepoint.
+/// assert_eq!(err.error_len(), Some(3));
+///
+/// // In contrast to the above which contains a single invalid prefix,
+/// // consider the case of multiple individal bytes that are never valid
+/// // prefixes. Note how the value of error_len changes!
+/// let s = b"foobar\xFF\xFFquux";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), Some(1));
+///
+/// // The fact that it's an invalid prefix does not change error_len even
+/// // when it immediately precedes the end of the string.
+/// let s = b"foobar\xFF";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), Some(1));
+/// ```
+#[derive(Debug, Eq, PartialEq)]
+pub struct Utf8Error {
+ valid_up_to: usize,
+ error_len: Option<usize>,
+}
+
+impl Utf8Error {
+ /// Returns the byte index of the position immediately following the last
+ /// valid UTF-8 byte.
+ ///
+ /// # Example
+ ///
+ /// This examples shows how `valid_up_to` can be used to retrieve a
+ /// possibly empty prefix that is guaranteed to be valid UTF-8:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foobar\xF1\x80\x80quux";
+ /// let err = s.to_str().unwrap_err();
+ ///
+ /// // This is guaranteed to never panic.
+ /// let string = s[..err.valid_up_to()].to_str().unwrap();
+ /// assert_eq!(string, "foobar");
+ /// ```
+ #[inline]
+ pub fn valid_up_to(&self) -> usize {
+ self.valid_up_to
+ }
+
+ /// Returns the total number of invalid UTF-8 bytes immediately following
+ /// the position returned by `valid_up_to`. This value is always at least
+ /// `1`, but can be up to `3` if bytes form a valid prefix of some UTF-8
+ /// encoded codepoint.
+ ///
+ /// If the end of the original input was found before a valid UTF-8 encoded
+ /// codepoint could be completed, then this returns `None`. This is useful
+ /// when processing streams, where a `None` value signals that more input
+ /// might be needed.
+ #[inline]
+ pub fn error_len(&self) -> Option<usize> {
+ self.error_len
+ }
+}
+
+#[cfg(feature = "std")]
+impl error::Error for Utf8Error {
+ fn description(&self) -> &str {
+ "invalid UTF-8"
+ }
+}
+
+impl fmt::Display for Utf8Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "invalid UTF-8 found at byte offset {}", self.valid_up_to)
+ }
+}
+
+/// Returns OK if and only if the given slice is completely valid UTF-8.
+///
+/// If the slice isn't valid UTF-8, then an error is returned that explains
+/// the first location at which invalid UTF-8 was detected.
+pub fn validate(slice: &[u8]) -> Result<(), Utf8Error> {
+ // The fast path for validating UTF-8. It steps through a UTF-8 automaton
+ // and uses a SIMD accelerated ASCII fast path on x86_64. If an error is
+ // detected, it backs up and runs the slower version of the UTF-8 automaton
+ // to determine correct error information.
+ fn fast(slice: &[u8]) -> Result<(), Utf8Error> {
+ let mut state = ACCEPT;
+ let mut i = 0;
+
+ while i < slice.len() {
+ let b = slice[i];
+
+ // ASCII fast path. If we see two consecutive ASCII bytes, then try
+ // to validate as much ASCII as possible very quickly.
+ if state == ACCEPT
+ && b <= 0x7F
+ && slice.get(i + 1).map_or(false, |&b| b <= 0x7F)
+ {
+ i += ascii::first_non_ascii_byte(&slice[i..]);
+ continue;
+ }
+
+ state = step(state, b);
+ if state == REJECT {
+ return Err(find_valid_up_to(slice, i));
+ }
+ i += 1;
+ }
+ if state != ACCEPT {
+ Err(find_valid_up_to(slice, slice.len()))
+ } else {
+ Ok(())
+ }
+ }
+
+ // Given the first position at which a UTF-8 sequence was determined to be
+ // invalid, return an error that correctly reports the position at which
+ // the last complete UTF-8 sequence ends.
+ #[inline(never)]
+ fn find_valid_up_to(slice: &[u8], rejected_at: usize) -> Utf8Error {
+ // In order to find the last valid byte, we need to back up an amount
+ // that guarantees every preceding byte is part of a valid UTF-8
+ // code unit sequence. To do this, we simply locate the last leading
+ // byte that occurs before rejected_at.
+ let mut backup = rejected_at.saturating_sub(1);
+ while backup > 0 && !is_leading_or_invalid_utf8_byte(slice[backup]) {
+ backup -= 1;
+ }
+ let upto = cmp::min(slice.len(), rejected_at.saturating_add(1));
+ let mut err = slow(&slice[backup..upto]).unwrap_err();
+ err.valid_up_to += backup;
+ err
+ }
+
+ // Like top-level UTF-8 decoding, except it correctly reports a UTF-8 error
+ // when an invalid sequence is found. This is split out from validate so
+ // that the fast path doesn't need to keep track of the position of the
+ // last valid UTF-8 byte. In particular, tracking this requires checking
+ // for an ACCEPT state on each byte, which degrades throughput pretty
+ // badly.
+ fn slow(slice: &[u8]) -> Result<(), Utf8Error> {
+ let mut state = ACCEPT;
+ let mut valid_up_to = 0;
+ for (i, &b) in slice.iter().enumerate() {
+ state = step(state, b);
+ if state == ACCEPT {
+ valid_up_to = i + 1;
+ } else if state == REJECT {
+ // Our error length must always be at least 1.
+ let error_len = Some(cmp::max(1, i - valid_up_to));
+ return Err(Utf8Error { valid_up_to, error_len });
+ }
+ }
+ if state != ACCEPT {
+ Err(Utf8Error { valid_up_to, error_len: None })
+ } else {
+ Ok(())
+ }
+ }
+
+ // Advance to the next state given the current state and current byte.
+ fn step(state: usize, b: u8) -> usize {
+ let class = CLASSES[b as usize];
+ // SAFETY: This is safe because 'class' is always <=11 and 'state' is
+ // always <=96. Therefore, the maximal index is 96+11 = 107, where
+ // STATES_FORWARD.len() = 108 such that every index is guaranteed to be
+ // valid by construction of the state machine and the byte equivalence
+ // classes.
+ unsafe {
+ *STATES_FORWARD.get_unchecked(state + class as usize) as usize
+ }
+ }
+
+ fast(slice)
+}
+
+/// UTF-8 decode a single Unicode scalar value from the beginning of a slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, `None` is returned along with the number of bytes that
+/// make up a maximal prefix of a valid UTF-8 code unit sequence. In this case,
+/// the number of bytes consumed is always between 0 and 3, inclusive, where
+/// 0 is only returned when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::decode_utf8;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_utf8(b"\xE2\x98\x83");
+/// assert_eq!(Some('☃'), ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_utf8(b"\xE2\x98");
+/// assert_eq!(None, ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes, while replacing invalid UTF-8 sequences with the replacement
+/// codepoint:
+///
+/// ```
+/// use bstr::{B, decode_utf8};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_utf8(bytes);
+/// bytes = &bytes[size..];
+/// chars.push(ch.unwrap_or('\u{FFFD}'));
+/// }
+/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+/// ```
+#[inline]
+pub fn decode<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+ let slice = slice.as_ref();
+ match slice.get(0) {
+ None => return (None, 0),
+ Some(&b) if b <= 0x7F => return (Some(b as char), 1),
+ _ => {}
+ }
+
+ let (mut state, mut cp, mut i) = (ACCEPT, 0, 0);
+ while i < slice.len() {
+ decode_step(&mut state, &mut cp, slice[i]);
+ i += 1;
+
+ if state == ACCEPT {
+ // SAFETY: This is safe because `decode_step` guarantees that
+ // `cp` is a valid Unicode scalar value in an ACCEPT state.
+ let ch = unsafe { char::from_u32_unchecked(cp) };
+ return (Some(ch), i);
+ } else if state == REJECT {
+ // At this point, we always want to advance at least one byte.
+ return (None, cmp::max(1, i.saturating_sub(1)));
+ }
+ }
+ (None, i)
+}
+
+/// Lossily UTF-8 decode a single Unicode scalar value from the beginning of a
+/// slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, the Unicode replacement codepoint (`U+FFFD`) is returned
+/// along with the number of bytes that make up a maximal prefix of a valid
+/// UTF-8 code unit sequence. In this case, the number of bytes consumed is
+/// always between 0 and 3, inclusive, where 0 is only returned when `slice` is
+/// empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```ignore
+/// use bstr::decode_utf8_lossy;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98\x83");
+/// assert_eq!('☃', ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98");
+/// assert_eq!('\u{FFFD}', ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes, while replacing invalid UTF-8 sequences with the replacement
+/// codepoint:
+///
+/// ```ignore
+/// use bstr::{B, decode_utf8_lossy};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_utf8_lossy(bytes);
+/// bytes = &bytes[size..];
+/// chars.push(ch);
+/// }
+/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+/// ```
+#[inline]
+pub fn decode_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+ match decode(slice) {
+ (Some(ch), size) => (ch, size),
+ (None, size) => ('\u{FFFD}', size),
+ }
+}
+
+/// UTF-8 decode a single Unicode scalar value from the end of a slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, `None` is returned along with the number of bytes that
+/// make up a maximal prefix of a valid UTF-8 code unit sequence. In this case,
+/// the number of bytes consumed is always between 0 and 3, inclusive, where
+/// 0 is only returned when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::decode_last_utf8;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_last_utf8(b"\xE2\x98\x83");
+/// assert_eq!(Some('☃'), ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_last_utf8(b"\xE2\x98");
+/// assert_eq!(None, ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes in reverse, while replacing invalid UTF-8 sequences with the
+/// replacement codepoint:
+///
+/// ```
+/// use bstr::{B, decode_last_utf8};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_last_utf8(bytes);
+/// bytes = &bytes[..bytes.len()-size];
+/// chars.push(ch.unwrap_or('\u{FFFD}'));
+/// }
+/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+/// ```
+#[inline]
+pub fn decode_last<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+ // TODO: We could implement this by reversing the UTF-8 automaton, but for
+ // now, we do it the slow way by using the forward automaton.
+
+ let slice = slice.as_ref();
+ if slice.is_empty() {
+ return (None, 0);
+ }
+ let mut start = slice.len() - 1;
+ let limit = slice.len().saturating_sub(4);
+ while start > limit && !is_leading_or_invalid_utf8_byte(slice[start]) {
+ start -= 1;
+ }
+ let (ch, size) = decode(&slice[start..]);
+ // If we didn't consume all of the bytes, then that means there's at least
+ // one stray byte that never occurs in a valid code unit prefix, so we can
+ // advance by one byte.
+ if start + size != slice.len() {
+ (None, 1)
+ } else {
+ (ch, size)
+ }
+}
+
+/// Lossily UTF-8 decode a single Unicode scalar value from the end of a slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, the Unicode replacement codepoint (`U+FFFD`) is returned
+/// along with the number of bytes that make up a maximal prefix of a valid
+/// UTF-8 code unit sequence. In this case, the number of bytes consumed is
+/// always between 0 and 3, inclusive, where 0 is only returned when `slice` is
+/// empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```ignore
+/// use bstr::decode_last_utf8_lossy;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98\x83");
+/// assert_eq!('☃', ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98");
+/// assert_eq!('\u{FFFD}', ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes in reverse, while replacing invalid UTF-8 sequences with the
+/// replacement codepoint:
+///
+/// ```ignore
+/// use bstr::decode_last_utf8_lossy;
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_last_utf8_lossy(bytes);
+/// bytes = &bytes[..bytes.len()-size];
+/// chars.push(ch);
+/// }
+/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+/// ```
+#[inline]
+pub fn decode_last_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+ match decode_last(slice) {
+ (Some(ch), size) => (ch, size),
+ (None, size) => ('\u{FFFD}', size),
+ }
+}
+
+/// SAFETY: The decode function relies on state being equal to ACCEPT only if
+/// cp is a valid Unicode scalar value.
+#[inline]
+pub fn decode_step(state: &mut usize, cp: &mut u32, b: u8) {
+ let class = CLASSES[b as usize];
+ if *state == ACCEPT {
+ *cp = (0xFF >> class) & (b as u32);
+ } else {
+ *cp = (b as u32 & 0b111111) | (*cp << 6);
+ }
+ *state = STATES_FORWARD[*state + class as usize] as usize;
+}
+
+/// Returns true if and only if the given byte is either a valid leading UTF-8
+/// byte, or is otherwise an invalid byte that can never appear anywhere in a
+/// valid UTF-8 sequence.
+fn is_leading_or_invalid_utf8_byte(b: u8) -> bool {
+ // In the ASCII case, the most significant bit is never set. The leading
+ // byte of a 2/3/4-byte sequence always has the top two most significant
+ // bits set. For bytes that can never appear anywhere in valid UTF-8, this
+ // also returns true, since every such byte has its two most significant
+ // bits set:
+ //
+ // \xC0 :: 11000000
+ // \xC1 :: 11000001
+ // \xF5 :: 11110101
+ // \xF6 :: 11110110
+ // \xF7 :: 11110111
+ // \xF8 :: 11111000
+ // \xF9 :: 11111001
+ // \xFA :: 11111010
+ // \xFB :: 11111011
+ // \xFC :: 11111100
+ // \xFD :: 11111101
+ // \xFE :: 11111110
+ // \xFF :: 11111111
+ (b & 0b1100_0000) != 0b1000_0000
+}
+
+#[cfg(test)]
+mod tests {
+ use std::char;
+
+ use crate::ext_slice::{ByteSlice, B};
+ use crate::tests::LOSSY_TESTS;
+ use crate::utf8::{self, Utf8Error};
+
+ fn utf8e(valid_up_to: usize) -> Utf8Error {
+ Utf8Error { valid_up_to, error_len: None }
+ }
+
+ fn utf8e2(valid_up_to: usize, error_len: usize) -> Utf8Error {
+ Utf8Error { valid_up_to, error_len: Some(error_len) }
+ }
+
+ #[test]
+ fn validate_all_codepoints() {
+ for i in 0..(0x10FFFF + 1) {
+ let cp = match char::from_u32(i) {
+ None => continue,
+ Some(cp) => cp,
+ };
+ let mut buf = [0; 4];
+ let s = cp.encode_utf8(&mut buf);
+ assert_eq!(Ok(()), utf8::validate(s.as_bytes()));
+ }
+ }
+
+ #[test]
+ fn validate_multiple_codepoints() {
+ assert_eq!(Ok(()), utf8::validate(b"abc"));
+ assert_eq!(Ok(()), utf8::validate(b"a\xE2\x98\x83a"));
+ assert_eq!(Ok(()), utf8::validate(b"a\xF0\x9D\x9C\xB7a"));
+ assert_eq!(Ok(()), utf8::validate(b"\xE2\x98\x83\xF0\x9D\x9C\xB7",));
+ assert_eq!(
+ Ok(()),
+ utf8::validate(b"a\xE2\x98\x83a\xF0\x9D\x9C\xB7a",)
+ );
+ assert_eq!(
+ Ok(()),
+ utf8::validate(b"\xEF\xBF\xBD\xE2\x98\x83\xEF\xBF\xBD",)
+ );
+ }
+
+ #[test]
+ fn validate_errors() {
+ // single invalid byte
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xFF"));
+ // single invalid byte after ASCII
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xFF"));
+ // single invalid byte after 2 byte sequence
+ assert_eq!(Err(utf8e2(2, 1)), utf8::validate(b"\xCE\xB2\xFF"));
+ // single invalid byte after 3 byte sequence
+ assert_eq!(Err(utf8e2(3, 1)), utf8::validate(b"\xE2\x98\x83\xFF"));
+ // single invalid byte after 4 byte sequence
+ assert_eq!(Err(utf8e2(4, 1)), utf8::validate(b"\xF0\x9D\x9D\xB1\xFF"));
+
+ // An invalid 2-byte sequence with a valid 1-byte prefix.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xCE\xF0"));
+ // An invalid 3-byte sequence with a valid 2-byte prefix.
+ assert_eq!(Err(utf8e2(0, 2)), utf8::validate(b"\xE2\x98\xF0"));
+ // An invalid 4-byte sequence with a valid 3-byte prefix.
+ assert_eq!(Err(utf8e2(0, 3)), utf8::validate(b"\xF0\x9D\x9D\xF0"));
+
+ // An overlong sequence. Should be \xE2\x82\xAC, but we encode the
+ // same codepoint value in 4 bytes. This not only tests that we reject
+ // overlong sequences, but that we get valid_up_to correct.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xF0\x82\x82\xAC"));
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xF0\x82\x82\xAC"));
+ assert_eq!(
+ Err(utf8e2(3, 1)),
+ utf8::validate(b"\xE2\x98\x83\xF0\x82\x82\xAC",)
+ );
+
+ // Check that encoding a surrogate codepoint using the UTF-8 scheme
+ // fails validation.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xED\xA0\x80"));
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xED\xA0\x80"));
+ assert_eq!(
+ Err(utf8e2(3, 1)),
+ utf8::validate(b"\xE2\x98\x83\xED\xA0\x80",)
+ );
+
+ // Check that an incomplete 2-byte sequence fails.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xCEa"));
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xCEa"));
+ assert_eq!(
+ Err(utf8e2(3, 1)),
+ utf8::validate(b"\xE2\x98\x83\xCE\xE2\x98\x83",)
+ );
+ // Check that an incomplete 3-byte sequence fails.
+ assert_eq!(Err(utf8e2(0, 2)), utf8::validate(b"\xE2\x98a"));
+ assert_eq!(Err(utf8e2(1, 2)), utf8::validate(b"a\xE2\x98a"));
+ assert_eq!(
+ Err(utf8e2(3, 2)),
+ utf8::validate(b"\xE2\x98\x83\xE2\x98\xE2\x98\x83",)
+ );
+ // Check that an incomplete 4-byte sequence fails.
+ assert_eq!(Err(utf8e2(0, 3)), utf8::validate(b"\xF0\x9D\x9Ca"));
+ assert_eq!(Err(utf8e2(1, 3)), utf8::validate(b"a\xF0\x9D\x9Ca"));
+ assert_eq!(
+ Err(utf8e2(4, 3)),
+ utf8::validate(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C\xE2\x98\x83",)
+ );
+ assert_eq!(
+ Err(utf8e2(6, 3)),
+ utf8::validate(b"foobar\xF1\x80\x80quux",)
+ );
+
+ // Check that an incomplete (EOF) 2-byte sequence fails.
+ assert_eq!(Err(utf8e(0)), utf8::validate(b"\xCE"));
+ assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xCE"));
+ assert_eq!(Err(utf8e(3)), utf8::validate(b"\xE2\x98\x83\xCE"));
+ // Check that an incomplete (EOF) 3-byte sequence fails.
+ assert_eq!(Err(utf8e(0)), utf8::validate(b"\xE2\x98"));
+ assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xE2\x98"));
+ assert_eq!(Err(utf8e(3)), utf8::validate(b"\xE2\x98\x83\xE2\x98"));
+ // Check that an incomplete (EOF) 4-byte sequence fails.
+ assert_eq!(Err(utf8e(0)), utf8::validate(b"\xF0\x9D\x9C"));
+ assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xF0\x9D\x9C"));
+ assert_eq!(
+ Err(utf8e(4)),
+ utf8::validate(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C",)
+ );
+
+ // Test that we errors correct even after long valid sequences. This
+ // checks that our "backup" logic for detecting errors is correct.
+ assert_eq!(
+ Err(utf8e2(8, 1)),
+ utf8::validate(b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF",)
+ );
+ }
+
+ #[test]
+ fn decode_valid() {
+ fn d(mut s: &str) -> Vec<char> {
+ let mut chars = vec![];
+ while !s.is_empty() {
+ let (ch, size) = utf8::decode(s.as_bytes());
+ s = &s[size..];
+ chars.push(ch.unwrap());
+ }
+ chars
+ }
+
+ assert_eq!(vec!['☃'], d("☃"));
+ assert_eq!(vec!['☃', '☃'], d("☃☃"));
+ assert_eq!(vec!['α', 'β', 'γ', 'δ', 'ε'], d("αβγδε"));
+ assert_eq!(vec!['☃', '⛄', '⛇'], d("☃⛄⛇"));
+ assert_eq!(vec!['𝗮', '𝗯', '𝗰', '𝗱', '𝗲'], d("𝗮𝗯𝗰𝗱𝗲"));
+ }
+
+ #[test]
+ fn decode_invalid() {
+ let (ch, size) = utf8::decode(b"");
+ assert_eq!(None, ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode(b"\xFF");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xCE\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xE2\x98\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x9D\x9D");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x9D\x9D\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x82\x82\xAC");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xED\xA0\x80");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xCEa");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xE2\x98a");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x9D\x9Ca");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn decode_lossy() {
+ let (ch, size) = utf8::decode_lossy(b"");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xFF");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xCE\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xE2\x98\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xF0\x9D\x9D\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xF0\x82\x82\xAC");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xED\xA0\x80");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xCEa");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xE2\x98a");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xF0\x9D\x9Ca");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn decode_last_valid() {
+ fn d(mut s: &str) -> Vec<char> {
+ let mut chars = vec![];
+ while !s.is_empty() {
+ let (ch, size) = utf8::decode_last(s.as_bytes());
+ s = &s[..s.len() - size];
+ chars.push(ch.unwrap());
+ }
+ chars
+ }
+
+ assert_eq!(vec!['☃'], d("☃"));
+ assert_eq!(vec!['☃', '☃'], d("☃☃"));
+ assert_eq!(vec!['ε', 'δ', 'γ', 'β', 'α'], d("αβγδε"));
+ assert_eq!(vec!['⛇', '⛄', '☃'], d("☃⛄⛇"));
+ assert_eq!(vec!['𝗲', '𝗱', '𝗰', '𝗯', '𝗮'], d("𝗮𝗯𝗰𝗱𝗲"));
+ }
+
+ #[test]
+ fn decode_last_invalid() {
+ let (ch, size) = utf8::decode_last(b"");
+ assert_eq!(None, ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode_last(b"\xFF");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xCE\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xCE");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xE2\x98\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xE2\x98");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last(b"\xF0\x9D\x9D\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xF0\x9D\x9D");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode_last(b"\xF0\x82\x82\xAC");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xED\xA0\x80");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xED\xA0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xED");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"a\xCE");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"a\xE2\x98");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last(b"a\xF0\x9D\x9C");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn decode_last_lossy() {
+ let (ch, size) = utf8::decode_last_lossy(b"");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xFF");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xCE\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xCE");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xE2\x98\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xE2\x98");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xF0\x9D\x9D\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xF0\x9D\x9D");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xF0\x82\x82\xAC");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xED\xA0\x80");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xED\xA0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xED");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"a\xCE");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"a\xE2\x98");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"a\xF0\x9D\x9C");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn chars() {
+ for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
+ let got: String = B(input).chars().collect();
+ assert_eq!(
+ expected, got,
+ "chars(ith: {:?}, given: {:?})",
+ i, input,
+ );
+ let got: String =
+ B(input).char_indices().map(|(_, _, ch)| ch).collect();
+ assert_eq!(
+ expected, got,
+ "char_indices(ith: {:?}, given: {:?})",
+ i, input,
+ );
+
+ let expected: String = expected.chars().rev().collect();
+
+ let got: String = B(input).chars().rev().collect();
+ assert_eq!(
+ expected, got,
+ "chars.rev(ith: {:?}, given: {:?})",
+ i, input,
+ );
+ let got: String =
+ B(input).char_indices().rev().map(|(_, _, ch)| ch).collect();
+ assert_eq!(
+ expected, got,
+ "char_indices.rev(ith: {:?}, given: {:?})",
+ i, input,
+ );
+ }
+ }
+
+ #[test]
+ fn utf8_chunks() {
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xC0" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xC0".as_bstr(),
+ incomplete: false,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xFF\xFF" };
+ assert_eq!(
+ (c.next(), c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xFF".as_bstr(),
+ incomplete: false,
+ }),
+ Some(utf8::Utf8Chunk {
+ valid: "",
+ invalid: b"\xFF".as_bstr(),
+ incomplete: false,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xD0" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xD0".as_bstr(),
+ incomplete: true,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xD0456" };
+ assert_eq!(
+ (c.next(), c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xD0".as_bstr(),
+ incomplete: false,
+ }),
+ Some(utf8::Utf8Chunk {
+ valid: "456",
+ invalid: b"".as_bstr(),
+ incomplete: false,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xE2\x98" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xE2\x98".as_bstr(),
+ incomplete: true,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xF4\x8F\xBF" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xF4\x8F\xBF".as_bstr(),
+ incomplete: true,
+ }),
+ None,
+ )
+ );
+ }
+}