summaryrefslogtreecommitdiffstats
path: root/vendor/ucd-parse
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:57:31 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:57:31 +0000
commitdc0db358abe19481e475e10c32149b53370f1a1c (patch)
treeab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/ucd-parse
parentReleasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff)
downloadrustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz
rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ucd-parse')
-rw-r--r--vendor/ucd-parse/.cargo-checksum.json2
-rw-r--r--vendor/ucd-parse/Cargo.toml32
-rw-r--r--vendor/ucd-parse/README.md8
-rw-r--r--vendor/ucd-parse/src/arabic_shaping.rs12
-rw-r--r--vendor/ucd-parse/src/bidi_mirroring_glyph.rs12
-rw-r--r--vendor/ucd-parse/src/case_folding.rs12
-rw-r--r--vendor/ucd-parse/src/common.rs54
-rw-r--r--vendor/ucd-parse/src/east_asian_width.rs63
-rw-r--r--vendor/ucd-parse/src/extracted/derived_bidi_class.rs61
-rw-r--r--vendor/ucd-parse/src/extracted/derived_binary_properties.rs66
-rw-r--r--vendor/ucd-parse/src/extracted/derived_combining_class.rs65
-rw-r--r--vendor/ucd-parse/src/extracted/derived_decomposition_type.rs66
-rw-r--r--vendor/ucd-parse/src/extracted/derived_east_asian_width.rs66
-rw-r--r--vendor/ucd-parse/src/extracted/derived_general_category.rs65
-rw-r--r--vendor/ucd-parse/src/extracted/derived_joining_group.rs64
-rw-r--r--vendor/ucd-parse/src/extracted/derived_joining_type.rs64
-rw-r--r--vendor/ucd-parse/src/extracted/derived_line_break.rs61
-rw-r--r--vendor/ucd-parse/src/extracted/derived_name.rs61
-rw-r--r--vendor/ucd-parse/src/extracted/derived_numeric_type.rs65
-rw-r--r--vendor/ucd-parse/src/extracted/derived_numeric_values.rs92
-rw-r--r--vendor/ucd-parse/src/extracted/mod.rs30
-rw-r--r--vendor/ucd-parse/src/jamo_short_name.rs12
-rw-r--r--vendor/ucd-parse/src/lib.rs4
-rw-r--r--vendor/ucd-parse/src/name_aliases.rs12
-rw-r--r--vendor/ucd-parse/src/property_aliases.rs17
-rw-r--r--vendor/ucd-parse/src/property_value_aliases.rs25
-rw-r--r--vendor/ucd-parse/src/special_casing.rs12
-rw-r--r--vendor/ucd-parse/src/unicode_data.rs33
28 files changed, 1023 insertions, 113 deletions
diff --git a/vendor/ucd-parse/.cargo-checksum.json b/vendor/ucd-parse/.cargo-checksum.json
index 34cd1d5b5..bd0940ddd 100644
--- a/vendor/ucd-parse/.cargo-checksum.json
+++ b/vendor/ucd-parse/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"Cargo.toml":"3a23e75f3807a38f86e8564a139135970f38c9ebc448749682b75fd4096f6d4a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"5af803e482641f01332bde35cc8137211714b6f100122ec548c9712a09aead55","src/age.rs":"13a9a01b2373e9eff06b547543479394843cb9103c200b3e666ca5e408369bc9","src/arabic_shaping.rs":"31075e05b33248540f10ae5a3bb14715965e109b2be40cd9c0735810903ce29b","src/bidi_mirroring_glyph.rs":"945a110e0f54eabc2f48719893da300c11b4fd1f28265ab8f7b32ce2e5e3f6e5","src/case_folding.rs":"1ec85e0fa8e8cb0315974b843d168d9cddecad40efcf8ce78de677c0f0417f34","src/common.rs":"40782238affb569c9bd89a7ce19202677ba3e1da0bb5c8f8c4439adaa375858b","src/core_properties.rs":"24b261ed0bc4b7443734d843cda58433c7727914524ac4c3cc46fc153463e8cd","src/emoji_properties.rs":"bdb24a301661592d0956db2ad945a86778e0ad8f86cd82077835bb0d2a4f144c","src/error.rs":"6df32d4c5cc9819832083f465aa4ce11d26d3b44e37a9d4274a45fd8e1314903","src/grapheme_cluster_break.rs":"f63f75f1a5a82b698d4a840b063bc650f2b2f64429830dc338c9723bf1368e0b","src/jamo_short_name.rs":"02dc272c1a7d01de5e22737a76327b94ae2d132703dbc0657e3e887ceb1d1d91","src/lib.rs":"894ecd08e4588e14de69e8b9d25e9a38e9e2f73e260855c99df13c2ee1d825d3","src/line_break.rs":"1def7f73d44c1703fd18dbd9c9fc8dd76edabed27a5061564d6521d59335a95c","src/name_aliases.rs":"497629a0499d048b0f1615c619975f149c6a1d97361b7ff16850a8291796c90d","src/prop_list.rs":"856f00f51e7e6b9b0386a9b3941582eba63eb96896c86e58a791384a1235fdec","src/property_aliases.rs":"7b6da97e45a898499f29e30346f1b7aa6b7d758184a3bfa4f0b816d20edc9851","src/property_value_aliases.rs":"4e9fbad2b32ad636e5f8dfefa082352e444e4a68822a7786ea7d4217e7afd2fb","src/script_extensions.rs":"d967e213122702df642c975765fec28811ae8351f6f5307ca67989bf0b456fba","src/scripts.rs":"04740c080bb48e99d84622e4708215b40abdd387c70347d6b264b9c7fcbbac37","src/sentence_break.rs":"ac54a7f09f75694582904509d979c6
1784fa1ec647e4d531ea1b283bc3082635","src/special_casing.rs":"de7ed50ec34a222c73e8ad6d82a2a658b4475ce312301c5110d07fa13e51cb0b","src/unicode_data.rs":"cad99e17c6d56c9029416a0f3ec1b469786864eace2a20f212f2b4a1c96b59f1","src/word_break.rs":"eea514f238dc9dea82f52efc3154fde3f215b068dd201b22c31ef1c0acf1fba3"},"package":"5269f8d35df6b8b60758343a6d742ecf09e4bca13faee32af5503aebd1e11b7c"} \ No newline at end of file
+{"files":{"Cargo.toml":"8cb870371f3e11a5de703ff9282be1e83e20705bfa819c44bbaee61aca4e8758","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"8c52a24f51bf5374f687c5a7be756b40a5433e5afe20355f08f50cfaa46416ab","src/age.rs":"13a9a01b2373e9eff06b547543479394843cb9103c200b3e666ca5e408369bc9","src/arabic_shaping.rs":"a9ed7eb9d56aa80c960010976be0b817a1242a41288d3badb4234ccd34fd9c2b","src/bidi_mirroring_glyph.rs":"890ad46a8009fa00b1f5bf1664bd85606a9ae3992c5e613aa5daebdc83b65dc4","src/case_folding.rs":"ecbec1c8943abdc432c542adeded1c3027ba1c0748019fab8cf67e722e1ff24d","src/common.rs":"1e742dd23ac63348743dced41f6f97db0d87cd2374ca622bd254f68526f25e04","src/core_properties.rs":"24b261ed0bc4b7443734d843cda58433c7727914524ac4c3cc46fc153463e8cd","src/east_asian_width.rs":"343e0106e3c620c2ee05fa5d51a744aa17a2e62d8025a6a91b6b5d9886e60f57","src/emoji_properties.rs":"bdb24a301661592d0956db2ad945a86778e0ad8f86cd82077835bb0d2a4f144c","src/error.rs":"6df32d4c5cc9819832083f465aa4ce11d26d3b44e37a9d4274a45fd8e1314903","src/extracted/derived_bidi_class.rs":"e1f908966800c2fd56aae109c60a99d4638402a05df810351063abab71bb16cd","src/extracted/derived_binary_properties.rs":"f23fd8a8959c9d07a482c6f34a3139400b8b4853c15abb3d9eb165a7d5fd269a","src/extracted/derived_combining_class.rs":"f0ea9f1422d3228d49219d52d678e7001fd018fbd569baad62b4d285bcb13632","src/extracted/derived_decomposition_type.rs":"5223d2fa6ce152c2edd043789de870e7433796e05948fdcafd55483228f8d3b8","src/extracted/derived_east_asian_width.rs":"ab9bce7f326ca7643c3b4257231863558f7868d00825f5290523e0913519810c","src/extracted/derived_general_category.rs":"e435252e67559538f08f312274873dabd3976e06d08c3e71919b20e0a19e344a","src/extracted/derived_joining_group.rs":"f2f79069dfbfba3b646e37995a56731e17bcf7dfc175c4109c37d1e15537adc2","src/extracted/derived_joining_type.rs":"e057dfa2b509826dd878b90d65832fd7e456018aa1af4e0d771f3a
8dd67d94df","src/extracted/derived_line_break.rs":"0acb821357ab3d89545eb12fb7b07529fc5e9aa814381bd0e4231a0ce5841669","src/extracted/derived_name.rs":"04f658c2e425692ac0469b886a1edfdfc78d44ace844a7edec69bdacef18d5f1","src/extracted/derived_numeric_type.rs":"75b01173ab21370882cc5e94cf56f2c523baa40f6f67d562aaf2ca24533992fe","src/extracted/derived_numeric_values.rs":"9344564aeb7c6d9323032b2cbd4027c4bed60e817fec040e3484b8a5bd2f0f53","src/extracted/mod.rs":"f1792fd03413c2ce65ca281eaf989382770d1a6a06998d99a95e6b5bfd8f6e46","src/grapheme_cluster_break.rs":"f63f75f1a5a82b698d4a840b063bc650f2b2f64429830dc338c9723bf1368e0b","src/jamo_short_name.rs":"026cb03606c2e5bb62e5f4d58268227d3bea0e62b1797e8e06ca0dbe0e4077c7","src/lib.rs":"47d1caedeb193d532def34453c51227c2be9861732ae2780900463e97c3803f5","src/line_break.rs":"1def7f73d44c1703fd18dbd9c9fc8dd76edabed27a5061564d6521d59335a95c","src/name_aliases.rs":"db32345e01babc0eb29b9deb937ddc82add834dde0d15b69f112070ecf292874","src/prop_list.rs":"856f00f51e7e6b9b0386a9b3941582eba63eb96896c86e58a791384a1235fdec","src/property_aliases.rs":"b05e2e5399d079e283d351d383ef13a289af1ed2c7632c1e860cf8cb29090b70","src/property_value_aliases.rs":"1ec2dc8d6b2073f8cfff985d6069565b8a1408871f8a00877591a6ac7ccf4f36","src/script_extensions.rs":"d967e213122702df642c975765fec28811ae8351f6f5307ca67989bf0b456fba","src/scripts.rs":"04740c080bb48e99d84622e4708215b40abdd387c70347d6b264b9c7fcbbac37","src/sentence_break.rs":"ac54a7f09f75694582904509d979c61784fa1ec647e4d531ea1b283bc3082635","src/special_casing.rs":"b3387ca69e67542f44512480639dc80d11c721b9afafe11132708af062f95f7a","src/unicode_data.rs":"47bb0f3e824cb12836c190ad46841d212f6809e7568b867b0bef71e1ba327ecb","src/word_break.rs":"eea514f238dc9dea82f52efc3154fde3f215b068dd201b22c31ef1c0acf1fba3"},"package":"fc2d0556a998f4c55500ce1730901ba32bafbe820068cbdc091421525d61253b"} \ No newline at end of file
diff --git a/vendor/ucd-parse/Cargo.toml b/vendor/ucd-parse/Cargo.toml
index f7efa0970..5b2520fcc 100644
--- a/vendor/ucd-parse/Cargo.toml
+++ b/vendor/ucd-parse/Cargo.toml
@@ -3,29 +3,39 @@
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies
+# to registry (e.g., crates.io) dependencies.
#
-# If you believe there's an error in this file please file an
-# issue against the rust-lang/cargo repository. If you're
-# editing this file be aware that the upstream Cargo.toml
-# will likely look very different (and much more reasonable)
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "ucd-parse"
-version = "0.1.8"
+version = "0.1.10"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
-description = "A library for parsing data files in the Unicode character database.\n"
+description = """
+A library for parsing data files in the Unicode character database.
+"""
homepage = "https://github.com/BurntSushi/ucd-generate"
documentation = "https://docs.rs/ucd-parse"
readme = "README.md"
-keywords = ["unicode", "database", "character", "property"]
-license = "MIT/Apache-2.0"
+keywords = [
+ "unicode",
+ "database",
+ "character",
+ "property",
+]
+license = "MIT OR Apache-2.0"
repository = "https://github.com/BurntSushi/ucd-generate"
-[dependencies.lazy_static]
+
+[dependencies.once_cell]
version = "1"
[dependencies.regex]
version = "1"
-features = ["std", "unicode"]
+features = [
+ "std",
+ "unicode",
+]
default-features = false
diff --git a/vendor/ucd-parse/README.md b/vendor/ucd-parse/README.md
index dc3f78dc6..0ce9cfef3 100644
--- a/vendor/ucd-parse/README.md
+++ b/vendor/ucd-parse/README.md
@@ -3,8 +3,8 @@ ucd-parse
A library for parsing Unicode Character Database (UCD) files into structured
data.
-[![Linux build status](https://api.travis-ci.org/BurntSushi/ucd-generate.png)](https://travis-ci.org/BurntSushi/ucd-generate)
-[![](http://meritbadge.herokuapp.com/ucd-generate)](https://crates.io/crates/ucd-parse)
+[![Build status](https://github.com/BurntSushi/ucd-generate/workflows/ci/badge.svg)](https://github.com/BurntSushi/ucd-generate/actions)
+[![crates.io](https://img.shields.io/crates/v/ucd-parse.svg)](https://crates.io/crates/ucd-parse)
### Documentation
@@ -16,7 +16,7 @@ https://docs.rs/ucd-parse
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
- http://www.apache.org/licenses/LICENSE-2.0)
+ https://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or
- http://opensource.org/licenses/MIT)
+ https://opensource.org/licenses/MIT)
at your option.
diff --git a/vendor/ucd-parse/src/arabic_shaping.rs b/vendor/ucd-parse/src/arabic_shaping.rs
index d1d942a82..1885c022e 100644
--- a/vendor/ucd-parse/src/arabic_shaping.rs
+++ b/vendor/ucd-parse/src/arabic_shaping.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
@@ -93,8 +93,8 @@ impl FromStr for ArabicShaping {
type Err = Error;
fn from_str(line: &str) -> Result<ArabicShaping, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<codepoint>[A-F0-9]+)\s*;
@@ -102,10 +102,10 @@ impl FromStr for ArabicShaping {
\s*(?P<joining_type>[^;]+)\s*;
\s*(?P<joining_group>[^;]+)
$
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
None => return err!("invalid ArabicShaping line"),
diff --git a/vendor/ucd-parse/src/bidi_mirroring_glyph.rs b/vendor/ucd-parse/src/bidi_mirroring_glyph.rs
index fcfefffcb..78ad706df 100644
--- a/vendor/ucd-parse/src/bidi_mirroring_glyph.rs
+++ b/vendor/ucd-parse/src/bidi_mirroring_glyph.rs
@@ -2,7 +2,7 @@ use std::fmt;
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
@@ -36,8 +36,8 @@ impl FromStr for BidiMirroring {
type Err = Error;
fn from_str(line: &str) -> Result<BidiMirroring, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<codepoint>[A-F0-9]+)\s*;
@@ -45,10 +45,10 @@ impl FromStr for BidiMirroring {
\s+
\#(?:.+)
$
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
None => return err!("invalid BidiMirroring line"),
diff --git a/vendor/ucd-parse/src/case_folding.rs b/vendor/ucd-parse/src/case_folding.rs
index 813fc81a1..fab72e320 100644
--- a/vendor/ucd-parse/src/case_folding.rs
+++ b/vendor/ucd-parse/src/case_folding.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
@@ -42,17 +42,17 @@ impl FromStr for CaseFold {
type Err = Error;
fn from_str(line: &str) -> Result<CaseFold, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<codepoint>[^\s;]+)\s*;
\s*(?P<status>[^\s;]+)\s*;
\s*(?P<mapping>[^;]+)\s*;
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
diff --git a/vendor/ucd-parse/src/common.rs b/vendor/ucd-parse/src/common.rs
index c18be668e..de38c34c4 100644
--- a/vendor/ucd-parse/src/common.rs
+++ b/vendor/ucd-parse/src/common.rs
@@ -7,7 +7,7 @@ use std::marker::PhantomData;
use std::path::{Path, PathBuf};
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::error::{Error, ErrorKind};
@@ -85,10 +85,9 @@ pub fn ucd_directory_version<D: ?Sized + AsRef<Path>>(
fn ucd_directory_version_inner(
ucd_dir: &Path,
) -> Result<(u64, u64, u64), Error> {
- lazy_static::lazy_static! {
- static ref VERSION_RX: Regex =
- Regex::new(r"-([0-9]+).([0-9]+).([0-9]+).txt").unwrap();
- }
+ static VERSION_RX: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(r"-([0-9]+).([0-9]+).([0-9]+).txt").unwrap()
+ });
let proplist = ucd_dir.join("PropList.txt");
let contents = first_line(&proplist)?;
@@ -140,16 +139,16 @@ fn first_line(path: &Path) -> Result<String, Error> {
pub fn parse_codepoint_association<'a>(
line: &'a str,
) -> Result<(Codepoints, &'a str), Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<codepoints>[^\s;]+)\s*;
\s*(?P<property>[^;\x23]+)\s*
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
@@ -184,8 +183,8 @@ pub fn parse_codepoint_sequence(s: &str) -> Result<Vec<Codepoint>, Error> {
/// with the comment associated with the test. The comment is a human readable
/// description of the test that may prove useful for debugging.
pub fn parse_break_test(line: &str) -> Result<(Vec<String>, String), Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
(?:÷|×)
@@ -193,16 +192,18 @@ pub fn parse_break_test(line: &str) -> Result<(Vec<String>, String), Error> {
\s+
\#(?P<comment>.+)
$
- "
+ ",
)
- .unwrap();
- static ref GROUP: Regex = Regex::new(
+ .unwrap()
+ });
+ static GROUP: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
(?P<codepoint>[0-9A-Fa-f]{4,5})\s(?P<kind>÷|×)
- "
+ ",
)
- .unwrap();
- }
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
@@ -435,7 +436,9 @@ impl PartialEq<(Codepoint, Codepoint)> for Codepoints {
/// A range of Unicode codepoints. The range is inclusive; both ends of the
/// range are guaranteed to be valid codepoints.
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[derive(
+ Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
pub struct CodepointRange {
/// The start of the codepoint range.
pub start: Codepoint,
@@ -456,11 +459,10 @@ impl FromStr for CodepointRange {
type Err = Error;
fn from_str(s: &str) -> Result<CodepointRange, Error> {
- lazy_static! {
- static ref PARTS: Regex =
- Regex::new(r"^(?P<start>[A-Z0-9]+)\.\.(?P<end>[A-Z0-9]+)$")
- .unwrap();
- }
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(r"^(?P<start>[A-Z0-9]+)\.\.(?P<end>[A-Z0-9]+)$")
+ .unwrap()
+ });
let caps = match PARTS.captures(s) {
Some(caps) => caps,
None => return err!("invalid codepoint range: '{}'", s),
@@ -499,7 +501,9 @@ impl PartialEq<(Codepoint, Codepoint)> for CodepointRange {
/// to be in the range `[0, 10FFFF]`.
///
/// Note that unlike Rust's `char` type, this may be a surrogate codepoint.
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[derive(
+ Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
pub struct Codepoint(u32);
impl Codepoint {
diff --git a/vendor/ucd-parse/src/east_asian_width.rs b/vendor/ucd-parse/src/east_asian_width.rs
new file mode 100644
index 000000000..c146e09b9
--- /dev/null
+++ b/vendor/ucd-parse/src/east_asian_width.rs
@@ -0,0 +1,63 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `EastAsianWidth.txt` file, describing the value of the
+/// `East_Asian_Width` property.
+///
+/// Note: All code points, assigned or unassigned, that are not listed in the
+/// `EastAsianWidth.txt` file are given the value "N".
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct EastAsianWidth {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// One of "A", "F", "H", "N", "Na", "W".
+ pub width: String,
+}
+
+impl UcdFile for EastAsianWidth {
+ fn relative_file_path() -> &'static Path {
+ Path::new("EastAsianWidth.txt")
+ }
+}
+
+impl UcdFileByCodepoint for EastAsianWidth {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for EastAsianWidth {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<EastAsianWidth, Error> {
+ let (codepoints, width) = parse_codepoint_association(line)?;
+ Ok(EastAsianWidth { codepoints, width: width.to_string() })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::EastAsianWidth;
+
+ #[test]
+ fn parse_single() {
+ let line = "27E7;Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET\n";
+ let row: EastAsianWidth = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x27E7);
+ assert_eq!(row.width, "Na");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "1F57B..1F594;N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND\n";
+ let row: EastAsianWidth = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x1F57B, 0x1F594));
+ assert_eq!(row.width, "N");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_bidi_class.rs b/vendor/ucd-parse/src/extracted/derived_bidi_class.rs
new file mode 100644
index 000000000..da3882472
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_bidi_class.rs
@@ -0,0 +1,61 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedBidiClass.txt` file.
+///
+/// This file gives the derived values of the Bidi_Class property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedBidiClass {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Bidi_Class of the codepoints in this entry.
+ pub bidi_class: String,
+}
+
+impl UcdFile for DerivedBidiClass {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedBidiClass.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedBidiClass {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedBidiClass {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedBidiClass, Error> {
+ let (codepoints, bidi_class) = parse_codepoint_association(line)?;
+ Ok(DerivedBidiClass { codepoints, bidi_class: bidi_class.to_string() })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedBidiClass;
+
+ #[test]
+ fn parse_single() {
+ let line = "00B5 ; L # L& MICRO SIGN\n";
+ let row: DerivedBidiClass = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x00B5);
+ assert_eq!(row.bidi_class, "L");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "0030..0039 ; EN # Nd [10] DIGIT ZERO..DIGIT NINE\n";
+ let row: DerivedBidiClass = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x0030, 0x0039));
+ assert_eq!(row.bidi_class, "EN");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_binary_properties.rs b/vendor/ucd-parse/src/extracted/derived_binary_properties.rs
new file mode 100644
index 000000000..19ba8cfd1
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_binary_properties.rs
@@ -0,0 +1,66 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedBinaryProperties.txt` file.
+///
+/// This file indicates whether a codepoint has the Bidi_Mirrored property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedBinaryProperties {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived property of the codepoints in this entry. Currently,
+ /// this is always the string "Bidi_Mirrored".
+ pub property: String,
+}
+
+impl UcdFile for DerivedBinaryProperties {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedBinaryProperties.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedBinaryProperties {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedBinaryProperties {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedBinaryProperties, Error> {
+ let (codepoints, property) = parse_codepoint_association(line)?;
+ Ok(DerivedBinaryProperties {
+ codepoints,
+ property: property.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedBinaryProperties;
+
+ #[test]
+ fn parse_single() {
+ let line =
+ "0028 ; Bidi_Mirrored # Ps LEFT PARENTHESIS\n";
+ let row: DerivedBinaryProperties = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0028);
+ assert_eq!(row.property, "Bidi_Mirrored");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "2A3C..2A3E ; Bidi_Mirrored # Sm [3] INTERIOR PRODUCT..Z NOTATION RELATIONAL COMPOSITION\n";
+ let row: DerivedBinaryProperties = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x2A3C, 0x2A3E));
+ assert_eq!(row.property, "Bidi_Mirrored");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_combining_class.rs b/vendor/ucd-parse/src/extracted/derived_combining_class.rs
new file mode 100644
index 000000000..9a26036d0
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_combining_class.rs
@@ -0,0 +1,65 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedCombiningClass.txt` file.
+///
+/// This file gives the derived values of the Canonical_Combining_Class
+/// property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedCombiningClass {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Canonical_Combining_Class of the codepoints in this entry.
+ pub combining_class: String,
+}
+
+impl UcdFile for DerivedCombiningClass {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedCombiningClass.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedCombiningClass {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedCombiningClass {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedCombiningClass, Error> {
+ let (codepoints, combining_class) = parse_codepoint_association(line)?;
+ Ok(DerivedCombiningClass {
+ codepoints,
+ combining_class: combining_class.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedCombiningClass;
+
+ #[test]
+ fn parse_single() {
+ let line = "0020 ; 0 # Zs SPACE\n";
+ let row: DerivedCombiningClass = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0020);
+ assert_eq!(row.combining_class, "0");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "1DD1..1DF5 ; 230 # Mn [37] COMBINING UR ABOVE..COMBINING UP TACK ABOVE\n";
+ let row: DerivedCombiningClass = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x1DD1, 0x1DF5));
+ assert_eq!(row.combining_class, "230");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_decomposition_type.rs b/vendor/ucd-parse/src/extracted/derived_decomposition_type.rs
new file mode 100644
index 000000000..b0b605aad
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_decomposition_type.rs
@@ -0,0 +1,66 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedDecompositionType.txt` file.
+///
+/// This file gives the derived values of the Decomposition_Type
+/// property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedDecompositionType {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Decomposition_Type of the codepoints in this entry.
+ pub decomposition_type: String,
+}
+
+impl UcdFile for DerivedDecompositionType {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedDecompositionType.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedDecompositionType {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedDecompositionType {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedDecompositionType, Error> {
+ let (codepoints, decomposition_type) =
+ parse_codepoint_association(line)?;
+ Ok(DerivedDecompositionType {
+ codepoints,
+ decomposition_type: decomposition_type.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedDecompositionType;
+
+ #[test]
+ fn parse_single() {
+ let line = "00A0 ; Nobreak # Zs NO-BREAK SPACE\n";
+ let row: DerivedDecompositionType = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x00A0);
+ assert_eq!(row.decomposition_type, "Nobreak");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "3070..3071 ; Canonical # Lo [2] HIRAGANA LETTER BA..HIRAGANA LETTER PA\n";
+ let row: DerivedDecompositionType = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x3070, 0x3071));
+ assert_eq!(row.decomposition_type, "Canonical");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_east_asian_width.rs b/vendor/ucd-parse/src/extracted/derived_east_asian_width.rs
new file mode 100644
index 000000000..c4ce8229c
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_east_asian_width.rs
@@ -0,0 +1,66 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedEastAsianWidth.txt` file.
+///
+/// This file gives the derived values of the East_Asian_Width
+/// property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedEastAsianWidth {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived East_Asian_Width of the codepoints in this entry.
+ pub east_asian_width: String,
+}
+
+impl UcdFile for DerivedEastAsianWidth {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedEastAsianWidth.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedEastAsianWidth {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedEastAsianWidth {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedEastAsianWidth, Error> {
+ let (codepoints, east_asian_width) =
+ parse_codepoint_association(line)?;
+ Ok(DerivedEastAsianWidth {
+ codepoints,
+ east_asian_width: east_asian_width.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedEastAsianWidth;
+
+ #[test]
+ fn parse_single() {
+ let line = "00A0 ; N # Zs NO-BREAK SPACE\n";
+ let row: DerivedEastAsianWidth = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x00A0);
+ assert_eq!(row.east_asian_width, "N");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "FF10..FF19 ; F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE\n";
+ let row: DerivedEastAsianWidth = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0xFF10, 0xFF19));
+ assert_eq!(row.east_asian_width, "F");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_general_category.rs b/vendor/ucd-parse/src/extracted/derived_general_category.rs
new file mode 100644
index 000000000..9a9710e00
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_general_category.rs
@@ -0,0 +1,65 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedGeneralCategory.txt` file.
+///
+/// This file gives the derived values of the General_Category property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedGeneralCategory {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived General_Category of the codepoints in this entry.
+ pub general_category: String,
+}
+
+impl UcdFile for DerivedGeneralCategory {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedGeneralCategory.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedGeneralCategory {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedGeneralCategory {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedGeneralCategory, Error> {
+ let (codepoints, general_category) =
+ parse_codepoint_association(line)?;
+ Ok(DerivedGeneralCategory {
+ codepoints,
+ general_category: general_category.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedGeneralCategory;
+
+ #[test]
+ fn parse_single() {
+ let line = "04D9 ; Ll # CYRILLIC SMALL LETTER SCHWA\n";
+ let row: DerivedGeneralCategory = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x04D9);
+ assert_eq!(row.general_category, "Ll");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "0660..0669 ; Nd # [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE";
+ let row: DerivedGeneralCategory = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x0660, 0x0669));
+ assert_eq!(row.general_category, "Nd");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_joining_group.rs b/vendor/ucd-parse/src/extracted/derived_joining_group.rs
new file mode 100644
index 000000000..7707ac76e
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_joining_group.rs
@@ -0,0 +1,64 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedJoiningGroup.txt` file.
+///
+/// This file gives the derived values of the Joining_Group property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedJoiningGroup {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Joining_Group of the codepoints in this entry.
+ pub joining_group: String,
+}
+
+impl UcdFile for DerivedJoiningGroup {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedJoiningGroup.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedJoiningGroup {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedJoiningGroup {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedJoiningGroup, Error> {
+ let (codepoints, joining_group) = parse_codepoint_association(line)?;
+ Ok(DerivedJoiningGroup {
+ codepoints,
+ joining_group: joining_group.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedJoiningGroup;
+
+ #[test]
+ fn parse_single() {
+ let line = "0710 ; Alaph # Lo SYRIAC LETTER ALAPH\n";
+ let row: DerivedJoiningGroup = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0710);
+ assert_eq!(row.joining_group, "Alaph");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "0633..0634 ; Seen # Lo [2] ARABIC LETTER SEEN..ARABIC LETTER SHEEN\n";
+ let row: DerivedJoiningGroup = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x0633, 0x0634));
+ assert_eq!(row.joining_group, "Seen");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_joining_type.rs b/vendor/ucd-parse/src/extracted/derived_joining_type.rs
new file mode 100644
index 000000000..82e11b895
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_joining_type.rs
@@ -0,0 +1,64 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedJoiningType.txt` file.
+///
+/// This file gives the derived values of the Joining_Type property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedJoiningType {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Joining_Type of the codepoints in this entry.
+ pub joining_type: String,
+}
+
+impl UcdFile for DerivedJoiningType {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedJoiningType.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedJoiningType {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedJoiningType {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedJoiningType, Error> {
+ let (codepoints, joining_type) = parse_codepoint_association(line)?;
+ Ok(DerivedJoiningType {
+ codepoints,
+ joining_type: joining_type.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedJoiningType;
+
+ #[test]
+ fn parse_single() {
+ let line = "0628 ; D # Lo ARABIC LETTER BEH\n";
+ let row: DerivedJoiningType = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0628);
+ assert_eq!(row.joining_type, "D");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "1133B..1133C ; T # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA\n";
+ let row: DerivedJoiningType = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x1133B, 0x1133C));
+ assert_eq!(row.joining_type, "T");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_line_break.rs b/vendor/ucd-parse/src/extracted/derived_line_break.rs
new file mode 100644
index 000000000..dd1de43af
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_line_break.rs
@@ -0,0 +1,61 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedLineBreak.txt` file.
+///
+/// This file gives the derived values of the Line_Break property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedLineBreak {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Line_Break of the codepoints in this entry.
+ pub line_break: String,
+}
+
+impl UcdFile for DerivedLineBreak {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedLineBreak.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedLineBreak {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedLineBreak {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedLineBreak, Error> {
+ let (codepoints, line_break) = parse_codepoint_association(line)?;
+ Ok(DerivedLineBreak { codepoints, line_break: line_break.to_string() })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedLineBreak;
+
+ #[test]
+ fn parse_single() {
+ let line = "0028 ; OP # Ps LEFT PARENTHESIS\n";
+ let row: DerivedLineBreak = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0028);
+ assert_eq!(row.line_break, "OP");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "0030..0039 ; NU # Nd [10] DIGIT ZERO..DIGIT NINE\n";
+ let row: DerivedLineBreak = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x0030, 0x0039));
+ assert_eq!(row.line_break, "NU");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_name.rs b/vendor/ucd-parse/src/extracted/derived_name.rs
new file mode 100644
index 000000000..713a2afee
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_name.rs
@@ -0,0 +1,61 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedName.txt` file.
+///
+/// This file gives the derived values of the Name property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedName {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Name of the codepoints in this entry.
+ pub name: String,
+}
+
+impl UcdFile for DerivedName {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedName.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedName {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedName {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedName, Error> {
+ let (codepoints, name) = parse_codepoint_association(line)?;
+ Ok(DerivedName { codepoints, name: name.to_string() })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedName;
+
+ #[test]
+ fn parse_single() {
+ let line = "0021 ; EXCLAMATION MARK\n";
+ let row: DerivedName = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0021);
+ assert_eq!(row.name, "EXCLAMATION MARK");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "3400..4DBF ; CJK UNIFIED IDEOGRAPH-*\n";
+ let row: DerivedName = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x3400, 0x4DBF));
+ assert_eq!(row.name, "CJK UNIFIED IDEOGRAPH-*");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_numeric_type.rs b/vendor/ucd-parse/src/extracted/derived_numeric_type.rs
new file mode 100644
index 000000000..554b29a57
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_numeric_type.rs
@@ -0,0 +1,65 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use crate::common::{
+ parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
+ UcdFileByCodepoint,
+};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedNumericType.txt` file.
+///
+/// This file gives the derived values of the Numeric_Type property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedNumericType {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The derived Numeric_Type of the codepoints in this entry.
+ pub numeric_type: String,
+}
+
+impl UcdFile for DerivedNumericType {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedNumericType.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedNumericType {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedNumericType {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedNumericType, Error> {
+ let (codepoints, numeric_type) = parse_codepoint_association(line)?;
+ Ok(DerivedNumericType {
+ codepoints,
+ numeric_type: numeric_type.to_string(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedNumericType;
+
+ #[test]
+ fn parse_single() {
+ let line =
+ "2189 ; Numeric # No VULGAR FRACTION ZERO THIRDS\n";
+ let row: DerivedNumericType = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x2189);
+ assert_eq!(row.numeric_type, "Numeric");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "00B2..00B3 ; Digit # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE\n";
+ let row: DerivedNumericType = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x00B2, 0x00B3));
+ assert_eq!(row.numeric_type, "Digit");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/derived_numeric_values.rs b/vendor/ucd-parse/src/extracted/derived_numeric_values.rs
new file mode 100644
index 000000000..98369a12b
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/derived_numeric_values.rs
@@ -0,0 +1,92 @@
+use std::path::Path;
+use std::str::FromStr;
+
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+use crate::common::{CodepointIter, Codepoints, UcdFile, UcdFileByCodepoint};
+use crate::error::Error;
+
+/// A single row in the `extracted/DerivedNumericValues.txt` file.
+///
+/// This file gives the derived values of the Numeric_Value property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct DerivedNumericValues {
+ /// The codepoint or codepoint range for this entry.
+ pub codepoints: Codepoints,
+ /// The approximate Numeric_Value of the codepoints in this entry,
+ /// as a decimal.
+ pub numeric_value_decimal: String,
+ /// The exact Numeric_Value of the codepoints in this entry, as
+ /// a fraction.
+ pub numeric_value_fraction: String,
+}
+
+impl UcdFile for DerivedNumericValues {
+ fn relative_file_path() -> &'static Path {
+ Path::new("extracted/DerivedNumericValues.txt")
+ }
+}
+
+impl UcdFileByCodepoint for DerivedNumericValues {
+ fn codepoints(&self) -> CodepointIter {
+ self.codepoints.into_iter()
+ }
+}
+
+impl FromStr for DerivedNumericValues {
+ type Err = Error;
+
+ fn from_str(line: &str) -> Result<DerivedNumericValues, Error> {
+ static PARTS: Lazy<Regex> = Lazy::new(|| { // built once, on first use
+ Regex::new(
+ r"(?x)
+ ^
+ \s*(?P<codepoints>[^\s;]+)\s*;
+ \s*(?P<numeric_value_decimal>[^\s;]+)\s*;
+ \s*;
+ \s*(?P<numeric_value_fraction>[^\s;]+)\s*
+ ",
+ )
+ .unwrap()
+ });
+
+ let caps = match PARTS.captures(line.trim()) {
+ Some(caps) => caps,
+ None => return err!("invalid DerivedNumericValues line: '{}'", line),
+ };
+ let codepoints = caps["codepoints"].parse()?;
+ let numeric_value_decimal = caps["numeric_value_decimal"].to_string();
+ let numeric_value_fraction =
+ caps["numeric_value_fraction"].to_string();
+
+ Ok(DerivedNumericValues {
+ codepoints,
+ numeric_value_decimal,
+ numeric_value_fraction,
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::DerivedNumericValues;
+
+ #[test]
+ fn parse_single() {
+ let line = "0030 ; 0.0 ; ; 0 # Nd DIGIT ZERO\n";
+ let row: DerivedNumericValues = line.parse().unwrap();
+ assert_eq!(row.codepoints, 0x0030);
+ assert_eq!(row.numeric_value_decimal, "0.0");
+ assert_eq!(row.numeric_value_fraction, "0");
+ }
+
+ #[test]
+ fn parse_range() {
+ let line = "11FC9..11FCA ; 0.0625 ; ; 1/16 # No [2] TAMIL FRACTION ONE SIXTEENTH-1..TAMIL FRACTION ONE SIXTEENTH-2\n";
+ let row: DerivedNumericValues = line.parse().unwrap();
+ assert_eq!(row.codepoints, (0x11FC9, 0x11FCA));
+ assert_eq!(row.numeric_value_decimal, "0.0625");
+ assert_eq!(row.numeric_value_fraction, "1/16");
+ }
+}
diff --git a/vendor/ucd-parse/src/extracted/mod.rs b/vendor/ucd-parse/src/extracted/mod.rs
new file mode 100644
index 000000000..d9c532cfe
--- /dev/null
+++ b/vendor/ucd-parse/src/extracted/mod.rs
@@ -0,0 +1,30 @@
+//! Structs for parsing files in the `extracted` subdirectory.
+//!
+//! These are placed here, rather than at the top level, to help keep
+//! the number of structs in any given module manageable.
+
+pub use self::derived_bidi_class::DerivedBidiClass;
+pub use self::derived_binary_properties::DerivedBinaryProperties;
+pub use self::derived_combining_class::DerivedCombiningClass;
+pub use self::derived_decomposition_type::DerivedDecompositionType;
+pub use self::derived_east_asian_width::DerivedEastAsianWidth;
+pub use self::derived_general_category::DerivedGeneralCategory;
+pub use self::derived_joining_group::DerivedJoiningGroup;
+pub use self::derived_joining_type::DerivedJoiningType;
+pub use self::derived_line_break::DerivedLineBreak;
+pub use self::derived_name::DerivedName;
+pub use self::derived_numeric_type::DerivedNumericType;
+pub use self::derived_numeric_values::DerivedNumericValues;
+
+mod derived_bidi_class;
+mod derived_binary_properties;
+mod derived_combining_class;
+mod derived_decomposition_type;
+mod derived_east_asian_width;
+mod derived_general_category;
+mod derived_joining_group;
+mod derived_joining_type;
+mod derived_line_break;
+mod derived_name;
+mod derived_numeric_type;
+mod derived_numeric_values;
diff --git a/vendor/ucd-parse/src/jamo_short_name.rs b/vendor/ucd-parse/src/jamo_short_name.rs
index 4103dd7ee..348e941b2 100644
--- a/vendor/ucd-parse/src/jamo_short_name.rs
+++ b/vendor/ucd-parse/src/jamo_short_name.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
@@ -35,17 +35,17 @@ impl FromStr for JamoShortName {
type Err = Error;
fn from_str(line: &str) -> Result<JamoShortName, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
(?P<codepoint>[A-Z0-9]+);
\s*
(?P<name>[A-Z]*)
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
diff --git a/vendor/ucd-parse/src/lib.rs b/vendor/ucd-parse/src/lib.rs
index f6654658a..f53963200 100644
--- a/vendor/ucd-parse/src/lib.rs
+++ b/vendor/ucd-parse/src/lib.rs
@@ -16,6 +16,7 @@ pub use crate::arabic_shaping::ArabicShaping;
pub use crate::bidi_mirroring_glyph::BidiMirroring;
pub use crate::case_folding::{CaseFold, CaseStatus};
pub use crate::core_properties::CoreProperty;
+pub use crate::east_asian_width::EastAsianWidth;
pub use crate::emoji_properties::EmojiProperty;
pub use crate::grapheme_cluster_break::{
GraphemeClusterBreak, GraphemeClusterBreakTest,
@@ -42,6 +43,8 @@ macro_rules! err {
}
}
+pub mod extracted;
+
mod common;
mod error;
@@ -50,6 +53,7 @@ mod arabic_shaping;
mod bidi_mirroring_glyph;
mod case_folding;
mod core_properties;
+mod east_asian_width;
mod emoji_properties;
mod grapheme_cluster_break;
mod jamo_short_name;
diff --git a/vendor/ucd-parse/src/name_aliases.rs b/vendor/ucd-parse/src/name_aliases.rs
index 36c9c4b01..8b50b9394 100644
--- a/vendor/ucd-parse/src/name_aliases.rs
+++ b/vendor/ucd-parse/src/name_aliases.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
@@ -37,8 +37,8 @@ impl FromStr for NameAlias {
type Err = Error;
fn from_str(line: &str) -> Result<NameAlias, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
(?P<codepoint>[A-Z0-9]+);
@@ -46,10 +46,10 @@ impl FromStr for NameAlias {
(?P<alias>[^;]+);
\s*
(?P<label>\S+)
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
diff --git a/vendor/ucd-parse/src/property_aliases.rs b/vendor/ucd-parse/src/property_aliases.rs
index f94a116e6..ff5894016 100644
--- a/vendor/ucd-parse/src/property_aliases.rs
+++ b/vendor/ucd-parse/src/property_aliases.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::UcdFile;
@@ -28,19 +28,20 @@ impl FromStr for PropertyAlias {
type Err = Error;
fn from_str(line: &str) -> Result<PropertyAlias, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<abbrev>[^\s;]+)\s*;
\s*(?P<long>[^\s;]+)\s*
(?:;(?P<aliases>.*))?
- "
+ ",
)
- .unwrap();
- static ref ALIASES: Regex =
- Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap();
- };
+ .unwrap()
+ });
+ static ALIASES: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
diff --git a/vendor/ucd-parse/src/property_value_aliases.rs b/vendor/ucd-parse/src/property_value_aliases.rs
index 7e8a3c890..253afebba 100644
--- a/vendor/ucd-parse/src/property_value_aliases.rs
+++ b/vendor/ucd-parse/src/property_value_aliases.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::UcdFile;
@@ -34,30 +34,33 @@ impl FromStr for PropertyValueAlias {
type Err = Error;
fn from_str(line: &str) -> Result<PropertyValueAlias, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<prop>[^\s;]+)\s*;
\s*(?P<abbrev>[^\s;]+)\s*;
\s*(?P<long>[^\s;]+)\s*
(?:;(?P<aliases>.*))?
- "
+ ",
)
- .unwrap();
- static ref PARTS_CCC: Regex = Regex::new(
+ .unwrap()
+ });
+ static PARTS_CCC: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
ccc;
\s*(?P<num_class>[0-9]+)\s*;
\s*(?P<abbrev>[^\s;]+)\s*;
\s*(?P<long>[^\s;]+)
- "
+ ",
)
- .unwrap();
- static ref ALIASES: Regex =
- Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap();
- };
+ .unwrap()
+ });
+ static ALIASES: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap()
+ });
if line.starts_with("ccc;") {
let caps = match PARTS_CCC.captures(line.trim()) {
diff --git a/vendor/ucd-parse/src/special_casing.rs b/vendor/ucd-parse/src/special_casing.rs
index a8fc61ddb..dbeff2b54 100644
--- a/vendor/ucd-parse/src/special_casing.rs
+++ b/vendor/ucd-parse/src/special_casing.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{
@@ -46,8 +46,8 @@ impl FromStr for SpecialCaseMapping {
type Err = Error;
fn from_str(line: &str) -> Result<SpecialCaseMapping, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
\s*(?P<codepoint>[^\s;]+)\s*;
@@ -55,10 +55,10 @@ impl FromStr for SpecialCaseMapping {
\s*(?P<title>[^;]+)\s*;
\s*(?P<upper>[^;]+)\s*;
\s*(?P<conditions>[^;\x23]+)?
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
diff --git a/vendor/ucd-parse/src/unicode_data.rs b/vendor/ucd-parse/src/unicode_data.rs
index 87910cc1d..9151f60b2 100644
--- a/vendor/ucd-parse/src/unicode_data.rs
+++ b/vendor/ucd-parse/src/unicode_data.rs
@@ -4,7 +4,7 @@ use std::ops::Range;
use std::path::Path;
use std::str::FromStr;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
use regex::Regex;
use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
@@ -14,7 +14,7 @@ use crate::error::Error;
///
/// These fields were taken from UAX44, Table 9, as part of the documentation
/// for the
-/// [`UnicodeData.txt` file](http://www.unicode.org/reports/tr44/#UnicodeData.txt).
+/// [`UnicodeData.txt` file](https://www.unicode.org/reports/tr44/#UnicodeData.txt).
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct UnicodeData {
/// The codepoint corresponding to this row.
@@ -26,12 +26,12 @@ pub struct UnicodeData {
/// The class of this codepoint used in the Canonical Ordering Algorithm.
///
/// Note that some classes map to a particular symbol. See
- /// [UAX44, Table 15](http://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values).
+ /// [UAX44, Table 15](https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values).
pub canonical_combining_class: u8,
/// The bidirectional class of this codepoint.
///
/// Possible values are listed in
- /// [UAX44, Table 13](http://www.unicode.org/reports/tr44/#Bidi_Class_Values).
+ /// [UAX44, Table 13](https://www.unicode.org/reports/tr44/#Bidi_Class_Values).
pub bidi_class: String,
/// The decomposition mapping for this codepoint. This includes its
/// formatting tag (if present).
@@ -99,8 +99,8 @@ impl FromStr for UnicodeData {
type Err = Error;
fn from_str(line: &str) -> Result<UnicodeData, Error> {
- lazy_static! {
- static ref PARTS: Regex = Regex::new(
+ static PARTS: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(
r"(?x)
^
([A-Z0-9]+); # 1; codepoint
@@ -119,10 +119,10 @@ impl FromStr for UnicodeData {
([^;]*); # 14; simple lowercase mapping
([^;]*) # 15; simple titlecase mapping
$
- "
+ ",
)
- .unwrap();
- };
+ .unwrap()
+ });
let caps = match PARTS.captures(line.trim()) {
Some(caps) => caps,
None => return err!("invalid UnicodeData line"),
@@ -301,13 +301,12 @@ impl FromStr for UnicodeDataDecomposition {
type Err = Error;
fn from_str(s: &str) -> Result<UnicodeDataDecomposition, Error> {
- lazy_static! {
- static ref WITH_TAG: Regex = Regex::new(
- r"^(?:<(?P<tag>[^>]+)>)?\s*(?P<chars>[\s0-9A-F]+)$"
- )
- .unwrap();
- static ref CHARS: Regex = Regex::new(r"[0-9A-F]+").unwrap();
- };
+ static WITH_TAG: Lazy<Regex> = Lazy::new(|| {
+ Regex::new(r"^(?:<(?P<tag>[^>]+)>)?\s*(?P<chars>[\s0-9A-F]+)$")
+ .unwrap()
+ });
+ static CHARS: Lazy<Regex> =
+ Lazy::new(|| Regex::new(r"[0-9A-F]+").unwrap());
if s.is_empty() {
return err!(
"expected non-empty string for \
@@ -352,7 +351,7 @@ impl fmt::Display for UnicodeDataDecomposition {
/// The formatting tag on a decomposition mapping.
///
/// This is taken from
-/// [UAX44, Table 14](http://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings).
+/// [UAX44, Table 14](https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum UnicodeDataDecompositionTag {
/// <font>