diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:19 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:19 +0000 |
commit | a0b8f38ab54ac451646aa00cd5e91b6c76f22a84 (patch) | |
tree | fc451898ccaf445814e26b46664d78702178101d /vendor/ucd-parse/src | |
parent | Adding debian version 1.71.1+dfsg1-2. (diff) | |
download | rustc-a0b8f38ab54ac451646aa00cd5e91b6c76f22a84.tar.xz rustc-a0b8f38ab54ac451646aa00cd5e91b6c76f22a84.zip |
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ucd-parse/src')
25 files changed, 997 insertions, 97 deletions
diff --git a/vendor/ucd-parse/src/arabic_shaping.rs b/vendor/ucd-parse/src/arabic_shaping.rs index d1d942a82..1885c022e 100644 --- a/vendor/ucd-parse/src/arabic_shaping.rs +++ b/vendor/ucd-parse/src/arabic_shaping.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; @@ -93,8 +93,8 @@ impl FromStr for ArabicShaping { type Err = Error; fn from_str(line: &str) -> Result<ArabicShaping, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<codepoint>[A-F0-9]+)\s*; @@ -102,10 +102,10 @@ impl FromStr for ArabicShaping { \s*(?P<joining_type>[^;]+)\s*; \s*(?P<joining_group>[^;]+) $ - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, None => return err!("invalid ArabicShaping line"), diff --git a/vendor/ucd-parse/src/bidi_mirroring_glyph.rs b/vendor/ucd-parse/src/bidi_mirroring_glyph.rs index fcfefffcb..78ad706df 100644 --- a/vendor/ucd-parse/src/bidi_mirroring_glyph.rs +++ b/vendor/ucd-parse/src/bidi_mirroring_glyph.rs @@ -2,7 +2,7 @@ use std::fmt; use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; @@ -36,8 +36,8 @@ impl FromStr for BidiMirroring { type Err = Error; fn from_str(line: &str) -> Result<BidiMirroring, Error> { - lazy_static! 
{ - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<codepoint>[A-F0-9]+)\s*; @@ -45,10 +45,10 @@ impl FromStr for BidiMirroring { \s+ \#(?:.+) $ - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, None => return err!("invalid BidiMirroring line"), diff --git a/vendor/ucd-parse/src/case_folding.rs b/vendor/ucd-parse/src/case_folding.rs index 813fc81a1..fab72e320 100644 --- a/vendor/ucd-parse/src/case_folding.rs +++ b/vendor/ucd-parse/src/case_folding.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; @@ -42,17 +42,17 @@ impl FromStr for CaseFold { type Err = Error; fn from_str(line: &str) -> Result<CaseFold, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<codepoint>[^\s;]+)\s*; \s*(?P<status>[^\s;]+)\s*; \s*(?P<mapping>[^;]+)\s*; - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, diff --git a/vendor/ucd-parse/src/common.rs b/vendor/ucd-parse/src/common.rs index c18be668e..de38c34c4 100644 --- a/vendor/ucd-parse/src/common.rs +++ b/vendor/ucd-parse/src/common.rs @@ -7,7 +7,7 @@ use std::marker::PhantomData; use std::path::{Path, PathBuf}; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::error::{Error, ErrorKind}; @@ -85,10 +85,9 @@ pub fn ucd_directory_version<D: ?Sized + AsRef<Path>>( fn ucd_directory_version_inner( ucd_dir: &Path, ) -> Result<(u64, u64, u64), Error> { - lazy_static::lazy_static! 
{ - static ref VERSION_RX: Regex = - Regex::new(r"-([0-9]+).([0-9]+).([0-9]+).txt").unwrap(); - } + static VERSION_RX: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"-([0-9]+).([0-9]+).([0-9]+).txt").unwrap() + }); let proplist = ucd_dir.join("PropList.txt"); let contents = first_line(&proplist)?; @@ -140,16 +139,16 @@ fn first_line(path: &Path) -> Result<String, Error> { pub fn parse_codepoint_association<'a>( line: &'a str, ) -> Result<(Codepoints, &'a str), Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<codepoints>[^\s;]+)\s*; \s*(?P<property>[^;\x23]+)\s* - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, @@ -184,8 +183,8 @@ pub fn parse_codepoint_sequence(s: &str) -> Result<Vec<Codepoint>, Error> { /// with the comment associated with the test. The comment is a human readable /// description of the test that may prove useful for debugging. pub fn parse_break_test(line: &str) -> Result<(Vec<String>, String), Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ (?:÷|×) @@ -193,16 +192,18 @@ pub fn parse_break_test(line: &str) -> Result<(Vec<String>, String), Error> { \s+ \#(?P<comment>.+) $ - " + ", ) - .unwrap(); - static ref GROUP: Regex = Regex::new( + .unwrap() + }); + static GROUP: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) (?P<codepoint>[0-9A-Fa-f]{4,5})\s(?P<kind>÷|×) - " + ", ) - .unwrap(); - } + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, @@ -435,7 +436,9 @@ impl PartialEq<(Codepoint, Codepoint)> for Codepoints { /// A range of Unicode codepoints. The range is inclusive; both ends of the /// range are guaranteed to be valid codepoints. 
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] pub struct CodepointRange { /// The start of the codepoint range. pub start: Codepoint, @@ -456,11 +459,10 @@ impl FromStr for CodepointRange { type Err = Error; fn from_str(s: &str) -> Result<CodepointRange, Error> { - lazy_static! { - static ref PARTS: Regex = - Regex::new(r"^(?P<start>[A-Z0-9]+)\.\.(?P<end>[A-Z0-9]+)$") - .unwrap(); - } + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"^(?P<start>[A-Z0-9]+)\.\.(?P<end>[A-Z0-9]+)$") + .unwrap() + }); let caps = match PARTS.captures(s) { Some(caps) => caps, None => return err!("invalid codepoint range: '{}'", s), @@ -499,7 +501,9 @@ impl PartialEq<(Codepoint, Codepoint)> for CodepointRange { /// to be in the range `[0, 10FFFF]`. /// /// Note that unlike Rust's `char` type, this may be a surrogate codepoint. -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] pub struct Codepoint(u32); impl Codepoint { diff --git a/vendor/ucd-parse/src/east_asian_width.rs b/vendor/ucd-parse/src/east_asian_width.rs new file mode 100644 index 000000000..c146e09b9 --- /dev/null +++ b/vendor/ucd-parse/src/east_asian_width.rs @@ -0,0 +1,63 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `EastAsianWidth.txt` file, describing the value of the +/// `East_Asian_Width` property. +/// +/// Note: All code points, assigned or unassigned, that are not listed in +/// EastAsianWidth.txt file are given the value "N". +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct EastAsianWidth { + /// The codepoint or codepoint range for this entry. 
+ pub codepoints: Codepoints, + /// One of "A", "F", "H", "N", "Na", "W". + pub width: String, +} + +impl UcdFile for EastAsianWidth { + fn relative_file_path() -> &'static Path { + Path::new("EastAsianWidth.txt") + } +} + +impl UcdFileByCodepoint for EastAsianWidth { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for EastAsianWidth { + type Err = Error; + + fn from_str(line: &str) -> Result<EastAsianWidth, Error> { + let (codepoints, width) = parse_codepoint_association(line)?; + Ok(EastAsianWidth { codepoints, width: width.to_string() }) + } +} + +#[cfg(test)] +mod tests { + use super::EastAsianWidth; + + #[test] + fn parse_single() { + let line = "27E7;Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET\n"; + let row: EastAsianWidth = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x27E7); + assert_eq!(row.width, "Na"); + } + + #[test] + fn parse_range() { + let line = "1F57B..1F594;N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND\n"; + let row: EastAsianWidth = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x1F57B, 0x1F594)); + assert_eq!(row.width, "N"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_bidi_class.rs b/vendor/ucd-parse/src/extracted/derived_bidi_class.rs new file mode 100644 index 000000000..da3882472 --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_bidi_class.rs @@ -0,0 +1,61 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedBidiClass.txt` file. +/// +/// This file gives the derived values of the Bidi_Class property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedBidiClass { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Bidi_Class of the codepoints in this entry. 
+ pub bidi_class: String, +} + +impl UcdFile for DerivedBidiClass { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedBidiClass.txt") + } +} + +impl UcdFileByCodepoint for DerivedBidiClass { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedBidiClass { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedBidiClass, Error> { + let (codepoints, bidi_class) = parse_codepoint_association(line)?; + Ok(DerivedBidiClass { codepoints, bidi_class: bidi_class.to_string() }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedBidiClass; + + #[test] + fn parse_single() { + let line = "00B5 ; L # L& MICRO SIGN\n"; + let row: DerivedBidiClass = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x00B5); + assert_eq!(row.bidi_class, "L"); + } + + #[test] + fn parse_range() { + let line = "0030..0039 ; EN # Nd [10] DIGIT ZERO..DIGIT NINE\n"; + let row: DerivedBidiClass = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x0030, 0x0039)); + assert_eq!(row.bidi_class, "EN"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_binary_properties.rs b/vendor/ucd-parse/src/extracted/derived_binary_properties.rs new file mode 100644 index 000000000..19ba8cfd1 --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_binary_properties.rs @@ -0,0 +1,66 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedBinaryProperties.txt` file. +/// +/// This file indicates whether a codepoint has the Bidi_Mirrored property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedBinaryProperties { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived property of the codepoints in this entry. 
Currently, + /// this is always the string "Bidi_Mirrored". + pub property: String, +} + +impl UcdFile for DerivedBinaryProperties { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedBinaryProperties.txt") + } +} + +impl UcdFileByCodepoint for DerivedBinaryProperties { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedBinaryProperties { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedBinaryProperties, Error> { + let (codepoints, property) = parse_codepoint_association(line)?; + Ok(DerivedBinaryProperties { + codepoints, + property: property.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedBinaryProperties; + + #[test] + fn parse_single() { + let line = + "0028 ; Bidi_Mirrored # Ps LEFT PARENTHESIS\n"; + let row: DerivedBinaryProperties = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0028); + assert_eq!(row.property, "Bidi_Mirrored"); + } + + #[test] + fn parse_range() { + let line = "2A3C..2A3E ; Bidi_Mirrored # Sm [3] INTERIOR PRODUCT..Z NOTATION RELATIONAL COMPOSITION\n"; + let row: DerivedBinaryProperties = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x2A3C, 0x2A3E)); + assert_eq!(row.property, "Bidi_Mirrored"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_combining_class.rs b/vendor/ucd-parse/src/extracted/derived_combining_class.rs new file mode 100644 index 000000000..9a26036d0 --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_combining_class.rs @@ -0,0 +1,65 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedCombiningClass.txt` file. +/// +/// This file gives the derived values of the Canonical_Combining_Class +/// property.
+#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedCombiningClass { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Canonical_Combining_Class of the codepoints in this entry. + pub combining_class: String, +} + +impl UcdFile for DerivedCombiningClass { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedCombiningClass.txt") + } +} + +impl UcdFileByCodepoint for DerivedCombiningClass { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedCombiningClass { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedCombiningClass, Error> { + let (codepoints, combining_class) = parse_codepoint_association(line)?; + Ok(DerivedCombiningClass { + codepoints, + combining_class: combining_class.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedCombiningClass; + + #[test] + fn parse_single() { + let line = "0020 ; 0 # Zs SPACE\n"; + let row: DerivedCombiningClass = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0020); + assert_eq!(row.combining_class, "0"); + } + + #[test] + fn parse_range() { + let line = "1DD1..1DF5 ; 230 # Mn [37] COMBINING UR ABOVE..COMBINING UP TACK ABOVE\n"; + let row: DerivedCombiningClass = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x1DD1, 0x1DF5)); + assert_eq!(row.combining_class, "230"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_decomposition_type.rs b/vendor/ucd-parse/src/extracted/derived_decomposition_type.rs new file mode 100644 index 000000000..b0b605aad --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_decomposition_type.rs @@ -0,0 +1,66 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedDecompositionType.txt` file.
+/// +/// This file gives the derived values of the Decomposition_Type +/// property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedDecompositionType { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Decomposition_Type of the codepoints in this entry. + pub decomposition_type: String, +} + +impl UcdFile for DerivedDecompositionType { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedDecompositionType.txt") + } +} + +impl UcdFileByCodepoint for DerivedDecompositionType { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedDecompositionType { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedDecompositionType, Error> { + let (codepoints, decomposition_type) = + parse_codepoint_association(line)?; + Ok(DerivedDecompositionType { + codepoints, + decomposition_type: decomposition_type.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedDecompositionType; + + #[test] + fn parse_single() { + let line = "00A0 ; Nobreak # Zs NO-BREAK SPACE\n"; + let row: DerivedDecompositionType = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x00A0); + assert_eq!(row.decomposition_type, "Nobreak"); + } + + #[test] + fn parse_range() { + let line = "3070..3071 ; Canonical # Lo [2] HIRAGANA LETTER BA..HIRAGANA LETTER PA\n"; + let row: DerivedDecompositionType = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x3070, 0x3071)); + assert_eq!(row.decomposition_type, "Canonical"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_east_asian_width.rs b/vendor/ucd-parse/src/extracted/derived_east_asian_width.rs new file mode 100644 index 000000000..c4ce8229c --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_east_asian_width.rs @@ -0,0 +1,66 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + 
UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedEastAsianWidth.txt` file. +/// +/// This file gives the derived values of the East_Asian_Width +/// property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedEastAsianWidth { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived East_Asian_Width of the codepoints in this entry. + pub east_asian_width: String, +} + +impl UcdFile for DerivedEastAsianWidth { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedEastAsianWidth.txt") + } +} + +impl UcdFileByCodepoint for DerivedEastAsianWidth { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedEastAsianWidth { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedEastAsianWidth, Error> { + let (codepoints, east_asian_width) = + parse_codepoint_association(line)?; + Ok(DerivedEastAsianWidth { + codepoints, + east_asian_width: east_asian_width.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedEastAsianWidth; + + #[test] + fn parse_single() { + let line = "00A0 ; N # Zs NO-BREAK SPACE\n"; + let row: DerivedEastAsianWidth = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x00A0); + assert_eq!(row.east_asian_width, "N"); + } + + #[test] + fn parse_range() { + let line = "FF10..FF19 ; F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE\n"; + let row: DerivedEastAsianWidth = line.parse().unwrap(); + assert_eq!(row.codepoints, (0xFF10, 0xFF19)); + assert_eq!(row.east_asian_width, "F"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_general_category.rs b/vendor/ucd-parse/src/extracted/derived_general_category.rs new file mode 100644 index 000000000..9a9710e00 --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_general_category.rs @@ -0,0 +1,65 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + 
parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedGeneralCategory.txt` file. +/// +/// This file gives the derived values of the General_Category property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedGeneralCategory { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived General_Category of the codepoints in this entry. + pub general_category: String, +} + +impl UcdFile for DerivedGeneralCategory { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedGeneralCategory.txt") + } +} + +impl UcdFileByCodepoint for DerivedGeneralCategory { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedGeneralCategory { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedGeneralCategory, Error> { + let (codepoints, general_category) = + parse_codepoint_association(line)?; + Ok(DerivedGeneralCategory { + codepoints, + general_category: general_category.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedGeneralCategory; + + #[test] + fn parse_single() { + let line = "04D9 ; Ll # CYRILLIC SMALL LETTER SCHWA\n"; + let row: DerivedGeneralCategory = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x04D9); + assert_eq!(row.general_category, "Ll"); + } + + #[test] + fn parse_range() { + let line = "0660..0669 ; Nd # [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE"; + let row: DerivedGeneralCategory = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x0660, 0x0669)); + assert_eq!(row.general_category, "Nd"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_joining_group.rs b/vendor/ucd-parse/src/extracted/derived_joining_group.rs new file mode 100644 index 000000000..7707ac76e --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_joining_group.rs @@ -0,0 +1,64 @@ +use 
std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedJoiningGroup.txt` file. +/// +/// This file gives the derived values of the Joining_Group property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedJoiningGroup { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Joining_Group of the codepoints in this entry. + pub joining_group: String, +} + +impl UcdFile for DerivedJoiningGroup { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedJoiningGroup.txt") + } +} + +impl UcdFileByCodepoint for DerivedJoiningGroup { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedJoiningGroup { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedJoiningGroup, Error> { + let (codepoints, joining_group) = parse_codepoint_association(line)?; + Ok(DerivedJoiningGroup { + codepoints, + joining_group: joining_group.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedJoiningGroup; + + #[test] + fn parse_single() { + let line = "0710 ; Alaph # Lo SYRIAC LETTER ALAPH\n"; + let row: DerivedJoiningGroup = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0710); + assert_eq!(row.joining_group, "Alaph"); + } + + #[test] + fn parse_range() { + let line = "0633..0634 ; Seen # Lo [2] ARABIC LETTER SEEN..ARABIC LETTER SHEEN\n"; + let row: DerivedJoiningGroup = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x0633, 0x0634)); + assert_eq!(row.joining_group, "Seen"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_joining_type.rs b/vendor/ucd-parse/src/extracted/derived_joining_type.rs new file mode 100644 index 000000000..82e11b895 --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_joining_type.rs @@ -0,0 +1,64 @@ 
+use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedJoiningType.txt` file. +/// +/// This file gives the derived values of the Joining_Type property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedJoiningType { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Joining_Type of the codepoints in this entry. + pub joining_type: String, +} + +impl UcdFile for DerivedJoiningType { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedJoiningType.txt") + } +} + +impl UcdFileByCodepoint for DerivedJoiningType { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedJoiningType { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedJoiningType, Error> { + let (codepoints, joining_type) = parse_codepoint_association(line)?; + Ok(DerivedJoiningType { + codepoints, + joining_type: joining_type.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedJoiningType; + + #[test] + fn parse_single() { + let line = "0628 ; D # Lo ARABIC LETTER BEH\n"; + let row: DerivedJoiningType = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0628); + assert_eq!(row.joining_type, "D"); + } + + #[test] + fn parse_range() { + let line = "1133B..1133C ; T # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA\n"; + let row: DerivedJoiningType = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x1133B, 0x1133C)); + assert_eq!(row.joining_type, "T"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_line_break.rs b/vendor/ucd-parse/src/extracted/derived_line_break.rs new file mode 100644 index 000000000..dd1de43af --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_line_break.rs @@ -0,0 +1,61 @@ +use std::path::Path; +use 
std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedLineBreak.txt` file. +/// +/// This file gives the derived values of the Line_Break property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedLineBreak { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Line_Break of the codepoints in this entry. + pub line_break: String, +} + +impl UcdFile for DerivedLineBreak { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedLineBreak.txt") + } +} + +impl UcdFileByCodepoint for DerivedLineBreak { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedLineBreak { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedLineBreak, Error> { + let (codepoints, line_break) = parse_codepoint_association(line)?; + Ok(DerivedLineBreak { codepoints, line_break: line_break.to_string() }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedLineBreak; + + #[test] + fn parse_single() { + let line = "0028 ; OP # Ps LEFT PARENTHESIS\n"; + let row: DerivedLineBreak = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0028); + assert_eq!(row.line_break, "OP"); + } + + #[test] + fn parse_range() { + let line = "0030..0039 ; NU # Nd [10] DIGIT ZERO..DIGIT NINE\n"; + let row: DerivedLineBreak = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x0030, 0x0039)); + assert_eq!(row.line_break, "NU"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_name.rs b/vendor/ucd-parse/src/extracted/derived_name.rs new file mode 100644 index 000000000..713a2afee --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_name.rs @@ -0,0 +1,61 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + 
UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedName.txt` file. +/// +/// This file gives the derived values of the Name property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedName { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Name of the codepoints in this entry. + pub name: String, +} + +impl UcdFile for DerivedName { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedName.txt") + } +} + +impl UcdFileByCodepoint for DerivedName { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedName { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedName, Error> { + let (codepoints, name) = parse_codepoint_association(line)?; + Ok(DerivedName { codepoints, name: name.to_string() }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedName; + + #[test] + fn parse_single() { + let line = "0021 ; EXCLAMATION MARK\n"; + let row: DerivedName = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0021); + assert_eq!(row.name, "EXCLAMATION MARK"); + } + + #[test] + fn parse_range() { + let line = "3400..4DBF ; CJK UNIFIED IDEOGRAPH-*\n"; + let row: DerivedName = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x3400, 0x4DBF)); + assert_eq!(row.name, "CJK UNIFIED IDEOGRAPH-*"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_numeric_type.rs b/vendor/ucd-parse/src/extracted/derived_numeric_type.rs new file mode 100644 index 000000000..554b29a57 --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_numeric_type.rs @@ -0,0 +1,65 @@ +use std::path::Path; +use std::str::FromStr; + +use crate::common::{ + parse_codepoint_association, CodepointIter, Codepoints, UcdFile, + UcdFileByCodepoint, +}; +use crate::error::Error; + +/// A single row in the `extracted/DerivedNumericType.txt` file. 
+/// +/// This file gives the derived values of the Numeric_Type property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedNumericType { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The derived Numeric_Type of the codepoints in this entry. + pub numeric_type: String, +} + +impl UcdFile for DerivedNumericType { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedNumericType.txt") + } +} + +impl UcdFileByCodepoint for DerivedNumericType { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedNumericType { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedNumericType, Error> { + let (codepoints, numeric_type) = parse_codepoint_association(line)?; + Ok(DerivedNumericType { + codepoints, + numeric_type: numeric_type.to_string(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedNumericType; + + #[test] + fn parse_single() { + let line = + "2189 ; Numeric # No VULGAR FRACTION ZERO THIRDS\n"; + let row: DerivedNumericType = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x2189); + assert_eq!(row.numeric_type, "Numeric"); + } + + #[test] + fn parse_range() { + let line = "00B2..00B3 ; Digit # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE\n"; + let row: DerivedNumericType = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x00B2, 0x00B3)); + assert_eq!(row.numeric_type, "Digit"); + } +} diff --git a/vendor/ucd-parse/src/extracted/derived_numeric_values.rs b/vendor/ucd-parse/src/extracted/derived_numeric_values.rs new file mode 100644 index 000000000..98369a12b --- /dev/null +++ b/vendor/ucd-parse/src/extracted/derived_numeric_values.rs @@ -0,0 +1,92 @@ +use std::path::Path; +use std::str::FromStr; + +use once_cell::sync::Lazy; +use regex::Regex; + +use crate::common::{CodepointIter, Codepoints, UcdFile, UcdFileByCodepoint}; +use crate::error::Error; + +/// A single row in the 
`extracted/DerivedNumericValues.txt` file. +/// +/// This file gives the derived values of the Numeric_Value property. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DerivedNumericValues { + /// The codepoint or codepoint range for this entry. + pub codepoints: Codepoints, + /// The approximate Numeric_Value of the codepoints in this entry, + /// as a decimal. + pub numeric_value_decimal: String, + /// The exact Numeric_Value of the codepoints in this entry, as + /// a fraction. + pub numeric_value_fraction: String, +} + +impl UcdFile for DerivedNumericValues { + fn relative_file_path() -> &'static Path { + Path::new("extracted/DerivedNumericValues.txt") + } +} + +impl UcdFileByCodepoint for DerivedNumericValues { + fn codepoints(&self) -> CodepointIter { + self.codepoints.into_iter() + } +} + +impl FromStr for DerivedNumericValues { + type Err = Error; + + fn from_str(line: &str) -> Result<DerivedNumericValues, Error> { + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( + r"(?x) + ^ + \s*(?P<codepoints>[^\s;]+)\s*; + \s*(?P<numeric_value_decimal>[^\s;]+)\s*; + \s*; + \s*(?P<numeric_value_fraction>[^\s;]+)\s* + ", + ) + .unwrap() + }); + + let caps = match PARTS.captures(line.trim()) { + Some(caps) => caps, + None => return err!("invalid PropList line: '{}'", line), + }; + let codepoints = caps["codepoints"].parse()?; + let numeric_value_decimal = caps["numeric_value_decimal"].to_string(); + let numeric_value_fraction = + caps["numeric_value_fraction"].to_string(); + + Ok(DerivedNumericValues { + codepoints, + numeric_value_decimal, + numeric_value_fraction, + }) + } +} + +#[cfg(test)] +mod tests { + use super::DerivedNumericValues; + + #[test] + fn parse_single() { + let line = "0030 ; 0.0 ; ; 0 # Nd DIGIT ZERO\n"; + let row: DerivedNumericValues = line.parse().unwrap(); + assert_eq!(row.codepoints, 0x0030); + assert_eq!(row.numeric_value_decimal, "0.0"); + assert_eq!(row.numeric_value_fraction, "0"); + } + + #[test] + fn parse_range() { + 
let line = "11FC9..11FCA ; 0.0625 ; ; 1/16 # No [2] TAMIL FRACTION ONE SIXTEENTH-1..TAMIL FRACTION ONE SIXTEENTH-2\n"; + let row: DerivedNumericValues = line.parse().unwrap(); + assert_eq!(row.codepoints, (0x11FC9, 0x11FCA)); + assert_eq!(row.numeric_value_decimal, "0.0625"); + assert_eq!(row.numeric_value_fraction, "1/16"); + } +} diff --git a/vendor/ucd-parse/src/extracted/mod.rs b/vendor/ucd-parse/src/extracted/mod.rs new file mode 100644 index 000000000..d9c532cfe --- /dev/null +++ b/vendor/ucd-parse/src/extracted/mod.rs @@ -0,0 +1,30 @@ +//! Structs for parsing files in the `extracted` subdirectory. +//! +//! These are placed here, rather than at the top level, to help keep +//! the number of structs in any given module managable. + +pub use self::derived_bidi_class::DerivedBidiClass; +pub use self::derived_binary_properties::DerivedBinaryProperties; +pub use self::derived_combining_class::DerivedCombiningClass; +pub use self::derived_decomposition_type::DerivedDecompositionType; +pub use self::derived_east_asian_width::DerivedEastAsianWidth; +pub use self::derived_general_category::DerivedGeneralCategory; +pub use self::derived_joining_group::DerivedJoiningGroup; +pub use self::derived_joining_type::DerivedJoiningType; +pub use self::derived_line_break::DerivedLineBreak; +pub use self::derived_name::DerivedName; +pub use self::derived_numeric_type::DerivedNumericType; +pub use self::derived_numeric_values::DerivedNumericValues; + +mod derived_bidi_class; +mod derived_binary_properties; +mod derived_combining_class; +mod derived_decomposition_type; +mod derived_east_asian_width; +mod derived_general_category; +mod derived_joining_group; +mod derived_joining_type; +mod derived_line_break; +mod derived_name; +mod derived_numeric_type; +mod derived_numeric_values; diff --git a/vendor/ucd-parse/src/jamo_short_name.rs b/vendor/ucd-parse/src/jamo_short_name.rs index 4103dd7ee..348e941b2 100644 --- a/vendor/ucd-parse/src/jamo_short_name.rs +++ 
b/vendor/ucd-parse/src/jamo_short_name.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; @@ -35,17 +35,17 @@ impl FromStr for JamoShortName { type Err = Error; fn from_str(line: &str) -> Result<JamoShortName, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ (?P<codepoint>[A-Z0-9]+); \s* (?P<name>[A-Z]*) - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, diff --git a/vendor/ucd-parse/src/lib.rs b/vendor/ucd-parse/src/lib.rs index f6654658a..f53963200 100644 --- a/vendor/ucd-parse/src/lib.rs +++ b/vendor/ucd-parse/src/lib.rs @@ -16,6 +16,7 @@ pub use crate::arabic_shaping::ArabicShaping; pub use crate::bidi_mirroring_glyph::BidiMirroring; pub use crate::case_folding::{CaseFold, CaseStatus}; pub use crate::core_properties::CoreProperty; +pub use crate::east_asian_width::EastAsianWidth; pub use crate::emoji_properties::EmojiProperty; pub use crate::grapheme_cluster_break::{ GraphemeClusterBreak, GraphemeClusterBreakTest, @@ -42,6 +43,8 @@ macro_rules! 
err { } } +pub mod extracted; + mod common; mod error; @@ -50,6 +53,7 @@ mod arabic_shaping; mod bidi_mirroring_glyph; mod case_folding; mod core_properties; +mod east_asian_width; mod emoji_properties; mod grapheme_cluster_break; mod jamo_short_name; diff --git a/vendor/ucd-parse/src/name_aliases.rs b/vendor/ucd-parse/src/name_aliases.rs index 36c9c4b01..8b50b9394 100644 --- a/vendor/ucd-parse/src/name_aliases.rs +++ b/vendor/ucd-parse/src/name_aliases.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; @@ -37,8 +37,8 @@ impl FromStr for NameAlias { type Err = Error; fn from_str(line: &str) -> Result<NameAlias, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ (?P<codepoint>[A-Z0-9]+); @@ -46,10 +46,10 @@ impl FromStr for NameAlias { (?P<alias>[^;]+); \s* (?P<label>\S+) - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, diff --git a/vendor/ucd-parse/src/property_aliases.rs b/vendor/ucd-parse/src/property_aliases.rs index f94a116e6..ff5894016 100644 --- a/vendor/ucd-parse/src/property_aliases.rs +++ b/vendor/ucd-parse/src/property_aliases.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::UcdFile; @@ -28,19 +28,20 @@ impl FromStr for PropertyAlias { type Err = Error; fn from_str(line: &str) -> Result<PropertyAlias, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<abbrev>[^\s;]+)\s*; \s*(?P<long>[^\s;]+)\s* (?:;(?P<aliases>.*))? 
- " + ", ) - .unwrap(); - static ref ALIASES: Regex = - Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap(); - }; + .unwrap() + }); + static ALIASES: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, diff --git a/vendor/ucd-parse/src/property_value_aliases.rs b/vendor/ucd-parse/src/property_value_aliases.rs index 7e8a3c890..253afebba 100644 --- a/vendor/ucd-parse/src/property_value_aliases.rs +++ b/vendor/ucd-parse/src/property_value_aliases.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::UcdFile; @@ -34,30 +34,33 @@ impl FromStr for PropertyValueAlias { type Err = Error; fn from_str(line: &str) -> Result<PropertyValueAlias, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<prop>[^\s;]+)\s*; \s*(?P<abbrev>[^\s;]+)\s*; \s*(?P<long>[^\s;]+)\s* (?:;(?P<aliases>.*))? 
- " + ", ) - .unwrap(); - static ref PARTS_CCC: Regex = Regex::new( + .unwrap() + }); + static PARTS_CCC: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ ccc; \s*(?P<num_class>[0-9]+)\s*; \s*(?P<abbrev>[^\s;]+)\s*; \s*(?P<long>[^\s;]+) - " + ", ) - .unwrap(); - static ref ALIASES: Regex = - Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap(); - }; + .unwrap() + }); + static ALIASES: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap() + }); if line.starts_with("ccc;") { let caps = match PARTS_CCC.captures(line.trim()) { diff --git a/vendor/ucd-parse/src/special_casing.rs b/vendor/ucd-parse/src/special_casing.rs index a8fc61ddb..dbeff2b54 100644 --- a/vendor/ucd-parse/src/special_casing.rs +++ b/vendor/ucd-parse/src/special_casing.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{ @@ -46,8 +46,8 @@ impl FromStr for SpecialCaseMapping { type Err = Error; fn from_str(line: &str) -> Result<SpecialCaseMapping, Error> { - lazy_static! { - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ \s*(?P<codepoint>[^\s;]+)\s*; @@ -55,10 +55,10 @@ impl FromStr for SpecialCaseMapping { \s*(?P<title>[^;]+)\s*; \s*(?P<upper>[^;]+)\s*; \s*(?P<conditions>[^;\x23]+)? 
- " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, diff --git a/vendor/ucd-parse/src/unicode_data.rs b/vendor/ucd-parse/src/unicode_data.rs index 87910cc1d..9151f60b2 100644 --- a/vendor/ucd-parse/src/unicode_data.rs +++ b/vendor/ucd-parse/src/unicode_data.rs @@ -4,7 +4,7 @@ use std::ops::Range; use std::path::Path; use std::str::FromStr; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; @@ -14,7 +14,7 @@ use crate::error::Error; /// /// These fields were taken from UAX44, Table 9, as part of the documentation /// for the -/// [`UnicodeData.txt` file](http://www.unicode.org/reports/tr44/#UnicodeData.txt). +/// [`UnicodeData.txt` file](https://www.unicode.org/reports/tr44/#UnicodeData.txt). #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct UnicodeData { /// The codepoint corresponding to this row. @@ -26,12 +26,12 @@ pub struct UnicodeData { /// The class of this codepoint used in the Canonical Ordering Algorithm. /// /// Note that some classes map to a particular symbol. See - /// [UAX44, Table 15](http://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values). + /// [UAX44, Table 15](https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values). pub canonical_combining_class: u8, /// The bidirectional class of this codepoint. /// /// Possible values are listed in - /// [UAX44, Table 13](http://www.unicode.org/reports/tr44/#Bidi_Class_Values). + /// [UAX44, Table 13](https://www.unicode.org/reports/tr44/#Bidi_Class_Values). pub bidi_class: String, /// The decomposition mapping for this codepoint. This includes its /// formatting tag (if present). @@ -99,8 +99,8 @@ impl FromStr for UnicodeData { type Err = Error; fn from_str(line: &str) -> Result<UnicodeData, Error> { - lazy_static! 
{ - static ref PARTS: Regex = Regex::new( + static PARTS: Lazy<Regex> = Lazy::new(|| { + Regex::new( r"(?x) ^ ([A-Z0-9]+); # 1; codepoint @@ -119,10 +119,10 @@ impl FromStr for UnicodeData { ([^;]*); # 14; simple lowercase mapping ([^;]*) # 15; simple titlecase mapping $ - " + ", ) - .unwrap(); - }; + .unwrap() + }); let caps = match PARTS.captures(line.trim()) { Some(caps) => caps, None => return err!("invalid UnicodeData line"), @@ -301,13 +301,12 @@ impl FromStr for UnicodeDataDecomposition { type Err = Error; fn from_str(s: &str) -> Result<UnicodeDataDecomposition, Error> { - lazy_static! { - static ref WITH_TAG: Regex = Regex::new( - r"^(?:<(?P<tag>[^>]+)>)?\s*(?P<chars>[\s0-9A-F]+)$" - ) - .unwrap(); - static ref CHARS: Regex = Regex::new(r"[0-9A-F]+").unwrap(); - }; + static WITH_TAG: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"^(?:<(?P<tag>[^>]+)>)?\s*(?P<chars>[\s0-9A-F]+)$") + .unwrap() + }); + static CHARS: Lazy<Regex> = + Lazy::new(|| Regex::new(r"[0-9A-F]+").unwrap()); if s.is_empty() { return err!( "expected non-empty string for \ @@ -352,7 +351,7 @@ impl fmt::Display for UnicodeDataDecomposition { /// The formatting tag on a decomposition mapping. /// /// This is taken from -/// [UAX44, Table 14](http://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings). +/// [UAX44, Table 14](https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings). #[derive(Clone, Debug, Eq, PartialEq)] pub enum UnicodeDataDecompositionTag { /// <font> |