diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unicase/src/unicode/mod.rs | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unicase/src/unicode/mod.rs')
-rw-r--r-- | third_party/rust/unicase/src/unicode/mod.rs | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/third_party/rust/unicase/src/unicode/mod.rs b/third_party/rust/unicase/src/unicode/mod.rs new file mode 100644 index 0000000000..8b887331bf --- /dev/null +++ b/third_party/rust/unicase/src/unicode/mod.rs @@ -0,0 +1,201 @@ +#[cfg(__unicase__iter_cmp)] +use core::cmp::Ordering; +use core::hash::{Hash, Hasher}; + +use self::map::lookup; +mod map; + +#[derive(Clone, Copy, Debug, Default)] +pub struct Unicode<S>(pub S); + +impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> { + #[inline] + fn eq(&self, other: &Unicode<S2>) -> bool { + let mut left = self.0.as_ref().chars().flat_map(lookup); + let mut right = other.0.as_ref().chars().flat_map(lookup); + + // inline Iterator::eq since not added until Rust 1.5 + loop { + let x = match left.next() { + None => return right.next().is_none(), + Some(val) => val, + }; + + let y = match right.next() { + None => return false, + Some(val) => val, + }; + + if x != y { + return false; + } + } + } +} + +impl<S: AsRef<str>> Eq for Unicode<S> {} + +#[cfg(__unicase__iter_cmp)] +impl<T: AsRef<str>> PartialOrd for Unicode<T> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +#[cfg(__unicase__iter_cmp)] +impl<T: AsRef<str>> Ord for Unicode<T> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + let self_chars = self.0.as_ref().chars().flat_map(lookup); + let other_chars = other.0.as_ref().chars().flat_map(lookup); + self_chars.cmp(other_chars) + } +} + +impl<S: AsRef<str>> Hash for Unicode<S> { + #[inline] + fn hash<H: Hasher>(&self, hasher: &mut H) { + let mut buf = [0; 4]; + for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) { + let len = char_to_utf8(c, &mut buf); + hasher.write(&buf[..len]) + } + } +} + +#[inline] +fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize { + const TAG_CONT: u8 = 0b1000_0000; + const TAG_TWO_B: u8 = 0b1100_0000; + const TAG_THREE_B: u8 = 0b1110_0000; + const TAG_FOUR_B: u8 = 0b1111_0000; + + let code = c as u32; + if code <= 0x7F { + dst[0] = code as u8; + 1 + } else if code <= 0x7FF { + dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + dst[1] = (code & 0x3F) as u8 | TAG_CONT; + 2 + } else if code <= 0xFFFF { + dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + dst[2] = (code & 0x3F) as u8 | TAG_CONT; + 3 + } else { + dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; + dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + dst[3] = (code & 0x3F) as u8 | TAG_CONT; + 4 + } +} + +// internal mod so that the enum can be 'pub' +// thanks privacy-checker :___( +mod fold { + #[derive(Clone, Copy)] + pub enum Fold { + Zero, + One(char), + Two(char, char), + Three(char, char, char), + } + + impl Iterator for Fold { + type Item = char; + #[inline] + fn next(&mut self) -> Option<char> { + match *self { + Fold::Zero => None, + Fold::One(one) => { + *self = Fold::Zero; + Some(one) + }, + Fold::Two(one, two) => { + *self = Fold::One(two); + Some(one) + }, + Fold::Three(one, two, three) => { + *self = Fold::Two(one, two); + Some(three) + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + match *self { + Fold::Zero => (0, Some(0)), + Fold::One(..) => (1, Some(1)), + Fold::Two(..) => (2, Some(2)), + Fold::Three(..) => (3, Some(3)) + } + } + + } + impl From<(char,)> for Fold { + #[inline] + fn from((one,): (char,)) -> Fold { + Fold::One(one) + } + } + + impl From<(char, char)> for Fold { + #[inline] + fn from((one, two): (char, char)) -> Fold { + Fold::Two(one, two) + } + } + + impl From<(char, char, char)> for Fold { + #[inline] + fn from((one, two, three): (char, char, char)) -> Fold { + Fold::Three(one, two, three) + } + } +} + +#[cfg(test)] +mod tests { + use super::Unicode; + + macro_rules! eq { + ($left:expr, $right:expr) => ({ + assert_eq!(Unicode($left), Unicode($right)); + }); + } + + #[test] + fn test_ascii_folding() { + eq!("foo bar", "FoO BAR"); + } + + #[test] + fn test_simple_case_folding() { + eq!("στιγμας", "στιγμασ"); + } + + #[test] + fn test_full_case_folding() { + eq!("flour", "flour"); + eq!("Maße", "MASSE"); + eq!("ᾲ στο διάολο", "ὰι στο διάολο"); + } + + #[cfg(feature = "nightly")] + #[bench] + fn bench_ascii_folding(b: &mut ::test::Bencher) { + b.bytes = b"foo bar".len() as u64; + b.iter(|| eq!("foo bar", "FoO BAR")); + } + + #[cfg(feature = "nightly")] + #[bench] + fn bench_simple_case_folding(b: &mut ::test::Bencher) { + b.bytes = "στιγμας".len() as u64; + b.iter(|| eq!("στιγμας", "στιγμασ")); + } +} |