summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unicase/src/unicode/mod.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unicase/src/unicode/mod.rs
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unicase/src/unicode/mod.rs')
-rw-r--r--third_party/rust/unicase/src/unicode/mod.rs201
1 files changed, 201 insertions, 0 deletions
diff --git a/third_party/rust/unicase/src/unicode/mod.rs b/third_party/rust/unicase/src/unicode/mod.rs
new file mode 100644
index 0000000000..8b887331bf
--- /dev/null
+++ b/third_party/rust/unicase/src/unicode/mod.rs
@@ -0,0 +1,201 @@
+#[cfg(__unicase__iter_cmp)]
+use core::cmp::Ordering;
+use core::hash::{Hash, Hasher};
+
+use self::map::lookup;
+mod map;
+
+#[derive(Clone, Copy, Debug, Default)]
+pub struct Unicode<S>(pub S);
+
+impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
+ #[inline]
+ fn eq(&self, other: &Unicode<S2>) -> bool {
+ let mut left = self.0.as_ref().chars().flat_map(lookup);
+ let mut right = other.0.as_ref().chars().flat_map(lookup);
+
+ // inline Iterator::eq since not added until Rust 1.5
+ loop {
+ let x = match left.next() {
+ None => return right.next().is_none(),
+ Some(val) => val,
+ };
+
+ let y = match right.next() {
+ None => return false,
+ Some(val) => val,
+ };
+
+ if x != y {
+ return false;
+ }
+ }
+ }
+}
+
+impl<S: AsRef<str>> Eq for Unicode<S> {}
+
+#[cfg(__unicase__iter_cmp)]
+impl<T: AsRef<str>> PartialOrd for Unicode<T> {
+ #[inline]
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+#[cfg(__unicase__iter_cmp)]
+impl<T: AsRef<str>> Ord for Unicode<T> {
+ #[inline]
+ fn cmp(&self, other: &Self) -> Ordering {
+ let self_chars = self.0.as_ref().chars().flat_map(lookup);
+ let other_chars = other.0.as_ref().chars().flat_map(lookup);
+ self_chars.cmp(other_chars)
+ }
+}
+
+impl<S: AsRef<str>> Hash for Unicode<S> {
+ #[inline]
+ fn hash<H: Hasher>(&self, hasher: &mut H) {
+ let mut buf = [0; 4];
+ for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
+ let len = char_to_utf8(c, &mut buf);
+ hasher.write(&buf[..len])
+ }
+ }
+}
+
+#[inline]
+fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
+ const TAG_CONT: u8 = 0b1000_0000;
+ const TAG_TWO_B: u8 = 0b1100_0000;
+ const TAG_THREE_B: u8 = 0b1110_0000;
+ const TAG_FOUR_B: u8 = 0b1111_0000;
+
+ let code = c as u32;
+ if code <= 0x7F {
+ dst[0] = code as u8;
+ 1
+ } else if code <= 0x7FF {
+ dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+ dst[1] = (code & 0x3F) as u8 | TAG_CONT;
+ 2
+ } else if code <= 0xFFFF {
+ dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+ dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+ dst[2] = (code & 0x3F) as u8 | TAG_CONT;
+ 3
+ } else {
+ dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+ dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+ dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+ dst[3] = (code & 0x3F) as u8 | TAG_CONT;
+ 4
+ }
+}
+
+// internal mod so that the enum can be 'pub'
+// thanks privacy-checker :___(
+mod fold {
+ #[derive(Clone, Copy)]
+ pub enum Fold {
+ Zero,
+ One(char),
+ Two(char, char),
+ Three(char, char, char),
+ }
+
+ impl Iterator for Fold {
+ type Item = char;
+ #[inline]
+ fn next(&mut self) -> Option<char> {
+ match *self {
+ Fold::Zero => None,
+ Fold::One(one) => {
+ *self = Fold::Zero;
+ Some(one)
+ },
+ Fold::Two(one, two) => {
+ *self = Fold::One(two);
+ Some(one)
+ },
+ Fold::Three(one, two, three) => {
+ *self = Fold::Two(one, two);
+ Some(three)
+ }
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ match *self {
+ Fold::Zero => (0, Some(0)),
+ Fold::One(..) => (1, Some(1)),
+ Fold::Two(..) => (2, Some(2)),
+ Fold::Three(..) => (3, Some(3))
+ }
+ }
+
+ }
+ impl From<(char,)> for Fold {
+ #[inline]
+ fn from((one,): (char,)) -> Fold {
+ Fold::One(one)
+ }
+ }
+
+ impl From<(char, char)> for Fold {
+ #[inline]
+ fn from((one, two): (char, char)) -> Fold {
+ Fold::Two(one, two)
+ }
+ }
+
+ impl From<(char, char, char)> for Fold {
+ #[inline]
+ fn from((one, two, three): (char, char, char)) -> Fold {
+ Fold::Three(one, two, three)
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Unicode;
+
+ macro_rules! eq {
+ ($left:expr, $right:expr) => ({
+ assert_eq!(Unicode($left), Unicode($right));
+ });
+ }
+
+ #[test]
+ fn test_ascii_folding() {
+ eq!("foo bar", "FoO BAR");
+ }
+
+ #[test]
+ fn test_simple_case_folding() {
+ eq!("στιγμας", "στιγμασ");
+ }
+
+ #[test]
+ fn test_full_case_folding() {
+ eq!("flour", "flour");
+ eq!("Maße", "MASSE");
+ eq!("ᾲ στο διάολο", "ὰι στο διάολο");
+ }
+
+ #[cfg(feature = "nightly")]
+ #[bench]
+ fn bench_ascii_folding(b: &mut ::test::Bencher) {
+ b.bytes = b"foo bar".len() as u64;
+ b.iter(|| eq!("foo bar", "FoO BAR"));
+ }
+
+ #[cfg(feature = "nightly")]
+ #[bench]
+ fn bench_simple_case_folding(b: &mut ::test::Bencher) {
+ b.bytes = "στιγμας".len() as u64;
+ b.iter(|| eq!("στιγμας", "στιγμασ"));
+ }
+}