diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/icu_locid/src/helpers.rs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/icu_locid/src/helpers.rs')
-rw-r--r-- | third_party/rust/icu_locid/src/helpers.rs | 698 |
1 files changed, 698 insertions, 0 deletions
diff --git a/third_party/rust/icu_locid/src/helpers.rs b/third_party/rust/icu_locid/src/helpers.rs new file mode 100644 index 0000000000..d12435fbf3 --- /dev/null +++ b/third_party/rust/icu_locid/src/helpers.rs @@ -0,0 +1,698 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::iter::FromIterator; + +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; +use core::ops::{Deref, DerefMut}; +use litemap::store::*; + +/// Internal: A vector that supports no-allocation, constant values if length 0 or 1. +/// Using ZeroOne(Option<T>) saves 8 bytes in ShortSlice via niche optimization. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) enum ShortSlice<T> { + ZeroOne(Option<T>), + Multi(Box<[T]>), +} + +impl<T> ShortSlice<T> { + #[inline] + pub const fn new() -> Self { + Self::ZeroOne(None) + } + + #[inline] + pub const fn new_single(item: T) -> Self { + Self::ZeroOne(Some(item)) + } + + pub fn push(&mut self, item: T) { + *self = match core::mem::replace(self, Self::ZeroOne(None)) { + ShortSlice::ZeroOne(None) => ShortSlice::ZeroOne(Some(item)), + ShortSlice::ZeroOne(Some(prev_item)) => { + ShortSlice::Multi(vec![prev_item, item].into_boxed_slice()) + } + ShortSlice::Multi(items) => { + let mut items = items.into_vec(); + items.push(item); + ShortSlice::Multi(items.into_boxed_slice()) + } + }; + } + + #[inline] + pub const fn single(&self) -> Option<&T> { + match self { + ShortSlice::ZeroOne(Some(v)) => Some(v), + _ => None, + } + } + + #[inline] + pub fn len(&self) -> usize { + match self { + ShortSlice::ZeroOne(None) => 0, + ShortSlice::ZeroOne(_) => 1, + ShortSlice::Multi(ref v) => v.len(), + } + } + + pub fn insert(&mut self, index: usize, elt: T) { + assert!( + index <= self.len(), + "insertion index (is {}) should be <= len (is {})", + index, + self.len() + ); + + *self = match core::mem::replace(self, ShortSlice::ZeroOne(None)) { + ShortSlice::ZeroOne(None) => ShortSlice::ZeroOne(Some(elt)), + ShortSlice::ZeroOne(Some(item)) => { + let items = if index == 0 { + vec![elt, item].into_boxed_slice() + } else { + vec![item, elt].into_boxed_slice() + }; + ShortSlice::Multi(items) + } + ShortSlice::Multi(items) => { + let mut items = items.into_vec(); + items.insert(index, elt); + ShortSlice::Multi(items.into_boxed_slice()) + } + } + } + + pub fn remove(&mut self, index: usize) -> T { + assert!( + index < self.len(), + "removal index (is {}) should be < len (is {})", + index, + self.len() + ); + + let (replaced, removed_item) = match core::mem::replace(self, ShortSlice::ZeroOne(None)) { + ShortSlice::ZeroOne(None) => unreachable!(), + ShortSlice::ZeroOne(Some(v)) => (ShortSlice::ZeroOne(None), v), + ShortSlice::Multi(v) => { + let mut v = v.into_vec(); + let removed_item = v.remove(index); + match v.len() { + #[allow(clippy::unwrap_used)] + // we know that the vec has exactly one element left + 1 => (ShortSlice::ZeroOne(Some(v.pop().unwrap())), removed_item), + // v has at least 2 elements, create a Multi variant + _ => (ShortSlice::Multi(v.into_boxed_slice()), removed_item), + } + } + }; + *self = replaced; + removed_item + } + + #[inline] + pub fn clear(&mut self) { + let _ = core::mem::replace(self, ShortSlice::ZeroOne(None)); + } + + pub fn retain<F>(&mut self, mut f: F) + where + F: FnMut(&T) -> bool, + { + *self = match core::mem::take(self) { + Self::ZeroOne(Some(one)) if f(&one) => Self::ZeroOne(Some(one)), + Self::ZeroOne(_) => Self::ZeroOne(None), + Self::Multi(slice) => { + let mut vec = slice.into_vec(); + vec.retain(f); + Self::from(vec) + } + }; + } +} + +impl<T> Deref for ShortSlice<T> { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + match self { + ShortSlice::ZeroOne(None) => &[], + ShortSlice::ZeroOne(Some(v)) => core::slice::from_ref(v), + ShortSlice::Multi(v) => v, + } + } +} + +impl<T> DerefMut for ShortSlice<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + match self { + ShortSlice::ZeroOne(None) => &mut [], + ShortSlice::ZeroOne(Some(v)) => core::slice::from_mut(v), + ShortSlice::Multi(v) => v, + } + } +} + +impl<T> From<Vec<T>> for ShortSlice<T> { + fn from(v: Vec<T>) -> Self { + match v.len() { + 0 => ShortSlice::ZeroOne(None), + #[allow(clippy::unwrap_used)] // we know that the vec is not empty + 1 => ShortSlice::ZeroOne(Some(v.into_iter().next().unwrap())), + _ => ShortSlice::Multi(v.into_boxed_slice()), + } + } +} + +impl<T> Default for ShortSlice<T> { + fn default() -> Self { + ShortSlice::ZeroOne(None) + } +} + +impl<T> FromIterator<T> for ShortSlice<T> { + fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { + let mut iter = iter.into_iter(); + match (iter.next(), iter.next()) { + (Some(first), Some(second)) => { + // Size hint behaviour same as `Vec::extend` + 2 + let mut vec = Vec::with_capacity(iter.size_hint().0.saturating_add(3)); + vec.push(first); + vec.push(second); + vec.extend(iter); + Self::Multi(vec.into_boxed_slice()) + } + (first, _) => Self::ZeroOne(first), + } + } +} + +impl<K, V> StoreConstEmpty<K, V> for ShortSlice<(K, V)> { + const EMPTY: ShortSlice<(K, V)> = ShortSlice::ZeroOne(None); +} + +impl<K, V> Store<K, V> for ShortSlice<(K, V)> { + #[inline] + fn lm_len(&self) -> usize { + self.len() + } + + #[inline] + fn lm_is_empty(&self) -> bool { + matches!(self, ShortSlice::ZeroOne(None)) + } + + #[inline] + fn lm_get(&self, index: usize) -> Option<(&K, &V)> { + self.get(index).map(|elt| (&elt.0, &elt.1)) + } + + #[inline] + fn lm_last(&self) -> Option<(&K, &V)> { + match self { + ShortSlice::ZeroOne(v) => v.as_ref(), + ShortSlice::Multi(v) => v.last(), + } + .map(|elt| (&elt.0, &elt.1)) + } + + #[inline] + fn lm_binary_search_by<F>(&self, mut cmp: F) -> Result<usize, usize> + where + F: FnMut(&K) -> core::cmp::Ordering, + { + self.binary_search_by(|(k, _)| cmp(k)) + } +} + +impl<K: Ord, V> StoreFromIterable<K, V> for ShortSlice<(K, V)> { + fn lm_sort_from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self { + let v: Vec<(K, V)> = Vec::lm_sort_from_iter(iter); + v.into() + } +} + +impl<K, V> StoreMut<K, V> for ShortSlice<(K, V)> { + fn lm_with_capacity(_capacity: usize) -> Self { + ShortSlice::ZeroOne(None) + } + + fn lm_reserve(&mut self, _additional: usize) {} + + fn lm_get_mut(&mut self, index: usize) -> Option<(&K, &mut V)> { + self.get_mut(index).map(|elt| (&elt.0, &mut elt.1)) + } + + fn lm_push(&mut self, key: K, value: V) { + self.push((key, value)) + } + + fn lm_insert(&mut self, index: usize, key: K, value: V) { + self.insert(index, (key, value)) + } + + fn lm_remove(&mut self, index: usize) -> (K, V) { + self.remove(index) + } + + fn lm_clear(&mut self) { + self.clear(); + } + + fn lm_retain<F>(&mut self, mut predicate: F) + where + F: FnMut(&K, &V) -> bool, + { + self.retain(|(k, v)| predicate(k, v)) + } +} + +impl<'a, K: 'a, V: 'a> StoreIterable<'a, K, V> for ShortSlice<(K, V)> { + type KeyValueIter = + core::iter::Map<core::slice::Iter<'a, (K, V)>, for<'r> fn(&'r (K, V)) -> (&'r K, &'r V)>; + + fn lm_iter(&'a self) -> Self::KeyValueIter { + self.iter().map(|elt| (&elt.0, &elt.1)) + } +} + +impl<K, V> StoreFromIterator<K, V> for ShortSlice<(K, V)> {} + +#[test] +fn test_short_slice_impl() { + litemap::testing::check_store::<ShortSlice<(u32, u64)>>(); +} + +macro_rules! impl_tinystr_subtag { + ( + $(#[$doc:meta])* + $name:ident, + $($path:ident)::+, + $macro_name:ident, + $legacy_macro_name:ident, + $len_start:literal..=$len_end:literal, + $tinystr_ident:ident, + $validate:expr, + $normalize:expr, + $is_normalized:expr, + $error:ident, + [$good_example:literal $(,$more_good_examples:literal)*], + [$bad_example:literal $(, $more_bad_examples:literal)*], + ) => { + #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] + #[cfg_attr(feature = "serde", derive(serde::Serialize))] + #[repr(transparent)] + $(#[$doc])* + pub struct $name(tinystr::TinyAsciiStr<$len_end>); + + impl $name { + /// A constructor which takes a UTF-8 slice, parses it and + #[doc = concat!("produces a well-formed [`", stringify!($name), "`].")] + /// + /// # Examples + /// + /// ``` + #[doc = concat!("use icu_locid::", stringify!($($path::)+), stringify!($name), ";")] + /// + #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($good_example), ").is_ok());")] + #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($bad_example), ").is_err());")] + /// ``` + pub const fn try_from_bytes(v: &[u8]) -> Result<Self, crate::parser::errors::ParserError> { + Self::try_from_bytes_manual_slice(v, 0, v.len()) + } + + /// Equivalent to [`try_from_bytes(bytes[start..end])`](Self::try_from_bytes), + /// but callable in a `const` context (which range indexing is not). + pub const fn try_from_bytes_manual_slice( + v: &[u8], + start: usize, + end: usize, + ) -> Result<Self, crate::parser::errors::ParserError> { + let slen = end - start; + + #[allow(clippy::double_comparisons)] // if len_start == len_end + if slen < $len_start || slen > $len_end { + return Err(crate::parser::errors::ParserError::$error); + } + + match tinystr::TinyAsciiStr::from_bytes_manual_slice(v, start, end) { + Ok($tinystr_ident) if $validate => Ok(Self($normalize)), + _ => Err(crate::parser::errors::ParserError::$error), + } + } + + #[doc = concat!("Safely creates a [`", stringify!($name), "`] from its raw format")] + /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`], + /// this constructor only takes normalized values. + pub const fn try_from_raw( + v: [u8; $len_end], + ) -> Result<Self, crate::parser::errors::ParserError> { + if let Ok($tinystr_ident) = tinystr::TinyAsciiStr::<$len_end>::try_from_raw(v) { + if $tinystr_ident.len() >= $len_start && $is_normalized { + Ok(Self($tinystr_ident)) + } else { + Err(crate::parser::errors::ParserError::$error) + } + } else { + Err(crate::parser::errors::ParserError::$error) + } + } + + #[doc = concat!("Unsafely creates a [`", stringify!($name), "`] from its raw format")] + /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`], + /// this constructor only takes normalized values. + /// + /// # Safety + /// + /// This function is safe iff [`Self::try_from_raw`] returns an `Ok`. This is the case + /// for inputs that are correctly normalized. + pub const unsafe fn from_raw_unchecked(v: [u8; $len_end]) -> Self { + Self(tinystr::TinyAsciiStr::from_bytes_unchecked(v)) + } + + /// Deconstructs into a raw format to be consumed by + /// [`from_raw_unchecked`](Self::from_raw_unchecked()) or + /// [`try_from_raw`](Self::try_from_raw()). + pub const fn into_raw(self) -> [u8; $len_end] { + *self.0.all_bytes() + } + + #[inline] + /// A helper function for displaying as a `&str`. + pub const fn as_str(&self) -> &str { + self.0.as_str() + } + + #[doc(hidden)] + pub const fn into_tinystr(&self) -> tinystr::TinyAsciiStr<$len_end> { + self.0 + } + + /// Compare with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted + /// `self` to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`](core::cmp::Ordering::Equal) + /// is `self.as_str().as_bytes()`. + #[inline] + pub fn strict_cmp(self, other: &[u8]) -> core::cmp::Ordering { + self.as_str().as_bytes().cmp(other) + } + + /// Compare with a potentially unnormalized BCP-47 string. + /// + /// The return value is equivalent to what would happen if you first parsed the + /// BCP-47 string and then performed a structural comparison. + /// + #[inline] + pub fn normalizing_eq(self, other: &str) -> bool { + self.as_str().eq_ignore_ascii_case(other) + } + } + + impl core::str::FromStr for $name { + type Err = crate::parser::errors::ParserError; + + fn from_str(source: &str) -> Result<Self, Self::Err> { + Self::try_from_bytes(source.as_bytes()) + } + } + + impl<'l> From<&'l $name> for &'l str { + fn from(input: &'l $name) -> Self { + input.as_str() + } + } + + impl From<$name> for tinystr::TinyAsciiStr<$len_end> { + fn from(input: $name) -> Self { + input.into_tinystr() + } + } + + impl writeable::Writeable for $name { + #[inline] + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + sink.write_str(self.as_str()) + } + #[inline] + fn writeable_length_hint(&self) -> writeable::LengthHint { + writeable::LengthHint::exact(self.0.len()) + } + #[inline] + fn write_to_string(&self) -> alloc::borrow::Cow<str> { + alloc::borrow::Cow::Borrowed(self.0.as_str()) + } + } + + writeable::impl_display_with_writeable!($name); + + #[doc = concat!("A macro allowing for compile-time construction of valid [`", stringify!($name), "`] subtags.")] + /// + /// # Examples + /// + /// Parsing errors don't have to be handled at runtime: + /// ``` + /// assert_eq!( + #[doc = concat!(" icu_locid::", $(stringify!($path), "::",)+ stringify!($macro_name), "!(", stringify!($good_example) ,"),")] + #[doc = concat!(" ", stringify!($good_example), ".parse::<icu_locid::", $(stringify!($path), "::",)+ stringify!($name), ">().unwrap()")] + /// ); + /// ``` + /// + /// Invalid input is a compile failure: + /// ```compile_fail,E0080 + #[doc = concat!("icu_locid::", $(stringify!($path), "::",)+ stringify!($macro_name), "!(", stringify!($bad_example) ,");")] + /// ``` + /// + #[doc = concat!("[`", stringify!($name), "`]: crate::", $(stringify!($path), "::",)+ stringify!($name))] + #[macro_export] + #[doc(hidden)] + macro_rules! $legacy_macro_name { + ($string:literal) => {{ + use $crate::$($path ::)+ $name; + const R: $name = + match $name::try_from_bytes($string.as_bytes()) { + Ok(r) => r, + #[allow(clippy::panic)] // const context + _ => panic!(concat!("Invalid ", $(stringify!($path), "::",)+ stringify!($name), ": ", $string)), + }; + R + }}; + } + #[doc(inline)] + pub use $legacy_macro_name as $macro_name; + + #[cfg(feature = "databake")] + impl databake::Bake for $name { + fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { + env.insert("icu_locid"); + let string = self.as_str(); + databake::quote! { icu_locid::$($path::)+ $macro_name!(#string) } + } + } + + #[test] + fn test_construction() { + let maybe = $name::try_from_bytes($good_example.as_bytes()); + assert!(maybe.is_ok()); + assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw())); + assert_eq!(maybe.unwrap().as_str(), $good_example); + $( + let maybe = $name::try_from_bytes($more_good_examples.as_bytes()); + assert!(maybe.is_ok()); + assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw())); + assert_eq!(maybe.unwrap().as_str(), $more_good_examples); + )* + assert!($name::try_from_bytes($bad_example.as_bytes()).is_err()); + $( + assert!($name::try_from_bytes($more_bad_examples.as_bytes()).is_err()); + )* + } + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!(&$good_example.parse::<$name>().unwrap(), $good_example); + $( + writeable::assert_writeable_eq!($more_good_examples.parse::<$name>().unwrap(), $more_good_examples); + )* + } + + #[cfg(feature = "serde")] + impl<'de> serde::Deserialize<'de> for $name { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::de::Deserializer<'de>, + { + struct Visitor; + + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = $name; + + fn expecting( + &self, + formatter: &mut core::fmt::Formatter<'_>, + ) -> core::fmt::Result { + write!(formatter, "a valid BCP-47 {}", stringify!($name)) + } + + fn visit_str<E: serde::de::Error>(self, s: &str) -> Result<Self::Value, E> { + s.parse().map_err(serde::de::Error::custom) + } + } + + if deserializer.is_human_readable() { + deserializer.deserialize_string(Visitor) + } else { + Self::try_from_raw(serde::de::Deserialize::deserialize(deserializer)?) + .map_err(serde::de::Error::custom) + } + } + } + + // Safety checklist for ULE: + // + // 1. Must not include any uninitialized or padding bytes (true since transparent over a ULE). + // 2. Must have an alignment of 1 byte (true since transparent over a ULE). + // 3. ULE::validate_byte_slice() checks that the given byte slice represents a valid slice. + // 4. ULE::validate_byte_slice() checks that the given byte slice has a valid length. + // 5. All other methods must be left with their default impl. + // 6. Byte equality is semantic equality. + #[cfg(feature = "zerovec")] + unsafe impl zerovec::ule::ULE for $name { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> { + let it = bytes.chunks_exact(core::mem::size_of::<Self>()); + if !it.remainder().is_empty() { + return Err(zerovec::ZeroVecError::length::<Self>(bytes.len())); + } + for v in it { + // The following can be removed once `array_chunks` is stabilized. + let mut a = [0; core::mem::size_of::<Self>()]; + a.copy_from_slice(v); + if Self::try_from_raw(a).is_err() { + return Err(zerovec::ZeroVecError::parse::<Self>()); + } + } + Ok(()) + } + } + + #[cfg(feature = "zerovec")] + impl zerovec::ule::AsULE for $name { + type ULE = Self; + fn to_unaligned(self) -> Self::ULE { + self + } + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } + } + + #[cfg(feature = "zerovec")] + impl<'a> zerovec::maps::ZeroMapKV<'a> for $name { + type Container = zerovec::ZeroVec<'a, $name>; + type Slice = zerovec::ZeroSlice<$name>; + type GetType = $name; + type OwnedType = $name; + } + }; +} + +macro_rules! impl_writeable_for_each_subtag_str_no_test { + ($type:tt $(, $self:ident, $borrow_cond:expr => $borrow:expr)?) => { + impl writeable::Writeable for $type { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + let mut initial = true; + self.for_each_subtag_str(&mut |subtag| { + if initial { + initial = false; + } else { + sink.write_char('-')?; + } + sink.write_str(subtag) + }) + } + + #[inline] + fn writeable_length_hint(&self) -> writeable::LengthHint { + let mut result = writeable::LengthHint::exact(0); + let mut initial = true; + self.for_each_subtag_str::<core::convert::Infallible, _>(&mut |subtag| { + if initial { + initial = false; + } else { + result += 1; + } + result += subtag.len(); + Ok(()) + }) + .expect("infallible"); + result + } + + $( + fn write_to_string(&self) -> alloc::borrow::Cow<str> { + #[allow(clippy::unwrap_used)] // impl_writeable_for_subtag_list's $borrow uses unwrap + let $self = self; + if $borrow_cond { + $borrow + } else { + let mut output = alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); + let _ = self.write_to(&mut output); + alloc::borrow::Cow::Owned(output) + } + } + )? + } + + writeable::impl_display_with_writeable!($type); + }; +} + +macro_rules! impl_writeable_for_subtag_list { + ($type:tt, $sample1:literal, $sample2:literal) => { + impl_writeable_for_each_subtag_str_no_test!($type, selff, selff.0.len() == 1 => alloc::borrow::Cow::Borrowed(selff.0.get(0).unwrap().as_str())); + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!(&$type::default(), ""); + writeable::assert_writeable_eq!( + &$type::from_short_slice_unchecked(alloc::vec![$sample1.parse().unwrap()].into()), + $sample1, + ); + writeable::assert_writeable_eq!( + &$type::from_short_slice_unchecked(vec![ + $sample1.parse().unwrap(), + $sample2.parse().unwrap() + ].into()), + core::concat!($sample1, "-", $sample2), + ); + } + }; +} + +macro_rules! impl_writeable_for_key_value { + ($type:tt, $key1:literal, $value1:literal, $key2:literal, $expected2:literal) => { + impl_writeable_for_each_subtag_str_no_test!($type); + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!(&$type::default(), ""); + writeable::assert_writeable_eq!( + &$type::from_tuple_vec(vec![($key1.parse().unwrap(), $value1.parse().unwrap())]), + core::concat!($key1, "-", $value1), + ); + writeable::assert_writeable_eq!( + &$type::from_tuple_vec(vec![ + ($key1.parse().unwrap(), $value1.parse().unwrap()), + ($key2.parse().unwrap(), "true".parse().unwrap()) + ]), + core::concat!($key1, "-", $value1, "-", $expected2), + ); + } + }; +} |