diff options
Diffstat (limited to 'vendor/zerovec/src/varzerovec/slice.rs')
-rw-r--r-- | vendor/zerovec/src/varzerovec/slice.rs | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/vendor/zerovec/src/varzerovec/slice.rs b/vendor/zerovec/src/varzerovec/slice.rs new file mode 100644 index 000000000..59e8da03f --- /dev/null +++ b/vendor/zerovec/src/varzerovec/slice.rs @@ -0,0 +1,549 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::components::VarZeroVecComponents; +use super::*; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cmp::{Ord, Ordering, PartialOrd}; +use core::fmt; +use core::marker::PhantomData; +use core::mem; + +use core::ops::Index; +use core::ops::Range; + +/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]` +/// where `T` is not `Sized`. +/// +/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain +/// owned data and as such is ideal for deserialization since most human readable +/// serialization formats cannot unconditionally deserialize zero-copy. +/// +/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): +/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead +/// using `VarZeroVec<ZeroSlice<T>>`. +/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +/// +/// This type can be nested within itself to allow for multi-level nested `Vec`s, for +/// example the following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>` +/// +/// ```rust +/// use zerovec::ule::*; +/// use zerovec::{VarZeroSlice, VarZeroVec, ZeroVec}; +/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"]; +/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"]; +/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"]; +/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"]; +/// let strings_12 = vec![&*strings_1, &*strings_2]; +/// let strings_34 = vec![&*strings_3, &*strings_4]; +/// let all_strings = vec![strings_12, strings_34]; +/// +/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1); +/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2); +/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3); +/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4); +/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]); +/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]); +/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]); +/// +/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all +/// .iter() +/// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| { +/// v.iter() +/// .map(|x: &VarZeroSlice<_>| { +/// x.as_varzerovec() +/// .iter() +/// .map(|s| s.to_owned()) +/// .collect::<Vec<String>>() +/// }) +/// .collect::<Vec<_>>() +/// }) +/// .collect::<Vec<_>>(); +/// assert_eq!(reconstructed, all_strings); +/// +/// let bytes = vzv_all.as_bytes(); +/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> = +/// VarZeroVec::parse_byte_slice(bytes).unwrap(); +/// assert_eq!(vzv_from_bytes, vzv_all); +/// ``` +// +// safety invariant: The slice MUST be one which parses to +// a valid VarZeroVecComponents<T> +#[repr(transparent)] +pub struct VarZeroSlice<T: ?Sized, F = Index16> { + marker: PhantomData<(F, T)>, + /// The original slice this was constructed from + entire_slice: [u8], +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> { + /// Construct a new empty VarZeroSlice + pub const fn new_empty() -> &'static Self { + let arr: &[u8] = &[]; + unsafe { mem::transmute(arr) } + } + + /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer + #[inline] + pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> { + unsafe { + // safety: VarZeroSlice is guaranteed to parse here + VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + /// Get the number of elements in this slice + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn len(&self) -> usize { + self.as_components().len() + } + + /// Returns `true` if the slice contains no elements. + /// + /// # Examples + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings: Vec<String> = vec![]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert!(vec.is_empty()); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn is_empty(&self) -> bool { + self.as_components().is_empty() + } + + /// Obtain an iterator over this slice's elements + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// assert_eq!(iter_results[0], "foo"); + /// assert_eq!(iter_results[1], "bar"); + /// assert_eq!(iter_results[2], "baz"); + /// assert_eq!(iter_results[3], "quux"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> { + self.as_components().iter() + } + + /// Get one of this slice's elements, returning None if the index is out of bounds + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// assert_eq!(vec.get(0), Some("foo")); + /// assert_eq!(vec.get(1), Some("bar")); + /// assert_eq!(vec.get(2), Some("baz")); + /// assert_eq!(vec.get(3), Some("quux")); + /// assert_eq!(vec.get(4), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn get(&self, idx: usize) -> Option<&T> { + self.as_components().get(idx) + } + + /// Get one of this slice's elements + /// + /// # Safety + /// + /// `index` must be in range + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// unsafe { + /// assert_eq!(vec.get_unchecked(0), "foo"); + /// assert_eq!(vec.get_unchecked(1), "bar"); + /// assert_eq!(vec.get_unchecked(2), "baz"); + /// assert_eq!(vec.get_unchecked(3), "quux"); + /// } + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub unsafe fn get_unchecked(&self, idx: usize) -> &T { + self.as_components().get_unchecked(idx) + } + + /// Obtain an owned `Vec<Box<T>>` out of this + pub fn to_vec(&self) -> Vec<Box<T>> { + self.as_components().to_vec() + } + + /// Get a reference to the entire encoded backing buffer of this slice + /// + /// The bytes can be passed back to [`Self::parse_byte_slice()`]. + /// + /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz"]; + /// let vzv = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap()); + /// + /// # Ok::<(), ZeroVecError>(()) + /// ``` + #[inline] + pub const fn as_bytes(&self) -> &[u8] { + &self.entire_slice + } + + /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroSlice`], + /// it is more efficient to perform this conversion first + pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { + VarZeroVec::Borrowed(self) + } + + /// Parse a VarZeroSlice from a slice of the appropriate format + /// + /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] + pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> { + <Self as VarULE>::parse_byte_slice(slice) + } + + /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice` + /// + /// # Safety + /// - `bytes` must be a valid sequence of bytes for this VarZeroVec + pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] { + let range = self.as_components().get_range(idx); + #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges + &mut self.entire_slice[range] + } +} + +impl<T, F> VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + T: Ord, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see + /// the standard library function [`binary_search`]. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.binary_search("f"), Ok(2)); + /// assert_eq!(vec.binary_search("e"), Err(2)); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T) -> Result<usize, usize> { + self.as_components().binary_search(x) + } + + /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range. + /// + /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according + /// to the behavior of the standard library function [`binary_search`]. + /// + /// The index is returned relative to the start of the range. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// // Same behavior as binary_search when the range covers the whole slice: + /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3))); + /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4))); + /// + /// // Will not look outside of the range: + /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1))); + /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0))); + /// + /// // Will return indices relative to the start of the range: + /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2))); + /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3))); + /// + /// // Will return None if the range is out of bounds: + /// assert_eq!(vec.binary_search_in_range("x", 100..200), None); + /// assert_eq!(vec.binary_search_in_range("x", 0..200), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search_in_range( + &self, + x: &T, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.as_components().binary_search_in_range(x, range) + } +} + +impl<T, F> VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see + /// the standard library function [`binary_search_by`]. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2)); + /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2)); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by + #[inline] + pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> { + self.as_components().binary_search_by(predicate) + } + + /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range. + /// + /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according + /// to the behavior of the standard library function [`binary_search`]. + /// + /// The index is returned relative to the start of the range. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// // Same behavior as binary_search when the range covers the whole slice: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7), + /// Some(Ok(3)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7), + /// Some(Err(4)) + /// ); + /// + /// // Will not look outside of the range: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1), + /// Some(Err(1)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7), + /// Some(Err(0)) + /// ); + /// + /// // Will return indices relative to the start of the range: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6), + /// Some(Ok(2)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6), + /// Some(Err(3)) + /// ); + /// + /// // Will return None if the range is out of bounds: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200), + /// None + /// ); + /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + pub fn binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.as_components() + .binary_search_in_range_by(predicate, range) + } +} +// Safety (based on the safety checklist on the VarULE trait): +// 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a +// `[u8]` slice which satisfies this invariant) +// 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a +// `[u8]` slice which satisfies this invariant) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data +// 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type) +unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + let _: VarZeroVecComponents<T, F> = VarZeroVecComponents::parse_byte_slice(bytes)?; + Ok(()) + } + + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + fn as_byte_slice(&self) -> &[u8] { + &self.entire_slice + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> { + type Output = T; + fn index(&self, index: usize) -> &Self::Output { + #[allow(clippy::panic)] // documented + match self.get(index) { + Some(x) => x, + None => panic!( + "index out of bounds: the len is {} but the index is {index}", + self.len() + ), + } + } +} + +impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + T: PartialEq, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &VarZeroSlice<T, F>) -> bool { + // VarULE has an API guarantee that this is equivalent + // to `T::VarULE::eq()` + self.entire_slice.eq(&other.entire_slice) + } +} + +impl<T, F> Eq for VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + T: Eq, + F: VarZeroVecFormat, +{ +} + +impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.iter().partial_cmp(other.iter()) + } +} + +impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> { + fn as_ref(&self) -> &VarZeroSlice<T, F> { + self + } +} |