// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::components::VarZeroVecComponents; use super::*; use crate::ule::*; use alloc::boxed::Box; use alloc::vec::Vec; use core::cmp::{Ord, Ordering, PartialOrd}; use core::fmt; use core::marker::PhantomData; use core::mem; use core::ops::Index; use core::ops::Range; /// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]` /// where `T` is not `Sized`. /// /// This behaves similarly to [`VarZeroVec`], however [`VarZeroVec`] is allowed to contain /// owned data and as such is ideal for deserialization since most human readable /// serialization formats cannot unconditionally deserialize zero-copy. /// /// This type can be used inside [`VarZeroVec`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): /// This essentially allows for the construction of zero-copy types isomorphic to `Vec>` by instead /// using `VarZeroVec>`. /// /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. /// /// This type can be nested within itself to allow for multi-level nested `Vec`s. /// /// # Examples /// /// ## Nested Slices /// /// The following code constructs the conceptual zero-copy equivalent of `Vec>>` /// /// ```rust /// use zerovec::ule::*; /// use zerovec::{VarZeroSlice, VarZeroVec, ZeroVec}; /// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"]; /// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"]; /// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"]; /// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"]; /// let strings_12 = vec![&*strings_1, &*strings_2]; /// let strings_34 = vec![&*strings_3, &*strings_4]; /// let all_strings = vec![strings_12, strings_34]; /// /// let vzv_1: VarZeroVec = VarZeroVec::from(&strings_1); /// let vzv_2: VarZeroVec = VarZeroVec::from(&strings_2); /// let vzv_3: VarZeroVec = VarZeroVec::from(&strings_3); /// let vzv_4: VarZeroVec = VarZeroVec::from(&strings_4); /// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]); /// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]); /// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]); /// /// let reconstructed: Vec>> = vzv_all /// .iter() /// .map(|v: &VarZeroSlice>| { /// v.iter() /// .map(|x: &VarZeroSlice<_>| { /// x.as_varzerovec() /// .iter() /// .map(|s| s.to_owned()) /// .collect::>() /// }) /// .collect::>() /// }) /// .collect::>(); /// assert_eq!(reconstructed, all_strings); /// /// let bytes = vzv_all.as_bytes(); /// let vzv_from_bytes: VarZeroVec>> = /// VarZeroVec::parse_byte_slice(bytes).unwrap(); /// assert_eq!(vzv_from_bytes, vzv_all); /// ``` /// /// ## Iterate over Windows /// /// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like /// [core::slice::Windows], this behavior can be easily modeled using an iterator: /// /// ``` /// use zerovec::VarZeroVec; /// /// let vzv = VarZeroVec::::from(&["a", "b", "c", "d"]); /// # let mut pairs: Vec<(&str, &str)> = Vec::new(); /// /// let mut it = vzv.iter().peekable(); /// while let (Some(x), Some(y)) = (it.next(), it.peek()) { /// // Evaluate (x, y) here. /// # pairs.push((x, y)); /// } /// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]); /// ``` // // safety invariant: The slice MUST be one which parses to // a valid VarZeroVecComponents #[repr(transparent)] pub struct VarZeroSlice { marker: PhantomData<(F, T)>, /// The original slice this was constructed from entire_slice: [u8], } impl VarZeroSlice { /// Construct a new empty VarZeroSlice pub const fn new_empty() -> &'static Self { let arr: &[u8] = &[]; unsafe { mem::transmute(arr) } } /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer #[inline] pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> { unsafe { // safety: VarZeroSlice is guaranteed to parse here VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice) } } /// Uses a `&[u8]` buffer as a `VarZeroSlice` without any verification. /// /// # Safety /// /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } /// Get the number of elements in this slice /// /// # Example /// /// ```rust /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.len(), 4); /// # Ok::<(), ZeroVecError>(()) /// ``` pub fn len(&self) -> usize { self.as_components().len() } /// Returns `true` if the slice contains no elements. /// /// # Examples /// /// ``` /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings: Vec = vec![]; /// let vec = VarZeroVec::::from(&strings); /// /// assert!(vec.is_empty()); /// # Ok::<(), ZeroVecError>(()) /// ``` pub fn is_empty(&self) -> bool { self.as_components().is_empty() } /// Obtain an iterator over this slice's elements /// /// # Example /// /// ```rust /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(iter_results[0], "foo"); /// assert_eq!(iter_results[1], "bar"); /// assert_eq!(iter_results[2], "baz"); /// assert_eq!(iter_results[3], "quux"); /// # Ok::<(), ZeroVecError>(()) /// ``` pub fn iter<'b>(&'b self) -> impl Iterator { self.as_components().iter() } /// Get one of this slice's elements, returning None if the index is out of bounds /// /// # Example /// /// ```rust /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(vec.get(0), Some("foo")); /// assert_eq!(vec.get(1), Some("bar")); /// assert_eq!(vec.get(2), Some("baz")); /// assert_eq!(vec.get(3), Some("quux")); /// assert_eq!(vec.get(4), None); /// # Ok::<(), ZeroVecError>(()) /// ``` pub fn get(&self, idx: usize) -> Option<&T> { self.as_components().get(idx) } /// Get one of this slice's elements /// /// # Safety /// /// `index` must be in range /// /// # Example /// /// ```rust /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// unsafe { /// assert_eq!(vec.get_unchecked(0), "foo"); /// assert_eq!(vec.get_unchecked(1), "bar"); /// assert_eq!(vec.get_unchecked(2), "baz"); /// assert_eq!(vec.get_unchecked(3), "quux"); /// } /// # Ok::<(), ZeroVecError>(()) /// ``` pub unsafe fn get_unchecked(&self, idx: usize) -> &T { self.as_components().get_unchecked(idx) } /// Obtain an owned `Vec>` out of this pub fn to_vec(&self) -> Vec> { self.as_components().to_vec() } /// Get a reference to the entire encoded backing buffer of this slice /// /// The bytes can be passed back to [`Self::parse_byte_slice()`]. /// /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. /// /// # Example /// /// ```rust /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz"]; /// let vzv = VarZeroVec::::from(&strings); /// /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap()); /// /// # Ok::<(), ZeroVecError>(()) /// ``` #[inline] pub const fn as_bytes(&self) -> &[u8] { &self.entire_slice } /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`] /// /// If you wish to repeatedly call methods on this [`VarZeroSlice`], /// it is more efficient to perform this conversion first pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { VarZeroVec::Borrowed(self) } /// Parse a VarZeroSlice from a slice of the appropriate format /// /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> { ::parse_byte_slice(slice) } /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice` /// /// # Safety /// - `bytes` must be a valid sequence of bytes for this VarZeroVec pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] { let range = self.as_components().get_range(idx); #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges &mut self.entire_slice[range] } } impl VarZeroSlice where T: VarULE, T: ?Sized, T: Ord, F: VarZeroVecFormat, { /// Binary searches a sorted `VarZeroVec` for the given element. For more information, see /// the standard library function [`binary_search`]. /// /// # Example /// /// ``` /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["a", "b", "f", "g"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.binary_search("f"), Ok(2)); /// assert_eq!(vec.binary_search("e"), Err(2)); /// # Ok::<(), ZeroVecError>(()) /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search(&self, x: &T) -> Result { self.as_components().binary_search(x) } /// Binary searches a `VarZeroVec` for the given element within a certain sorted range. /// /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according /// to the behavior of the standard library function [`binary_search`]. /// /// The index is returned relative to the start of the range. /// /// # Example /// /// ``` /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; /// let vec = VarZeroVec::::from(&strings); /// /// // Same behavior as binary_search when the range covers the whole slice: /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3))); /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4))); /// /// // Will not look outside of the range: /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1))); /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0))); /// /// // Will return indices relative to the start of the range: /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2))); /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3))); /// /// // Will return None if the range is out of bounds: /// assert_eq!(vec.binary_search_in_range("x", 100..200), None); /// assert_eq!(vec.binary_search_in_range("x", 0..200), None); /// # Ok::<(), ZeroVecError>(()) /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search_in_range( &self, x: &T, range: Range, ) -> Option> { self.as_components().binary_search_in_range(x, range) } } impl VarZeroSlice where T: VarULE, T: ?Sized, F: VarZeroVecFormat, { /// Binary searches a sorted `VarZeroVec` for the given predicate. For more information, see /// the standard library function [`binary_search_by`]. /// /// # Example /// /// ``` /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["a", "b", "f", "g"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2)); /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2)); /// # Ok::<(), ZeroVecError>(()) /// ``` /// /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by #[inline] pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { self.as_components().binary_search_by(predicate) } /// Binary searches a `VarZeroVec` for the given predicate within a certain sorted range. /// /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according /// to the behavior of the standard library function [`binary_search`]. /// /// The index is returned relative to the start of the range. /// /// # Example /// /// ``` /// # use std::str::Utf8Error; /// # use zerovec::ule::ZeroVecError; /// # use zerovec::VarZeroVec; /// /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; /// let vec = VarZeroVec::::from(&strings); /// /// // Same behavior as binary_search when the range covers the whole slice: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7), /// Some(Ok(3)) /// ); /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7), /// Some(Err(4)) /// ); /// /// // Will not look outside of the range: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1), /// Some(Err(1)) /// ); /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7), /// Some(Err(0)) /// ); /// /// // Will return indices relative to the start of the range: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6), /// Some(Ok(2)) /// ); /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6), /// Some(Err(3)) /// ); /// /// // Will return None if the range is out of bounds: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200), /// None /// ); /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None); /// # Ok::<(), ZeroVecError>(()) /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search pub fn binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { self.as_components() .binary_search_in_range_by(predicate, range) } } // Safety (based on the safety checklist on the VarULE trait): // 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a // `[u8]` slice which satisfies this invariant) // 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a // `[u8]` slice which satisfies this invariant) // 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. // 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety // 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. // 6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data // 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type) unsafe impl VarULE for VarZeroSlice { fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { let _: VarZeroVecComponents = VarZeroVecComponents::parse_byte_slice(bytes)?; Ok(()) } unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } fn as_byte_slice(&self) -> &[u8] { &self.entire_slice } } impl Index for VarZeroSlice { type Output = T; fn index(&self, index: usize) -> &Self::Output { #[allow(clippy::panic)] // documented match self.get(index) { Some(x) => x, None => panic!( "index out of bounds: the len is {} but the index is {index}", self.len() ), } } } impl PartialEq> for VarZeroSlice where T: VarULE, T: ?Sized, T: PartialEq, F: VarZeroVecFormat, { #[inline] fn eq(&self, other: &VarZeroSlice) -> bool { // VarULE has an API guarantee that this is equivalent // to `T::VarULE::eq()` self.entire_slice.eq(&other.entire_slice) } } impl Eq for VarZeroSlice where T: VarULE, T: ?Sized, T: Eq, F: VarZeroVecFormat, { } impl PartialOrd for VarZeroSlice { #[inline] fn partial_cmp(&self, other: &Self) -> Option { self.iter().partial_cmp(other.iter()) } } impl Ord for VarZeroSlice { #[inline] fn cmp(&self, other: &Self) -> Ordering { self.iter().cmp(other.iter()) } } impl fmt::Debug for VarZeroSlice where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.iter()).finish() } } impl AsRef> for VarZeroSlice { fn as_ref(&self) -> &VarZeroSlice { self } }