From 4e8199b572f2035b7749cba276ece3a26630d23e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:21 +0200 Subject: Adding upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/zerovec/src/varzerovec/components.rs | 578 ++++++++++++++++++++++++ vendor/zerovec/src/varzerovec/databake.rs | 50 +++ vendor/zerovec/src/varzerovec/mod.rs | 26 ++ vendor/zerovec/src/varzerovec/owned.rs | 665 ++++++++++++++++++++++++++++ vendor/zerovec/src/varzerovec/serde.rs | 246 ++++++++++ vendor/zerovec/src/varzerovec/slice.rs | 549 +++++++++++++++++++++++ vendor/zerovec/src/varzerovec/vec.rs | 493 +++++++++++++++++++++ 7 files changed, 2607 insertions(+) create mode 100644 vendor/zerovec/src/varzerovec/components.rs create mode 100644 vendor/zerovec/src/varzerovec/databake.rs create mode 100644 vendor/zerovec/src/varzerovec/mod.rs create mode 100644 vendor/zerovec/src/varzerovec/owned.rs create mode 100644 vendor/zerovec/src/varzerovec/serde.rs create mode 100644 vendor/zerovec/src/varzerovec/slice.rs create mode 100644 vendor/zerovec/src/varzerovec/vec.rs (limited to 'vendor/zerovec/src/varzerovec') diff --git a/vendor/zerovec/src/varzerovec/components.rs b/vendor/zerovec/src/varzerovec/components.rs new file mode 100644 index 000000000..bf70c83c5 --- /dev/null +++ b/vendor/zerovec/src/varzerovec/components.rs @@ -0,0 +1,578 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use alloc::boxed::Box; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::convert::TryFrom; +use core::marker::PhantomData; +use core::ops::Range; + +// Also used by owned.rs +pub(super) const LENGTH_WIDTH: usize = 4; +pub(super) const METADATA_WIDTH: usize = 0; +pub(super) const MAX_LENGTH: usize = u32::MAX as usize; +pub(super) const MAX_INDEX: usize = u32::MAX as usize; + +/// This trait allows switching between different possible internal +/// representations of VarZeroVec. +/// +/// Currently this crate supports two formats: [`Index16`] and [`Index32`], +/// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec) +/// types unless explicitly specified otherwise. +/// +/// Do not implement this trait, its internals may be changed in the future, +/// and all of its associated items are hidden from the docs. +#[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period +pub unsafe trait VarZeroVecFormat: 'static + Sized { + #[doc(hidden)] + const INDEX_WIDTH: usize; + #[doc(hidden)] + const MAX_VALUE: u32; + /// This is always `RawBytesULE` however + /// Rust does not currently support using associated constants in const + /// generics + #[doc(hidden)] + type RawBytes: ULE; + + // various conversions because RawBytes is an associated constant now + #[doc(hidden)] + fn rawbytes_to_usize(raw: Self::RawBytes) -> usize; + #[doc(hidden)] + fn usize_to_rawbytes(u: usize) -> Self::RawBytes; + + #[doc(hidden)] + fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes]; +} + +/// This is a [`VarZeroVecFormat`] that stores u16s in the index array. +/// Will have a smaller data size, but it's more likely for larger arrays +/// to be unrepresentable (and error on construction) +/// +/// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) tyoes. +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] // marker +pub struct Index16; + +/// This is a [`VarZeroVecFormat`] that stores u32s in the index array. +/// Will have a larger data size, but will support large arrays without +/// problems. +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] // marker +pub struct Index32; + +unsafe impl VarZeroVecFormat for Index16 { + const INDEX_WIDTH: usize = 2; + const MAX_VALUE: u32 = u16::MAX as u32; + type RawBytes = RawBytesULE<2>; + #[inline] + fn rawbytes_to_usize(raw: Self::RawBytes) -> usize { + raw.as_unsigned_int() as usize + } + #[inline] + fn usize_to_rawbytes(u: usize) -> Self::RawBytes { + (u as u16).to_unaligned() + } + #[inline] + fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes] { + Self::RawBytes::from_byte_slice_unchecked_mut(bytes) + } +} + +unsafe impl VarZeroVecFormat for Index32 { + const INDEX_WIDTH: usize = 4; + const MAX_VALUE: u32 = u32::MAX as u32; + type RawBytes = RawBytesULE<4>; + #[inline] + fn rawbytes_to_usize(raw: Self::RawBytes) -> usize { + raw.as_unsigned_int() as usize + } + #[inline] + fn usize_to_rawbytes(u: usize) -> Self::RawBytes { + (u as u32).to_unaligned() + } + #[inline] + fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes] { + Self::RawBytes::from_byte_slice_unchecked_mut(bytes) + } +} + +/// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec +/// internal representation code lies. +/// +/// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into +/// the buffer components. Logically this is capable of behaving as +/// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually +/// exist. +/// +/// See [`VarZeroVecComponents::parse_byte_slice()`] for information on the internal invariants involved +pub struct VarZeroVecComponents<'a, T: ?Sized, F> { + /// The number of elements + len: u32, + /// The list of indices into the `things` slice + indices: &'a [u8], + /// The contiguous list of `T::VarULE`s + things: &'a [u8], + /// The original slice this was constructed from + entire_slice: &'a [u8], + marker: PhantomData<(&'a T, F)>, +} + +// #[derive()] won't work here since we do not want it to be +// bound on T: Copy +impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {} +impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> { + fn clone(&self) -> Self { + VarZeroVecComponents { + len: self.len, + indices: self.indices, + things: self.things, + entire_slice: self.entire_slice, + marker: PhantomData, + } + } +} + +impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> { + #[inline] + pub fn new() -> Self { + Self { + len: 0, + indices: &[], + things: &[], + entire_slice: &[], + marker: PhantomData, + } + } +} +impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> { + /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size: + /// + /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize) + /// - There must be at least `4*length + 4` bytes total, to form the the array `indices` of indices + /// - `indices[i]..indices[i+1]` must index into a valid section of + /// `things`, such that it parses to a `T::VarULE` + /// - `indices[len - 1]..things.len()` must index into a valid section of + /// `things`, such that it parses to a `T::VarULE` + #[inline] + pub fn parse_byte_slice(slice: &'a [u8]) -> Result { + if slice.is_empty() { + return Ok(VarZeroVecComponents { + len: 0, + indices: &[], + things: &[], + entire_slice: slice, + marker: PhantomData, + }); + } + let len_bytes = slice + .get(0..LENGTH_WIDTH) + .ok_or(ZeroVecError::VarZeroVecFormatError)?; + let len_ule = RawBytesULE::::parse_byte_slice(len_bytes) + .map_err(|_| ZeroVecError::VarZeroVecFormatError)?; + + let len = len_ule + .get(0) + .ok_or(ZeroVecError::VarZeroVecFormatError)? + .as_unsigned_int(); + let indices_bytes = slice + .get( + LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * (len as usize), + ) + .ok_or(ZeroVecError::VarZeroVecFormatError)?; + let things = slice + .get(F::INDEX_WIDTH * (len as usize) + LENGTH_WIDTH + METADATA_WIDTH..) + .ok_or(ZeroVecError::VarZeroVecFormatError)?; + + let borrowed = VarZeroVecComponents { + len: len as u32, + indices: indices_bytes, + things, + entire_slice: slice, + marker: PhantomData, + }; + + borrowed.check_indices_and_things()?; + + Ok(borrowed) + } + + /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously + /// successfully returned a [`VarZeroVecComponents`] when passed to + /// [`VarZeroVecComponents::parse_byte_slice()`]. Will return the same + /// object as one would get from calling [`VarZeroVecComponents::parse_byte_slice()`]. + /// + /// # Safety + /// The bytes must have previously successfully run through + /// [`VarZeroVecComponents::parse_byte_slice()`] + pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self { + if slice.is_empty() { + return VarZeroVecComponents { + len: 0, + indices: &[], + things: &[], + entire_slice: slice, + marker: PhantomData, + }; + } + let len_bytes = slice.get_unchecked(0..LENGTH_WIDTH); + let len_ule = RawBytesULE::::from_byte_slice_unchecked(len_bytes); + + let len = len_ule.get_unchecked(0).as_unsigned_int(); + let indices_bytes = slice.get_unchecked( + LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * (len as usize), + ); + let things = + slice.get_unchecked(LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * (len as usize)..); + + VarZeroVecComponents { + len, + indices: indices_bytes, + things, + entire_slice: slice, + marker: PhantomData, + } + } + + /// Get the number of elements in this vector + #[inline] + pub fn len(self) -> usize { + self.len as usize + } + + /// Returns `true` if the vector contains no elements. + #[inline] + pub fn is_empty(self) -> bool { + self.indices.is_empty() + } + + /// Get the idx'th element out of this slice. Returns `None` if out of bounds. + #[inline] + pub fn get(self, idx: usize) -> Option<&'a T> { + if idx >= self.len() { + return None; + } + Some(unsafe { self.get_unchecked(idx) }) + } + + /// Get the idx'th element out of this slice. Does not bounds check. + /// + /// Safety: + /// - `idx` must be in bounds (`idx < self.len()`) + #[inline] + pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T { + let range = self.get_things_range(idx); + let things_slice = self.things.get_unchecked(range); + T::from_byte_slice_unchecked(things_slice) + } + + /// Get the range in `things` for the element at `idx`. Does not bounds check. + /// + /// Safety: + /// - `idx` must be in bounds (`idx < self.len()`) + #[inline] + unsafe fn get_things_range(self, idx: usize) -> Range { + let start = F::rawbytes_to_usize(*self.indices_slice().get_unchecked(idx)); + let end = if idx + 1 == self.len() { + self.things.len() + } else { + F::rawbytes_to_usize(*self.indices_slice().get_unchecked(idx + 1)) + }; + debug_assert!(start <= end); + start..end + } + + /// Get the range in `entire_slice` for the element at `idx`. Does not bounds check. + /// + /// Safety: + /// - `idx` must be in bounds (`idx < self.len()`) + #[inline] + pub(crate) unsafe fn get_range(self, idx: usize) -> Range { + let range = self.get_things_range(idx); + let offset = (self.things as *const [u8] as *const u8) + .offset_from(self.entire_slice as *const [u8] as *const u8) + as usize; + range.start + offset..range.end + offset + } + + /// Check the internal invariants of VarZeroVecComponents: + /// + /// - `indices[i]..indices[i+1]` must index into a valid section of + /// `things`, such that it parses to a `T::VarULE` + /// - `indices[len - 1]..things.len()` must index into a valid section of + /// `things`, such that it parses to a `T::VarULE` + /// - `indices` is monotonically increasing + /// + /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods + /// assume that the slice has been passed through check_indices_and_things + #[inline] + #[allow(clippy::len_zero)] // more explicit to enforce safety invariants + fn check_indices_and_things(self) -> Result<(), ZeroVecError> { + assert_eq!(self.len(), self.indices_slice().len()); + if self.len() == 0 { + if self.things.len() > 0 { + return Err(ZeroVecError::VarZeroVecFormatError); + } else { + return Ok(()); + } + } + // Safety: i is in bounds (assertion above) + let mut start = F::rawbytes_to_usize(unsafe { *self.indices_slice().get_unchecked(0) }); + if start != 0 { + return Err(ZeroVecError::VarZeroVecFormatError); + } + for i in 0..self.len() { + let end = if i == self.len() - 1 { + self.things.len() + } else { + // Safety: i+1 is in bounds (assertion above) + F::rawbytes_to_usize(unsafe { *self.indices_slice().get_unchecked(i + 1) }) + }; + if start > end { + return Err(ZeroVecError::VarZeroVecFormatError); + } + if end > self.things.len() { + return Err(ZeroVecError::VarZeroVecFormatError); + } + // Safety: start..end is a valid range in self.things + let bytes = unsafe { self.things.get_unchecked(start..end) }; + T::parse_byte_slice(bytes)?; + start = end; + } + Ok(()) + } + + /// Create an iterator over the Ts contained in VarZeroVecComponents + #[inline] + pub fn iter(self) -> impl Iterator { + self.indices_slice() + .iter() + .copied() + .map(F::rawbytes_to_usize) + .zip( + self.indices_slice() + .iter() + .copied() + .map(F::rawbytes_to_usize) + .skip(1) + .chain(core::iter::once(self.things.len())), + ) + .map(move |(start, end)| unsafe { self.things.get_unchecked(start..end) }) + .map(|bytes| unsafe { T::from_byte_slice_unchecked(bytes) }) + } + + pub fn to_vec(self) -> Vec> { + self.iter().map(T::to_boxed).collect() + } + + #[inline] + fn indices_slice(&self) -> &'a [F::RawBytes] { + unsafe { F::RawBytes::from_byte_slice_unchecked(self.indices) } + } + + // Dump a debuggable representation of this type + #[allow(unused)] // useful for debugging + pub(crate) fn dump(&self) -> String { + let indices = self + .indices_slice() + .iter() + .copied() + .map(F::rawbytes_to_usize) + .collect::>(); + format!("VarZeroVecComponents {{ indices: {:?} }}", indices) + } +} + +impl<'a, T, F> VarZeroVecComponents<'a, T, F> +where + T: VarULE, + T: ?Sized, + T: Ord, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVecComponents` for the given element. For more information, see + /// the primitive function [`binary_search`](slice::binary_search). + pub fn binary_search(&self, needle: &T) -> Result { + self.binary_search_impl(|probe| probe.cmp(needle), self.indices_slice()) + } + + pub fn binary_search_in_range( + &self, + needle: &T, + range: Range, + ) -> Option> { + let indices_slice = self.indices_slice().get(range)?; + Some(self.binary_search_impl(|probe| probe.cmp(needle), indices_slice)) + } +} + +impl<'a, T, F> VarZeroVecComponents<'a, T, F> +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVecComponents` for the given predicate. For more information, see + /// the primitive function [`binary_search_by`](slice::binary_search_by). + pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { + self.binary_search_impl(predicate, self.indices_slice()) + } + + pub fn binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range, + ) -> Option> { + let indices_slice = self.indices_slice().get(range)?; + Some(self.binary_search_impl(predicate, indices_slice)) + } + + /// Binary searches a sorted `VarZeroVecComponents` with the given predicate. For more information, see + /// the primitive function [`binary_search`](slice::binary_search). + fn binary_search_impl( + &self, + mut predicate: impl FnMut(&T) -> Ordering, + indices_slice: &[F::RawBytes], + ) -> Result { + // This code is an absolute atrocity. This code is not a place of honor. This + // code is known to the State of California to cause cancer. + // + // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices. + // We do not have a slice. We have something we can .get() and index on, but that is not + // a slice. + // + // The `binary_search*` functions also do not have a variant where they give you the element's + // index, which we could otherwise use to directly index `self`. + // We do have `self.indices`, but these are indices into a byte buffer, which cannot in + // isolation be used to recoup the logical index of the element they refer to. + // + // However, `binary_search_by()` provides references to the elements of the slice being iterated. + // Since the layout of Rust slices is well-defined, we can do pointer arithmetic on these references + // to obtain the index being used by the search. + // + // It's worth noting that the slice we choose to search is irrelevant, as long as it has the appropriate + // length. `self.indices` is defined to have length `self.len()`, so it is convenient to use + // here and does not require additional allocations. + // + // The alternative to doing this is to implement our own binary search. This is significantly less fun. + + // Note: We always use zero_index relative to the whole indices array, even if we are + // only searching a subslice of it. + let zero_index = self.indices.as_ptr() as *const _ as usize; + indices_slice.binary_search_by(|probe: &_| { + // `self.indices` is a vec of unaligned F::INDEX_WIDTH values, so we divide by F::INDEX_WIDTH + // to get the actual index + let index = (probe as *const _ as usize - zero_index) / F::INDEX_WIDTH; + // safety: we know this is in bounds + let actual_probe = unsafe { self.get_unchecked(index) }; + predicate(actual_probe) + }) + } +} + +/// Collects the bytes for a VarZeroSlice into a Vec. +pub fn get_serializable_bytes(elements: &[A]) -> Option> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE, + F: VarZeroVecFormat, +{ + let len = compute_serializable_len::(elements)?; + debug_assert!(len >= LENGTH_WIDTH as u32); + let mut output: Vec = alloc::vec![0; len as usize]; + write_serializable_bytes::(elements, &mut output); + Some(output) +} + +/// Writes the bytes for a VarZeroSlice into an output buffer. +/// +/// Every byte in the buffer will be initialized after calling this function. +/// +/// # Panics +/// +/// Panics if the buffer is not exactly the correct length. +pub fn write_serializable_bytes(elements: &[A], output: &mut [u8]) +where + T: VarULE + ?Sized, + A: EncodeAsVarULE, + F: VarZeroVecFormat, +{ + assert!(elements.len() <= MAX_LENGTH); + let num_elements_bytes = elements.len().to_le_bytes(); + #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior + output[0..LENGTH_WIDTH].copy_from_slice(&num_elements_bytes[0..LENGTH_WIDTH]); + + // idx_offset = offset from the start of the buffer for the next index + let mut idx_offset: usize = LENGTH_WIDTH + METADATA_WIDTH; + // first_dat_offset = offset from the start of the buffer of the first data block + let first_dat_offset: usize = idx_offset + elements.len() * F::INDEX_WIDTH; + // dat_offset = offset from the start of the buffer of the next data block + let mut dat_offset: usize = first_dat_offset; + + for element in elements.iter() { + let element_len = element.encode_var_ule_len(); + + let idx_limit = idx_offset + F::INDEX_WIDTH; + #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior + let idx_slice = &mut output[idx_offset..idx_limit]; + // VZV expects data offsets to be stored relative to the first data block + let idx = dat_offset - first_dat_offset; + assert!(idx <= MAX_INDEX); + #[allow(clippy::indexing_slicing)] // this function is explicitly panicky + idx_slice.copy_from_slice(&idx.to_le_bytes()[..F::INDEX_WIDTH]); + + let dat_limit = dat_offset + element_len; + #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior + let dat_slice = &mut output[dat_offset..dat_limit]; + element.encode_var_ule_write(dat_slice); + debug_assert_eq!(T::validate_byte_slice(dat_slice), Ok(())); + + idx_offset = idx_limit; + dat_offset = dat_limit; + } + + debug_assert_eq!( + idx_offset, + LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * elements.len() + ); + assert_eq!(dat_offset, output.len()); +} + +pub fn compute_serializable_len(elements: &[A]) -> Option +where + T: VarULE + ?Sized, + A: EncodeAsVarULE, + F: VarZeroVecFormat, +{ + let idx_len: u32 = u32::try_from(elements.len()) + .ok()? + .checked_mul(F::INDEX_WIDTH as u32)? + .checked_add(LENGTH_WIDTH as u32)? + .checked_add(METADATA_WIDTH as u32)?; + let data_len: u32 = elements + .iter() + .map(|v| u32::try_from(v.encode_var_ule_len()).ok()) + .fold(Some(0u32), |s, v| { + s.and_then(|s| v.and_then(|v| s.checked_add(v))) + })?; + let ret = idx_len.checked_add(data_len); + if let Some(r) = ret { + if r >= F::MAX_VALUE { + return None; + } + } + ret +} diff --git a/vendor/zerovec/src/varzerovec/databake.rs b/vendor/zerovec/src/varzerovec/databake.rs new file mode 100644 index 000000000..2c3d506b3 --- /dev/null +++ b/vendor/zerovec/src/varzerovec/databake.rs @@ -0,0 +1,50 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{ule::VarULE, VarZeroSlice, VarZeroVec}; +use databake::*; + +impl Bake for VarZeroVec<'_, T> { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + let bytes = self.as_bytes(); + // Safe because self.as_bytes is a safe input + quote! { unsafe { ::zerovec::VarZeroVec::from_bytes_unchecked(&[#(#bytes),*]) } } + } +} + +impl Bake for &VarZeroSlice { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + let bytes = self.as_bytes(); + // Safe because self.as_bytes is a safe input + quote! { unsafe { ::zerovec::VarZeroSlice::from_bytes_unchecked(&[#(#bytes),*]) } } + } +} + +#[test] +fn test_baked_vec() { + test_bake!( + VarZeroVec, + const: unsafe { + crate::VarZeroVec::from_bytes_unchecked(&[ + 2u8, 1u8, 0u8, 22u8, 0u8, 77u8, 1u8, 92u8, 17u8, + ]) + }, + zerovec + ); +} + +#[test] +fn test_baked_slice() { + test_bake!( + &VarZeroSlice, + const: unsafe { + crate::VarZeroSlice::from_bytes_unchecked(&[ + 2u8, 1u8, 0u8, 22u8, 0u8, 77u8, 1u8, 92u8, 17u8, + ]) + }, + zerovec + ); +} diff --git a/vendor/zerovec/src/varzerovec/mod.rs b/vendor/zerovec/src/varzerovec/mod.rs new file mode 100644 index 000000000..2e9f68000 --- /dev/null +++ b/vendor/zerovec/src/varzerovec/mod.rs @@ -0,0 +1,26 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! See [`VarZeroVec`](crate::VarZeroVec) for details + +pub(crate) mod components; +pub(crate) mod owned; +pub(crate) mod slice; +pub(crate) mod vec; + +#[cfg(feature = "databake")] +mod databake; + +#[cfg(feature = "serde")] +mod serde; + +pub use crate::{VarZeroSlice, VarZeroVec}; + +#[cfg(feature = "bench")] +#[doc(hidden)] +pub use components::VarZeroVecComponents; + +pub use components::{Index16, Index32, VarZeroVecFormat}; + +pub use owned::VarZeroVecOwned; diff --git a/vendor/zerovec/src/varzerovec/owned.rs b/vendor/zerovec/src/varzerovec/owned.rs new file mode 100644 index 000000000..af4692d48 --- /dev/null +++ b/vendor/zerovec/src/varzerovec/owned.rs @@ -0,0 +1,665 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// The mutation operations in this file should panic to prevent undefined behavior +#![allow(clippy::unwrap_used)] +#![allow(clippy::expect_used)] +#![allow(clippy::indexing_slicing)] +#![allow(clippy::panic)] + +use super::*; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::any; +use core::convert::TryInto; +use core::marker::PhantomData; +use core::ops::Deref; +use core::ops::Range; +use core::{fmt, ptr, slice}; + +use super::components::LENGTH_WIDTH; +use super::components::MAX_INDEX; +use super::components::MAX_LENGTH; +use super::components::METADATA_WIDTH; + +/// A fully-owned [`VarZeroVec`]. This type has no lifetime but has the same +/// internal buffer representation of [`VarZeroVec`], making it cheaply convertible to +/// [`VarZeroVec`] and [`VarZeroSlice`]. +/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +pub struct VarZeroVecOwned { + marker: PhantomData<(Box, F)>, + // safety invariant: must parse into a valid VarZeroVecComponents + entire_slice: Vec, +} + +impl Clone for VarZeroVecOwned { + fn clone(&self) -> Self { + VarZeroVecOwned { + marker: self.marker, + entire_slice: self.entire_slice.clone(), + } + } +} + +// The effect of a shift on the indices in the varzerovec. +#[derive(PartialEq)] +enum ShiftType { + Insert, + Replace, + Remove, +} + +impl Deref for VarZeroVecOwned { + type Target = VarZeroSlice; + fn deref(&self) -> &VarZeroSlice { + self.as_slice() + } +} + +impl VarZeroVecOwned { + /// Construct an empty VarZeroVecOwned + pub fn new() -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::new(), + } + } +} + +impl VarZeroVecOwned { + /// Construct a VarZeroVecOwned from a [`VarZeroSlice`] by cloning the internal data + pub fn from_slice(slice: &VarZeroSlice) -> Self { + Self { + marker: PhantomData, + entire_slice: slice.as_bytes().into(), + } + } + + /// Construct a VarZeroVecOwned from a list of elements + pub fn try_from_elements(elements: &[A]) -> Result + where + A: EncodeAsVarULE, + { + Ok(Self { + marker: PhantomData, + // TODO(#1410): Rethink length errors in VZV. + entire_slice: components::get_serializable_bytes::(elements).ok_or( + "Attempted to build VarZeroVec out of elements that \ + cumulatively are larger than a u32 in size", + )?, + }) + } + + /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] + pub fn as_slice(&self) -> &VarZeroSlice { + let slice: &[u8] = &*self.entire_slice; + unsafe { + // safety: the slice is known to come from a valid parsed VZV + VarZeroSlice::from_byte_slice_unchecked(slice) + } + } + + /// Try to allocate a buffer with enough capacity for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn with_capacity(capacity: usize) -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::with_capacity(capacity * (F::INDEX_WIDTH + 4)), + } + } + + /// Try to reserve space for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn reserve(&mut self, capacity: usize) { + self.entire_slice.reserve(capacity * (F::INDEX_WIDTH + 4)) + } + + /// Get the position of a specific element in the data segment. + /// + /// If `idx == self.len()`, it will return the size of the data segment (where a new element would go). + /// + /// ## Safety + /// `idx <= self.len()` and `self.as_encoded_bytes()` is well-formed. + unsafe fn element_position_unchecked(&self, idx: usize) -> usize { + let len = self.len(); + let out = if idx == len { + self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - (F::INDEX_WIDTH * len) + } else { + F::rawbytes_to_usize(*self.index_data(idx)) + }; + debug_assert!( + out + LENGTH_WIDTH + METADATA_WIDTH + len * F::INDEX_WIDTH <= self.entire_slice.len() + ); + out + } + + /// Get the range of a specific element in the data segment. + /// + /// ## Safety + /// `idx < self.len()` and `self.as_encoded_bytes()` is well-formed. + unsafe fn element_range_unchecked(&self, idx: usize) -> core::ops::Range { + let start = self.element_position_unchecked(idx); + let end = self.element_position_unchecked(idx + 1); + debug_assert!(start <= end, "{} > {}", start, end); + start..end + } + + /// Set the number of elements in the list without any checks. + /// + /// ## Safety + /// No safe functions may be called until `self.as_encoded_bytes()` is well-formed. + unsafe fn set_len(&mut self, len: usize) { + assert!(len <= MAX_LENGTH); + let len_bytes = len.to_le_bytes(); + self.entire_slice[0..LENGTH_WIDTH].copy_from_slice(&len_bytes[0..LENGTH_WIDTH]); + // Double-check that the length fits in the length field + assert_eq!(len_bytes[LENGTH_WIDTH..].iter().sum::(), 0); + } + + fn index_range(index: usize) -> Range { + let pos = LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * index; + pos..pos + F::INDEX_WIDTH + } + + /// Return the raw bytes representing the given `index`. + /// + /// ## Safety + /// The index must be valid, and self.as_encoded_bytes() must be well-formed + unsafe fn index_data(&self, index: usize) -> &F::RawBytes { + &F::RawBytes::from_byte_slice_unchecked(&self.entire_slice[Self::index_range(index)])[0] + } + + /// Return the mutable slice representing the given `index`. + /// + /// ## Safety + /// The index must be valid. self.as_encoded_bytes() must have allocated space + /// for this index, but need not have its length appropriately set. + unsafe fn index_data_mut(&mut self, index: usize) -> &mut F::RawBytes { + let ptr = self.entire_slice.as_mut_ptr(); + let range = Self::index_range(index); + + // Doing this instead of just `get_unchecked_mut()` because it's unclear + // if `get_unchecked_mut()` can be called out of bounds on a slice even + // if we know the buffer is larger. + let data = slice::from_raw_parts_mut(ptr.add(range.start), F::INDEX_WIDTH); + + &mut F::rawbytes_from_byte_slice_unchecked_mut(data)[0] + } + + /// Shift the indices starting with and after `starting_index` by the provided `amount`. + /// + /// ## Safety + /// Adding `amount` to each index after `starting_index` must not result in the slice from becoming malformed. + /// The length of the slice must be correctly set. + unsafe fn shift_indices(&mut self, starting_index: usize, amount: i32) { + let len = self.len(); + let indices = F::rawbytes_from_byte_slice_unchecked_mut( + &mut self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * len], + ); + for idx in &mut indices[starting_index..] { + let mut new_idx = F::rawbytes_to_usize(*idx); + if amount > 0 { + new_idx = new_idx.checked_add(amount.try_into().unwrap()).unwrap(); + } else { + new_idx = new_idx.checked_sub((-amount).try_into().unwrap()).unwrap(); + } + *idx = F::usize_to_rawbytes(new_idx); + } + } + + /// Get this [`VarZeroVecOwned`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroVecOwned`], + /// it is more efficient to perform this conversion first + pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { + self.as_slice().into() + } + + /// Empty the vector + pub fn clear(&mut self) { + self.entire_slice.clear() + } + + /// Consume this vector and return the backing buffer + #[inline] + pub fn into_bytes(self) -> Vec { + self.entire_slice + } + + /// Invalidate and resize the data at an index, optionally inserting or removing the index. + /// Also updates affected indices and the length. + /// Returns a slice to the new element data - it doesn't contain uninitialized data but its value is indeterminate. + /// + /// ## Safety + /// - `index` must be a valid index, or, if `shift_type == ShiftType::Insert`, `index == self.len()` is allowed. + /// - `new_size` musn't result in the data segment growing larger than `F::MAX_VALUE`. + unsafe fn shift(&mut self, index: usize, new_size: usize, shift_type: ShiftType) -> &mut [u8] { + // The format of the encoded data is: + // - four bytes of "len" + // - len*4 bytes for an array of indices + // - the actual data to which the indices point + // + // When inserting or removing an element, the size of the indices segment must be changed, + // so the data before the target element must be shifted by 4 bytes in addition to the + // shifting needed for the new element size. + let len = self.len(); + let slice_len = self.entire_slice.len(); + + let prev_element = match shift_type { + ShiftType::Insert => { + let pos = self.element_position_unchecked(index); + // In the case of an insert, there's no previous element, + // so it's an empty range at the new position. + pos..pos + } + _ => self.element_range_unchecked(index), + }; + + // How much shifting must be done in bytes due to removal/insertion of an index. + let index_shift: i64 = match shift_type { + ShiftType::Insert => F::INDEX_WIDTH as i64, + ShiftType::Replace => 0, + ShiftType::Remove => -(F::INDEX_WIDTH as i64), + }; + // The total shift in byte size of the owned slice. + let shift: i64 = + new_size as i64 - (prev_element.end - prev_element.start) as i64 + index_shift; + let new_slice_len = slice_len.wrapping_add(shift as usize); + if shift > 0 { + if new_slice_len > F::MAX_VALUE as usize { + panic!( + "Attempted to grow VarZeroVec to an encoded size that does not fit within the length size used by {}", + any::type_name::() + ); + } + self.entire_slice.reserve(shift as usize); + } + + // Now that we've ensured there's enough space, we can shift the data around. + { + // Note: There are no references introduced between pointer creation and pointer use, and all + // raw pointers are derived from a single &mut. This preserves pointer provenance. + let slice_range = self.entire_slice.as_mut_ptr_range(); + let data_start = slice_range + .start + .add(LENGTH_WIDTH + METADATA_WIDTH + len * F::INDEX_WIDTH); + let prev_element_p = + data_start.add(prev_element.start)..data_start.add(prev_element.end); + + // The memory range of the affected index. + // When inserting: where the new index goes. + // When removing: where the index being removed is. + // When replacing: unused. + let index_range = { + let index_start = slice_range + .start + .add(LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * index); + index_start..index_start.add(F::INDEX_WIDTH) + }; + + unsafe fn shift_bytes(block: Range<*const u8>, to: *mut u8) { + debug_assert!(block.end >= block.start); + ptr::copy(block.start, to, block.end.offset_from(block.start) as usize); + } + + if shift_type == ShiftType::Remove { + // Move the data before the element back by 4 to remove the index. + shift_bytes(index_range.end..prev_element_p.start, index_range.start); + } + + // Shift data after the element to its new position. + shift_bytes( + prev_element_p.end..slice_range.end, + prev_element_p + .start + .offset((new_size as i64 + index_shift) as isize), + ); + + let first_affected_index = match shift_type { + ShiftType::Insert => { + // Move data before the element forward by 4 to make space for a new index. + shift_bytes(index_range.start..prev_element_p.start, index_range.end); + + *self.index_data_mut(index) = F::usize_to_rawbytes(prev_element.start); + self.set_len(len + 1); + index + 1 + } + ShiftType::Remove => { + self.set_len(len - 1); + index + } + ShiftType::Replace => index + 1, + }; + // No raw pointer use should occur after this point (because of self.index_data and self.set_len). + + // Set the new slice length. This must be done after shifting data around to avoid uninitialized data. + self.entire_slice.set_len(new_slice_len); + + // Shift the affected indices. + self.shift_indices(first_affected_index, (shift - index_shift) as i32); + }; + + debug_assert!(self.verify_integrity()); + + // Return a mut slice to the new element data. + let element_pos = LENGTH_WIDTH + + METADATA_WIDTH + + self.len() * F::INDEX_WIDTH + + self.element_position_unchecked(index); + &mut self.entire_slice[element_pos..element_pos + new_size as usize] + } + + /// Checks the internal invariants of the vec to ensure safe code will not cause UB. + /// Returns whether integrity was verified. + /// + /// Note: an index is valid if it doesn't point to data past the end of the slice and is + /// less than or equal to all future indices. The length of the index segment is not part of each index. + fn verify_integrity(&self) -> bool { + if self.is_empty() && !self.entire_slice.is_empty() { + return false; + } + let slice_len = self.entire_slice.len(); + match slice_len { + 0 => return true, + 1..=3 => return false, + _ => (), + } + let len = unsafe { + RawBytesULE::::from_byte_slice_unchecked( + &self.entire_slice[..LENGTH_WIDTH], + )[0] + .as_unsigned_int() + }; + if len == 0 { + // An empty vec must have an empty slice: there is only a single valid byte representation. + return false; + } + if slice_len < LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::INDEX_WIDTH { + // Not enough room for the indices. + return false; + } + let data_len = + self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - len as usize * F::INDEX_WIDTH; + if data_len > MAX_INDEX { + // The data segment is too long. + return false; + } + + // Test index validity. + let indices = unsafe { + F::RawBytes::from_byte_slice_unchecked( + &self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::INDEX_WIDTH], + ) + }; + for idx in indices { + if F::rawbytes_to_usize(*idx) > data_len { + // Indices must not point past the data segment. + return false; + } + } + for window in indices.windows(2) { + if F::rawbytes_to_usize(window[0]) > F::rawbytes_to_usize(window[1]) { + // Indices must be in non-decreasing order. + return false; + } + } + true + } + + /// Insert an element at the end of this vector + pub fn push + ?Sized>(&mut self, element: &A) { + self.insert(self.len(), element) + } + + /// Insert an element at index `idx` + pub fn insert + ?Sized>(&mut self, index: usize, element: &A) { + let len = self.len(); + if index > len { + panic!( + "Called out-of-bounds insert() on VarZeroVec, index {} len {}", + index, len + ); + } + + let value_len = element.encode_var_ule_len(); + + if len == 0 { + let header_len = LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH; + let cap = header_len + value_len; + self.entire_slice.resize(cap, 0); + self.entire_slice[0] = 1; // set length + element.encode_var_ule_write(&mut self.entire_slice[header_len..]); + return; + } + + assert!(value_len < MAX_INDEX); + unsafe { + let place = self.shift(index, value_len, ShiftType::Insert); + element.encode_var_ule_write(place); + } + } + + /// Remove the element at index `idx` + pub fn remove(&mut self, index: usize) { + let len = self.len(); + if index >= len { + panic!( + "Called out-of-bounds remove() on VarZeroVec, index {} len {}", + index, len + ); + } + if len == 1 { + // This is removing the last element. Set the slice to empty to ensure all empty vecs have empty data slices. + self.entire_slice.clear(); + return; + } + unsafe { + self.shift(index, 0, ShiftType::Remove); + } + } + + /// Replace the element at index `idx` with another + pub fn replace + ?Sized>(&mut self, index: usize, element: &A) { + let len = self.len(); + if index >= len { + panic!( + "Called out-of-bounds replace() on VarZeroVec, index {} len {}", + index, len + ); + } + + let value_len = element.encode_var_ule_len(); + + assert!(value_len < MAX_INDEX); + unsafe { + let place = self.shift(index, value_len, ShiftType::Replace); + element.encode_var_ule_write(place); + } + } +} + +impl fmt::Debug for VarZeroVecOwned +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + VarZeroSlice::fmt(self, f) + } +} + +impl Default for VarZeroVecOwned { + fn default() -> Self { + Self::new() + } +} + +impl PartialEq<&'_ [A]> for VarZeroVecOwned +where + T: VarULE + ?Sized, + T: PartialEq, + A: AsRef, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &&[A]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_ref())) + } +} + +impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<&'a VarZeroSlice> + for VarZeroVecOwned +{ + fn from(other: &'a VarZeroSlice) -> Self { + Self::from_slice(other) + } +} + +#[cfg(test)] +mod test { + use super::VarZeroVecOwned; + #[test] + fn test_insert_integrity() { + let mut items: Vec = Vec::new(); + let mut zerovec = VarZeroVecOwned::::new(); + + // Insert into an empty vec. + items.insert(0, "1234567890".into()); + zerovec.insert(0, "1234567890"); + assert_eq!(zerovec, &*items); + + zerovec.insert(1, "foo3"); + items.insert(1, "foo3".into()); + assert_eq!(zerovec, &*items); + + // Insert at the end. + items.insert(items.len(), "qwertyuiop".into()); + zerovec.insert(zerovec.len(), "qwertyuiop"); + assert_eq!(zerovec, &*items); + + items.insert(0, "asdfghjkl;".into()); + zerovec.insert(0, "asdfghjkl;"); + assert_eq!(zerovec, &*items); + + items.insert(2, "".into()); + zerovec.insert(2, ""); + assert_eq!(zerovec, &*items); + } + + #[test] + // ensure that inserting empty items works + fn test_empty_inserts() { + let mut items: Vec = Vec::new(); + let mut zerovec = VarZeroVecOwned::::new(); + + // Insert into an empty vec. + items.insert(0, "".into()); + zerovec.insert(0, ""); + assert_eq!(zerovec, &*items); + + items.insert(0, "".into()); + zerovec.insert(0, ""); + assert_eq!(zerovec, &*items); + + items.insert(0, "1234567890".into()); + zerovec.insert(0, "1234567890"); + assert_eq!(zerovec, &*items); + + items.insert(0, "".into()); + zerovec.insert(0, ""); + assert_eq!(zerovec, &*items); + } + + #[test] + fn test_small_insert_integrity() { + // Tests that insert() works even when there + // is not enough space for the new index in entire_slice.len() + let mut items: Vec = Vec::new(); + let mut zerovec = VarZeroVecOwned::::new(); + + // Insert into an empty vec. + items.insert(0, "abc".into()); + zerovec.insert(0, "abc"); + assert_eq!(zerovec, &*items); + + zerovec.insert(1, "def"); + items.insert(1, "def".into()); + assert_eq!(zerovec, &*items); + } + + #[test] + #[should_panic] + fn test_insert_past_end() { + VarZeroVecOwned::::new().insert(1, ""); + } + + #[test] + fn test_remove_integrity() { + let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""]; + let mut zerovec = VarZeroVecOwned::::try_from_elements(&items).unwrap(); + + for index in [0, 2, 4, 0, 1, 1, 0] { + items.remove(index); + zerovec.remove(index); + assert_eq!(zerovec, &*items, "index {}, len {}", index, items.len()); + } + } + + #[test] + fn test_removing_last_element_clears() { + let mut zerovec = VarZeroVecOwned::::try_from_elements(&["buy some apples"]).unwrap(); + assert!(!zerovec.as_bytes().is_empty()); + zerovec.remove(0); + assert!(zerovec.as_bytes().is_empty()); + } + + #[test] + #[should_panic] + fn test_remove_past_end() { + VarZeroVecOwned::::new().remove(0); + } + + #[test] + fn test_replace_integrity() { + let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""]; + let mut zerovec = VarZeroVecOwned::::try_from_elements(&items).unwrap(); + + // Replace with an element of the same size (and the first element) + items[0] = "blablah"; + zerovec.replace(0, "blablah"); + assert_eq!(zerovec, &*items); + + // Replace with a smaller element + items[1] = "twily"; + zerovec.replace(1, "twily"); + assert_eq!(zerovec, &*items); + + // Replace an empty element + items[3] = "aoeuidhtns"; + zerovec.replace(3, "aoeuidhtns"); + assert_eq!(zerovec, &*items); + + // Replace the last element + items[6] = "0123456789"; + zerovec.replace(6, "0123456789"); + assert_eq!(zerovec, &*items); + + // Replace with an empty element + items[2] = ""; + zerovec.replace(2, ""); + assert_eq!(zerovec, &*items); + } + + #[test] + #[should_panic] + fn test_replace_past_end() { + VarZeroVecOwned::::new().replace(0, ""); + } +} diff --git a/vendor/zerovec/src/varzerovec/serde.rs b/vendor/zerovec/src/varzerovec/serde.rs new file mode 100644 index 000000000..dd6e863ff --- /dev/null +++ b/vendor/zerovec/src/varzerovec/serde.rs @@ -0,0 +1,246 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{VarZeroSlice, VarZeroVec, VarZeroVecFormat}; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt; +use core::marker::PhantomData; +use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; +#[cfg(feature = "serde")] +use serde::ser::{Serialize, SerializeSeq, Serializer}; + +struct VarZeroVecVisitor { + #[allow(clippy::type_complexity)] // this is a private marker type, who cares + marker: PhantomData<(fn() -> Box, F)>, +} + +impl Default for VarZeroVecVisitor { + fn default() -> Self { + Self { + marker: PhantomData, + } + } +} + +impl<'de, T, F> Visitor<'de> for VarZeroVecVisitor +where + T: VarULE + ?Sized, + Box: Deserialize<'de>, + F: VarZeroVecFormat, +{ + type Value = VarZeroVec<'de, T, F>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a sequence or borrowed buffer of bytes") + } + + fn visit_borrowed_bytes(self, bytes: &'de [u8]) -> Result + where + E: de::Error, + { + VarZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut vec: Vec> = if let Some(capacity) = seq.size_hint() { + Vec::with_capacity(capacity) + } else { + Vec::new() + }; + while let Some(value) = seq.next_element::>()? { + vec.push(value); + } + Ok(VarZeroVec::from(&vec)) + } +} + +/// This impl can be made available by enabling the optional `serde` feature of the `zerovec` crate +impl<'de, 'a, T, F> Deserialize<'de> for VarZeroVec<'a, T, F> +where + T: VarULE + ?Sized, + Box: Deserialize<'de>, + F: VarZeroVecFormat, + 'de: 'a, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let visitor = VarZeroVecVisitor::default(); + if deserializer.is_human_readable() { + deserializer.deserialize_seq(visitor) + } else { + deserializer.deserialize_bytes(visitor) + } + } +} + +/// This impl can be made available by enabling the optional `serde` feature of the `zerovec` crate +impl<'de, 'a, T, F> Deserialize<'de> for &'a VarZeroSlice +where + T: VarULE + ?Sized, + Box: Deserialize<'de>, + F: VarZeroVecFormat, + 'de: 'a, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "&VarZeroSlice cannot be deserialized from human-readable formats", + )) + } else { + let deserialized: VarZeroVec<'a, T, F> = VarZeroVec::deserialize(deserializer)?; + let borrowed = if let VarZeroVec::Borrowed(b) = deserialized { + b + } else { + return Err(de::Error::custom( + "&VarZeroSlice can only deserialize in zero-copy ways", + )); + }; + Ok(borrowed) + } + } +} + +/// This impl can be made available by enabling the optional `serde` feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl Serialize for VarZeroVec<'_, T, F> +where + T: Serialize + VarULE + ?Sized, + F: VarZeroVecFormat, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + if serializer.is_human_readable() { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for value in self.iter() { + seq.serialize_element(value)?; + } + seq.end() + } else { + serializer.serialize_bytes(self.as_bytes()) + } + } +} + +/// This impl can be made available by enabling the optional `serde` feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl Serialize for VarZeroSlice +where + T: Serialize + VarULE + ?Sized, + F: VarZeroVecFormat, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_varzerovec().serialize(serializer) + } +} + +#[cfg(test)] +#[allow(non_camel_case_types)] +mod test { + use crate::{VarZeroSlice, VarZeroVec}; + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_VarZeroVec<'data> { + #[serde(borrow)] + _data: VarZeroVec<'data, str>, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_VarZeroSlice<'data> { + #[serde(borrow)] + _data: &'data VarZeroSlice, + } + + // ["foo", "bar", "baz", "dolor", "quux", "lorem ipsum"]; + const BYTES: &[u8] = &[ + 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122, + 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115, + 117, 109, + ]; + const JSON_STR: &str = "[\"foo\",\"bar\",\"baz\",\"dolor\",\"quux\",\"lorem ipsum\"]"; + const BINCODE_BUF: &[u8] = &[ + 45, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, + 98, 97, 114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, + 109, 32, 105, 112, 115, 117, 109, + ]; + + // ["w", "ω", "文", "𑄃"] + const NONASCII_STR: &[&str] = &["w", "ω", "文", "𑄃"]; + const NONASCII_BYTES: &[u8] = &[ + 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, + ]; + #[test] + fn test_serde_json() { + let zerovec_orig: VarZeroVec = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); + let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); + assert_eq!(JSON_STR, json_str); + // VarZeroVec should deserialize from JSON to either Vec or VarZeroVec + let vec_new: Vec> = + serde_json::from_str(&json_str).expect("deserialize from buffer to Vec"); + assert_eq!(zerovec_orig.to_vec(), vec_new); + let zerovec_new: VarZeroVec = + serde_json::from_str(&json_str).expect("deserialize from buffer to VarZeroVec"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + assert!(zerovec_new.is_owned()); + } + + #[test] + fn test_serde_bincode() { + let zerovec_orig: VarZeroVec = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + let zerovec_new: VarZeroVec = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroVec"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + assert!(!zerovec_new.is_owned()); + } + + #[test] + fn test_vzv_borrowed() { + let zerovec_orig: &VarZeroSlice = + VarZeroSlice::parse_byte_slice(BYTES).expect("parse"); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + let zerovec_new: &VarZeroSlice = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroSlice"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + } + + #[test] + fn test_nonascii_bincode() { + let src_vec = NONASCII_STR + .iter() + .copied() + .map(Box::::from) + .collect::>(); + let mut zerovec: VarZeroVec = + VarZeroVec::parse_byte_slice(NONASCII_BYTES).expect("parse"); + assert_eq!(zerovec.to_vec(), src_vec); + let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); + let zerovec_result = + bincode::deserialize::>(&bincode_buf).expect("deserialize"); + assert_eq!(zerovec_result.to_vec(), src_vec); + + // try again with owned zerovec + zerovec.make_mut(); + let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); + let zerovec_result = + bincode::deserialize::>(&bincode_buf).expect("deserialize"); + assert_eq!(zerovec_result.to_vec(), src_vec); + } +} diff --git a/vendor/zerovec/src/varzerovec/slice.rs b/vendor/zerovec/src/varzerovec/slice.rs new file mode 100644 index 000000000..59e8da03f --- /dev/null +++ b/vendor/zerovec/src/varzerovec/slice.rs @@ -0,0 +1,549 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::components::VarZeroVecComponents; +use super::*; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cmp::{Ord, Ordering, PartialOrd}; +use core::fmt; +use core::marker::PhantomData; +use core::mem; + +use core::ops::Index; +use core::ops::Range; + +/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]` +/// where `T` is not `Sized`. +/// +/// This behaves similarly to [`VarZeroVec`], however [`VarZeroVec`] is allowed to contain +/// owned data and as such is ideal for deserialization since most human readable +/// serialization formats cannot unconditionally deserialize zero-copy. +/// +/// This type can be used inside [`VarZeroVec`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): +/// This essentially allows for the construction of zero-copy types isomorphic to `Vec>` by instead +/// using `VarZeroVec>`. +/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +/// +/// This type can be nested within itself to allow for multi-level nested `Vec`s, for +/// example the following code constructs the conceptual zero-copy equivalent of `Vec>>` +/// +/// ```rust +/// use zerovec::ule::*; +/// use zerovec::{VarZeroSlice, VarZeroVec, ZeroVec}; +/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"]; +/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"]; +/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"]; +/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"]; +/// let strings_12 = vec![&*strings_1, &*strings_2]; +/// let strings_34 = vec![&*strings_3, &*strings_4]; +/// let all_strings = vec![strings_12, strings_34]; +/// +/// let vzv_1: VarZeroVec = VarZeroVec::from(&strings_1); +/// let vzv_2: VarZeroVec = VarZeroVec::from(&strings_2); +/// let vzv_3: VarZeroVec = VarZeroVec::from(&strings_3); +/// let vzv_4: VarZeroVec = VarZeroVec::from(&strings_4); +/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]); +/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]); +/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]); +/// +/// let reconstructed: Vec>> = vzv_all +/// .iter() +/// .map(|v: &VarZeroSlice>| { +/// v.iter() +/// .map(|x: &VarZeroSlice<_>| { +/// x.as_varzerovec() +/// .iter() +/// .map(|s| s.to_owned()) +/// .collect::>() +/// }) +/// .collect::>() +/// }) +/// .collect::>(); +/// assert_eq!(reconstructed, all_strings); +/// +/// let bytes = vzv_all.as_bytes(); +/// let vzv_from_bytes: VarZeroVec>> = +/// VarZeroVec::parse_byte_slice(bytes).unwrap(); +/// assert_eq!(vzv_from_bytes, vzv_all); +/// ``` +// +// safety invariant: The slice MUST be one which parses to +// a valid VarZeroVecComponents +#[repr(transparent)] +pub struct VarZeroSlice { + marker: PhantomData<(F, T)>, + /// The original slice this was constructed from + entire_slice: [u8], +} + +impl VarZeroSlice { + /// Construct a new empty VarZeroSlice + pub const fn new_empty() -> &'static Self { + let arr: &[u8] = &[]; + unsafe { mem::transmute(arr) } + } + + /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer + #[inline] + pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> { + unsafe { + // safety: VarZeroSlice is guaranteed to parse here + VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Uses a `&[u8]` buffer as a `VarZeroSlice` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + /// Get the number of elements in this slice + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn len(&self) -> usize { + self.as_components().len() + } + + /// Returns `true` if the slice contains no elements. + /// + /// # Examples + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings: Vec = vec![]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// assert!(vec.is_empty()); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn is_empty(&self) -> bool { + self.as_components().is_empty() + } + + /// Obtain an iterator over this slice's elements + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// assert_eq!(iter_results[0], "foo"); + /// assert_eq!(iter_results[1], "bar"); + /// assert_eq!(iter_results[2], "baz"); + /// assert_eq!(iter_results[3], "quux"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn iter<'b>(&'b self) -> impl Iterator { + self.as_components().iter() + } + + /// Get one of this slice's elements, returning None if the index is out of bounds + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// assert_eq!(vec.get(0), Some("foo")); + /// assert_eq!(vec.get(1), Some("bar")); + /// assert_eq!(vec.get(2), Some("baz")); + /// assert_eq!(vec.get(3), Some("quux")); + /// assert_eq!(vec.get(4), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn get(&self, idx: usize) -> Option<&T> { + self.as_components().get(idx) + } + + /// Get one of this slice's elements + /// + /// # Safety + /// + /// `index` must be in range + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// unsafe { + /// assert_eq!(vec.get_unchecked(0), "foo"); + /// assert_eq!(vec.get_unchecked(1), "bar"); + /// assert_eq!(vec.get_unchecked(2), "baz"); + /// assert_eq!(vec.get_unchecked(3), "quux"); + /// } + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub unsafe fn get_unchecked(&self, idx: usize) -> &T { + self.as_components().get_unchecked(idx) + } + + /// Obtain an owned `Vec>` out of this + pub fn to_vec(&self) -> Vec> { + self.as_components().to_vec() + } + + /// Get a reference to the entire encoded backing buffer of this slice + /// + /// The bytes can be passed back to [`Self::parse_byte_slice()`]. + /// + /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz"]; + /// let vzv = VarZeroVec::::from(&strings); + /// + /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap()); + /// + /// # Ok::<(), ZeroVecError>(()) + /// ``` + #[inline] + pub const fn as_bytes(&self) -> &[u8] { + &self.entire_slice + } + + /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroSlice`], + /// it is more efficient to perform this conversion first + pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { + VarZeroVec::Borrowed(self) + } + + /// Parse a VarZeroSlice from a slice of the appropriate format + /// + /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] + pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> { + ::parse_byte_slice(slice) + } + + /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice` + /// + /// # Safety + /// - `bytes` must be a valid sequence of bytes for this VarZeroVec + pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] { + let range = self.as_components().get_range(idx); + #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges + &mut self.entire_slice[range] + } +} + +impl VarZeroSlice +where + T: VarULE, + T: ?Sized, + T: Ord, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVec` for the given element. For more information, see + /// the standard library function [`binary_search`]. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// assert_eq!(vec.binary_search("f"), Ok(2)); + /// assert_eq!(vec.binary_search("e"), Err(2)); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T) -> Result { + self.as_components().binary_search(x) + } + + /// Binary searches a `VarZeroVec` for the given element within a certain sorted range. + /// + /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according + /// to the behavior of the standard library function [`binary_search`]. + /// + /// The index is returned relative to the start of the range. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// // Same behavior as binary_search when the range covers the whole slice: + /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3))); + /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4))); + /// + /// // Will not look outside of the range: + /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1))); + /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0))); + /// + /// // Will return indices relative to the start of the range: + /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2))); + /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3))); + /// + /// // Will return None if the range is out of bounds: + /// assert_eq!(vec.binary_search_in_range("x", 100..200), None); + /// assert_eq!(vec.binary_search_in_range("x", 0..200), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search_in_range( + &self, + x: &T, + range: Range, + ) -> Option> { + self.as_components().binary_search_in_range(x, range) + } +} + +impl VarZeroSlice +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVec` for the given predicate. For more information, see + /// the standard library function [`binary_search_by`]. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2)); + /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2)); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by + #[inline] + pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { + self.as_components().binary_search_by(predicate) + } + + /// Binary searches a `VarZeroVec` for the given predicate within a certain sorted range. + /// + /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according + /// to the behavior of the standard library function [`binary_search`]. + /// + /// The index is returned relative to the start of the range. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// // Same behavior as binary_search when the range covers the whole slice: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7), + /// Some(Ok(3)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7), + /// Some(Err(4)) + /// ); + /// + /// // Will not look outside of the range: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1), + /// Some(Err(1)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7), + /// Some(Err(0)) + /// ); + /// + /// // Will return indices relative to the start of the range: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6), + /// Some(Ok(2)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6), + /// Some(Err(3)) + /// ); + /// + /// // Will return None if the range is out of bounds: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200), + /// None + /// ); + /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + pub fn binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range, + ) -> Option> { + self.as_components() + .binary_search_in_range_by(predicate, range) + } +} +// Safety (based on the safety checklist on the VarULE trait): +// 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a +// `[u8]` slice which satisfies this invariant) +// 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a +// `[u8]` slice which satisfies this invariant) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data +// 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type) +unsafe impl VarULE for VarZeroSlice { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + let _: VarZeroVecComponents = VarZeroVecComponents::parse_byte_slice(bytes)?; + Ok(()) + } + + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + fn as_byte_slice(&self) -> &[u8] { + &self.entire_slice + } +} + +impl Index for VarZeroSlice { + type Output = T; + fn index(&self, index: usize) -> &Self::Output { + #[allow(clippy::panic)] // documented + match self.get(index) { + Some(x) => x, + None => panic!( + "index out of bounds: the len is {} but the index is {index}", + self.len() + ), + } + } +} + +impl PartialEq> for VarZeroSlice +where + T: VarULE, + T: ?Sized, + T: PartialEq, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &VarZeroSlice) -> bool { + // VarULE has an API guarantee that this is equivalent + // to `T::VarULE::eq()` + self.entire_slice.eq(&other.entire_slice) + } +} + +impl Eq for VarZeroSlice +where + T: VarULE, + T: ?Sized, + T: Eq, + F: VarZeroVecFormat, +{ +} + +impl PartialOrd for VarZeroSlice { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + self.iter().partial_cmp(other.iter()) + } +} + +impl Ord for VarZeroSlice { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +impl fmt::Debug for VarZeroSlice +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +impl AsRef> for VarZeroSlice { + fn as_ref(&self) -> &VarZeroSlice { + self + } +} diff --git a/vendor/zerovec/src/varzerovec/vec.rs b/vendor/zerovec/src/varzerovec/vec.rs new file mode 100644 index 000000000..031da6453 --- /dev/null +++ b/vendor/zerovec/src/varzerovec/vec.rs @@ -0,0 +1,493 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; + +use alloc::vec::Vec; +use core::cmp::{Ord, Ordering, PartialOrd}; +use core::fmt; +use core::ops::Deref; + +use super::*; + +/// A zero-copy vector for variable-width types. +/// +/// `VarZeroVec` is designed as a drop-in replacement for `Vec` in situations where it is +/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and +/// where `T`'s data is variable-length (e.g. `String`) +/// +/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more +/// complicated series of elements, it is implemented on `ZeroSlice` as well as `VarZeroSlice` +/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate +/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type. +/// +/// For example, here are some owned types and their zero-copy equivalents: +/// +/// - `Vec`: `VarZeroVec<'a, str>` +/// - `Vec>>`: `VarZeroVec<'a, [u8]>` +/// - `Vec>`: `VarZeroVec<'a, ZeroSlice>` +/// - `Vec>`: `VarZeroVec<'a, VarZeroSlice>` +/// +/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice`](VarZeroSlice). +/// +/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec). +/// +/// `VarZeroVec` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from +/// owned data (and then mutated!) but can also borrow from some buffer. +/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +/// +/// # Example +/// +/// ```rust +/// # use std::str::Utf8Error; +/// # use zerovec::ule::ZeroVecError; +/// use zerovec::VarZeroVec; +/// +/// // The little-endian bytes correspond to the list of strings. +/// let strings = vec!["w", "ω", "文", "𑄃"]; +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// #[serde(borrow)] +/// strings: VarZeroVec<'a, str>, +/// } +/// +/// let data = Data { +/// strings: VarZeroVec::from(&strings), +/// }; +/// +/// let bincode_bytes = +/// bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Data = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(deserialized.strings.get(2), Some("文")); +/// assert_eq!(deserialized.strings, &*strings); +/// # Ok::<(), ZeroVecError>(()) +/// ``` +/// +/// Here's another example with `ZeroSlice` (similar to `[T]`): +/// +/// ```rust +/// # use std::str::Utf8Error; +/// # use zerovec::ule::ZeroVecError; +/// use zerovec::ule::*; +/// use zerovec::VarZeroVec; +/// use zerovec::ZeroSlice; +/// use zerovec::ZeroVec; +/// +/// // The structured list correspond to the list of integers. +/// let numbers: &[&[u32]] = &[ +/// &[12, 25, 38], +/// &[39179, 100], +/// &[42, 55555], +/// &[12345, 54321, 9], +/// ]; +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// #[serde(borrow)] +/// vecs: VarZeroVec<'a, ZeroSlice>, +/// } +/// +/// let data = Data { +/// vecs: VarZeroVec::from(numbers), +/// }; +/// +/// let bincode_bytes = +/// bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// let deserialized: Data = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25); +/// assert_eq!(deserialized.vecs[1], *numbers[1]); +/// +/// # Ok::<(), ZeroVecError>(()) +/// ``` +/// +/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`] +/// for more information. +/// +/// # How it Works +/// +/// `VarZeroVec`, when used with non-human-readable serializers (like `bincode`), will +/// serialize to a specially formatted list of bytes. The format is: +/// +/// - 4 bytes for `length` (interpreted as a little-endian u32) +/// - `4 * length` bytes of `indices` (interpreted as little-endian u32) +/// - Remaining bytes for actual `data` +/// +/// Each element in the `indices` array points to the starting index of its corresponding +/// data part in the `data` list. The ending index can be calculated from the starting index +/// of the next element (or the length of the slice if dealing with the last element). +/// +/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details. +/// +/// [`ule`]: crate::ule +#[non_exhaustive] +pub enum VarZeroVec<'a, T: ?Sized, F = Index16> { + /// An allocated VarZeroVec, allowing for mutations. + /// + /// # Examples + /// + /// ``` + /// use zerovec::VarZeroVec; + /// + /// let mut vzv = VarZeroVec::::default(); + /// vzv.make_mut().push("foo"); + /// vzv.make_mut().push("bar"); + /// assert!(matches!(vzv, VarZeroVec::Owned(_))); + /// ``` + Owned(VarZeroVecOwned), + /// A borrowed VarZeroVec, requiring no allocations. + /// + /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned. + /// + /// # Examples + /// + /// ``` + /// use zerovec::VarZeroVec; + /// + /// let bytes = &[ + /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, + /// 145, 132, 131, + /// ]; + /// + /// let vzv: VarZeroVec = VarZeroVec::parse_byte_slice(bytes).unwrap(); + /// assert!(matches!(vzv, VarZeroVec::Borrowed(_))); + /// ``` + Borrowed(&'a VarZeroSlice), +} + +impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> { + fn clone(&self) -> Self { + match *self { + VarZeroVec::Owned(ref o) => o.clone().into(), + VarZeroVec::Borrowed(b) => b.into(), + } + } +} + +impl fmt::Debug for VarZeroVec<'_, T, F> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + VarZeroSlice::fmt(self, f) + } +} + +impl<'a, T: ?Sized, F> From> for VarZeroVec<'a, T, F> { + #[inline] + fn from(other: VarZeroVecOwned) -> Self { + VarZeroVec::Owned(other) + } +} + +impl<'a, T: ?Sized, F> From<&'a VarZeroSlice> for VarZeroVec<'a, T, F> { + fn from(other: &'a VarZeroSlice) -> Self { + VarZeroVec::Borrowed(other) + } +} + +impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From> + for VarZeroVecOwned +{ + #[inline] + fn from(other: VarZeroVec<'a, T, F>) -> Self { + match other { + VarZeroVec::Owned(o) => o, + VarZeroVec::Borrowed(b) => b.into(), + } + } +} + +impl Default for VarZeroVec<'_, T> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl Deref for VarZeroVec<'_, T, F> { + type Target = VarZeroSlice; + fn deref(&self) -> &VarZeroSlice { + self.as_slice() + } +} + +impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> { + /// Creates a new, empty `VarZeroVec`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::VarZeroVec; + /// + /// let vzv: VarZeroVec = VarZeroVec::new(); + /// assert!(vzv.is_empty()); + /// ``` + #[inline] + pub fn new() -> Self { + Self::Borrowed(VarZeroSlice::new_empty()) + } + + /// Parse a VarZeroVec from a slice of the appropriate format + /// + /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]. + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// assert_eq!(&vec[0], "foo"); + /// assert_eq!(&vec[1], "bar"); + /// assert_eq!(&vec[2], "baz"); + /// assert_eq!(&vec[3], "quux"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn parse_byte_slice(slice: &'a [u8]) -> Result { + let borrowed = VarZeroSlice::::parse_byte_slice(slice)?; + + Ok(VarZeroVec::Borrowed(borrowed)) + } + + /// Uses a `&[u8]` buffer as a `VarZeroVec` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. + pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { + Self::Borrowed(core::mem::transmute(bytes)) + } + + /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. + /// + /// + /// # Example + /// + /// ```rust,ignore + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let mut vec = VarZeroVec::::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// let mutvec = vec.make_mut(); + /// mutvec.push("lorem ipsum".into()); + /// mutvec[2] = "dolor sit".into(); + /// assert_eq!(&vec[0], "foo"); + /// assert_eq!(&vec[1], "bar"); + /// assert_eq!(&vec[2], "dolor sit"); + /// assert_eq!(&vec[3], "quux"); + /// assert_eq!(&vec[4], "lorem ipsum"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + // + // This function is crate-public for now since we don't yet want to stabilize + // the internal implementation details + pub fn make_mut(&mut self) -> &mut VarZeroVecOwned { + match self { + VarZeroVec::Owned(ref mut vec) => vec, + VarZeroVec::Borrowed(slice) => { + let new_self = VarZeroVecOwned::from_slice(slice); + *self = new_self.into(); + // recursion is limited since we are guaranteed to hit the Owned branch + self.make_mut() + } + } + } + + /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// // has 'static lifetime + /// let owned = vec.into_owned(); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> { + self.make_mut(); + match self { + VarZeroVec::Owned(vec) => vec.into(), + _ => unreachable!(), + } + } + + /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] + pub fn as_slice(&self) -> &VarZeroSlice { + match *self { + VarZeroVec::Owned(ref owned) => &**owned, + VarZeroVec::Borrowed(b) => b, + } + } + + /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed, + /// this function allocates a byte vector and copies the borrowed bytes into it. + /// + /// The bytes can be passed back to [`Self::parse_byte_slice()`]. + /// + /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`]. + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz"]; + /// let bytes = VarZeroVec::::from(&strings).into_bytes(); + /// + /// let mut borrowed: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; + /// assert_eq!(borrowed, &*strings); + /// + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn into_bytes(self) -> Vec { + match self { + VarZeroVec::Owned(vec) => vec.into_bytes(), + VarZeroVec::Borrowed(vec) => vec.as_bytes().to_vec(), + } + } + + /// Return whether the [`VarZeroVec`] is operating on owned or borrowed + /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can + /// be used to force it into an owned type + pub fn is_owned(&self) -> bool { + match self { + VarZeroVec::Owned(..) => true, + VarZeroVec::Borrowed(..) => false, + } + } + + #[cfg(feature = "bench")] + #[doc(hidden)] + pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> { + self.as_slice().as_components() + } +} + +impl From<&Vec> for VarZeroVec<'static, T, F> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE, + F: VarZeroVecFormat, +{ + #[inline] + fn from(elements: &Vec) -> Self { + #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility + VarZeroVecOwned::try_from_elements(elements).unwrap().into() + } +} + +impl From<&[A]> for VarZeroVec<'static, T, F> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE, + F: VarZeroVecFormat, +{ + #[inline] + fn from(elements: &[A]) -> Self { + #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility + VarZeroVecOwned::try_from_elements(elements).unwrap().into() + } +} + +impl From<&[A; N]> for VarZeroVec<'static, T, F> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE, + F: VarZeroVecFormat, +{ + #[inline] + fn from(elements: &[A; N]) -> Self { + #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility + VarZeroVecOwned::try_from_elements(elements).unwrap().into() + } +} + +impl<'a, 'b, T, F> PartialEq> for VarZeroVec<'a, T, F> +where + T: VarULE, + T: ?Sized, + T: PartialEq, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool { + // VarULE has an API guarantee that this is equivalent + // to `T::VarULE::eq()` + self.as_bytes().eq(other.as_bytes()) + } +} + +impl<'a, T, F> Eq for VarZeroVec<'a, T, F> +where + T: VarULE, + T: ?Sized, + T: Eq, + F: VarZeroVecFormat, +{ +} + +impl PartialEq<&'_ [A]> for VarZeroVec<'_, T, F> +where + T: VarULE + ?Sized, + T: PartialEq, + A: AsRef, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &&[A]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_ref())) + } +} + +impl PartialEq<[A; N]> for VarZeroVec<'_, T, F> +where + T: VarULE + ?Sized, + T: PartialEq, + A: AsRef, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &[A; N]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_ref())) + } +} + +impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> { + fn partial_cmp(&self, other: &Self) -> Option { + self.iter().partial_cmp(other.iter()) + } +} + +impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} -- cgit v1.2.3