diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec/src | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/zerovec/src')
51 files changed, 15709 insertions, 0 deletions
// diff --git a/third_party/rust/zerovec/src/error.rs b/third_party/rust/zerovec/src/error.rs
// new file mode 100644
// index 0000000000..85de3ecc8d
// --- /dev/null
// +++ b/third_party/rust/zerovec/src/error.rs
// @@ -0,0 +1,55 @@

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::any;
use core::fmt;

/// A generic error type to be used for decoding slices of ULE types.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ZeroVecError {
    /// Attempted to parse a buffer into a slice of the given ULE type but its
    /// length was not compatible.
    InvalidLength {
        /// Name of the type that was being decoded.
        ty: &'static str,
        /// The offending length that was reported.
        len: usize,
    },
    /// The byte sequence provided for `ty` failed to parse correctly.
    ParseError {
        /// Name of the type that was being decoded.
        ty: &'static str,
    },
    /// The byte buffer was not in the appropriate format for VarZeroVec.
    VarZeroVecFormatError,
}

impl fmt::Display for ZeroVecError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        match *self {
            ZeroVecError::InvalidLength { ty, len } => {
                write!(f, "Invalid length {len} for slice of type {ty}")
            }
            ZeroVecError::ParseError { ty } => {
                write!(f, "Could not parse bytes to slice of type {ty}")
            }
            ZeroVecError::VarZeroVecFormatError => {
                write!(f, "Invalid format for VarZeroVec buffer")
            }
        }
    }
}

impl ZeroVecError {
    /// Construct a parse error for the given type.
    ///
    /// The `ty` field is filled with `core::any::type_name::<T>()`.
    pub fn parse<T: ?Sized + 'static>() -> ZeroVecError {
        ZeroVecError::ParseError {
            ty: any::type_name::<T>(),
        }
    }

    /// Construct an "invalid length" error for the given type and length.
    ///
    /// The `ty` field is filled with `core::any::type_name::<T>()`; `len` is
    /// stored unchanged.
    pub fn length<T: ?Sized + 'static>(len: usize) -> ZeroVecError {
        ZeroVecError::InvalidLength {
            ty: any::type_name::<T>(),
            len,
        }
    }
}

// Only available with the `std` feature, because `std::error::Error` lives in `std`.
#[cfg(feature = "std")]
impl ::std::error::Error for ZeroVecError {}

// diff --git a/third_party/rust/zerovec/src/flexzerovec/databake.rs
b/third_party/rust/zerovec/src/flexzerovec/databake.rs new file mode 100644 index 0000000000..bd165352e8 --- /dev/null +++ b/third_party/rust/zerovec/src/flexzerovec/databake.rs @@ -0,0 +1,66 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{FlexZeroSlice, FlexZeroVec}; +use databake::*; + +impl Bake for FlexZeroVec<'_> { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + if self.is_empty() { + quote! { zerovec::vecs::FlexZeroVec::new() } + } else { + let slice = self.as_ref().bake(env); + quote! { #slice.as_flexzerovec() } + } + } +} + +impl Bake for &FlexZeroSlice { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + if self.is_empty() { + quote! { zerovec::vecs::FlexZeroSlice::new_empty() } + } else { + let bytes = databake::Bake::bake(&self.as_bytes(), env); + quote! { unsafe { zerovec::vecs::FlexZeroSlice::from_byte_slice_unchecked(#bytes) } } + } + } +} + +#[test] +fn test_baked_vec() { + test_bake!( + FlexZeroVec, + const: crate::vecs::FlexZeroVec::new(), + zerovec + ); + test_bake!( + FlexZeroVec, + const: unsafe { + crate::vecs::FlexZeroSlice::from_byte_slice_unchecked( + b"\x02\x01\0\x16\0M\x01\x11" + ) + }.as_flexzerovec(), + zerovec + ); +} + +#[test] +fn test_baked_slice() { + test_bake!( + &FlexZeroSlice, + const: crate::vecs::FlexZeroSlice::new_empty(), + zerovec + ); + test_bake!( + &FlexZeroSlice, + const: unsafe { + crate::vecs::FlexZeroSlice::from_byte_slice_unchecked( + b"\x02\x01\0\x16\0M\x01\x11" + ) + }, + zerovec + ); +} diff --git a/third_party/rust/zerovec/src/flexzerovec/mod.rs b/third_party/rust/zerovec/src/flexzerovec/mod.rs new file mode 100644 index 0000000000..b6d7e780ac --- /dev/null +++ b/third_party/rust/zerovec/src/flexzerovec/mod.rs @@ -0,0 +1,20 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! See [`FlexZeroVec`](crate::vecs::FlexZeroVec) for details. + +pub(crate) mod owned; +pub(crate) mod slice; +pub(crate) mod vec; + +#[cfg(feature = "databake")] +mod databake; + +#[cfg(feature = "serde")] +mod serde; + +pub use owned::FlexZeroVecOwned; +pub(crate) use slice::chunk_to_usize; +pub use slice::FlexZeroSlice; +pub use vec::FlexZeroVec; diff --git a/third_party/rust/zerovec/src/flexzerovec/owned.rs b/third_party/rust/zerovec/src/flexzerovec/owned.rs new file mode 100644 index 0000000000..7d7bfb33d6 --- /dev/null +++ b/third_party/rust/zerovec/src/flexzerovec/owned.rs @@ -0,0 +1,335 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use alloc::vec; +use alloc::vec::Vec; +use core::fmt; +use core::iter::FromIterator; +use core::ops::Deref; + +use super::FlexZeroSlice; +use super::FlexZeroVec; + +/// The fully-owned variant of [`FlexZeroVec`]. Contains all mutation methods. +// Safety invariant: the inner bytes must deref to a valid `FlexZeroSlice` +#[derive(Clone, PartialEq, Eq)] +pub struct FlexZeroVecOwned(Vec<u8>); + +impl FlexZeroVecOwned { + /// Creates a new [`FlexZeroVecOwned`] with zero elements. + pub fn new_empty() -> Self { + Self(vec![1]) + } + + /// Creates a [`FlexZeroVecOwned`] from a [`FlexZeroSlice`]. + pub fn from_slice(other: &FlexZeroSlice) -> FlexZeroVecOwned { + // safety: the bytes originate from a valid FlexZeroSlice + Self(other.as_bytes().to_vec()) + } + + /// Obtains this [`FlexZeroVecOwned`] as a [`FlexZeroSlice`]. 
+ pub fn as_slice(&self) -> &FlexZeroSlice { + let slice: &[u8] = &self.0; + unsafe { + // safety: the slice is known to come from a valid parsed FlexZeroSlice + FlexZeroSlice::from_byte_slice_unchecked(slice) + } + } + + /// Mutably obtains this `FlexZeroVecOwned` as a [`FlexZeroSlice`]. + pub(crate) fn as_mut_slice(&mut self) -> &mut FlexZeroSlice { + let slice: &mut [u8] = &mut self.0; + unsafe { + // safety: the slice is known to come from a valid parsed FlexZeroSlice + FlexZeroSlice::from_byte_slice_mut_unchecked(slice) + } + } + + /// Converts this `FlexZeroVecOwned` into a [`FlexZeroVec::Owned`]. + #[inline] + pub fn into_flexzerovec(self) -> FlexZeroVec<'static> { + FlexZeroVec::Owned(self) + } + + /// Clears all values out of this `FlexZeroVecOwned`. + #[inline] + pub fn clear(&mut self) { + *self = Self::new_empty() + } + + /// Appends an item to the end of the vector. + /// + /// # Panics + /// + /// Panics if inserting the element would require allocating more than `usize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect(); + /// zv.to_mut().push(33); + /// assert_eq!(zv.to_vec(), vec![22, 44, 66, 33]); + /// ``` + pub fn push(&mut self, item: usize) { + let insert_info = self.get_insert_info(item); + self.0.resize(insert_info.new_bytes_len, 0); + let insert_index = insert_info.new_count - 1; + self.as_mut_slice().insert_impl(insert_info, insert_index); + } + + /// Inserts an element into the middle of the vector. + /// + /// Caution: Both arguments to this function are of type `usize`. Please be careful to pass + /// the index first followed by the value second. + /// + /// # Panics + /// + /// Panics if `index > len`. + /// + /// Panics if inserting the element would require allocating more than `usize::MAX` bytes. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect(); + /// zv.to_mut().insert(2, 33); + /// assert_eq!(zv.to_vec(), vec![22, 44, 33, 66]); + /// ``` + pub fn insert(&mut self, index: usize, item: usize) { + #[allow(clippy::panic)] // panic is documented in function contract + if index > self.len() { + panic!("index {} out of range {}", index, self.len()); + } + let insert_info = self.get_insert_info(item); + self.0.resize(insert_info.new_bytes_len, 0); + self.as_mut_slice().insert_impl(insert_info, index); + } + + /// Inserts an element into an ascending sorted vector + /// at a position that keeps the vector sorted. + /// + /// # Panics + /// + /// Panics if inserting the element would require allocating more than `usize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVecOwned; + /// + /// let mut fzv = FlexZeroVecOwned::new_empty(); + /// fzv.insert_sorted(10); + /// fzv.insert_sorted(5); + /// fzv.insert_sorted(8); + /// + /// assert!(Iterator::eq(fzv.iter(), [5, 8, 10].iter().copied())); + /// ``` + pub fn insert_sorted(&mut self, item: usize) { + let index = match self.binary_search(item) { + Ok(i) => i, + Err(i) => i, + }; + let insert_info = self.get_insert_info(item); + self.0.resize(insert_info.new_bytes_len, 0); + self.as_mut_slice().insert_impl(insert_info, index); + } + + /// Removes and returns the element at the specified index. + /// + /// # Panics + /// + /// Panics if `index >= len`. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect(); + /// let removed_item = zv.to_mut().remove(1); + /// assert_eq!(44, removed_item); + /// assert_eq!(zv.to_vec(), vec![22, 66]); + /// ``` + pub fn remove(&mut self, index: usize) -> usize { + #[allow(clippy::panic)] // panic is documented in function contract + if index >= self.len() { + panic!("index {} out of range {}", index, self.len()); + } + let remove_info = self.get_remove_info(index); + // Safety: `remove_index` is a valid index + let item = unsafe { self.get_unchecked(remove_info.remove_index) }; + let new_bytes_len = remove_info.new_bytes_len; + self.as_mut_slice().remove_impl(remove_info); + self.0.truncate(new_bytes_len); + item + } + + /// Removes and returns the last element from an ascending sorted vector. + /// + /// If the vector is not sorted, use [`FlexZeroVecOwned::remove()`] instead. Calling this + /// function would leave the FlexZeroVec in a safe, well-defined state; however, information + /// may be lost and/or the equality invariant might not hold. + /// + /// # Panics + /// + /// Panics if `self.is_empty()`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect(); + /// let popped_item = zv.to_mut().pop_sorted(); + /// assert_eq!(66, popped_item); + /// assert_eq!(zv.to_vec(), vec![22, 44]); + /// ``` + /// + /// Calling this function on a non-ascending vector could cause surprising results: + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let mut zv1: FlexZeroVec = [444, 222, 111].iter().copied().collect(); + /// let popped_item = zv1.to_mut().pop_sorted(); + /// assert_eq!(111, popped_item); + /// + /// // Oops! 
+ /// assert_eq!(zv1.to_vec(), vec![188, 222]); + /// ``` + pub fn pop_sorted(&mut self) -> usize { + #[allow(clippy::panic)] // panic is documented in function contract + if self.is_empty() { + panic!("cannot pop from an empty vector"); + } + let remove_info = self.get_sorted_pop_info(); + // Safety: `remove_index` is a valid index + let item = unsafe { self.get_unchecked(remove_info.remove_index) }; + let new_bytes_len = remove_info.new_bytes_len; + self.as_mut_slice().remove_impl(remove_info); + self.0.truncate(new_bytes_len); + item + } +} + +impl Deref for FlexZeroVecOwned { + type Target = FlexZeroSlice; + fn deref(&self) -> &Self::Target { + self.as_slice() + } +} + +impl fmt::Debug for FlexZeroVecOwned { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self.to_vec()) + } +} + +impl From<&FlexZeroSlice> for FlexZeroVecOwned { + fn from(other: &FlexZeroSlice) -> Self { + Self::from_slice(other) + } +} + +impl FromIterator<usize> for FlexZeroVecOwned { + /// Creates a [`FlexZeroVecOwned`] from an iterator of `usize`. 
+ fn from_iter<I>(iter: I) -> Self + where + I: IntoIterator<Item = usize>, + { + let mut result = FlexZeroVecOwned::new_empty(); + for item in iter { + result.push(item); + } + result + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn check_contents(fzv: &FlexZeroSlice, expected: &[usize]) { + assert_eq!(fzv.len(), expected.len(), "len: {fzv:?} != {expected:?}"); + assert_eq!( + fzv.is_empty(), + expected.is_empty(), + "is_empty: {fzv:?} != {expected:?}" + ); + assert_eq!( + fzv.first(), + expected.first().copied(), + "first: {fzv:?} != {expected:?}" + ); + assert_eq!( + fzv.last(), + expected.last().copied(), + "last: {fzv:?} != {expected:?}" + ); + for i in 0..(expected.len() + 1) { + assert_eq!( + fzv.get(i), + expected.get(i).copied(), + "@{i}: {fzv:?} != {expected:?}" + ); + } + } + + #[test] + fn test_basic() { + let mut fzv = FlexZeroVecOwned::new_empty(); + assert_eq!(fzv.get_width(), 1); + check_contents(&fzv, &[]); + + fzv.push(42); + assert_eq!(fzv.get_width(), 1); + check_contents(&fzv, &[42]); + + fzv.push(77); + assert_eq!(fzv.get_width(), 1); + check_contents(&fzv, &[42, 77]); + + // Scale up + fzv.push(300); + assert_eq!(fzv.get_width(), 2); + check_contents(&fzv, &[42, 77, 300]); + + // Does not need to be sorted + fzv.insert(1, 325); + assert_eq!(fzv.get_width(), 2); + check_contents(&fzv, &[42, 325, 77, 300]); + + fzv.remove(3); + assert_eq!(fzv.get_width(), 2); + check_contents(&fzv, &[42, 325, 77]); + + // Scale down + fzv.remove(1); + assert_eq!(fzv.get_width(), 1); + check_contents(&fzv, &[42, 77]); + } + + #[test] + fn test_build_sorted() { + let nums: &[usize] = &[0, 50, 0, 77, 831, 29, 89182, 931, 0, 77, 712381]; + let mut fzv = FlexZeroVecOwned::new_empty(); + + for num in nums { + fzv.insert_sorted(*num); + } + assert_eq!(fzv.get_width(), 3); + check_contents(&fzv, &[0, 0, 0, 29, 50, 77, 77, 831, 931, 89182, 712381]); + + for num in nums { + let index = fzv.binary_search(*num).unwrap(); + fzv.remove(index); + } + 
assert_eq!(fzv.get_width(), 1); + check_contents(&fzv, &[]); + } +} diff --git a/third_party/rust/zerovec/src/flexzerovec/serde.rs b/third_party/rust/zerovec/src/flexzerovec/serde.rs new file mode 100644 index 0000000000..fb7caa7a8b --- /dev/null +++ b/third_party/rust/zerovec/src/flexzerovec/serde.rs @@ -0,0 +1,175 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{FlexZeroSlice, FlexZeroVec}; +use alloc::vec::Vec; +use core::fmt; +use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; +#[cfg(feature = "serde")] +use serde::ser::{Serialize, SerializeSeq, Serializer}; + +#[derive(Default)] +struct FlexZeroVecVisitor {} + +impl<'de> Visitor<'de> for FlexZeroVecVisitor { + type Value = FlexZeroVec<'de>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a sequence or borrowed buffer of bytes") + } + + fn visit_borrowed_bytes<E>(self, bytes: &'de [u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + FlexZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let mut vec: Vec<usize> = if let Some(capacity) = seq.size_hint() { + Vec::with_capacity(capacity) + } else { + Vec::new() + }; + while let Some(value) = seq.next_element::<usize>()? 
{ + vec.push(value); + } + Ok(vec.into_iter().collect()) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a> Deserialize<'de> for FlexZeroVec<'a> +where + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let visitor = FlexZeroVecVisitor::default(); + if deserializer.is_human_readable() { + deserializer.deserialize_seq(visitor) + } else { + deserializer.deserialize_bytes(visitor) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a> Deserialize<'de> for &'a FlexZeroSlice +where + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "&FlexZeroSlice cannot be deserialized from human-readable formats", + )) + } else { + let deserialized: FlexZeroVec<'a> = FlexZeroVec::deserialize(deserializer)?; + let borrowed = if let FlexZeroVec::Borrowed(b) = deserialized { + b + } else { + return Err(de::Error::custom( + "&FlexZeroSlice can only deserialize in zero-copy ways", + )); + }; + Ok(borrowed) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl Serialize for FlexZeroVec<'_> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + if serializer.is_human_readable() { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for value in self.iter() { + seq.serialize_element(&value)?; + } + seq.end() + } else { + serializer.serialize_bytes(self.as_bytes()) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl Serialize for FlexZeroSlice { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + self.as_flexzerovec().serialize(serializer) + } +} + +#[cfg(test)] 
+#[allow(non_camel_case_types)] +mod test { + use super::{FlexZeroSlice, FlexZeroVec}; + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_FlexZeroVec<'data> { + #[serde(borrow)] + _data: FlexZeroVec<'data>, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_FlexZeroSlice<'data> { + #[serde(borrow)] + _data: &'data FlexZeroSlice, + } + + // [1, 22, 333, 4444]; + const BYTES: &[u8] = &[2, 0x01, 0x00, 0x16, 0x00, 0x4D, 0x01, 0x5C, 0x11]; + const JSON_STR: &str = "[1,22,333,4444]"; + const BINCODE_BUF: &[u8] = &[9, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 22, 0, 77, 1, 92, 17]; + + #[test] + fn test_serde_json() { + let zerovec_orig: FlexZeroVec = FlexZeroVec::parse_byte_slice(BYTES).expect("parse"); + let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); + assert_eq!(JSON_STR, json_str); + // FlexZeroVec should deserialize from JSON to either Vec or FlexZeroVec + let vec_new: Vec<usize> = + serde_json::from_str(&json_str).expect("deserialize from buffer to Vec"); + assert_eq!(zerovec_orig.to_vec(), vec_new); + let zerovec_new: FlexZeroVec = + serde_json::from_str(&json_str).expect("deserialize from buffer to FlexZeroVec"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + assert!(matches!(zerovec_new, FlexZeroVec::Owned(_))); + } + + #[test] + fn test_serde_bincode() { + let zerovec_orig: FlexZeroVec = FlexZeroVec::parse_byte_slice(BYTES).expect("parse"); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + let zerovec_new: FlexZeroVec = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to FlexZeroVec"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + assert!(matches!(zerovec_new, FlexZeroVec::Borrowed(_))); + } + + #[test] + fn test_vzv_borrowed() { + let zerovec_orig: &FlexZeroSlice = FlexZeroSlice::parse_byte_slice(BYTES).expect("parse"); + let bincode_buf = 
bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + let zerovec_new: &FlexZeroSlice = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to FlexZeroSlice"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + } +} diff --git a/third_party/rust/zerovec/src/flexzerovec/slice.rs b/third_party/rust/zerovec/src/flexzerovec/slice.rs new file mode 100644 index 0000000000..41cb7116f9 --- /dev/null +++ b/third_party/rust/zerovec/src/flexzerovec/slice.rs @@ -0,0 +1,722 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::FlexZeroVec; +use crate::ZeroVecError; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::fmt; +use core::mem; +use core::ops::Range; + +const USIZE_WIDTH: usize = mem::size_of::<usize>(); + +/// A zero-copy "slice" that efficiently represents `[usize]`. +#[repr(packed)] +pub struct FlexZeroSlice { + // Hard Invariant: 1 <= width <= USIZE_WIDTH (which is target_pointer_width) + // Soft Invariant: width == the width of the largest element + width: u8, + // Hard Invariant: data.len() % width == 0 + data: [u8], +} + +impl fmt::Debug for FlexZeroSlice { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.to_vec().fmt(f) + } +} + +impl PartialEq for FlexZeroSlice { + fn eq(&self, other: &Self) -> bool { + self.width == other.width && self.data == other.data + } +} +impl Eq for FlexZeroSlice {} + +/// Helper function to decode a little-endian "chunk" (byte slice of a specific length) +/// into a `usize`. We cannot call `usize::from_le_bytes` directly because that function +/// requires the high bits to be set to 0. 
+#[inline] +pub(crate) fn chunk_to_usize(chunk: &[u8], width: usize) -> usize { + debug_assert_eq!(chunk.len(), width); + let mut bytes = [0; USIZE_WIDTH]; + #[allow(clippy::indexing_slicing)] // protected by debug_assert above + bytes[0..width].copy_from_slice(chunk); + usize::from_le_bytes(bytes) +} + +impl FlexZeroSlice { + /// Constructs a new empty [`FlexZeroSlice`]. + /// + /// ``` + /// use zerovec::vecs::FlexZeroSlice; + /// + /// const EMPTY_SLICE: &FlexZeroSlice = FlexZeroSlice::new_empty(); + /// + /// assert!(EMPTY_SLICE.is_empty()); + /// assert_eq!(EMPTY_SLICE.len(), 0); + /// assert_eq!(EMPTY_SLICE.first(), None); + /// ``` + #[inline] + pub const fn new_empty() -> &'static Self { + const ARR: &[u8] = &[1u8]; + // Safety: The slice is a valid empty `FlexZeroSlice` + unsafe { Self::from_byte_slice_unchecked(ARR) } + } + + /// Safely constructs a [`FlexZeroSlice`] from a byte array. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroSlice; + /// + /// const FZS: &FlexZeroSlice = match FlexZeroSlice::parse_byte_slice(&[ + /// 2, // width + /// 0x42, 0x00, // first value + /// 0x07, 0x09, // second value + /// 0xFF, 0xFF, // third value + /// ]) { + /// Ok(v) => v, + /// Err(_) => panic!("invalid bytes"), + /// }; + /// + /// assert!(!FZS.is_empty()); + /// assert_eq!(FZS.len(), 3); + /// assert_eq!(FZS.first(), Some(0x0042)); + /// assert_eq!(FZS.get(0), Some(0x0042)); + /// assert_eq!(FZS.get(1), Some(0x0907)); + /// assert_eq!(FZS.get(2), Some(0xFFFF)); + /// assert_eq!(FZS.get(3), None); + /// assert_eq!(FZS.last(), Some(0xFFFF)); + /// ``` + pub const fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + let (width_u8, data) = match bytes.split_first() { + Some(v) => v, + None => { + return Err(ZeroVecError::InvalidLength { + ty: "FlexZeroSlice", + len: 0, + }) + } + }; + let width = *width_u8 as usize; + if width < 1 || width > USIZE_WIDTH { + return Err(ZeroVecError::ParseError { + ty: "FlexZeroSlice", + }); + } 
+ if data.len() % width != 0 { + return Err(ZeroVecError::InvalidLength { + ty: "FlexZeroSlice", + len: bytes.len(), + }); + } + // Safety: All hard invariants have been checked. + // Note: The soft invariant requires a linear search that we don't do here. + Ok(unsafe { Self::from_byte_slice_unchecked(bytes) }) + } + + /// Constructs a [`FlexZeroSlice`] without checking invariants. + /// + /// # Panics + /// + /// Panics if `bytes` is empty. + /// + /// # Safety + /// + /// Must be called on a valid [`FlexZeroSlice`] byte array. + #[inline] + pub const unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // Safety: The DST of FlexZeroSlice is a pointer to the `width` element and has a metadata + // equal to the length of the `data` field, which will be one less than the length of the + // overall array. + #[allow(clippy::panic)] // panic is documented in function contract + if bytes.is_empty() { + panic!("from_byte_slice_unchecked called with empty slice") + } + let slice = core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1); + &*(slice as *const Self) + } + + #[inline] + pub(crate) unsafe fn from_byte_slice_mut_unchecked(bytes: &mut [u8]) -> &mut Self { + // Safety: See comments in `from_byte_slice_unchecked` + let remainder = core::ptr::slice_from_raw_parts_mut(bytes.as_mut_ptr(), bytes.len() - 1); + &mut *(remainder as *mut Self) + } + + /// Returns this slice as its underlying `&[u8]` byte buffer representation. + /// + /// Useful for serialization. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::vecs::FlexZeroSlice; + /// + /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let fzv = FlexZeroSlice::parse_byte_slice(bytes).expect("valid bytes"); + /// + /// assert_eq!(bytes, fzv.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &[u8] { + // Safety: See comments in `from_byte_slice_unchecked` + unsafe { + core::slice::from_raw_parts(self as *const Self as *const u8, self.data.len() + 1) + } + } + + /// Borrows this `FlexZeroSlice` as a [`FlexZeroVec::Borrowed`]. + #[inline] + pub const fn as_flexzerovec(&self) -> FlexZeroVec { + FlexZeroVec::Borrowed(self) + } + + /// Returns the number of elements in the `FlexZeroSlice`. + #[inline] + pub fn len(&self) -> usize { + self.data.len() / self.get_width() + } + + #[inline] + pub(crate) fn get_width(&self) -> usize { + usize::from(self.width) + } + + /// Returns whether there are zero elements in the `FlexZeroSlice`. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.len() == 0 + } + + /// Gets the element at `index`, or `None` if `index >= self.len()`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect(); + /// assert_eq!(fzv.get(0), Some(22)); + /// assert_eq!(fzv.get(1), Some(33)); + /// assert_eq!(fzv.get(2), None); + /// ``` + #[inline] + pub fn get(&self, index: usize) -> Option<usize> { + if index >= self.len() { + None + } else { + Some(unsafe { self.get_unchecked(index) }) + } + } + + /// Gets the element at `index` as a chunk of bytes, or `None` if `index >= self.len()`. + #[inline] + pub(crate) fn get_chunk(&self, index: usize) -> Option<&[u8]> { + let w = self.get_width(); + self.data.get(index * w..index * w + w) + } + + /// Gets the element at `index` without checking bounds. + /// + /// # Safety + /// + /// `index` must be in-range. 
+ #[inline] + pub unsafe fn get_unchecked(&self, index: usize) -> usize { + match self.width { + 1 => *self.data.get_unchecked(index) as usize, + 2 => { + let ptr = self.data.as_ptr().add(index * 2); + u16::from_le_bytes(core::ptr::read(ptr as *const [u8; 2])) as usize + } + _ => { + let mut bytes = [0; USIZE_WIDTH]; + let w = self.get_width(); + assert!(w <= USIZE_WIDTH); + let ptr = self.data.as_ptr().add(index * w); + core::ptr::copy_nonoverlapping(ptr, bytes.as_mut_ptr(), w); + usize::from_le_bytes(bytes) + } + } + } + + /// Gets the first element of the slice, or `None` if the slice is empty. + #[inline] + pub fn first(&self) -> Option<usize> { + let w = self.get_width(); + self.data.get(0..w).map(|chunk| chunk_to_usize(chunk, w)) + } + + /// Gets the last element of the slice, or `None` if the slice is empty. + #[inline] + pub fn last(&self) -> Option<usize> { + let l = self.data.len(); + if l == 0 { + None + } else { + let w = self.get_width(); + self.data + .get(l - w..l) + .map(|chunk| chunk_to_usize(chunk, w)) + } + } + + /// Gets an iterator over the elements of the slice as `usize`. + #[inline] + pub fn iter( + &self, + ) -> impl DoubleEndedIterator<Item = usize> + '_ + ExactSizeIterator<Item = usize> { + let w = self.get_width(); + self.data + .chunks_exact(w) + .map(move |chunk| chunk_to_usize(chunk, w)) + } + + /// Gets an iterator over pairs of elements. + /// + /// The second element of the final pair is `None`. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let nums: &[usize] = &[211, 281, 421, 461]; + /// let fzv: FlexZeroVec = nums.iter().copied().collect(); + /// + /// let mut pairs_it = fzv.iter_pairs(); + /// + /// assert_eq!(pairs_it.next(), Some((211, Some(281)))); + /// assert_eq!(pairs_it.next(), Some((281, Some(421)))); + /// assert_eq!(pairs_it.next(), Some((421, Some(461)))); + /// assert_eq!(pairs_it.next(), Some((461, None))); + /// assert_eq!(pairs_it.next(), None); + /// ``` + pub fn iter_pairs(&self) -> impl Iterator<Item = (usize, Option<usize>)> + '_ { + self.iter().zip(self.iter().skip(1).map(Some).chain([None])) + } + + /// Creates a `Vec<usize>` from a [`FlexZeroSlice`] (or `FlexZeroVec`). + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let nums: &[usize] = &[211, 281, 421, 461]; + /// let fzv: FlexZeroVec = nums.iter().copied().collect(); + /// let vec: Vec<usize> = fzv.to_vec(); + /// + /// assert_eq!(nums, vec.as_slice()); + /// ``` + #[inline] + pub fn to_vec(&self) -> Vec<usize> { + self.iter().collect() + } + + /// Binary searches a sorted `FlexZeroSlice` for the given `usize` value. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let nums: &[usize] = &[211, 281, 421, 461]; + /// let fzv: FlexZeroVec = nums.iter().copied().collect(); + /// + /// assert_eq!(fzv.binary_search(0), Err(0)); + /// assert_eq!(fzv.binary_search(211), Ok(0)); + /// assert_eq!(fzv.binary_search(250), Err(1)); + /// assert_eq!(fzv.binary_search(281), Ok(1)); + /// assert_eq!(fzv.binary_search(300), Err(2)); + /// assert_eq!(fzv.binary_search(421), Ok(2)); + /// assert_eq!(fzv.binary_search(450), Err(3)); + /// assert_eq!(fzv.binary_search(461), Ok(3)); + /// assert_eq!(fzv.binary_search(462), Err(4)); + /// ``` + #[inline] + pub fn binary_search(&self, needle: usize) -> Result<usize, usize> { + self.binary_search_by(|probe| probe.cmp(&needle)) + } + + /// Binary searches a sorted range of a `FlexZeroSlice` for the given `usize` value. + /// + /// The indices in the return value are relative to the start of the range. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// // Make a FlexZeroVec with two sorted ranges: 0..3 and 3..5 + /// let nums: &[usize] = &[111, 222, 444, 333, 555]; + /// let fzv: FlexZeroVec = nums.iter().copied().collect(); + /// + /// // Search in the first range: + /// assert_eq!(fzv.binary_search_in_range(0, 0..3), Some(Err(0))); + /// assert_eq!(fzv.binary_search_in_range(111, 0..3), Some(Ok(0))); + /// assert_eq!(fzv.binary_search_in_range(199, 0..3), Some(Err(1))); + /// assert_eq!(fzv.binary_search_in_range(222, 0..3), Some(Ok(1))); + /// assert_eq!(fzv.binary_search_in_range(399, 0..3), Some(Err(2))); + /// assert_eq!(fzv.binary_search_in_range(444, 0..3), Some(Ok(2))); + /// assert_eq!(fzv.binary_search_in_range(999, 0..3), Some(Err(3))); + /// + /// // Search in the second range: + /// assert_eq!(fzv.binary_search_in_range(0, 3..5), Some(Err(0))); + /// assert_eq!(fzv.binary_search_in_range(333, 3..5), Some(Ok(0))); + /// assert_eq!(fzv.binary_search_in_range(399, 
3..5), Some(Err(1))); + /// assert_eq!(fzv.binary_search_in_range(555, 3..5), Some(Ok(1))); + /// assert_eq!(fzv.binary_search_in_range(999, 3..5), Some(Err(2))); + /// + /// // Out-of-bounds range: + /// assert_eq!(fzv.binary_search_in_range(0, 4..6), None); + /// ``` + #[inline] + pub fn binary_search_in_range( + &self, + needle: usize, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.binary_search_in_range_by(|probe| probe.cmp(&needle), range) + } + + /// Binary searches a sorted `FlexZeroSlice` according to a predicate function. + #[inline] + pub fn binary_search_by( + &self, + predicate: impl FnMut(usize) -> Ordering, + ) -> Result<usize, usize> { + debug_assert!(self.len() <= self.data.len()); + // Safety: self.len() <= self.data.len() + let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) }; + self.binary_search_impl(predicate, scaled_slice) + } + + /// Binary searches a sorted range of a `FlexZeroSlice` according to a predicate function. + /// + /// The indices in the return value are relative to the start of the range. + #[inline] + pub fn binary_search_in_range_by( + &self, + predicate: impl FnMut(usize) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + // Note: We need to check bounds separately, since `self.data.get(range)` does not return + // bounds errors, since it is indexing directly into the upscaled data array + if range.start > self.len() || range.end > self.len() { + return None; + } + let scaled_slice = self.data.get(range)?; + Some(self.binary_search_impl(predicate, scaled_slice)) + } + + /// Binary searches a `FlexZeroSlice` by its indices. + /// + /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`. 
    /// Binary searches a `FlexZeroSlice` by its indices.
    ///
    /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`.
    ///
    /// The result follows the convention of the standard slice `binary_search_by`, to which
    /// the search is ultimately delegated: `Ok(index)` for a match, `Err(index)` for the
    /// insertion point.
    #[inline]
    pub fn binary_search_with_index(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
    ) -> Result<usize, usize> {
        debug_assert!(self.len() <= self.data.len());
        // Safety: self.len() <= self.data.len()
        let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) };
        self.binary_search_with_index_impl(predicate, scaled_slice)
    }

    /// Binary searches a range of a `FlexZeroSlice` by its indices.
    ///
    /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`, which are
    /// relative to the start of the entire slice.
    ///
    /// The indices in the return value are relative to the start of the range.
    ///
    /// Returns `None` if either end of `range` is greater than `self.len()`, or if
    /// `self.data.get(range)` rejects the range.
    #[inline]
    pub fn binary_search_in_range_with_index(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        // Note: We need to check bounds separately, since `self.data.get(range)` does not return
        // bounds errors, since it is indexing directly into the upscaled data array
        if range.start > self.len() || range.end > self.len() {
            return None;
        }
        let scaled_slice = self.data.get(range)?;
        Some(self.binary_search_with_index_impl(predicate, scaled_slice))
    }

    /// Adapts a value-based `predicate` to the index-based search: each probed index is
    /// resolved to its element, which is then handed to `predicate`.
    ///
    /// # Safety
    ///
    /// `scaled_slice` must be a subslice of `self.data`
    ///
    /// NOTE(review): this function is not declared `unsafe`, so the requirement above is an
    /// internal invariant that every caller in this impl must uphold rather than a
    /// compiler-checked contract.
    #[inline]
    fn binary_search_impl(
        &self,
        mut predicate: impl FnMut(usize) -> Ordering,
        scaled_slice: &[u8],
    ) -> Result<usize, usize> {
        self.binary_search_with_index_impl(
            |index| {
                // Safety: The contract of `binary_search_with_index_impl` says `index` is in bounds
                let actual_probe = unsafe { self.get_unchecked(index) };
                predicate(actual_probe)
            },
            scaled_slice,
        )
    }

    /// `predicate` is passed a valid index as an argument.
    ///
    /// The index is recovered from the address of the byte being probed, measured from the
    /// start of `self.data`; the returned indices are those of the standard slice search
    /// over `scaled_slice`, i.e. relative to the start of `scaled_slice`.
    ///
    /// # Safety
    ///
    /// `scaled_slice` must be a subslice of `self.data`
    ///
    /// NOTE(review): as with `binary_search_impl`, this is an internal invariant; the
    /// function itself is safe to call as far as the compiler is concerned.
    fn binary_search_with_index_impl(
        &self,
        mut predicate: impl FnMut(usize) -> Ordering,
        scaled_slice: &[u8],
    ) -> Result<usize, usize> {
        // This code is an absolute atrocity. This code is not a place of honor. This
        // code is known to the State of California to cause cancer.
        //
        // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
        // We do not have a slice. We have something we can .get() and index on, but that is not
        // a slice.
        //
        // The `binary_search*` functions also do not have a variant where they give you the element's
        // index, which we could otherwise use to directly index `self`.
        // We do have `self.indices`, but these are indices into a byte buffer, which cannot in
        // isolation be used to recoup the logical index of the element they refer to.
        //
        // However, `binary_search_by()` provides references to the elements of the slice being iterated.
        // Since the layout of Rust slices is well-defined, we can do pointer arithmetic on these references
        // to obtain the index being used by the search.
        //
        // It's worth noting that the slice we choose to search is irrelevant, as long as it has the appropriate
        // length. `self.indices` is defined to have length `self.len()`, so it is convenient to use
        // here and does not require additional allocations.
        //
        // The alternative to doing this is to implement our own binary search. This is significantly less fun.
        //
        // NOTE(review): the paragraphs above mention `self.indices`, but the code below only
        // ever touches `self.data` (callers pass a prefix or sub-range of it whose length is
        // the number of elements to search). The wording looks carried over from another
        // container; read `self.indices` as `self.data` and confirm.

        // Note: We always use zero_index relative to the whole indices array, even if we are
        // only searching a subslice of it.
        let zero_index = self.data.as_ptr() as *const _ as usize;
        scaled_slice.binary_search_by(|probe: &_| {
            // Note: `scaled_slice` is a slice of u8
            // Each element is one byte wide, so the address difference is the element's
            // position counted from the start of `self.data`.
            let index = probe as *const _ as usize - zero_index;
            predicate(index)
        })
    }
+pub(crate) struct InsertInfo { + /// The bytes to be inserted, with zero-fill. + pub item_bytes: [u8; USIZE_WIDTH], + /// The new item width after insertion. + pub new_width: usize, + /// The new number of items in the vector: self.len() after insertion. + pub new_count: usize, + /// The new number of bytes required for the entire slice (self.data.len() + 1). + pub new_bytes_len: usize, +} + +impl FlexZeroSlice { + /// Compute the [`InsertInfo`] for inserting the specified item anywhere into the vector. + /// + /// # Panics + /// + /// Panics if inserting the element would require allocating more than `usize::MAX` bytes. + pub(crate) fn get_insert_info(&self, new_item: usize) -> InsertInfo { + let item_bytes = new_item.to_le_bytes(); + let item_width = get_item_width(&item_bytes); + let old_width = self.get_width(); + let new_width = core::cmp::max(old_width, item_width); + let new_count = 1 + (self.data.len() / old_width); + #[allow(clippy::unwrap_used)] // panic is documented in function contract + let new_bytes_len = new_count + .checked_mul(new_width) + .unwrap() + .checked_add(1) + .unwrap(); + InsertInfo { + item_bytes, + new_width, + new_count, + new_bytes_len, + } + } + + /// This function should be called on a slice with a data array `new_data_len` long + /// which previously held `new_count - 1` elements. + /// + /// After calling this function, all bytes in the slice will have been written. + pub(crate) fn insert_impl(&mut self, insert_info: InsertInfo, insert_index: usize) { + let InsertInfo { + item_bytes, + new_width, + new_count, + new_bytes_len, + } = insert_info; + debug_assert!(new_width <= USIZE_WIDTH); + debug_assert!(new_width >= self.get_width()); + debug_assert!(insert_index < new_count); + debug_assert_eq!(new_bytes_len, new_count * new_width + 1); + debug_assert_eq!(new_bytes_len, self.data.len() + 1); + // For efficiency, calculate how many items we can skip copying. 
+ let lower_i = if new_width == self.get_width() { + insert_index + } else { + 0 + }; + // Copy elements starting from the end into the new empty section of the vector. + // Note: We could copy fully in place, but we need to set 0 bytes for the high bytes, + // so we stage the new value on the stack. + for i in (lower_i..new_count).rev() { + let bytes_to_write = if i == insert_index { + item_bytes + } else { + let j = if i > insert_index { i - 1 } else { i }; + debug_assert!(j < new_count - 1); + // Safety: j is in range (assertion on previous line), and it has not been + // overwritten yet since we are walking backwards. + unsafe { self.get_unchecked(j).to_le_bytes() } + }; + // Safety: The vector has capacity for `new_width` items at the new index, which is + // later in the array than the bytes that we read above. + unsafe { + core::ptr::copy_nonoverlapping( + bytes_to_write.as_ptr(), + self.data.as_mut_ptr().add(new_width * i), + new_width, + ); + } + } + self.width = new_width as u8; + } +} + +/// Pre-computed information about a pending removal operation. +/// +/// Do not create one of these directly; call `get_remove_info()` or `get_sorted_pop_info()`. +pub(crate) struct RemoveInfo { + /// The index of the item to be removed. + pub remove_index: usize, + /// The new item width after insertion. + pub new_width: usize, + /// The new number of items in the vector: self.len() after insertion. + pub new_count: usize, + /// The new number of bytes required for the entire slice (self.data.len() + 1). + pub new_bytes_len: usize, +} + +impl FlexZeroSlice { + /// Compute the [`RemoveInfo`] for removing the item at the specified index. 
+ pub(crate) fn get_remove_info(&self, remove_index: usize) -> RemoveInfo { + debug_assert!(remove_index < self.len()); + // Safety: remove_index is in range (assertion on previous line) + let item_bytes = unsafe { self.get_unchecked(remove_index).to_le_bytes() }; + let item_width = get_item_width(&item_bytes); + let old_width = self.get_width(); + let old_count = self.data.len() / old_width; + let new_width = if item_width < old_width { + old_width + } else { + debug_assert_eq!(old_width, item_width); + // We might be removing the widest element. If so, we need to scale down. + let mut largest_width = 1; + for i in 0..old_count { + if i == remove_index { + continue; + } + // Safety: i is in range (between 0 and old_count) + let curr_bytes = unsafe { self.get_unchecked(i).to_le_bytes() }; + let curr_width = get_item_width(&curr_bytes); + largest_width = core::cmp::max(curr_width, largest_width); + } + largest_width + }; + let new_count = old_count - 1; + // Note: the following line won't overflow because we are making the slice shorter. + let new_bytes_len = new_count * new_width + 1; + RemoveInfo { + remove_index, + new_width, + new_count, + new_bytes_len, + } + } + + /// Returns the [`RemoveInfo`] for removing the last element. Should be called + /// on a slice sorted in ascending order. + /// + /// This is more efficient than `get_remove_info()` because it doesn't require a + /// linear traversal of the vector in order to calculate `new_width`. + pub(crate) fn get_sorted_pop_info(&self) -> RemoveInfo { + debug_assert!(!self.is_empty()); + let remove_index = self.len() - 1; + let old_count = self.len(); + let new_width = if old_count == 1 { + 1 + } else { + // Safety: the FlexZeroSlice has at least two elements + let largest_item = unsafe { self.get_unchecked(remove_index - 1).to_le_bytes() }; + get_item_width(&largest_item) + }; + let new_count = old_count - 1; + // Note: the following line won't overflow because we are making the slice shorter. 
+ let new_bytes_len = new_count * new_width + 1; + RemoveInfo { + remove_index, + new_width, + new_count, + new_bytes_len, + } + } + + /// This function should be called on a valid slice. + /// + /// After calling this function, the slice data should be truncated to `new_data_len` bytes. + pub(crate) fn remove_impl(&mut self, remove_info: RemoveInfo) { + let RemoveInfo { + remove_index, + new_width, + new_count, + .. + } = remove_info; + debug_assert!(new_width <= self.get_width()); + debug_assert!(new_count < self.len()); + // For efficiency, calculate how many items we can skip copying. + let lower_i = if new_width == self.get_width() { + remove_index + } else { + 0 + }; + // Copy elements starting from the beginning to compress the vector to fewer bytes. + for i in lower_i..new_count { + let j = if i < remove_index { i } else { i + 1 }; + // Safety: j is in range because j <= new_count < self.len() + let bytes_to_write = unsafe { self.get_unchecked(j).to_le_bytes() }; + // Safety: The bytes are being copied to a section of the array that is not after + // the section of the array that currently holds the bytes. + unsafe { + core::ptr::copy_nonoverlapping( + bytes_to_write.as_ptr(), + self.data.as_mut_ptr().add(new_width * i), + new_width, + ); + } + } + self.width = new_width as u8; + } +} diff --git a/third_party/rust/zerovec/src/flexzerovec/vec.rs b/third_party/rust/zerovec/src/flexzerovec/vec.rs new file mode 100644 index 0000000000..d83f600b57 --- /dev/null +++ b/third_party/rust/zerovec/src/flexzerovec/vec.rs @@ -0,0 +1,275 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use super::FlexZeroSlice; +use super::FlexZeroVecOwned; +use crate::ZeroVecError; +use core::cmp::Ordering; +use core::iter::FromIterator; +use core::ops::Deref; + +/// A zero-copy data structure that efficiently stores integer values. +/// +/// `FlexZeroVec` automatically increases or decreases its storage capacity based on the largest +/// integer stored in the vector. It therefore results in lower memory usage when smaller numbers +/// are usually stored, but larger values must sometimes also be stored. +/// +/// The maximum value that can be stored in `FlexZeroVec` is `usize::MAX` on the current platform. +/// +/// `FlexZeroVec` is the data structure for storing `usize` in a `ZeroMap`. +/// +/// `FlexZeroVec` derefs to [`FlexZeroSlice`], which contains most of the methods. +/// +/// # Examples +/// +/// Storing a vec of `usize`s in a zero-copy way: +/// +/// ``` +/// use zerovec::vecs::FlexZeroVec; +/// +/// // Create a FlexZeroVec and add a few numbers to it +/// let mut zv1 = FlexZeroVec::new(); +/// zv1.to_mut().push(55); +/// zv1.to_mut().push(33); +/// zv1.to_mut().push(999); +/// assert_eq!(zv1.to_vec(), vec![55, 33, 999]); +/// +/// // Convert it to bytes and back +/// let bytes = zv1.as_bytes(); +/// let zv2 = +/// FlexZeroVec::parse_byte_slice(bytes).expect("bytes should round-trip"); +/// assert_eq!(zv2.to_vec(), vec![55, 33, 999]); +/// +/// // Verify the compact storage +/// assert_eq!(7, bytes.len()); +/// assert!(matches!(zv2, FlexZeroVec::Borrowed(_))); +/// ``` +/// +/// Storing a map of `usize` to `usize` in a zero-copy way: +/// +/// ``` +/// use zerovec::ZeroMap; +/// +/// // Append some values to the ZeroMap +/// let mut zm = ZeroMap::<usize, usize>::new(); +/// assert!(zm.try_append(&29, &92).is_none()); +/// assert!(zm.try_append(&38, &83).is_none()); +/// assert!(zm.try_append(&56, &65).is_none()); +/// assert_eq!(zm.len(), 3); +/// +/// // Insert another value into the middle +/// assert!(zm.try_append(&47, &74).is_some()); +/// 
assert!(zm.insert(&47, &74).is_none()); +/// assert_eq!(zm.len(), 4); +/// +/// // Verify that the values are correct +/// assert_eq!(zm.get_copied(&0), None); +/// assert_eq!(zm.get_copied(&29), Some(92)); +/// assert_eq!(zm.get_copied(&38), Some(83)); +/// assert_eq!(zm.get_copied(&47), Some(74)); +/// assert_eq!(zm.get_copied(&56), Some(65)); +/// assert_eq!(zm.get_copied(&usize::MAX), None); +/// ``` +#[derive(Debug)] +#[non_exhaustive] +pub enum FlexZeroVec<'a> { + Owned(FlexZeroVecOwned), + Borrowed(&'a FlexZeroSlice), +} + +impl<'a> Deref for FlexZeroVec<'a> { + type Target = FlexZeroSlice; + fn deref(&self) -> &Self::Target { + match self { + FlexZeroVec::Owned(v) => v.deref(), + FlexZeroVec::Borrowed(v) => v, + } + } +} + +impl<'a> AsRef<FlexZeroSlice> for FlexZeroVec<'a> { + fn as_ref(&self) -> &FlexZeroSlice { + self.deref() + } +} + +impl Eq for FlexZeroVec<'_> {} + +impl<'a, 'b> PartialEq<FlexZeroVec<'b>> for FlexZeroVec<'a> { + #[inline] + fn eq(&self, other: &FlexZeroVec<'b>) -> bool { + self.iter().eq(other.iter()) + } +} + +impl<'a> Default for FlexZeroVec<'a> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<'a> PartialOrd for FlexZeroVec<'a> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl<'a> Ord for FlexZeroVec<'a> { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +impl<'a> FlexZeroVec<'a> { + #[inline] + /// Creates a new, borrowed, empty `FlexZeroVec`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let zv: FlexZeroVec = FlexZeroVec::new(); + /// assert!(zv.is_empty()); + /// ``` + pub const fn new() -> Self { + Self::Borrowed(FlexZeroSlice::new_empty()) + } + + /// Parses a `&[u8]` buffer into a `FlexZeroVec`. + /// + /// The bytes within the byte buffer must remain constant for the life of the FlexZeroVec. 
+ /// + /// # Endianness + /// + /// The byte buffer must be encoded in little-endian, even if running in a big-endian + /// environment. This ensures a consistent representation of data across platforms. + /// + /// # Max Value + /// + /// The bytes will fail to parse if the high value is greater than the capacity of `usize` + /// on this platform. For example, a `FlexZeroVec` created on a 64-bit platform might fail + /// to deserialize on a 32-bit platform. + /// + /// # Example + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zv = FlexZeroVec::parse_byte_slice(bytes).expect("valid slice"); + /// + /// assert!(matches!(zv, FlexZeroVec::Borrowed(_))); + /// assert_eq!(zv.get(2), Some(421)); + /// ``` + pub fn parse_byte_slice(bytes: &'a [u8]) -> Result<Self, ZeroVecError> { + let slice: &'a FlexZeroSlice = FlexZeroSlice::parse_byte_slice(bytes)?; + Ok(Self::Borrowed(slice)) + } + + /// Converts a borrowed FlexZeroVec to an owned FlexZeroVec. No-op if already owned. + /// + /// # Example + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zv = FlexZeroVec::parse_byte_slice(bytes).expect("valid bytes"); + /// assert!(matches!(zv, FlexZeroVec::Borrowed(_))); + /// + /// let owned = zv.into_owned(); + /// assert!(matches!(owned, FlexZeroVec::Owned(_))); + /// ``` + pub fn into_owned(self) -> FlexZeroVec<'static> { + match self { + Self::Owned(owned) => FlexZeroVec::Owned(owned), + Self::Borrowed(slice) => FlexZeroVec::Owned(FlexZeroVecOwned::from_slice(slice)), + } + } + + /// Allows the FlexZeroVec to be mutated by converting it to an owned variant, and producing + /// a mutable [`FlexZeroVecOwned`]. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let mut zv = FlexZeroVec::parse_byte_slice(bytes).expect("valid bytes"); + /// assert!(matches!(zv, FlexZeroVec::Borrowed(_))); + /// + /// zv.to_mut().push(12); + /// assert!(matches!(zv, FlexZeroVec::Owned(_))); + /// assert_eq!(zv.get(4), Some(12)); + /// ``` + pub fn to_mut(&mut self) -> &mut FlexZeroVecOwned { + match self { + Self::Owned(ref mut owned) => owned, + Self::Borrowed(slice) => { + *self = FlexZeroVec::Owned(FlexZeroVecOwned::from_slice(slice)); + // recursion is limited since we are guaranteed to hit the Owned branch + self.to_mut() + } + } + } + + /// Remove all elements from this FlexZeroVec and reset it to an empty borrowed state. + /// + /// # Examples + /// + /// ``` + /// use zerovec::vecs::FlexZeroVec; + /// + /// let mut zv: FlexZeroVec = [1, 2, 3].iter().copied().collect(); + /// assert!(!zv.is_empty()); + /// zv.clear(); + /// assert!(zv.is_empty()); + /// ``` + pub fn clear(&mut self) { + *self = Self::Borrowed(FlexZeroSlice::new_empty()) + } +} + +impl FromIterator<usize> for FlexZeroVec<'_> { + /// Creates a [`FlexZeroVec::Owned`] from an iterator of `usize`. 
+ fn from_iter<I>(iter: I) -> Self + where + I: IntoIterator<Item = usize>, + { + FlexZeroVecOwned::from_iter(iter).into_flexzerovec() + } +} + +#[test] +fn test_zeromap_usize() { + use crate::ZeroMap; + + let mut zm = ZeroMap::<usize, usize>::new(); + assert!(zm.try_append(&29, &92).is_none()); + assert!(zm.try_append(&38, &83).is_none()); + assert!(zm.try_append(&47, &74).is_none()); + assert!(zm.try_append(&56, &65).is_none()); + + assert_eq!(zm.keys.get_width(), 1); + assert_eq!(zm.values.get_width(), 1); + + assert_eq!(zm.insert(&47, &744), Some(74)); + assert_eq!(zm.values.get_width(), 2); + assert_eq!(zm.insert(&47, &774), Some(744)); + assert_eq!(zm.values.get_width(), 2); + assert!(zm.try_append(&1100, &1).is_none()); + assert_eq!(zm.keys.get_width(), 2); + assert_eq!(zm.remove(&1100), Some(1)); + assert_eq!(zm.keys.get_width(), 1); + + assert_eq!(zm.get_copied(&0), None); + assert_eq!(zm.get_copied(&29), Some(92)); + assert_eq!(zm.get_copied(&38), Some(83)); + assert_eq!(zm.get_copied(&47), Some(774)); + assert_eq!(zm.get_copied(&56), Some(65)); + assert_eq!(zm.get_copied(&usize::MAX), None); +} diff --git a/third_party/rust/zerovec/src/hashmap/algorithms.rs b/third_party/rust/zerovec/src/hashmap/algorithms.rs new file mode 100644 index 0000000000..58ffc48f4c --- /dev/null +++ b/third_party/rust/zerovec/src/hashmap/algorithms.rs @@ -0,0 +1,162 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use alloc::vec; +use alloc::vec::Vec; +use core::hash::{Hash, Hasher}; +use t1ha::T1haHasher; + +// Const seed to be used with [`T1haHasher::with_seed`]. +const SEED: u64 = 0xaabbccdd; + +/// Split the 64bit `hash` into (g, f0, f1). +/// g denotes the highest 16bits of the hash modulo `m`, and is referred to as first level hash. +/// (f0, f1) denotes the middle, and lower 24bits of the hash respectively. 
+/// (f0, f1) are used to distribute the keys with same g, into distinct slots. +/// +/// # Arguments +/// +/// * `hash` - The hash to split. +/// * `m` - The modulo used to split the hash. +pub const fn split_hash64(hash: u64, m: usize) -> (usize, u32, u32) { + ( + ((hash >> 48) as usize % m), + ((hash >> 24) as u32 & 0xffffff), + ((hash & 0xffffff) as u32), + ) +} + +/// Compute hash using [`T1haHasher`]. +pub fn compute_hash<K: Hash + ?Sized>(key: &K) -> u64 { + let mut hasher = T1haHasher::with_seed(SEED); + key.hash(&mut hasher); + hasher.finish() +} + +/// Calculate the index using (f0, f1), (d0, d1) in modulo m. +/// Returns [`None`] if d is (0, 0) or modulo is 0 +/// else returns the index computed using (f0 + f1 * d0 + d1) mod m. +pub fn compute_index(f: (u32, u32), d: (u32, u32), m: u32) -> Option<usize> { + if d == (0, 0) || m == 0 { + None + } else { + Some((f.1.wrapping_mul(d.0).wrapping_add(f.0).wrapping_add(d.1) % m) as usize) + } +} + +/// Compute displacements for the given `key_hashes`, which split the keys into distinct slots by a +/// two-level hashing schema. +/// Returns a tuple of where the first item is the displacement array and the second item is the +/// reverse mapping used to permute keys, values into their slots. +/// +/// 1. Split the hashes into (g, f0, f1). +/// 2. Bucket and sort the split hash on g in descending order. +/// 3. In decreasing order of bucket size, try until a (d0, d1) is found that splits the keys +/// in the bucket into distinct slots. +/// 4. Mark the slots for current bucket as occupied and store the reverse mapping. +/// 5. Repeat untill all the keys have been assigned distinct slots. 
+/// +/// # Arguments +/// +/// * `key_hashes` - [`ExactSizeIterator`] over the hashed key values +#[allow(clippy::indexing_slicing, clippy::unwrap_used)] +pub fn compute_displacements( + key_hashes: impl ExactSizeIterator<Item = u64>, +) -> (Vec<(u32, u32)>, Vec<usize>) { + let len = key_hashes.len(); + + // A vector to track the size of buckets for sorting. + let mut bucket_sizes = vec![0; len]; + + // A flattened representation of items in the buckets after applying first level hash function + let mut bucket_flatten = Vec::with_capacity(len); + + // Compute initial displacement and bucket sizes + + key_hashes.into_iter().enumerate().for_each(|(i, kh)| { + let h = split_hash64(kh, len); + bucket_sizes[h.0] += 1; + bucket_flatten.push((h, i)) + }); + + // Sort by decreasing order of bucket_sizes. + bucket_flatten.sort_by(|&(ha, _), &(hb, _)| { + // ha.0, hb.0 are always within bounds of `bucket_sizes` + (bucket_sizes[hb.0], hb).cmp(&(bucket_sizes[ha.0], ha)) + }); + + // Generation count while iterating buckets. + // Each trial of ((d0, d1), bucket chain) is a new generation. + // We use this to track which all slots are assigned for the current bucket chain. + let mut generation = 0; + + // Whether a slot has been occupied by previous buckets with a different first level hash (different + // bucket chain). + let mut occupied = vec![false; len]; + + // Track generation count for the slots. + // A slot is empty if either it is unoccupied by the previous bucket chains and the + // assignment is not equal to generation. + let mut assignments = vec![0; len]; + + // Vec to store the displacements (saves us a recomputation of hash while assigning slots). + let mut current_displacements = Vec::with_capacity(16); + + // (d0, d1) which splits the bucket into different slots + let mut displacements = vec![(0, 0); len]; + + // Vec to store mapping to the original order of keys. + // This is a permutation which will be applied to keys, values at the end. 
+ let mut reverse_mapping = vec![0; len]; + + let mut start = 0; + while start < len { + // Bucket span with the same first level hash + // start is always within bounds of `bucket_flatten` + let g = bucket_flatten[start].0 .0; + // g is always within bounds of `bucket_sizes` + let end = start + bucket_sizes[g]; + // start, end - 1 are always within bounds of `bucket_sizes` + let buckets = &bucket_flatten[start..end]; + + 'd0: for d0 in 0..len as u32 { + 'd1: for d1 in 0..len as u32 { + if (d0, d1) == (0, 0) { + continue; + } + current_displacements.clear(); + generation += 1; + + for ((_, f0, f1), _) in buckets { + let displacement_idx = compute_index((*f0, *f1), (d0, d1), len as u32).unwrap(); + + // displacement_idx is always within bounds + if occupied[displacement_idx] || assignments[displacement_idx] == generation { + continue 'd1; + } + assignments[displacement_idx] = generation; + current_displacements.push(displacement_idx); + } + + // Successfully found a (d0, d1), store it as index g. + // g < displacements.len() due to modulo operation + displacements[g] = (d0, d1); + + for (i, displacement_idx) in current_displacements.iter().enumerate() { + // `current_displacements` has same size as `buckets` + let (_, idx) = &buckets[i]; + + // displacement_idx is always within bounds + occupied[*displacement_idx] = true; + reverse_mapping[*displacement_idx] = *idx; + } + break 'd0; + } + } + + start = end; + } + + (displacements, reverse_mapping) +} diff --git a/third_party/rust/zerovec/src/hashmap/mod.rs b/third_party/rust/zerovec/src/hashmap/mod.rs new file mode 100644 index 0000000000..e3aed11980 --- /dev/null +++ b/third_party/rust/zerovec/src/hashmap/mod.rs @@ -0,0 +1,240 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; +use crate::ZeroVec; +use alloc::borrow::Borrow; +use alloc::vec; +use core::hash::Hash; + +pub mod algorithms; +use algorithms::*; + +#[cfg(feature = "serde")] +mod serde; + +/// A perfect zerohashmap optimized for lookups over immutable keys. +/// +/// # Examples +/// ``` +/// use zerovec::ZeroHashMap; +/// +/// let hashmap = +/// ZeroHashMap::<i32, str>::from_iter([(0, "a"), (1, "b"), (2, "c")]); +/// assert_eq!(hashmap.get(&0), Some("a")); +/// assert_eq!(hashmap.get(&2), Some("c")); +/// assert_eq!(hashmap.get(&4), None); +/// ``` +#[derive(Debug)] +pub struct ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + /// Array of (d0, d1) which splits the keys with same first level hash into distinct + /// slots. + /// The ith index of the array splits the keys with first level hash i. + /// If no key with first level hash is found in the original keys, (0, 0) is used as an empty + /// placeholder. + displacements: ZeroVec<'a, (u32, u32)>, + keys: K::Container, + values: V::Container, +} + +impl<'a, K, V> ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + /// The number of elements in the [`ZeroHashMap`]. + pub fn len(&self) -> usize { + self.values.zvl_len() + } + + /// Whether the [`ZeroHashMap`] is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl<'a, K, V> ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Hash + Eq, + V: ZeroMapKV<'a> + ?Sized, +{ + /// Given a `key` return the index for the key or [`None`] if the key is absent. 
+ fn index<A>(&self, key: &A) -> Option<usize> + where + A: Borrow<K> + ?Sized, + { + let hash = compute_hash(key.borrow()); + let (g, f0, f1) = split_hash64(hash, self.len()); + + #[allow(clippy::unwrap_used)] // g is in-range + let (d0, d1) = self.displacements.get(g).unwrap(); + let index = compute_index((f0, f1), (d0, d1), self.displacements.len() as u32)?; + + #[allow(clippy::unwrap_used)] // index is in 0..self.keys.len() + let found = self.keys.zvl_get(index).unwrap(); + if K::Container::zvl_get_as_t(found, |found| found == key.borrow()) { + Some(index) + } else { + None + } + } + + /// Get the value corresponding to `key`. + /// If absent [`None`] is returned. + /// + /// # Example + /// ``` + /// use zerovec::ZeroHashMap; + /// + /// let hashmap = ZeroHashMap::<str, str>::from_iter([("a", "A"), ("z", "Z")]); + /// + /// assert_eq!(hashmap.get("a"), Some("A")); + /// assert_eq!(hashmap.get("z"), Some("Z")); + /// assert_eq!(hashmap.get("0"), None); + /// ``` + pub fn get<'b, A>(&'b self, key: &A) -> Option<&'b V::GetType> + where + A: Borrow<K> + ?Sized + 'b, + { + self.index(key).and_then(|i| self.values.zvl_get(i)) + } + + /// Returns whether `key` is contained in this hashmap + /// + /// # Example + /// ```rust + /// use zerovec::ZeroHashMap; + /// + /// let hashmap = ZeroHashMap::<str, str>::from_iter([("a", "A"), ("z", "Z")]); + /// + /// assert!(hashmap.contains_key("a")); + /// assert!(!hashmap.contains_key("p")); + /// ``` + pub fn contains_key(&self, key: &K) -> bool { + self.index(key).is_some() + } +} + +impl<'a, K, V> ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + // Produce an iterator over (key, value) pairs. 
+ pub fn iter<'b>( + &'b self, + ) -> impl ExactSizeIterator< + Item = ( + &'b <K as ZeroMapKV<'a>>::GetType, + &'b <V as ZeroMapKV<'a>>::GetType, + ), + > { + (0..self.len()).map(|index| { + ( + #[allow(clippy::unwrap_used)] // index is in range + self.keys.zvl_get(index).unwrap(), + #[allow(clippy::unwrap_used)] // index is in range + self.values.zvl_get(index).unwrap(), + ) + }) + } + + // Produce an iterator over keys. + pub fn iter_keys<'b>( + &'b self, + ) -> impl ExactSizeIterator<Item = &'b <K as ZeroMapKV<'a>>::GetType> { + #[allow(clippy::unwrap_used)] // index is in range + (0..self.len()).map(|index| self.keys.zvl_get(index).unwrap()) + } + + // Produce an iterator over values. + pub fn iter_values<'b>( + &'b self, + ) -> impl ExactSizeIterator<Item = &'b <V as ZeroMapKV<'a>>::GetType> { + #[allow(clippy::unwrap_used)] // index is in range + (0..self.len()).map(|index| self.values.zvl_get(index).unwrap()) + } +} + +impl<'a, K, V, A, B> FromIterator<(A, B)> for ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Hash + Eq, + V: ZeroMapKV<'a> + ?Sized, + B: Borrow<V>, + A: Borrow<K>, +{ + /// Build a [`ZeroHashMap`] from an iterator returning (K, V) tuples. 
+ /// + /// # Example + /// ``` + /// use zerovec::ZeroHashMap; + /// + /// let hashmap = ZeroHashMap::<i32, str>::from_iter([ + /// (1, "a"), + /// (2, "b"), + /// (3, "c"), + /// (4, "d"), + /// ]); + /// assert_eq!(hashmap.get(&1), Some("a")); + /// assert_eq!(hashmap.get(&2), Some("b")); + /// assert_eq!(hashmap.get(&3), Some("c")); + /// assert_eq!(hashmap.get(&4), Some("d")); + /// ``` + fn from_iter<T: IntoIterator<Item = (A, B)>>(iter: T) -> Self { + let iter = iter.into_iter(); + let size_hint = match iter.size_hint() { + (_, Some(upper)) => upper, + (lower, None) => lower, + }; + + let mut key_hashes = vec![]; + key_hashes.reserve(size_hint); + let mut keys = K::Container::zvl_with_capacity(size_hint); + let mut values = V::Container::zvl_with_capacity(size_hint); + for (k, v) in iter { + keys.zvl_push(k.borrow()); + key_hashes.push(compute_hash(k.borrow())); + values.zvl_push(v.borrow()); + } + + let (displacements, mut reverse_mapping) = compute_displacements(key_hashes.into_iter()); + + keys.zvl_permute(&mut reverse_mapping.clone()); + values.zvl_permute(&mut reverse_mapping); + + Self { + displacements: ZeroVec::alloc_from_slice(&displacements), + values, + keys, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ule::AsULE; + use rand::{distributions::Standard, Rng, SeedableRng}; + use rand_pcg::Lcg64Xsh32; + + #[test] + fn test_zhms_u64k_u64v() { + const N: usize = 65530; + let seed = u64::from_le_bytes(*b"testseed"); + let rng = Lcg64Xsh32::seed_from_u64(seed); + let kv: Vec<(u64, u64)> = rng.sample_iter(&Standard).take(N).collect(); + let hashmap: ZeroHashMap<u64, u64> = + ZeroHashMap::from_iter(kv.iter().map(|e| (&e.0, &e.1))); + for (k, v) in kv { + assert_eq!( + hashmap.get(&k).copied().map(<u64 as AsULE>::from_unaligned), + Some(v), + ); + } + } +} diff --git a/third_party/rust/zerovec/src/hashmap/serde.rs b/third_party/rust/zerovec/src/hashmap/serde.rs new file mode 100644 index 0000000000..7a4941205c --- /dev/null +++ 
b/third_party/rust/zerovec/src/hashmap/serde.rs @@ -0,0 +1,147 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::ZeroHashMap; +use crate::{ + map::{ZeroMapKV, ZeroVecLike}, + ZeroVec, +}; + +use serde::{de, Deserialize, Serialize}; + +impl<'a, K, V> Serialize for ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + Serialize + ?Sized, + V: ZeroMapKV<'a> + Serialize + ?Sized, + K::Container: Serialize, + V::Container: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + (&self.displacements, &self.keys, &self.values).serialize(serializer) + } +} + +impl<'de, 'a, K, V> Deserialize<'de> for ZeroHashMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let (displacements, keys, values): (ZeroVec<(u32, u32)>, K::Container, V::Container) = + Deserialize::deserialize(deserializer)?; + if keys.zvl_len() != values.zvl_len() { + return Err(de::Error::custom( + "Mismatched key and value sizes in ZeroHashMap", + )); + } + if displacements.zvl_len() != keys.zvl_len() { + return Err(de::Error::custom( + "Mismatched displacements and key, value sizes in ZeroHashMap", + )); + } + Ok(Self { + displacements, + keys, + values, + }) + } +} + +#[cfg(test)] +mod test { + use crate::{VarZeroVec, ZeroHashMap, ZeroVec}; + use serde::{Deserialize, Serialize}; + + const JSON_STR: &str = "[[[0,1],[0,0],[0,1]],[2,1,0],[\"c\",\"b\",\"a\"]]"; + + const BINCODE_BYTES: &[u8] = &[ + 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 
0, 0, 0, + 3, 0, 0, 0, 0, 0, 1, 0, 2, 0, 99, 98, 97, + ]; + + #[derive(Serialize, Deserialize)] + struct DeriveTestZeroHashMap<'data> { + #[serde(borrow)] + _data: ZeroHashMap<'data, str, [u8]>, + } + + fn make_zerohashmap() -> ZeroHashMap<'static, u32, str> { + ZeroHashMap::from_iter([(0, "a"), (1, "b"), (2, "c")]) + } + + fn build_invalid_hashmap_str( + displacements: Vec<(u32, u32)>, + keys: Vec<u32>, + values: Vec<&str>, + ) -> String { + let invalid_hm: ZeroHashMap<u32, str> = ZeroHashMap { + displacements: ZeroVec::alloc_from_slice(&displacements), + keys: ZeroVec::alloc_from_slice(&keys), + values: VarZeroVec::<str>::from(&values), + }; + serde_json::to_string(&invalid_hm).expect("serialize") + } + + #[test] + fn test_invalid_deser_zhm() { + // Invalid hashmap |keys| != |values| + let mut invalid_hm_str = + build_invalid_hashmap_str(vec![(0, 1), (0, 0)], vec![1, 2], vec!["a", "b", "c"]); + + assert_eq!( + serde_json::from_str::<ZeroHashMap<u32, str>>(&invalid_hm_str) + .unwrap_err() + .to_string(), + "Mismatched key and value sizes in ZeroHashMap" + ); + + // Invalid hashmap |displacements| != |keys| == |values| + // |displacements| = 2, |keys| = 3, |values| = 3 + invalid_hm_str = + build_invalid_hashmap_str(vec![(0, 1), (0, 0)], vec![2, 1, 0], vec!["a", "b", "c"]); + + assert_eq!( + serde_json::from_str::<ZeroHashMap<u32, str>>(&invalid_hm_str) + .unwrap_err() + .to_string(), + "Mismatched displacements and key, value sizes in ZeroHashMap" + ); + } + + #[test] + fn test_serde_valid_deser_zhm() { + let hm = make_zerohashmap(); + let json_str = serde_json::to_string(&hm).expect("serialize"); + assert_eq!(json_str, JSON_STR); + let deserialized_hm: ZeroHashMap<u32, str> = + serde_json::from_str(JSON_STR).expect("deserialize"); + assert_eq!( + hm.iter().collect::<Vec<_>>(), + deserialized_hm.iter().collect::<Vec<_>>() + ); + } + + #[test] + fn test_bincode_zhm() { + let hm = make_zerohashmap(); + let bincode_bytes = bincode::serialize(&hm).expect("serialize"); 
+ assert_eq!(bincode_bytes, BINCODE_BYTES); + let deserialized_hm: ZeroHashMap<u32, str> = + bincode::deserialize(BINCODE_BYTES).expect("deserialize"); + assert_eq!( + hm.iter().collect::<Vec<_>>(), + deserialized_hm.iter().collect::<Vec<_>>() + ); + } +} diff --git a/third_party/rust/zerovec/src/lib.rs b/third_party/rust/zerovec/src/lib.rs new file mode 100644 index 0000000000..961d62f34c --- /dev/null +++ b/third_party/rust/zerovec/src/lib.rs @@ -0,0 +1,558 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Zero-copy vector abstractions for arbitrary types, backed by byte slices. +//! +//! `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in +//! zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with +//! proc macros. +//! +//! Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing +//! read-only data. +//! +//! This crate has four main types: +//! +//! - [`ZeroVec<'a, T>`] (and [`ZeroSlice<T>`](ZeroSlice)) for fixed-width types like `u32` +//! - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice<T>`](VarZeroSlice)) for variable-width types like `str` +//! - [`ZeroMap<'a, K, V>`] to map from `K` to `V` +//! - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V` +//! +//! The first two are intended as close-to-drop-in replacements for `Vec<T>` in Serde structs. The third and fourth are +//! intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap). When used with Serde derives, **be sure to apply +//! `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`]. +//! +//! [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like +//! [`Cow<'a, T>`] in that they abstract over either borrowed or owned data.
When performing deserialization +//! from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing +//! from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from, +//! avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#performance) for more information) +//! on deserialization. +//! +//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate +//! works under the hood. +//! +//! # Cargo features +//! +//! This crate has several optional Cargo features: +//! - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde) +//! - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful +//! in situations involving a lot of zero-copy deserialization. +//! - `derive`: Makes it easier to use custom types in these collections by providing the [`#[make_ule]`](crate::make_ule) and +//! [`#[make_varule]`](crate::make_varule) proc macros, which generate appropriate [`ULE`](crate::ule::ULE) and +//! [`VarULE`](crate::ule::VarULE)-conformant types for a given "normal" type. +//! - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`. +//! +//! [`ZeroVec<'a, T>`]: ZeroVec +//! [`VarZeroVec<'a, T>`]: VarZeroVec +//! [`ZeroMap<'a, K, V>`]: ZeroMap +//! [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d +//! [`Cow<'a, T>`]: alloc::borrow::Cow +//! +//! # Examples +//! +//! Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode: +//! +//! ``` +//! # #[cfg(feature = "serde")] { +//! use zerovec::{VarZeroVec, ZeroVec}; +//! +//! // This example requires the "serde" feature +//! 
#[derive(serde::Serialize, serde::Deserialize)] +//! pub struct DataStruct<'data> { +//! #[serde(borrow)] +//! nums: ZeroVec<'data, u32>, +//! #[serde(borrow)] +//! chars: ZeroVec<'data, char>, +//! #[serde(borrow)] +//! strs: VarZeroVec<'data, str>, +//! } +//! +//! let data = DataStruct { +//! nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]), +//! chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']), +//! strs: VarZeroVec::from(&["hello", "world"]), +//! }; +//! let bincode_bytes = +//! bincode::serialize(&data).expect("Serialization should be successful"); +//! assert_eq!(bincode_bytes.len(), 67); +//! +//! let deserialized: DataStruct = bincode::deserialize(&bincode_bytes) +//! .expect("Deserialization should be successful"); +//! assert_eq!(deserialized.nums.first(), Some(211)); +//! assert_eq!(deserialized.chars.get(1), Some('冇')); +//! assert_eq!(deserialized.strs.get(1), Some("world")); +//! // The deserialization will not have allocated anything +//! assert!(!deserialized.nums.is_owned()); +//! # } // feature = "serde" +//! ``` +//! +//! Use custom types inside of ZeroVec: +//! +//! ```rust +//! # #[cfg(all(feature = "serde", feature = "derive"))] { +//! use zerovec::{ZeroVec, VarZeroVec, ZeroMap}; +//! use std::borrow::Cow; +//! use zerovec::ule::encode_varule_to_box; +//! +//! // custom fixed-size ULE type for ZeroVec +//! #[zerovec::make_ule(DateULE)] +//! #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +//! struct Date { +//! y: u64, +//! m: u8, +//! d: u8 +//! } +//! +//! // custom variable sized VarULE type for VarZeroVec +//! #[zerovec::make_varule(PersonULE)] +//! #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE +//! #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +//! struct Person<'a> { +//! birthday: Date, +//! favorite_character: char, +//! #[serde(borrow)] +//! name: Cow<'a, str>, +//! } +//! +//! 
#[derive(serde::Serialize, serde::Deserialize)] +//! struct Data<'a> { +//! #[serde(borrow)] +//! important_dates: ZeroVec<'a, Date>, +//! // note: VarZeroVec always must reference the ULE type directly +//! #[serde(borrow)] +//! important_people: VarZeroVec<'a, PersonULE>, +//! #[serde(borrow)] +//! birthdays_to_people: ZeroMap<'a, Date, PersonULE> +//! } +//! +//! +//! let person1 = Person { +//! birthday: Date { y: 1990, m: 9, d: 7}, +//! favorite_character: 'π', +//! name: Cow::from("Kate") +//! }; +//! let person2 = Person { +//! birthday: Date { y: 1960, m: 5, d: 25}, +//! favorite_character: '冇', +//! name: Cow::from("Jesse") +//! }; +//! +//! let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]); +//! let important_people = VarZeroVec::from(&[&person1, &person2]); +//! let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new(); +//! // `.insert_var_v()` is slightly more convenient over `.insert()` for custom ULE types +//! birthdays_to_people.insert_var_v(&person1.birthday, &person1); +//! birthdays_to_people.insert_var_v(&person2.birthday, &person2); +//! +//! let data = Data { important_dates, important_people, birthdays_to_people }; +//! +//! let bincode_bytes = bincode::serialize(&data) +//! .expect("Serialization should be successful"); +//! assert_eq!(bincode_bytes.len(), 168); +//! +//! let deserialized: Data = bincode::deserialize(&bincode_bytes) +//! .expect("Deserialization should be successful"); +//! +//! assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943); +//! assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); +//! assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); +//! assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate"); +//! +//! } // feature = serde and derive +//! ``` +//! +//! # Performance +//! +//! 
`zerovec` is designed for fast deserialization from byte buffers with zero memory allocations +//! while minimizing performance regressions for common vector operations. +//! +//! Benchmark results on x86_64: +//! +//! | Operation | `Vec<T>` | `zerovec` | +//! |---|---|---| +//! | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns | +//! | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns | +//! | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns | +//! | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs | +//! | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns | +//! | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns | +//! +//! \* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.* +//! +//! | Operation | `HashMap<K,V>` | `LiteMap<K,V>` | `ZeroMap<K,V>` | +//! |---|---|---|---| +//! | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns | +//! | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms | +//! | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns | +//! | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns | +//! +//! Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`. +//! +//! The benches used to generate the above table can be found in the `benches` directory in the project repository. +//! `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type +//! is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`. 
+ +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + missing_debug_implementations, + ) +)] +// this crate does a lot of nuanced lifetime manipulation, being explicit +// is better here. +#![allow(clippy::needless_lifetimes)] + +extern crate alloc; + +mod error; +mod flexzerovec; +#[cfg(feature = "hashmap")] +pub mod hashmap; +mod map; +mod map2d; +#[cfg(test)] +pub mod samples; +mod varzerovec; +mod zerovec; + +// This must be after `mod zerovec` for some impls on `ZeroSlice<RawBytesULE>` +// to show up in the right spot in the docs +pub mod ule; + +#[cfg(feature = "yoke")] +mod yoke_impls; +mod zerofrom_impls; + +pub use crate::error::ZeroVecError; +#[cfg(feature = "hashmap")] +pub use crate::hashmap::ZeroHashMap; +pub use crate::map::map::ZeroMap; +pub use crate::map2d::map::ZeroMap2d; +pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec}; +pub use crate::zerovec::{ZeroSlice, ZeroVec}; + +pub(crate) use flexzerovec::chunk_to_usize; + +#[doc(hidden)] +pub mod __zerovec_internal_reexport { + pub use zerofrom::ZeroFrom; + + pub use alloc::boxed; + + #[cfg(feature = "serde")] + pub use serde; +} + +pub mod maps { + //! This module contains additional utility types and traits for working with + //! [`ZeroMap`] and [`ZeroMap2d`]. See their docs for more details on the general purpose + //! of these types. + //! + //! [`ZeroMapBorrowed`] and [`ZeroMap2dBorrowed`] are versions of [`ZeroMap`] and [`ZeroMap2d`] + //! that can be used when you wish to guarantee that the map data is always borrowed, leading to + //! relaxed lifetime constraints. + //! + //! The [`ZeroMapKV`] trait is required to be implemented on any type that needs to be used + //! within a map type. 
[`ZeroVecLike`] and [`MutableZeroVecLike`] are traits used in the + //! internal workings of the map types, and should typically not be used or implemented by + //! users of this crate. + #[doc(no_inline)] + pub use crate::map::ZeroMap; + pub use crate::map::ZeroMapBorrowed; + + #[doc(no_inline)] + pub use crate::map2d::ZeroMap2d; + pub use crate::map2d::ZeroMap2dBorrowed; + + pub use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; + + pub use crate::map2d::ZeroMap2dCursor; +} + +pub mod vecs { + //! This module contains additional utility types for working with + //! [`ZeroVec`] and [`VarZeroVec`]. See their docs for more details on the general purpose + //! of these types. + //! + //! [`ZeroSlice`] and [`VarZeroSlice`] provide slice-like versions of the vector types + //! for use behind references and in custom ULE types. + //! + //! [`VarZeroVecOwned`] is a special owned/mutable version of [`VarZeroVec`], allowing + //! direct manipulation of the backing buffer. + + #[doc(no_inline)] + pub use crate::zerovec::{ZeroSlice, ZeroVec}; + + #[doc(no_inline)] + pub use crate::varzerovec::{VarZeroSlice, VarZeroVec}; + + pub use crate::varzerovec::{Index16, Index32, VarZeroVecFormat, VarZeroVecOwned}; + + pub use crate::flexzerovec::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned}; +} + +// Proc macro reexports +// +// These exist so that our docs can use intra-doc links. +// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from +// a submodule + +/// Generate a corresponding [`ULE`] type and the relevant [`AsULE`] implementations for this type +/// +/// This can be attached to structs containing only [`AsULE`] types, or C-like enums that have `#[repr(u8)]` +/// and all explicit discriminants. +/// +/// The type must be [`Copy`], [`PartialEq`], and [`Eq`]. 
+/// +/// `#[make_ule]` will automatically derive the following traits on the [`ULE`] type: +/// +/// - [`Ord`] and [`PartialOrd`] +/// - [`ZeroMapKV`] +/// +/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. +/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. +/// +/// The following traits are available to derive, but not automatic: +/// +/// - [`Debug`] +/// +/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. +/// +/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. +/// +/// For enums, this attribute will generate a crate-public `fn new_from_u8(value: u8) -> Option<Self>` +/// method on the main type that allows one to construct the value from a u8. If this method is desired +/// to be more public, it should be wrapped. 
+/// +/// [`ULE`]: ule::ULE +/// [`AsULE`]: ule::AsULE +/// [`ZeroMapKV`]: maps::ZeroMapKV +/// +/// # Example +/// +/// ```rust +/// use zerovec::ZeroVec; +/// +/// #[zerovec::make_ule(DateULE)] +/// #[derive( +/// Copy, +/// Clone, +/// PartialEq, +/// Eq, +/// Ord, +/// PartialOrd, +/// serde::Serialize, +/// serde::Deserialize, +/// )] +/// struct Date { +/// y: u64, +/// m: u8, +/// d: u8, +/// } +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Dates<'a> { +/// #[serde(borrow)] +/// dates: ZeroVec<'a, Date>, +/// } +/// +/// let dates = Dates { +/// dates: ZeroVec::alloc_from_slice(&[ +/// Date { +/// y: 1985, +/// m: 9, +/// d: 3, +/// }, +/// Date { +/// y: 1970, +/// m: 2, +/// d: 20, +/// }, +/// Date { +/// y: 1990, +/// m: 6, +/// d: 13, +/// }, +/// ]), +/// }; +/// +/// let bincode_bytes = +/// bincode::serialize(&dates).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Dates = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(deserialized.dates.get(1).unwrap().y, 1970); +/// assert_eq!(deserialized.dates.get(2).unwrap().d, 13); +/// ``` +#[cfg(feature = "derive")] +pub use zerovec_derive::make_ule; + +/// Generate a corresponding [`VarULE`] type and the relevant [`EncodeAsVarULE`]/[`zerofrom::ZeroFrom`] +/// implementations for this type +/// +/// This can be attached to structs containing only [`AsULE`] types with the last fields being +/// [`Cow<'a, str>`](alloc::borrow::Cow), [`ZeroSlice`], or [`VarZeroSlice`]. If there is more than one such field, it will be represented +/// using [`MultiFieldsULE`](crate::ule::MultiFieldsULE) and getters will be generated. Other VarULE fields will be detected if they are +/// tagged with `#[zerovec::varule(NameOfVarULETy)]`. +/// +/// The type must be [`PartialEq`] and [`Eq`]. 
+/// +/// [`EncodeAsVarULE`] and [`zerofrom::ZeroFrom`] are useful for avoiding the need to deal with +/// the [`VarULE`] type directly. In particular, it is recommended to use [`zerofrom::ZeroFrom`] +/// to convert the [`VarULE`] type back to this type in a cheap, zero-copy way (see the example below +/// for more details). +/// +/// `#[make_varule]` will automatically derive the following traits on the [`VarULE`] type: +/// +/// - [`Ord`] and [`PartialOrd`] +/// - [`ZeroMapKV`] +/// +/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. +/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. +/// +/// The following traits are available to derive, but not automatic: +/// +/// - [`Debug`] +/// - [`Serialize`](serde::Serialize) +/// - [`Deserialize`](serde::Deserialize) +/// +/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. +/// +/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. +/// +/// This implementation will also by default autogenerate [`Ord`] and [`PartialOrd`] on the [`VarULE`] type based on +/// the implementation on `Self`. You can opt out of this with `#[zerovec::skip_derive(Ord)]` +/// +/// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self` +/// for convenience. This allows for a little more flexibility encoding slices. 
+/// +/// [`EncodeAsVarULE`]: ule::EncodeAsVarULE +/// [`VarULE`]: ule::VarULE +/// [`ULE`]: ule::ULE +/// [`AsULE`]: ule::AsULE +/// [`ZeroMapKV`]: maps::ZeroMapKV +/// +/// # Example +/// +/// ```rust +/// use std::borrow::Cow; +/// use zerofrom::ZeroFrom; +/// use zerovec::ule::encode_varule_to_box; +/// use zerovec::{VarZeroVec, ZeroMap, ZeroVec}; +/// +/// // custom fixed-size ULE type for ZeroVec +/// #[zerovec::make_ule(DateULE)] +/// #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +/// struct Date { +/// y: u64, +/// m: u8, +/// d: u8, +/// } +/// +/// // custom variable sized VarULE type for VarZeroVec +/// #[zerovec::make_varule(PersonULE)] +/// #[zerovec::derive(Serialize, Deserialize)] +/// #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +/// struct Person<'a> { +/// birthday: Date, +/// favorite_character: char, +/// #[serde(borrow)] +/// name: Cow<'a, str>, +/// } +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// // note: VarZeroVec always must reference the ULE type directly +/// #[serde(borrow)] +/// important_people: VarZeroVec<'a, PersonULE>, +/// } +/// +/// let person1 = Person { +/// birthday: Date { +/// y: 1990, +/// m: 9, +/// d: 7, +/// }, +/// favorite_character: 'π', +/// name: Cow::from("Kate"), +/// }; +/// let person2 = Person { +/// birthday: Date { +/// y: 1960, +/// m: 5, +/// d: 25, +/// }, +/// favorite_character: '冇', +/// name: Cow::from("Jesse"), +/// }; +/// +/// let important_people = VarZeroVec::from(&[person1, person2]); +/// let data = Data { important_people }; +/// +/// let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Data = +/// bincode::deserialize(&bincode_bytes).expect("Deserialization should be successful"); +/// +/// assert_eq!(&deserialized.important_people.get(1).unwrap().name, 
"Jesse"); +/// assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); +/// +/// // Since VarZeroVec produces PersonULE types, it's convenient to use ZeroFrom +/// // to recoup Person values in a zero-copy way +/// let person_converted: Person = +/// ZeroFrom::zero_from(deserialized.important_people.get(1).unwrap()); +/// assert_eq!(person_converted.name, "Jesse"); +/// assert_eq!(person_converted.birthday.y, 1960); +/// ``` +#[cfg(feature = "derive")] +pub use zerovec_derive::make_varule; + +#[cfg(test)] +mod tests { + use super::*; + use core::mem::size_of; + + /// Checks that the size of the type is one of the given sizes. + /// The size might differ across Rust versions or channels. + macro_rules! check_size_of { + ($sizes:pat, $type:path) => { + assert!( + matches!(size_of::<$type>(), $sizes), + concat!(stringify!($type), " is of size {}"), + size_of::<$type>() + ); + }; + } + + #[test] + fn check_sizes() { + check_size_of!(24, ZeroVec<u8>); + check_size_of!(24, ZeroVec<u32>); + check_size_of!(32 | 24, VarZeroVec<[u8]>); + check_size_of!(32 | 24, VarZeroVec<str>); + check_size_of!(48, ZeroMap<u32, u32>); + check_size_of!(56 | 48, ZeroMap<u32, str>); + check_size_of!(56 | 48, ZeroMap<str, u32>); + check_size_of!(64 | 48, ZeroMap<str, str>); + check_size_of!(120 | 96, ZeroMap2d<str, str, str>); + check_size_of!(32 | 24, vecs::FlexZeroVec); + + check_size_of!(32, Option<ZeroVec<u8>>); + check_size_of!(32, Option<VarZeroVec<str>>); + check_size_of!(64 | 56, Option<ZeroMap<str, str>>); + check_size_of!(120 | 104, Option<ZeroMap2d<str, str, str>>); + check_size_of!(32, Option<vecs::FlexZeroVec>); + } +} diff --git a/third_party/rust/zerovec/src/map/borrowed.rs b/third_party/rust/zerovec/src/map/borrowed.rs new file mode 100644 index 0000000000..98b2d2f9d1 --- /dev/null +++ b/third_party/rust/zerovec/src/map/borrowed.rs @@ -0,0 +1,325 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::AsULE; +use crate::ZeroSlice; + +use core::cmp::Ordering; +use core::fmt; + +pub use super::kv::ZeroMapKV; +pub use super::vecs::{MutableZeroVecLike, ZeroVecLike}; + +/// A borrowed-only version of [`ZeroMap`](super::ZeroMap) +/// +/// This is useful for fully-zero-copy deserialization from non-human-readable +/// serialization formats. It also has the advantage that it can return references that live for +/// the lifetime of the backing buffer as opposed to that of the [`ZeroMapBorrowed`] instance. +/// +/// # Examples +/// +/// ``` +/// use zerovec::maps::ZeroMapBorrowed; +/// +/// // Example byte buffer representing the map { 1: "one" } +/// let BINCODE_BYTES: &[u8; 29] = &[ +/// 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, +/// 0, 0, 111, 110, 101, +/// ]; +/// +/// // Deserializing to ZeroMap requires no heap allocations. 
+/// let zero_map: ZeroMapBorrowed<u32, str> = +/// bincode::deserialize(BINCODE_BYTES) +/// .expect("Should deserialize successfully"); +/// assert_eq!(zero_map.get(&1), Some("one")); +/// ``` +/// +/// This can be obtained from a [`ZeroMap`](super::ZeroMap) via [`ZeroMap::as_borrowed`](super::ZeroMap::as_borrowed) +pub struct ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + pub(crate) keys: &'a <K as ZeroMapKV<'a>>::Slice, + pub(crate) values: &'a <V as ZeroMapKV<'a>>::Slice, +} + +impl<'a, K, V> Copy for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ +} +impl<'a, K, V> Clone for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + fn clone(&self) -> Self { + *self + } +} + +impl<'a, K, V> Default for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K::Slice: 'static, + V::Slice: 'static, + K: ?Sized, + V: ?Sized, +{ + fn default() -> Self { + Self::new() + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K::Slice: 'static, + V::Slice: 'static, + K: ?Sized, + V: ?Sized, +{ + /// Creates a new, empty `ZeroMapBorrowed<K, V>`. + /// + /// Note: Since [`ZeroMapBorrowed`] is not mutable, the return value will be a stub unless + /// converted into a [`ZeroMap`](super::ZeroMap). 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::maps::ZeroMapBorrowed; + /// + /// let zm: ZeroMapBorrowed<u16, str> = ZeroMapBorrowed::new(); + /// assert!(zm.is_empty()); + /// ``` + pub fn new() -> Self { + Self { + keys: K::Container::zvl_new_borrowed(), + values: V::Container::zvl_new_borrowed(), + } + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + #[doc(hidden)] // databake internal + pub const unsafe fn from_parts_unchecked( + keys: &'a <K as ZeroMapKV<'a>>::Slice, + values: &'a <V as ZeroMapKV<'a>>::Slice, + ) -> Self { + Self { keys, values } + } + + /// The number of elements in the [`ZeroMapBorrowed`] + pub fn len(&self) -> usize { + self.values.zvl_len() + } + + /// Whether the [`ZeroMapBorrowed`] is empty + pub fn is_empty(&self) -> bool { + self.values.zvl_len() == 0 + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + /// Get the value associated with `key`, if it exists. + /// + /// This is able to return values that live longer than the map itself + /// since they borrow directly from the backing buffer. This is the + /// primary advantage of using [`ZeroMapBorrowed`](super::ZeroMapBorrowed) over [`ZeroMap`](super::ZeroMap). + /// + /// ```rust + /// use zerovec::maps::ZeroMapBorrowed; + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// let borrowed = map.as_borrowed(); + /// assert_eq!(borrowed.get(&1), Some("one")); + /// assert_eq!(borrowed.get(&3), None); + /// ``` + pub fn get(&self, key: &K) -> Option<&'a V::GetType> { + let index = self.keys.zvl_binary_search(key).ok()?; + self.values.zvl_get(index) + } + + /// Binary search the map with `predicate` to find a key, returning the value. 
+ /// + /// This is able to return values that live longer than the map itself + /// since they borrow directly from the backing buffer. This is the + /// primary advantage of using [`ZeroMapBorrowed`](super::ZeroMapBorrowed) over [`ZeroMap`](super::ZeroMap). + /// + /// ```rust + /// use zerovec::maps::ZeroMapBorrowed; + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// let borrowed = map.as_borrowed(); + /// assert_eq!(borrowed.get_by(|probe| probe.cmp(&1)), Some("one")); + /// assert_eq!(borrowed.get_by(|probe| probe.cmp(&3)), None); + /// ``` + pub fn get_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<&'a V::GetType> { + let index = self.keys.zvl_binary_search_by(predicate).ok()?; + self.values.zvl_get(index) + } + + /// Returns whether `key` is contained in this map + /// + /// ```rust + /// use zerovec::maps::ZeroMapBorrowed; + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// let borrowed = map.as_borrowed(); + /// assert!(borrowed.contains_key(&1)); + /// assert!(!borrowed.contains_key(&3)); + /// ``` + pub fn contains_key(&self, key: &K) -> bool { + self.keys.zvl_binary_search(key).is_ok() + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + /// Produce an ordered iterator over key-value pairs + pub fn iter<'b>( + &'b self, + ) -> impl Iterator< + Item = ( + &'a <K as ZeroMapKV<'a>>::GetType, + &'a <V as ZeroMapKV<'a>>::GetType, + ), + > + 'b { + self.iter_keys().zip(self.iter_values()) + } + + /// Produce an ordered iterator over keys + pub fn iter_keys<'b>(&'b self) -> impl Iterator<Item = &'a <K as ZeroMapKV<'a>>::GetType> + 'b { + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() + (0..self.keys.zvl_len()).map(move |idx| self.keys.zvl_get(idx).unwrap()) + } + + /// Produce an iterator over values, ordered 
by keys + pub fn iter_values<'b>( + &'b self, + ) -> impl Iterator<Item = &'a <V as ZeroMapKV<'a>>::GetType> + 'b { + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() == values.zvl_len() + (0..self.values.zvl_len()).map(move |idx| self.values.zvl_get(idx).unwrap()) + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + Ord + ?Sized, + V: ZeroMapKV<'a, Slice = ZeroSlice<V>> + AsULE + Copy + 'static, +{ + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` + pub fn get_copied(&self, key: &K) -> Option<V> { + let index = self.keys.zvl_binary_search(key).ok()?; + self.values.get(index) + } + + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` + pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<V> { + let index = self.keys.zvl_binary_search_by(predicate).ok()?; + self.values.get(index) + } + + /// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references + /// to `V::ULE`, in cases when `V` is fixed-size + pub fn iter_copied_values<'b>( + &'b self, + ) -> impl Iterator<Item = (&'b <K as ZeroMapKV<'a>>::GetType, V)> { + (0..self.keys.zvl_len()).map(move |idx| { + ( + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() + self.keys.zvl_get(idx).unwrap(), + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() + self.values.get(idx).unwrap(), + ) + }) + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a, Slice = ZeroSlice<K>> + AsULE + Copy + Ord + 'static, + V: ZeroMapKV<'a, Slice = ZeroSlice<V>> + AsULE + Copy + 'static, +{ + /// Similar to [`Self::iter()`] except it returns a direct copy of the keys values instead of references + /// to `K::ULE` and `V::ULE`, in cases when `K` and `V` are fixed-size + #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait + pub fn iter_copied<'b: 'a>(&'b self) -> impl Iterator<Item = (K, V)> + 'b { + 
let keys = &self.keys; + let values = &self.values; + let len = self.keys.zvl_len(); + (0..len).map(move |idx| { + ( + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() + ZeroSlice::get(keys, idx).unwrap(), + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() + ZeroSlice::get(values, idx).unwrap(), + ) + }) + } +} + +// We can't use the default PartialEq because ZeroMap is invariant +// so otherwise rustc will not automatically allow you to compare ZeroMaps +// with different lifetimes +impl<'a, 'b, K, V> PartialEq<ZeroMapBorrowed<'b, K, V>> for ZeroMapBorrowed<'a, K, V> +where + K: for<'c> ZeroMapKV<'c> + ?Sized, + V: for<'c> ZeroMapKV<'c> + ?Sized, + <K as ZeroMapKV<'a>>::Slice: PartialEq<<K as ZeroMapKV<'b>>::Slice>, + <V as ZeroMapKV<'a>>::Slice: PartialEq<<V as ZeroMapKV<'b>>::Slice>, +{ + fn eq(&self, other: &ZeroMapBorrowed<'b, K, V>) -> bool { + self.keys.eq(other.keys) && self.values.eq(other.values) + } +} + +impl<'a, K, V> fmt::Debug for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K::Slice: fmt::Debug, + V::Slice: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("ZeroMapBorrowed") + .field("keys", &self.keys) + .field("values", &self.values) + .finish() + } +} diff --git a/third_party/rust/zerovec/src/map/databake.rs b/third_party/rust/zerovec/src/map/databake.rs new file mode 100644 index 0000000000..f861e5c29c --- /dev/null +++ b/third_party/rust/zerovec/src/map/databake.rs @@ -0,0 +1,82 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::{maps::ZeroMapBorrowed, maps::ZeroMapKV, ZeroMap}; +use databake::*; + +impl<'a, K, V> Bake for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K::Container: Bake, + V::Container: Bake, +{ + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + let keys = self.keys.bake(env); + let values = self.values.bake(env); + quote! { unsafe { #[allow(unused_unsafe)] zerovec::ZeroMap::from_parts_unchecked(#keys, #values) } } + } +} + +impl<'a, K, V> Bake for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + &'a K::Slice: Bake, + &'a V::Slice: Bake, +{ + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + let keys = self.keys.bake(env); + let values = self.values.bake(env); + quote! { unsafe { #[allow(unused_unsafe)] zerovec::maps::ZeroMapBorrowed::from_parts_unchecked(#keys, #values) } } + } +} + +#[test] +fn test_baked_map() { + test_bake!( + ZeroMap<str, str>, + const: unsafe { + #[allow(unused_unsafe)] + crate::ZeroMap::from_parts_unchecked( + unsafe { + crate::VarZeroVec::from_bytes_unchecked( + b"\x02\0\0\0\0\0\0\0\x02\0\0\0adbc" + ) + }, + unsafe { + crate::VarZeroVec::from_bytes_unchecked( + b"\x02\0\0\0\0\0\0\0\x04\0\0\0ERA1ERA0" + ) + }, + ) + }, + zerovec + ); +} + +#[test] +fn test_baked_borrowed_map() { + test_bake!( + ZeroMapBorrowed<str, str>, + const: unsafe { + #[allow(unused_unsafe)] + crate::maps::ZeroMapBorrowed::from_parts_unchecked( + unsafe { + crate::VarZeroSlice::from_bytes_unchecked( + b"\x02\0\0\0\0\0\0\0\x02\0\0\0adbc" + ) + }, + unsafe { + crate::VarZeroSlice::from_bytes_unchecked( + b"\x02\0\0\0\0\0\0\0\x04\0\0\0ERA1ERA0" + ) + }, + ) + }, + zerovec + ); +} diff --git a/third_party/rust/zerovec/src/map/kv.rs b/third_party/rust/zerovec/src/map/kv.rs new file mode 100644 index 0000000000..1923ed9911 --- /dev/null +++ b/third_party/rust/zerovec/src/map/kv.rs @@ -0,0 +1,131 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::vecs::{MutableZeroVecLike, ZeroVecLike}; +use crate::ule::*; +use crate::vecs::{FlexZeroSlice, FlexZeroVec}; +use crate::vecs::{VarZeroSlice, VarZeroVec}; +use crate::zerovec::{ZeroSlice, ZeroVec}; +use alloc::boxed::Box; + +/// Trait marking types which are allowed to be keys or values in [`ZeroMap`](super::ZeroMap). +/// +/// Users should not be calling methods of this trait directly, however if you are +/// implementing your own [`AsULE`] or [`VarULE`] type you may wish to implement +/// this trait. +// this lifetime should be a GAT on Container once that is possible +#[allow(clippy::upper_case_acronyms)] // KV is not an acronym +pub trait ZeroMapKV<'a> { + /// The container that can be used with this type: [`ZeroVec`] or [`VarZeroVec`]. + type Container: MutableZeroVecLike< + 'a, + Self, + SliceVariant = Self::Slice, + GetType = Self::GetType, + OwnedType = Self::OwnedType, + > + Sized; + type Slice: ZeroVecLike<Self, GetType = Self::GetType> + ?Sized; + /// The type produced by `Container::get()` + /// + /// This type will be predetermined by the choice of `Self::Container`: + /// For sized types this must be `T::ULE`, and for unsized types this must be `T` + type GetType: ?Sized + 'static; + /// The type produced by `Container::replace()` and `Container::remove()`, + /// also used during deserialization. If `Self` is human readable serialized, + /// deserializing to `Self::OwnedType` should produce the same value once + /// passed through `Self::owned_as_self()` + /// + /// This type will be predetermined by the choice of `Self::Container`: + /// For sized types this must be `T` and for unsized types this must be `Box<T>` + type OwnedType: 'static; +} + +macro_rules! 
impl_sized_kv { + ($ty:ident) => { + impl<'a> ZeroMapKV<'a> for $ty { + type Container = ZeroVec<'a, $ty>; + type Slice = ZeroSlice<$ty>; + type GetType = <$ty as AsULE>::ULE; + type OwnedType = $ty; + } + }; +} + +impl_sized_kv!(u8); +impl_sized_kv!(u16); +impl_sized_kv!(u32); +impl_sized_kv!(u64); +impl_sized_kv!(u128); +impl_sized_kv!(i8); +impl_sized_kv!(i16); +impl_sized_kv!(i32); +impl_sized_kv!(i64); +impl_sized_kv!(i128); +impl_sized_kv!(char); +impl_sized_kv!(f32); +impl_sized_kv!(f64); + +impl<'a> ZeroMapKV<'a> for usize { + type Container = FlexZeroVec<'a>; + type Slice = FlexZeroSlice; + type GetType = [u8]; + type OwnedType = usize; +} + +impl<'a, T> ZeroMapKV<'a> for Option<T> +where + Option<T>: AsULE + 'static, +{ + type Container = ZeroVec<'a, Option<T>>; + type Slice = ZeroSlice<Option<T>>; + type GetType = <Option<T> as AsULE>::ULE; + type OwnedType = Option<T>; +} + +impl<'a, T> ZeroMapKV<'a> for OptionVarULE<T> +where + T: VarULE + ?Sized, +{ + type Container = VarZeroVec<'a, OptionVarULE<T>>; + type Slice = VarZeroSlice<OptionVarULE<T>>; + type GetType = OptionVarULE<T>; + type OwnedType = Box<OptionVarULE<T>>; +} + +impl<'a> ZeroMapKV<'a> for str { + type Container = VarZeroVec<'a, str>; + type Slice = VarZeroSlice<str>; + type GetType = str; + type OwnedType = Box<str>; +} + +impl<'a, T> ZeroMapKV<'a> for [T] +where + T: ULE + AsULE<ULE = T>, +{ + type Container = VarZeroVec<'a, [T]>; + type Slice = VarZeroSlice<[T]>; + type GetType = [T]; + type OwnedType = Box<[T]>; +} + +impl<'a, T, const N: usize> ZeroMapKV<'a> for [T; N] +where + T: AsULE + 'static, +{ + type Container = ZeroVec<'a, [T; N]>; + type Slice = ZeroSlice<[T; N]>; + type GetType = [T::ULE; N]; + type OwnedType = [T; N]; +} + +impl<'a, T> ZeroMapKV<'a> for ZeroSlice<T> +where + T: AsULE + 'static, +{ + type Container = VarZeroVec<'a, ZeroSlice<T>>; + type Slice = VarZeroSlice<ZeroSlice<T>>; + type GetType = ZeroSlice<T>; + type OwnedType = Box<ZeroSlice<T>>; +} diff --git 
a/third_party/rust/zerovec/src/map/map.rs b/third_party/rust/zerovec/src/map/map.rs new file mode 100644 index 0000000000..6801869c96 --- /dev/null +++ b/third_party/rust/zerovec/src/map/map.rs @@ -0,0 +1,653 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use crate::ule::{AsULE, EncodeAsVarULE, VarULE}; +use crate::{VarZeroVec, ZeroSlice, ZeroVec, ZeroVecError}; +use alloc::borrow::Borrow; +use alloc::boxed::Box; +use core::cmp::Ordering; +use core::fmt; +use core::iter::FromIterator; + +/// A zero-copy map datastructure, built on sorted binary-searchable [`ZeroVec`] +/// and [`VarZeroVec`]. +/// +/// This type, like [`ZeroVec`] and [`VarZeroVec`], is able to zero-copy +/// deserialize from appropriately formatted byte buffers. It is internally copy-on-write, so it can be mutated +/// afterwards as necessary. +/// +/// Internally, a `ZeroMap` is a zero-copy vector for keys paired with a zero-copy vector for +/// values, sorted by the keys. Therefore, all types used in `ZeroMap` need to work with either +/// [`ZeroVec`] or [`VarZeroVec`]. 
+/// +/// This does mean that for fixed-size data, one must use the regular type (`u32`, `u8`, `char`, etc), +/// whereas for variable-size data, `ZeroMap` will use the dynamically sized version (`str` not `String`, +/// `ZeroSlice` not `ZeroVec`, `FooULE` not `Foo` for custom types) +/// +/// # Examples +/// +/// ``` +/// use zerovec::ZeroMap; +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// #[serde(borrow)] +/// map: ZeroMap<'a, u32, str>, +/// } +/// +/// let mut map = ZeroMap::new(); +/// map.insert(&1, "one"); +/// map.insert(&2, "two"); +/// map.insert(&4, "four"); +/// +/// let data = Data { map }; +/// +/// let bincode_bytes = +/// bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// // Will deserialize without any allocations +/// let deserialized: Data = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(data.map.get(&1), Some("one")); +/// assert_eq!(data.map.get(&2), Some("two")); +/// ``` +/// +/// [`VarZeroVec`]: crate::VarZeroVec +// ZeroMap has only one invariant: keys.len() == values.len() +// It is also expected that the keys are sorted, but this is not an invariant. See #1433 +pub struct ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + pub(crate) keys: K::Container, + pub(crate) values: V::Container, +} + +impl<'a, K, V> Default for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + fn default() -> Self { + Self::new() + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + /// Creates a new, empty `ZeroMap<K, V>`. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroMap; + /// + /// let zm: ZeroMap<u16, str> = ZeroMap::new(); + /// assert!(zm.is_empty()); + /// ``` + pub fn new() -> Self { + Self { + keys: K::Container::zvl_with_capacity(0), + values: V::Container::zvl_with_capacity(0), + } + } + + #[doc(hidden)] // databake internal + pub const unsafe fn from_parts_unchecked(keys: K::Container, values: V::Container) -> Self { + Self { keys, values } + } + + /// Construct a new [`ZeroMap`] with a given capacity + pub fn with_capacity(capacity: usize) -> Self { + Self { + keys: K::Container::zvl_with_capacity(capacity), + values: V::Container::zvl_with_capacity(capacity), + } + } + + /// Obtain a borrowed version of this map + pub fn as_borrowed(&'a self) -> ZeroMapBorrowed<'a, K, V> { + ZeroMapBorrowed { + keys: self.keys.zvl_as_borrowed(), + values: self.values.zvl_as_borrowed(), + } + } + + /// The number of elements in the [`ZeroMap`] + pub fn len(&self) -> usize { + self.values.zvl_len() + } + + /// Whether the [`ZeroMap`] is empty + pub fn is_empty(&self) -> bool { + self.values.zvl_len() == 0 + } + + /// Remove all elements from the [`ZeroMap`] + pub fn clear(&mut self) { + self.keys.zvl_clear(); + self.values.zvl_clear(); + } + + /// Reserve capacity for `additional` more elements to be inserted into + /// the [`ZeroMap`] to avoid frequent reallocations. + /// + /// See [`Vec::reserve()`](alloc::vec::Vec::reserve) for more information. + pub fn reserve(&mut self, additional: usize) { + self.keys.zvl_reserve(additional); + self.values.zvl_reserve(additional); + } +} +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + /// Get the value associated with `key`, if it exists. + /// + /// For fixed-size ([`AsULE`]) `V` types, this _will_ return + /// their corresponding [`AsULE::ULE`] type. If you wish to work with the `V` + /// type directly, [`Self::get_copied()`] exists for convenience. 
+ /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// assert_eq!(map.get(&1), Some("one")); + /// assert_eq!(map.get(&3), None); + /// ``` + pub fn get(&self, key: &K) -> Option<&V::GetType> { + let index = self.keys.zvl_binary_search(key).ok()?; + self.values.zvl_get(index) + } + + /// Binary search the map with `predicate` to find a key, returning the value. + /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// assert_eq!(map.get_by(|probe| probe.cmp(&1)), Some("one")); + /// assert_eq!(map.get_by(|probe| probe.cmp(&3)), None); + /// ``` + pub fn get_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<&V::GetType> { + let index = self.keys.zvl_binary_search_by(predicate).ok()?; + self.values.zvl_get(index) + } + + /// Returns whether `key` is contained in this map + /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// assert!(map.contains_key(&1)); + /// assert!(!map.contains_key(&3)); + /// ``` + pub fn contains_key(&self, key: &K) -> bool { + self.keys.zvl_binary_search(key).is_ok() + } + + /// Insert `value` with `key`, returning the existing value if it exists. + /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// assert_eq!(map.get(&1), Some("one")); + /// assert_eq!(map.get(&3), None); + /// ``` + pub fn insert(&mut self, key: &K, value: &V) -> Option<V::OwnedType> { + match self.keys.zvl_binary_search(key) { + Ok(index) => Some(self.values.zvl_replace(index, value)), + Err(index) => { + self.keys.zvl_insert(index, key); + self.values.zvl_insert(index, value); + None + } + } + } + + /// Remove the value at `key`, returning it if it exists. 
+ /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// assert_eq!(map.remove(&1), Some("one".to_owned().into_boxed_str())); + /// assert_eq!(map.get(&1), None); + /// ``` + pub fn remove(&mut self, key: &K) -> Option<V::OwnedType> { + let idx = self.keys.zvl_binary_search(key).ok()?; + self.keys.zvl_remove(idx); + Some(self.values.zvl_remove(idx)) + } + + /// Appends `value` with `key` to the end of the underlying vector, returning + /// `key` and `value` _if it failed_. Useful for extending with an existing + /// sorted list. + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// assert!(map.try_append(&1, "uno").is_none()); + /// assert!(map.try_append(&3, "tres").is_none()); + /// + /// let unsuccessful = map.try_append(&3, "tres-updated"); + /// assert!(unsuccessful.is_some(), "append duplicate of last key"); + /// + /// let unsuccessful = map.try_append(&2, "dos"); + /// assert!(unsuccessful.is_some(), "append out of order"); + /// + /// assert_eq!(map.get(&1), Some("uno")); + /// + /// // contains the original value for the key: 3 + /// assert_eq!(map.get(&3), Some("tres")); + /// + /// // not appended since it wasn't in order + /// assert_eq!(map.get(&2), None); + /// ``` + #[must_use] + pub fn try_append<'b>(&mut self, key: &'b K, value: &'b V) -> Option<(&'b K, &'b V)> { + if self.keys.zvl_len() != 0 { + if let Some(last) = self.keys.zvl_get(self.keys.zvl_len() - 1) { + if K::Container::t_cmp_get(key, last) != Ordering::Greater { + return Some((key, value)); + } + } + } + + self.keys.zvl_push(key); + self.values.zvl_push(value); + None + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, +{ + /// Produce an ordered iterator over key-value pairs + pub fn iter<'b>( + &'b self, + ) -> impl ExactSizeIterator< + Item = ( + &'b <K as ZeroMapKV<'a>>::GetType, + &'b <V as 
ZeroMapKV<'a>>::GetType, + ), + > { + (0..self.keys.zvl_len()).map(move |idx| { + ( + #[allow(clippy::unwrap_used)] // idx is in-range + self.keys.zvl_get(idx).unwrap(), + #[allow(clippy::unwrap_used)] // idx is in-range + self.values.zvl_get(idx).unwrap(), + ) + }) + } + + /// Produce an ordered iterator over keys + pub fn iter_keys<'b>( + &'b self, + ) -> impl ExactSizeIterator<Item = &'b <K as ZeroMapKV<'a>>::GetType> { + #[allow(clippy::unwrap_used)] // idx is in-range + (0..self.keys.zvl_len()).map(move |idx| self.keys.zvl_get(idx).unwrap()) + } + + /// Produce an iterator over values, ordered by keys + pub fn iter_values<'b>( + &'b self, + ) -> impl ExactSizeIterator<Item = &'b <V as ZeroMapKV<'a>>::GetType> { + #[allow(clippy::unwrap_used)] // idx is in-range + (0..self.values.zvl_len()).map(move |idx| self.values.zvl_get(idx).unwrap()) + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a, Container = ZeroVec<'a, K>> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K: AsULE, +{ + /// Cast a `ZeroMap<K, V>` to `ZeroMap<P, V>` where `K` and `P` are [`AsULE`] types + /// with the same representation. + /// + /// # Unchecked Invariants + /// + /// If `K` and `P` have different ordering semantics, unexpected behavior may occur. + pub fn cast_zv_k_unchecked<P>(self) -> ZeroMap<'a, P, V> + where + P: AsULE<ULE = K::ULE> + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized, + { + ZeroMap { + keys: self.keys.cast(), + values: self.values, + } + } + + /// Convert a `ZeroMap<K, V>` to `ZeroMap<P, V>` where `K` and `P` are [`AsULE`] types + /// with the same size. + /// + /// # Unchecked Invariants + /// + /// If `K` and `P` have different ordering semantics, unexpected behavior may occur. + /// + /// # Panics + /// + /// Panics if `K::ULE` and `P::ULE` are not the same size. 
+ pub fn try_convert_zv_k_unchecked<P>(self) -> Result<ZeroMap<'a, P, V>, ZeroVecError> + where + P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized, + { + Ok(ZeroMap { + keys: self.keys.try_into_converted()?, + values: self.values, + }) + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized, + V: AsULE, +{ + /// Cast a `ZeroMap<K, V>` to `ZeroMap<K, P>` where `V` and `P` are [`AsULE`] types + /// with the same representation. + /// + /// # Unchecked Invariants + /// + /// If `V` and `P` have different ordering semantics, unexpected behavior may occur. + pub fn cast_zv_v_unchecked<P>(self) -> ZeroMap<'a, K, P> + where + P: AsULE<ULE = V::ULE> + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized, + { + ZeroMap { + keys: self.keys, + values: self.values.cast(), + } + } + + /// Convert a `ZeroMap<K, V>` to `ZeroMap<K, P>` where `V` and `P` are [`AsULE`] types + /// with the same size. + /// + /// # Unchecked Invariants + /// + /// If `V` and `P` have different ordering semantics, unexpected behavior may occur. + /// + /// # Panics + /// + /// Panics if `V::ULE` and `P::ULE` are not the same size. + pub fn try_convert_zv_v_unchecked<P>(self) -> Result<ZeroMap<'a, K, P>, ZeroVecError> + where + P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized, + { + Ok(ZeroMap { + keys: self.keys, + values: self.values.try_into_converted()?, + }) + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a, Container = VarZeroVec<'a, V>> + ?Sized, + V: VarULE, +{ + /// Same as `insert()`, but allows using [EncodeAsVarULE](crate::ule::EncodeAsVarULE) + /// types with the value to avoid an extra allocation when dealing with custom ULE types. 
+ /// + /// ```rust + /// use std::borrow::Cow; + /// use zerovec::ZeroMap; + /// + /// #[zerovec::make_varule(PersonULE)] + /// #[derive(Clone, Eq, PartialEq, Ord, PartialOrd)] + /// struct Person<'a> { + /// age: u8, + /// name: Cow<'a, str>, + /// } + /// + /// let mut map: ZeroMap<u32, PersonULE> = ZeroMap::new(); + /// map.insert_var_v( + /// &1, + /// &Person { + /// age: 20, + /// name: "Joseph".into(), + /// }, + /// ); + /// map.insert_var_v( + /// &1, + /// &Person { + /// age: 35, + /// name: "Carla".into(), + /// }, + /// ); + /// assert_eq!(&map.get(&1).unwrap().name, "Carla"); + /// assert!(map.get(&3).is_none()); + /// ``` + pub fn insert_var_v<VE: EncodeAsVarULE<V>>(&mut self, key: &K, value: &VE) -> Option<Box<V>> { + match self.keys.zvl_binary_search(key) { + Ok(index) => { + #[allow(clippy::unwrap_used)] // binary search + let ret = self.values.get(index).unwrap().to_boxed(); + self.values.make_mut().replace(index, value); + Some(ret) + } + Err(index) => { + self.keys.zvl_insert(index, key); + self.values.make_mut().insert(index, value); + None + } + } + } + + // insert_var_k, insert_var_kv are not possible since one cannot perform the binary search with EncodeAsVarULE + // though we might be able to do it in the future if we add a trait for cross-Ord requirements +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, + V: Copy, +{ + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`. 
+ /// + /// # Examples + /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, &'a'); + /// map.insert(&2, &'b'); + /// assert_eq!(map.get_copied(&1), Some('a')); + /// assert_eq!(map.get_copied(&3), None); + #[inline] + pub fn get_copied(&self, key: &K) -> Option<V> { + let index = self.keys.zvl_binary_search(key).ok()?; + self.get_copied_at(index) + } + + /// Binary search the map with `predicate` to find a key, returning the value. + /// + /// For cases when `V` is fixed-size, use this method to obtain a direct copy of `V` + /// instead of `V::ULE`. + /// + /// # Examples + /// + /// ```rust + /// use zerovec::ZeroMap; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, &'a'); + /// map.insert(&2, &'b'); + /// assert_eq!(map.get_copied_by(|probe| probe.cmp(&1)), Some('a')); + /// assert_eq!(map.get_copied_by(|probe| probe.cmp(&3)), None); + /// ``` + #[inline] + pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<V> { + let index = self.keys.zvl_binary_search_by(predicate).ok()?; + self.get_copied_at(index) + } + + fn get_copied_at(&self, index: usize) -> Option<V> { + let ule = self.values.zvl_get(index)?; + let mut result = Option::<V>::None; + V::Container::zvl_get_as_t(ule, |v| result.replace(*v)); + #[allow(clippy::unwrap_used)] // `zvl_get_as_t` guarantees that the callback is invoked + Some(result.unwrap()) + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized, + V: AsULE + Copy, +{ + /// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references + /// to `V::ULE`, in cases when `V` is fixed-size + pub fn iter_copied_values<'b>( + &'b self, + ) -> impl Iterator<Item = (&'b <K as ZeroMapKV<'a>>::GetType, V)> { + (0..self.keys.zvl_len()).map(move |idx| { + ( + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() + self.keys.zvl_get(idx).unwrap(), 
+ #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() + ZeroSlice::get(&*self.values, idx).unwrap(), + ) + }) + } +} + +impl<'a, K, V> ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a, Container = ZeroVec<'a, K>> + ?Sized, + V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized, + K: AsULE + Copy, + V: AsULE + Copy, +{ + /// Similar to [`Self::iter()`] except it returns a direct copy of the keys values instead of references + /// to `K::ULE` and `V::ULE`, in cases when `K` and `V` are fixed-size + #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait + pub fn iter_copied<'b>(&'b self) -> impl Iterator<Item = (K, V)> + 'b { + let keys = &self.keys; + let values = &self.values; + (0..keys.len()).map(move |idx| { + ( + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() + ZeroSlice::get(&**keys, idx).unwrap(), + #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() + ZeroSlice::get(&**values, idx).unwrap(), + ) + }) + } +} + +impl<'a, K, V> From<ZeroMapBorrowed<'a, K, V>> for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + fn from(other: ZeroMapBorrowed<'a, K, V>) -> Self { + Self { + keys: K::Container::zvl_from_borrowed(other.keys), + values: V::Container::zvl_from_borrowed(other.values), + } + } +} + +// We can't use the default PartialEq because ZeroMap is invariant +// so otherwise rustc will not automatically allow you to compare ZeroMaps +// with different lifetimes +impl<'a, 'b, K, V> PartialEq<ZeroMap<'b, K, V>> for ZeroMap<'a, K, V> +where + K: for<'c> ZeroMapKV<'c> + ?Sized, + V: for<'c> ZeroMapKV<'c> + ?Sized, + <K as ZeroMapKV<'a>>::Container: PartialEq<<K as ZeroMapKV<'b>>::Container>, + <V as ZeroMapKV<'a>>::Container: PartialEq<<V as ZeroMapKV<'b>>::Container>, +{ + fn eq(&self, other: &ZeroMap<'b, K, V>) -> bool { + self.keys.eq(&other.keys) && self.values.eq(&other.values) + } +} + +impl<'a, K, V> fmt::Debug for ZeroMap<'a, K, V> 
+where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + <K as ZeroMapKV<'a>>::Container: fmt::Debug, + <V as ZeroMapKV<'a>>::Container: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("ZeroMap") + .field("keys", &self.keys) + .field("values", &self.values) + .finish() + } +} + +impl<'a, K, V> Clone for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + <K as ZeroMapKV<'a>>::Container: Clone, + <V as ZeroMapKV<'a>>::Container: Clone, +{ + fn clone(&self) -> Self { + Self { + keys: self.keys.clone(), + values: self.values.clone(), + } + } +} + +impl<'a, A, B, K, V> FromIterator<(A, B)> for ZeroMap<'a, K, V> +where + A: Borrow<K>, + B: Borrow<V>, + K: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = (A, B)>, + { + let iter = iter.into_iter(); + let mut map = match iter.size_hint() { + (_, Some(upper)) => Self::with_capacity(upper), + (lower, None) => Self::with_capacity(lower), + }; + + for (key, value) in iter { + if let Some((key, value)) = map.try_append(key.borrow(), value.borrow()) { + map.insert(key, value); + } + } + map + } +} diff --git a/third_party/rust/zerovec/src/map/mod.rs b/third_party/rust/zerovec/src/map/mod.rs new file mode 100644 index 0000000000..fcad0cff71 --- /dev/null +++ b/third_party/rust/zerovec/src/map/mod.rs @@ -0,0 +1,23 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! See [`ZeroMap`](crate::ZeroMap) for details. 
+ +mod borrowed; +mod kv; +#[allow(clippy::module_inception)] // module is purely internal +pub(crate) mod map; +mod vecs; + +#[cfg(feature = "databake")] +mod databake; +#[cfg(feature = "serde")] +mod serde; +#[cfg(feature = "serde")] +mod serde_helpers; + +pub use crate::ZeroMap; +pub use borrowed::ZeroMapBorrowed; +pub use kv::ZeroMapKV; +pub use vecs::{MutableZeroVecLike, ZeroVecLike}; diff --git a/third_party/rust/zerovec/src/map/serde.rs b/third_party/rust/zerovec/src/map/serde.rs new file mode 100644 index 0000000000..e82886d2ad --- /dev/null +++ b/third_party/rust/zerovec/src/map/serde.rs @@ -0,0 +1,313 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{MutableZeroVecLike, ZeroMap, ZeroMapBorrowed, ZeroMapKV, ZeroVecLike}; +use core::fmt; +use core::marker::PhantomData; +use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor}; +#[cfg(feature = "serde")] +use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer}; + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'a, K, V> Serialize for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + V: ZeroMapKV<'a> + Serialize + ?Sized, + K::Container: Serialize, + V::Container: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + if serializer.is_human_readable() { + // Many human-readable formats don't support values other + // than numbers and strings as map keys. 
For them, we can serialize + // as a vec of tuples instead + if let Some(k) = self.iter_keys().next() { + if !K::Container::zvl_get_as_t(k, super::serde_helpers::is_num_or_string) { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for (k, v) in self.iter() { + K::Container::zvl_get_as_t(k, |k| { + V::Container::zvl_get_as_t(v, |v| seq.serialize_element(&(k, v))) + })?; + } + return seq.end(); + } + } + let mut map = serializer.serialize_map(Some(self.len()))?; + for (k, v) in self.iter() { + K::Container::zvl_get_as_t(k, |k| map.serialize_key(k))?; + V::Container::zvl_get_as_t(v, |v| map.serialize_value(v))?; + } + map.end() + } else { + (&self.keys, &self.values).serialize(serializer) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'a, K, V> Serialize for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + V: ZeroMapKV<'a> + Serialize + ?Sized, + K::Container: Serialize, + V::Container: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + ZeroMap::<K, V>::from(*self).serialize(serializer) + } +} + +/// Modified example from https://serde.rs/deserialize-map.html +struct ZeroMapMapVisitor<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter + marker: PhantomData<fn() -> (&'a K::OwnedType, &'a V::OwnedType)>, +} + +impl<'a, K, V> ZeroMapMapVisitor<'a, K, V> +where + K: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + fn new() -> Self { + ZeroMapMapVisitor { + marker: PhantomData, + } + } +} + +impl<'a, 'de, K, V> Visitor<'de> for ZeroMapMapVisitor<'a, K, V> +where + K: ZeroMapKV<'a> + Ord + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, +{ + type Value = ZeroMap<'a, K, V>; + + fn 
expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a map produced by ZeroMap") + } + + fn visit_seq<S>(self, mut access: S) -> Result<Self::Value, S::Error> + where + S: SeqAccess<'de>, + { + let mut map = ZeroMap::with_capacity(access.size_hint().unwrap_or(0)); + + // While there are entries remaining in the input, add them + // into our map. + while let Some((key, value)) = access.next_element::<(K::OwnedType, V::OwnedType)>()? { + // Try to append it at the end, hoping for a sorted map. + // If not sorted, return an error + // a serialized map that came from another ZeroMap + if map + .try_append( + K::Container::owned_as_t(&key), + V::Container::owned_as_t(&value), + ) + .is_some() + { + return Err(de::Error::custom( + "ZeroMap's keys must be sorted while deserializing", + )); + } + } + + Ok(map) + } + + fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error> + where + M: MapAccess<'de>, + { + let mut map = ZeroMap::with_capacity(access.size_hint().unwrap_or(0)); + + // While there are entries remaining in the input, add them + // into our map. + while let Some((key, value)) = access.next_entry::<K::OwnedType, V::OwnedType>()? { + // Try to append it at the end, hoping for a sorted map. 
+ // If not sorted, return an error + // a serialized map that came from another ZeroMap + if map + .try_append( + K::Container::owned_as_t(&key), + V::Container::owned_as_t(&value), + ) + .is_some() + { + return Err(de::Error::custom( + "ZeroMap's keys must be sorted while deserializing", + )); + } + } + + Ok(map) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, K, V> Deserialize<'de> for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a> + Ord + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + K::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + deserializer.deserialize_any(ZeroMapMapVisitor::<'a, K, V>::new()) + } else { + let (keys, values): (K::Container, V::Container) = + Deserialize::deserialize(deserializer)?; + if keys.zvl_len() != values.zvl_len() { + return Err(de::Error::custom( + "Mismatched key and value sizes in ZeroMap", + )); + } + // #1433: If keys are out of order, treat it as GIGO. 
+ debug_assert!(keys.zvl_is_ascending()); + Ok(Self { keys, values }) + } + } +} + +// /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, K, V> Deserialize<'de> for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a> + Ord + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + K::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "ZeroMapBorrowed cannot be deserialized from human-readable formats", + )) + } else { + let deserialized: ZeroMap<'a, K, V> = ZeroMap::deserialize(deserializer)?; + let keys = if let Some(keys) = deserialized.keys.zvl_as_borrowed_inner() { + keys + } else { + return Err(de::Error::custom( + "ZeroMapBorrowed can only deserialize in zero-copy ways", + )); + }; + let values = if let Some(values) = deserialized.values.zvl_as_borrowed_inner() { + values + } else { + return Err(de::Error::custom( + "ZeroMapBorrowed can only deserialize in zero-copy ways", + )); + }; + Ok(Self { keys, values }) + } + } +} + +#[cfg(test)] +#[allow(non_camel_case_types)] +mod test { + use crate::{map::ZeroMapBorrowed, ZeroMap}; + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_ZeroMap<'data> { + #[serde(borrow)] + _data: ZeroMap<'data, str, [u8]>, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_ZeroMapBorrowed<'data> { + #[serde(borrow)] + _data: ZeroMapBorrowed<'data, str, [u8]>, + } + + const JSON_STR: &str = "{\"1\":\"uno\",\"2\":\"dos\",\"3\":\"tres\"}"; + const BINCODE_BYTES: &[u8] = &[ + 12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115, + ]; + + fn make_map() -> ZeroMap<'static, u32, str> { + let mut 
map = ZeroMap::new(); + map.insert(&1, "uno"); + map.insert(&2, "dos"); + map.insert(&3, "tres"); + map + } + + #[test] + fn test_serde_json() { + let map = make_map(); + let json_str = serde_json::to_string(&map).expect("serialize"); + assert_eq!(JSON_STR, json_str); + let new_map: ZeroMap<u32, str> = serde_json::from_str(&json_str).expect("deserialize"); + assert_eq!( + new_map.iter().collect::<Vec<_>>(), + map.iter().collect::<Vec<_>>() + ); + } + + #[test] + fn test_serde_json_complex_key() { + let mut map = ZeroMap::new(); + map.insert(&(1, 1), "uno"); + map.insert(&(2, 2), "dos"); + map.insert(&(3, 3), "tres"); + let json_str = serde_json::to_string(&map).expect("serialize"); + assert_eq!( + json_str, + "[[[1,1],\"uno\"],[[2,2],\"dos\"],[[3,3],\"tres\"]]" + ); + let new_map: ZeroMap<(u32, u32), str> = + serde_json::from_str(&json_str).expect("deserialize"); + assert_eq!( + new_map.iter().collect::<Vec<_>>(), + map.iter().collect::<Vec<_>>() + ); + } + + #[test] + fn test_bincode() { + let map = make_map(); + let bincode_bytes = bincode::serialize(&map).expect("serialize"); + assert_eq!(BINCODE_BYTES, bincode_bytes); + let new_map: ZeroMap<u32, str> = bincode::deserialize(&bincode_bytes).expect("deserialize"); + assert_eq!( + new_map.iter().collect::<Vec<_>>(), + map.iter().collect::<Vec<_>>() + ); + + let new_map: ZeroMapBorrowed<u32, str> = + bincode::deserialize(&bincode_bytes).expect("deserialize"); + assert_eq!( + new_map.iter().collect::<Vec<_>>(), + map.iter().collect::<Vec<_>>() + ); + } +} diff --git a/third_party/rust/zerovec/src/map/serde_helpers.rs b/third_party/rust/zerovec/src/map/serde_helpers.rs new file mode 100644 index 0000000000..b1ead938a0 --- /dev/null +++ b/third_party/rust/zerovec/src/map/serde_helpers.rs @@ -0,0 +1,168 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +// @@@@@@@@@@@@@@@@ +// THIS FILE IS SHARED BETWEEN LITEMAP AND ZEROVEC. PLEASE KEEP IT IN SYNC FOR ALL EDITS +// @@@@@@@@@@@@@@@@ + +use serde::ser::{Impossible, Serialize, Serializer}; + +pub fn is_num_or_string<T: Serialize + ?Sized>(k: &T) -> bool { + // Serializer that errors in the same cases as serde_json::ser::MapKeySerializer + struct MapKeySerializerDryRun; + impl Serializer for MapKeySerializerDryRun { + type Ok = (); + // Singleton error type that implements serde::ser::Error + type Error = core::fmt::Error; + + type SerializeSeq = Impossible<(), Self::Error>; + type SerializeTuple = Impossible<(), Self::Error>; + type SerializeTupleStruct = Impossible<(), Self::Error>; + type SerializeTupleVariant = Impossible<(), Self::Error>; + type SerializeMap = Impossible<(), Self::Error>; + type SerializeStruct = Impossible<(), Self::Error>; + type SerializeStructVariant = Impossible<(), Self::Error>; + + fn serialize_str(self, _value: &str) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_newtype_struct<T: Serialize + ?Sized>( + self, + _name: &'static str, + value: &T, + ) -> Result<Self::Ok, Self::Error> { + // Recurse + value.serialize(self) + } + fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_i8(self, _value: i8) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_i16(self, _value: i16) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_i32(self, _value: i32) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_i64(self, _value: i64) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + serde::serde_if_integer128! 
{ + fn serialize_i128(self, _value: i128) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + } + fn serialize_u8(self, _value: u8) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_u16(self, _value: u16) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_u32(self, _value: u32) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_u64(self, _value: u64) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + serde::serde_if_integer128! { + fn serialize_u128(self, _value: u128) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + } + fn serialize_f32(self, _value: f32) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_f64(self, _value: f64) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_char(self, _value: char) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + fn serialize_bytes(self, _value: &[u8]) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_newtype_variant<T: Serialize + ?Sized>( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_none(self) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_some<T: Serialize + ?Sized>( + self, + _value: &T, + ) -> Result<Self::Ok, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleStruct, Self::Error> { + Err(core::fmt::Error) + } + fn 
serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleVariant, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result<Self::SerializeStruct, Self::Error> { + Err(core::fmt::Error) + } + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeStructVariant, Self::Error> { + Err(core::fmt::Error) + } + fn collect_str<T: core::fmt::Display + ?Sized>( + self, + _value: &T, + ) -> Result<Self::Ok, Self::Error> { + Ok(()) + } + } + k.serialize(MapKeySerializerDryRun).is_ok() +} diff --git a/third_party/rust/zerovec/src/map/vecs.rs b/third_party/rust/zerovec/src/map/vecs.rs new file mode 100644 index 0000000000..5ee93d3fea --- /dev/null +++ b/third_party/rust/zerovec/src/map/vecs.rs @@ -0,0 +1,724 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use crate::varzerovec::owned::VarZeroVecOwned; +use crate::vecs::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned, VarZeroVecFormat}; +use crate::{VarZeroSlice, VarZeroVec}; +use crate::{ZeroSlice, ZeroVec}; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::mem; +use core::ops::Range; + +/// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). **You +/// should not be implementing or calling this trait directly.** +/// +/// The T type is the type received by [`Self::zvl_binary_search()`], as well as the one used +/// for human-readable serialization. 
+/// +/// Methods are prefixed with `zvl_*` to avoid clashes with methods on the types themselves +pub trait ZeroVecLike<T: ?Sized> { + /// The type returned by `Self::get()` + type GetType: ?Sized + 'static; + /// A fully borrowed version of this + type SliceVariant: ZeroVecLike<T, GetType = Self::GetType> + ?Sized; + + /// Create a new, empty borrowed variant + fn zvl_new_borrowed() -> &'static Self::SliceVariant; + + /// Search for a key in a sorted vector, returns `Ok(index)` if found, + /// returns `Err(insert_index)` if not found, where `insert_index` is the + /// index where it should be inserted to maintain sort order. + fn zvl_binary_search(&self, k: &T) -> Result<usize, usize> + where + T: Ord; + /// Search for a key within a certain range in a sorted vector. + /// Returns `None` if the range is out of bounds, and + /// `Ok` or `Err` in the same way as `zvl_binary_search`. + /// Indices are returned relative to the start of the range. + fn zvl_binary_search_in_range( + &self, + k: &T, + range: Range<usize>, + ) -> Option<Result<usize, usize>> + where + T: Ord; + + /// Search for a key in a sorted vector by a predicate, returns `Ok(index)` if found, + /// returns `Err(insert_index)` if not found, where `insert_index` is the + /// index where it should be inserted to maintain sort order. + fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize>; + /// Search for a key within a certain range in a sorted vector by a predicate. + /// Returns `None` if the range is out of bounds, and + /// `Ok` or `Err` in the same way as `zvl_binary_search`. + /// Indices are returned relative to the start of the range. 
+ fn zvl_binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>>; + + /// Get element at `index` + fn zvl_get(&self, index: usize) -> Option<&Self::GetType>; + /// The length of this vector + fn zvl_len(&self) -> usize; + /// Check if this vector is in ascending order according to `T`s `Ord` impl + fn zvl_is_ascending(&self) -> bool + where + T: Ord, + { + if let Some(first) = self.zvl_get(0) { + let mut prev = first; + for i in 1..self.zvl_len() { + #[allow(clippy::unwrap_used)] // looping over the valid indices + let curr = self.zvl_get(i).unwrap(); + if Self::get_cmp_get(prev, curr) != Ordering::Less { + return false; + } + prev = curr; + } + } + true + } + /// Check if this vector is empty + fn zvl_is_empty(&self) -> bool { + self.zvl_len() == 0 + } + + /// Construct a borrowed variant by borrowing from `&self`. + /// + /// This function behaves like `&'b self -> Self::SliceVariant<'b>`, + /// where `'b` is the lifetime of the reference to this object. + /// + /// Note: We rely on the compiler recognizing `'a` and `'b` as covariant and + /// casting `&'b Self<'a>` to `&'b Self<'b>` when this gets called, which works + /// out for `ZeroVec` and `VarZeroVec` containers just fine. + fn zvl_as_borrowed(&self) -> &Self::SliceVariant; + + /// Compare this type with a `Self::GetType`. This must produce the same result as + /// if `g` were converted to `Self` + #[inline] + fn t_cmp_get(t: &T, g: &Self::GetType) -> Ordering + where + T: Ord, + { + Self::zvl_get_as_t(g, |g| t.cmp(g)) + } + + /// Compare two values of `Self::GetType`. 
This must produce the same result as + /// if both `a` and `b` were converted to `Self` + #[inline] + fn get_cmp_get(a: &Self::GetType, b: &Self::GetType) -> Ordering + where + T: Ord, + { + Self::zvl_get_as_t(a, |a| Self::zvl_get_as_t(b, |b| a.cmp(b))) + } + + /// Obtain a reference to T, passed to a closure + /// + /// This uses a callback because it's not possible to return owned-or-borrowed + /// types without GATs + /// + /// Impls should guarantee that the callback function is be called exactly once. + fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R; +} + +/// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). **You +/// should not be implementing or calling this trait directly.** +/// +/// This trait augments [`ZeroVecLike`] with methods allowing for mutation of the underlying +/// vector for owned vector types. +/// +/// Methods are prefixed with `zvl_*` to avoid clashes with methods on the types themselves +pub trait MutableZeroVecLike<'a, T: ?Sized>: ZeroVecLike<T> { + /// The type returned by `Self::remove()` and `Self::replace()` + type OwnedType; + + /// Insert an element at `index` + fn zvl_insert(&mut self, index: usize, value: &T); + /// Remove the element at `index` (panicking if nonexistant) + fn zvl_remove(&mut self, index: usize) -> Self::OwnedType; + /// Replace the element at `index` with another one, returning the old element + fn zvl_replace(&mut self, index: usize, value: &T) -> Self::OwnedType; + /// Push an element to the end of this vector + fn zvl_push(&mut self, value: &T); + /// Create a new, empty vector, with given capacity + fn zvl_with_capacity(cap: usize) -> Self; + /// Remove all elements from the vector + fn zvl_clear(&mut self); + /// Reserve space for `addl` additional elements + fn zvl_reserve(&mut self, addl: usize); + /// Applies the permutation such that `before.zvl_get(permutation[i]) == after.zvl_get(i)`. 
+ /// + /// # Panics + /// If `permutation` is not a valid permutation of length `zvl_len()`. + fn zvl_permute(&mut self, permutation: &mut [usize]); + + /// Convert an owned value to a borrowed T + fn owned_as_t(o: &Self::OwnedType) -> &T; + + /// Construct from the borrowed version of the type + /// + /// These are useful to ensure serialization parity between borrowed and owned versions + fn zvl_from_borrowed(b: &'a Self::SliceVariant) -> Self; + /// Extract the inner borrowed variant if possible. Returns `None` if the data is owned. + /// + /// This function behaves like `&'_ self -> Self::SliceVariant<'a>`, + /// where `'a` is the lifetime of this object's borrowed data. + /// + /// This function is similar to matching the `Borrowed` variant of `ZeroVec` + /// or `VarZeroVec`, returning the inner borrowed type. + fn zvl_as_borrowed_inner(&self) -> Option<&'a Self::SliceVariant>; +} + +impl<'a, T> ZeroVecLike<T> for ZeroVec<'a, T> +where + T: 'a + AsULE + Copy, +{ + type GetType = T::ULE; + type SliceVariant = ZeroSlice<T>; + + fn zvl_new_borrowed() -> &'static Self::SliceVariant { + ZeroSlice::<T>::new_empty() + } + fn zvl_binary_search(&self, k: &T) -> Result<usize, usize> + where + T: Ord, + { + ZeroSlice::binary_search(self, k) + } + fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>> + where + T: Ord, + { + let zs: &ZeroSlice<T> = self; + zs.zvl_binary_search_in_range(k, range) + } + fn zvl_binary_search_by( + &self, + mut predicate: impl FnMut(&T) -> Ordering, + ) -> Result<usize, usize> { + ZeroSlice::binary_search_by(self, |probe| predicate(&probe)) + } + fn zvl_binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + let zs: &ZeroSlice<T> = self; + zs.zvl_binary_search_in_range_by(predicate, range) + } + fn zvl_get(&self, index: usize) -> Option<&T::ULE> { + self.get_ule_ref(index) + } + fn zvl_len(&self) -> usize { + 
ZeroSlice::len(self) + } + fn zvl_as_borrowed(&self) -> &ZeroSlice<T> { + self + } + #[inline] + fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { + f(&T::from_unaligned(*g)) + } +} + +impl<T> ZeroVecLike<T> for ZeroSlice<T> +where + T: AsULE + Copy, +{ + type GetType = T::ULE; + type SliceVariant = ZeroSlice<T>; + + fn zvl_new_borrowed() -> &'static Self::SliceVariant { + ZeroSlice::<T>::new_empty() + } + fn zvl_binary_search(&self, k: &T) -> Result<usize, usize> + where + T: Ord, + { + ZeroSlice::binary_search(self, k) + } + fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>> + where + T: Ord, + { + let subslice = self.get_subslice(range)?; + Some(ZeroSlice::binary_search(subslice, k)) + } + fn zvl_binary_search_by( + &self, + mut predicate: impl FnMut(&T) -> Ordering, + ) -> Result<usize, usize> { + ZeroSlice::binary_search_by(self, |probe| predicate(&probe)) + } + fn zvl_binary_search_in_range_by( + &self, + mut predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + let subslice = self.get_subslice(range)?; + Some(ZeroSlice::binary_search_by(subslice, |probe| { + predicate(&probe) + })) + } + fn zvl_get(&self, index: usize) -> Option<&T::ULE> { + self.get_ule_ref(index) + } + fn zvl_len(&self) -> usize { + ZeroSlice::len(self) + } + fn zvl_as_borrowed(&self) -> &ZeroSlice<T> { + self + } + + #[inline] + fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { + f(&T::from_unaligned(*g)) + } +} + +impl<'a, T> MutableZeroVecLike<'a, T> for ZeroVec<'a, T> +where + T: AsULE + Copy + 'static, +{ + type OwnedType = T; + fn zvl_insert(&mut self, index: usize, value: &T) { + self.with_mut(|v| v.insert(index, value.to_unaligned())) + } + fn zvl_remove(&mut self, index: usize) -> T { + T::from_unaligned(self.with_mut(|v| v.remove(index))) + } + fn zvl_replace(&mut self, index: usize, value: &T) -> T { + #[allow(clippy::indexing_slicing)] + let 
unaligned = self.with_mut(|vec| { + debug_assert!(index < vec.len()); + mem::replace(&mut vec[index], value.to_unaligned()) + }); + T::from_unaligned(unaligned) + } + fn zvl_push(&mut self, value: &T) { + self.with_mut(|v| v.push(value.to_unaligned())) + } + fn zvl_with_capacity(cap: usize) -> Self { + if cap == 0 { + ZeroVec::new() + } else { + ZeroVec::new_owned(Vec::with_capacity(cap)) + } + } + fn zvl_clear(&mut self) { + self.with_mut(|v| v.clear()) + } + fn zvl_reserve(&mut self, addl: usize) { + self.with_mut(|v| v.reserve(addl)) + } + + fn owned_as_t(o: &Self::OwnedType) -> &T { + o + } + + fn zvl_from_borrowed(b: &'a ZeroSlice<T>) -> Self { + b.as_zerovec() + } + fn zvl_as_borrowed_inner(&self) -> Option<&'a ZeroSlice<T>> { + self.as_maybe_borrowed() + } + + #[allow(clippy::indexing_slicing)] // documented panic + fn zvl_permute(&mut self, permutation: &mut [usize]) { + assert_eq!(permutation.len(), self.zvl_len()); + + let vec = self.to_mut_slice(); + + for cycle_start in 0..permutation.len() { + let mut curr = cycle_start; + let mut next = permutation[curr]; + + while next != cycle_start { + vec.swap(curr, next); + // Make curr a self-cycle so we don't use it as a cycle_start later + permutation[curr] = curr; + curr = next; + next = permutation[next]; + } + permutation[curr] = curr; + } + } +} + +impl<'a, T, F> ZeroVecLike<T> for VarZeroVec<'a, T, F> +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + type GetType = T; + type SliceVariant = VarZeroSlice<T, F>; + + fn zvl_new_borrowed() -> &'static Self::SliceVariant { + VarZeroSlice::<T, F>::new_empty() + } + fn zvl_binary_search(&self, k: &T) -> Result<usize, usize> + where + T: Ord, + { + self.binary_search(k) + } + fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>> + where + T: Ord, + { + self.binary_search_in_range(k, range) + } + fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> { + 
self.binary_search_by(predicate) + } + fn zvl_binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.binary_search_in_range_by(predicate, range) + } + fn zvl_get(&self, index: usize) -> Option<&T> { + self.get(index) + } + fn zvl_len(&self) -> usize { + self.len() + } + + fn zvl_as_borrowed(&self) -> &VarZeroSlice<T, F> { + self.as_slice() + } + + #[inline] + fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { + f(g) + } +} + +impl<T, F> ZeroVecLike<T> for VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + type GetType = T; + type SliceVariant = VarZeroSlice<T, F>; + + fn zvl_new_borrowed() -> &'static Self::SliceVariant { + VarZeroSlice::<T, F>::new_empty() + } + fn zvl_binary_search(&self, k: &T) -> Result<usize, usize> + where + T: Ord, + { + self.binary_search(k) + } + fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>> + where + T: Ord, + { + self.binary_search_in_range(k, range) + } + fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> { + self.binary_search_by(predicate) + } + fn zvl_binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.binary_search_in_range_by(predicate, range) + } + fn zvl_get(&self, index: usize) -> Option<&T> { + self.get(index) + } + fn zvl_len(&self) -> usize { + self.len() + } + + fn zvl_as_borrowed(&self) -> &VarZeroSlice<T, F> { + self + } + + #[inline] + fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { + f(g) + } +} + +impl<'a, T, F> MutableZeroVecLike<'a, T> for VarZeroVec<'a, T, F> +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + type OwnedType = Box<T>; + fn zvl_insert(&mut self, index: usize, value: &T) { + self.make_mut().insert(index, value) + } + fn zvl_remove(&mut self, index: 
usize) -> Box<T> { + let vec = self.make_mut(); + debug_assert!(index < vec.len()); + #[allow(clippy::unwrap_used)] + let old = vec.get(index).unwrap().to_boxed(); + vec.remove(index); + old + } + fn zvl_replace(&mut self, index: usize, value: &T) -> Box<T> { + let vec = self.make_mut(); + debug_assert!(index < vec.len()); + #[allow(clippy::unwrap_used)] + let old = vec.get(index).unwrap().to_boxed(); + vec.replace(index, value); + old + } + fn zvl_push(&mut self, value: &T) { + let len = self.len(); + self.make_mut().insert(len, value) + } + fn zvl_with_capacity(cap: usize) -> Self { + if cap == 0 { + VarZeroVec::new() + } else { + VarZeroVec::Owned(VarZeroVecOwned::with_capacity(cap)) + } + } + fn zvl_clear(&mut self) { + self.make_mut().clear() + } + fn zvl_reserve(&mut self, addl: usize) { + self.make_mut().reserve(addl) + } + + fn owned_as_t(o: &Self::OwnedType) -> &T { + o + } + + fn zvl_from_borrowed(b: &'a VarZeroSlice<T, F>) -> Self { + b.as_varzerovec() + } + fn zvl_as_borrowed_inner(&self) -> Option<&'a VarZeroSlice<T, F>> { + if let VarZeroVec::Borrowed(b) = *self { + Some(b) + } else { + None + } + } + + #[allow(clippy::unwrap_used)] // documented panic + fn zvl_permute(&mut self, permutation: &mut [usize]) { + assert_eq!(permutation.len(), self.zvl_len()); + + let mut result = VarZeroVecOwned::new(); + for &i in permutation.iter() { + result.push(self.get(i).unwrap()); + } + *self = VarZeroVec::Owned(result); + } +} + +impl<'a> ZeroVecLike<usize> for FlexZeroVec<'a> { + type GetType = [u8]; + type SliceVariant = FlexZeroSlice; + + fn zvl_new_borrowed() -> &'static Self::SliceVariant { + FlexZeroSlice::new_empty() + } + fn zvl_binary_search(&self, k: &usize) -> Result<usize, usize> { + FlexZeroSlice::binary_search(self, *k) + } + fn zvl_binary_search_in_range( + &self, + k: &usize, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + FlexZeroSlice::binary_search_in_range(self, *k, range) + } + fn zvl_binary_search_by( + &self, + mut 
predicate: impl FnMut(&usize) -> Ordering, + ) -> Result<usize, usize> { + FlexZeroSlice::binary_search_by(self, |probe| predicate(&probe)) + } + fn zvl_binary_search_in_range_by( + &self, + mut predicate: impl FnMut(&usize) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + FlexZeroSlice::binary_search_in_range_by(self, |probe| predicate(&probe), range) + } + fn zvl_get(&self, index: usize) -> Option<&[u8]> { + self.get_chunk(index) + } + fn zvl_len(&self) -> usize { + FlexZeroSlice::len(self) + } + + fn zvl_as_borrowed(&self) -> &FlexZeroSlice { + self + } + + #[inline] + fn zvl_get_as_t<R>(g: &[u8], f: impl FnOnce(&usize) -> R) -> R { + f(&crate::chunk_to_usize(g, g.len())) + } +} + +impl ZeroVecLike<usize> for FlexZeroSlice { + type GetType = [u8]; + type SliceVariant = FlexZeroSlice; + + fn zvl_new_borrowed() -> &'static Self::SliceVariant { + FlexZeroSlice::new_empty() + } + fn zvl_binary_search(&self, k: &usize) -> Result<usize, usize> { + FlexZeroSlice::binary_search(self, *k) + } + fn zvl_binary_search_in_range( + &self, + k: &usize, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + FlexZeroSlice::binary_search_in_range(self, *k, range) + } + fn zvl_binary_search_by( + &self, + mut predicate: impl FnMut(&usize) -> Ordering, + ) -> Result<usize, usize> { + FlexZeroSlice::binary_search_by(self, |probe| predicate(&probe)) + } + fn zvl_binary_search_in_range_by( + &self, + mut predicate: impl FnMut(&usize) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + FlexZeroSlice::binary_search_in_range_by(self, |probe| predicate(&probe), range) + } + fn zvl_get(&self, index: usize) -> Option<&[u8]> { + self.get_chunk(index) + } + fn zvl_len(&self) -> usize { + FlexZeroSlice::len(self) + } + + fn zvl_as_borrowed(&self) -> &FlexZeroSlice { + self + } + + #[inline] + fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&usize) -> R) -> R { + f(&crate::chunk_to_usize(g, g.len())) + } +} + +impl<'a> 
MutableZeroVecLike<'a, usize> for FlexZeroVec<'a> { + type OwnedType = usize; + fn zvl_insert(&mut self, index: usize, value: &usize) { + self.to_mut().insert(index, *value) + } + fn zvl_remove(&mut self, index: usize) -> usize { + self.to_mut().remove(index) + } + fn zvl_replace(&mut self, index: usize, value: &usize) -> usize { + // TODO(#2028): Make this a single operation instead of two operations. + let mutable = self.to_mut(); + let old_value = mutable.remove(index); + mutable.insert(index, *value); + old_value + } + fn zvl_push(&mut self, value: &usize) { + self.to_mut().push(*value) + } + fn zvl_with_capacity(_cap: usize) -> Self { + // There is no `FlexZeroVec::with_capacity()` because it is variable-width + FlexZeroVec::Owned(FlexZeroVecOwned::new_empty()) + } + fn zvl_clear(&mut self) { + self.to_mut().clear() + } + fn zvl_reserve(&mut self, _addl: usize) { + // There is no `FlexZeroVec::reserve()` because it is variable-width + } + + fn owned_as_t(o: &Self::OwnedType) -> &usize { + o + } + + fn zvl_from_borrowed(b: &'a FlexZeroSlice) -> Self { + b.as_flexzerovec() + } + fn zvl_as_borrowed_inner(&self) -> Option<&'a FlexZeroSlice> { + if let FlexZeroVec::Borrowed(b) = *self { + Some(b) + } else { + None + } + } + + #[allow(clippy::unwrap_used)] // documented panic + fn zvl_permute(&mut self, permutation: &mut [usize]) { + assert_eq!(permutation.len(), self.zvl_len()); + *self = permutation.iter().map(|&i| self.get(i).unwrap()).collect(); + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_zerovec_binary_search_in_range() { + let zv: ZeroVec<u16> = ZeroVec::from_slice_or_alloc(&[11, 22, 33, 44, 55, 66, 77]); + + // Full range search + assert_eq!(zv.zvl_binary_search_in_range(&11, 0..7), Some(Ok(0))); + assert_eq!(zv.zvl_binary_search_in_range(&12, 0..7), Some(Err(1))); + assert_eq!(zv.zvl_binary_search_in_range(&44, 0..7), Some(Ok(3))); + assert_eq!(zv.zvl_binary_search_in_range(&45, 0..7), Some(Err(4))); + 
assert_eq!(zv.zvl_binary_search_in_range(&77, 0..7), Some(Ok(6))); + assert_eq!(zv.zvl_binary_search_in_range(&78, 0..7), Some(Err(7))); + + // Out-of-range search + assert_eq!(zv.zvl_binary_search_in_range(&44, 0..2), Some(Err(2))); + assert_eq!(zv.zvl_binary_search_in_range(&44, 5..7), Some(Err(0))); + + // Offset search + assert_eq!(zv.zvl_binary_search_in_range(&44, 2..5), Some(Ok(1))); + assert_eq!(zv.zvl_binary_search_in_range(&45, 2..5), Some(Err(2))); + + // Out-of-bounds + assert_eq!(zv.zvl_binary_search_in_range(&44, 0..100), None); + assert_eq!(zv.zvl_binary_search_in_range(&44, 100..200), None); + } + + #[test] + fn test_permute() { + let mut zv: ZeroVec<u16> = ZeroVec::from_slice_or_alloc(&[11, 22, 33, 44, 55, 66, 77]); + let mut permutation = vec![3, 2, 1, 0, 6, 5, 4]; + zv.zvl_permute(&mut permutation); + assert_eq!(&zv, &[44, 33, 22, 11, 77, 66, 55]); + + let mut vzv: VarZeroVec<str> = VarZeroVec::Owned( + VarZeroVecOwned::try_from_elements(&["11", "22", "33", "44", "55", "66", "77"]) + .unwrap(), + ); + let mut permutation = vec![3, 2, 1, 0, 6, 5, 4]; + vzv.zvl_permute(&mut permutation); + assert_eq!(&vzv, &["44", "33", "22", "11", "77", "66", "55"]); + + let mut fzv: FlexZeroVec = [11, 22, 33, 44, 55, 66, 77].into_iter().collect(); + let mut permutation = vec![3, 2, 1, 0, 6, 5, 4]; + fzv.zvl_permute(&mut permutation); + assert_eq!( + fzv.iter().collect::<Vec<_>>(), + [44, 33, 22, 11, 77, 66, 55].into_iter().collect::<Vec<_>>() + ); + } +} diff --git a/third_party/rust/zerovec/src/map2d/borrowed.rs b/third_party/rust/zerovec/src/map2d/borrowed.rs new file mode 100644 index 0000000000..166f1be743 --- /dev/null +++ b/third_party/rust/zerovec/src/map2d/borrowed.rs @@ -0,0 +1,339 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::ZeroSlice; + +use core::cmp::Ordering; +use core::fmt; + +use crate::map::ZeroMapKV; +use crate::map::ZeroVecLike; +use crate::map2d::ZeroMap2dCursor; + +/// A borrowed-only version of [`ZeroMap2d`](super::ZeroMap2d) +/// +/// This is useful for fully-zero-copy deserialization from non-human-readable +/// serialization formats. It also has the advantage that it can return references that live for +/// the lifetime of the backing buffer as opposed to that of the [`ZeroMap2dBorrowed`] instance. +/// +/// # Examples +/// +/// ``` +/// use zerovec::maps::ZeroMap2dBorrowed; +/// +/// // Example byte buffer representing the map { 1: {2: "three" } } +/// let BINCODE_BYTES: &[u8; 51] = &[ +/// 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, +/// 0, 0, 0, 0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116, +/// 104, 114, 101, 101, +/// ]; +/// +/// // Deserializing to ZeroMap2d requires no heap allocations. +/// let zero_map: ZeroMap2dBorrowed<u16, u16, str> = +/// bincode::deserialize(BINCODE_BYTES) +/// .expect("Should deserialize successfully"); +/// assert_eq!(zero_map.get_2d(&1, &2), Some("three")); +/// ``` +/// +/// This can be obtained from a [`ZeroMap2d`](super::ZeroMap2d) via [`ZeroMap2d::as_borrowed`](super::ZeroMap2d::as_borrowed) +pub struct ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + pub(crate) keys0: &'a K0::Slice, + pub(crate) joiner: &'a ZeroSlice<u32>, + pub(crate) keys1: &'a K1::Slice, + pub(crate) values: &'a V::Slice, +} + +impl<'a, K0, K1, V> Copy for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ +} + +impl<'a, K0, K1, V> Clone for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + fn clone(&self) -> Self { + 
*self + } +} + +impl<'a, K0, K1, V> Default for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0::Slice: 'static, + K1::Slice: 'static, + V::Slice: 'static, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + fn default() -> Self { + Self::new() + } +} + +impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0::Slice: 'static, + K1::Slice: 'static, + V::Slice: 'static, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Creates a new, empty `ZeroMap2dBorrowed<K0, K1, V>`. + /// + /// Note: Since [`ZeroMap2dBorrowed`] is not mutable, the return value will be a stub unless + /// converted into a [`ZeroMap2d`](super::ZeroMap2d). + /// + /// # Examples + /// + /// ``` + /// use zerovec::maps::ZeroMap2dBorrowed; + /// + /// let zm: ZeroMap2dBorrowed<u16, u16, str> = ZeroMap2dBorrowed::new(); + /// assert!(zm.is_empty()); + /// ``` + pub fn new() -> Self { + Self { + keys0: K0::Container::zvl_new_borrowed(), + joiner: Default::default(), + keys1: K1::Container::zvl_new_borrowed(), + values: V::Container::zvl_new_borrowed(), + } + } +} + +impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + #[doc(hidden)] // databake internal + pub const unsafe fn from_parts_unchecked( + keys0: &'a K0::Slice, + joiner: &'a ZeroSlice<u32>, + keys1: &'a K1::Slice, + values: &'a V::Slice, + ) -> Self { + Self { + keys0, + joiner, + keys1, + values, + } + } + + /// The number of elements in the [`ZeroMap2dBorrowed`] + pub fn len(&self) -> usize { + self.values.zvl_len() + } + + /// Whether the [`ZeroMap2dBorrowed`] is empty + pub fn is_empty(&self) -> bool { + self.values.zvl_len() == 0 + } +} + +impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord, + K1: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: 
?Sized, +{ + /// Get the value associated with `key0` and `key1`, if it exists. + /// + /// This is able to return values that live longer than the map itself + /// since they borrow directly from the backing buffer. This is the + /// primary advantage of using [`ZeroMap2dBorrowed`](super::ZeroMap2dBorrowed) over [`ZeroMap2d`](super::ZeroMap2d). + /// + /// ```rust + /// use zerovec::maps::ZeroMap2dBorrowed; + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "one", "bar"); + /// map.insert(&2, "two", "baz"); + /// + /// let borrowed = map.as_borrowed(); + /// assert_eq!(borrowed.get_2d(&1, "one"), Some("foo")); + /// assert_eq!(borrowed.get_2d(&1, "two"), None); + /// assert_eq!(borrowed.get_2d(&2, "one"), Some("bar")); + /// assert_eq!(borrowed.get_2d(&2, "two"), Some("baz")); + /// assert_eq!(borrowed.get_2d(&3, "three"), None); + /// ``` + pub fn get_2d(&self, key0: &K0, key1: &K1) -> Option<&'a V::GetType> { + self.get0(key0)?.get1(key1) + } +} + +impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Gets a cursor for `key0`. If `None`, then `key0` is not in the map. If `Some`, + /// then `key0` is in the map, and `key1` can be queried. 
+ /// + /// ```rust + /// use zerovec::maps::ZeroMap2dBorrowed; + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "two", "bar"); + /// let borrowed = map.as_borrowed(); + /// assert!(matches!(borrowed.get0(&1), Some(_))); + /// assert!(matches!(borrowed.get0(&3), None)); + /// ``` + #[inline] + pub fn get0<'l>(&'l self, key0: &K0) -> Option<ZeroMap2dCursor<'a, 'a, K0, K1, V>> { + let key0_index = self.keys0.zvl_binary_search(key0).ok()?; + Some(ZeroMap2dCursor::from_borrowed(self, key0_index)) + } + + /// Binary search the map for `key0`, returning a cursor. + /// + /// ```rust + /// use zerovec::maps::ZeroMap2dBorrowed; + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "two", "bar"); + /// let borrowed = map.as_borrowed(); + /// assert!(matches!(borrowed.get0_by(|probe| probe.cmp(&1)), Some(_))); + /// assert!(matches!(borrowed.get0_by(|probe| probe.cmp(&3)), None)); + /// ``` + pub fn get0_by<'l>( + &'l self, + predicate: impl FnMut(&K0) -> Ordering, + ) -> Option<ZeroMap2dCursor<'a, 'a, K0, K1, V>> { + let key0_index = self.keys0.zvl_binary_search_by(predicate).ok()?; + Some(ZeroMap2dCursor::from_borrowed(self, key0_index)) + } + + /// Returns whether `key0` is contained in this map + /// + /// ```rust + /// use zerovec::maps::ZeroMap2dBorrowed; + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "two", "bar"); + /// let borrowed = map.as_borrowed(); + /// assert!(borrowed.contains_key0(&1)); + /// assert!(!borrowed.contains_key0(&3)); + /// ``` + pub fn contains_key0(&self, key0: &K0) -> bool { + self.keys0.zvl_binary_search(key0).is_ok() + } +} + +impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Produce 
an ordered iterator over keys0 + pub fn iter0<'l>(&'l self) -> impl Iterator<Item = ZeroMap2dCursor<'a, 'a, K0, K1, V>> + '_ { + (0..self.keys0.zvl_len()).map(move |idx| ZeroMap2dCursor::from_borrowed(self, idx)) + } +} + +impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord, + K1: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + V: Copy, + K0: ?Sized, + K1: ?Sized, +{ + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` + pub fn get_copied_2d(&self, key0: &K0, key1: &K1) -> Option<V> { + self.get0(key0)?.get1_copied(key1) + } +} + +// We can't use the default PartialEq because ZeroMap2d is invariant +// so otherwise rustc will not automatically allow you to compare ZeroMaps +// with different lifetimes +impl<'a, 'b, K0, K1, V> PartialEq<ZeroMap2dBorrowed<'b, K0, K1, V>> + for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: for<'c> ZeroMapKV<'c> + ?Sized, + K1: for<'c> ZeroMapKV<'c> + ?Sized, + V: for<'c> ZeroMapKV<'c> + ?Sized, + <K0 as ZeroMapKV<'a>>::Slice: PartialEq<<K0 as ZeroMapKV<'b>>::Slice>, + <K1 as ZeroMapKV<'a>>::Slice: PartialEq<<K1 as ZeroMapKV<'b>>::Slice>, + <V as ZeroMapKV<'a>>::Slice: PartialEq<<V as ZeroMapKV<'b>>::Slice>, +{ + fn eq(&self, other: &ZeroMap2dBorrowed<'b, K0, K1, V>) -> bool { + self.keys0.eq(other.keys0) + && self.joiner.eq(other.joiner) + && self.keys1.eq(other.keys1) + && self.values.eq(other.values) + } +} + +impl<'a, K0, K1, V> fmt::Debug for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized, + K1: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K0::Slice: fmt::Debug, + K1::Slice: fmt::Debug, + V::Slice: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("ZeroMap2dBorrowed") + .field("keys0", &self.keys0) + .field("joiner", &self.joiner) + .field("keys1", &self.keys1) + .field("values", &self.values) + .finish() + } +} diff --git a/third_party/rust/zerovec/src/map2d/cursor.rs 
b/third_party/rust/zerovec/src/map2d/cursor.rs new file mode 100644 index 0000000000..4802187bec --- /dev/null +++ b/third_party/rust/zerovec/src/map2d/cursor.rs @@ -0,0 +1,358 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{ZeroMap2d, ZeroSlice}; + +use core::cmp::Ordering; +use core::fmt; +use core::ops::Range; + +use crate::map::ZeroMapKV; +use crate::map::ZeroVecLike; + +use super::ZeroMap2dBorrowed; + +/// An intermediate state of queries over [`ZeroMap2d`] and [`ZeroMap2dBorrowed`]. +pub struct ZeroMap2dCursor<'l, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + // Invariant: these fields have the same invariants as they do in ZeroMap2d + keys0: &'l K0::Slice, + joiner: &'l ZeroSlice<u32>, + keys1: &'l K1::Slice, + values: &'l V::Slice, + // Invariant: key0_index is in range + key0_index: usize, +} + +impl<'a, K0, K1, V> ZeroMap2dCursor<'a, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// `key0_index` must be in range + pub(crate) fn from_borrowed( + borrowed: &ZeroMap2dBorrowed<'a, K0, K1, V>, + key0_index: usize, + ) -> Self { + debug_assert!(key0_index < borrowed.joiner.len()); + ZeroMap2dCursor { + keys0: borrowed.keys0, + joiner: borrowed.joiner, + keys1: borrowed.keys1, + values: borrowed.values, + key0_index, + } + } +} + +impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// `key0_index` must be in range + pub(crate) fn from_cow(cow: &'l ZeroMap2d<'a, K0, K1, V>, key0_index: usize) -> Self { + debug_assert!(key0_index < cow.joiner.len()); + Self { + keys0: cow.keys0.zvl_as_borrowed(), + joiner: 
&cow.joiner, + keys1: cow.keys1.zvl_as_borrowed(), + values: cow.values.zvl_as_borrowed(), + key0_index, + } + } + + /// Returns the key0 corresponding to the cursor position. + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert("one", &1u32, "foo"); + /// assert_eq!(map.get0("one").unwrap().key0(), "one"); + /// ``` + pub fn key0(&self) -> &'l K0::GetType { + #[allow(clippy::unwrap_used)] // safe by invariant on `self.key0_index` + self.keys0.zvl_get(self.key0_index).unwrap() + } + + /// Borrow an ordered iterator over keys1 and values for a particular key0. + /// + /// To get the values as copy types, see [`Self::iter1_copied`]. + /// + /// For an example, see [`ZeroMap2d::iter0()`]. + pub fn iter1( + &self, + ) -> impl Iterator< + Item = ( + &'l <K1 as ZeroMapKV<'a>>::GetType, + &'l <V as ZeroMapKV<'a>>::GetType, + ), + > + '_ { + let range = self.get_range(); + #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range + range.map(move |idx| { + ( + self.keys1.zvl_get(idx).unwrap(), + self.values.zvl_get(idx).unwrap(), + ) + }) + } + + /// Transform this cursor into an ordered iterator over keys1 for a particular key0. 
+ pub fn into_iter1( + self, + ) -> impl Iterator< + Item = ( + &'l <K1 as ZeroMapKV<'a>>::GetType, + &'l <V as ZeroMapKV<'a>>::GetType, + ), + > { + let range = self.get_range(); + #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range + range.map(move |idx| { + ( + self.keys1.zvl_get(idx).unwrap(), + self.values.zvl_get(idx).unwrap(), + ) + }) + } + + /// Given key0_index, returns the corresponding range of keys1, which will be valid + pub(super) fn get_range(&self) -> Range<usize> { + debug_assert!(self.key0_index < self.joiner.len()); + let start = if self.key0_index == 0 { + 0 + } else { + #[allow(clippy::unwrap_used)] // protected by the debug_assert above + self.joiner.get(self.key0_index - 1).unwrap() + }; + #[allow(clippy::unwrap_used)] // protected by the debug_assert above + let limit = self.joiner.get(self.key0_index).unwrap(); + // These two assertions are true based on the invariants of ZeroMap2d + debug_assert!(start < limit); + debug_assert!((limit as usize) <= self.values.zvl_len()); + (start as usize)..(limit as usize) + } +} + +impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: Copy, +{ + /// Borrow an ordered iterator over keys1 and values for a particular key0. + /// + /// The values are returned as copy types. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroMap2d; + /// + /// let zm2d: ZeroMap2d<str, u8, usize> = [ + /// ("a", 0u8, 1usize), + /// ("b", 1u8, 1000usize), + /// ("b", 2u8, 2000usize), + /// ] + /// .into_iter() + /// .collect(); + /// + /// let mut total_value = 0; + /// + /// for cursor in zm2d.iter0() { + /// for (_, value) in cursor.iter1_copied() { + /// total_value += value; + /// } + /// } + /// + /// assert_eq!(total_value, 3001); + /// ``` + pub fn iter1_copied( + &self, + ) -> impl Iterator<Item = (&'l <K1 as ZeroMapKV<'a>>::GetType, V)> + '_ { + let range = self.get_range(); + #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range + range.map(move |idx| { + ( + self.keys1.zvl_get(idx).unwrap(), + self.get1_copied_at(idx).unwrap(), + ) + }) + } + + fn get1_copied_at(&self, index: usize) -> Option<V> { + let ule = self.values.zvl_get(index)?; + let mut result = Option::<V>::None; + V::Container::zvl_get_as_t(ule, |v| result.replace(*v)); + #[allow(clippy::unwrap_used)] // `zvl_get_as_t` guarantees that the callback is invoked + Some(result.unwrap()) + } +} + +impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Gets the value for a key1 from this cursor, or `None` if key1 is not in the map. + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert("one", &1u32, "foo"); + /// assert_eq!(map.get0("one").unwrap().get1(&1), Some("foo")); + /// assert_eq!(map.get0("one").unwrap().get1(&2), None); + /// ``` + pub fn get1(&self, key1: &K1) -> Option<&'l V::GetType> { + let key1_index = self.get_key1_index(key1)?; + #[allow(clippy::unwrap_used)] // key1_index is valid + Some(self.values.zvl_get(key1_index).unwrap()) + } + + /// Gets the value for a predicate from this cursor, or `None` if key1 is not in the map. 
+ /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert("one", &1u32, "foo"); + /// assert_eq!(map.get0("one").unwrap().get1_by(|v| v.cmp(&1)), Some("foo")); + /// assert_eq!(map.get0("one").unwrap().get1_by(|v| v.cmp(&2)), None); + /// ``` + pub fn get1_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<&'l V::GetType> { + let key1_index = self.get_key1_index_by(predicate)?; + #[allow(clippy::unwrap_used)] // key1_index is valid + Some(self.values.zvl_get(key1_index).unwrap()) + } + + /// Given key0_index and predicate, returns the index into the values array + fn get_key1_index_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<usize> { + let range = self.get_range(); + debug_assert!(range.start < range.end); // '<' because every key0 should have a key1 + debug_assert!(range.end <= self.keys1.zvl_len()); + let start = range.start; + #[allow(clippy::expect_used)] // protected by the debug_assert above + let binary_search_result = self + .keys1 + .zvl_binary_search_in_range_by(predicate, range) + .expect("in-bounds range"); + binary_search_result.ok().map(move |s| s + start) + } + + /// Given key0_index and key1, returns the index into the values array + fn get_key1_index(&self, key1: &K1) -> Option<usize> { + let range = self.get_range(); + debug_assert!(range.start < range.end); // '<' because every key0 should have a key1 + debug_assert!(range.end <= self.keys1.zvl_len()); + let start = range.start; + #[allow(clippy::expect_used)] // protected by the debug_assert above + let binary_search_result = self + .keys1 + .zvl_binary_search_in_range(key1, range) + .expect("in-bounds range"); + binary_search_result.ok().map(move |s| s + start) + } +} + +impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + V: Copy, + K0: ?Sized, + K1: ?Sized, +{ + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead 
of `V::ULE` + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map: ZeroMap2d<u16, u16, u16> = ZeroMap2d::new(); + /// map.insert(&1, &2, &3); + /// map.insert(&1, &4, &5); + /// map.insert(&6, &7, &8); + /// + /// assert_eq!(map.get0(&6).unwrap().get1_copied(&7), Some(8)); + /// ``` + #[inline] + pub fn get1_copied(&self, key1: &K1) -> Option<V> { + let key1_index = self.get_key1_index(key1)?; + self.get1_copied_at(key1_index) + } + + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` + #[inline] + pub fn get1_copied_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<V> { + let key1_index = self.get_key1_index_by(predicate)?; + self.get1_copied_at(key1_index) + } +} + +// We can't use the default PartialEq because ZeroMap2d is invariant +// so otherwise rustc will not automatically allow you to compare ZeroMaps +// with different lifetimes +impl<'m, 'n, 'a, 'b, K0, K1, V> PartialEq<ZeroMap2dCursor<'n, 'b, K0, K1, V>> + for ZeroMap2dCursor<'m, 'a, K0, K1, V> +where + K0: for<'c> ZeroMapKV<'c> + ?Sized, + K1: for<'c> ZeroMapKV<'c> + ?Sized, + V: for<'c> ZeroMapKV<'c> + ?Sized, + <K0 as ZeroMapKV<'a>>::Slice: PartialEq<<K0 as ZeroMapKV<'b>>::Slice>, + <K1 as ZeroMapKV<'a>>::Slice: PartialEq<<K1 as ZeroMapKV<'b>>::Slice>, + <V as ZeroMapKV<'a>>::Slice: PartialEq<<V as ZeroMapKV<'b>>::Slice>, +{ + fn eq(&self, other: &ZeroMap2dCursor<'n, 'b, K0, K1, V>) -> bool { + self.keys0.eq(other.keys0) + && self.joiner.eq(other.joiner) + && self.keys1.eq(other.keys1) + && self.values.eq(other.values) + && self.key0_index.eq(&other.key0_index) + } +} + +impl<'l, 'a, K0, K1, V> fmt::Debug for ZeroMap2dCursor<'l, 'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized, + K1: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K0::Slice: fmt::Debug, + K1::Slice: fmt::Debug, + V::Slice: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("ZeroMap2d") + .field("keys0", 
&self.keys0) + .field("joiner", &self.joiner) + .field("keys1", &self.keys1) + .field("values", &self.values) + .field("key0_index", &self.key0_index) + .finish() + } +} diff --git a/third_party/rust/zerovec/src/map2d/databake.rs b/third_party/rust/zerovec/src/map2d/databake.rs new file mode 100644 index 0000000000..c5b9aca546 --- /dev/null +++ b/third_party/rust/zerovec/src/map2d/databake.rs @@ -0,0 +1,110 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{maps::ZeroMap2dBorrowed, maps::ZeroMapKV, ZeroMap2d}; +use databake::*; + +impl<'a, K0, K1, V> Bake for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized, + K1: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K0::Container: Bake, + K1::Container: Bake, + V::Container: Bake, +{ + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + let keys0 = self.keys0.bake(env); + let joiner = self.joiner.bake(env); + let keys1 = self.keys1.bake(env); + let values = self.values.bake(env); + quote! { unsafe { #[allow(unused_unsafe)] zerovec::ZeroMap2d::from_parts_unchecked(#keys0, #joiner, #keys1, #values) } } + } +} + +impl<'a, K0, K1, V> Bake for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized, + K1: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + &'a K0::Slice: Bake, + &'a K1::Slice: Bake, + &'a V::Slice: Bake, +{ + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + let keys0 = self.keys0.bake(env); + let joiner = self.joiner.bake(env); + let keys1 = self.keys1.bake(env); + let values = self.values.bake(env); + quote! 
{ unsafe { #[allow(unused_unsafe)] zerovec::maps::ZeroMap2dBorrowed::from_parts_unchecked(#keys0, #joiner, #keys1, #values) } } + } +} + +#[test] +fn test_baked_map() { + test_bake!( + ZeroMap2d<str, str, str>, + const: unsafe { + #[allow(unused_unsafe)] + crate::ZeroMap2d::from_parts_unchecked( + unsafe { + crate::VarZeroVec::from_bytes_unchecked( + b"\x0E\0\0\0\0\0\x05\0\x07\0\t\0\x0B\0\x10\0\x12\0\x14\0\x1C\0\x1E\0#\0%\0'\0,\0arcazcuenffgrckkkukylifmanmnpapalsdtgugunruzyuezh" + ) + }, + unsafe { + crate::ZeroVec::from_bytes_unchecked( + b"\x02\0\0\0\x03\0\0\0\x04\0\0\0\x05\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0\x15\0\0\0\x16\0\0\0\x17\0\0\0\x18\0\0\0\x19\0\0\0\x1C\0\0\0" + ) + }, + unsafe { + crate::VarZeroVec::from_bytes_unchecked( + b"\x1C\0\0\0\0\0\x04\0\x08\0\x0C\0\x10\0\x14\0\x18\0\x1C\0 \0$\0(\0,\x000\x004\08\0<\0@\0D\0H\0L\0P\0T\0X\0\\\0`\0d\0h\0l\0NbatPalmArabGlagShawAdlmLinbArabArabYeziArabLatnLimbNkooMongArabPhlpDevaKhojSindArabCyrlDevaArabHansBopoHanbHant" + ) + }, + unsafe { + crate::VarZeroVec::from_bytes_unchecked( + b"\x1C\0\0\0\0\0\x02\0\x04\0\x06\0\x08\0\n\0\x0C\0\x0E\0\x10\0\x12\0\x14\0\x16\0\x18\0\x1A\0\x1C\0\x1E\0 \0\"\0$\0&\0(\0*\0,\0.\x000\x002\x004\x006\0JOSYIRBGGBGNGRCNIQGECNTRINGNCNPKCNINININPKKZNPAFCNTWTWTW" + ) + }, + ) + }, + zerovec + ); +} + +#[test] +fn test_baked_borrowed_map() { + test_bake!( + ZeroMap2dBorrowed<str, str, str>, + const: unsafe { + #[allow(unused_unsafe)] + crate::maps::ZeroMap2dBorrowed::from_parts_unchecked( + unsafe { + crate::VarZeroSlice::from_bytes_unchecked( + b"\x0E\0\0\0\0\0\x05\0\x07\0\t\0\x0B\0\x10\0\x12\0\x14\0\x1C\0\x1E\0#\0%\0'\0,\0arcazcuenffgrckkkukylifmanmnpapalsdtgugunruzyuezh" + ) + }, + unsafe { + crate::ZeroSlice::from_bytes_unchecked( + 
b"\x02\0\0\0\x03\0\0\0\x04\0\0\0\x05\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0\x15\0\0\0\x16\0\0\0\x17\0\0\0\x18\0\0\0\x19\0\0\0\x1C\0\0\0" + ) + }, + unsafe { + crate::VarZeroSlice::from_bytes_unchecked( + b"\x1C\0\0\0\0\0\x04\0\x08\0\x0C\0\x10\0\x14\0\x18\0\x1C\0 \0$\0(\0,\x000\x004\08\0<\0@\0D\0H\0L\0P\0T\0X\0\\\0`\0d\0h\0l\0NbatPalmArabGlagShawAdlmLinbArabArabYeziArabLatnLimbNkooMongArabPhlpDevaKhojSindArabCyrlDevaArabHansBopoHanbHant" + ) + }, + unsafe { + crate::VarZeroSlice::from_bytes_unchecked( + b"\x1C\0\0\0\0\0\x02\0\x04\0\x06\0\x08\0\n\0\x0C\0\x0E\0\x10\0\x12\0\x14\0\x16\0\x18\0\x1A\0\x1C\0\x1E\0 \0\"\0$\0&\0(\0*\0,\0.\x000\x002\x004\x006\0JOSYIRBGGBGNGRCNIQGECNTRINGNCNPKCNINININPKKZNPAFCNTWTWTW" + ) + }, + ) + }, + zerovec + ); +} diff --git a/third_party/rust/zerovec/src/map2d/map.rs b/third_party/rust/zerovec/src/map2d/map.rs new file mode 100644 index 0000000000..1975387a43 --- /dev/null +++ b/third_party/rust/zerovec/src/map2d/map.rs @@ -0,0 +1,875 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::AsULE; +use crate::ZeroVec; +use alloc::borrow::Borrow; +use core::cmp::Ordering; +use core::convert::TryFrom; +use core::fmt; +use core::iter::FromIterator; +use core::ops::Range; + +use super::*; +use crate::map::ZeroMapKV; +use crate::map::{MutableZeroVecLike, ZeroVecLike}; + +/// A zero-copy, two-dimensional map datastructure . +/// +/// This is an extension of [`ZeroMap`] that supports two layers of keys. 
For example, +/// to map a pair of an integer and a string to a buffer, you can write: +/// +/// ```no_run +/// # use zerovec::ZeroMap2d; +/// let _: ZeroMap2d<u32, str, [u8]> = unimplemented!(); +/// ``` +/// +/// Internally, `ZeroMap2d` stores four zero-copy vectors, one for each type argument plus +/// one more to match between the two vectors of keys. +/// +/// # Examples +/// +/// ``` +/// use zerovec::ZeroMap2d; +/// +/// // Example byte buffer representing the map { 1: {2: "three" } } +/// let BINCODE_BYTES: &[u8; 51] = &[ +/// 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, +/// 0, 0, 0, 0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116, +/// 104, 114, 101, 101, +/// ]; +/// +/// // Deserializing to ZeroMap requires no heap allocations. +/// let zero_map: ZeroMap2d<u16, u16, str> = +/// bincode::deserialize(BINCODE_BYTES) +/// .expect("Should deserialize successfully"); +/// assert_eq!(zero_map.get_2d(&1, &2), Some("three")); +/// ``` +/// +/// [`VarZeroVec`]: crate::VarZeroVec +/// [`ZeroMap`]: crate::ZeroMap +// ZeroMap2d contains 4 fields: +// +// - keys0 = sorted list of all K0 in the map +// - joiner = helper vec that maps from a K0 to a range of keys1 +// - keys1 = list of all K1 in the map, sorted in ranges for each K0 +// - values = list of all values in the map, sorted by (K0, K1) +// +// For a particular K0 at index i, the range of keys1 corresponding to K0 is +// (joiner[i-1]..joiner[i]), where the first range starts at 0. +// +// Required Invariants: +// +// 1. len(keys0) == len(joiner) +// 2. len(keys1) == len(values) +// 3. joiner is sorted +// 4. the last element of joiner is the length of keys1 +// +// Optional Invariants: +// +// 5. keys0 is sorted (for binary_search) +// 6. ranges within keys1 are sorted (for binary_search) +// 7. 
every K0 is associated with at least one K1 (no empty ranges) +// +// During deserialization, these three invariants are not checked, because they put the +// ZeroMap2d in a deterministic state, even though it may have unexpected behavior. +pub struct ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + pub(crate) keys0: K0::Container, + pub(crate) joiner: ZeroVec<'a, u32>, + pub(crate) keys1: K1::Container, + pub(crate) values: V::Container, +} + +impl<'a, K0, K1, V> Default for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + fn default() -> Self { + Self::new() + } +} + +impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Creates a new, empty `ZeroMap2d`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroMap2d; + /// + /// let zm: ZeroMap2d<u16, str, str> = ZeroMap2d::new(); + /// assert!(zm.is_empty()); + /// ``` + pub fn new() -> Self { + Self { + keys0: K0::Container::zvl_with_capacity(0), + joiner: ZeroVec::new(), + keys1: K1::Container::zvl_with_capacity(0), + values: V::Container::zvl_with_capacity(0), + } + } + + #[doc(hidden)] // databake internal + pub const unsafe fn from_parts_unchecked( + keys0: K0::Container, + joiner: ZeroVec<'a, u32>, + keys1: K1::Container, + values: V::Container, + ) -> Self { + Self { + keys0, + joiner, + keys1, + values, + } + } + + /// Construct a new [`ZeroMap2d`] with a given capacity + pub fn with_capacity(capacity: usize) -> Self { + Self { + keys0: K0::Container::zvl_with_capacity(capacity), + joiner: ZeroVec::with_capacity(capacity), + keys1: K1::Container::zvl_with_capacity(capacity), + values: V::Container::zvl_with_capacity(capacity), + } + } + + /// Obtain a borrowed version of this map + pub fn as_borrowed(&'a self) -> 
ZeroMap2dBorrowed<'a, K0, K1, V> { + ZeroMap2dBorrowed { + keys0: self.keys0.zvl_as_borrowed(), + joiner: &self.joiner, + keys1: self.keys1.zvl_as_borrowed(), + values: self.values.zvl_as_borrowed(), + } + } + + /// The number of values in the [`ZeroMap2d`] + pub fn len(&self) -> usize { + self.values.zvl_len() + } + + /// Whether the [`ZeroMap2d`] is empty + pub fn is_empty(&self) -> bool { + self.values.zvl_len() == 0 + } + + /// Remove all elements from the [`ZeroMap2d`] + pub fn clear(&mut self) { + self.keys0.zvl_clear(); + self.joiner.clear(); + self.keys1.zvl_clear(); + self.values.zvl_clear(); + } + + /// Reserve capacity for `additional` more elements to be inserted into + /// the [`ZeroMap2d`] to avoid frequent reallocations. + /// + /// See [`Vec::reserve()`](alloc::vec::Vec::reserve) for more information. + pub fn reserve(&mut self, additional: usize) { + self.keys0.zvl_reserve(additional); + self.joiner.zvl_reserve(additional); + self.keys1.zvl_reserve(additional); + self.values.zvl_reserve(additional); + } + + /// Produce an ordered iterator over keys0, which can then be used to get an iterator + /// over keys1 for a particular key0. 
+ /// + /// # Example + /// + /// Loop over all elements of a ZeroMap2d: + /// + /// ``` + /// use zerovec::ZeroMap2d; + /// + /// let mut map: ZeroMap2d<u16, u16, str> = ZeroMap2d::new(); + /// map.insert(&1, &1, "foo"); + /// map.insert(&2, &3, "bar"); + /// map.insert(&2, &4, "baz"); + /// + /// let mut total_value = 0; + /// + /// for cursor in map.iter0() { + /// for (key1, value) in cursor.iter1() { + /// // This code runs for every (key0, key1) pair + /// total_value += cursor.key0().as_unsigned_int() as usize; + /// total_value += key1.as_unsigned_int() as usize; + /// total_value += value.len(); + /// } + /// } + /// + /// assert_eq!(total_value, 22); + /// ``` + pub fn iter0<'l>(&'l self) -> impl Iterator<Item = ZeroMap2dCursor<'l, 'a, K0, K1, V>> + 'l { + (0..self.keys0.zvl_len()).map(move |idx| ZeroMap2dCursor::from_cow(self, idx)) + } + + // INTERNAL ROUTINES FOLLOW // + + /// Given an index into the joiner array, returns the corresponding range of keys1 + fn get_range_for_key0_index(&self, key0_index: usize) -> Range<usize> { + ZeroMap2dCursor::from_cow(self, key0_index).get_range() + } + + /// Removes key0_index from the keys0 array and the joiner array + fn remove_key0_index(&mut self, key0_index: usize) { + self.keys0.zvl_remove(key0_index); + self.joiner.with_mut(|v| v.remove(key0_index)); + } + + /// Shifts all joiner ranges from key0_index onward one index up + fn joiner_expand(&mut self, key0_index: usize) { + #[allow(clippy::expect_used)] // slice overflow + self.joiner + .to_mut_slice() + .iter_mut() + .skip(key0_index) + .for_each(|ref mut v| { + // TODO(#1410): Make this fallible + **v = v + .as_unsigned_int() + .checked_add(1) + .expect("Attempted to add more than 2^32 elements to a ZeroMap2d") + .to_unaligned() + }) + } + + /// Shifts all joiner ranges from key0_index onward one index down + fn joiner_shrink(&mut self, key0_index: usize) { + self.joiner + .to_mut_slice() + .iter_mut() + .skip(key0_index) + .for_each(|ref mut v| **v = 
(v.as_unsigned_int() - 1).to_unaligned()) + } +} + +impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord, + K1: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Get the value associated with `key0` and `key1`, if it exists. + /// + /// For more fine-grained error handling, use [`ZeroMap2d::get0`]. + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "one", "bar"); + /// map.insert(&2, "two", "baz"); + /// assert_eq!(map.get_2d(&1, "one"), Some("foo")); + /// assert_eq!(map.get_2d(&1, "two"), None); + /// assert_eq!(map.get_2d(&2, "one"), Some("bar")); + /// assert_eq!(map.get_2d(&2, "two"), Some("baz")); + /// assert_eq!(map.get_2d(&3, "three"), None); + /// ``` + pub fn get_2d(&self, key0: &K0, key1: &K1) -> Option<&V::GetType> { + self.get0(key0)?.get1(key1) + } + + /// Insert `value` with `key`, returning the existing value if it exists. + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// assert_eq!(map.insert(&0, "zero", "foo"), None,); + /// assert_eq!(map.insert(&1, "one", "bar"), None,); + /// assert_eq!(map.insert(&1, "one", "baz").as_deref(), Some("bar"),); + /// assert_eq!(map.get_2d(&1, "one").as_deref(), Some("baz")); + /// assert_eq!(map.len(), 2); + /// ``` + pub fn insert(&mut self, key0: &K0, key1: &K1, value: &V) -> Option<V::OwnedType> { + let (key0_index, range) = self.get_or_insert_range_for_key0(key0); + debug_assert!(range.start <= range.end); // '<=' because we may have inserted a new key0 + debug_assert!(range.end <= self.keys1.zvl_len()); + let range_start = range.start; + #[allow(clippy::unwrap_used)] // by debug_assert! 
invariants + let index = range_start + + match self.keys1.zvl_binary_search_in_range(key1, range).unwrap() { + Ok(index) => return Some(self.values.zvl_replace(range_start + index, value)), + Err(index) => index, + }; + self.keys1.zvl_insert(index, key1); + self.values.zvl_insert(index, value); + self.joiner_expand(key0_index); + #[cfg(debug_assertions)] + self.check_invariants(); + None + } + + /// Remove the value at `key`, returning it if it exists. + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "two", "bar"); + /// assert_eq!( + /// map.remove(&1, "one"), + /// Some("foo".to_owned().into_boxed_str()) + /// ); + /// assert_eq!(map.get_2d(&1, "one"), None); + /// assert_eq!(map.remove(&1, "one"), None); + /// ``` + pub fn remove(&mut self, key0: &K0, key1: &K1) -> Option<V::OwnedType> { + let key0_index = self.keys0.zvl_binary_search(key0).ok()?; + let range = self.get_range_for_key0_index(key0_index); + debug_assert!(range.start < range.end); // '<' because every key0 should have a key1 + debug_assert!(range.end <= self.keys1.zvl_len()); + let is_singleton_range = range.start + 1 == range.end; + #[allow(clippy::unwrap_used)] // by debug_assert invariants + let index = range.start + + self + .keys1 + .zvl_binary_search_in_range(key1, range) + .unwrap() + .ok()?; + self.keys1.zvl_remove(index); + let removed = self.values.zvl_remove(index); + self.joiner_shrink(key0_index); + if is_singleton_range { + self.remove_key0_index(key0_index); + } + #[cfg(debug_assertions)] + self.check_invariants(); + Some(removed) + } + + /// Appends `value` with `key` to the end of the underlying vector, returning + /// `key` and `value` _if it failed_. Useful for extending with an existing + /// sorted list. 
+ /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// assert!(map.try_append(&1, "one", "uno").is_none()); + /// assert!(map.try_append(&3, "three", "tres").is_none()); + /// + /// let unsuccessful = map.try_append(&3, "three", "tres-updated"); + /// assert!(unsuccessful.is_some(), "append duplicate of last key"); + /// + /// let unsuccessful = map.try_append(&2, "two", "dos"); + /// assert!(unsuccessful.is_some(), "append out of order"); + /// + /// assert_eq!(map.get_2d(&1, "one"), Some("uno")); + /// + /// // contains the original value for the key: 3 + /// assert_eq!(map.get_2d(&3, "three"), Some("tres")); + /// + /// // not appended since it wasn't in order + /// assert_eq!(map.get_2d(&2, "two"), None); + /// ``` + #[must_use] + pub fn try_append<'b>( + &mut self, + key0: &'b K0, + key1: &'b K1, + value: &'b V, + ) -> Option<(&'b K0, &'b K1, &'b V)> { + if self.is_empty() { + self.keys0.zvl_push(key0); + self.joiner.with_mut(|v| v.push(1u32.to_unaligned())); + self.keys1.zvl_push(key1); + self.values.zvl_push(value); + return None; + } + + // The unwraps are protected by the fact that we are not empty + #[allow(clippy::unwrap_used)] + let last_key0 = self.keys0.zvl_get(self.keys0.zvl_len() - 1).unwrap(); + let key0_cmp = K0::Container::t_cmp_get(key0, last_key0); + #[allow(clippy::unwrap_used)] + let last_key1 = self.keys1.zvl_get(self.keys1.zvl_len() - 1).unwrap(); + let key1_cmp = K1::Container::t_cmp_get(key1, last_key1); + + // Check for error case (out of order) + match key0_cmp { + Ordering::Less => { + // Error case + return Some((key0, key1, value)); + } + Ordering::Equal => { + match key1_cmp { + Ordering::Less | Ordering::Equal => { + // Error case + return Some((key0, key1, value)); + } + _ => {} + } + } + _ => {} + } + + #[allow(clippy::expect_used)] // slice overflow + let joiner_value = u32::try_from(self.keys1.zvl_len() + 1) + .expect("Attempted to add more than 2^32 elements to a ZeroMap2d"); + + // 
All OK to append + #[allow(clippy::unwrap_used)] + if key0_cmp == Ordering::Greater { + self.keys0.zvl_push(key0); + self.joiner + .with_mut(|v| v.push(joiner_value.to_unaligned())); + } else { + // This unwrap is protected because we are not empty + *self.joiner.to_mut_slice().last_mut().unwrap() = joiner_value.to_unaligned(); + } + self.keys1.zvl_push(key1); + self.values.zvl_push(value); + + #[cfg(debug_assertions)] + self.check_invariants(); + + None + } + + // INTERNAL ROUTINES FOLLOW // + + #[cfg(debug_assertions)] + #[allow(clippy::unwrap_used)] // this is an assertion function + pub(crate) fn check_invariants(&self) { + debug_assert_eq!(self.keys0.zvl_len(), self.joiner.len()); + debug_assert_eq!(self.keys1.zvl_len(), self.values.zvl_len()); + debug_assert!(self.keys0.zvl_is_ascending()); + debug_assert!(self.joiner.zvl_is_ascending()); + if let Some(last_joiner) = self.joiner.last() { + debug_assert_eq!(last_joiner as usize, self.keys1.zvl_len()); + } + for i in 0..self.joiner.len() { + let j0 = if i == 0 { + 0 + } else { + self.joiner.get(i - 1).unwrap() as usize + }; + let j1 = self.joiner.get(i).unwrap() as usize; + debug_assert_ne!(j0, j1); + for j in (j0 + 1)..j1 { + let m0 = self.keys1.zvl_get(j - 1).unwrap(); + let m1 = self.keys1.zvl_get(j).unwrap(); + debug_assert_eq!(Ordering::Less, K1::Container::get_cmp_get(m0, m1)); + } + } + } +} + +impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + /// Gets a cursor for `key0`. If `None`, then `key0` is not in the map. If `Some`, + /// then `key0` is in the map, and `key1` can be queried. 
+ /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1u32, "one", "foo"); + /// map.insert(&2, "one", "bar"); + /// map.insert(&2, "two", "baz"); + /// assert_eq!(map.get0(&1).unwrap().get1("one").unwrap(), "foo"); + /// assert_eq!(map.get0(&1).unwrap().get1("two"), None); + /// assert_eq!(map.get0(&2).unwrap().get1("one").unwrap(), "bar"); + /// assert_eq!(map.get0(&2).unwrap().get1("two").unwrap(), "baz"); + /// assert_eq!(map.get0(&3), None); + /// ``` + #[inline] + pub fn get0<'l>(&'l self, key0: &K0) -> Option<ZeroMap2dCursor<'l, 'a, K0, K1, V>> { + let key0_index = self.keys0.zvl_binary_search(key0).ok()?; + Some(ZeroMap2dCursor::from_cow(self, key0_index)) + } + + /// Binary search the map for `key0`, returning a cursor. + /// + /// ```rust + /// use zerovec::maps::ZeroMap2dBorrowed; + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "two", "bar"); + /// assert!(matches!(map.get0_by(|probe| probe.cmp(&1)), Some(_))); + /// assert!(matches!(map.get0_by(|probe| probe.cmp(&3)), None)); + /// ``` + pub fn get0_by<'l>( + &'l self, + predicate: impl FnMut(&K0) -> Ordering, + ) -> Option<ZeroMap2dCursor<'l, 'a, K0, K1, V>> { + let key0_index = self.keys0.zvl_binary_search_by(predicate).ok()?; + Some(ZeroMap2dCursor::from_cow(self, key0_index)) + } + + /// Returns whether `key0` is contained in this map + /// + /// ```rust + /// use zerovec::ZeroMap2d; + /// + /// let mut map = ZeroMap2d::new(); + /// map.insert(&1, "one", "foo"); + /// map.insert(&2, "two", "bar"); + /// assert!(map.contains_key0(&1)); + /// assert!(!map.contains_key0(&3)); + /// ``` + pub fn contains_key0(&self, key0: &K0) -> bool { + self.keys0.zvl_binary_search(key0).is_ok() + } + + // INTERNAL ROUTINES FOLLOW // + + /// Same as `get_range_for_key0`, but creates key0 if it doesn't already exist + fn get_or_insert_range_for_key0(&mut self, key0: &K0) -> 
(usize, Range<usize>) { + match self.keys0.zvl_binary_search(key0) { + Ok(key0_index) => (key0_index, self.get_range_for_key0_index(key0_index)), + Err(key0_index) => { + // Add an entry to self.keys0 and self.joiner + let joiner_value = if key0_index == 0 { + 0 + } else { + debug_assert!(key0_index <= self.joiner.len()); + // The unwrap is protected by the debug_assert above and key0_index != 0 + #[allow(clippy::unwrap_used)] + self.joiner.get(key0_index - 1).unwrap() + }; + self.keys0.zvl_insert(key0_index, key0); + self.joiner + .with_mut(|v| v.insert(key0_index, joiner_value.to_unaligned())); + (key0_index, (joiner_value as usize)..(joiner_value as usize)) + } + } + } +} + +impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord, + K1: ZeroMapKV<'a> + Ord, + V: ZeroMapKV<'a>, + V: Copy, + K0: ?Sized, + K1: ?Sized, +{ + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` + /// + /// # Examples + /// + /// ``` + /// # use zerovec::ZeroMap2d; + /// let mut map: ZeroMap2d<u16, u16, u16> = ZeroMap2d::new(); + /// map.insert(&1, &2, &3); + /// map.insert(&1, &4, &5); + /// map.insert(&6, &7, &8); + /// + /// assert_eq!(map.get_copied_2d(&6, &7), Some(8)); + /// ``` + #[inline] + pub fn get_copied_2d(&self, key0: &K0, key1: &K1) -> Option<V> { + self.get0(key0)?.get1_copied(key1) + } +} + +impl<'a, K0, K1, V> From<ZeroMap2dBorrowed<'a, K0, K1, V>> for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a>, + K1: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K0: ?Sized, + K1: ?Sized, + V: ?Sized, +{ + fn from(other: ZeroMap2dBorrowed<'a, K0, K1, V>) -> Self { + Self { + keys0: K0::Container::zvl_from_borrowed(other.keys0), + joiner: other.joiner.as_zerovec(), + keys1: K1::Container::zvl_from_borrowed(other.keys1), + values: V::Container::zvl_from_borrowed(other.values), + } + } +} + +// We can't use the default PartialEq because ZeroMap2d is invariant +// so otherwise rustc will not automatically allow you to compare ZeroMaps 
+// with different lifetimes +impl<'a, 'b, K0, K1, V> PartialEq<ZeroMap2d<'b, K0, K1, V>> for ZeroMap2d<'a, K0, K1, V> +where + K0: for<'c> ZeroMapKV<'c> + ?Sized, + K1: for<'c> ZeroMapKV<'c> + ?Sized, + V: for<'c> ZeroMapKV<'c> + ?Sized, + <K0 as ZeroMapKV<'a>>::Container: PartialEq<<K0 as ZeroMapKV<'b>>::Container>, + <K1 as ZeroMapKV<'a>>::Container: PartialEq<<K1 as ZeroMapKV<'b>>::Container>, + <V as ZeroMapKV<'a>>::Container: PartialEq<<V as ZeroMapKV<'b>>::Container>, +{ + fn eq(&self, other: &ZeroMap2d<'b, K0, K1, V>) -> bool { + self.keys0.eq(&other.keys0) + && self.joiner.eq(&other.joiner) + && self.keys1.eq(&other.keys1) + && self.values.eq(&other.values) + } +} + +impl<'a, K0, K1, V> fmt::Debug for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized, + K1: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + <K0 as ZeroMapKV<'a>>::Container: fmt::Debug, + <K1 as ZeroMapKV<'a>>::Container: fmt::Debug, + <V as ZeroMapKV<'a>>::Container: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("ZeroMap2d") + .field("keys0", &self.keys0) + .field("joiner", &self.joiner) + .field("keys1", &self.keys1) + .field("values", &self.values) + .finish() + } +} + +impl<'a, K0, K1, V> Clone for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized, + K1: ZeroMapKV<'a> + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + <K0 as ZeroMapKV<'a>>::Container: Clone, + <K1 as ZeroMapKV<'a>>::Container: Clone, + <V as ZeroMapKV<'a>>::Container: Clone, +{ + fn clone(&self) -> Self { + Self { + keys0: self.keys0.clone(), + joiner: self.joiner.clone(), + keys1: self.keys1.clone(), + values: self.values.clone(), + } + } +} + +impl<'a, A, B, C, K0, K1, V> FromIterator<(A, B, C)> for ZeroMap2d<'a, K0, K1, V> +where + A: Borrow<K0>, + B: Borrow<K1>, + C: Borrow<V>, + K0: ZeroMapKV<'a> + ?Sized + Ord, + K1: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + fn from_iter<T>(iter: T) -> Self + where + T: 
IntoIterator<Item = (A, B, C)>, + { + let iter = iter.into_iter(); + let mut map = match iter.size_hint() { + (_, Some(upper)) => Self::with_capacity(upper), + (lower, None) => Self::with_capacity(lower), + }; + + for (key0, key1, value) in iter { + if let Some((key0, key1, value)) = + map.try_append(key0.borrow(), key1.borrow(), value.borrow()) + { + map.insert(key0, key1, value); + } + } + #[cfg(debug_assertions)] + map.check_invariants(); + map + } +} + +#[cfg(test)] +mod test { + use super::*; + use alloc::collections::BTreeMap; + + #[test] + fn stress_test() { + let mut zm2d = ZeroMap2d::<u16, str, str>::new(); + + assert_eq!( + format!("{zm2d:?}"), + "ZeroMap2d { keys0: ZeroVec([]), joiner: ZeroVec([]), keys1: [], values: [] }" + ); + assert_eq!(zm2d.get0(&0), None); + + let result = zm2d.try_append(&3, "ccc", "CCC"); + assert!(result.is_none()); + + assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3]), joiner: ZeroVec([1]), keys1: [\"ccc\"], values: [\"CCC\"] }"); + assert_eq!(zm2d.get0(&0), None); + assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); + assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); + assert_eq!(zm2d.get0(&99), None); + + let result = zm2d.try_append(&3, "eee", "EEE"); + assert!(result.is_none()); + + assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3]), joiner: ZeroVec([2]), keys1: [\"ccc\", \"eee\"], values: [\"CCC\", \"EEE\"] }"); + assert_eq!(zm2d.get0(&0), None); + assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); + assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); + assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE")); + assert_eq!(zm2d.get0(&3).unwrap().get1("five"), None); + assert_eq!(zm2d.get0(&99), None); + + // Out of order + let result = zm2d.try_append(&3, "ddd", "DD0"); + assert!(result.is_some()); + + // Append a few more elements + let result = zm2d.try_append(&5, "ddd", "DD1"); + assert!(result.is_none()); + let result = zm2d.try_append(&7, "ddd", "DD2"); + assert!(result.is_none()); + let result = 
zm2d.try_append(&7, "eee", "EEE"); + assert!(result.is_none()); + let result = zm2d.try_append(&7, "www", "WWW"); + assert!(result.is_none()); + let result = zm2d.try_append(&9, "yyy", "YYY"); + assert!(result.is_none()); + + assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 5, 7, 9]), joiner: ZeroVec([2, 3, 6, 7]), keys1: [\"ccc\", \"eee\", \"ddd\", \"ddd\", \"eee\", \"www\", \"yyy\"], values: [\"CCC\", \"EEE\", \"DD1\", \"DD2\", \"EEE\", \"WWW\", \"YYY\"] }"); + assert_eq!(zm2d.get0(&0), None); + assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); + assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); + assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE")); + assert_eq!(zm2d.get0(&3).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&4), None); + assert_eq!(zm2d.get0(&5).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get_2d(&5, "ddd"), Some("DD1")); + assert_eq!(zm2d.get0(&5).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&6), None); + assert_eq!(zm2d.get0(&7).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get_2d(&7, "ddd"), Some("DD2")); + assert_eq!(zm2d.get_2d(&7, "eee"), Some("EEE")); + assert_eq!(zm2d.get_2d(&7, "www"), Some("WWW")); + assert_eq!(zm2d.get0(&7).unwrap().get1("yyy"), None); + assert_eq!(zm2d.get0(&7).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&8), None); + assert_eq!(zm2d.get0(&9).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get0(&9).unwrap().get1("www"), None); + assert_eq!(zm2d.get_2d(&9, "yyy"), Some("YYY")); + assert_eq!(zm2d.get0(&9).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&10), None); + assert_eq!(zm2d.get0(&99), None); + + // Insert some elements + zm2d.insert(&3, "mmm", "MM0"); + zm2d.insert(&6, "ddd", "DD3"); + zm2d.insert(&6, "mmm", "MM1"); + zm2d.insert(&6, "nnn", "NNN"); + + assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 5, 6, 7, 9]), joiner: ZeroVec([3, 4, 7, 10, 11]), keys1: [\"ccc\", \"eee\", \"mmm\", \"ddd\", \"ddd\", \"mmm\", \"nnn\", \"ddd\", \"eee\", \"www\", \"yyy\"], 
values: [\"CCC\", \"EEE\", \"MM0\", \"DD1\", \"DD3\", \"MM1\", \"NNN\", \"DD2\", \"EEE\", \"WWW\", \"YYY\"] }"); + assert_eq!(zm2d.get0(&0), None); + assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); + assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); + assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE")); + assert_eq!(zm2d.get_2d(&3, "mmm"), Some("MM0")); + assert_eq!(zm2d.get0(&3).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&4), None); + assert_eq!(zm2d.get0(&5).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get_2d(&5, "ddd"), Some("DD1")); + assert_eq!(zm2d.get0(&5).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&6).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get_2d(&6, "ddd"), Some("DD3")); + assert_eq!(zm2d.get_2d(&6, "mmm"), Some("MM1")); + assert_eq!(zm2d.get_2d(&6, "nnn"), Some("NNN")); + assert_eq!(zm2d.get0(&6).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&7).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get_2d(&7, "ddd"), Some("DD2")); + assert_eq!(zm2d.get_2d(&7, "eee"), Some("EEE")); + assert_eq!(zm2d.get_2d(&7, "www"), Some("WWW")); + assert_eq!(zm2d.get0(&7).unwrap().get1("yyy"), None); + assert_eq!(zm2d.get0(&7).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&8), None); + assert_eq!(zm2d.get0(&9).unwrap().get1("aaa"), None); + assert_eq!(zm2d.get0(&9).unwrap().get1("www"), None); + assert_eq!(zm2d.get_2d(&9, "yyy"), Some("YYY")); + assert_eq!(zm2d.get0(&9).unwrap().get1("zzz"), None); + assert_eq!(zm2d.get0(&10), None); + assert_eq!(zm2d.get0(&99), None); + + // Remove some elements + let result = zm2d.remove(&3, "ccc"); // first element + assert_eq!(result.as_deref(), Some("CCC")); + let result = zm2d.remove(&3, "mmm"); // middle element + assert_eq!(result.as_deref(), Some("MM0")); + let result = zm2d.remove(&5, "ddd"); // singleton K0 + assert_eq!(result.as_deref(), Some("DD1")); + let result = zm2d.remove(&9, "yyy"); // last element + assert_eq!(result.as_deref(), Some("YYY")); + + assert_eq!(format!("{zm2d:?}"), 
"ZeroMap2d { keys0: ZeroVec([3, 6, 7]), joiner: ZeroVec([1, 4, 7]), keys1: [\"eee\", \"ddd\", \"mmm\", \"nnn\", \"ddd\", \"eee\", \"www\"], values: [\"EEE\", \"DD3\", \"MM1\", \"NNN\", \"DD2\", \"EEE\", \"WWW\"] }"); + } + + #[test] + fn zeromap2d_metazone() { + let source_data = [ + (*b"aedxb", 0, Some(*b"gulf")), + (*b"afkbl", 0, Some(*b"afgh")), + (*b"ushnl", 0, None), + (*b"ushnl", 7272660, Some(*b"haal")), + (*b"ushnl", 0, None), + (*b"ushnl", 7272660, Some(*b"haal")), + ]; + + let btreemap: BTreeMap<([u8; 5], i32), Option<[u8; 4]>> = source_data + .iter() + .copied() + .map(|(a, b, c)| ((a, b), c)) + .collect(); + + let zeromap2d: ZeroMap2d<[u8; 5], i32, Option<[u8; 4]>> = + source_data.iter().copied().collect(); + + let mut btreemap_iter = btreemap.iter(); + + for cursor in zeromap2d.iter0() { + for (key1, value) in cursor.iter1() { + // This code runs for every (key0, key1) pair in order + let expected = btreemap_iter.next().unwrap(); + assert_eq!( + (expected.0 .0, expected.0 .1, expected.1), + (*cursor.key0(), key1.as_unsigned_int() as i32, &value.get()) + ); + } + } + assert!(btreemap_iter.next().is_none()); + } +} diff --git a/third_party/rust/zerovec/src/map2d/mod.rs b/third_party/rust/zerovec/src/map2d/mod.rs new file mode 100644 index 0000000000..f5465fcf24 --- /dev/null +++ b/third_party/rust/zerovec/src/map2d/mod.rs @@ -0,0 +1,18 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! See [`ZeroMap2d`](crate::ZeroMap2d) for details. 
+ +mod borrowed; +mod cursor; +pub(crate) mod map; + +#[cfg(feature = "databake")] +mod databake; +#[cfg(feature = "serde")] +mod serde; + +pub use crate::ZeroMap2d; +pub use borrowed::ZeroMap2dBorrowed; +pub use cursor::ZeroMap2dCursor; diff --git a/third_party/rust/zerovec/src/map2d/serde.rs b/third_party/rust/zerovec/src/map2d/serde.rs new file mode 100644 index 0000000000..53e3284b31 --- /dev/null +++ b/third_party/rust/zerovec/src/map2d/serde.rs @@ -0,0 +1,430 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{ZeroMap2d, ZeroMap2dBorrowed, ZeroMap2dCursor}; +use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; +use crate::ZeroVec; +use alloc::vec::Vec; +use core::fmt; +use core::marker::PhantomData; +use serde::de::{self, Deserialize, Deserializer, MapAccess, Visitor}; +#[cfg(feature = "serde")] +use serde::ser::{Serialize, SerializeMap, Serializer}; + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'a, K0, K1, V> Serialize for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + V: ZeroMapKV<'a> + Serialize + ?Sized, + K0::Container: Serialize, + K1::Container: Serialize, + V::Container: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + if serializer.is_human_readable() { + let mut serde_map = serializer.serialize_map(None)?; + for cursor in self.iter0() { + K0::Container::zvl_get_as_t(cursor.key0(), |k| serde_map.serialize_key(k))?; + let inner_map = ZeroMap2dInnerMapSerialize { cursor }; + serde_map.serialize_value(&inner_map)?; + } + serde_map.end() + } else { + (&self.keys0, &self.joiner, &self.keys1, &self.values).serialize(serializer) + } + } +} + +/// Helper 
struct for human-serializing the inner map of a ZeroMap2d +#[cfg(feature = "serde")] +struct ZeroMap2dInnerMapSerialize<'a, 'l, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized + Ord, + K1: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + pub cursor: ZeroMap2dCursor<'l, 'a, K0, K1, V>, +} + +#[cfg(feature = "serde")] +impl<'a, 'l, K0, K1, V> Serialize for ZeroMap2dInnerMapSerialize<'a, 'l, K0, K1, V> +where + K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + V: ZeroMapKV<'a> + Serialize + ?Sized, + K0::Container: Serialize, + K1::Container: Serialize, + V::Container: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut serde_map = serializer.serialize_map(None)?; + for (key1, v) in self.cursor.iter1() { + K1::Container::zvl_get_as_t(key1, |k| serde_map.serialize_key(k))?; + V::Container::zvl_get_as_t(v, |v| serde_map.serialize_value(v))?; + } + serde_map.end() + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'a, K0, K1, V> Serialize for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord, + V: ZeroMapKV<'a> + Serialize + ?Sized, + K0::Container: Serialize, + K1::Container: Serialize, + V::Container: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + ZeroMap2d::<K0, K1, V>::from(*self).serialize(serializer) + } +} + +/// Modified example from https://serde.rs/deserialize-map.html +struct ZeroMap2dMapVisitor<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized + Ord, + K1: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter + marker: PhantomData<fn() -> (&'a K0::OwnedType, &'a K1::OwnedType, &'a V::OwnedType)>, +} + +impl<'a, 
K0, K1, V> ZeroMap2dMapVisitor<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + ?Sized + Ord, + K1: ZeroMapKV<'a> + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, +{ + fn new() -> Self { + ZeroMap2dMapVisitor { + marker: PhantomData, + } + } +} + +impl<'a, 'de, K0, K1, V> Visitor<'de> for ZeroMap2dMapVisitor<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord + ?Sized + Ord, + K1: ZeroMapKV<'a> + Ord + ?Sized + Ord, + V: ZeroMapKV<'a> + ?Sized, + K1::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + K0::OwnedType: Deserialize<'de>, + K1::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, +{ + type Value = ZeroMap2d<'a, K0, K1, V>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a map produced by ZeroMap2d") + } + + fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error> + where + M: MapAccess<'de>, + { + let mut map = ZeroMap2d::with_capacity(access.size_hint().unwrap_or(0)); + + // On the first level, pull out the K0s and a TupleVecMap of the + // K1s and Vs, and then collect them into a ZeroMap2d + while let Some((key0, inner_map)) = + access.next_entry::<K0::OwnedType, TupleVecMap<K1::OwnedType, V::OwnedType>>()? 
+ { + for (key1, value) in inner_map.entries.iter() { + if map + .try_append( + K0::Container::owned_as_t(&key0), + K1::Container::owned_as_t(key1), + V::Container::owned_as_t(value), + ) + .is_some() + { + return Err(de::Error::custom( + "ZeroMap2d's keys must be sorted while deserializing", + )); + } + } + } + + Ok(map) + } +} + +/// Helper struct for human-deserializing the inner map of a ZeroMap2d +struct TupleVecMap<K1, V> { + pub entries: Vec<(K1, V)>, +} + +struct TupleVecMapVisitor<K1, V> { + #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter + marker: PhantomData<fn() -> (K1, V)>, +} + +impl<K1, V> TupleVecMapVisitor<K1, V> { + fn new() -> Self { + TupleVecMapVisitor { + marker: PhantomData, + } + } +} + +impl<'de, K1, V> Visitor<'de> for TupleVecMapVisitor<K1, V> +where + K1: Deserialize<'de>, + V: Deserialize<'de>, +{ + type Value = TupleVecMap<K1, V>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("an inner map produced by ZeroMap2d") + } + + fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error> + where + M: MapAccess<'de>, + { + let mut result = Vec::with_capacity(access.size_hint().unwrap_or(0)); + while let Some((key1, value)) = access.next_entry::<K1, V>()? 
{ + result.push((key1, value)); + } + Ok(TupleVecMap { entries: result }) + } +} + +impl<'de, K1, V> Deserialize<'de> for TupleVecMap<K1, V> +where + K1: Deserialize<'de>, + V: Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(TupleVecMapVisitor::<K1, V>::new()) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, K0, K1, V> Deserialize<'de> for ZeroMap2d<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord + ?Sized, + K1: ZeroMapKV<'a> + Ord + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K0::Container: Deserialize<'de>, + K1::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + K0::OwnedType: Deserialize<'de>, + K1::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + deserializer.deserialize_map(ZeroMap2dMapVisitor::<'a, K0, K1, V>::new()) + } else { + let (keys0, joiner, keys1, values): ( + K0::Container, + ZeroVec<u32>, + K1::Container, + V::Container, + ) = Deserialize::deserialize(deserializer)?; + // Invariant 1: len(keys0) == len(joiner) + if keys0.zvl_len() != joiner.len() { + return Err(de::Error::custom( + "Mismatched keys0 and joiner sizes in ZeroMap2d", + )); + } + // Invariant 2: len(keys1) == len(values) + if keys1.zvl_len() != values.zvl_len() { + return Err(de::Error::custom( + "Mismatched keys1 and value sizes in ZeroMap2d", + )); + } + // Invariant 3: joiner is sorted + if !joiner.zvl_is_ascending() { + return Err(de::Error::custom( + "ZeroMap2d deserializing joiner array out of order", + )); + } + // Invariant 4: the last element of joiner is the length of keys1 + if let Some(last_joiner0) = joiner.last() { + if keys1.zvl_len() != last_joiner0 as usize { + return Err(de::Error::custom( + "ZeroMap2d deserializing joiner 
array malformed", + )); + } + } + let result = Self { + keys0, + joiner, + keys1, + values, + }; + // In debug mode, check the optional invariants, too + #[cfg(debug_assertions)] + result.check_invariants(); + Ok(result) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, K0, K1, V> Deserialize<'de> for ZeroMap2dBorrowed<'a, K0, K1, V> +where + K0: ZeroMapKV<'a> + Ord + ?Sized, + K1: ZeroMapKV<'a> + Ord + ?Sized, + V: ZeroMapKV<'a> + ?Sized, + K0::Container: Deserialize<'de>, + K1::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + K0::OwnedType: Deserialize<'de>, + K1::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "ZeroMap2dBorrowed cannot be deserialized from human-readable formats", + )) + } else { + let deserialized: ZeroMap2d<'a, K0, K1, V> = ZeroMap2d::deserialize(deserializer)?; + let keys0 = if let Some(keys0) = deserialized.keys0.zvl_as_borrowed_inner() { + keys0 + } else { + return Err(de::Error::custom( + "ZeroMap2dBorrowed can only deserialize in zero-copy ways", + )); + }; + let joiner = if let Some(joiner) = deserialized.joiner.zvl_as_borrowed_inner() { + joiner + } else { + return Err(de::Error::custom( + "ZeroMap2dBorrowed can only deserialize in zero-copy ways", + )); + }; + let keys1 = if let Some(keys1) = deserialized.keys1.zvl_as_borrowed_inner() { + keys1 + } else { + return Err(de::Error::custom( + "ZeroMap2dBorrowed can only deserialize in zero-copy ways", + )); + }; + let values = if let Some(values) = deserialized.values.zvl_as_borrowed_inner() { + values + } else { + return Err(de::Error::custom( + "ZeroMap2dBorrowed can only deserialize in zero-copy ways", + )); + }; + Ok(Self { + keys0, + joiner, + keys1, + values, + }) + } + } +} + +#[cfg(test)] 
+#[allow(non_camel_case_types)] +mod test { + use crate::map2d::{ZeroMap2d, ZeroMap2dBorrowed}; + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_ZeroMap2d<'data> { + #[serde(borrow)] + _data: ZeroMap2d<'data, u16, str, [u8]>, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_ZeroMap2dBorrowed<'data> { + #[serde(borrow)] + _data: ZeroMap2dBorrowed<'data, u16, str, [u8]>, + } + + const JSON_STR: &str = "{\"1\":{\"1\":\"uno\"},\"2\":{\"2\":\"dos\",\"3\":\"tres\"}}"; + const BINCODE_BYTES: &[u8] = &[ + 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, + 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 20, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, + 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115, + ]; + + fn make_map() -> ZeroMap2d<'static, u32, u16, str> { + let mut map = ZeroMap2d::new(); + map.insert(&1, &1, "uno"); + map.insert(&2, &2, "dos"); + map.insert(&2, &3, "tres"); + map + } + + #[test] + fn test_serde_json() { + let map = make_map(); + let json_str = serde_json::to_string(&map).expect("serialize"); + assert_eq!(JSON_STR, json_str); + let new_map: ZeroMap2d<u32, u16, str> = + serde_json::from_str(&json_str).expect("deserialize"); + assert_eq!(format!("{new_map:?}"), format!("{map:?}")); + } + + #[test] + fn test_bincode() { + let map = make_map(); + let bincode_bytes = bincode::serialize(&map).expect("serialize"); + assert_eq!(BINCODE_BYTES, bincode_bytes); + let new_map: ZeroMap2d<u32, u16, str> = + bincode::deserialize(&bincode_bytes).expect("deserialize"); + assert_eq!( + format!("{new_map:?}"), + format!("{map:?}").replace("Owned", "Borrowed"), + ); + + let new_map: ZeroMap2dBorrowed<u32, u16, str> = + bincode::deserialize(&bincode_bytes).expect("deserialize"); + assert_eq!( + format!("{new_map:?}"), + format!("{map:?}") + .replace("Owned", "Borrowed") + .replace("ZeroMap2d", "ZeroMap2dBorrowed") + ); + } + + #[test] + fn test_sample_bincode() { + // This 
is the map from the main docs page for ZeroMap2d + let mut map: ZeroMap2d<u16, u16, str> = ZeroMap2d::new(); + map.insert(&1, &2, "three"); + let bincode_bytes: Vec<u8> = bincode::serialize(&map).expect("serialize"); + assert_eq!( + bincode_bytes.as_slice(), + &[ + 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116, 104, 114, 101, 101 + ] + ); + } +} diff --git a/third_party/rust/zerovec/src/samples.rs b/third_party/rust/zerovec/src/samples.rs new file mode 100644 index 0000000000..723aacdedc --- /dev/null +++ b/third_party/rust/zerovec/src/samples.rs @@ -0,0 +1,74 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Example data useful for testing ZeroVec. + +// This module is included directly in tests and can trigger the dead_code +// warning since not all samples are used in each test +#![allow(dead_code)] + +#[repr(align(8))] +struct Aligned<T>(pub T); + +// This is aligned so that we can test unaligned behavior at odd offsets +const ALIGNED_TEST_BUFFER_LE: Aligned<[u8; 80]> = Aligned([ + 0x00, 0x01, 0x02, 0x00, 0x04, 0x05, 0x06, 0x00, 0x08, 0x09, 0x0a, 0x00, 0x0c, 0x0d, 0x0e, 0x00, + 0x10, 0x11, 0x12, 0x00, 0x14, 0x15, 0x16, 0x00, 0x18, 0x19, 0x1a, 0x00, 0x1c, 0x1d, 0x1e, 0x00, + 0x20, 0x21, 0x22, 0x00, 0x24, 0x25, 0x26, 0x00, 0x28, 0x29, 0x2a, 0x00, 0x2c, 0x2d, 0x2e, 0x00, + 0x30, 0x31, 0x32, 0x00, 0x34, 0x35, 0x36, 0x00, 0x38, 0x39, 0x3a, 0x00, 0x3c, 0x3d, 0x3e, 0x00, + 0x40, 0x41, 0x42, 0x00, 0x44, 0x45, 0x46, 0x00, 0x48, 0x49, 0x4a, 0x00, 0x4c, 0x4d, 0x4e, 0x00, +]); + +/// An example byte array intended to be used in `ZeroVec<u32>`. +pub const TEST_BUFFER_LE: &[u8] = &ALIGNED_TEST_BUFFER_LE.0; + +/// u32 numbers corresponding to the above byte array. 
+pub const TEST_SLICE: &[u32] = &[ + 0x020100, 0x060504, 0x0a0908, 0x0e0d0c, 0x121110, 0x161514, 0x1a1918, 0x1e1d1c, 0x222120, + 0x262524, 0x2a2928, 0x2e2d2c, 0x323130, 0x363534, 0x3a3938, 0x3e3d3c, 0x424140, 0x464544, + 0x4a4948, 0x4e4d4c, +]; + +/// The sum of the numbers in TEST_SLICE. +pub const TEST_SUM: u32 = 52629240; + +/// Representation of TEST_SLICE in JSON. +pub const JSON_STR: &str = "[131328,394500,657672,920844,1184016,1447188,1710360,1973532,2236704,2499876,2763048,3026220,3289392,3552564,3815736,4078908,4342080,4605252,4868424,5131596]"; + +/// Representation of TEST_SLICE in Bincode. +pub const BINCODE_BUF: &[u8] = &[ + 80, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 4, 5, 6, 0, 8, 9, 10, 0, 12, 13, 14, 0, 16, 17, 18, 0, 20, + 21, 22, 0, 24, 25, 26, 0, 28, 29, 30, 0, 32, 33, 34, 0, 36, 37, 38, 0, 40, 41, 42, 0, 44, 45, + 46, 0, 48, 49, 50, 0, 52, 53, 54, 0, 56, 57, 58, 0, 60, 61, 62, 0, 64, 65, 66, 0, 68, 69, 70, + 0, 72, 73, 74, 0, 76, 77, 78, 0, +]; + +/// Representation of a VarZeroVec<str> with contents ["w", "ω", "文", "𑄃"] +pub const TEST_VARZEROSLICE_BYTES: &[u8] = &[ + 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, +]; + +#[test] +fn validate() { + use crate::{VarZeroVec, ZeroVec}; + + assert_eq!( + ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE).unwrap(), + ZeroVec::alloc_from_slice(TEST_SLICE) + ); + + assert_eq!(TEST_SLICE.iter().sum::<u32>(), TEST_SUM); + + assert_eq!( + serde_json::from_str::<ZeroVec::<u32>>(JSON_STR).unwrap(), + ZeroVec::alloc_from_slice(TEST_SLICE) + ); + + assert_eq!( + bincode::deserialize::<ZeroVec::<u32>>(BINCODE_BUF).unwrap(), + ZeroVec::alloc_from_slice(TEST_SLICE) + ); + + VarZeroVec::<str>::parse_byte_slice(TEST_VARZEROSLICE_BYTES).unwrap(); +} diff --git a/third_party/rust/zerovec/src/ule/chars.rs b/third_party/rust/zerovec/src/ule/chars.rs new file mode 100644 index 0000000000..e4c1efc4ec --- /dev/null +++ b/third_party/rust/zerovec/src/ule/chars.rs @@ -0,0 +1,190 @@ +// This 
file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] +//! ULE implementation for the `char` type. + +use super::*; +use crate::impl_ule_from_array; +use core::cmp::Ordering; +use core::convert::TryFrom; + +/// A u8 array of little-endian data corresponding to a Unicode scalar value. +/// +/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a +/// valid `char` and can be converted without validation. +/// +/// # Examples +/// +/// Convert a `char` to a `CharULE` and back again: +/// +/// ``` +/// use zerovec::ule::{AsULE, CharULE, ULE}; +/// +/// let c1 = '𑄃'; +/// let ule = c1.to_unaligned(); +/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]); +/// let c2 = char::from_unaligned(ule); +/// assert_eq!(c1, c2); +/// ``` +/// +/// Attempt to parse invalid bytes to a `CharULE`: +/// +/// ``` +/// use zerovec::ule::{CharULE, ULE}; +/// +/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF]; +/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes"); +/// ``` +#[repr(transparent)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub struct CharULE([u8; 3]); + +impl CharULE { + /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling + /// [`AsULE::to_unaligned()`] + /// + /// See the type-level documentation for [`CharULE`] for more information. + #[inline] + pub const fn from_aligned(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + impl_ule_from_array!(char, CharULE, Self([0; 3])); +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. CharULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. CharULE is aligned to 1 byte. 
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. +// 5. The other ULE methods use the default impl. +// 6. CharULE byte equality is semantic equality +unsafe impl ULE for CharULE { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % 3 != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + // Validate the bytes + for chunk in bytes.chunks_exact(3) { + // TODO: Use slice::as_chunks() when stabilized + #[allow(clippy::indexing_slicing)] + // Won't panic because the chunks are always 3 bytes long + let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]); + char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?; + } + Ok(()) + } +} + +impl AsULE for char { + type ULE = CharULE; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + CharULE::from_aligned(self) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. 
+ unsafe { + Self::from_u32_unchecked(u32::from_le_bytes([ + unaligned.0[0], + unaligned.0[1], + unaligned.0[2], + 0, + ])) + } + } +} + +impl PartialOrd for CharULE { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for CharULE { + fn cmp(&self, other: &Self) -> Ordering { + char::from_unaligned(*self).cmp(&char::from_unaligned(*other)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_from_array() { + const CHARS: [char; 2] = ['a', '🙃']; + const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS); + assert_eq!( + CharULE::as_byte_slice(&CHARS_ULE), + &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01] + ); + } + + #[test] + fn test_from_array_zst() { + const CHARS: [char; 0] = []; + const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS); + let bytes = CharULE::as_byte_slice(&CHARS_ULE); + let empty: &[u8] = &[]; + assert_eq!(bytes, empty); + } + + #[test] + fn test_parse() { + // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32) + let chars = ['w', 'ω', '文', '𑄃', '🙃']; + let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect(); + let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules); + + // Check parsing + let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap(); + assert_eq!(char_ules, parsed_ules); + let parsed_chars: Vec<char> = parsed_ules + .iter() + .copied() + .map(char::from_unaligned) + .collect(); + assert_eq!(&chars, parsed_chars.as_slice()); + + // Compare to golden expected data + assert_eq!( + &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], + char_bytes + ); + } + + #[test] + fn test_failures() { + // 119 and 120 are valid, but not 0xD800 (high surrogate) + let u32s = [119, 0xD800, 120]; + let u32_ules: Vec<RawBytesULE<4>> = u32s + .iter() + .copied() + .map(<u32 as AsULE>::to_unaligned) + .collect(); + let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); + let
parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); + assert!(parsed_ules_result.is_err()); + + // 0x20FFFF is out of range for a char + let u32s = [0x20FFFF]; + let u32_ules: Vec<RawBytesULE<4>> = u32s + .iter() + .copied() + .map(<u32 as AsULE>::to_unaligned) + .collect(); + let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); + let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); + assert!(parsed_ules_result.is_err()); + } +} diff --git a/third_party/rust/zerovec/src/ule/custom.rs b/third_party/rust/zerovec/src/ule/custom.rs new file mode 100644 index 0000000000..8cc6e9de4e --- /dev/null +++ b/third_party/rust/zerovec/src/ule/custom.rs @@ -0,0 +1,145 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Documentation on implementing custom VarULE types. +//! +//! This module contains documentation for defining custom VarULE types, +//! especially those using complex custom dynamically sized types. +//! +//! In *most cases* you should be able to create custom VarULE types using +//! [`#[make_varule]`](crate::make_ule). +//! +//! # Example +//! +//! For example, if your regular stack type is: +//! +//! ```rust +//! use zerofrom::ZeroFrom; +//! use zerovec::ule::*; +//! use zerovec::ZeroVec; +//! +//! #[derive(serde::Serialize, serde::Deserialize)] +//! struct Foo<'a> { +//! field1: char, +//! field2: u32, +//! #[serde(borrow)] +//! field3: ZeroVec<'a, u32>, +//! } +//! ``` +//! +//! then the ULE type will be implemented as follows. Ideally, you should have +//! `EncodeAsVarULE` and `ZeroFrom` implementations on `Foo` pertaining to `FooULE`, +//! as well as a `Serialize` impl on `FooULE` and a `Deserialize` impl on `Box<FooULE>` +//! to enable human-readable serialization and deserialization. +//! +//! ```rust +//! use zerovec::{ZeroVec, VarZeroVec, ZeroSlice}; +//! 
use zerovec::ule::*; +//! use zerofrom::ZeroFrom; +//! use core::mem; +//! +//! # #[derive(serde::Serialize, serde::Deserialize)] +//! # struct Foo<'a> { +//! # field1: char, +//! # field2: u32, +//! # #[serde(borrow)] +//! # field3: ZeroVec<'a, u32> +//! # } +//! +//! // Must be repr(packed) for safety of VarULE! +//! // Must also only contain ULE types +//! #[repr(packed)] +//! struct FooULE { +//! field1: <char as AsULE>::ULE, +//! field2: <u32 as AsULE>::ULE, +//! field3: ZeroSlice<u32>, +//! } +//! +//! // Safety (based on the safety checklist on the VarULE trait): +//! // 1. FooULE does not include any uninitialized or padding bytes. (achieved by `#[repr(packed)]` on +//! // a struct with only ULE fields) +//! // 2. FooULE is aligned to 1 byte. (achieved by `#[repr(packed)]` on +//! // a struct with only ULE fields) +//! // 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +//! // 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +//! // 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +//! // 6. The other VarULE methods use the default impl. +//! // 7. FooULE byte equality is semantic equality +//! unsafe impl VarULE for FooULE { +//! fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { +//! // validate each field +//! <char as AsULE>::ULE::validate_byte_slice(&bytes[0..3]).map_err(|_| ZeroVecError::parse::<Self>())?; +//! <u32 as AsULE>::ULE::validate_byte_slice(&bytes[3..7]).map_err(|_| ZeroVecError::parse::<Self>())?; +//! let _ = ZeroVec::<u32>::parse_byte_slice(&bytes[7..]).map_err(|_| ZeroVecError::parse::<Self>())?; +//! Ok(()) +//! } +//! unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { +//! let ptr = bytes.as_ptr(); +//! let len = bytes.len(); +//! // subtract the length of the char and u32 to get the length of the array +//! let len_new = (len - 7) / 4; +//! 
// it's hard constructing custom DSTs, we fake a pointer/length construction +//! // eventually we can use the Pointer::Metadata APIs when they stabilize +//! let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const <u32 as AsULE>::ULE, len_new); +//! &*(fake_slice as *const Self) +//! } +//! } +//! +//! unsafe impl EncodeAsVarULE<FooULE> for Foo<'_> { +//! fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { +//! // take each field, convert to ULE byte slices, and pass them through +//! cb(&[<char as AsULE>::ULE::as_byte_slice(&[self.field1.to_unaligned()]), +//! <u32 as AsULE>::ULE::as_byte_slice(&[self.field2.to_unaligned()]), +//! // the ZeroVec is already in the correct slice format +//! self.field3.as_bytes()]) +//! } +//! } +//! +//! impl<'a> ZeroFrom<'a, FooULE> for Foo<'a> { +//! fn zero_from(other: &'a FooULE) -> Self { +//! Self { +//! field1: AsULE::from_unaligned(other.field1), +//! field2: AsULE::from_unaligned(other.field2), +//! field3: ZeroFrom::zero_from(&other.field3), +//! } +//! } +//! } +//! +//! +//! impl serde::Serialize for FooULE +//! { +//! fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> +//! where +//! S: serde::Serializer, +//! { +//! Foo::zero_from(self).serialize(serializer) +//! } +//! } +//! +//! impl<'de> serde::Deserialize<'de> for Box<FooULE> +//! { +//! fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> +//! where +//! D: serde::Deserializer<'de>, +//! { +//! let mut foo = Foo::deserialize(deserializer)?; +//! Ok(encode_varule_to_box(&foo)) +//! } +//! } +//! +//! fn main() { +//! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])}, +//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}]; +//! +//! let vzv = VarZeroVec::<_>::from(&foos); +//! +//! assert_eq!(char::from_unaligned(vzv.get(0).unwrap().field1), 'u'); +//! 
assert_eq!(u32::from_unaligned(vzv.get(0).unwrap().field2), 983); +//! assert_eq!(&vzv.get(0).unwrap().field3, &[1212,2309,500,7000][..]); +//! +//! assert_eq!(char::from_unaligned(vzv.get(1).unwrap().field1), 'l'); +//! assert_eq!(u32::from_unaligned(vzv.get(1).unwrap().field2), 1010); +//! assert_eq!(&vzv.get(1).unwrap().field3, &[1932, 0, 8888, 91237][..]); +//! } +//! ``` diff --git a/third_party/rust/zerovec/src/ule/encode.rs b/third_party/rust/zerovec/src/ule/encode.rs new file mode 100644 index 0000000000..adea123aa2 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/encode.rs @@ -0,0 +1,400 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use crate::varzerovec::VarZeroVecFormat; +use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec}; +use alloc::borrow::{Cow, ToOwned}; +use alloc::boxed::Box; +use alloc::string::String; +use alloc::{vec, vec::Vec}; +use core::mem; + +/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on +/// custom DSTs where the type cannot be obtained as a reference to some other type. +/// +/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field +/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices +/// to the callback must, when concatenated, be a valid instance of the VarULE type. +/// +/// See the [custom VarULE documentation](crate::ule::custom) for examples. +/// +/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`] +/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to +/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`.
This can be done for cases where +/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work. +/// +/// A typical implementation will take each field in the order found in the [`VarULE`] type, +/// convert it to ULE, call [`ULE::as_byte_slice()`] on them, and pass the slices to `cb` in order. +/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying +/// byte representation passed through. +/// +/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical +/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the +/// dynamically-sized part. +/// +/// # Safety +/// +/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are: +/// - It must call `cb` (only once) +/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type +/// (i.e. if fed to [`VarULE::validate_byte_slice()`] they must produce a successful result) +/// - It must return the return value of `cb` to the caller +/// +/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided. +/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced +/// with `unreachable!()`. +/// +/// The safety invariants of [`Self::encode_var_ule_len()`] are: +/// - It must return the length of the corresponding VarULE type +/// +/// The safety invariants of [`Self::encode_var_ule_write()`] are: +/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type +pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> { + /// Calls `cb` with a piecewise list of byte slices that when concatenated + /// produce the memory pattern of the corresponding instance of `T`. + /// + /// Do not call this function directly; instead use the other two. 
Some implementors + /// may define this function to panic. + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R; + + /// Return the length, in bytes, of the corresponding [`VarULE`] type + fn encode_var_ule_len(&self) -> usize { + self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum()) + } + + /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should + /// be the size of [`Self::encode_var_ule_len()`] + fn encode_var_ule_write(&self, mut dst: &mut [u8]) { + debug_assert_eq!(self.encode_var_ule_len(), dst.len()); + self.encode_var_ule_as_slices(move |slices| { + #[allow(clippy::indexing_slicing)] // by debug_assert + for slice in slices { + dst[..slice.len()].copy_from_slice(slice); + dst = &mut dst[slice.len()..]; + } + }); + } +} + +/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>` +/// +/// This is primarily useful for generating `Deserialize` impls for VarULE types +pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> { + // zero-fill the vector to avoid uninitialized data UB + let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()]; + x.encode_var_ule_write(&mut vec); + let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice()); + unsafe { + // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]` + // and can be recouped via from_byte_slice_unchecked() + let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T; + + // Safety: we can construct an owned version since we have mem::forgotten the older owner + Box::from_raw(ptr) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self)]) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self)]) + 
} +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T> +where + T: ToOwned, +{ + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self.as_ref())]) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self)]) + } +} + +unsafe impl EncodeAsVarULE<str> for String { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[self.as_bytes()]) + } +} + +// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>` +// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here. +unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T> +where + T: ULE, +{ + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[<[T] as VarULE>::as_byte_slice(self)]) + } +} + +unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T] +where + T: AsULE + 'static, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.len() * core::mem::size_of::<T::ULE>() + } + + fn encode_var_ule_write(&self, dst: &mut [u8]) { + #[allow(non_snake_case)] + let S = core::mem::size_of::<T::ULE>(); + debug_assert_eq!(self.len() * S, dst.len()); + for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) { + let ule = item.to_unaligned(); + chunk.copy_from_slice(ULE::as_byte_slice(core::slice::from_ref(&ule))); + } + } +} + +unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T> +where + T: AsULE + 'static, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.as_slice().encode_var_ule_len() + 
} + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + self.as_slice().encode_var_ule_write(dst) + } +} + +unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T> +where + T: AsULE + 'static, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.as_bytes().len() + } + + fn encode_var_ule_write(&self, dst: &mut [u8]) { + debug_assert_eq!(self.as_bytes().len(), dst.len()); + dst.copy_from_slice(self.as_bytes()); + } +} + +unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E] +where + T: VarULE + ?Sized, + E: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unimplemented!() + } + + #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV. + fn encode_var_ule_len(&self) -> usize { + crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize + } + + fn encode_var_ule_write(&self, dst: &mut [u8]) { + crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst) + } +} + +unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E> +where + T: VarULE + ?Sized, + E: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice()) + } + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst) + } +} + +unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F> +where + T: VarULE + ?Sized, + F: VarZeroVecFormat, +{ 
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.as_bytes().len() + } + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + debug_assert_eq!(self.as_bytes().len(), dst.len()); + dst.copy_from_slice(self.as_bytes()); + } +} + +#[cfg(test)] +mod test { + use super::*; + + const STRING_ARRAY: [&str; 2] = ["hello", "world"]; + + const STRING_SLICE: &[&str] = &STRING_ARRAY; + + const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; + + const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY]; + + const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY]; + + const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE]; + + const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE]; + + const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F]; + + const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY]; + + const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY]; + + const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE]; + + const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE]; + + #[test] + fn test_vzv_from() { + type VZV<'a, T> = VarZeroVec<'a, T>; + type ZS<T> = ZeroSlice<T>; + type VZS<T> = VarZeroSlice<T>; + + let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY); + let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()]; + let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()]; + let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()]; + + let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY); + let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()]; + let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()]; + let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), 
u32_2d_vec.clone()]; + + let a: VZV<str> = VarZeroVec::from(&STRING_ARRAY); + let b: VZV<str> = VarZeroVec::from(STRING_SLICE); + let c: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE)); + assert_eq!(a, STRING_SLICE); + assert_eq!(a, b); + assert_eq!(a, c); + + let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY); + let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE); + let c: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec); + assert_eq!(a, U8_2D_SLICE); + assert_eq!(a, b); + assert_eq!(a, c); + let u8_3d_vzv_brackets = &[a.clone(), a.clone()]; + + let a: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY); + let b: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE); + let c: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec); + let d: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec); + assert_eq!(a, U8_2D_SLICE); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()]; + + let a: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY); + let b: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE); + let c: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec); + let d: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets); + assert_eq!( + a.iter() + .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>()) + .collect::<Vec<Vec<Vec<u8>>>>(), + u8_3d_vec + ); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + + let a: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY); + let b: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE); + let c: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec); + let d: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice); + assert_eq!( + a.iter() + .map(|x| x + .iter() + .map(|y| y.iter().collect::<Vec<u8>>()) + .collect::<Vec<Vec<u8>>>()) + .collect::<Vec<Vec<Vec<u8>>>>(), + u8_3d_vec + ); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + + let a: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY); + let b: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE); + let c: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec); + 
let d: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec); + assert_eq!(a, u32_2d_zerovec); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + let u32_3d_vzv = &[a.clone(), a.clone()]; + + let a: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY); + let b: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE); + let c: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec); + let d: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv); + assert_eq!( + a.iter() + .map(|x| x + .iter() + .map(|y| y.iter().collect::<Vec<u32>>()) + .collect::<Vec<Vec<u32>>>()) + .collect::<Vec<Vec<Vec<u32>>>>(), + u32_3d_vec + ); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + } +} diff --git a/third_party/rust/zerovec/src/ule/macros.rs b/third_party/rust/zerovec/src/ule/macros.rs new file mode 100644 index 0000000000..955b1eb2e4 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/macros.rs @@ -0,0 +1,29 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +/// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or +/// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`. +/// +/// The `$default` argument is due to current compiler limitations. +/// Pass any (cheap to construct) value. +#[macro_export] +macro_rules! 
impl_ule_from_array { + ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => { + #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")] + pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] { + let mut result = [$default; N]; + let mut i = 0; + // Won't panic because i < N and arr has length N + #[allow(clippy::indexing_slicing)] + while i < N { + result[i] = $single(arr[i]); + i += 1; + } + result + } + }; + ($aligned:ty, $unaligned:ty, $default:expr) => { + impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned); + }; +} diff --git a/third_party/rust/zerovec/src/ule/mod.rs b/third_party/rust/zerovec/src/ule/mod.rs new file mode 100644 index 0000000000..5a6d9cd471 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/mod.rs @@ -0,0 +1,394 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] + +//! Traits over unaligned little-endian data (ULE, pronounced "yule"). +//! +//! The main traits for this module are [`ULE`], [`AsULE`], and [`VarULE`]. +//! +//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits +//! work under the hood. 
+mod chars; +#[cfg(doc)] +pub mod custom; +mod encode; +mod macros; +mod multi; +mod niche; +mod option; +mod plain; +mod slices; +mod unvalidated; + +pub mod tuple; +pub use super::ZeroVecError; +pub use chars::CharULE; +pub use encode::{encode_varule_to_box, EncodeAsVarULE}; +pub use multi::MultiFieldsULE; +pub use niche::{NicheBytes, NichedOption, NichedOptionULE}; +pub use option::{OptionULE, OptionVarULE}; +pub use plain::RawBytesULE; +pub use unvalidated::{UnvalidatedChar, UnvalidatedStr}; + +use alloc::alloc::Layout; +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use core::{mem, slice}; + +/// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice. +/// +/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or +/// [`#[derive(ULE)]`](macro@ULE) instead. +/// +/// Types that are not fixed-width can implement [`VarULE`] instead. +/// +/// "ULE" stands for "Unaligned little-endian" +/// +/// # Safety +/// +/// Safety checklist for `ULE`: +/// +/// 1. The type *must not* include any uninitialized or padding bytes. +/// 2. The type must have an alignment of 1 byte. +/// 3. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// would not represent a valid slice of this type. +/// 4. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// cannot be used in its entirety (if its length is not a multiple of `size_of::<Self>()`). +/// 5. All other methods *must* be left with their default impl, or else implemented according to +/// their respective safety guidelines. +/// 6. Acknowledge the following note about the equality invariant. +/// +/// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2, +/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`. 
+/// +/// # Equality invariant +/// +/// A non-safety invariant is that if `Self` implements `PartialEq`, then it *must* be logically +/// equivalent to byte equality on [`Self::as_byte_slice()`]. +/// +/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not +/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error +/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and +/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form +/// where only a single digit is allowed before `.`. +/// +/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may +/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. +pub unsafe trait ULE +where + Self: Sized, + Self: Copy + 'static, +{ + /// Validates a byte slice, `&[u8]`. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. + /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok` + /// should be returned; otherwise, a [`ZeroVecError`] should be returned. + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError>; + + /// Parses a byte slice, `&[u8]`, and returns it as `&[Self]` with the same lifetime. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, + /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`]. + /// + /// The default implementation executes [`Self::validate_byte_slice()`] followed by + /// [`Self::from_byte_slice_unchecked`]. + /// + /// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This + /// means that the returned slice can span the entire byte slice. 
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], ZeroVecError> { + Self::validate_byte_slice(bytes)?; + debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0); + Ok(unsafe { Self::from_byte_slice_unchecked(bytes) }) + } + + /// Takes a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime, assuming + /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with + /// success. + /// + /// The default implementation performs a pointer cast to the same region of memory. + /// + /// # Safety + /// + /// ## Callers + /// + /// Callers of this method must take care to ensure that `bytes` was previously passed through + /// [`Self::validate_byte_slice()`] with success (and was not changed since then). + /// + /// ## Implementors + /// + /// Implementations of this method may call unsafe functions to cast the pointer to the correct + /// type, assuming the "Callers" invariant above. + /// + /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. + /// + /// Safety checklist: + /// + /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`]. + /// 2. This method *must* return a slice to the same region of memory as the argument. + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] { + let data = bytes.as_ptr(); + let len = bytes.len() / mem::size_of::<Self>(); + debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0); + core::slice::from_raw_parts(data as *const Self, len) + } + + /// Given `&[Self]`, returns a `&[u8]` with the same lifetime. + /// + /// The default implementation performs a pointer cast to the same region of memory. + /// + /// # Safety + /// + /// Implementations of this method should call potentially unsafe functions to cast the + /// pointer to the correct type. + /// + /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. 
+ #[inline] + #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219 + fn as_byte_slice(slice: &[Self]) -> &[u8] { + unsafe { + slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice)) + } + } +} + +/// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type. +/// +/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead. +pub trait AsULE: Copy { + /// The ULE type corresponding to `Self`. + /// + /// Types having infallible conversions from all bit values (Plain Old Data) can use + /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`. + /// + /// Types that are not well-defined for all bit values should implement a custom ULE. + type ULE: ULE; + + /// Converts from `Self` to `Self::ULE`. + /// + /// This function may involve byte order swapping (native-endian to little-endian). + /// + /// For best performance, mark your implementation of this function `#[inline]`. + fn to_unaligned(self) -> Self::ULE; + + /// Converts from `Self::ULE` to `Self`. + /// + /// This function may involve byte order swapping (little-endian to native-endian). + /// + /// For best performance, mark your implementation of this function `#[inline]`. + /// + /// # Safety + /// + /// This function is infallible because bit validation should have occurred when `Self::ULE` + /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like + /// `from_bytes_unchecked()`. + fn from_unaligned(unaligned: Self::ULE) -> Self; +} + +/// An [`EqULE`] type is one whose byte sequence equals the byte sequence of its ULE type on +/// little-endian platforms. This enables certain performance optimizations, such as +/// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice). 
+/// +/// # Implementation safety +/// +/// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`) +/// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where +/// `*const T` can be cast to a valid `*const T::ULE`. +pub unsafe trait EqULE: AsULE {} + +/// A trait for a type where aligned slices can be cast to unaligned slices. +/// +/// Auto-implemented on all types implementing [`EqULE`]. +pub trait SliceAsULE +where + Self: AsULE + Sized, +{ + /// Converts from `&[Self]` to `&[Self::ULE]` if possible. + /// + /// In general, this function returns `Some` on little-endian and `None` on big-endian. + fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>; +} + +#[cfg(target_endian = "little")] +impl<T> SliceAsULE for T +where + T: EqULE, +{ + #[inline] + fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> { + // This is safe because on little-endian platforms, the byte sequence of &[T] + // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE, + // and &[T::ULE] has equal or looser alignment than &[T]. + let ule_slice = + unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) }; + Some(ule_slice) + } +} + +#[cfg(not(target_endian = "little"))] +impl<T> SliceAsULE for T +where + T: EqULE, +{ + #[inline] + fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> { + None + } +} + +/// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice. +/// +/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or +/// [`#[derive(VarULE)]`](macro@VarULE) instead. +/// +/// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types; +/// however, it is much more preferable to use [`ULE`] for that purpose. The [`custom`] module contains +/// additional documentation on how this type can be implemented on custom types. 
+/// +/// If deserialization with `VarZeroVec` is desired, it is recommended to implement `Deserialize` for +/// `Box<T>` (serde does not do this automatically for unsized `T`). +/// +/// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom) +/// on some stack type to convert to and from the ULE type efficiently when necessary. +/// +/// # Safety +/// +/// Safety checklist for `VarULE`: +/// +/// 1. The type *must not* include any uninitialized or padding bytes. +/// 2. The type must have an alignment of 1 byte. +/// 3. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// would not represent a valid slice of this type. +/// 4. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// cannot be used in its entirety. +/// 5. The impl of [`VarULE::from_byte_slice_unchecked()`] must produce a reference to the same +/// underlying data assuming that the given bytes previously passed validation. +/// 6. All other methods *must* be left with their default impl, or else implemented according to +/// their respective safety guidelines. +/// 7. Acknowledge the following note about the equality invariant. +/// +/// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2, +/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`. +/// +/// # Equality invariant +/// +/// A non-safety invariant is that if `Self` implements `PartialEq`, then it *must* be logically +/// equivalent to byte equality on [`Self::as_byte_slice()`]. +/// +/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not +/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error +/// for any values that are not in canonical form. 
For example, the decimal strings "1.23e4" and +/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form +/// where only a single digit is allowed before `.`. +/// +/// There may also be cases where a `VarULE` has multiple canonical forms, such as a faster +/// version and a smaller version. The cleanest way to handle this case would be separate types. +/// However, if this is not feasible, then the application should ensure that the data it is +/// deserializing is in the expected form. For example, if the data is being loaded from an +/// external source, then requests could carry information about the expected form of the data. +/// +/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may +/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. +pub unsafe trait VarULE: 'static { + /// Validates a byte slice, `&[u8]`. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. + /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should + /// be returned; otherwise, a [`ZeroVecError`] should be returned. + fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError>; + + /// Parses a byte slice, `&[u8]`, and returns it as `&Self` with the same lifetime. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, + /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`]. + /// + /// The default implementation executes [`Self::validate_byte_slice()`] followed by + /// [`Self::from_byte_slice_unchecked`]. + /// + /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`, + /// where `result` is the successful return value of the method. This means that the return + /// value spans the entire byte slice. 
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + Self::validate_byte_slice(bytes)?; + let result = unsafe { Self::from_byte_slice_unchecked(bytes) }; + debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes)); + Ok(result) + } + + /// Takes a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime, assuming + /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with + /// success. + /// + /// # Safety + /// + /// ## Callers + /// + /// Callers of this method must take care to ensure that `bytes` was previously passed through + /// [`Self::validate_byte_slice()`] with success (and was not changed since then). + /// + /// ## Implementors + /// + /// Implementations of this method may call unsafe functions to cast the pointer to the correct + /// type, assuming the "Callers" invariant above. + /// + /// Safety checklist: + /// + /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`]. + /// 2. This method *must* return a slice to the same region of memory as the argument. + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self; + + /// Given `&Self`, returns a `&[u8]` with the same lifetime. + /// + /// The default implementation performs a pointer cast to the same region of memory. + /// + /// # Safety + /// + /// Implementations of this method should call potentially unsafe functions to cast the + /// pointer to the correct type. 
+ #[inline] + fn as_byte_slice(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) } + } + + /// Allocate on the heap as a `Box<T>` + #[inline] + fn to_boxed(&self) -> Box<Self> { + let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice(); + let bytesvec = mem::ManuallyDrop::new(bytesvec); + unsafe { + // Get the pointer representation + let ptr: *mut Self = + Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self; + assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec)); + // Transmute the pointer to an owned pointer + Box::from_raw(ptr) + } + } +} + +// Proc macro reexports +// +// These exist so that our docs can use intra-doc links. +// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from +// a submodule + +/// Custom derive for [`ULE`]. +/// +/// This can be attached to [`Copy`] structs containing only [`ULE`] types. +/// +/// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining +/// a custom ULE type. +#[cfg(feature = "derive")] +pub use zerovec_derive::ULE; + +/// Custom derive for [`VarULE`] +/// +/// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end. +/// +/// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining +/// a custom [`VarULE`] type. +#[cfg(feature = "derive")] +pub use zerovec_derive::VarULE; diff --git a/third_party/rust/zerovec/src/ule/multi.rs b/third_party/rust/zerovec/src/ule/multi.rs new file mode 100644 index 0000000000..3281b20888 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/multi.rs @@ -0,0 +1,154 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use super::*; +use crate::varzerovec::Index32; +use crate::VarZeroSlice; +use core::mem; + +/// This type is used by the custom derive to represent multiple [`VarULE`] +/// fields packed into a single end-of-struct field. It is not recommended +/// to use this type directly. +/// +/// Logically, consider it to be `(V1, V2, V3, ..)` +/// where `V1` etc are potentially different [`VarULE`] types. +/// +/// Internally, it is represented by a VarZeroSlice. +#[derive(PartialEq, Eq, Debug)] +#[repr(transparent)] +pub struct MultiFieldsULE(VarZeroSlice<[u8], Index32>); + +impl MultiFieldsULE { + /// Compute the amount of bytes needed to support elements with lengths `lengths` + #[inline] + pub fn compute_encoded_len_for(lengths: &[usize]) -> usize { + #[allow(clippy::expect_used)] // See #1410 + unsafe { + // safe since BlankSliceEncoder is transparent over usize + let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]); + crate::varzerovec::components::compute_serializable_len::<_, _, Index32>(lengths) + .expect("Too many bytes to encode") as usize + } + } + + /// Construct a partially initialized MultiFieldsULE backed by a mutable byte buffer + pub fn new_from_lengths_partially_initialized<'a>( + lengths: &[usize], + output: &'a mut [u8], + ) -> &'a mut Self { + unsafe { + // safe since BlankSliceEncoder is transparent over usize + let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]); + crate::varzerovec::components::write_serializable_bytes::<_, _, Index32>( + lengths, output, + ); + debug_assert!( + <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(), + "Encoded slice must be valid VarZeroSlice" + ); + // Safe since write_serializable_bytes produces a valid VarZeroSlice buffer + let slice = <VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked_mut(output); + // safe since `Self` is transparent over VarZeroSlice + mem::transmute::<&mut VarZeroSlice<_, Index32>, &mut Self>(slice) + } + } + + /// Given a buffer 
of size obtained by [`Self::compute_encoded_len_for()`], write element A to index idx + /// + /// # Safety + /// - `idx` must be in range + /// - `T` must be the appropriate type expected by the custom derive in this usage of this type + #[inline] + pub unsafe fn set_field_at<T: VarULE + ?Sized, A: EncodeAsVarULE<T> + ?Sized>( + &mut self, + idx: usize, + value: &A, + ) { + value.encode_var_ule_write(self.0.get_bytes_at_mut(idx)) + } + + /// Validate field at `index` to see if it is a valid `T` VarULE type + /// + /// # Safety + /// + /// - `index` must be in range + #[inline] + pub unsafe fn validate_field<T: VarULE + ?Sized>( + &self, + index: usize, + ) -> Result<(), ZeroVecError> { + T::validate_byte_slice(self.0.get_unchecked(index)) + } + + /// Get field at `index` as a value of type T + /// + /// # Safety + /// + /// - `index` must be in range + /// - Element at `index` must have been created with the VarULE type T + #[inline] + pub unsafe fn get_field<T: VarULE + ?Sized>(&self, index: usize) -> &T { + T::from_byte_slice_unchecked(self.0.get_unchecked(index)) + } + + /// Construct from a byte slice + /// + /// # Safety + /// - byte slice must be a valid VarZeroSlice<[u8]> + #[inline] + pub unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // &Self is transparent over &VZS<..> + mem::transmute(<VarZeroSlice<[u8]>>::from_byte_slice_unchecked(bytes)) + } +} + +/// This lets us conveniently use the EncodeAsVarULE functionality to create +/// `VarZeroVec<[u8]>`s that have the right amount of space for elements +/// without having to duplicate any unsafe code +#[repr(transparent)] +struct BlankSliceEncoder(usize); + +unsafe impl EncodeAsVarULE<[u8]> for BlankSliceEncoder { + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.0 + } + + #[inline] + fn encode_var_ule_write(&self, _dst: &mut [u8]) { 
+ // do nothing + } +} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. MultiFieldsULE does not include any uninitialized or padding bytes (achieved by being transparent over a VarULE type) +// 2. MultiFieldsULE is aligned to 1 byte (achieved by being transparent over a VarULE type) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. All other methods are defaulted +// 7. `MultiFieldsULE` byte equality is semantic equality (achieved by being transparent over a VarULE type) +unsafe impl VarULE for MultiFieldsULE { + /// Note: MultiFieldsULE is usually used in cases where one should be calling .validate_field() directly for + /// each field, rather than using the regular VarULE impl. + /// + /// This impl exists so that EncodeAsVarULE can work. + #[inline] + fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { + <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice) + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // &Self is transparent over &VZS<..> + mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked( + bytes, + )) + } +} diff --git a/third_party/rust/zerovec/src/ule/niche.rs b/third_party/rust/zerovec/src/ule/niche.rs new file mode 100644 index 0000000000..ae61faca0b --- /dev/null +++ b/third_party/rust/zerovec/src/ule/niche.rs @@ -0,0 +1,180 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core::{marker::Copy, mem::size_of}; + +use super::{AsULE, ULE}; + +/// The [`ULE`] types implementing this trait guarantee that [`NicheBytes::NICHE_BIT_PATTERN`] +/// can never occur as a valid byte representation of the type. +/// +/// Guarantees for a valid implementation. +/// 1. N must be equal to `core::mem::size_of::<Self>()` or else it will +/// cause panics. +/// 2. The bit pattern [`NicheBytes::NICHE_BIT_PATTERN`] must not be incorrect as it would lead to +/// weird behaviour. +/// 3. The abstractions built on top of this trait must panic on an invalid N. +/// 4. The abstractions built on this trait that use type punning must ensure that the type being +/// punned is [`ULE`]. +pub trait NicheBytes<const N: usize> { + const NICHE_BIT_PATTERN: [u8; N]; +} + +/// [`ULE`] type for [`NichedOption<U,N>`] where U implements [`NicheBytes`]. +/// The invalid bit pattern is used as the niche. +/// +/// This uses 1 byte less than [`crate::ule::OptionULE<U>`] to represent [`NichedOption<U,N>`]. +/// +/// # Example +/// +/// ``` +/// use core::num::NonZeroI8; +/// use zerovec::ule::NichedOption; +/// use zerovec::ZeroVec; +/// +/// let bytes = &[0x00, 0x01, 0x02, 0x00]; +/// let zv_no: ZeroVec<NichedOption<NonZeroI8, 1>> = +/// ZeroVec::parse_byte_slice(bytes) +/// .expect("Unable to parse as NichedOption."); +/// +/// assert_eq!(zv_no.get(0).map(|e| e.0), Some(None)); +/// assert_eq!(zv_no.get(1).map(|e| e.0), Some(NonZeroI8::new(1))); +/// assert_eq!(zv_no.get(2).map(|e| e.0), Some(NonZeroI8::new(2))); +/// assert_eq!(zv_no.get(3).map(|e| e.0), Some(None)); +/// ``` +// Invariants: +// The union stores [`NicheBytes::NICHE_BIT_PATTERN`] when None. +// Any other bit pattern is a valid `U`. +#[repr(C)] +pub union NichedOptionULE<U: NicheBytes<N> + ULE, const N: usize> { + /// Invariant: The value is `niche` only if the bytes equal NICHE_BIT_PATTERN. + niche: [u8; N], + /// Invariant: The value is `valid` if the `niche` field does not match NICHE_BIT_PATTERN. 
+ valid: U, +} + +impl<U: NicheBytes<N> + ULE + core::fmt::Debug, const N: usize> core::fmt::Debug + for NichedOptionULE<U, N> +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.get().fmt(f) + } +} + +impl<U: NicheBytes<N> + ULE, const N: usize> NichedOptionULE<U, N> { + /// New `NichedOptionULE<U, N>` from `Option<U>` + pub fn new(opt: Option<U>) -> Self { + assert!(N == core::mem::size_of::<U>()); + match opt { + Some(u) => Self { valid: u }, + None => Self { + niche: <U as NicheBytes<N>>::NICHE_BIT_PATTERN, + }, + } + } + + /// Convert to an `Option<U>` + pub fn get(self) -> Option<U> { + // Safety: The union stores NICHE_BIT_PATTERN when None otherwise a valid U + unsafe { + if self.niche == <U as NicheBytes<N>>::NICHE_BIT_PATTERN { + None + } else { + Some(self.valid) + } + } + } +} + +impl<U: NicheBytes<N> + ULE, const N: usize> Copy for NichedOptionULE<U, N> {} + +impl<U: NicheBytes<N> + ULE, const N: usize> Clone for NichedOptionULE<U, N> { + fn clone(&self) -> Self { + *self + } +} + +impl<U: NicheBytes<N> + ULE + PartialEq, const N: usize> PartialEq for NichedOptionULE<U, N> { + fn eq(&self, other: &Self) -> bool { + self.get().eq(&other.get()) + } +} + +impl<U: NicheBytes<N> + ULE + Eq, const N: usize> Eq for NichedOptionULE<U, N> {} + +/// Safety for ULE trait +/// 1. NichedOptionULE does not have any padding bytes due to `#[repr(C)]` on a union +/// containing only ULE fields. +/// NichedOptionULE either contains NICHE_BIT_PATTERN or valid U byte sequences. +/// In both cases the data is initialized. +/// 2. NichedOptionULE is aligned to 1 byte due to `#[repr(C)]` on a union containing only +/// ULE fields. +/// 3. validate_byte_slice impl returns an error if invalid bytes are encountered. +/// 4. validate_byte_slice impl returns an error if there are extra bytes. +/// 5. The other ULE methods are left to their default impl. +/// 6. 
NichedOptionULE equality is based on ULE equality of the subfield, assuming that NicheBytes +/// has been implemented correctly (this is a correctness but not a safety guarantee). +unsafe impl<U: NicheBytes<N> + ULE, const N: usize> ULE for NichedOptionULE<U, N> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), crate::ZeroVecError> { + let size = size_of::<Self>(); + // The implementation is only correct if NICHE_BIT_PATTERN has the same number of bytes as the + // type. + debug_assert!(N == core::mem::size_of::<U>()); + + // The bytes should fully transmute to a collection of Self + if bytes.len() % size != 0 { + return Err(crate::ZeroVecError::length::<Self>(bytes.len())); + } + bytes.chunks(size).try_for_each(|chunk| { + // Associated const cannot be referenced in a pattern + // https://doc.rust-lang.org/error-index.html#E0158 + if chunk == <U as NicheBytes<N>>::NICHE_BIT_PATTERN { + Ok(()) + } else { + U::validate_byte_slice(chunk) + } + }) + } +} + +/// Optional type which uses [`NichedOptionULE<U,N>`] as ULE type. 
+/// The implementors guarantee that `N == core::mem::size_of::<Self>()`. +/// [`repr(transparent)`] guarantees that the layout is the same as [`Option<U>`] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[repr(transparent)] +#[non_exhaustive] +pub struct NichedOption<U, const N: usize>(pub Option<U>); + +impl<U, const N: usize> NichedOption<U, N> { + pub const fn new(o: Option<U>) -> Self { + Self(o) + } +} + +impl<U, const N: usize> Default for NichedOption<U, N> { + fn default() -> Self { + Self(None) + } +} + +impl<U, const N: usize> From<Option<U>> for NichedOption<U, N> { + fn from(o: Option<U>) -> Self { + Self(o) + } +} + +impl<U: AsULE, const N: usize> AsULE for NichedOption<U, N> +where + U::ULE: NicheBytes<N>, +{ + type ULE = NichedOptionULE<U::ULE, N>; + + fn to_unaligned(self) -> Self::ULE { + NichedOptionULE::new(self.0.map(U::to_unaligned)) + } + + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self(unaligned.get().map(U::from_unaligned)) + } +} diff --git a/third_party/rust/zerovec/src/ule/option.rs b/third_party/rust/zerovec/src/ule/option.rs new file mode 100644 index 0000000000..9b0dc5b28a --- /dev/null +++ b/third_party/rust/zerovec/src/ule/option.rs @@ -0,0 +1,264 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use super::*; +use core::cmp::Ordering; +use core::marker::PhantomData; +use core::mem::{self, MaybeUninit}; + +/// This type is the [`ULE`] type for `Option<U>` where `U` is a [`ULE`] type +/// +/// # Example +/// +/// ```rust +/// use zerovec::ZeroVec; +/// +/// let z = ZeroVec::alloc_from_slice(&[ +/// Some('a'), +/// Some('á'), +/// Some('ø'), +/// None, +/// Some('ł'), +/// ]); +/// +/// assert_eq!(z.get(2), Some(Some('ø'))); +/// assert_eq!(z.get(3), Some(None)); +/// ``` +// Invariants: +// The MaybeUninit is zeroed when None (bool = false), +// and is valid when Some (bool = true) +#[repr(packed)] +pub struct OptionULE<U>(bool, MaybeUninit<U>); + +impl<U: Copy> OptionULE<U> { + /// Obtain this as an `Option<U>` + pub fn get(self) -> Option<U> { + if self.0 { + unsafe { + // safety: self.0 is true so the MaybeUninit is valid + Some(self.1.assume_init()) + } + } else { + None + } + } + + /// Construct an `OptionULE<U>` from an equivalent `Option<U>` + pub fn new(opt: Option<U>) -> Self { + if let Some(inner) = opt { + Self(true, MaybeUninit::new(inner)) + } else { + Self(false, MaybeUninit::zeroed()) + } + } +} + +impl<U: Copy + core::fmt::Debug> core::fmt::Debug for OptionULE<U> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.get().fmt(f) + } +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. OptionULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(packed)]` on a struct containing only ULE fields, +// in the context of this impl. The MaybeUninit is valid for all byte sequences, and we only generate +// zeroed or valid-U byte sequences to fill it) +// 2. OptionULE is aligned to 1 byte. +// (achieved by `#[repr(packed)]` on a struct containing only ULE fields, in the context of this impl) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. +// 5. 
The other ULE methods use the default impl. +// 6. OptionULE byte equality is semantic equality by relying on the ULE equality +// invariant on the subfields +unsafe impl<U: ULE> ULE for OptionULE<U> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + let size = mem::size_of::<Self>(); + if bytes.len() % size != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + for chunk in bytes.chunks(size) { + #[allow(clippy::indexing_slicing)] // `chunk` will have enough bytes to fit Self + match chunk[0] { + // https://doc.rust-lang.org/reference/types/boolean.html + // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 + 0 => { + if !chunk[1..].iter().all(|x| *x == 0) { + return Err(ZeroVecError::parse::<Self>()); + } + } + 1 => U::validate_byte_slice(&chunk[1..])?, + _ => return Err(ZeroVecError::parse::<Self>()), + } + } + Ok(()) + } +} + +impl<T: AsULE> AsULE for Option<T> { + type ULE = OptionULE<T::ULE>; + fn to_unaligned(self) -> OptionULE<T::ULE> { + OptionULE::new(self.map(T::to_unaligned)) + } + + fn from_unaligned(other: OptionULE<T::ULE>) -> Self { + other.get().map(T::from_unaligned) + } +} + +impl<U: Copy> Copy for OptionULE<U> {} + +impl<U: Copy> Clone for OptionULE<U> { + fn clone(&self) -> Self { + *self + } +} + +impl<U: Copy + PartialEq> PartialEq for OptionULE<U> { + fn eq(&self, other: &Self) -> bool { + self.get().eq(&other.get()) + } +} + +impl<U: Copy + Eq> Eq for OptionULE<U> {} + +/// A type allowing one to represent `Option<U>` for [`VarULE`] `U` types. 
+/// +/// ```rust +/// use zerovec::ule::OptionVarULE; +/// use zerovec::VarZeroVec; +/// +/// let mut zv: VarZeroVec<OptionVarULE<str>> = VarZeroVec::new(); +/// +/// zv.make_mut().push(&None::<&str>); +/// zv.make_mut().push(&Some("hello")); +/// zv.make_mut().push(&Some("world")); +/// zv.make_mut().push(&None::<&str>); +/// +/// assert_eq!(zv.get(0).unwrap().as_ref(), None); +/// assert_eq!(zv.get(1).unwrap().as_ref(), Some("hello")); +/// ``` +// The slice field is empty when None (bool = false), +// and is a valid T when Some (bool = true) +#[repr(packed)] +pub struct OptionVarULE<U: VarULE + ?Sized>(PhantomData<U>, bool, [u8]); + +impl<U: VarULE + ?Sized> OptionVarULE<U> { + /// Obtain this as an `Option<&U>` + pub fn as_ref(&self) -> Option<&U> { + if self.1 { + unsafe { + // Safety: byte field is a valid T if boolean field is true + Some(U::from_byte_slice_unchecked(&self.2)) + } + } else { + None + } + } +} + +impl<U: VarULE + ?Sized + core::fmt::Debug> core::fmt::Debug for OptionVarULE<U> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.as_ref().fmt(f) + } +} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. OptionVarULE<T> does not include any uninitialized or padding bytes +// (achieved by being repr(packed) on ULE types) +// 2. OptionVarULE<T> is aligned to 1 byte (achieved by being repr(packed) on ULE types) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. All other methods are defaulted +// 7. 
OptionVarULE<T> byte equality is semantic equality (achieved by being an aggregate) +unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> { + #[inline] + fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { + if slice.is_empty() { + return Err(ZeroVecError::length::<Self>(slice.len())); + } + #[allow(clippy::indexing_slicing)] // slice already verified to be nonempty + match slice[0] { + // https://doc.rust-lang.org/reference/types/boolean.html + // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 + 0 => { + if slice.len() != 1 { + Err(ZeroVecError::length::<Self>(slice.len())) + } else { + Ok(()) + } + } + 1 => U::validate_byte_slice(&slice[1..]), + _ => Err(ZeroVecError::parse::<Self>()), + } + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + let entire_struct_as_slice: *const [u8] = + ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1); + &*(entire_struct_as_slice as *const Self) + } +} + +unsafe impl<T, U> EncodeAsVarULE<OptionVarULE<U>> for Option<T> +where + T: EncodeAsVarULE<U>, + U: VarULE + ?Sized, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + if let Some(ref inner) = *self { + // slice + boolean + 1 + inner.encode_var_ule_len() + } else { + // boolean + empty slice + 1 + } + } + + #[allow(clippy::indexing_slicing)] // This method is allowed to panic when lengths are invalid + fn encode_var_ule_write(&self, dst: &mut [u8]) { + if let Some(ref inner) = *self { + debug_assert!( + !dst.is_empty(), + "OptionVarULE must have at least one byte when Some" + ); + dst[0] = 1; + inner.encode_var_ule_write(&mut dst[1..]); + } else { + debug_assert!( + dst.len() == 1, + "OptionVarULE must have exactly one byte when None" + ); + dst[0] = 0; + } + } +} + +impl<U: VarULE + ?Sized + PartialEq> PartialEq for 
OptionVarULE<U> { + fn eq(&self, other: &Self) -> bool { + self.as_ref().eq(&other.as_ref()) + } +} + +impl<U: VarULE + ?Sized + Eq> Eq for OptionVarULE<U> {} + +impl<U: VarULE + ?Sized + PartialOrd> PartialOrd for OptionVarULE<U> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.as_ref().partial_cmp(&other.as_ref()) + } +} + +impl<U: VarULE + ?Sized + Ord> Ord for OptionVarULE<U> { + fn cmp(&self, other: &Self) -> Ordering { + self.as_ref().cmp(&other.as_ref()) + } +} diff --git a/third_party/rust/zerovec/src/ule/plain.rs b/third_party/rust/zerovec/src/ule/plain.rs new file mode 100644 index 0000000000..f244f6b682 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/plain.rs @@ -0,0 +1,366 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] +//! ULE implementation for Plain Old Data types, including all sized integers. + +use super::*; +use crate::impl_ule_from_array; +use crate::ZeroSlice; +use core::num::{NonZeroI8, NonZeroU8}; + +/// A u8 array of little-endian data with infallible conversions to and from &[u8]. +#[repr(transparent)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)] +#[allow(clippy::exhaustive_structs)] // newtype +pub struct RawBytesULE<const N: usize>(pub [u8; N]); + +impl<const N: usize> RawBytesULE<N> { + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + #[inline] + pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { + let data = bytes.as_mut_ptr(); + let len = bytes.len() / N; + // Safe because Self is transparent over [u8; N] + unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) } + } +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. RawBytesULE does not include any uninitialized or padding bytes. 
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. RawBytesULE is aligned to 1 byte. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. +// 5. The other ULE methods use the default impl. +// 6. RawBytesULE byte equality is semantic equality +unsafe impl<const N: usize> ULE for RawBytesULE<N> { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % N == 0 { + // Safe because Self is transparent over [u8; N] + Ok(()) + } else { + Err(ZeroVecError::length::<Self>(bytes.len())) + } + } +} + +impl<const N: usize> From<[u8; N]> for RawBytesULE<N> { + #[inline] + fn from(le_bytes: [u8; N]) -> Self { + Self(le_bytes) + } +} + +macro_rules! impl_byte_slice_size { + ($unsigned:ty, $size:literal) => { + impl RawBytesULE<$size> { + #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")] + #[inline] + pub fn as_unsigned_int(&self) -> $unsigned { + <$unsigned as $crate::ule::AsULE>::from_unaligned(*self) + } + + #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")] + #[inline] + pub const fn from_aligned(value: $unsigned) -> Self { + Self(value.to_le_bytes()) + } + + impl_ule_from_array!( + $unsigned, + RawBytesULE<$size>, + RawBytesULE([0; $size]) + ); + } + }; +} + +macro_rules! impl_const_constructors { + ($base:ty, $size:literal) => { + impl ZeroSlice<$base> { + /// This function can be used for constructing ZeroVecs in a const context, avoiding + /// parsing checks. 
+ /// + /// This cannot be generic over T because of current limitations in `const`, but if + /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`] + /// instead. + /// + /// See [`ZeroSlice::cast()`] for an example. + pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + let len = bytes.len(); + #[allow(clippy::modulo_one)] + if len % $size == 0 { + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + Err(ZeroVecError::InvalidLength { + ty: concat!("<const construct: ", $size, ">"), + len, + }) + } + } + } + }; +} + +macro_rules! impl_byte_slice_type { + ($single_fn:ident, $type:ty, $size:literal) => { + impl From<$type> for RawBytesULE<$size> { + #[inline] + fn from(value: $type) -> Self { + Self(value.to_le_bytes()) + } + } + impl AsULE for $type { + type ULE = RawBytesULE<$size>; + #[inline] + fn to_unaligned(self) -> Self::ULE { + RawBytesULE(self.to_le_bytes()) + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + <$type>::from_le_bytes(unaligned.0) + } + } + // EqULE is true because $type and RawBytesULE<$size> + // have the same byte sequence on little-endian + unsafe impl EqULE for $type {} + + impl RawBytesULE<$size> { + pub const fn $single_fn(v: $type) -> Self { + RawBytesULE(v.to_le_bytes()) + } + } + }; +} + +macro_rules! impl_byte_slice_unsigned_type { + ($type:ty, $size:literal) => { + impl_byte_slice_type!(from_unsigned, $type, $size); + }; +} + +macro_rules! 
impl_byte_slice_signed_type { + ($type:ty, $size:literal) => { + impl_byte_slice_type!(from_signed, $type, $size); + }; +} + +impl_byte_slice_size!(u16, 2); +impl_byte_slice_size!(u32, 4); +impl_byte_slice_size!(u64, 8); +impl_byte_slice_size!(u128, 16); + +impl_byte_slice_unsigned_type!(u16, 2); +impl_byte_slice_unsigned_type!(u32, 4); +impl_byte_slice_unsigned_type!(u64, 8); +impl_byte_slice_unsigned_type!(u128, 16); + +impl_byte_slice_signed_type!(i16, 2); +impl_byte_slice_signed_type!(i32, 4); +impl_byte_slice_signed_type!(i64, 8); +impl_byte_slice_signed_type!(i128, 16); + +impl_const_constructors!(u8, 1); +impl_const_constructors!(u16, 2); +impl_const_constructors!(u32, 4); +impl_const_constructors!(u64, 8); +impl_const_constructors!(u128, 16); + +// Note: The f32 and f64 const constructors currently have limited use because +// `f32::to_le_bytes` is not yet const. + +impl_const_constructors!(bool, 1); + +// Safety (based on the safety checklist on the ULE trait): +// 1. u8 does not include any uninitialized or padding bytes. +// 2. u8 is aligned to 1 byte. +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. u8 byte equality is semantic equality +unsafe impl ULE for u8 { + #[inline] + fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { + Ok(()) + } +} + +impl AsULE for u8 { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// EqULE is true because u8 is its own ULE. +unsafe impl EqULE for u8 {} + +// Safety (based on the safety checklist on the ULE trait): +// 1. NonZeroU8 does not include any uninitialized or padding bytes. +// 2. NonZeroU8 is aligned to 1 byte. +// 3. 
The impl of validate_byte_slice() returns an error if any byte is not valid (0x00). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. NonZeroU8 byte equality is semantic equality +unsafe impl ULE for NonZeroU8 { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + bytes.iter().try_for_each(|b| { + if *b == 0x00 { + Err(ZeroVecError::parse::<Self>()) + } else { + Ok(()) + } + }) + } +} + +impl AsULE for NonZeroU8 { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +unsafe impl EqULE for NonZeroU8 {} + +impl NicheBytes<1> for NonZeroU8 { + const NICHE_BIT_PATTERN: [u8; 1] = [0x00]; +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. i8 does not include any uninitialized or padding bytes. +// 2. i8 is aligned to 1 byte. +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. i8 byte equality is semantic equality +unsafe impl ULE for i8 { + #[inline] + fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { + Ok(()) + } +} + +impl AsULE for i8 { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// EqULE is true because i8 is its own ULE. 
+unsafe impl EqULE for i8 {} + +impl AsULE for NonZeroI8 { + type ULE = NonZeroU8; + #[inline] + fn to_unaligned(self) -> Self::ULE { + // Safety: NonZeroU8 and NonZeroI8 have same size + unsafe { core::mem::transmute(self) } + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + // Safety: NonZeroU8 and NonZeroI8 have same size + unsafe { core::mem::transmute(unaligned) } + } +} + +// These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation +// on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits +// +// The only potential problem is that some older platforms treat signaling NaNs differently. This is +// still quite portable, signalingness is not typically super important. + +impl AsULE for f32 { + type ULE = RawBytesULE<4>; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self.to_bits().to_unaligned() + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self::from_bits(u32::from_unaligned(unaligned)) + } +} + +impl AsULE for f64 { + type ULE = RawBytesULE<8>; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self.to_bits().to_unaligned() + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self::from_bits(u64::from_unaligned(unaligned)) + } +} + +// The from_bits documentation mentions that they have identical byte representations to integers +// and EqULE only cares about LE systems +unsafe impl EqULE for f32 {} +unsafe impl EqULE for f64 {} + +// The bool impl is not as efficient as it could be +// We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking +// for better bitpacking + +// Safety (based on the safety checklist on the ULE trait): +// 1. bool does not include any uninitialized or padding bytes (the remaining 7 bits in bool are by definition zero) +// 2. bool is aligned to 1 byte. +// 3. 
The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. bool byte equality is semantic equality +unsafe impl ULE for bool { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + for byte in bytes { + // https://doc.rust-lang.org/reference/types/boolean.html + // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 + if *byte > 1 { + return Err(ZeroVecError::parse::<Self>()); + } + } + Ok(()) + } +} + +impl AsULE for bool { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// EqULE is true because bool is its own ULE. +unsafe impl EqULE for bool {} diff --git a/third_party/rust/zerovec/src/ule/slices.rs b/third_party/rust/zerovec/src/ule/slices.rs new file mode 100644 index 0000000000..75ea57e02e --- /dev/null +++ b/third_party/rust/zerovec/src/ule/slices.rs @@ -0,0 +1,103 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use core::str; + +// Safety (based on the safety checklist on the ULE trait): +// 1. [T; N] does not include any uninitialized or padding bytes since T is ULE +// 2. [T; N] is aligned to 1 byte since T is ULE +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. +// 5. The other ULE methods use the default impl. +// 6. 
[T; N] byte equality is semantic equality since T is ULE +unsafe impl<T: ULE, const N: usize> ULE for [T; N] { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + // a slice of multiple Selfs is equivalent to just a larger slice of Ts + T::validate_byte_slice(bytes) + } +} + +impl<T: AsULE, const N: usize> AsULE for [T; N] { + type ULE = [T::ULE; N]; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self.map(T::to_unaligned) + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned.map(T::from_unaligned) + } +} + +unsafe impl<T: EqULE, const N: usize> EqULE for [T; N] {} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. str does not include any uninitialized or padding bytes. +// 2. str is aligned to 1 byte. +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. `parse_byte_slice()` is equivalent to `validate_byte_slice()` followed by `from_byte_slice_unchecked()` +// 7. str byte equality is semantic equality +unsafe impl VarULE for str { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>())?; + Ok(()) + } + + #[inline] + fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>()) + } + /// Invariant: must be safe to call when called on a slice that previously + /// succeeded with `parse_byte_slice` + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + str::from_utf8_unchecked(bytes) + } +} + +/// Note: VarULE is well-defined for all `[T]` where `T: ULE`, but [`ZeroSlice`] is more ergonomic +/// when `T` is a low-level ULE type. 
For example: +/// +/// ```no_run +/// # use zerovec::ZeroSlice; +/// # use zerovec::VarZeroVec; +/// # use zerovec::ule::AsULE; +/// // OK: [u8] is a useful type +/// let _: VarZeroVec<[u8]> = unimplemented!(); +/// +/// // Technically works, but [u32::ULE] is not very useful +/// let _: VarZeroVec<[<u32 as AsULE>::ULE]> = unimplemented!(); +/// +/// // Better: ZeroSlice<u32> +/// let _: VarZeroVec<ZeroSlice<u32>> = unimplemented!(); +/// ``` +/// +/// [`ZeroSlice`]: crate::ZeroSlice +// Safety (based on the safety checklist on the VarULE trait): +// 1. [T] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type) +// 2. [T] is aligned to 1 byte (achieved by being a slice of a ULE type) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. All other methods are defaulted +// 7. `[T]` byte equality is semantic equality (achieved by being a slice of a ULE type) +unsafe impl<T> VarULE for [T] +where + T: ULE, +{ + #[inline] + fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { + T::validate_byte_slice(slice) + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + T::from_byte_slice_unchecked(bytes) + } +} diff --git a/third_party/rust/zerovec/src/ule/tuple.rs b/third_party/rust/zerovec/src/ule/tuple.rs new file mode 100644 index 0000000000..3e0f291b3f --- /dev/null +++ b/third_party/rust/zerovec/src/ule/tuple.rs @@ -0,0 +1,179 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! ULE impls for tuples. +//! +//! Rust does not guarantee the layout of tuples, so ZeroVec defines its own tuple ULE types. +//! 
+//! Impls are defined for tuples of up to 6 elements. For longer tuples, use a custom struct +//! with [`#[make_ule]`](crate::make_ule). +//! +//! # Examples +//! +//! ``` +//! use zerovec::ZeroVec; +//! +//! // ZeroVec of tuples! +//! let zerovec: ZeroVec<(u32, char)> = [(1, 'a'), (1234901, '啊'), (100, 'अ')] +//! .iter() +//! .copied() +//! .collect(); +//! +//! assert_eq!(zerovec.get(1), Some((1234901, '啊'))); +//! ``` + +use super::*; +use core::fmt; +use core::mem; + +macro_rules! tuple_ule { + ($name:ident, $len:literal, [ $($t:ident $i:tt),+ ]) => { + #[doc = concat!("ULE type for tuples with ", $len, " elements.")] + #[repr(packed)] + #[allow(clippy::exhaustive_structs)] // stable + pub struct $name<$($t),+>($(pub $t),+); + + // Safety (based on the safety checklist on the ULE trait): + // 1. TupleULE does not include any uninitialized or padding bytes. + // (achieved by `#[repr(packed)]` on a struct containing only ULE fields) + // 2. TupleULE is aligned to 1 byte. + // (achieved by `#[repr(packed)]` on a struct containing only ULE fields) + // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. + // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. + // 5. The other ULE methods use the default impl. + // 6. 
TupleULE byte equality is semantic equality by relying on the ULE equality + // invariant on the subfields + unsafe impl<$($t: ULE),+> ULE for $name<$($t),+> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + // expands to: 0usize + mem::size_of::<A>() + mem::size_of::<B>(); + let ule_bytes = 0usize $(+ mem::size_of::<$t>())+; + if bytes.len() % ule_bytes != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + for chunk in bytes.chunks(ule_bytes) { + let mut i = 0; + $( + let j = i; + i += mem::size_of::<$t>(); + #[allow(clippy::indexing_slicing)] // length checked + <$t>::validate_byte_slice(&chunk[j..i])?; + )+ + } + Ok(()) + } + } + + impl<$($t: AsULE),+> AsULE for ($($t),+) { + type ULE = $name<$(<$t>::ULE),+>; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + $name($( + self.$i.to_unaligned() + ),+) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + ($( + <$t>::from_unaligned(unaligned.$i) + ),+) + } + } + + impl<$($t: fmt::Debug + ULE),+> fmt::Debug for $name<$($t),+> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + ($(self.$i),+).fmt(f) + } + } + + // We need manual impls since `#[derive()]` is disallowed on packed types + impl<$($t: PartialEq + ULE),+> PartialEq for $name<$($t),+> { + fn eq(&self, other: &Self) -> bool { + ($(self.$i),+).eq(&($(other.$i),+)) + } + } + + impl<$($t: Eq + ULE),+> Eq for $name<$($t),+> {} + + impl<$($t: PartialOrd + ULE),+> PartialOrd for $name<$($t),+> { + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + ($(self.$i),+).partial_cmp(&($(other.$i),+)) + } + } + + impl<$($t: Ord + ULE),+> Ord for $name<$($t),+> { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + ($(self.$i),+).cmp(&($(other.$i),+)) + } + } + + impl<$($t: ULE),+> Clone for $name<$($t),+> { + fn clone(&self) -> Self { + *self + } + } + + impl<$($t: ULE),+> Copy for $name<$($t),+> {} + + impl<'a, $($t: Ord + AsULE + 'static),+> 
crate::map::ZeroMapKV<'a> for ($($t),+) { + type Container = crate::ZeroVec<'a, ($($t),+)>; + type Slice = crate::ZeroSlice<($($t),+)>; + type GetType = $name<$(<$t>::ULE),+>; + type OwnedType = ($($t),+); + } + }; +} + +tuple_ule!(Tuple2ULE, "2", [ A 0, B 1 ]); +tuple_ule!(Tuple3ULE, "3", [ A 0, B 1, C 2 ]); +tuple_ule!(Tuple4ULE, "4", [ A 0, B 1, C 2, D 3 ]); +tuple_ule!(Tuple5ULE, "5", [ A 0, B 1, C 2, D 3, E 4 ]); +tuple_ule!(Tuple6ULE, "6", [ A 0, B 1, C 2, D 3, E 4, F 5 ]); + +#[test] +fn test_pairule_validate() { + use crate::ZeroVec; + let vec: Vec<(u32, char)> = vec![(1, 'a'), (1234901, '啊'), (100, 'अ')]; + let zerovec: ZeroVec<(u32, char)> = vec.iter().copied().collect(); + let bytes = zerovec.as_bytes(); + let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap(); + assert_eq!(zerovec, zerovec2); + + // Test failed validation with a correctly sized but differently constrained tuple + // Note: 1234901 is not a valid char + let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes); + assert!(zerovec3.is_err()); +} + +#[test] +fn test_tripleule_validate() { + use crate::ZeroVec; + let vec: Vec<(u32, char, i8)> = vec![(1, 'a', -5), (1234901, '啊', 3), (100, 'अ', -127)]; + let zerovec: ZeroVec<(u32, char, i8)> = vec.iter().copied().collect(); + let bytes = zerovec.as_bytes(); + let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap(); + assert_eq!(zerovec, zerovec2); + + // Test failed validation with a correctly sized but differently constrained tuple + // Note: 1234901 is not a valid char + let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes); + assert!(zerovec3.is_err()); +} + +#[test] +fn test_quadule_validate() { + use crate::ZeroVec; + let vec: Vec<(u32, char, i8, u16)> = + vec![(1, 'a', -5, 3), (1234901, '啊', 3, 11), (100, 'अ', -127, 0)]; + let zerovec: ZeroVec<(u32, char, i8, u16)> = vec.iter().copied().collect(); + let bytes = zerovec.as_bytes(); + let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap(); + assert_eq!(zerovec, 
zerovec2); + + // Test failed validation with a correctly sized but differently constrained tuple + // Note: 1234901 is not a valid char + let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes); + assert!(zerovec3.is_err()); +} diff --git a/third_party/rust/zerovec/src/ule/unvalidated.rs b/third_party/rust/zerovec/src/ule/unvalidated.rs new file mode 100644 index 0000000000..21cfb0c0d5 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/unvalidated.rs @@ -0,0 +1,527 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{AsULE, RawBytesULE, VarULE}; +use crate::ule::EqULE; +use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError}; +use alloc::boxed::Box; +use core::cmp::Ordering; +use core::fmt; +use core::ops::Deref; + +/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant. +/// +/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For +/// example, strings that are keys of a map don't need to ever be reified as `str`s. +/// +/// [`UnvalidatedStr`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`]. +/// +/// The main advantage of this type over `[u8]` is that it serializes as a string in +/// human-readable formats like JSON. 
+/// +/// # Examples +/// +/// Using an [`UnvalidatedStr`] as the key of a [`ZeroMap`]: +/// +/// ``` +/// use zerovec::ule::UnvalidatedStr; +/// use zerovec::ZeroMap; +/// +/// let map: ZeroMap<UnvalidatedStr, usize> = [ +/// (UnvalidatedStr::from_str("abc"), 11), +/// (UnvalidatedStr::from_str("def"), 22), +/// (UnvalidatedStr::from_str("ghi"), 33), +/// ] +/// .into_iter() +/// .collect(); +/// +/// let key = "abc"; +/// let value = map.get_copied_by(|uvstr| uvstr.as_bytes().cmp(key.as_bytes())); +/// assert_eq!(Some(11), value); +/// ``` +/// +/// [`ZeroMap`]: crate::ZeroMap +#[repr(transparent)] +#[derive(PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] // transparent newtype +pub struct UnvalidatedStr([u8]); + +impl fmt::Debug for UnvalidatedStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Debug as a string if possible + match self.try_as_str() { + Ok(s) => fmt::Debug::fmt(s, f), + Err(_) => fmt::Debug::fmt(&self.0, f), + } + } +} + +impl UnvalidatedStr { + /// Create a [`UnvalidatedStr`] from a byte slice. + #[inline] + pub const fn from_bytes(other: &[u8]) -> &Self { + // Safety: UnvalidatedStr is transparent over [u8] + unsafe { core::mem::transmute(other) } + } + + /// Create a [`UnvalidatedStr`] from a string slice. + #[inline] + pub const fn from_str(s: &str) -> &Self { + Self::from_bytes(s.as_bytes()) + } + + /// Create a [`UnvalidatedStr`] from boxed bytes. + #[inline] + pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> { + // Safety: UnvalidatedStr is transparent over [u8] + unsafe { core::mem::transmute(other) } + } + + /// Create a [`UnvalidatedStr`] from a boxed `str`. + #[inline] + pub fn from_boxed_str(other: Box<str>) -> Box<Self> { + Self::from_boxed_bytes(other.into_boxed_bytes()) + } + + /// Get the bytes from a [`UnvalidatedStr`]. + #[inline] + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Attempt to convert a [`UnvalidatedStr`] to a `str`. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedStr; + /// + /// static A: &UnvalidatedStr = UnvalidatedStr::from_bytes(b"abc"); + /// + /// let b = A.try_as_str().unwrap(); + /// assert_eq!(b, "abc"); + /// ``` + // Note: this is const starting in 1.63 + #[inline] + pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> { + core::str::from_utf8(&self.0) + } +} + +impl<'a> From<&'a str> for &'a UnvalidatedStr { + #[inline] + fn from(other: &'a str) -> Self { + UnvalidatedStr::from_str(other) + } +} + +impl From<Box<str>> for Box<UnvalidatedStr> { + #[inline] + fn from(other: Box<str>) -> Self { + UnvalidatedStr::from_boxed_str(other) + } +} + +impl Deref for UnvalidatedStr { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> ZeroMapKV<'a> for UnvalidatedStr { + type Container = VarZeroVec<'a, UnvalidatedStr>; + type Slice = VarZeroSlice<UnvalidatedStr>; + type GetType = UnvalidatedStr; + type OwnedType = Box<UnvalidatedStr>; +} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. UnvalidatedStr does not include any uninitialized or padding bytes (transparent over a ULE) +// 2. UnvalidatedStr is aligned to 1 byte (transparent over a ULE) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid (impossible) +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety (impossible) +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data (returns the argument directly) +// 6. All other methods are defaulted +// 7. 
//    `[T]` byte equality is semantic equality (transparent over a ULE)
unsafe impl VarULE for UnvalidatedStr {
    /// Accepts every byte slice: `UnvalidatedStr` deliberately performs no
    /// validation at parse time.
    #[inline]
    fn validate_byte_slice(_: &[u8]) -> Result<(), ZeroVecError> {
        Ok(())
    }
    /// Reinterprets `bytes` as an `UnvalidatedStr` without copying.
    #[inline]
    unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
        UnvalidatedStr::from_bytes(bytes)
    }
}

/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
#[cfg(feature = "serde")]
impl serde::Serialize for UnvalidatedStr {
    /// Serializes as a string for human-readable formats and as bytes otherwise.
    ///
    /// Returns a custom serializer error if the contents are not valid UTF-8.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::Error;
        // UTF-8 validity is checked before branching, so invalid contents are
        // rejected for both the human-readable and the binary representation.
        let s = self
            .try_as_str()
            .map_err(|_| S::Error::custom("invalid UTF-8 in UnvalidatedStr"))?;
        if serializer.is_human_readable() {
            serializer.serialize_str(s)
        } else {
            serializer.serialize_bytes(s.as_bytes())
        }
    }
}

/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Box<UnvalidatedStr> {
    /// Deserializes an owned value: from a string for human-readable formats,
    /// from raw bytes otherwise.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let boxed_str = Box::<str>::deserialize(deserializer)?;
            Ok(UnvalidatedStr::from_boxed_str(boxed_str))
        } else {
            // The binary path takes the bytes as-is; no UTF-8 check happens here.
            let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
            Ok(UnvalidatedStr::from_boxed_bytes(boxed_bytes))
        }
    }
}

/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a UnvalidatedStr
where
    'de: 'a,
{
    /// Deserializes a value borrowed from the input (`'de: 'a`): from a
    /// borrowed string for human-readable formats, from borrowed bytes otherwise.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let s = <&str>::deserialize(deserializer)?;
            Ok(UnvalidatedStr::from_str(s))
        } else {
            // The binary path takes the bytes as-is; no UTF-8 check happens here.
            let bytes = <&[u8]>::deserialize(deserializer)?;
            Ok(UnvalidatedStr::from_bytes(bytes))
        }
    }
}

/// A u8 array of little-endian data that is
expected to be a Unicode scalar value, but is not +/// validated as such. +/// +/// Use this type instead of `char` when you want to deal with data that is expected to be valid +/// Unicode scalar values, but you want control over when or if you validate that assumption. +/// +/// # Examples +/// +/// ``` +/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE}; +/// use zerovec::{ZeroSlice, ZeroVec}; +/// +/// // data known to be little-endian three-byte chunks of valid Unicode scalar values +/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01]; +/// // ground truth expectation +/// let real = ['h', 'i', '👋']; +/// +/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length"); +/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect(); +/// assert_eq!(&parsed, &real); +/// +/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect(); +/// let serialized_data = chars.as_bytes(); +/// assert_eq!(serialized_data, &data); +/// ``` +#[repr(transparent)] +#[derive(PartialEq, Eq, Clone, Copy, Hash)] +pub struct UnvalidatedChar([u8; 3]); + +impl UnvalidatedChar { + /// Create a [`UnvalidatedChar`] from a `char`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedChar; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(a.try_to_char().unwrap(), 'a'); + /// ``` + #[inline] + pub const fn from_char(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + #[inline] + #[doc(hidden)] + pub const fn from_u24(c: u32) -> Self { + let [u0, u1, u2, _u3] = c.to_le_bytes(); + Self([u0, u1, u2]) + } + + /// Attempt to convert a [`UnvalidatedChar`] to a `char`. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::{AsULE, UnvalidatedChar}; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(a.try_to_char(), Ok('a')); + /// + /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into()); + /// assert!(matches!(b.try_to_char(), Err(_))); + /// ``` + #[inline] + pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> { + let [u0, u1, u2] = self.0; + char::try_from(u32::from_le_bytes([u0, u1, u2, 0])) + } + + /// Convert a [`UnvalidatedChar`] to a `char', returning [`char::REPLACEMENT_CHARACTER`] + /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::{AsULE, UnvalidatedChar}; + /// + /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into()); + /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER); + /// ``` + #[inline] + pub fn to_char_lossy(self) -> char { + self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER) + } + + /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is + /// a valid Unicode scalar value. + /// + /// # Safety + /// + /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedChar; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a'); + /// ``` + #[inline] + pub unsafe fn to_char_unchecked(self) -> char { + let [u0, u1, u2] = self.0; + char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0])) + } +} + +impl RawBytesULE<3> { + /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling + /// [`AsULE::to_unaligned`]. 
+ #[inline] + pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self { + RawBytesULE(uc.0) + } +} + +impl AsULE for UnvalidatedChar { + type ULE = RawBytesULE<3>; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + RawBytesULE(self.0) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self(unaligned.0) + } +} + +// Safety: UnvalidatedChar is always the little-endian representation of a char, +// which corresponds to its AsULE::ULE type +unsafe impl EqULE for UnvalidatedChar {} + +impl fmt::Debug for UnvalidatedChar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Debug as a char if possible + match self.try_to_char() { + Ok(c) => fmt::Debug::fmt(&c, f), + Err(_) => fmt::Debug::fmt(&self.0, f), + } + } +} + +impl PartialOrd for UnvalidatedChar { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for UnvalidatedChar { + // custom implementation, as derived Ord would compare lexicographically + fn cmp(&self, other: &Self) -> Ordering { + let [a0, a1, a2] = self.0; + let a = u32::from_le_bytes([a0, a1, a2, 0]); + let [b0, b1, b2] = other.0; + let b = u32::from_le_bytes([b0, b1, b2, 0]); + a.cmp(&b) + } +} + +impl From<char> for UnvalidatedChar { + #[inline] + fn from(value: char) -> Self { + Self::from_char(value) + } +} + +impl TryFrom<UnvalidatedChar> for char { + type Error = core::char::CharTryFromError; + + #[inline] + fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> { + value.try_to_char() + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl serde::Serialize for UnvalidatedChar { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + use serde::ser::Error; + let c = self + .try_to_char() + .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?; + if serializer.is_human_readable() { + 
/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for UnvalidatedChar {
    /// Reads a `char` from human-readable formats and three raw bytes from
    /// every other format; the raw bytes are stored without validation.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            let raw = <[u8; 3]>::deserialize(deserializer)?;
            return Ok(UnvalidatedChar(raw));
        }
        let scalar = <char>::deserialize(deserializer)?;
        Ok(UnvalidatedChar::from_char(scalar))
    }
}

#[cfg(feature = "databake")]
impl databake::Bake for UnvalidatedChar {
    /// Emits `from_char(..)` when the value is a scalar value and
    /// `from_u24(..)` with the raw 24-bit value otherwise.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        // Both outcomes reference the `zerovec` crate, so register it up front.
        env.insert("zerovec");
        if let Ok(ch) = self.try_to_char() {
            let ch = ch.bake(env);
            databake::quote! {
                zerovec::ule::UnvalidatedChar::from_char(#ch)
            }
        } else {
            let [b0, b1, b2] = self.0;
            let u24 = u32::from_le_bytes([b0, b1, b2, 0]);
            databake::quote! {
                zerovec::ule::UnvalidatedChar::from_u24(#u24)
            }
        }
    }
}
as *const _ as *const u8; + uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len()); + } + + // UnvalidatedChar is defined as little-endian, so this must be true on all platforms + // also asserts that to_unaligned/from_unaligned are no-ops + assert_eq!(uvbytes, ule_bytes); + + assert_eq!( + &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], + ule_bytes + ); + } + + #[test] + fn test_char_bake() { + databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec); + // surrogate code point + databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec); + } +} diff --git a/third_party/rust/zerovec/src/varzerovec/components.rs b/third_party/rust/zerovec/src/varzerovec/components.rs new file mode 100644 index 0000000000..9b48a5bd60 --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/components.rs @@ -0,0 +1,574 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use alloc::boxed::Box; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::convert::TryFrom; +use core::marker::PhantomData; +use core::ops::Range; + +// Also used by owned.rs +pub(super) const LENGTH_WIDTH: usize = 4; +pub(super) const METADATA_WIDTH: usize = 0; +pub(super) const MAX_LENGTH: usize = u32::MAX as usize; +pub(super) const MAX_INDEX: usize = u32::MAX as usize; + +/// This trait allows switching between different possible internal +/// representations of VarZeroVec. +/// +/// Currently this crate supports two formats: [`Index16`] and [`Index32`], +/// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec) +/// types unless explicitly specified otherwise. 
///
/// Do not implement this trait, its internals may be changed in the future,
/// and all of its associated items are hidden from the docs.
#[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period
pub unsafe trait VarZeroVecFormat: 'static + Sized {
    /// Number of bytes used to store one entry of the index array.
    #[doc(hidden)]
    const INDEX_WIDTH: usize;
    /// Largest value an index entry of this format can hold.
    #[doc(hidden)]
    const MAX_VALUE: u32;
    /// This is always `RawBytesULE<Self::INDEX_WIDTH>` however
    /// Rust does not currently support using associated constants in const
    /// generics
    #[doc(hidden)]
    type RawBytes: ULE;

    // various conversions because RawBytes is an associated type (rather than
    // a concrete `RawBytesULE<N>`) now
    /// Decodes one stored index entry.
    #[doc(hidden)]
    fn rawbytes_to_usize(raw: Self::RawBytes) -> usize;
    /// Encodes `u` as one index entry.
    #[doc(hidden)]
    fn usize_to_rawbytes(u: usize) -> Self::RawBytes;

    /// Views a mutable byte buffer as a mutable slice of index entries,
    /// without validation.
    #[doc(hidden)]
    fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes];
}

/// This is a [`VarZeroVecFormat`] that stores u16s in the index array.
/// Will have a smaller data size, but it's more likely for larger arrays
/// to be unrepresentable (and error on construction)
///
/// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) types.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // marker
pub struct Index16;

/// This is a [`VarZeroVecFormat`] that stores u32s in the index array.
/// Will have a larger data size, but will support large arrays without
/// problems.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // marker
pub struct Index32;

// Two-byte index entries.
unsafe impl VarZeroVecFormat for Index16 {
    const INDEX_WIDTH: usize = 2;
    const MAX_VALUE: u32 = u16::MAX as u32;
    type RawBytes = RawBytesULE<2>;
    #[inline]
    fn rawbytes_to_usize(raw: Self::RawBytes) -> usize {
        raw.as_unsigned_int() as usize
    }
    #[inline]
    fn usize_to_rawbytes(u: usize) -> Self::RawBytes {
        // NOTE: `as u16` truncates; values above `u16::MAX` are not rejected here.
        (u as u16).to_unaligned()
    }
    #[inline]
    fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes] {
        Self::RawBytes::from_byte_slice_unchecked_mut(bytes)
    }
}

// Four-byte index entries.
unsafe impl VarZeroVecFormat for Index32 {
    const INDEX_WIDTH: usize = 4;
    const MAX_VALUE: u32 = u32::MAX;
    type RawBytes = RawBytesULE<4>;
    #[inline]
    fn rawbytes_to_usize(raw: Self::RawBytes) -> usize {
        raw.as_unsigned_int() as usize
    }
    #[inline]
    fn usize_to_rawbytes(u: usize) -> Self::RawBytes {
        // NOTE: `as u32` truncates; values above `u32::MAX` are not rejected here.
        (u as u32).to_unaligned()
    }
    #[inline]
    fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes] {
        Self::RawBytes::from_byte_slice_unchecked_mut(bytes)
    }
}

/// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec
/// internal representation code lies.
///
/// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into
/// the buffer components. Logically this is capable of behaving as
/// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually
/// exist.
+/// +/// See [`VarZeroVecComponents::parse_byte_slice()`] for information on the internal invariants involved +#[derive(Debug)] +pub struct VarZeroVecComponents<'a, T: ?Sized, F> { + /// The number of elements + len: u32, + /// The list of indices into the `things` slice + indices: &'a [u8], + /// The contiguous list of `T::VarULE`s + things: &'a [u8], + /// The original slice this was constructed from + entire_slice: &'a [u8], + marker: PhantomData<(&'a T, F)>, +} + +// #[derive()] won't work here since we do not want it to be +// bound on T: Copy +impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {} +impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> { + #[inline] + pub fn new() -> Self { + Self { + len: 0, + indices: &[], + things: &[], + entire_slice: &[], + marker: PhantomData, + } + } +} +impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> { + /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size: + /// + /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize) + /// - There must be at least `4*length + 4` bytes total, to form the array `indices` of indices + /// - `indices[i]..indices[i+1]` must index into a valid section of + /// `things`, such that it parses to a `T::VarULE` + /// - `indices[len - 1]..things.len()` must index into a valid section of + /// `things`, such that it parses to a `T::VarULE` + #[inline] + pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> { + // The empty VZV is special-cased to the empty slice + if slice.is_empty() { + return Ok(VarZeroVecComponents { + len: 0, + indices: &[], + things: &[], + entire_slice: slice, + marker: 
PhantomData, + }); + } + let len_bytes = slice + .get(0..LENGTH_WIDTH) + .ok_or(ZeroVecError::VarZeroVecFormatError)?; + let len_ule = RawBytesULE::<LENGTH_WIDTH>::parse_byte_slice(len_bytes) + .map_err(|_| ZeroVecError::VarZeroVecFormatError)?; + + let len = len_ule + .get(0) + .ok_or(ZeroVecError::VarZeroVecFormatError)? + .as_unsigned_int(); + let indices_bytes = slice + .get( + LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * (len as usize), + ) + .ok_or(ZeroVecError::VarZeroVecFormatError)?; + let things = slice + .get(F::INDEX_WIDTH * (len as usize) + LENGTH_WIDTH + METADATA_WIDTH..) + .ok_or(ZeroVecError::VarZeroVecFormatError)?; + + let borrowed = VarZeroVecComponents { + len, + indices: indices_bytes, + things, + entire_slice: slice, + marker: PhantomData, + }; + + borrowed.check_indices_and_things()?; + + Ok(borrowed) + } + + /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously + /// successfully returned a [`VarZeroVecComponents`] when passed to + /// [`VarZeroVecComponents::parse_byte_slice()`]. Will return the same + /// object as one would get from calling [`VarZeroVecComponents::parse_byte_slice()`]. 
    ///
    /// # Safety
    /// The bytes must have previously successfully run through
    /// [`VarZeroVecComponents::parse_byte_slice()`]
    pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
        // The empty VZV is special-cased to the empty slice
        if slice.is_empty() {
            return VarZeroVecComponents {
                len: 0,
                indices: &[],
                things: &[],
                entire_slice: slice,
                marker: PhantomData,
            };
        }
        // The unchecked accesses below mirror the checked ones in
        // `parse_byte_slice()`; they are in bounds because the caller promises
        // that this exact buffer already passed that function.
        let len_bytes = slice.get_unchecked(0..LENGTH_WIDTH);
        let len_ule = RawBytesULE::<LENGTH_WIDTH>::from_byte_slice_unchecked(len_bytes);

        let len = len_ule.get_unchecked(0).as_unsigned_int();
        // Index array: `len` entries of `F::INDEX_WIDTH` bytes right after the header.
        let indices_bytes = slice.get_unchecked(
            LENGTH_WIDTH + METADATA_WIDTH
                ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * (len as usize),
        );
        // Element data: everything after the index array.
        let things =
            slice.get_unchecked(LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * (len as usize)..);

        VarZeroVecComponents {
            len,
            indices: indices_bytes,
            things,
            entire_slice: slice,
            marker: PhantomData,
        }
    }

    /// Get the number of elements in this vector
    #[inline]
    pub fn len(self) -> usize {
        self.len as usize
    }

    /// Returns `true` if the vector contains no elements.
    #[inline]
    pub fn is_empty(self) -> bool {
        // The index array holds one entry per element, so it is empty exactly
        // when there are no elements.
        self.indices.is_empty()
    }

    /// Get the idx'th element out of this slice. Returns `None` if out of bounds.
    #[inline]
    pub fn get(self, idx: usize) -> Option<&'a T> {
        if idx >= self.len() {
            return None;
        }
        // Safety: `idx < self.len()` was checked just above.
        Some(unsafe { self.get_unchecked(idx) })
    }

    /// Get the idx'th element out of this slice. Does not bounds check.
    ///
    /// Safety:
    /// - `idx` must be in bounds (`idx < self.len()`)
    #[inline]
    pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T {
        let range = self.get_things_range(idx);
        let things_slice = self.things.get_unchecked(range);
        T::from_byte_slice_unchecked(things_slice)
    }

    /// Get the range in `things` for the element at `idx`. Does not bounds check.
    ///
    /// Safety:
    /// - `idx` must be in bounds (`idx < self.len()`)
    #[inline]
    unsafe fn get_things_range(self, idx: usize) -> Range<usize> {
        let start = F::rawbytes_to_usize(*self.indices_slice().get_unchecked(idx));
        // The last element has no following index entry; it runs to the end of `things`.
        let end = if idx + 1 == self.len() {
            self.things.len()
        } else {
            F::rawbytes_to_usize(*self.indices_slice().get_unchecked(idx + 1))
        };
        debug_assert!(start <= end);
        start..end
    }

    /// Get the range in `entire_slice` for the element at `idx`. Does not bounds check.
    ///
    /// Safety:
    /// - `idx` must be in bounds (`idx < self.len()`)
    #[inline]
    pub(crate) unsafe fn get_range(self, idx: usize) -> Range<usize> {
        let range = self.get_things_range(idx);
        // Distance in bytes from the start of the whole buffer to the start of
        // `things`; shifts the `things`-relative range into `entire_slice` coordinates.
        let offset = (self.things as *const [u8] as *const u8)
            .offset_from(self.entire_slice as *const [u8] as *const u8)
            as usize;
        range.start + offset..range.end + offset
    }

    /// Check the internal invariants of VarZeroVecComponents:
    ///
    /// - `indices[i]..indices[i+1]` must index into a valid section of
    ///   `things`, such that it parses to a `T::VarULE`
    /// - `indices[len - 1]..things.len()` must index into a valid section of
    ///   `things`, such that it parses to a `T::VarULE`
    /// - `indices` is monotonically increasing
    ///
    /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods
    /// assume that the slice has been passed through check_indices_and_things
    #[inline]
    #[allow(clippy::len_zero)] // more explicit to enforce safety invariants
    fn check_indices_and_things(self) -> Result<(), ZeroVecError> {
        // Panics (rather than returning an error) if the index array does not
        // hold exactly `len` entries.
        assert_eq!(self.len(), self.indices_slice().len());
        if self.len() == 0 {
            // A zero-length vector must not carry any element data.
            if self.things.len() > 0 {
                return Err(ZeroVecError::VarZeroVecFormatError);
            } else {
                return Ok(());
            }
        }
        // Safety: index 0 is in bounds: `len() != 0` here and `len()` equals
        // `indices_slice().len()` (assertion above)
        let mut start = F::rawbytes_to_usize(unsafe { *self.indices_slice().get_unchecked(0) });
        // The first element must start at the very beginning of `things`.
        if start != 0 {
            return Err(ZeroVecError::VarZeroVecFormatError);
        }
        for i in 0..self.len() {
            let end = if i == self.len() - 1 {
                self.things.len()
            } else {
                // Safety: i+1 is in bounds: `i < len() - 1` in this branch and
                // `len()` equals `indices_slice().len()` (assertion above)
                F::rawbytes_to_usize(unsafe { *self.indices_slice().get_unchecked(i + 1) })
            };
            // Indices must not decrease ...
            if start > end {
                return Err(ZeroVecError::VarZeroVecFormatError);
            }
            // ... and must stay inside `things`.
            if end > self.things.len() {
                return Err(ZeroVecError::VarZeroVecFormatError);
            }
            // Safety: start..end is a valid range in self.things
            let bytes = unsafe { self.things.get_unchecked(start..end) };
            T::parse_byte_slice(bytes)?;
            start = end;
        }
        Ok(())
    }

    /// Create an iterator over the Ts contained in VarZeroVecComponents
    #[inline]
    pub fn iter(self) -> impl Iterator<Item = &'a T> {
        // Pair every start index with the next index (or `things.len()` for the
        // last element) to obtain each element's byte range.
        self.indices_slice()
            .iter()
            .copied()
            .map(F::rawbytes_to_usize)
            .zip(
                self.indices_slice()
                    .iter()
                    .copied()
                    .map(F::rawbytes_to_usize)
                    .skip(1)
                    .chain([self.things.len()]),
            )
            .map(move |(start, end)| unsafe { self.things.get_unchecked(start..end) })
            .map(|bytes| unsafe { T::from_byte_slice_unchecked(bytes) })
    }

    /// Collects every element into an owned, boxed copy.
    pub fn to_vec(self) -> Vec<Box<T>> {
        self.iter().map(T::to_boxed).collect()
    }

    /// Views the raw index bytes as a slice of index entries, without validation.
    #[inline]
    fn indices_slice(&self) -> &'a [F::RawBytes] {
        unsafe { F::RawBytes::from_byte_slice_unchecked(self.indices) }
    }

    // Dump a debuggable representation of this type
    #[allow(unused)] // useful for debugging
    pub(crate) fn dump(&self) -> String {
        let indices = self
            .indices_slice()
            .iter()
            .copied()
            .map(F::rawbytes_to_usize)
            .collect::<Vec<_>>();
        format!("VarZeroVecComponents {{ indices: {indices:?} }}")
    }
impl<'a, T, F> VarZeroVecComponents<'a, T, F>
where
    T: VarULE,
    T: ?Sized,
    F: VarZeroVecFormat,
{
    /// Binary searches a sorted `VarZeroVecComponents<T>` for the given predicate. For more information, see
    /// the primitive function [`binary_search_by`](slice::binary_search_by).
    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
        self.binary_search_impl(predicate, self.indices_slice())
    }

    /// Like [`Self::binary_search_by`], but only searches the elements in `range`.
    ///
    /// Returns `None` if `range` is out of bounds. The index in the result is
    /// the position within the searched sub-range, as it comes straight from
    /// `binary_search_by` on the sub-slice of the index array.
    pub fn binary_search_in_range_by(
        &self,
        predicate: impl FnMut(&T) -> Ordering,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        let indices_slice = self.indices_slice().get(range)?;
        Some(self.binary_search_impl(predicate, indices_slice))
    }

    /// Binary searches a sorted `VarZeroVecComponents<T>` with the given predicate. For more information, see
    /// the primitive function [`binary_search_by`](slice::binary_search_by).
    ///
    /// `indices_slice` must be (a sub-slice of) this vector's index array: the
    /// logical element index is recovered from the address of each probed entry.
    fn binary_search_impl(
        &self,
        mut predicate: impl FnMut(&T) -> Ordering,
        indices_slice: &[F::RawBytes],
    ) -> Result<usize, usize> {
        // This code is an absolute atrocity. This code is not a place of honor. This
        // code is known to the State of California to cause cancer.
        //
        // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
        // We do not have a slice. We have something we can .get() and index on, but that is not
        // a slice.
        //
        // The `binary_search*` functions also do not have a variant where they give you the element's
        // index, which we could otherwise use to directly index `self`.
        // We do have `self.indices`, but these are indices into a byte buffer, which cannot in
        // isolation be used to recoup the logical index of the element they refer to.
        //
        // However, `binary_search_by()` provides references to the elements of the slice being iterated.
        // Since the layout of Rust slices is well-defined, we can do pointer arithmetic on these references
        // to obtain the index being used by the search.
        //
        // It's worth noting that the slice we choose to search is irrelevant, as long as it has the appropriate
        // length. `self.indices` is defined to have length `self.len()`, so it is convenient to use
        // here and does not require additional allocations.
        //
        // The alternative to doing this is to implement our own binary search. This is significantly less fun.

        // Note: We always use zero_index relative to the whole indices array, even if we are
        // only searching a subslice of it.
        let zero_index = self.indices.as_ptr() as *const _ as usize;
        indices_slice.binary_search_by(|probe: &_| {
            // `self.indices` is a vec of unaligned F::INDEX_WIDTH values, so we divide by F::INDEX_WIDTH
            // to get the actual index
            let index = (probe as *const _ as usize - zero_index) / F::INDEX_WIDTH;
            // safety: we know this is in bounds (`probe` points into the index
            // array, which has one entry per element)
            let actual_probe = unsafe { self.get_unchecked(index) };
            predicate(actual_probe)
        })
    }
}

/// Collects the bytes for a VarZeroSlice into a Vec.
///
/// Returns `None` if `compute_serializable_len` cannot represent the total
/// length. `elements` is expected to be non-empty (debug-asserted).
pub fn get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<Vec<u8>>
where
    T: VarULE + ?Sized,
    A: EncodeAsVarULE<T>,
    F: VarZeroVecFormat,
{
    debug_assert!(!elements.is_empty());
    let len = compute_serializable_len::<T, A, F>(elements)?;
    debug_assert!(len >= LENGTH_WIDTH as u32);
    // Zero-filled buffer of exactly the computed size; the writer below
    // requires the length to match exactly.
    let mut output: Vec<u8> = alloc::vec![0; len as usize];
    write_serializable_bytes::<T, A, F>(elements, &mut output);
    Some(output)
}

/// Writes the bytes for a VarZeroSlice into an output buffer.
+/// +/// Every byte in the buffer will be initialized after calling this function. +/// +/// # Panics +/// +/// Panics if the buffer is not exactly the correct length. +pub fn write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8]) +where + T: VarULE + ?Sized, + A: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + assert!(elements.len() <= MAX_LENGTH); + let num_elements_bytes = elements.len().to_le_bytes(); + #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior + output[0..LENGTH_WIDTH].copy_from_slice(&num_elements_bytes[0..LENGTH_WIDTH]); + + // idx_offset = offset from the start of the buffer for the next index + let mut idx_offset: usize = LENGTH_WIDTH + METADATA_WIDTH; + // first_dat_offset = offset from the start of the buffer of the first data block + let first_dat_offset: usize = idx_offset + elements.len() * F::INDEX_WIDTH; + // dat_offset = offset from the start of the buffer of the next data block + let mut dat_offset: usize = first_dat_offset; + + for element in elements.iter() { + let element_len = element.encode_var_ule_len(); + + let idx_limit = idx_offset + F::INDEX_WIDTH; + #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior + let idx_slice = &mut output[idx_offset..idx_limit]; + // VZV expects data offsets to be stored relative to the first data block + let idx = dat_offset - first_dat_offset; + assert!(idx <= MAX_INDEX); + #[allow(clippy::indexing_slicing)] // this function is explicitly panicky + idx_slice.copy_from_slice(&idx.to_le_bytes()[..F::INDEX_WIDTH]); + + let dat_limit = dat_offset + element_len; + #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior + let dat_slice = &mut output[dat_offset..dat_limit]; + element.encode_var_ule_write(dat_slice); + debug_assert_eq!(T::validate_byte_slice(dat_slice), Ok(())); + + idx_offset = idx_limit; + dat_offset = dat_limit; + } + + debug_assert_eq!( + idx_offset, + LENGTH_WIDTH + METADATA_WIDTH + 
F::INDEX_WIDTH * elements.len() + ); + assert_eq!(dat_offset, output.len()); +} + +pub fn compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + let idx_len: u32 = u32::try_from(elements.len()) + .ok()? + .checked_mul(F::INDEX_WIDTH as u32)? + .checked_add(LENGTH_WIDTH as u32)? + .checked_add(METADATA_WIDTH as u32)?; + let data_len: u32 = elements + .iter() + .map(|v| u32::try_from(v.encode_var_ule_len()).ok()) + .try_fold(0u32, |s, v| s.checked_add(v?))?; + let ret = idx_len.checked_add(data_len); + if let Some(r) = ret { + if r >= F::MAX_VALUE { + return None; + } + } + ret +} diff --git a/third_party/rust/zerovec/src/varzerovec/databake.rs b/third_party/rust/zerovec/src/varzerovec/databake.rs new file mode 100644 index 0000000000..a3f9db2d17 --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/databake.rs @@ -0,0 +1,68 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{ule::VarULE, VarZeroSlice, VarZeroVec}; +use databake::*; + +impl<T: VarULE + ?Sized> Bake for VarZeroVec<'_, T> { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + if self.is_empty() { + quote! { zerovec::VarZeroVec::new() } + } else { + let bytes = databake::Bake::bake(&self.as_bytes(), env); + // Safe because self.as_bytes is a safe input + quote! { unsafe { zerovec::VarZeroVec::from_bytes_unchecked(#bytes) } } + } + } +} + +impl<T: VarULE + ?Sized> Bake for &VarZeroSlice<T> { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + if self.is_empty() { + quote! { zerovec::VarZeroSlice::new_empty() } + } else { + let bytes = databake::Bake::bake(&self.as_bytes(), env); + // Safe because self.as_bytes is a safe input + quote! 
{ unsafe { zerovec::VarZeroSlice::from_bytes_unchecked(#bytes) } } + } + } +} + +#[test] +fn test_baked_vec() { + test_bake!( + VarZeroVec<str>, + const: crate::VarZeroVec::new(), + zerovec + ); + test_bake!( + VarZeroVec<str>, + const: unsafe { + crate::VarZeroVec::from_bytes_unchecked( + b"\x02\x01\0\x16\0M\x01\\\x11" + ) + }, + zerovec + ); +} + +#[test] +fn test_baked_slice() { + test_bake!( + &VarZeroSlice<str>, + const: crate::VarZeroSlice::new_empty(), + zerovec + ); + test_bake!( + &VarZeroSlice<str>, + const: unsafe { + crate::VarZeroSlice::from_bytes_unchecked( + b"\x02\x01\0\x16\0M\x01\\\x11" + ) + }, + zerovec + ); +} diff --git a/third_party/rust/zerovec/src/varzerovec/mod.rs b/third_party/rust/zerovec/src/varzerovec/mod.rs new file mode 100644 index 0000000000..2e9f680006 --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/mod.rs @@ -0,0 +1,26 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! See [`VarZeroVec`](crate::VarZeroVec) for details + +pub(crate) mod components; +pub(crate) mod owned; +pub(crate) mod slice; +pub(crate) mod vec; + +#[cfg(feature = "databake")] +mod databake; + +#[cfg(feature = "serde")] +mod serde; + +pub use crate::{VarZeroSlice, VarZeroVec}; + +#[cfg(feature = "bench")] +#[doc(hidden)] +pub use components::VarZeroVecComponents; + +pub use components::{Index16, Index32, VarZeroVecFormat}; + +pub use owned::VarZeroVecOwned; diff --git a/third_party/rust/zerovec/src/varzerovec/owned.rs b/third_party/rust/zerovec/src/varzerovec/owned.rs new file mode 100644 index 0000000000..c5556315fb --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/owned.rs @@ -0,0 +1,662 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// The mutation operations in this file should panic to prevent undefined behavior +#![allow(clippy::unwrap_used)] +#![allow(clippy::expect_used)] +#![allow(clippy::indexing_slicing)] +#![allow(clippy::panic)] + +use super::*; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::any; +use core::convert::TryInto; +use core::marker::PhantomData; +use core::ops::Deref; +use core::ops::Range; +use core::{fmt, ptr, slice}; + +use super::components::LENGTH_WIDTH; +use super::components::MAX_INDEX; +use super::components::MAX_LENGTH; +use super::components::METADATA_WIDTH; + +/// A fully-owned [`VarZeroVec`]. This type has no lifetime but has the same +/// internal buffer representation of [`VarZeroVec`], making it cheaply convertible to +/// [`VarZeroVec`] and [`VarZeroSlice`]. +/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +pub struct VarZeroVecOwned<T: ?Sized, F = Index16> { + marker: PhantomData<(Box<T>, F)>, + // safety invariant: must parse into a valid VarZeroVecComponents + entire_slice: Vec<u8>, +} + +impl<T: ?Sized, F> Clone for VarZeroVecOwned<T, F> { + fn clone(&self) -> Self { + VarZeroVecOwned { + marker: self.marker, + entire_slice: self.entire_slice.clone(), + } + } +} + +// The effect of a shift on the indices in the varzerovec. 
+#[derive(PartialEq)] +enum ShiftType { + Insert, + Replace, + Remove, +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVecOwned<T, F> { + type Target = VarZeroSlice<T, F>; + fn deref(&self) -> &VarZeroSlice<T, F> { + self.as_slice() + } +} + +impl<T: VarULE + ?Sized, F> VarZeroVecOwned<T, F> { + /// Construct an empty VarZeroVecOwned + pub fn new() -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::new(), + } + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> { + /// Construct a VarZeroVecOwned from a [`VarZeroSlice`] by cloning the internal data + pub fn from_slice(slice: &VarZeroSlice<T, F>) -> Self { + Self { + marker: PhantomData, + entire_slice: slice.as_bytes().into(), + } + } + + /// Construct a VarZeroVecOwned from a list of elements + pub fn try_from_elements<A>(elements: &[A]) -> Result<Self, &'static str> + where + A: EncodeAsVarULE<T>, + { + Ok(if elements.is_empty() { + Self::from_slice(VarZeroSlice::new_empty()) + } else { + Self { + marker: PhantomData, + // TODO(#1410): Rethink length errors in VZV. + entire_slice: components::get_serializable_bytes_non_empty::<T, A, F>(elements) + .ok_or( + "Attempted to build VarZeroVec out of elements that \ + cumulatively are larger than a u32 in size", + )?, + } + }) + } + + /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] + pub fn as_slice(&self) -> &VarZeroSlice<T, F> { + let slice: &[u8] = &self.entire_slice; + unsafe { + // safety: the slice is known to come from a valid parsed VZV + VarZeroSlice::from_byte_slice_unchecked(slice) + } + } + + /// Try to allocate a buffer with enough capacity for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn with_capacity(capacity: usize) -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::with_capacity(capacity * (F::INDEX_WIDTH + 4)), + } + } + + /// Try to reserve space for `capacity` + /// elements. 
Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn reserve(&mut self, capacity: usize) { + self.entire_slice.reserve(capacity * (F::INDEX_WIDTH + 4)) + } + + /// Get the position of a specific element in the data segment. + /// + /// If `idx == self.len()`, it will return the size of the data segment (where a new element would go). + /// + /// ## Safety + /// `idx <= self.len()` and `self.as_encoded_bytes()` is well-formed. + unsafe fn element_position_unchecked(&self, idx: usize) -> usize { + let len = self.len(); + let out = if idx == len { + self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - (F::INDEX_WIDTH * len) + } else { + F::rawbytes_to_usize(*self.index_data(idx)) + }; + debug_assert!( + out + LENGTH_WIDTH + METADATA_WIDTH + len * F::INDEX_WIDTH <= self.entire_slice.len() + ); + out + } + + /// Get the range of a specific element in the data segment. + /// + /// ## Safety + /// `idx < self.len()` and `self.as_encoded_bytes()` is well-formed. + unsafe fn element_range_unchecked(&self, idx: usize) -> core::ops::Range<usize> { + let start = self.element_position_unchecked(idx); + let end = self.element_position_unchecked(idx + 1); + debug_assert!(start <= end, "{start} > {end}"); + start..end + } + + /// Set the number of elements in the list without any checks. + /// + /// ## Safety + /// No safe functions may be called until `self.as_encoded_bytes()` is well-formed. + unsafe fn set_len(&mut self, len: usize) { + assert!(len <= MAX_LENGTH); + let len_bytes = len.to_le_bytes(); + self.entire_slice[0..LENGTH_WIDTH].copy_from_slice(&len_bytes[0..LENGTH_WIDTH]); + // Double-check that the length fits in the length field + assert_eq!(len_bytes[LENGTH_WIDTH..].iter().sum::<u8>(), 0); + } + + fn index_range(index: usize) -> Range<usize> { + let pos = LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * index; + pos..pos + F::INDEX_WIDTH + } + + /// Return the raw bytes representing the given `index`. 
+ /// + /// ## Safety + /// The index must be valid, and self.as_encoded_bytes() must be well-formed + unsafe fn index_data(&self, index: usize) -> &F::RawBytes { + &F::RawBytes::from_byte_slice_unchecked(&self.entire_slice[Self::index_range(index)])[0] + } + + /// Return the mutable slice representing the given `index`. + /// + /// ## Safety + /// The index must be valid. self.as_encoded_bytes() must have allocated space + /// for this index, but need not have its length appropriately set. + unsafe fn index_data_mut(&mut self, index: usize) -> &mut F::RawBytes { + let ptr = self.entire_slice.as_mut_ptr(); + let range = Self::index_range(index); + + // Doing this instead of just `get_unchecked_mut()` because it's unclear + // if `get_unchecked_mut()` can be called out of bounds on a slice even + // if we know the buffer is larger. + let data = slice::from_raw_parts_mut(ptr.add(range.start), F::INDEX_WIDTH); + + &mut F::rawbytes_from_byte_slice_unchecked_mut(data)[0] + } + + /// Shift the indices starting with and after `starting_index` by the provided `amount`. + /// + /// ## Safety + /// Adding `amount` to each index at or after `starting_index` must not result in the slice becoming malformed. + /// The length of the slice must be correctly set. + unsafe fn shift_indices(&mut self, starting_index: usize, amount: i32) { + let len = self.len(); + let indices = F::rawbytes_from_byte_slice_unchecked_mut( + &mut self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * len], + ); + for idx in &mut indices[starting_index..]
{ + let mut new_idx = F::rawbytes_to_usize(*idx); + if amount > 0 { + new_idx = new_idx.checked_add(amount.try_into().unwrap()).unwrap(); + } else { + new_idx = new_idx.checked_sub((-amount).try_into().unwrap()).unwrap(); + } + *idx = F::usize_to_rawbytes(new_idx); + } + } + + /// Get this [`VarZeroVecOwned`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroVecOwned`], + /// it is more efficient to perform this conversion first + pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { + self.as_slice().into() + } + + /// Empty the vector + pub fn clear(&mut self) { + self.entire_slice.clear() + } + + /// Consume this vector and return the backing buffer + #[inline] + pub fn into_bytes(self) -> Vec<u8> { + self.entire_slice + } + + /// Invalidate and resize the data at an index, optionally inserting or removing the index. + /// Also updates affected indices and the length. + /// Returns a slice to the new element data - it doesn't contain uninitialized data but its value is indeterminate. + /// + /// ## Safety + /// - `index` must be a valid index, or, if `shift_type == ShiftType::Insert`, `index == self.len()` is allowed. + /// - `new_size` mustn't result in the data segment growing larger than `F::MAX_VALUE`. + unsafe fn shift(&mut self, index: usize, new_size: usize, shift_type: ShiftType) -> &mut [u8] { + // The format of the encoded data is: + // - a header of `LENGTH_WIDTH + METADATA_WIDTH` bytes, beginning with "len" + // - `len * F::INDEX_WIDTH` bytes for an array of indices + // - the actual data to which the indices point + // + // When inserting or removing an element, the size of the indices segment must be changed, + // so the data before the target element must be shifted by `F::INDEX_WIDTH` bytes in addition to the + // shifting needed for the new element size.
+ let len = self.len(); + let slice_len = self.entire_slice.len(); + + let prev_element = match shift_type { + ShiftType::Insert => { + let pos = self.element_position_unchecked(index); + // In the case of an insert, there's no previous element, + // so it's an empty range at the new position. + pos..pos + } + _ => self.element_range_unchecked(index), + }; + + // How much shifting must be done in bytes due to removal/insertion of an index. + let index_shift: i64 = match shift_type { + ShiftType::Insert => F::INDEX_WIDTH as i64, + ShiftType::Replace => 0, + ShiftType::Remove => -(F::INDEX_WIDTH as i64), + }; + // The total shift in byte size of the owned slice. + let shift: i64 = + new_size as i64 - (prev_element.end - prev_element.start) as i64 + index_shift; + let new_slice_len = slice_len.wrapping_add(shift as usize); + if shift > 0 { + if new_slice_len > F::MAX_VALUE as usize { + panic!( + "Attempted to grow VarZeroVec to an encoded size that does not fit within the length size used by {}", + any::type_name::<F>() + ); + } + self.entire_slice.resize(new_slice_len, 0); + } + + // Now that we've ensured there's enough space, we can shift the data around. + { + // Note: There are no references introduced between pointer creation and pointer use, and all + // raw pointers are derived from a single &mut. This preserves pointer provenance. + let slice_range = self.entire_slice.as_mut_ptr_range(); + let old_slice_end = slice_range.start.add(slice_len); + let data_start = slice_range + .start + .add(LENGTH_WIDTH + METADATA_WIDTH + len * F::INDEX_WIDTH); + let prev_element_p = + data_start.add(prev_element.start)..data_start.add(prev_element.end); + + // The memory range of the affected index. + // When inserting: where the new index goes. + // When removing: where the index being removed is. + // When replacing: unused. 
+ let index_range = { + let index_start = slice_range + .start + .add(LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * index); + index_start..index_start.add(F::INDEX_WIDTH) + }; + + unsafe fn shift_bytes(block: Range<*const u8>, to: *mut u8) { + debug_assert!(block.end >= block.start); + ptr::copy(block.start, to, block.end.offset_from(block.start) as usize); + } + + if shift_type == ShiftType::Remove { + // Move the data before the element back by `F::INDEX_WIDTH` bytes to remove the index. + shift_bytes(index_range.end..prev_element_p.start, index_range.start); + } + + // Shift data after the element to its new position. + shift_bytes( + prev_element_p.end..old_slice_end, + prev_element_p + .start + .offset((new_size as i64 + index_shift) as isize), + ); + + let first_affected_index = match shift_type { + ShiftType::Insert => { + // Move data before the element forward by `F::INDEX_WIDTH` bytes to make space for a new index. + shift_bytes(index_range.start..prev_element_p.start, index_range.end); + + *self.index_data_mut(index) = F::usize_to_rawbytes(prev_element.start); + self.set_len(len + 1); + index + 1 + } + ShiftType::Remove => { + self.set_len(len - 1); + index + } + ShiftType::Replace => index + 1, + }; + // No raw pointer use should occur after this point (because of self.index_data and self.set_len). + + // Set the new slice length. This must be done after shifting data around to avoid uninitialized data. + self.entire_slice.set_len(new_slice_len); + + // Shift the affected indices. + self.shift_indices(first_affected_index, (shift - index_shift) as i32); + }; + + debug_assert!(self.verify_integrity()); + + // Return a mut slice to the new element data. + let element_pos = LENGTH_WIDTH + + METADATA_WIDTH + + self.len() * F::INDEX_WIDTH + + self.element_position_unchecked(index); + &mut self.entire_slice[element_pos..element_pos + new_size] + } + + /// Checks the internal invariants of the vec to ensure safe code will not cause UB. + /// Returns whether integrity was verified.
+ /// + /// Note: an index is valid if it doesn't point to data past the end of the slice and is + /// less than or equal to all future indices. The length of the index segment is not part of each index. + fn verify_integrity(&self) -> bool { + if self.is_empty() && !self.entire_slice.is_empty() { + return false; + } + let slice_len = self.entire_slice.len(); + match slice_len { + 0 => return true, + 1..=3 => return false, + _ => (), + } + let len = unsafe { + RawBytesULE::<LENGTH_WIDTH>::from_byte_slice_unchecked( + &self.entire_slice[..LENGTH_WIDTH], + )[0] + .as_unsigned_int() + }; + if len == 0 { + // An empty vec must have an empty slice: there is only a single valid byte representation. + return false; + } + if slice_len < LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::INDEX_WIDTH { + // Not enough room for the indices. + return false; + } + let data_len = + self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - len as usize * F::INDEX_WIDTH; + if data_len > MAX_INDEX { + // The data segment is too long. + return false; + } + + // Test index validity. + let indices = unsafe { + F::RawBytes::from_byte_slice_unchecked( + &self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH + ..LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::INDEX_WIDTH], + ) + }; + for idx in indices { + if F::rawbytes_to_usize(*idx) > data_len { + // Indices must not point past the data segment. + return false; + } + } + for window in indices.windows(2) { + if F::rawbytes_to_usize(window[0]) > F::rawbytes_to_usize(window[1]) { + // Indices must be in non-decreasing order. 
+ return false; + } + } + true + } + + /// Insert an element at the end of this vector + pub fn push<A: EncodeAsVarULE<T> + ?Sized>(&mut self, element: &A) { + self.insert(self.len(), element) + } + + /// Insert an element at index `index` + /// + /// # Panics + /// + /// Panics if `index > self.len()` + pub fn insert<A: EncodeAsVarULE<T> + ?Sized>(&mut self, index: usize, element: &A) { + let len = self.len(); + if index > len { + panic!("Called out-of-bounds insert() on VarZeroVec, index {index} len {len}"); + } + + let value_len = element.encode_var_ule_len(); + + if len == 0 { + let header_len = LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH; + let cap = header_len + value_len; + self.entire_slice.resize(cap, 0); + self.entire_slice[0] = 1; // set length + element.encode_var_ule_write(&mut self.entire_slice[header_len..]); + return; + } + + assert!(value_len < MAX_INDEX); + unsafe { + let place = self.shift(index, value_len, ShiftType::Insert); + element.encode_var_ule_write(place); + } + } + + /// Remove the element at index `index` + /// + /// # Panics + /// + /// Panics if `index >= self.len()` + pub fn remove(&mut self, index: usize) { + let len = self.len(); + if index >= len { + panic!("Called out-of-bounds remove() on VarZeroVec, index {index} len {len}"); + } + if len == 1 { + // This is removing the last element. Set the slice to empty to ensure all empty vecs have empty data slices.
+ self.entire_slice.clear(); + return; + } + unsafe { + self.shift(index, 0, ShiftType::Remove); + } + } + + /// Replace the element at index `index` with another + /// + /// # Panics + /// + /// Panics if `index >= self.len()` + pub fn replace<A: EncodeAsVarULE<T> + ?Sized>(&mut self, index: usize, element: &A) { + let len = self.len(); + if index >= len { + panic!("Called out-of-bounds replace() on VarZeroVec, index {index} len {len}"); + } + + let value_len = element.encode_var_ule_len(); + + assert!(value_len < MAX_INDEX); + unsafe { + let place = self.shift(index, value_len, ShiftType::Replace); + element.encode_var_ule_write(place); + } + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVecOwned<T, F> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + VarZeroSlice::fmt(self, f) + } +} + +impl<T: VarULE + ?Sized, F> Default for VarZeroVecOwned<T, F> { + fn default() -> Self { + Self::new() + } +} + +impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVecOwned<T, F> +where + T: VarULE + ?Sized, + T: PartialEq, + A: AsRef<T>, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &&[A]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_ref())) + } +} + +impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<&'a VarZeroSlice<T, F>> + for VarZeroVecOwned<T, F> +{ + fn from(other: &'a VarZeroSlice<T, F>) -> Self { + Self::from_slice(other) + } +} + +#[cfg(test)] +mod test { + use super::VarZeroVecOwned; + #[test] + fn test_insert_integrity() { + let mut items: Vec<String> = Vec::new(); + let mut zerovec = VarZeroVecOwned::<str>::new(); + + // Insert into an empty vec. + items.insert(0, "1234567890".into()); + zerovec.insert(0, "1234567890"); + assert_eq!(zerovec, &*items); + + zerovec.insert(1, "foo3"); + items.insert(1, "foo3".into()); + assert_eq!(zerovec, &*items); + + // Insert at the end.
+ items.insert(items.len(), "qwertyuiop".into()); + zerovec.insert(zerovec.len(), "qwertyuiop"); + assert_eq!(zerovec, &*items); + + items.insert(0, "asdfghjkl;".into()); + zerovec.insert(0, "asdfghjkl;"); + assert_eq!(zerovec, &*items); + + items.insert(2, "".into()); + zerovec.insert(2, ""); + assert_eq!(zerovec, &*items); + } + + #[test] + // ensure that inserting empty items works + fn test_empty_inserts() { + let mut items: Vec<String> = Vec::new(); + let mut zerovec = VarZeroVecOwned::<str>::new(); + + // Insert into an empty vec. + items.insert(0, "".into()); + zerovec.insert(0, ""); + assert_eq!(zerovec, &*items); + + items.insert(0, "".into()); + zerovec.insert(0, ""); + assert_eq!(zerovec, &*items); + + items.insert(0, "1234567890".into()); + zerovec.insert(0, "1234567890"); + assert_eq!(zerovec, &*items); + + items.insert(0, "".into()); + zerovec.insert(0, ""); + assert_eq!(zerovec, &*items); + } + + #[test] + fn test_small_insert_integrity() { + // Tests that insert() works even when there + // is not enough space for the new index in entire_slice.len() + let mut items: Vec<String> = Vec::new(); + let mut zerovec = VarZeroVecOwned::<str>::new(); + + // Insert into an empty vec. 
+ items.insert(0, "abc".into()); + zerovec.insert(0, "abc"); + assert_eq!(zerovec, &*items); + + zerovec.insert(1, "def"); + items.insert(1, "def".into()); + assert_eq!(zerovec, &*items); + } + + #[test] + #[should_panic] + fn test_insert_past_end() { + VarZeroVecOwned::<str>::new().insert(1, ""); + } + + #[test] + fn test_remove_integrity() { + let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""]; + let mut zerovec = VarZeroVecOwned::<str>::try_from_elements(&items).unwrap(); + + for index in [0, 2, 4, 0, 1, 1, 0] { + items.remove(index); + zerovec.remove(index); + assert_eq!(zerovec, &*items, "index {}, len {}", index, items.len()); + } + } + + #[test] + fn test_removing_last_element_clears() { + let mut zerovec = VarZeroVecOwned::<str>::try_from_elements(&["buy some apples"]).unwrap(); + assert!(!zerovec.as_bytes().is_empty()); + zerovec.remove(0); + assert!(zerovec.as_bytes().is_empty()); + } + + #[test] + #[should_panic] + fn test_remove_past_end() { + VarZeroVecOwned::<str>::new().remove(0); + } + + #[test] + fn test_replace_integrity() { + let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""]; + let mut zerovec = VarZeroVecOwned::<str>::try_from_elements(&items).unwrap(); + + // Replace with an element of the same size (and the first element) + items[0] = "blablah"; + zerovec.replace(0, "blablah"); + assert_eq!(zerovec, &*items); + + // Replace with a smaller element + items[1] = "twily"; + zerovec.replace(1, "twily"); + assert_eq!(zerovec, &*items); + + // Replace an empty element + items[3] = "aoeuidhtns"; + zerovec.replace(3, "aoeuidhtns"); + assert_eq!(zerovec, &*items); + + // Replace the last element + items[6] = "0123456789"; + zerovec.replace(6, "0123456789"); + assert_eq!(zerovec, &*items); + + // Replace with an empty element + items[2] = ""; + zerovec.replace(2, ""); + assert_eq!(zerovec, &*items); + } + + #[test] + #[should_panic] + fn test_replace_past_end() { + 
VarZeroVecOwned::<str>::new().replace(0, ""); + } +} diff --git a/third_party/rust/zerovec/src/varzerovec/serde.rs b/third_party/rust/zerovec/src/varzerovec/serde.rs new file mode 100644 index 0000000000..8025fc085b --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/serde.rs @@ -0,0 +1,268 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{VarZeroSlice, VarZeroVec, VarZeroVecFormat}; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt; +use core::marker::PhantomData; +use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; +#[cfg(feature = "serde")] +use serde::ser::{Serialize, SerializeSeq, Serializer}; + +struct VarZeroVecVisitor<T: ?Sized, F: VarZeroVecFormat> { + #[allow(clippy::type_complexity)] // this is a private marker type, who cares + marker: PhantomData<(fn() -> Box<T>, F)>, +} + +impl<T: ?Sized, F: VarZeroVecFormat> Default for VarZeroVecVisitor<T, F> { + fn default() -> Self { + Self { + marker: PhantomData, + } + } +} + +impl<'de, T, F> Visitor<'de> for VarZeroVecVisitor<T, F> +where + T: VarULE + ?Sized, + Box<T>: Deserialize<'de>, + F: VarZeroVecFormat, +{ + type Value = VarZeroVec<'de, T, F>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a sequence or borrowed buffer of bytes") + } + + fn visit_borrowed_bytes<E>(self, bytes: &'de [u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + VarZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let mut vec: Vec<Box<T>> = if let Some(capacity) = seq.size_hint() { + Vec::with_capacity(capacity) + } else { + Vec::new() + }; + while let Some(value) = seq.next_element::<Box<T>>()? 
{ + vec.push(value); + } + Ok(VarZeroVec::from(&vec)) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, T, F> Deserialize<'de> for VarZeroVec<'a, T, F> +where + T: VarULE + ?Sized, + Box<T>: Deserialize<'de>, + F: VarZeroVecFormat, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let visitor = VarZeroVecVisitor::<T, F>::default(); + if deserializer.is_human_readable() { + deserializer.deserialize_seq(visitor) + } else { + deserializer.deserialize_bytes(visitor) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, T, F> Deserialize<'de> for &'a VarZeroSlice<T, F> +where + T: VarULE + ?Sized, + Box<T>: Deserialize<'de>, + F: VarZeroVecFormat, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "&VarZeroSlice cannot be deserialized from human-readable formats", + )) + } else { + let deserialized = VarZeroVec::<'a, T, F>::deserialize(deserializer)?; + let borrowed = if let VarZeroVec::Borrowed(b) = deserialized { + b + } else { + return Err(de::Error::custom( + "&VarZeroSlice can only deserialize in zero-copy ways", + )); + }; + Ok(borrowed) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, T, F> Deserialize<'de> for Box<VarZeroSlice<T, F>> +where + T: VarULE + ?Sized, + Box<T>: Deserialize<'de>, + F: VarZeroVecFormat, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let deserialized = VarZeroVec::<T, F>::deserialize(deserializer)?; + Ok(deserialized.to_boxed()) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<T, F> Serialize for VarZeroVec<'_, T, F> 
+where + T: Serialize + VarULE + ?Sized, + F: VarZeroVecFormat, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + if serializer.is_human_readable() { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for value in self.iter() { + seq.serialize_element(value)?; + } + seq.end() + } else { + serializer.serialize_bytes(self.as_bytes()) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<T, F> Serialize for VarZeroSlice<T, F> +where + T: Serialize + VarULE + ?Sized, + F: VarZeroVecFormat, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + self.as_varzerovec().serialize(serializer) + } +} + +#[cfg(test)] +#[allow(non_camel_case_types)] +mod test { + use crate::{VarZeroSlice, VarZeroVec}; + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_VarZeroVec<'data> { + #[serde(borrow)] + _data: VarZeroVec<'data, str>, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_VarZeroSlice<'data> { + #[serde(borrow)] + _data: &'data VarZeroSlice<str>, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_VarZeroVec_of_VarZeroSlice<'data> { + #[serde(borrow)] + _data: VarZeroVec<'data, VarZeroSlice<str>>, + } + + // ["foo", "bar", "baz", "dolor", "quux", "lorem ipsum"]; + const BYTES: &[u8] = &[ + 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122, + 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115, + 117, 109, + ]; + const JSON_STR: &str = "[\"foo\",\"bar\",\"baz\",\"dolor\",\"quux\",\"lorem ipsum\"]"; + const BINCODE_BUF: &[u8] = &[ + 45, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, + 98, 97, 114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, + 109, 32, 105, 112, 115, 117, 109, + 
]; + + // ["w", "ω", "文", "𑄃"] + const NONASCII_STR: &[&str] = &["w", "ω", "文", "𑄃"]; + const NONASCII_BYTES: &[u8] = &[ + 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, + ]; + #[test] + fn test_serde_json() { + let zerovec_orig: VarZeroVec<str> = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); + let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); + assert_eq!(JSON_STR, json_str); + // VarZeroVec should deserialize from JSON to either Vec or VarZeroVec + let vec_new: Vec<Box<str>> = + serde_json::from_str(&json_str).expect("deserialize from buffer to Vec"); + assert_eq!(zerovec_orig.to_vec(), vec_new); + let zerovec_new: VarZeroVec<str> = + serde_json::from_str(&json_str).expect("deserialize from buffer to VarZeroVec"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + assert!(zerovec_new.is_owned()); + } + + #[test] + fn test_serde_bincode() { + let zerovec_orig: VarZeroVec<str> = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + let zerovec_new: VarZeroVec<str> = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroVec"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + assert!(!zerovec_new.is_owned()); + } + + #[test] + fn test_vzv_borrowed() { + let zerovec_orig: &VarZeroSlice<str> = + VarZeroSlice::parse_byte_slice(BYTES).expect("parse"); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + let zerovec_new: &VarZeroSlice<str> = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroSlice"); + assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); + } + + #[test] + fn test_nonascii_bincode() { + let src_vec = NONASCII_STR + .iter() + .copied() + .map(Box::<str>::from) + .collect::<Vec<_>>(); + let mut zerovec: VarZeroVec<str> = + 
VarZeroVec::parse_byte_slice(NONASCII_BYTES).expect("parse"); + assert_eq!(zerovec.to_vec(), src_vec); + let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); + let zerovec_result = + bincode::deserialize::<VarZeroVec<str>>(&bincode_buf).expect("deserialize"); + assert_eq!(zerovec_result.to_vec(), src_vec); + + // try again with owned zerovec + zerovec.make_mut(); + let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); + let zerovec_result = + bincode::deserialize::<VarZeroVec<str>>(&bincode_buf).expect("deserialize"); + assert_eq!(zerovec_result.to_vec(), src_vec); + } +} diff --git a/third_party/rust/zerovec/src/varzerovec/slice.rs b/third_party/rust/zerovec/src/varzerovec/slice.rs new file mode 100644 index 0000000000..119f1d38f8 --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/slice.rs @@ -0,0 +1,573 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::components::VarZeroVecComponents; +use super::*; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cmp::{Ord, Ordering, PartialOrd}; +use core::fmt; +use core::marker::PhantomData; +use core::mem; + +use core::ops::Index; +use core::ops::Range; + +/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]` +/// where `T` is not `Sized`. +/// +/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain +/// owned data and as such is ideal for deserialization since most human readable +/// serialization formats cannot unconditionally deserialize zero-copy. +/// +/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): +/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead +/// using `VarZeroVec<ZeroSlice<T>>`. 
+/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +/// +/// This type can be nested within itself to allow for multi-level nested `Vec`s. +/// +/// # Examples +/// +/// ## Nested Slices +/// +/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>` +/// +/// ```rust +/// use zerovec::ule::*; +/// use zerovec::{VarZeroSlice, VarZeroVec, ZeroVec}; +/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"]; +/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"]; +/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"]; +/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"]; +/// let strings_12 = vec![&*strings_1, &*strings_2]; +/// let strings_34 = vec![&*strings_3, &*strings_4]; +/// let all_strings = vec![strings_12, strings_34]; +/// +/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1); +/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2); +/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3); +/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4); +/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]); +/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]); +/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]); +/// +/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all +/// .iter() +/// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| { +/// v.iter() +/// .map(|x: &VarZeroSlice<_>| { +/// x.as_varzerovec() +/// .iter() +/// .map(|s| s.to_owned()) +/// .collect::<Vec<String>>() +/// }) +/// .collect::<Vec<_>>() +/// }) +/// .collect::<Vec<_>>(); +/// assert_eq!(reconstructed, all_strings); +/// +/// let bytes = vzv_all.as_bytes(); +/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> = +/// 
VarZeroVec::parse_byte_slice(bytes).unwrap(); +/// assert_eq!(vzv_from_bytes, vzv_all); +/// ``` +/// +/// ## Iterate over Windows +/// +/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like +/// [core::slice::Windows], this behavior can be easily modeled using an iterator: +/// +/// ``` +/// use zerovec::VarZeroVec; +/// +/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]); +/// # let mut pairs: Vec<(&str, &str)> = Vec::new(); +/// +/// let mut it = vzv.iter().peekable(); +/// while let (Some(x), Some(y)) = (it.next(), it.peek()) { +/// // Evaluate (x, y) here. +/// # pairs.push((x, y)); +/// } +/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]); +/// ``` +// +// safety invariant: The slice MUST be one which parses to +// a valid VarZeroVecComponents<T> +#[repr(transparent)] +pub struct VarZeroSlice<T: ?Sized, F = Index16> { + marker: PhantomData<(F, T)>, + /// The original slice this was constructed from + entire_slice: [u8], +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> { + /// Construct a new empty VarZeroSlice + pub const fn new_empty() -> &'static Self { + // The empty VZV is special-cased to the empty slice + unsafe { mem::transmute(&[] as &[u8]) } + } + + /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer + #[inline] + pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> { + unsafe { + // safety: VarZeroSlice is guaranteed to parse here + VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. 
+ pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + /// Get the number of elements in this slice + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn len(&self) -> usize { + self.as_components().len() + } + + /// Returns `true` if the slice contains no elements. + /// + /// # Examples + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings: Vec<String> = vec![]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert!(vec.is_empty()); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn is_empty(&self) -> bool { + self.as_components().is_empty() + } + + /// Obtain an iterator over this slice's elements + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// assert_eq!(iter_results[0], "foo"); + /// assert_eq!(iter_results[1], "bar"); + /// assert_eq!(iter_results[2], "baz"); + /// assert_eq!(iter_results[3], "quux"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> { + self.as_components().iter() + } + + /// Get one of this slice's elements, returning `None` if the index is out of bounds + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + 
/// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// assert_eq!(vec.get(0), Some("foo")); + /// assert_eq!(vec.get(1), Some("bar")); + /// assert_eq!(vec.get(2), Some("baz")); + /// assert_eq!(vec.get(3), Some("quux")); + /// assert_eq!(vec.get(4), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn get(&self, idx: usize) -> Option<&T> { + self.as_components().get(idx) + } + + /// Get one of this slice's elements + /// + /// # Safety + /// + /// `index` must be in range + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// let mut iter_results: Vec<&str> = vec.iter().collect(); + /// unsafe { + /// assert_eq!(vec.get_unchecked(0), "foo"); + /// assert_eq!(vec.get_unchecked(1), "bar"); + /// assert_eq!(vec.get_unchecked(2), "baz"); + /// assert_eq!(vec.get_unchecked(3), "quux"); + /// } + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub unsafe fn get_unchecked(&self, idx: usize) -> &T { + self.as_components().get_unchecked(idx) + } + + /// Obtain an owned `Vec<Box<T>>` out of this + pub fn to_vec(&self) -> Vec<Box<T>> { + self.as_components().to_vec() + } + + /// Get a reference to the entire encoded backing buffer of this slice + /// + /// The bytes can be passed back to [`Self::parse_byte_slice()`]. + /// + /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. 
+ /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz"]; + /// let vzv = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap()); + /// + /// # Ok::<(), ZeroVecError>(()) + /// ``` + #[inline] + pub const fn as_bytes(&self) -> &[u8] { + &self.entire_slice + } + + /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroSlice`], + /// it is more efficient to perform this conversion first + pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { + VarZeroVec::Borrowed(self) + } + + /// Parse a VarZeroSlice from a slice of the appropriate format + /// + /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] + pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> { + <Self as VarULE>::parse_byte_slice(slice) + } + + /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice` + /// + /// # Safety + /// - `bytes` must be a valid sequence of bytes for this VarZeroVec + pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] { + let range = self.as_components().get_range(idx); + #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges + &mut self.entire_slice[range] + } +} + +impl<T, F> VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + T: Ord, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see + /// the standard library function [`binary_search`]. 
+ /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.binary_search("f"), Ok(2)); + /// assert_eq!(vec.binary_search("e"), Err(2)); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T) -> Result<usize, usize> { + self.as_components().binary_search(x) + } + + /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range. + /// + /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according + /// to the behavior of the standard library function [`binary_search`]. + /// + /// The index is returned relative to the start of the range. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// // Same behavior as binary_search when the range covers the whole slice: + /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3))); + /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4))); + /// + /// // Will not look outside of the range: + /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1))); + /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0))); + /// + /// // Will return indices relative to the start of the range: + /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2))); + /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3))); + /// + /// // Will return `None` if the range is out of bounds: + /// assert_eq!(vec.binary_search_in_range("x", 100..200), None); + /// 
assert_eq!(vec.binary_search_in_range("x", 0..200), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search_in_range( + &self, + x: &T, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.as_components().binary_search_in_range(x, range) + } +} + +impl<T, F> VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + F: VarZeroVecFormat, +{ + /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see + /// the standard library function [`binary_search_by`]. + /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2)); + /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2)); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by + #[inline] + pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> { + self.as_components().binary_search_by(predicate) + } + + /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range. + /// + /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according + /// to the behavior of the standard library function [`binary_search`]. + /// + /// The index is returned relative to the start of the range. 
+ /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// // Same behavior as binary_search when the range covers the whole slice: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7), + /// Some(Ok(3)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7), + /// Some(Err(4)) + /// ); + /// + /// // Will not look outside of the range: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1), + /// Some(Err(1)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7), + /// Some(Err(0)) + /// ); + /// + /// // Will return indices relative to the start of the range: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6), + /// Some(Ok(2)) + /// ); + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6), + /// Some(Err(3)) + /// ); + /// + /// // Will return `None` if the range is out of bounds: + /// assert_eq!( + /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200), + /// None + /// ); + /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + pub fn binary_search_in_range_by( + &self, + predicate: impl FnMut(&T) -> Ordering, + range: Range<usize>, + ) -> Option<Result<usize, usize>> { + self.as_components() + .binary_search_in_range_by(predicate, range) + } +} +// Safety (based on the safety checklist on the VarULE trait): +// 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a +// `[u8]` slice which satisfies this invariant) +// 2. 
VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a +// `[u8]` slice which satisfies this invariant) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data +// 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type) +unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + let _: VarZeroVecComponents<T, F> = VarZeroVecComponents::parse_byte_slice(bytes)?; + Ok(()) + } + + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + fn as_byte_slice(&self) -> &[u8] { + &self.entire_slice + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> { + type Output = T; + fn index(&self, index: usize) -> &Self::Output { + #[allow(clippy::panic)] // documented + match self.get(index) { + Some(x) => x, + None => panic!( + "index out of bounds: the len is {} but the index is {index}", + self.len() + ), + } + } +} + +impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + T: PartialEq, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &VarZeroSlice<T, F>) -> bool { + // VarULE has an API guarantee that this is equivalent + // to `T::VarULE::eq()` + self.entire_slice.eq(&other.entire_slice) + } +} + +impl<T, F> Eq for VarZeroSlice<T, F> +where + T: VarULE, + T: ?Sized, + T: Eq, + F: VarZeroVecFormat, +{ +} + +impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> { + #[inline] + fn 
partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.iter().partial_cmp(other.iter()) + } +} + +impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> { + fn as_ref(&self) -> &VarZeroSlice<T, F> { + self + } +} diff --git a/third_party/rust/zerovec/src/varzerovec/vec.rs b/third_party/rust/zerovec/src/varzerovec/vec.rs new file mode 100644 index 0000000000..64928509f8 --- /dev/null +++ b/third_party/rust/zerovec/src/varzerovec/vec.rs @@ -0,0 +1,531 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; + +use alloc::vec::Vec; +use core::cmp::{Ord, Ordering, PartialOrd}; +use core::fmt; +use core::ops::Deref; + +use super::*; + +/// A zero-copy, byte-aligned vector for variable-width types. +/// +/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is +/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and +/// where `T`'s data is variable-length (e.g. `String`) +/// +/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more +/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>` +/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate +/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type. 
+/// +/// For example, here are some owned types and their zero-copy equivalents: +/// +/// - `Vec<String>`: `VarZeroVec<'a, str>` +/// - `Vec<Vec<u8>>`: `VarZeroVec<'a, [u8]>` +/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>` +/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>` +/// +/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice). +/// +/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec). +/// +/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from +/// owned data (and then mutated!) but can also borrow from some buffer. +/// +/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the +/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. +/// +/// # Bytes and Equality +/// +/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait +/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format +/// across major SemVer releases. +/// +/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use +/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a +/// [`VarZeroVec<T>`] or vice-versa. +/// +/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still +/// under consideration, with different options along the space-time spectrum. See +/// [#1410](https://github.com/unicode-org/icu4x/issues/1410). +/// +/// # Example +/// +/// ```rust +/// # use std::str::Utf8Error; +/// # use zerovec::ule::ZeroVecError; +/// use zerovec::VarZeroVec; +/// +/// // The little-endian bytes correspond to the list of strings. 
+/// let strings = vec!["w", "ω", "文", "𑄃"]; +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// #[serde(borrow)] +/// strings: VarZeroVec<'a, str>, +/// } +/// +/// let data = Data { +/// strings: VarZeroVec::from(&strings), +/// }; +/// +/// let bincode_bytes = +/// bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Data = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(deserialized.strings.get(2), Some("文")); +/// assert_eq!(deserialized.strings, &*strings); +/// # Ok::<(), ZeroVecError>(()) +/// ``` +/// +/// Here's another example with `ZeroSlice<T>` (similar to `[T]`): +/// +/// ```rust +/// # use std::str::Utf8Error; +/// # use zerovec::ule::ZeroVecError; +/// use zerovec::ule::*; +/// use zerovec::VarZeroVec; +/// use zerovec::ZeroSlice; +/// use zerovec::ZeroVec; +/// +/// // The structured list correspond to the list of integers. +/// let numbers: &[&[u32]] = &[ +/// &[12, 25, 38], +/// &[39179, 100], +/// &[42, 55555], +/// &[12345, 54321, 9], +/// ]; +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// #[serde(borrow)] +/// vecs: VarZeroVec<'a, ZeroSlice<u32>>, +/// } +/// +/// let data = Data { +/// vecs: VarZeroVec::from(numbers), +/// }; +/// +/// let bincode_bytes = +/// bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// let deserialized: Data = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25); +/// assert_eq!(deserialized.vecs[1], *numbers[1]); +/// +/// # Ok::<(), ZeroVecError>(()) +/// ``` +/// +/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`] +/// for more information. 
+/// +/// # How it Works +/// +/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will +/// serialize to a specially formatted list of bytes. The format is: +/// +/// - 4 bytes for `length` (interpreted as a little-endian u32) +/// - `2 * length` bytes of `indices` (each interpreted as a little-endian u16 with the default [`Index16`] format; other formats use a different index width) +/// - Remaining bytes for actual `data` +/// +/// Each element in the `indices` array points to the starting index of its corresponding +/// data part in the `data` list. The ending index can be calculated from the starting index +/// of the next element (or the length of the slice if dealing with the last element). +/// +/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details. +/// +/// [`ule`]: crate::ule +#[non_exhaustive] +pub enum VarZeroVec<'a, T: ?Sized, F = Index16> { + /// An allocated VarZeroVec, allowing for mutations. + /// + /// # Examples + /// + /// ``` + /// use zerovec::VarZeroVec; + /// + /// let mut vzv = VarZeroVec::<str>::default(); + /// vzv.make_mut().push("foo"); + /// vzv.make_mut().push("bar"); + /// assert!(matches!(vzv, VarZeroVec::Owned(_))); + /// ``` + Owned(VarZeroVecOwned<T, F>), + /// A borrowed VarZeroVec, requiring no allocations. + /// + /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::VarZeroVec; + /// + /// let bytes = &[ + /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, + /// 145, 132, 131, + /// ]; + /// + /// let vzv: VarZeroVec<str> = VarZeroVec::parse_byte_slice(bytes).unwrap(); + /// assert!(matches!(vzv, VarZeroVec::Borrowed(_))); + /// ``` + Borrowed(&'a VarZeroSlice<T, F>), +} + +impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> { + fn clone(&self) -> Self { + match *self { + VarZeroVec::Owned(ref o) => o.clone().into(), + VarZeroVec::Borrowed(b) => b.into(), + } + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + VarZeroSlice::fmt(self, f) + } +} + +impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> { + #[inline] + fn from(other: VarZeroVecOwned<T, F>) -> Self { + VarZeroVec::Owned(other) + } +} + +impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> { + fn from(other: &'a VarZeroSlice<T, F>) -> Self { + VarZeroVec::Borrowed(other) + } +} + +impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>> + for VarZeroVecOwned<T, F> +{ + #[inline] + fn from(other: VarZeroVec<'a, T, F>) -> Self { + match other { + VarZeroVec::Owned(o) => o, + VarZeroVec::Borrowed(b) => b.into(), + } + } +} + +impl<T: VarULE + ?Sized> Default for VarZeroVec<'_, T> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> { + type Target = VarZeroSlice<T, F>; + fn deref(&self) -> &VarZeroSlice<T, F> { + self.as_slice() + } +} + +impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> { + /// Creates a new, empty `VarZeroVec<T>`. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::VarZeroVec; + /// + /// let vzv: VarZeroVec<str> = VarZeroVec::new(); + /// assert!(vzv.is_empty()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self::Borrowed(VarZeroSlice::new_empty()) + } + + /// Parse a VarZeroVec from a slice of the appropriate format + /// + /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]. + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(&vec[0], "foo"); + /// assert_eq!(&vec[1], "bar"); + /// assert_eq!(&vec[2], "baz"); + /// assert_eq!(&vec[3], "quux"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> { + let borrowed = VarZeroSlice::<T, F>::parse_byte_slice(slice)?; + + Ok(VarZeroVec::Borrowed(borrowed)) + } + + /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. + pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { + Self::Borrowed(core::mem::transmute(bytes)) + } + + /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. 
+ /// + /// + /// # Example + /// + /// ```rust,ignore + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let mut vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// let mutvec = vec.make_mut(); + /// mutvec.push("lorem ipsum".into()); + /// mutvec[2] = "dolor sit".into(); + /// assert_eq!(&vec[0], "foo"); + /// assert_eq!(&vec[1], "bar"); + /// assert_eq!(&vec[2], "dolor sit"); + /// assert_eq!(&vec[3], "quux"); + /// assert_eq!(&vec[4], "lorem ipsum"); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + // + // This function is crate-public for now since we don't yet want to stabilize + // the internal implementation details + pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> { + match self { + VarZeroVec::Owned(ref mut vec) => vec, + VarZeroVec::Borrowed(slice) => { + let new_self = VarZeroVecOwned::from_slice(slice); + *self = new_self.into(); + // recursion is limited since we are guaranteed to hit the Owned branch + self.make_mut() + } + } + } + + /// Converts a borrowed VarZeroVec to an owned VarZeroVec. No-op if already owned. 
+ /// + /// # Example + /// + /// ``` + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz", "quux"]; + /// let vec = VarZeroVec::<str>::from(&strings); + /// + /// assert_eq!(vec.len(), 4); + /// // has 'static lifetime + /// let owned = vec.into_owned(); + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> { + self.make_mut(); + match self { + VarZeroVec::Owned(vec) => vec.into(), + _ => unreachable!(), + } + } + + /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] + pub fn as_slice(&self) -> &VarZeroSlice<T, F> { + match *self { + VarZeroVec::Owned(ref owned) => owned, + VarZeroVec::Borrowed(b) => b, + } + } + + /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed, + /// this function allocates a byte vector and copies the borrowed bytes into it. + /// + /// The bytes can be passed back to [`Self::parse_byte_slice()`]. + /// + /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`]. + /// + /// # Example + /// + /// ```rust + /// # use std::str::Utf8Error; + /// # use zerovec::ule::ZeroVecError; + /// # use zerovec::VarZeroVec; + /// + /// let strings = vec!["foo", "bar", "baz"]; + /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes(); + /// + /// let mut borrowed: VarZeroVec<str> = VarZeroVec::parse_byte_slice(&bytes)?; + /// assert_eq!(borrowed, &*strings); + /// + /// # Ok::<(), ZeroVecError>(()) + /// ``` + pub fn into_bytes(self) -> Vec<u8> { + match self { + VarZeroVec::Owned(vec) => vec.into_bytes(), + VarZeroVec::Borrowed(vec) => vec.as_bytes().to_vec(), + } + } + + /// Return whether the [`VarZeroVec`] is operating on owned or borrowed + /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can + /// be used to force it into an owned type + pub fn is_owned(&self) -> bool { + match self { + VarZeroVec::Owned(..) 
=> true, + VarZeroVec::Borrowed(..) => false, + } + } + + #[cfg(feature = "bench")] + #[doc(hidden)] + pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> { + self.as_slice().as_components() + } +} + +impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + #[inline] + fn from(elements: &Vec<A>) -> Self { + Self::from(elements.as_slice()) + } +} + +impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + #[inline] + fn from(elements: &[A]) -> Self { + if elements.is_empty() { + VarZeroSlice::new_empty().into() + } else { + #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility + VarZeroVecOwned::try_from_elements(elements).unwrap().into() + } + } +} + +impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F> +where + T: VarULE + ?Sized, + A: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + #[inline] + fn from(elements: &[A; N]) -> Self { + Self::from(elements.as_slice()) + } +} + +impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F> +where + T: VarULE, + T: ?Sized, + T: PartialEq, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool { + // VZV::from_elements used to produce a non-canonical representation of the + // empty VZV, so we cannot use byte equality for empty vecs. + if self.is_empty() || other.is_empty() { + return self.is_empty() && other.is_empty(); + } + // VarULE has an API guarantee that byte equality is semantic equality. + // For non-empty VZVs, there's only a single metadata representation, + // so this guarantee extends to the whole VZV representation. 
+ self.as_bytes().eq(other.as_bytes()) + } +} + +impl<'a, T, F> Eq for VarZeroVec<'a, T, F> +where + T: VarULE, + T: ?Sized, + T: Eq, + F: VarZeroVecFormat, +{ +} + +impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F> +where + T: VarULE + ?Sized, + T: PartialEq, + A: AsRef<T>, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &&[A]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_ref())) + } +} + +impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F> +where + T: VarULE + ?Sized, + T: PartialEq, + A: AsRef<T>, + F: VarZeroVecFormat, +{ + #[inline] + fn eq(&self, other: &[A; N]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_ref())) + } +} + +impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.iter().partial_cmp(other.iter()) + } +} + +impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +#[test] +fn assert_single_empty_representation() { + assert_eq!( + VarZeroVec::<str>::new().as_bytes(), + VarZeroVec::<str>::from(&[] as &[&str]).as_bytes() + ); +} + +#[test] +fn weird_empty_representation_equality() { + assert_eq!( + VarZeroVec::<str>::parse_byte_slice(&[0, 0, 0, 0]).unwrap(), + VarZeroVec::<str>::parse_byte_slice(&[]).unwrap() + ); +} diff --git a/third_party/rust/zerovec/src/yoke_impls.rs b/third_party/rust/zerovec/src/yoke_impls.rs new file mode 100644 index 0000000000..66f756dce5 --- /dev/null +++ b/third_party/rust/zerovec/src/yoke_impls.rs @@ -0,0 +1,551 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +// This way we can copy-paste Yokeable impls +#![allow(unknown_lints)] // forgetting_copy_types +#![allow(renamed_and_removed_lints)] // forgetting_copy_types +#![allow(forgetting_copy_types)] +#![allow(clippy::forget_copy)] +#![allow(clippy::forget_non_drop)] + +use crate::flexzerovec::FlexZeroVec; +use crate::map::ZeroMapBorrowed; +use crate::map::ZeroMapKV; +use crate::map2d::ZeroMap2dBorrowed; +use crate::ule::*; +use crate::{VarZeroVec, ZeroMap, ZeroMap2d, ZeroVec}; +use core::{mem, ptr}; +use yoke::*; + +// This impl is similar to the impl on Cow and is safe for the same reasons +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +unsafe impl<'a, T: 'static + AsULE + ?Sized> Yokeable<'a> for ZeroVec<'static, T> { + type Output = ZeroVec<'a, T>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + self + } + #[inline] + fn transform_owned(self) -> Self::Output { + self + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +// This impl is similar to the impl on Cow and is safe for the same reasons +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +unsafe impl<'a, T: 'static + VarULE + ?Sized> Yokeable<'a> for VarZeroVec<'static, T> { + type Output = VarZeroVec<'a, T>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + self + } + #[inline] + fn transform_owned(self) -> Self::Output { + self + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = 
mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +// This impl is similar to the impl on Cow and is safe for the same reasons +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +unsafe impl<'a> Yokeable<'a> for FlexZeroVec<'static> { + type Output = FlexZeroVec<'a>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + self + } + #[inline] + fn transform_owned(self) -> Self::Output { + self + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +#[allow(clippy::transmute_ptr_to_ptr)] +unsafe impl<'a, K, V> Yokeable<'a> for ZeroMap<'static, K, V> +where + K: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + <K as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>, + <V as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>, +{ + type Output = ZeroMap<'a, K, V>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + unsafe { + // Unfortunately, because K and V are generic, rustc is + // unaware that these are covariant types, and cannot perform this cast automatically. 
+ // We transmute it instead, and enforce the lack of a lifetime with the `K, V: 'static` bound + mem::transmute::<&Self, &Self::Output>(self) + } + } + #[inline] + fn transform_owned(self) -> Self::Output { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + unsafe { + // Similar problem as transform(), but we need to use ptr::read since + // the compiler isn't sure of the sizes + let this = mem::ManuallyDrop::new(self); + let ptr: *const Self::Output = (&*this as *const Self).cast(); + ptr::read(ptr) + } + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +#[allow(clippy::transmute_ptr_to_ptr)] +unsafe impl<'a, K, V> Yokeable<'a> for ZeroMapBorrowed<'static, K, V> +where + K: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + &'static <K as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>, + &'static <V as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>, +{ + type Output = ZeroMapBorrowed<'a, K, V>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + unsafe { + // Unfortunately, because K and V are generic, rustc is + // unaware that these are covariant types, and cannot perform this cast automatically. 
+ // We transmute it instead, and enforce the lack of a lifetime with the `K, V: 'static` bound + mem::transmute::<&Self, &Self::Output>(self) + } + } + #[inline] + fn transform_owned(self) -> Self::Output { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + unsafe { + // Similar problem as transform(), but we need to use ptr::read since + // the compiler isn't sure of the sizes + let this = mem::ManuallyDrop::new(self); + let ptr: *const Self::Output = (&*this as *const Self).cast(); + ptr::read(ptr) + } + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +#[allow(clippy::transmute_ptr_to_ptr)] +unsafe impl<'a, K0, K1, V> Yokeable<'a> for ZeroMap2d<'static, K0, K1, V> +where + K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + <K0 as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>, + <K1 as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>, + <V as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>, +{ + type Output = ZeroMap2d<'a, K0, K1, V>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + unsafe { + // Unfortunately, because K and V are generic, rustc is + // unaware that these are covariant types, and cannot perform this cast automatically. 
+ // We transmute it instead, and enforce the lack of a lifetime with the `K0, K1, V: 'static` bound + mem::transmute::<&Self, &Self::Output>(self) + } + } + #[inline] + fn transform_owned(self) -> Self::Output { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + unsafe { + // Similar problem as transform(), but we need to use ptr::read since + // the compiler isn't sure of the sizes + let this = mem::ManuallyDrop::new(self); + let ptr: *const Self::Output = (&*this as *const Self).cast(); + ptr::read(ptr) + } + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate +#[allow(clippy::transmute_ptr_to_ptr)] +unsafe impl<'a, K0, K1, V> Yokeable<'a> for ZeroMap2dBorrowed<'static, K0, K1, V> +where + K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + &'static <K0 as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>, + &'static <K1 as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>, + &'static <V as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>, +{ + type Output = ZeroMap2dBorrowed<'a, K0, K1, V>; + #[inline] + fn transform(&'a self) -> &'a Self::Output { + unsafe { + // Unfortunately, because K and V are generic, rustc is + // unaware that these are covariant types, and cannot perform this cast automatically. 
+ // We transmute it instead, and enforce the lack of a lifetime with the `K0, K1, V: 'static` bound + mem::transmute::<&Self, &Self::Output>(self) + } + } + #[inline] + fn transform_owned(self) -> Self::Output { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + unsafe { + // Similar problem as transform(), but we need to use ptr::read since + // the compiler isn't sure of the sizes + let this = mem::ManuallyDrop::new(self); + let ptr: *const Self::Output = (&*this as *const Self).cast(); + ptr::read(ptr) + } + } + #[inline] + unsafe fn make(from: Self::Output) -> Self { + debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); + let from = mem::ManuallyDrop::new(from); + let ptr: *const Self = (&*from as *const Self::Output).cast(); + ptr::read(ptr) + } + #[inline] + fn transform_mut<F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } + } +} + +#[cfg(test)] +#[allow(non_camel_case_types, non_snake_case)] +mod test { + use super::*; + use crate::{vecs::FlexZeroSlice, VarZeroSlice, ZeroSlice}; + use databake::*; + + // Note: The following derives cover Yoke as well as Serde and databake. These may partially + // duplicate tests elsewhere in this crate, but they are here for completeness. 
+ + #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + struct DeriveTest_ZeroVec<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: ZeroVec<'data, u16>, + } + + #[test] + #[ignore] // https://github.com/rust-lang/rust/issues/98906 + fn bake_ZeroVec() { + test_bake!( + DeriveTest_ZeroVec<'static>, + crate::yoke_impls::test::DeriveTest_ZeroVec { + _data: crate::ZeroVec::new(), + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + struct DeriveTest_ZeroSlice<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: &'data ZeroSlice<u16>, + } + + #[test] + fn bake_ZeroSlice() { + test_bake!( + DeriveTest_ZeroSlice<'static>, + crate::yoke_impls::test::DeriveTest_ZeroSlice { + _data: crate::ZeroSlice::new_empty(), + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + struct DeriveTest_FlexZeroVec<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: FlexZeroVec<'data>, + } + + #[test] + fn bake_FlexZeroVec() { + test_bake!( + DeriveTest_FlexZeroVec<'static>, + crate::yoke_impls::test::DeriveTest_FlexZeroVec { + _data: crate::vecs::FlexZeroVec::new(), + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + struct DeriveTest_FlexZeroSlice<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + 
_data: &'data FlexZeroSlice, + } + + #[test] + fn bake_FlexZeroSlice() { + test_bake!( + DeriveTest_FlexZeroSlice<'static>, + crate::yoke_impls::test::DeriveTest_FlexZeroSlice { + _data: unsafe { crate::vecs::FlexZeroSlice::from_byte_slice_unchecked(b"\x01\0") }, + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + struct DeriveTest_VarZeroVec<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: VarZeroVec<'data, str>, + } + + #[test] + fn bake_VarZeroVec() { + test_bake!( + DeriveTest_VarZeroVec<'static>, + crate::yoke_impls::test::DeriveTest_VarZeroVec { + _data: crate::VarZeroVec::new(), + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + struct DeriveTest_VarZeroSlice<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: &'data VarZeroSlice<str>, + } + + #[test] + fn bake_VarZeroSlice() { + test_bake!( + DeriveTest_VarZeroSlice<'static>, + crate::yoke_impls::test::DeriveTest_VarZeroSlice { + _data: crate::VarZeroSlice::new_empty() + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + #[yoke(prove_covariance_manually)] + struct DeriveTest_ZeroMap<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: ZeroMap<'data, [u8], str>, + } + + #[test] + fn bake_ZeroMap() { + test_bake!( + DeriveTest_ZeroMap<'static>, + crate::yoke_impls::test::DeriveTest_ZeroMap { + _data: unsafe { + #[allow(unused_unsafe)] + crate::ZeroMap::from_parts_unchecked( 
+ crate::VarZeroVec::new(), + crate::VarZeroVec::new(), + ) + }, + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + #[yoke(prove_covariance_manually)] + struct DeriveTest_ZeroMapBorrowed<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: ZeroMapBorrowed<'data, [u8], str>, + } + + #[test] + fn bake_ZeroMapBorrowed() { + test_bake!( + DeriveTest_ZeroMapBorrowed<'static>, + crate::yoke_impls::test::DeriveTest_ZeroMapBorrowed { + _data: unsafe { + #[allow(unused_unsafe)] + crate::maps::ZeroMapBorrowed::from_parts_unchecked( + crate::VarZeroSlice::new_empty(), + crate::VarZeroSlice::new_empty(), + ) + }, + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + #[yoke(prove_covariance_manually)] + struct DeriveTest_ZeroMapWithULE<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: ZeroMap<'data, ZeroSlice<u32>, str>, + } + + #[test] + fn bake_ZeroMapWithULE() { + test_bake!( + DeriveTest_ZeroMapWithULE<'static>, + crate::yoke_impls::test::DeriveTest_ZeroMapWithULE { + _data: unsafe { + #[allow(unused_unsafe)] + crate::ZeroMap::from_parts_unchecked( + crate::VarZeroVec::new(), + crate::VarZeroVec::new(), + ) + }, + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + #[yoke(prove_covariance_manually)] + struct DeriveTest_ZeroMap2d<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: ZeroMap2d<'data, u16, u16, str>, + } + + #[test] + fn 
bake_ZeroMap2d() { + test_bake!( + DeriveTest_ZeroMap2d<'static>, + crate::yoke_impls::test::DeriveTest_ZeroMap2d { + _data: unsafe { + #[allow(unused_unsafe)] + crate::ZeroMap2d::from_parts_unchecked( + crate::ZeroVec::new(), + crate::ZeroVec::new(), + crate::ZeroVec::new(), + crate::VarZeroVec::new(), + ) + }, + }, + zerovec, + ); + } + + #[derive(yoke::Yokeable)] + #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] + #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))] + #[yoke(prove_covariance_manually)] + struct DeriveTest_ZeroMap2dBorrowed<'data> { + #[cfg_attr(feature = "serde", serde(borrow))] + _data: ZeroMap2dBorrowed<'data, u16, u16, str>, + } + + #[test] + fn bake_ZeroMap2dBorrowed() { + test_bake!( + DeriveTest_ZeroMap2dBorrowed<'static>, + crate::yoke_impls::test::DeriveTest_ZeroMap2dBorrowed { + _data: unsafe { + #[allow(unused_unsafe)] + crate::maps::ZeroMap2dBorrowed::from_parts_unchecked( + crate::ZeroSlice::new_empty(), + crate::ZeroSlice::new_empty(), + crate::ZeroSlice::new_empty(), + crate::VarZeroSlice::new_empty(), + ) + }, + }, + zerovec, + ); + } +} diff --git a/third_party/rust/zerovec/src/zerofrom_impls.rs b/third_party/rust/zerovec/src/zerofrom_impls.rs new file mode 100644 index 0000000000..d17e432c4f --- /dev/null +++ b/third_party/rust/zerovec/src/zerofrom_impls.rs @@ -0,0 +1,124 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::map::ZeroMapKV; +use crate::ule::*; +use crate::vecs::{FlexZeroSlice, FlexZeroVec}; +use crate::{VarZeroSlice, VarZeroVec, ZeroMap, ZeroMap2d, ZeroSlice, ZeroVec}; +use zerofrom::ZeroFrom; + +impl<'zf, T> ZeroFrom<'zf, ZeroVec<'_, T>> for ZeroVec<'zf, T> +where + T: 'static + AsULE + ?Sized, +{ + #[inline] + fn zero_from(other: &'zf ZeroVec<'_, T>) -> Self { + ZeroVec::new_borrowed(other.as_ule_slice()) + } +} + +impl<'zf, T> ZeroFrom<'zf, ZeroSlice<T>> for ZeroVec<'zf, T> +where + T: 'static + AsULE + ?Sized, +{ + #[inline] + fn zero_from(other: &'zf ZeroSlice<T>) -> Self { + ZeroVec::new_borrowed(other.as_ule_slice()) + } +} + +impl<'zf, T> ZeroFrom<'zf, ZeroSlice<T>> for &'zf ZeroSlice<T> +where + T: 'static + AsULE + ?Sized, +{ + #[inline] + fn zero_from(other: &'zf ZeroSlice<T>) -> Self { + other + } +} + +impl<'zf> ZeroFrom<'zf, FlexZeroVec<'_>> for FlexZeroVec<'zf> { + #[inline] + fn zero_from(other: &'zf FlexZeroVec<'_>) -> Self { + FlexZeroVec::Borrowed(other) + } +} + +impl<'zf> ZeroFrom<'zf, FlexZeroSlice> for FlexZeroVec<'zf> { + #[inline] + fn zero_from(other: &'zf FlexZeroSlice) -> Self { + FlexZeroVec::Borrowed(other) + } +} + +impl<'zf> ZeroFrom<'zf, FlexZeroSlice> for &'zf FlexZeroSlice { + #[inline] + fn zero_from(other: &'zf FlexZeroSlice) -> Self { + other + } +} + +impl<'zf, T> ZeroFrom<'zf, VarZeroSlice<T>> for VarZeroVec<'zf, T> +where + T: 'static + VarULE + ?Sized, +{ + #[inline] + fn zero_from(other: &'zf VarZeroSlice<T>) -> Self { + other.into() + } +} + +impl<'zf, T> ZeroFrom<'zf, VarZeroVec<'_, T>> for VarZeroVec<'zf, T> +where + T: 'static + VarULE + ?Sized, +{ + #[inline] + fn zero_from(other: &'zf VarZeroVec<'_, T>) -> Self { + other.as_slice().into() + } +} + +impl<'zf, T> ZeroFrom<'zf, VarZeroSlice<T>> for &'zf VarZeroSlice<T> +where + T: 'static + VarULE + ?Sized, +{ + #[inline] + fn zero_from(other: &'zf VarZeroSlice<T>) -> Self { + other + } +} + +impl<'zf, 's, K, V> ZeroFrom<'zf, ZeroMap<'s, K, V>> for 
ZeroMap<'zf, K, V> +where + K: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + <K as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <K as ZeroMapKV<'s>>::Container>, + <V as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <V as ZeroMapKV<'s>>::Container>, +{ + fn zero_from(other: &'zf ZeroMap<'s, K, V>) -> Self { + ZeroMap { + keys: K::Container::zero_from(&other.keys), + values: V::Container::zero_from(&other.values), + } + } +} + +impl<'zf, 's, K0, K1, V> ZeroFrom<'zf, ZeroMap2d<'s, K0, K1, V>> for ZeroMap2d<'zf, K0, K1, V> +where + K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, + <K0 as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <K0 as ZeroMapKV<'s>>::Container>, + <K1 as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <K1 as ZeroMapKV<'s>>::Container>, + <V as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <V as ZeroMapKV<'s>>::Container>, +{ + fn zero_from(other: &'zf ZeroMap2d<'s, K0, K1, V>) -> Self { + ZeroMap2d { + keys0: K0::Container::zero_from(&other.keys0), + joiner: ZeroVec::zero_from(&other.joiner), + keys1: K1::Container::zero_from(&other.keys1), + values: V::Container::zero_from(&other.values), + } + } +} diff --git a/third_party/rust/zerovec/src/zerovec/databake.rs b/third_party/rust/zerovec/src/zerovec/databake.rs new file mode 100644 index 0000000000..31f1675946 --- /dev/null +++ b/third_party/rust/zerovec/src/zerovec/databake.rs @@ -0,0 +1,69 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::ZeroVec; +use crate::{ule::AsULE, ZeroSlice}; +use databake::*; + +impl<T> Bake for ZeroVec<'_, T> +where + T: AsULE + ?Sized + Bake, +{ + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + if self.is_empty() { + quote! 
{ zerovec::ZeroVec::new() } + } else { + let bytes = databake::Bake::bake(&self.as_bytes(), env); + quote! { unsafe { zerovec::ZeroVec::from_bytes_unchecked(#bytes) } } + } + } +} + +impl<T> Bake for &ZeroSlice<T> +where + T: AsULE + ?Sized, +{ + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("zerovec"); + if self.is_empty() { + quote! { zerovec::ZeroSlice::new_empty() } + } else { + let bytes = databake::Bake::bake(&self.as_bytes(), env); + quote! { unsafe { zerovec::ZeroSlice::from_bytes_unchecked(#bytes) } } + } + } +} + +#[test] +fn test_baked_vec() { + test_bake!( + ZeroVec<u32>, + const: crate::ZeroVec::new(), + zerovec + ); + test_bake!( + ZeroVec<u32>, + const: unsafe { + crate::ZeroVec::from_bytes_unchecked(b"\x02\x01\0\x16\0M\x01\\") + }, + zerovec + ); +} + +#[test] +fn test_baked_slice() { + test_bake!( + &ZeroSlice<u32>, + const: crate::ZeroSlice::new_empty(), + zerovec + ); + test_bake!( + &ZeroSlice<u32>, + const: unsafe { + crate::ZeroSlice::from_bytes_unchecked(b"\x02\x01\0\x16\0M\x01\\") + }, + zerovec + ); +} diff --git a/third_party/rust/zerovec/src/zerovec/mod.rs b/third_party/rust/zerovec/src/zerovec/mod.rs new file mode 100644 index 0000000000..e6186be0a2 --- /dev/null +++ b/third_party/rust/zerovec/src/zerovec/mod.rs @@ -0,0 +1,1137 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#[cfg(feature = "databake")] +mod databake; + +#[cfg(feature = "serde")] +mod serde; + +mod slice; + +pub use slice::ZeroSlice; + +use crate::ule::*; +use alloc::borrow::Cow; +use alloc::vec::Vec; +use core::cmp::{Ord, Ordering, PartialOrd}; +use core::fmt; +use core::iter::FromIterator; +use core::marker::PhantomData; +use core::mem; +use core::num::NonZeroUsize; +use core::ops::Deref; +use core::ptr; + +/// A zero-copy, byte-aligned vector for fixed-width types. 
+/// +/// `ZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is +/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization. +/// +/// `T` must implement [`AsULE`], which is auto-implemented for a number of built-in types, +/// including all fixed-width multibyte integers. For variable-width types like [`str`], +/// see [`VarZeroVec`](crate::VarZeroVec). [`zerovec::make_ule`](crate::make_ule) may +/// be used to automatically implement [`AsULE`] for a type and generate the underlying [`ULE`] type. +/// +/// Typically, the zero-copy equivalent of a `Vec<T>` will simply be `ZeroVec<'a, T>`. +/// +/// Most of the methods on `ZeroVec<'a, T>` come from its [`Deref`] implementation to [`ZeroSlice<T>`](ZeroSlice). +/// +/// For creating zero-copy vectors of variable-size types, see [`VarZeroVec`](crate::VarZeroVec). +/// +/// `ZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from +/// owned data (and then mutated!) but can also borrow from some buffer. +/// +/// # Example +/// +/// ``` +/// use zerovec::ZeroVec; +/// +/// // The little-endian bytes correspond to the numbers on the following line. +/// let nums: &[u16] = &[211, 281, 421, 461]; +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// #[serde(borrow)] +/// nums: ZeroVec<'a, u16>, +/// } +/// +/// // The owned version will allocate +/// let data = Data { +/// nums: ZeroVec::alloc_from_slice(nums), +/// }; +/// let bincode_bytes = +/// bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Data = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// // This deserializes without allocation! 
+/// assert!(!deserialized.nums.is_owned()); +/// assert_eq!(deserialized.nums.get(2), Some(421)); +/// assert_eq!(deserialized.nums, nums); +/// ``` +/// +/// [`ule`]: crate::ule +/// +/// # How it Works +/// +/// `ZeroVec<T>` represents a slice of `T` as a slice of `T::ULE`. The difference between `T` and +/// `T::ULE` is that `T::ULE` must be encoded in little-endian with 1-byte alignment. When accessing +/// items from `ZeroVec<T>`, we fetch the `T::ULE`, convert it on the fly to `T`, and return `T` by +/// value. +/// +/// Benchmarks can be found in the project repository, with some results found in the [crate-level documentation](crate). +/// +/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details. +pub struct ZeroVec<'a, T> +where + T: AsULE, +{ + vector: EyepatchHackVector<T::ULE>, + + /// Marker type, signalling variance and dropck behavior + /// by containing all potential types this type represents + #[allow(clippy::type_complexity)] // needed to get correct marker type behavior + marker: PhantomData<(Vec<T::ULE>, &'a [T::ULE])>, +} + +// Send inherits as long as all fields are Send, but also references are Send only +// when their contents are Sync (this is the core purpose of Sync), so +// we need a Send+Sync bound since this struct can logically be a vector or a slice. +unsafe impl<'a, T: AsULE> Send for ZeroVec<'a, T> where T::ULE: Send + Sync {} +// Sync typically inherits as long as all fields are Sync +unsafe impl<'a, T: AsULE> Sync for ZeroVec<'a, T> where T::ULE: Sync {} + +impl<'a, T: AsULE> Deref for ZeroVec<'a, T> { + type Target = ZeroSlice<T>; + #[inline] + fn deref(&self) -> &Self::Target { + let slice: &[T::ULE] = self.vector.as_slice(); + ZeroSlice::from_ule_slice(slice) + } +} + +// Represents an unsafe potentially-owned vector/slice type, without a lifetime +// working around dropck limitations. 
+// +// Must either be constructed by deconstructing a Vec<U>, or from &[U] with capacity set to +// zero. Should not outlive its source &[U] in the borrowed case; this type does not in +// and of itself uphold this guarantee, but the .as_slice() method assumes it. +// +// After https://github.com/rust-lang/rust/issues/34761 stabilizes, +// we should remove this type and use #[may_dangle] +struct EyepatchHackVector<U> { + /// Pointer to data + /// This pointer is *always* valid, the reason it is represented as a raw pointer + /// is that it may logically represent an `&[T::ULE]` or the ptr,len of a `Vec<T::ULE>` + buf: *mut [U], + /// Borrowed if zero. Capacity of buffer above if not + capacity: usize, +} + +impl<U> EyepatchHackVector<U> { + // Return a slice to the inner data for an arbitrary caller-specified lifetime + // + // The caller chooses `'a`; it must not outlive the data `buf` points to, because + // this type does not enforce that on its own. + #[inline] + unsafe fn as_arbitrary_slice<'a>(&self) -> &'a [U] { + &*self.buf + } + // Return a slice to the inner data + #[inline] + const fn as_slice<'a>(&'a self) -> &'a [U] { + // SAFETY: relies on the field invariant that `buf` is always a valid pointer + unsafe { &*(self.buf as *const [U]) } + } + + /// Return this type as a vector + /// + /// Data MUST be known to be owned beforehand + /// + /// Because this borrows self, this is effectively creating two owners to the same + /// data, make sure that `self` is cleaned up after this + /// + /// (this does not simply take `self` since then it wouldn't be usable from the Drop impl) + unsafe fn get_vec(&self) -> Vec<U> { + debug_assert!(self.capacity != 0); + let slice: &[U] = self.as_slice(); + let len = slice.len(); + // Safety: we are assuming owned, and in owned cases + // this always represents a valid vector + Vec::from_raw_parts(self.buf as *mut U, len, self.capacity) + } +} + +impl<U> Drop for EyepatchHackVector<U> { + #[inline] + fn drop(&mut self) { + // A non-zero capacity marks the buffer as owned (zero means borrowed), so the + // Vec is rebuilt and immediately dropped to release the allocation. + if self.capacity != 0 { + unsafe { + // we don't need to clean up self here since we're already in a Drop impl + let _ = self.get_vec(); + } + } + } +} + +impl<'a, T: AsULE> Clone for ZeroVec<'a, T> { + fn 
clone(&self) -> Self { + if self.is_owned() { + ZeroVec::new_owned(self.as_ule_slice().into()) + } else { + Self { + vector: EyepatchHackVector { + buf: self.vector.buf, + capacity: 0, + }, + marker: PhantomData, + } + } + } +} + +impl<'a, T: AsULE> AsRef<ZeroSlice<T>> for ZeroVec<'a, T> { + fn as_ref(&self) -> &ZeroSlice<T> { + self.deref() + } +} + +impl<T> fmt::Debug for ZeroVec<'_, T> +where + T: AsULE + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ZeroVec({:?})", self.to_vec()) + } +} + +impl<T> Eq for ZeroVec<'_, T> where T: AsULE + Eq + ?Sized {} + +impl<'a, 'b, T> PartialEq<ZeroVec<'b, T>> for ZeroVec<'a, T> +where + T: AsULE + PartialEq + ?Sized, +{ + #[inline] + fn eq(&self, other: &ZeroVec<'b, T>) -> bool { + // Note: T implements PartialEq but not T::ULE + self.iter().eq(other.iter()) + } +} + +impl<T> PartialEq<&[T]> for ZeroVec<'_, T> +where + T: AsULE + PartialEq + ?Sized, +{ + #[inline] + fn eq(&self, other: &&[T]) -> bool { + self.iter().eq(other.iter().copied()) + } +} + +impl<T, const N: usize> PartialEq<[T; N]> for ZeroVec<'_, T> +where + T: AsULE + PartialEq + ?Sized, +{ + #[inline] + fn eq(&self, other: &[T; N]) -> bool { + self.iter().eq(other.iter().copied()) + } +} + +impl<'a, T: AsULE> Default for ZeroVec<'a, T> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<'a, T: AsULE + PartialOrd> PartialOrd for ZeroVec<'a, T> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.iter().partial_cmp(other.iter()) + } +} + +impl<'a, T: AsULE + Ord> Ord for ZeroVec<'a, T> { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +impl<'a, T: AsULE> AsRef<[T::ULE]> for ZeroVec<'a, T> { + fn as_ref(&self) -> &[T::ULE] { + self.as_ule_slice() + } +} + +impl<'a, T: AsULE> From<&'a [T::ULE]> for ZeroVec<'a, T> { + fn from(other: &'a [T::ULE]) -> Self { + ZeroVec::new_borrowed(other) + } +} + +impl<'a, T: AsULE> From<Vec<T::ULE>> for ZeroVec<'a, T> { + fn 
from(other: Vec<T::ULE>) -> Self { + ZeroVec::new_owned(other) + } +} + +impl<'a, T> ZeroVec<'a, T> +where + T: AsULE + ?Sized, +{ + /// Creates a new, borrowed, empty `ZeroVec<T>`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let zv: ZeroVec<u16> = ZeroVec::new(); + /// assert!(zv.is_empty()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self::new_borrowed(&[]) + } + + /// Same as `ZeroSlice::len`, which is available through `Deref` and not `const`. + pub const fn const_len(&self) -> usize { + self.vector.as_slice().len() + } + + /// Creates a new owned `ZeroVec` using an existing + /// allocated backing buffer + /// + /// If you have a slice of `&[T]`s, prefer using + /// [`Self::alloc_from_slice()`]. + #[inline] + pub fn new_owned(vec: Vec<T::ULE>) -> Self { + // Deconstruct the vector into parts + // This is the only part of the code that goes from Vec + // to ZeroVec, all other such operations should use this function + let capacity = vec.capacity(); + let len = vec.len(); + let ptr = mem::ManuallyDrop::new(vec).as_mut_ptr(); + let slice = ptr::slice_from_raw_parts_mut(ptr, len); + Self { + vector: EyepatchHackVector { + buf: slice, + capacity, + }, + marker: PhantomData, + } + } + + /// Creates a new borrowed `ZeroVec` using an existing + /// backing buffer + #[inline] + pub const fn new_borrowed(slice: &'a [T::ULE]) -> Self { + let slice = slice as *const [_] as *mut [_]; + Self { + vector: EyepatchHackVector { + buf: slice, + capacity: 0, + }, + marker: PhantomData, + } + } + + /// Creates a new, owned, empty `ZeroVec<T>`, with a certain capacity pre-allocated. + pub fn with_capacity(capacity: usize) -> Self { + Self::new_owned(Vec::with_capacity(capacity)) + } + + /// Parses a `&[u8]` buffer into a `ZeroVec<T>`. + /// + /// This function is infallible for built-in integer types, but fallible for other types, + /// such as `char`. For more information, see [`ULE::parse_byte_slice`]. 
+ /// + /// The bytes within the byte buffer must remain constant for the life of the ZeroVec. + /// + /// # Endianness + /// + /// The byte buffer must be encoded in little-endian, even if running in a big-endian + /// environment. This ensures a consistent representation of data across platforms. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert!(!zerovec.is_owned()); + /// assert_eq!(zerovec.get(2), Some(421)); + /// ``` + pub fn parse_byte_slice(bytes: &'a [u8]) -> Result<Self, ZeroVecError> { + let slice: &'a [T::ULE] = T::ULE::parse_byte_slice(bytes)?; + Ok(Self::new_borrowed(slice)) + } + + /// Uses a `&[u8]` buffer as a `ZeroVec<T>` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`ZeroSlice::as_bytes()`]. + pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { + // &[u8] and &[T::ULE] are the same slice with different length metadata. + Self::new_borrowed(core::slice::from_raw_parts( + bytes.as_ptr() as *const T::ULE, + bytes.len() / core::mem::size_of::<T::ULE>(), + )) + } + + /// Converts a `ZeroVec<T>` into a `ZeroVec<u8>`, retaining the current ownership model. + /// + /// Note that the length of the ZeroVec may change. 
+ /// + /// # Examples + /// + /// Convert a borrowed `ZeroVec`: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// let zv_bytes = zerovec.into_bytes(); + /// + /// assert!(!zv_bytes.is_owned()); + /// assert_eq!(zv_bytes.get(0), Some(0xD3)); + /// ``` + /// + /// Convert an owned `ZeroVec`: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let nums: &[u16] = &[211, 281, 421, 461]; + /// let zerovec = ZeroVec::alloc_from_slice(nums); + /// let zv_bytes = zerovec.into_bytes(); + /// + /// assert!(zv_bytes.is_owned()); + /// assert_eq!(zv_bytes.get(0), Some(0xD3)); + /// ``` + pub fn into_bytes(self) -> ZeroVec<'a, u8> { + match self.into_cow() { + Cow::Borrowed(slice) => { + let bytes: &'a [u8] = T::ULE::as_byte_slice(slice); + ZeroVec::new_borrowed(bytes) + } + Cow::Owned(vec) => { + let bytes = Vec::from(T::ULE::as_byte_slice(&vec)); + ZeroVec::new_owned(bytes) + } + } + } + + /// Casts a `ZeroVec<T>` to a compatible `ZeroVec<P>`. + /// + /// `T` and `P` are compatible if they have the same `ULE` representation. + /// + /// If the `ULE`s of `T` and `P` are different types but have the same size, + /// use [`Self::try_into_converted()`]. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// + /// let zerovec_u16: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// assert_eq!(zerovec_u16.get(3), Some(32973)); + /// + /// let zerovec_i16: ZeroVec<i16> = zerovec_u16.cast(); + /// assert_eq!(zerovec_i16.get(3), Some(-32563)); + /// ``` + pub fn cast<P>(self) -> ZeroVec<'a, P> + where + P: AsULE<ULE = T::ULE>, + { + match self.into_cow() { + Cow::Owned(v) => ZeroVec::new_owned(v), + Cow::Borrowed(v) => ZeroVec::new_borrowed(v), + } + } + + /// Converts a `ZeroVec<T>` into a `ZeroVec<P>`, retaining the current ownership model. + /// + /// If `T` and `P` have the exact same `ULE`, use [`Self::cast()`]. + /// + /// # Panics + /// + /// Panics if `T::ULE` and `P::ULE` are not the same size. + /// + /// # Examples + /// + /// Convert a borrowed `ZeroVec`: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01]; + /// let zv_char: ZeroVec<char> = + /// ZeroVec::parse_byte_slice(bytes).expect("valid code points"); + /// let zv_u8_3: ZeroVec<[u8; 3]> = + /// zv_char.try_into_converted().expect("infallible conversion"); + /// + /// assert!(!zv_u8_3.is_owned()); + /// assert_eq!(zv_u8_3.get(0), Some([0x7F, 0xF3, 0x01])); + /// ``` + /// + /// Convert an owned `ZeroVec`: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let chars: &[char] = &['🍿', '🙉']; + /// let zv_char = ZeroVec::alloc_from_slice(chars); + /// let zv_u8_3: ZeroVec<[u8; 3]> = + /// zv_char.try_into_converted().expect("length is divisible"); + /// + /// assert!(zv_u8_3.is_owned()); + /// assert_eq!(zv_u8_3.get(0), Some([0x7F, 0xF3, 0x01])); + /// ``` + /// + /// If the types are not the same size, we refuse to convert: + /// + /// ```should_panic + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01]; + /// let 
zv_char: ZeroVec<char> = + /// ZeroVec::parse_byte_slice(bytes).expect("valid code points"); + /// + /// // Panics! mem::size_of::<char::ULE> != mem::size_of::<u16::ULE> + /// zv_char.try_into_converted::<u16>(); + /// ``` + /// + /// Instead, convert to bytes and then parse: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01]; + /// let zv_char: ZeroVec<char> = + /// ZeroVec::parse_byte_slice(bytes).expect("valid code points"); + /// let zv_u16: ZeroVec<u16> = + /// zv_char.into_bytes().try_into_parsed().expect("infallible"); + /// + /// assert!(!zv_u16.is_owned()); + /// assert_eq!(zv_u16.get(0), Some(0xF37F)); + /// ``` + pub fn try_into_converted<P: AsULE>(self) -> Result<ZeroVec<'a, P>, ZeroVecError> { + assert_eq!( + core::mem::size_of::<<T as AsULE>::ULE>(), + core::mem::size_of::<<P as AsULE>::ULE>() + ); + match self.into_cow() { + Cow::Borrowed(old_slice) => { + let bytes: &'a [u8] = T::ULE::as_byte_slice(old_slice); + let new_slice = P::ULE::parse_byte_slice(bytes)?; + Ok(ZeroVec::new_borrowed(new_slice)) + } + Cow::Owned(old_vec) => { + let bytes: &[u8] = T::ULE::as_byte_slice(&old_vec); + P::ULE::validate_byte_slice(bytes)?; + // Feature "vec_into_raw_parts" is not yet stable (#65816). Polyfill: + let (ptr, len, cap) = { + // Take ownership of the pointer + let mut v = mem::ManuallyDrop::new(old_vec); + // Fetch the pointer, length, and capacity + (v.as_mut_ptr(), v.len(), v.capacity()) + }; + // Safety checklist for Vec::from_raw_parts: + // 1. ptr came from a Vec<T> + // 2. P and T are asserted above to be the same size + // 3. length is what it was before + // 4. 
capacity is what it was before + let new_vec = unsafe { + let ptr = ptr as *mut P::ULE; + Vec::from_raw_parts(ptr, len, cap) + }; + Ok(ZeroVec::new_owned(new_vec)) + } + } + } + + /// Check if this type is fully owned + #[inline] + pub fn is_owned(&self) -> bool { + self.vector.capacity != 0 + } + + /// If this is a borrowed ZeroVec, return it as a slice that covers + /// its lifetime parameter + #[inline] + pub fn as_maybe_borrowed(&self) -> Option<&'a ZeroSlice<T>> { + if self.is_owned() { + None + } else { + // We can extend the lifetime of the slice to 'a + // since we know it is borrowed + let ule_slice = unsafe { self.vector.as_arbitrary_slice() }; + Some(ZeroSlice::from_ule_slice(ule_slice)) + } + } + + /// If the ZeroVec is owned, returns the capacity of the vector. + /// + /// Otherwise, if the ZeroVec is borrowed, returns `None`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let mut zv = ZeroVec::<u8>::new_borrowed(&[0, 1, 2, 3]); + /// assert!(!zv.is_owned()); + /// assert_eq!(zv.owned_capacity(), None); + /// + /// // Convert to owned without appending anything + /// zv.with_mut(|v| ()); + /// assert!(zv.is_owned()); + /// assert_eq!(zv.owned_capacity(), Some(4.try_into().unwrap())); + /// + /// // Double the size by appending + /// zv.with_mut(|v| v.push(0)); + /// assert!(zv.is_owned()); + /// assert_eq!(zv.owned_capacity(), Some(8.try_into().unwrap())); + /// ``` + #[inline] + pub fn owned_capacity(&self) -> Option<NonZeroUsize> { + NonZeroUsize::try_from(self.vector.capacity).ok() + } +} + +impl<'a> ZeroVec<'a, u8> { + /// Converts a `ZeroVec<u8>` into a `ZeroVec<T>`, retaining the current ownership model. + /// + /// Note that the length of the ZeroVec may change. 
+ /// + /// # Examples + /// + /// Convert a borrowed `ZeroVec`: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zv_bytes = ZeroVec::new_borrowed(bytes); + /// let zerovec: ZeroVec<u16> = zv_bytes.try_into_parsed().expect("infallible"); + /// + /// assert!(!zerovec.is_owned()); + /// assert_eq!(zerovec.get(0), Some(211)); + /// ``` + /// + /// Convert an owned `ZeroVec`: + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: Vec<u8> = vec![0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zv_bytes = ZeroVec::new_owned(bytes); + /// let zerovec: ZeroVec<u16> = zv_bytes.try_into_parsed().expect("infallible"); + /// + /// assert!(zerovec.is_owned()); + /// assert_eq!(zerovec.get(0), Some(211)); + /// ``` + pub fn try_into_parsed<T: AsULE>(self) -> Result<ZeroVec<'a, T>, ZeroVecError> { + match self.into_cow() { + Cow::Borrowed(bytes) => { + let slice: &'a [T::ULE] = T::ULE::parse_byte_slice(bytes)?; + Ok(ZeroVec::new_borrowed(slice)) + } + Cow::Owned(vec) => { + let slice = Vec::from(T::ULE::parse_byte_slice(&vec)?); + Ok(ZeroVec::new_owned(slice)) + } + } + } +} + +impl<'a, T> ZeroVec<'a, T> +where + T: AsULE, +{ + /// Creates a `ZeroVec<T>` from a `&[T]` by allocating memory. + /// + /// This function results in an `Owned` instance of `ZeroVec<T>`. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// // The little-endian bytes correspond to the numbers on the following line. + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let nums: &[u16] = &[211, 281, 421, 461]; + /// + /// let zerovec = ZeroVec::alloc_from_slice(nums); + /// + /// assert!(zerovec.is_owned()); + /// assert_eq!(bytes, zerovec.as_bytes()); + /// ``` + #[inline] + pub fn alloc_from_slice(other: &[T]) -> Self { + Self::new_owned(other.iter().copied().map(T::to_unaligned).collect()) + } + + /// Creates a `Vec<T>` from a `ZeroVec<T>`. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let nums: &[u16] = &[211, 281, 421, 461]; + /// let vec: Vec<u16> = ZeroVec::alloc_from_slice(nums).to_vec(); + /// + /// assert_eq!(nums, vec.as_slice()); + /// ``` + #[inline] + pub fn to_vec(&self) -> Vec<T> { + self.iter().collect() + } +} + +impl<'a, T> ZeroVec<'a, T> +where + T: EqULE, +{ + /// Attempts to create a `ZeroVec<'a, T>` from a `&'a [T]` by borrowing the argument. + /// + /// If this is not possible, such as on a big-endian platform, `None` is returned. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// // The little-endian bytes correspond to the numbers on the following line. + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let nums: &[u16] = &[211, 281, 421, 461]; + /// + /// if let Some(zerovec) = ZeroVec::try_from_slice(nums) { + /// assert!(!zerovec.is_owned()); + /// assert_eq!(bytes, zerovec.as_bytes()); + /// } + /// ``` + #[inline] + pub fn try_from_slice(slice: &'a [T]) -> Option<Self> { + T::slice_to_unaligned(slice).map(|ule_slice| Self::new_borrowed(ule_slice)) + } + + /// Creates a `ZeroVec<'a, T>` from a `&'a [T]`, either by borrowing the argument or by + /// allocating a new vector. + /// + /// This is a cheap operation on little-endian platforms, falling back to a more expensive + /// operation on big-endian platforms. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// // The little-endian bytes correspond to the numbers on the following line. + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let nums: &[u16] = &[211, 281, 421, 461]; + /// + /// let zerovec = ZeroVec::from_slice_or_alloc(nums); + /// + /// // Note: zerovec could be either borrowed or owned. 
+ /// assert_eq!(bytes, zerovec.as_bytes()); + /// ``` + #[inline] + pub fn from_slice_or_alloc(slice: &'a [T]) -> Self { + Self::try_from_slice(slice).unwrap_or_else(|| Self::alloc_from_slice(slice)) + } +} + +impl<'a, T> ZeroVec<'a, T> +where + T: AsULE, +{ + /// Mutates each element according to a given function, meant to be + /// a more convenient version of calling `.iter_mut()` with + /// [`ZeroVec::with_mut()`] which serves fewer use cases. + /// + /// This will convert the ZeroVec into an owned ZeroVec if not already the case. + /// + /// # Example + /// + /// ``` + /// use zerovec::ule::AsULE; + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let mut zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// zerovec.for_each_mut(|item| *item += 1); + /// + /// assert_eq!(zerovec.to_vec(), &[212, 282, 422, 462]); + /// assert!(zerovec.is_owned()); + /// ``` + #[inline] + pub fn for_each_mut(&mut self, mut f: impl FnMut(&mut T)) { + self.to_mut_slice().iter_mut().for_each(|item| { + let mut aligned = T::from_unaligned(*item); + f(&mut aligned); + *item = aligned.to_unaligned() + }) + } + + /// Same as [`ZeroVec::for_each_mut()`], but bubbles up errors. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ule::AsULE; + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let mut zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// zerovec.try_for_each_mut(|item| { + /// *item = item.checked_add(1).ok_or(())?; + /// Ok(()) + /// })?; + /// + /// assert_eq!(zerovec.to_vec(), &[212, 282, 422, 462]); + /// assert!(zerovec.is_owned()); + /// # Ok::<(), ()>(()) + /// ``` + #[inline] + pub fn try_for_each_mut<E>( + &mut self, + mut f: impl FnMut(&mut T) -> Result<(), E>, + ) -> Result<(), E> { + self.to_mut_slice().iter_mut().try_for_each(|item| { + let mut aligned = T::from_unaligned(*item); + f(&mut aligned)?; + *item = aligned.to_unaligned(); + Ok(()) + }) + } + + /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// assert!(!zerovec.is_owned()); + /// + /// let owned = zerovec.into_owned(); + /// assert!(owned.is_owned()); + /// ``` + pub fn into_owned(self) -> ZeroVec<'static, T> { + match self.into_cow() { + Cow::Owned(vec) => ZeroVec::new_owned(vec), + Cow::Borrowed(b) => { + let vec: Vec<T::ULE> = b.into(); + ZeroVec::new_owned(vec) + } + } + } + + /// Allows the ZeroVec to be mutated by converting it to an owned variant, and producing + /// a mutable vector of ULEs. If you only need a mutable slice, consider using [`Self::to_mut_slice()`] + /// instead. 
+ /// + /// # Example + /// + /// ```rust + /// # use crate::zerovec::ule::AsULE; + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let mut zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// assert!(!zerovec.is_owned()); + /// + /// zerovec.with_mut(|v| v.push(12_u16.to_unaligned())); + /// assert!(zerovec.is_owned()); + /// ``` + pub fn with_mut<R>(&mut self, f: impl FnOnce(&mut Vec<T::ULE>) -> R) -> R { + // We're in danger if f() panics whilst we've moved a vector out of self; + // replace it with an empty dummy vector for now + let this = mem::take(self); + let mut vec = match this.into_cow() { + Cow::Owned(v) => v, + Cow::Borrowed(s) => s.into(), + }; + let ret = f(&mut vec); + *self = Self::new_owned(vec); + ret + } + + /// Allows the ZeroVec to be mutated by converting it to an owned variant (if necessary) + /// and returning a slice to its backing buffer. [`Self::with_mut()`] allows for mutation + /// of the vector itself. + /// + /// # Example + /// + /// ```rust + /// # use crate::zerovec::ule::AsULE; + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let mut zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// assert!(!zerovec.is_owned()); + /// + /// zerovec.to_mut_slice()[1] = 5u16.to_unaligned(); + /// assert!(zerovec.is_owned()); + /// ``` + pub fn to_mut_slice(&mut self) -> &mut [T::ULE] { + if !self.is_owned() { + // `buf` is either a valid vector or slice of `T::ULE`s, either + // way it's always valid + let slice = self.vector.as_slice(); + *self = ZeroVec::new_owned(slice.into()); + } + unsafe { &mut *self.vector.buf } + } + /// Remove all elements from this ZeroVec and reset it to an empty borrowed state. 
+ pub fn clear(&mut self) { + *self = Self::new_borrowed(&[]) + } + + /// Converts the type into a `Cow<'a, [T::ULE]>`, which is + /// the logical equivalent of this type's internal representation + #[inline] + pub fn into_cow(self) -> Cow<'a, [T::ULE]> { + let this = mem::ManuallyDrop::new(self); + if this.is_owned() { + let vec = unsafe { + // safe to call: we know it's owned, + // and `self`/`this` are thenceforth no longer used or dropped + { this }.vector.get_vec() + }; + Cow::Owned(vec) + } else { + // We can extend the lifetime of the slice to 'a + // since we know it is borrowed + let slice = unsafe { { this }.vector.as_arbitrary_slice() }; + Cow::Borrowed(slice) + } + } +} + +impl<T: AsULE> FromIterator<T> for ZeroVec<'_, T> { + /// Creates an owned [`ZeroVec`] from an iterator of values. + fn from_iter<I>(iter: I) -> Self + where + I: IntoIterator<Item = T>, + { + ZeroVec::new_owned(iter.into_iter().map(|t| t.to_unaligned()).collect()) + } +} + +/// Convenience wrapper for [`ZeroSlice::from_ule_slice`]. The value will be created at compile-time, +/// meaning that all arguments must also be constant. +/// +/// # Arguments +/// +/// * `$aligned` - The type of an element in its canonical, aligned form, e.g., `char`. +/// * `$convert` - A const function that converts an `$aligned` into its unaligned equivalent, e.g., +/// `const fn from_aligned(a: CanonicalType) -> CanonicalType::ULE`. +/// * `$x` - The elements that the `ZeroSlice` will hold. 
+/// +/// # Examples +/// +/// Using array-conversion functions provided by this crate: +/// +/// ``` +/// use zerovec::{ZeroSlice, zeroslice, ule::AsULE}; +/// use zerovec::ule::UnvalidatedChar; +/// +/// const SIGNATURE: &ZeroSlice<char> = zeroslice!(char; <char as AsULE>::ULE::from_aligned; ['b', 'y', 'e', '✌']); +/// const EMPTY: &ZeroSlice<u32> = zeroslice![]; +/// const UC: &ZeroSlice<UnvalidatedChar> = +/// zeroslice!( +/// UnvalidatedChar; +/// <UnvalidatedChar as AsULE>::ULE::from_unvalidated_char; +/// [UnvalidatedChar::from_char('a')] +/// ); +/// let empty: &ZeroSlice<u32> = zeroslice![]; +/// let nums = zeroslice!(u32; <u32 as AsULE>::ULE::from_unsigned; [1, 2, 3, 4, 5]); +/// assert_eq!(nums.last().unwrap(), 5); +/// ``` +/// +/// Using a custom array-conversion function: +/// +/// ``` +/// use zerovec::{ule::AsULE, ule::RawBytesULE, zeroslice, ZeroSlice}; +/// +/// const fn be_convert(num: i16) -> <i16 as AsULE>::ULE { +/// RawBytesULE(num.to_be_bytes()) +/// } +/// +/// const NUMBERS_BE: &ZeroSlice<i16> = +/// zeroslice!(i16; be_convert; [1, -2, 3, -4, 5]); +/// ``` +#[macro_export] +macro_rules! zeroslice { + () => ( + $crate::ZeroSlice::new_empty() + ); + ($aligned:ty; $convert:expr; [$($x:expr),+ $(,)?]) => ( + $crate::ZeroSlice::<$aligned>::from_ule_slice( + {const X: &[<$aligned as $crate::ule::AsULE>::ULE] = &[ + $($convert($x)),* + ]; X} + ) + ); +} + +/// Creates a borrowed `ZeroVec`. Convenience wrapper for `zeroslice!(...).as_zerovec()`. The value +/// will be created at compile-time, meaning that all arguments must also be constant. +/// +/// See [`zeroslice!`](crate::zeroslice) for more information. 
+/// +/// # Examples +/// +/// ``` +/// use zerovec::{ZeroVec, zerovec, ule::AsULE}; +/// +/// const SIGNATURE: ZeroVec<char> = zerovec!(char; <char as AsULE>::ULE::from_aligned; ['a', 'y', 'e', '✌']); +/// assert!(!SIGNATURE.is_owned()); +/// +/// const EMPTY: ZeroVec<u32> = zerovec![]; +/// assert!(!EMPTY.is_owned()); +/// ``` +#[macro_export] +macro_rules! zerovec { + () => ( + $crate::ZeroVec::new() + ); + ($aligned:ty; $convert:expr; [$($x:expr),+ $(,)?]) => ( + $crate::zeroslice![$aligned; $convert; [$($x),+]].as_zerovec() + ); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::samples::*; + + #[test] + fn test_get() { + { + let zerovec = ZeroVec::from_slice_or_alloc(TEST_SLICE); + assert_eq!(zerovec.get(0), Some(TEST_SLICE[0])); + assert_eq!(zerovec.get(1), Some(TEST_SLICE[1])); + assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); + } + { + let zerovec = ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE).unwrap(); + assert_eq!(zerovec.get(0), Some(TEST_SLICE[0])); + assert_eq!(zerovec.get(1), Some(TEST_SLICE[1])); + assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); + } + } + + #[test] + fn test_binary_search() { + { + let zerovec = ZeroVec::from_slice_or_alloc(TEST_SLICE); + assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c)); + assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); + } + { + let zerovec = ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE).unwrap(); + assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c)); + assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); + } + } + + #[test] + fn test_odd_alignment() { + assert_eq!( + Some(0x020100), + ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE) + .unwrap() + .get(0) + ); + assert_eq!( + Some(0x04000201), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[1..77]) + .unwrap() + .get(0) + ); + assert_eq!( + Some(0x05040002), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[2..78]) + .unwrap() + .get(0) + ); + assert_eq!( + Some(0x06050400), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[3..79]) 
+ .unwrap() + .get(0) + ); + assert_eq!( + Some(0x060504), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[4..]) + .unwrap() + .get(0) + ); + assert_eq!( + Some(0x4e4d4c00), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[75..79]) + .unwrap() + .get(0) + ); + assert_eq!( + Some(0x4e4d4c00), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[3..79]) + .unwrap() + .get(18) + ); + assert_eq!( + Some(0x4e4d4c), + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[76..]) + .unwrap() + .get(0) + ); + assert_eq!( + Some(0x4e4d4c), + ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE) + .unwrap() + .get(19) + ); + // TODO(#1144): Check for correct slice length in RawBytesULE + // assert_eq!( + // None, + // ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[77..]) + // .unwrap() + // .get(0) + // ); + assert_eq!( + None, + ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE) + .unwrap() + .get(20) + ); + assert_eq!( + None, + ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[3..79]) + .unwrap() + .get(19) + ); + } +} diff --git a/third_party/rust/zerovec/src/zerovec/serde.rs b/third_party/rust/zerovec/src/zerovec/serde.rs new file mode 100644 index 0000000000..bb180d5a19 --- /dev/null +++ b/third_party/rust/zerovec/src/zerovec/serde.rs @@ -0,0 +1,221 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use super::{ZeroSlice, ZeroVec}; +use crate::ule::*; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt; +use core::marker::PhantomData; +use core::mem; +use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; +#[cfg(feature = "serde")] +use serde::ser::{Serialize, SerializeSeq, Serializer}; + +struct ZeroVecVisitor<T> { + marker: PhantomData<fn() -> T>, +} + +impl<T> Default for ZeroVecVisitor<T> { + fn default() -> Self { + Self { + marker: PhantomData, + } + } +} + +impl<'de, T> Visitor<'de> for ZeroVecVisitor<T> +where + T: 'de + Deserialize<'de> + AsULE, +{ + type Value = ZeroVec<'de, T>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a sequence or borrowed buffer of fixed-width elements") + } + + fn visit_borrowed_bytes<E>(self, bytes: &'de [u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + ZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let mut vec: Vec<T::ULE> = if let Some(capacity) = seq.size_hint() { + Vec::with_capacity(capacity) + } else { + Vec::new() + }; + while let Some(value) = seq.next_element::<T>()? 
{ + vec.push(T::to_unaligned(value)); + } + Ok(ZeroVec::new_owned(vec)) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, T> Deserialize<'de> for ZeroVec<'a, T> +where + T: 'de + Deserialize<'de> + AsULE, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let visitor = ZeroVecVisitor::default(); + if deserializer.is_human_readable() { + deserializer.deserialize_seq(visitor) + } else { + deserializer.deserialize_bytes(visitor) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<T> Serialize for ZeroVec<'_, T> +where + T: Serialize + AsULE, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + if serializer.is_human_readable() { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for value in self.iter() { + seq.serialize_element(&value)?; + } + seq.end() + } else { + serializer.serialize_bytes(self.as_bytes()) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, T> Deserialize<'de> for Box<ZeroSlice<T>> +where + T: Deserialize<'de> + AsULE + 'static, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let mut zv = ZeroVec::<T>::deserialize(deserializer)?; + let vec = zv.with_mut(mem::take); + Ok(ZeroSlice::from_boxed_slice(vec.into_boxed_slice())) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<'de, 'a, T> Deserialize<'de> for &'a ZeroSlice<T> +where + T: Deserialize<'de> + AsULE + 'static, + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "&ZeroSlice cannot be deserialized from human-readable formats", + )) + } else { + let 
deserialized: ZeroVec<'a, T> = ZeroVec::deserialize(deserializer)?; + let borrowed = if let Some(b) = deserialized.as_maybe_borrowed() { + b + } else { + return Err(de::Error::custom( + "&ZeroSlice can only deserialize in zero-copy ways", + )); + }; + Ok(borrowed) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +impl<T> Serialize for ZeroSlice<T> +where + T: Serialize + AsULE, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + self.as_zerovec().serialize(serializer) + } +} + +#[cfg(test)] +#[allow(non_camel_case_types)] +mod test { + use crate::samples::*; + use crate::ZeroVec; + + #[derive(serde::Serialize, serde::Deserialize)] + struct DeriveTest_ZeroVec<'data> { + #[serde(borrow)] + _data: ZeroVec<'data, u16>, + } + + #[test] + fn test_serde_json() { + let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE); + let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); + assert_eq!(JSON_STR, json_str); + // ZeroVec should deserialize from JSON to either Vec or ZeroVec + let vec_new: Vec<u32> = + serde_json::from_str(&json_str).expect("deserialize from buffer to Vec"); + assert_eq!( + zerovec_orig, + ZeroVec::<u32>::from_slice_or_alloc(vec_new.as_slice()) + ); + let zerovec_new: ZeroVec<u32> = + serde_json::from_str(&json_str).expect("deserialize from buffer to ZeroVec"); + assert_eq!(zerovec_orig, zerovec_new); + assert!(zerovec_new.is_owned()); + } + + #[test] + fn test_serde_bincode() { + let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + assert_eq!(BINCODE_BUF, bincode_buf); + // ZeroVec should deserialize from Bincode to ZeroVec but not Vec + bincode::deserialize::<Vec<u32>>(&bincode_buf).expect_err("deserialize from buffer to Vec"); + let zerovec_new: ZeroVec<u32> = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to ZeroVec"); + 
assert_eq!(zerovec_orig, zerovec_new); + + assert!(!zerovec_new.is_owned()); + } + + #[test] + fn test_chars_valid() { + // 1-byte, 2-byte, 3-byte, and 4-byte character in UTF-8 (not as relevant in UTF-32) + let zerovec_orig = ZeroVec::alloc_from_slice(&['w', 'ω', '文', '𑄃']); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + let zerovec_new: ZeroVec<char> = + bincode::deserialize(&bincode_buf).expect("deserialize from buffer to ZeroVec"); + assert_eq!(zerovec_orig, zerovec_new); + + assert!(!zerovec_new.is_owned()); + } + + #[test] + fn test_chars_invalid() { + // 119 and 120 are valid, but not 0xD800 (high surrogate) + let zerovec_orig: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&[119, 0xD800, 120]); + let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); + let zerovec_result = bincode::deserialize::<ZeroVec<char>>(&bincode_buf); + assert!(zerovec_result.is_err()); + } +} diff --git a/third_party/rust/zerovec/src/zerovec/slice.rs b/third_party/rust/zerovec/src/zerovec/slice.rs new file mode 100644 index 0000000000..12d88deff8 --- /dev/null +++ b/third_party/rust/zerovec/src/zerovec/slice.rs @@ -0,0 +1,596 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use alloc::boxed::Box; +use core::cmp::Ordering; +use core::ops::Range; + +/// A zero-copy "slice", i.e. the zero-copy version of `[T]`. This behaves +/// similarly to [`ZeroVec<T>`], however [`ZeroVec<T>`] is allowed to contain +/// owned data and as such is ideal for deserialization since most human readable +/// serialization formats cannot unconditionally deserialize zero-copy. 
+/// +/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): +/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead +/// using `VarZeroVec<ZeroSlice<T>>`. See the [`VarZeroVec`](crate::VarZeroVec) docs for an example. +/// +/// # Examples +/// +/// Const-construct a ZeroSlice of u16: +/// +/// ``` +/// use zerovec::ule::AsULE; +/// use zerovec::ZeroSlice; +/// +/// const DATA: &ZeroSlice<u16> = +/// ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([ +/// 211, 281, 421, 32973, +/// ])); +/// +/// assert_eq!(DATA.get(1), Some(281)); +/// ``` +#[repr(transparent)] +pub struct ZeroSlice<T: AsULE>([T::ULE]); + +impl<T> ZeroSlice<T> +where + T: AsULE, +{ + /// Returns an empty slice. + pub const fn new_empty() -> &'static Self { + Self::from_ule_slice(&[]) + } + + /// Get this [`ZeroSlice`] as a borrowed [`ZeroVec`] + /// + /// [`ZeroSlice`] does not have most of the methods that [`ZeroVec`] does, + /// so it is recommended to convert it to a [`ZeroVec`] before doing anything. + #[inline] + pub const fn as_zerovec(&self) -> ZeroVec<'_, T> { + ZeroVec::new_borrowed(&self.0) + } + + /// Attempt to construct a `&ZeroSlice<T>` from a byte slice, returning an error + /// if it's not a valid byte sequence + pub fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + T::ULE::parse_byte_slice(bytes).map(Self::from_ule_slice) + } + + /// Uses a `&[u8]` buffer as a `ZeroVec<T>` without any verification. + /// + /// # Safety + /// + /// `bytes` need to be an output from [`ZeroSlice::as_bytes()`]. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // &[u8] and &[T::ULE] are the same slice with different length metadata. + Self::from_ule_slice(core::slice::from_raw_parts( + bytes.as_ptr() as *const T::ULE, + bytes.len() / core::mem::size_of::<T::ULE>(), + )) + } + + /// Construct a `&ZeroSlice<T>` from a slice of ULEs. 
+ /// + /// This function can be used for constructing ZeroVecs in a const context, avoiding + /// parsing checks. + /// + /// See [`ZeroSlice`] for an example. + #[inline] + pub const fn from_ule_slice(slice: &[T::ULE]) -> &Self { + // This is safe because ZeroSlice is transparent over [T::ULE] + // so &ZeroSlice<T> can be safely cast from &[T::ULE] + unsafe { &*(slice as *const _ as *const Self) } + } + + /// Construct a `Box<ZeroSlice<T>>` from a boxed slice of ULEs + #[inline] + pub fn from_boxed_slice(slice: Box<[T::ULE]>) -> Box<Self> { + // This is safe because ZeroSlice is transparent over [T::ULE] + // so Box<ZeroSlice<T>> can be safely cast from Box<[T::ULE]> + unsafe { Box::from_raw(Box::into_raw(slice) as *mut Self) } + } + + /// Returns this slice as its underlying `&[u8]` byte buffer representation. + /// + /// Useful for serialization. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// // The little-endian bytes correspond to the numbers on the following line. + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let nums: &[u16] = &[211, 281, 421, 32973]; + /// + /// let zerovec = ZeroVec::alloc_from_slice(nums); + /// + /// assert_eq!(bytes, zerovec.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &[u8] { + T::ULE::as_byte_slice(self.as_ule_slice()) + } + + /// Dereferences this slice as `&[T::ULE]`. + #[inline] + pub const fn as_ule_slice(&self) -> &[T::ULE] { + &self.0 + } + + /// Returns the number of elements in this slice. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ule::AsULE; + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!(4, zerovec.len()); + /// assert_eq!( + /// bytes.len(), + /// zerovec.len() * std::mem::size_of::<<u16 as AsULE>::ULE>() + /// ); + /// ``` + #[inline] + pub const fn len(&self) -> usize { + self.as_ule_slice().len() + } + + /// Returns whether this slice is empty. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// assert!(!zerovec.is_empty()); + /// + /// let emptyvec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(&[]).expect("infallible"); + /// assert!(emptyvec.is_empty()); + /// ``` + #[inline] + pub const fn is_empty(&self) -> bool { + self.as_ule_slice().is_empty() + } +} + +impl<T> ZeroSlice<T> +where + T: AsULE, +{ + /// Gets the element at the specified index. Returns `None` if out of range. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!(zerovec.get(2), Some(421)); + /// assert_eq!(zerovec.get(4), None); + /// ``` + #[inline] + pub fn get(&self, index: usize) -> Option<T> { + self.as_ule_slice() + .get(index) + .copied() + .map(T::from_unaligned) + } + + /// Gets the entire slice as an array of length `N`. Returns `None` if the slice + /// does not have exactly `N` elements. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// let array: [u16; 4] = + /// zerovec.get_as_array().expect("should be 4 items in array"); + /// + /// assert_eq!(array[2], 421); + /// ``` + pub fn get_as_array<const N: usize>(&self) -> Option<[T; N]> { + let ule_array = <&[T::ULE; N]>::try_from(self.as_ule_slice()).ok()?; + Some(ule_array.map(|u| T::from_unaligned(u))) + } + + /// Gets a subslice of elements within a certain range. Returns `None` if the range + /// is out of bounds of this `ZeroSlice`. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!( + /// zerovec.get_subslice(1..3), + /// Some(&*ZeroVec::from_slice_or_alloc(&[0x0119, 0x01A5])) + /// ); + /// assert_eq!(zerovec.get_subslice(3..5), None); + /// ``` + #[inline] + pub fn get_subslice(&self, range: Range<usize>) -> Option<&ZeroSlice<T>> { + self.0.get(range).map(ZeroSlice::from_ule_slice) + } + + /// Get a borrowed reference to the underlying ULE type at a specified index. + /// + /// Prefer [`Self::get()`] over this method where possible since working + /// directly with `ULE` types is less ergonomic + pub fn get_ule_ref(&self, index: usize) -> Option<&T::ULE> { + self.as_ule_slice().get(index) + } + + /// Casts a `ZeroSlice<T>` to a compatible `ZeroSlice<P>`. + /// + /// `T` and `P` are compatible if they have the same `ULE` representation. + /// + /// If the `ULE`s of `T` and `P` are different, use [`Self::try_as_converted()`]. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroSlice; + /// + /// const BYTES: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// const ZS_U16: &ZeroSlice<u16> = { + /// match ZeroSlice::<u16>::try_from_bytes(BYTES) { + /// Ok(s) => s, + /// Err(_) => unreachable!(), + /// } + /// }; + /// + /// let zs_i16: &ZeroSlice<i16> = ZS_U16.cast(); + /// + /// assert_eq!(ZS_U16.get(3), Some(32973)); + /// assert_eq!(zs_i16.get(3), Some(-32563)); + /// ``` + #[inline] + pub const fn cast<P>(&self) -> &ZeroSlice<P> + where + P: AsULE<ULE = T::ULE>, + { + ZeroSlice::<P>::from_ule_slice(self.as_ule_slice()) + } + + /// Converts a `&ZeroSlice<T>` into a `&ZeroSlice<P>`. + /// + /// The resulting slice will have the same length as the original slice + /// if and only if `T::ULE` and `P::ULE` are the same size. + /// + /// If `T` and `P` have the exact same `ULE`, use [`Self::cast()`]. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ZeroSlice; + /// + /// const BYTES: &[u8] = &[0x7F, 0xF3, 0x01, 0x00, 0x49, 0xF6, 0x01, 0x00]; + /// const ZS_U32: &ZeroSlice<u32> = { + /// match ZeroSlice::<u32>::try_from_bytes(BYTES) { + /// Ok(s) => s, + /// Err(_) => unreachable!(), + /// } + /// }; + /// + /// let zs_u8_4: &ZeroSlice<[u8; 4]> = + /// ZS_U32.try_as_converted().expect("valid code points"); + /// + /// assert_eq!(ZS_U32.get(0), Some(127871)); + /// assert_eq!(zs_u8_4.get(0), Some([0x7F, 0xF3, 0x01, 0x00])); + /// ``` + #[inline] + pub fn try_as_converted<P: AsULE>(&self) -> Result<&ZeroSlice<P>, ZeroVecError> { + let new_slice = P::ULE::parse_byte_slice(self.as_bytes())?; + Ok(ZeroSlice::from_ule_slice(new_slice)) + } + + /// Gets the first element. Returns `None` if empty. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!(zerovec.first(), Some(211)); + /// ``` + #[inline] + pub fn first(&self) -> Option<T> { + self.as_ule_slice().first().copied().map(T::from_unaligned) + } + + /// Gets the last element. Returns `None` if empty. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!(zerovec.last(), Some(32973)); + /// ``` + #[inline] + pub fn last(&self) -> Option<T> { + self.as_ule_slice().last().copied().map(T::from_unaligned) + } + + /// Gets an iterator over the elements. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// let mut it = zerovec.iter(); + /// + /// assert_eq!(it.next(), Some(211)); + /// assert_eq!(it.next(), Some(281)); + /// assert_eq!(it.next(), Some(421)); + /// assert_eq!(it.next(), Some(32973)); + /// assert_eq!(it.next(), None); + /// ``` + #[inline] + pub fn iter(&self) -> impl DoubleEndedIterator<Item = T> + ExactSizeIterator<Item = T> + '_ { + self.as_ule_slice().iter().copied().map(T::from_unaligned) + } + + /// Returns a tuple with the first element and a subslice of the remaining elements. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ule::AsULE; + /// use zerovec::ZeroSlice; + /// + /// const DATA: &ZeroSlice<u16> = + /// ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([ + /// 211, 281, 421, 32973, + /// ])); + /// const EXPECTED_VALUE: (u16, &ZeroSlice<u16>) = ( + /// 211, + /// ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([ + /// 281, 421, 32973, + /// ])), + /// ); + /// assert_eq!(EXPECTED_VALUE, DATA.split_first().unwrap()); + /// ``` + #[inline] + pub fn split_first(&self) -> Option<(T, &ZeroSlice<T>)> { + if let Some(first) = self.first() { + return Some(( + first, + // `unwrap()` must succeed, because `first()` returned `Some`. + #[allow(clippy::unwrap_used)] + self.get_subslice(1..self.len()).unwrap(), + )); + } + None + } +} + +impl<T> ZeroSlice<T> +where + T: AsULE + Ord, +{ + /// Binary searches a sorted `ZeroVec<T>` for the given element. For more information, see + /// the primitive function [`binary_search`]. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!(zerovec.binary_search(&281), Ok(1)); + /// assert_eq!(zerovec.binary_search(&282), Err(2)); + /// ``` + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T) -> Result<usize, usize> { + self.as_ule_slice() + .binary_search_by(|probe| T::from_unaligned(*probe).cmp(x)) + } +} + +impl<T> ZeroSlice<T> +where + T: AsULE, +{ + /// Binary searches a sorted `ZeroVec<T>` based on a given predicate. For more information, see + /// the primitive function [`binary_search_by`]. 
+ /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; + /// let zerovec: ZeroVec<u16> = + /// ZeroVec::parse_byte_slice(bytes).expect("infallible"); + /// + /// assert_eq!(zerovec.binary_search_by(|x| x.cmp(&281)), Ok(1)); + /// assert_eq!(zerovec.binary_search_by(|x| x.cmp(&282)), Err(2)); + /// ``` + /// + /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by + #[inline] + pub fn binary_search_by( + &self, + mut predicate: impl FnMut(T) -> Ordering, + ) -> Result<usize, usize> { + self.as_ule_slice() + .binary_search_by(|probe| predicate(T::from_unaligned(*probe))) + } +} + +// Safety (based on the safety checklist on the VarULE trait): +// (`ZeroSlice<T>` is a transparent wrapper around [T::ULE]) +// 1. [T::ULE] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type) +// 2. [T::ULE] is aligned to 1 byte (achieved by being a slice of a ULE type) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. `as_byte_slice()` and `parse_byte_slice()` are defaulted +// 7. 
`[T::ULE]` byte equality is semantic equality (relying on the guideline of the underlying `ULE` type) +unsafe impl<T: AsULE + 'static> VarULE for ZeroSlice<T> { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + T::ULE::validate_byte_slice(bytes) + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + Self::from_ule_slice(T::ULE::from_byte_slice_unchecked(bytes)) + } +} + +impl<T> Eq for ZeroSlice<T> where T: AsULE + Eq {} + +impl<T> PartialEq<ZeroSlice<T>> for ZeroSlice<T> +where + T: AsULE + PartialEq, +{ + #[inline] + fn eq(&self, other: &ZeroSlice<T>) -> bool { + self.as_zerovec().eq(&other.as_zerovec()) + } +} + +impl<T> PartialEq<[T]> for ZeroSlice<T> +where + T: AsULE + PartialEq, +{ + #[inline] + fn eq(&self, other: &[T]) -> bool { + self.iter().eq(other.iter().copied()) + } +} + +impl<'a, T> PartialEq<ZeroVec<'a, T>> for ZeroSlice<T> +where + T: AsULE + PartialEq, +{ + #[inline] + fn eq(&self, other: &ZeroVec<'a, T>) -> bool { + self.as_zerovec().eq(other) + } +} + +impl<'a, T> PartialEq<ZeroSlice<T>> for ZeroVec<'a, T> +where + T: AsULE + PartialEq, +{ + #[inline] + fn eq(&self, other: &ZeroSlice<T>) -> bool { + self.eq(&other.as_zerovec()) + } +} + +impl<T> fmt::Debug for ZeroSlice<T> +where + T: AsULE + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.as_zerovec().fmt(f) + } +} + +impl<T: AsULE + PartialOrd> PartialOrd for ZeroSlice<T> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.iter().partial_cmp(other.iter()) + } +} + +impl<T: AsULE + Ord> Ord for ZeroSlice<T> { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other.iter()) + } +} + +impl<T: AsULE> AsRef<ZeroSlice<T>> for Vec<T::ULE> { + fn as_ref(&self) -> &ZeroSlice<T> { + ZeroSlice::<T>::from_ule_slice(self) + } +} + +impl<T: AsULE> AsRef<ZeroSlice<T>> for &[T::ULE] { + fn as_ref(&self) -> &ZeroSlice<T> { + ZeroSlice::<T>::from_ule_slice(self) + } +} + +impl<T> Default for 
&ZeroSlice<T> +where + T: AsULE, +{ + fn default() -> Self { + ZeroSlice::from_ule_slice(&[]) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::zeroslice; + + #[test] + fn test_split_first() { + { + // empty slice. + assert_eq!(None, ZeroSlice::<u16>::new_empty().split_first()); + } + { + // single element slice + const DATA: &ZeroSlice<u16> = + zeroslice!(u16; <u16 as AsULE>::ULE::from_unsigned; [211]); + assert_eq!((211, zeroslice![]), DATA.split_first().unwrap()); + } + { + // slice with many elements. + const DATA: &ZeroSlice<u16> = + zeroslice!(u16; <u16 as AsULE>::ULE::from_unsigned; [211, 281, 421, 32973]); + const EXPECTED_VALUE: (u16, &ZeroSlice<u16>) = ( + 211, + zeroslice!(u16; <u16 as AsULE>::ULE::from_unsigned; [281, 421, 32973]), + ); + + assert_eq!(EXPECTED_VALUE, DATA.split_first().unwrap()); + } + } +} |