| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
| commit | 26a029d407be480d791972afb5975cf62c9360a6 | |
| tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec/src/ule | |
| parent | Initial commit. | |
Adding upstream version 124.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/zerovec/src/ule')
| mode | file | insertions |
|---|---|---|
| -rw-r--r-- | third_party/rust/zerovec/src/ule/chars.rs | 190 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/custom.rs | 145 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/encode.rs | 400 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/macros.rs | 29 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/mod.rs | 394 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/multi.rs | 154 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/niche.rs | 180 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/option.rs | 264 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/plain.rs | 366 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/slices.rs | 103 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/tuple.rs | 179 |
| -rw-r--r-- | third_party/rust/zerovec/src/ule/unvalidated.rs | 527 |
12 files changed, 2931 insertions, 0 deletions
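The diff below vendors the `ule` module of ICU4X's `zerovec` crate. As orientation for readers of this vendored code, here is a minimal sketch — not part of the added files themselves — of how the ULE ("unaligned little-endian") machinery is typically consumed through the crate's public API, using only calls (`ZeroVec::parse_byte_slice`, `AsULE::to_unaligned`/`from_unaligned`) that the files below implement and document:

```rust
// Illustrative only; not part of the vendored diff.
use zerovec::ule::AsULE;
use zerovec::ZeroVec;

fn main() {
    // Four u32 values serialized as little-endian bytes.
    let bytes: &[u8] = &[
        0x01, 0x00, 0x00, 0x00, // 1
        0x02, 0x00, 0x00, 0x00, // 2
        0x03, 0x00, 0x00, 0x00, // 3
        0x04, 0x00, 0x00, 0x00, // 4
    ];

    // Zero-copy view over the bytes; no allocation and no alignment requirement.
    let zv: ZeroVec<u32> = ZeroVec::parse_byte_slice(bytes).expect("valid u32 bytes");
    assert_eq!(zv.get(2), Some(3));

    // Round-trip a single value through its unaligned little-endian ULE form.
    let ule = 0xDEAD_BEEF_u32.to_unaligned();
    assert_eq!(u32::from_unaligned(ule), 0xDEAD_BEEF);
}
```

The same `ULE`/`AsULE`/`VarULE` traits underlie `CharULE`, `OptionULE`, `MultiFieldsULE`, and the other types added by these files.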
diff --git a/third_party/rust/zerovec/src/ule/chars.rs b/third_party/rust/zerovec/src/ule/chars.rs new file mode 100644 index 0000000000..e4c1efc4ec --- /dev/null +++ b/third_party/rust/zerovec/src/ule/chars.rs @@ -0,0 +1,190 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] +//! ULE implementation for the `char` type. + +use super::*; +use crate::impl_ule_from_array; +use core::cmp::Ordering; +use core::convert::TryFrom; + +/// A u8 array of little-endian data corresponding to a Unicode scalar value. +/// +/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a +/// valid `char` and can be converted without validation. +/// +/// # Examples +/// +/// Convert a `char` to a `CharULE` and back again: +/// +/// ``` +/// use zerovec::ule::{AsULE, CharULE, ULE}; +/// +/// let c1 = '๐'; +/// let ule = c1.to_unaligned(); +/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]); +/// let c2 = char::from_unaligned(ule); +/// assert_eq!(c1, c2); +/// ``` +/// +/// Attempt to parse invalid bytes to a `CharULE`: +/// +/// ``` +/// use zerovec::ule::{CharULE, ULE}; +/// +/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF]; +/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes"); +/// ``` +#[repr(transparent)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub struct CharULE([u8; 3]); + +impl CharULE { + /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling + /// [`AsULE::to_unaligned()`] + /// + /// See the type-level documentation for [`CharULE`] for more information. + #[inline] + pub const fn from_aligned(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + impl_ule_from_array!(char, CharULE, Self([0; 3])); +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. CharULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. CharULE is aligned to 1 byte. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. +// 5. The other ULE methods use the default impl. +// 6. CharULE byte equality is semantic equality +unsafe impl ULE for CharULE { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % 3 != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + // Validate the bytes + for chunk in bytes.chunks_exact(3) { + // TODO: Use slice::as_chunks() when stabilized + #[allow(clippy::indexing_slicing)] + // Won't panic because the chunks are always 3 bytes long + let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]); + char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?; + } + Ok(()) + } +} + +impl AsULE for char { + type ULE = CharULE; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + CharULE::from_aligned(self) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. 
+ unsafe { + Self::from_u32_unchecked(u32::from_le_bytes([ + unaligned.0[0], + unaligned.0[1], + unaligned.0[2], + 0, + ])) + } + } +} + +impl PartialOrd for CharULE { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for CharULE { + fn cmp(&self, other: &Self) -> Ordering { + char::from_unaligned(*self).cmp(&char::from_unaligned(*other)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_from_array() { + const CHARS: [char; 2] = ['a', '๐']; + const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS); + assert_eq!( + CharULE::as_byte_slice(&CHARS_ULE), + &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01] + ); + } + + #[test] + fn test_from_array_zst() { + const CHARS: [char; 0] = []; + const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS); + let bytes = CharULE::as_byte_slice(&CHARS_ULE); + let empty: &[u8] = &[]; + assert_eq!(bytes, empty); + } + + #[test] + fn test_parse() { + // 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32) + let chars = ['w', 'ฯ', 'ๆ', '๐', '๐']; + let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect(); + let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules); + + // Check parsing + let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap(); + assert_eq!(char_ules, parsed_ules); + let parsed_chars: Vec<char> = parsed_ules + .iter() + .copied() + .map(char::from_unaligned) + .collect(); + assert_eq!(&chars, parsed_chars.as_slice()); + + // Compare to golden expected data + assert_eq!( + &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], + char_bytes + ); + } + + #[test] + fn test_failures() { + // 119 and 120 are valid, but not 0xD800 (high surrogate) + let u32s = [119, 0xD800, 120]; + let u32_ules: Vec<RawBytesULE<4>> = u32s + .iter() + .copied() + .map(<u32 as AsULE>::to_unaligned) + .collect(); + let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); + let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); + assert!(parsed_ules_result.is_err()); + + // 0x20FFFF is out of range for a char + let u32s = [0x20FFFF]; + let u32_ules: Vec<RawBytesULE<4>> = u32s + .iter() + .copied() + .map(<u32 as AsULE>::to_unaligned) + .collect(); + let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); + let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); + assert!(parsed_ules_result.is_err()); + } +} diff --git a/third_party/rust/zerovec/src/ule/custom.rs b/third_party/rust/zerovec/src/ule/custom.rs new file mode 100644 index 0000000000..8cc6e9de4e --- /dev/null +++ b/third_party/rust/zerovec/src/ule/custom.rs @@ -0,0 +1,145 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Documentation on implementing custom VarULE types. +//! +//! This module contains documentation for defining custom VarULE types, +//! especially those using complex custom dynamically sized types. +//! +//! In *most cases* you should be able to create custom VarULE types using +//! [`#[make_varule]`](crate::make_ule). +//! +//! # Example +//! +//! For example, if your regular stack type is: +//! +//! ```rust +//! use zerofrom::ZeroFrom; +//! use zerovec::ule::*; +//! use zerovec::ZeroVec; +//! +//! #[derive(serde::Serialize, serde::Deserialize)] +//! struct Foo<'a> { +//! field1: char, +//! field2: u32, +//! #[serde(borrow)] +//! 
field3: ZeroVec<'a, u32>, +//! } +//! ``` +//! +//! then the ULE type will be implemented as follows. Ideally, you should have +//! `EncodeAsVarULE` and `ZeroFrom` implementations on `Foo` pertaining to `FooULE`, +//! as well as a `Serialize` impl on `FooULE` and a `Deserialize` impl on `Box<FooULE>` +//! to enable human-readable serialization and deserialization. +//! +//! ```rust +//! use zerovec::{ZeroVec, VarZeroVec, ZeroSlice}; +//! use zerovec::ule::*; +//! use zerofrom::ZeroFrom; +//! use core::mem; +//! +//! # #[derive(serde::Serialize, serde::Deserialize)] +//! # struct Foo<'a> { +//! # field1: char, +//! # field2: u32, +//! # #[serde(borrow)] +//! # field3: ZeroVec<'a, u32> +//! # } +//! +//! // Must be repr(packed) for safety of VarULE! +//! // Must also only contain ULE types +//! #[repr(packed)] +//! struct FooULE { +//! field1: <char as AsULE>::ULE, +//! field2: <u32 as AsULE>::ULE, +//! field3: ZeroSlice<u32>, +//! } +//! +//! // Safety (based on the safety checklist on the VarULE trait): +//! // 1. FooULE does not include any uninitialized or padding bytes. (achieved by `#[repr(packed)]` on +//! // a struct with only ULE fields) +//! // 2. FooULE is aligned to 1 byte. (achieved by `#[repr(packed)]` on +//! // a struct with only ULE fields) +//! // 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +//! // 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +//! // 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +//! // 6. The other VarULE methods use the default impl. +//! // 7. FooULE byte equality is semantic equality +//! unsafe impl VarULE for FooULE { +//! fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { +//! // validate each field +//! <char as AsULE>::ULE::validate_byte_slice(&bytes[0..3]).map_err(|_| ZeroVecError::parse::<Self>())?; +//! <u32 as AsULE>::ULE::validate_byte_slice(&bytes[3..7]).map_err(|_| ZeroVecError::parse::<Self>())?; +//! let _ = ZeroVec::<u32>::parse_byte_slice(&bytes[7..]).map_err(|_| ZeroVecError::parse::<Self>())?; +//! Ok(()) +//! } +//! unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { +//! let ptr = bytes.as_ptr(); +//! let len = bytes.len(); +//! // subtract the length of the char and u32 to get the length of the array +//! let len_new = (len - 7) / 4; +//! // it's hard constructing custom DSTs, we fake a pointer/length construction +//! // eventually we can use the Pointer::Metadata APIs when they stabilize +//! let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const <u32 as AsULE>::ULE, len_new); +//! &*(fake_slice as *const Self) +//! } +//! } +//! +//! unsafe impl EncodeAsVarULE<FooULE> for Foo<'_> { +//! fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { +//! // take each field, convert to ULE byte slices, and pass them through +//! cb(&[<char as AsULE>::ULE::as_byte_slice(&[self.field1.to_unaligned()]), +//! <u32 as AsULE>::ULE::as_byte_slice(&[self.field2.to_unaligned()]), +//! // the ZeroVec is already in the correct slice format +//! self.field3.as_bytes()]) +//! } +//! } +//! +//! impl<'a> ZeroFrom<'a, FooULE> for Foo<'a> { +//! fn zero_from(other: &'a FooULE) -> Self { +//! Self { +//! field1: AsULE::from_unaligned(other.field1), +//! field2: AsULE::from_unaligned(other.field2), +//! field3: ZeroFrom::zero_from(&other.field3), +//! } +//! } +//! } +//! +//! +//! impl serde::Serialize for FooULE +//! { +//! 
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> +//! where +//! S: serde::Serializer, +//! { +//! Foo::zero_from(self).serialize(serializer) +//! } +//! } +//! +//! impl<'de> serde::Deserialize<'de> for Box<FooULE> +//! { +//! fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> +//! where +//! D: serde::Deserializer<'de>, +//! { +//! let mut foo = Foo::deserialize(deserializer)?; +//! Ok(encode_varule_to_box(&foo)) +//! } +//! } +//! +//! fn main() { +//! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])}, +//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}]; +//! +//! let vzv = VarZeroVec::<_>::from(&foos); +//! +//! assert_eq!(char::from_unaligned(vzv.get(0).unwrap().field1), 'u'); +//! assert_eq!(u32::from_unaligned(vzv.get(0).unwrap().field2), 983); +//! assert_eq!(&vzv.get(0).unwrap().field3, &[1212,2309,500,7000][..]); +//! +//! assert_eq!(char::from_unaligned(vzv.get(1).unwrap().field1), 'l'); +//! assert_eq!(u32::from_unaligned(vzv.get(1).unwrap().field2), 1010); +//! assert_eq!(&vzv.get(1).unwrap().field3, &[1932, 0, 8888, 91237][..]); +//! } +//! ``` diff --git a/third_party/rust/zerovec/src/ule/encode.rs b/third_party/rust/zerovec/src/ule/encode.rs new file mode 100644 index 0000000000..adea123aa2 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/encode.rs @@ -0,0 +1,400 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use crate::varzerovec::VarZeroVecFormat; +use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec}; +use alloc::borrow::{Cow, ToOwned}; +use alloc::boxed::Box; +use alloc::string::String; +use alloc::{vec, vec::Vec}; +use core::mem; + +/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on +/// custom DSTs where the type cannot be obtained as a reference to some other type. +/// +/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field +/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices +/// to the callback must, when concatenated, be a valid instance of the VarULE type. +/// +/// See the [custom VarULEdocumentation](crate::ule::custom) for examples. +/// +/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`] +/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to +/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where +/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work. +/// +/// A typical implementation will take each field in the order found in the [`VarULE`] type, +/// convert it to ULE, call [`ULE::as_byte_slice()`] on them, and pass the slices to `cb` in order. +/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying +/// byte representation passed through. +/// +/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical +/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the +/// dynamically-sized part. 
+/// +/// # Safety +/// +/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are: +/// - It must call `cb` (only once) +/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type +/// (i.e. if fed to [`VarULE::validate_byte_slice()`] they must produce a successful result) +/// - It must return the return value of `cb` to the caller +/// +/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided. +/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced +/// with `unreachable!()`. +/// +/// The safety invariants of [`Self::encode_var_ule_len()`] are: +/// - It must return the length of the corresponding VarULE type +/// +/// The safety invariants of [`Self::encode_var_ule_write()`] are: +/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type +pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> { + /// Calls `cb` with a piecewise list of byte slices that when concatenated + /// produce the memory pattern of the corresponding instance of `T`. + /// + /// Do not call this function directly; instead use the other two. Some implementors + /// may define this function to panic. + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R; + + /// Return the length, in bytes, of the corresponding [`VarULE`] type + fn encode_var_ule_len(&self) -> usize { + self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum()) + } + + /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should + /// be the size of [`Self::encode_var_ule_len()`] + fn encode_var_ule_write(&self, mut dst: &mut [u8]) { + debug_assert_eq!(self.encode_var_ule_len(), dst.len()); + self.encode_var_ule_as_slices(move |slices| { + #[allow(clippy::indexing_slicing)] // by debug_assert + for slice in slices { + dst[..slice.len()].copy_from_slice(slice); + dst = &mut dst[slice.len()..]; + } + }); + } +} + +/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>` +/// +/// This is primarily useful for generating `Deserialize` impls for VarULE types +pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> { + // zero-fill the vector to avoid uninitialized data UB + let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()]; + x.encode_var_ule_write(&mut vec); + let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice()); + unsafe { + // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]` + // and can be recouped via from_byte_slice_unchecked() + let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T; + + // Safety: we can construct an owned version since we have mem::forgotten the older owner + Box::from_raw(ptr) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self)]) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self)]) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T> +where + T: ToOwned, +{ + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self.as_ref())]) + } +} + +unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> { + fn 
encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[T::as_byte_slice(self)]) + } +} + +unsafe impl EncodeAsVarULE<str> for String { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[self.as_bytes()]) + } +} + +// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>` +// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here. +unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T> +where + T: ULE, +{ + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[<[T] as VarULE>::as_byte_slice(self)]) + } +} + +unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T] +where + T: AsULE + 'static, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.len() * core::mem::size_of::<T::ULE>() + } + + fn encode_var_ule_write(&self, dst: &mut [u8]) { + #[allow(non_snake_case)] + let S = core::mem::size_of::<T::ULE>(); + debug_assert_eq!(self.len() * S, dst.len()); + for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) { + let ule = item.to_unaligned(); + chunk.copy_from_slice(ULE::as_byte_slice(core::slice::from_ref(&ule))); + } + } +} + +unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T> +where + T: AsULE + 'static, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.as_slice().encode_var_ule_len() + } + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + self.as_slice().encode_var_ule_write(dst) + } +} + +unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T> +where + T: AsULE + 'static, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.as_bytes().len() + } + + fn encode_var_ule_write(&self, dst: &mut [u8]) { + debug_assert_eq!(self.as_bytes().len(), dst.len()); + dst.copy_from_slice(self.as_bytes()); + } +} + +unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E] +where + T: VarULE + ?Sized, + E: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unimplemented!() + } + + #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV. 
+ fn encode_var_ule_len(&self) -> usize { + crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize + } + + fn encode_var_ule_write(&self, dst: &mut [u8]) { + crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst) + } +} + +unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E> +where + T: VarULE + ?Sized, + E: EncodeAsVarULE<T>, + F: VarZeroVecFormat, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice()) + } + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst) + } +} + +unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F> +where + T: VarULE + ?Sized, + F: VarZeroVecFormat, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.as_bytes().len() + } + + #[inline] + fn encode_var_ule_write(&self, dst: &mut [u8]) { + debug_assert_eq!(self.as_bytes().len(), dst.len()); + dst.copy_from_slice(self.as_bytes()); + } +} + +#[cfg(test)] +mod test { + use super::*; + + const STRING_ARRAY: [&str; 2] = ["hello", "world"]; + + const STRING_SLICE: &[&str] = &STRING_ARRAY; + + const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; + + const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY]; + + const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY]; + + const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE]; + + const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE]; + + const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F]; + + const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY]; + + const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY]; + + const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE]; + + const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE]; + + #[test] + fn test_vzv_from() { + type VZV<'a, T> = VarZeroVec<'a, T>; + type ZS<T> = ZeroSlice<T>; + type VZS<T> = VarZeroSlice<T>; + + let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY); + let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()]; + let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()]; + let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()]; + + let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY); + let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()]; + let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()]; + let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()]; + + let a: VZV<str> = VarZeroVec::from(&STRING_ARRAY); + let b: VZV<str> = VarZeroVec::from(STRING_SLICE); + let c: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE)); + assert_eq!(a, STRING_SLICE); + assert_eq!(a, b); + assert_eq!(a, c); + + let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY); + let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE); + let c: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec); + assert_eq!(a, U8_2D_SLICE); + assert_eq!(a, b); + assert_eq!(a, c); + let u8_3d_vzv_brackets = &[a.clone(), a.clone()]; + + let a: 
VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY); + let b: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE); + let c: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec); + let d: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec); + assert_eq!(a, U8_2D_SLICE); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()]; + + let a: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY); + let b: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE); + let c: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec); + let d: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets); + assert_eq!( + a.iter() + .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>()) + .collect::<Vec<Vec<Vec<u8>>>>(), + u8_3d_vec + ); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + + let a: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY); + let b: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE); + let c: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec); + let d: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice); + assert_eq!( + a.iter() + .map(|x| x + .iter() + .map(|y| y.iter().collect::<Vec<u8>>()) + .collect::<Vec<Vec<u8>>>()) + .collect::<Vec<Vec<Vec<u8>>>>(), + u8_3d_vec + ); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + + let a: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY); + let b: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE); + let c: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec); + let d: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec); + assert_eq!(a, u32_2d_zerovec); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + let u32_3d_vzv = &[a.clone(), a.clone()]; + + let a: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY); + let b: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE); + let c: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec); + let d: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv); + assert_eq!( + a.iter() + .map(|x| x + .iter() + .map(|y| y.iter().collect::<Vec<u32>>()) + .collect::<Vec<Vec<u32>>>()) + .collect::<Vec<Vec<Vec<u32>>>>(), + u32_3d_vec + ); + assert_eq!(a, b); + assert_eq!(a, c); + assert_eq!(a, d); + } +} diff --git a/third_party/rust/zerovec/src/ule/macros.rs b/third_party/rust/zerovec/src/ule/macros.rs new file mode 100644 index 0000000000..955b1eb2e4 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/macros.rs @@ -0,0 +1,29 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +/// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or +/// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`. +/// +/// The `$default` argument is due to current compiler limitations. +/// Pass any (cheap to construct) value. +#[macro_export] +macro_rules! 
impl_ule_from_array { + ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => { + #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")] + pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] { + let mut result = [$default; N]; + let mut i = 0; + // Won't panic because i < N and arr has length N + #[allow(clippy::indexing_slicing)] + while i < N { + result[i] = $single(arr[i]); + i += 1; + } + result + } + }; + ($aligned:ty, $unaligned:ty, $default:expr) => { + impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned); + }; +} diff --git a/third_party/rust/zerovec/src/ule/mod.rs b/third_party/rust/zerovec/src/ule/mod.rs new file mode 100644 index 0000000000..5a6d9cd471 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/mod.rs @@ -0,0 +1,394 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] + +//! Traits over unaligned little-endian data (ULE, pronounced "yule"). +//! +//! The main traits for this module are [`ULE`], [`AsULE`] and, [`VarULE`]. +//! +//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits +//! works under the hood. +mod chars; +#[cfg(doc)] +pub mod custom; +mod encode; +mod macros; +mod multi; +mod niche; +mod option; +mod plain; +mod slices; +mod unvalidated; + +pub mod tuple; +pub use super::ZeroVecError; +pub use chars::CharULE; +pub use encode::{encode_varule_to_box, EncodeAsVarULE}; +pub use multi::MultiFieldsULE; +pub use niche::{NicheBytes, NichedOption, NichedOptionULE}; +pub use option::{OptionULE, OptionVarULE}; +pub use plain::RawBytesULE; +pub use unvalidated::{UnvalidatedChar, UnvalidatedStr}; + +use alloc::alloc::Layout; +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use core::{mem, slice}; + +/// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice. +/// +/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or +/// [`#[derive(ULE)]`](macro@ULE) instead. +/// +/// Types that are not fixed-width can implement [`VarULE`] instead. +/// +/// "ULE" stands for "Unaligned little-endian" +/// +/// # Safety +/// +/// Safety checklist for `ULE`: +/// +/// 1. The type *must not* include any uninitialized or padding bytes. +/// 2. The type must have an alignment of 1 byte. +/// 3. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// would not represent a valid slice of this type. +/// 4. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// cannot be used in its entirety (if its length is not a multiple of `size_of::<Self>()`). +/// 5. All other methods *must* be left with their default impl, or else implemented according to +/// their respective safety guidelines. +/// 6. Acknowledge the following note about the equality invariant. +/// +/// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2, +/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`. 
+/// +/// # Equality invariant +/// +/// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically +/// equivalent to byte equality on [`Self::as_byte_slice()`]. +/// +/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not +/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error +/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and +/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form +/// where only a single digit is allowed before `.`. +/// +/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may +/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. +pub unsafe trait ULE +where + Self: Sized, + Self: Copy + 'static, +{ + /// Validates a byte slice, `&[u8]`. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. + /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok` + /// should be returned; otherwise, `Self::Error` should be returned. + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError>; + + /// Parses a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, + /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`]. + /// + /// The default implementation executes [`Self::validate_byte_slice()`] followed by + /// [`Self::from_byte_slice_unchecked`]. + /// + /// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This + /// means that the returned slice can span the entire byte slice. + fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], ZeroVecError> { + Self::validate_byte_slice(bytes)?; + debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0); + Ok(unsafe { Self::from_byte_slice_unchecked(bytes) }) + } + + /// Takes a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime, assuming + /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with + /// success. + /// + /// The default implementation performs a pointer cast to the same region of memory. + /// + /// # Safety + /// + /// ## Callers + /// + /// Callers of this method must take care to ensure that `bytes` was previously passed through + /// [`Self::validate_byte_slice()`] with success (and was not changed since then). + /// + /// ## Implementors + /// + /// Implementations of this method may call unsafe functions to cast the pointer to the correct + /// type, assuming the "Callers" invariant above. + /// + /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. + /// + /// Safety checklist: + /// + /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`]. + /// 2. This method *must* return a slice to the same region of memory as the argument. + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] { + let data = bytes.as_ptr(); + let len = bytes.len() / mem::size_of::<Self>(); + debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0); + core::slice::from_raw_parts(data as *const Self, len) + } + + /// Given `&[Self]`, returns a `&[u8]` with the same lifetime. + /// + /// The default implementation performs a pointer cast to the same region of memory. 
+ /// + /// # Safety + /// + /// Implementations of this method should call potentially unsafe functions to cast the + /// pointer to the correct type. + /// + /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. + #[inline] + #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219 + fn as_byte_slice(slice: &[Self]) -> &[u8] { + unsafe { + slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice)) + } + } +} + +/// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type. +/// +/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead. +pub trait AsULE: Copy { + /// The ULE type corresponding to `Self`. + /// + /// Types having infallible conversions from all bit values (Plain Old Data) can use + /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`. + /// + /// Types that are not well-defined for all bit values should implement a custom ULE. + type ULE: ULE; + + /// Converts from `Self` to `Self::ULE`. + /// + /// This function may involve byte order swapping (native-endian to little-endian). + /// + /// For best performance, mark your implementation of this function `#[inline]`. + fn to_unaligned(self) -> Self::ULE; + + /// Converts from `Self::ULE` to `Self`. + /// + /// This function may involve byte order swapping (little-endian to native-endian). + /// + /// For best performance, mark your implementation of this function `#[inline]`. + /// + /// # Safety + /// + /// This function is infallible because bit validation should have occurred when `Self::ULE` + /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like + /// `from_bytes_unchecked()`. + fn from_unaligned(unaligned: Self::ULE) -> Self; +} + +/// An [`EqULE`] type is one whose byte sequence equals the byte sequence of its ULE type on +/// little-endian platforms. This enables certain performance optimizations, such as +/// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice). +/// +/// # Implementation safety +/// +/// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`) +/// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where +/// `*const T` can be cast to a valid `*const T::ULE`. +pub unsafe trait EqULE: AsULE {} + +/// A trait for a type where aligned slices can be cast to unaligned slices. +/// +/// Auto-implemented on all types implementing [`EqULE`]. +pub trait SliceAsULE +where + Self: AsULE + Sized, +{ + /// Converts from `&[Self]` to `&[Self::ULE]` if possible. + /// + /// In general, this function returns `Some` on little-endian and `None` on big-endian. + fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>; +} + +#[cfg(target_endian = "little")] +impl<T> SliceAsULE for T +where + T: EqULE, +{ + #[inline] + fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> { + // This is safe because on little-endian platforms, the byte sequence of &[T] + // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE, + // and &[T::ULE] has equal or looser alignment than &[T]. 
+ let ule_slice = + unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) }; + Some(ule_slice) + } +} + +#[cfg(not(target_endian = "little"))] +impl<T> SliceAsULE for T +where + T: EqULE, +{ + #[inline] + fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> { + None + } +} + +/// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice. +/// +/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or +/// [`#[derive(VarULE)]`](macro@VarULE) instead. +/// +/// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types; +/// however, it is much more preferable to use [`ULE`] for that purpose. The [`custom`] module contains +/// additional documentation on how this type can be implemented on custom types. +/// +/// If deserialization with `VarZeroVec` is desired is recommended to implement `Deserialize` for +/// `Box<T>` (serde does not do this automatically for unsized `T`). +/// +/// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom) +/// on some stack type to convert to and from the ULE type efficiently when necessary. +/// +/// # Safety +/// +/// Safety checklist for `VarULE`: +/// +/// 1. The type *must not* include any uninitialized or padding bytes. +/// 2. The type must have an alignment of 1 byte. +/// 3. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// would not represent a valid slice of this type. +/// 4. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice +/// cannot be used in its entirety. +/// 5. The impl of [`VarULE::from_byte_slice_unchecked()`] must produce a reference to the same +/// underlying data assuming that the given bytes previously passed validation. +/// 6. All other methods *must* be left with their default impl, or else implemented according to +/// their respective safety guidelines. +/// 7. Acknowledge the following note about the equality invariant. +/// +/// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2, +/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`. +/// +/// # Equality invariant +/// +/// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically +/// equivalent to byte equality on [`Self::as_byte_slice()`]. +/// +/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not +/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error +/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and +/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form +/// where only a single digit is allowed before `.`. +/// +/// There may also be cases where a `VarULE` has muiltiple canonical forms, such as a faster +/// version and a smaller version. The cleanest way to handle this case would be separate types. +/// However, if this is not feasible, then the application should ensure that the data it is +/// deserializing is in the expected form. For example, if the data is being loaded from an +/// external source, then requests could carry information about the expected form of the data. 
+/// +/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may +/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. +pub unsafe trait VarULE: 'static { + /// Validates a byte slice, `&[u8]`. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. + /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should + /// be returned; otherwise, `Self::Error` should be returned. + fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError>; + + /// Parses a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime. + /// + /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, + /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`]. + /// + /// The default implementation executes [`Self::validate_byte_slice()`] followed by + /// [`Self::from_byte_slice_unchecked`]. + /// + /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`, + /// where `result` is the successful return value of the method. This means that the return + /// value spans the entire byte slice. + fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + Self::validate_byte_slice(bytes)?; + let result = unsafe { Self::from_byte_slice_unchecked(bytes) }; + debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes)); + Ok(result) + } + + /// Takes a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime, assuming + /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with + /// success. + /// + /// # Safety + /// + /// ## Callers + /// + /// Callers of this method must take care to ensure that `bytes` was previously passed through + /// [`Self::validate_byte_slice()`] with success (and was not changed since then). + /// + /// ## Implementors + /// + /// Implementations of this method may call unsafe functions to cast the pointer to the correct + /// type, assuming the "Callers" invariant above. + /// + /// Safety checklist: + /// + /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`]. + /// 2. This method *must* return a slice to the same region of memory as the argument. + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self; + + /// Given `&Self`, returns a `&[u8]` with the same lifetime. + /// + /// The default implementation performs a pointer cast to the same region of memory. + /// + /// # Safety + /// + /// Implementations of this method should call potentially unsafe functions to cast the + /// pointer to the correct type. + #[inline] + fn as_byte_slice(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) } + } + + /// Allocate on the heap as a `Box<T>` + #[inline] + fn to_boxed(&self) -> Box<Self> { + let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice(); + let bytesvec = mem::ManuallyDrop::new(bytesvec); + unsafe { + // Get the pointer representation + let ptr: *mut Self = + Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self; + assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec)); + // Transmute the pointer to an owned pointer + Box::from_raw(ptr) + } + } +} + +// Proc macro reexports +// +// These exist so that our docs can use intra-doc links. 
+// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from +// a submodule + +/// Custom derive for [`ULE`]. +/// +/// This can be attached to [`Copy`] structs containing only [`ULE`] types. +/// +/// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining +/// a custom ULE type. +#[cfg(feature = "derive")] +pub use zerovec_derive::ULE; + +/// Custom derive for [`VarULE`] +/// +/// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end. +/// +/// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining +/// a custom [`VarULE`] type. +#[cfg(feature = "derive")] +pub use zerovec_derive::VarULE; diff --git a/third_party/rust/zerovec/src/ule/multi.rs b/third_party/rust/zerovec/src/ule/multi.rs new file mode 100644 index 0000000000..3281b20888 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/multi.rs @@ -0,0 +1,154 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use crate::varzerovec::Index32; +use crate::VarZeroSlice; +use core::mem; + +/// This type is used by the custom derive to represent multiple [`VarULE`] +/// fields packed into a single end-of-struct field. It is not recommended +/// to use this type directly. +/// +/// Logically, consider it to be `(V1, V2, V3, ..)` +/// where `V1` etc are potentially different [`VarULE`] types. +/// +/// Internally, it is represented by a VarZeroSlice. +#[derive(PartialEq, Eq, Debug)] +#[repr(transparent)] +pub struct MultiFieldsULE(VarZeroSlice<[u8], Index32>); + +impl MultiFieldsULE { + /// Compute the amount of bytes needed to support elements with lengths `lengths` + #[inline] + pub fn compute_encoded_len_for(lengths: &[usize]) -> usize { + #[allow(clippy::expect_used)] // See #1410 + unsafe { + // safe since BlankSliceEncoder is transparent over usize + let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]); + crate::varzerovec::components::compute_serializable_len::<_, _, Index32>(lengths) + .expect("Too many bytes to encode") as usize + } + } + + /// Construct a partially initialized MultiFieldsULE backed by a mutable byte buffer + pub fn new_from_lengths_partially_initialized<'a>( + lengths: &[usize], + output: &'a mut [u8], + ) -> &'a mut Self { + unsafe { + // safe since BlankSliceEncoder is transparent over usize + let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]); + crate::varzerovec::components::write_serializable_bytes::<_, _, Index32>( + lengths, output, + ); + debug_assert!( + <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(), + "Encoded slice must be valid VarZeroSlice" + ); + // Safe since write_serializable_bytes produces a valid VarZeroSlice buffer + let slice = <VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked_mut(output); + // safe since `Self` is transparent over VarZeroSlice + mem::transmute::<&mut VarZeroSlice<_, Index32>, &mut Self>(slice) + } + } + + /// Given a buffer of size obtained by [`Self::compute_encoded_len_for()`], write element A to index idx + /// + /// # Safety + /// - `idx` must be in range + /// - `T` must be the appropriate type expected by the custom derive in this usage of this type + #[inline] + pub unsafe fn set_field_at<T: VarULE + ?Sized, A: EncodeAsVarULE<T> + ?Sized>( + 
&mut self, + idx: usize, + value: &A, + ) { + value.encode_var_ule_write(self.0.get_bytes_at_mut(idx)) + } + + /// Validate field at `index` to see if it is a valid `T` VarULE type + /// + /// # Safety + /// + /// - `index` must be in range + #[inline] + pub unsafe fn validate_field<T: VarULE + ?Sized>( + &self, + index: usize, + ) -> Result<(), ZeroVecError> { + T::validate_byte_slice(self.0.get_unchecked(index)) + } + + /// Get field at `index` as a value of type T + /// + /// # Safety + /// + /// - `index` must be in range + /// - Element at `index` must have been created with the VarULE type T + #[inline] + pub unsafe fn get_field<T: VarULE + ?Sized>(&self, index: usize) -> &T { + T::from_byte_slice_unchecked(self.0.get_unchecked(index)) + } + + /// Construct from a byte slice + /// + /// # Safety + /// - byte slice must be a valid VarZeroSlice<[u8]> + #[inline] + pub unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // &Self is transparent over &VZS<..> + mem::transmute(<VarZeroSlice<[u8]>>::from_byte_slice_unchecked(bytes)) + } +} + +/// This lets us conveniently use the EncodeAsVarULE functionality to create +/// `VarZeroVec<[u8]>`s that have the right amount of space for elements +/// without having to duplicate any unsafe code +#[repr(transparent)] +struct BlankSliceEncoder(usize); + +unsafe impl EncodeAsVarULE<[u8]> for BlankSliceEncoder { + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + self.0 + } + + #[inline] + fn encode_var_ule_write(&self, _dst: &mut [u8]) { + // do nothing + } +} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. MultiFieldsULE does not include any uninitialized or padding bytes (achieved by being transparent over a VarULE type) +// 2. MultiFieldsULE is aligned to 1 byte (achieved by being transparent over a VarULE type) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. All other methods are defaulted +// 7. `MultiFieldsULE` byte equality is semantic equality (achieved by being transparent over a VarULE type) +unsafe impl VarULE for MultiFieldsULE { + /// Note: MultiFieldsULE is usually used in cases where one should be calling .validate_field() directly for + /// each field, rather than using the regular VarULE impl. + /// + /// This impl exists so that EncodeAsVarULE can work. + #[inline] + fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { + <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice) + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // &Self is transparent over &VZS<..> + mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked( + bytes, + )) + } +} diff --git a/third_party/rust/zerovec/src/ule/niche.rs b/third_party/rust/zerovec/src/ule/niche.rs new file mode 100644 index 0000000000..ae61faca0b --- /dev/null +++ b/third_party/rust/zerovec/src/ule/niche.rs @@ -0,0 +1,180 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core::{marker::Copy, mem::size_of}; + +use super::{AsULE, ULE}; + +/// The [`ULE`] types implementing this trait guarantee that [`NicheBytes::NICHE_BIT_PATTERN`] +/// can never occur as a valid byte representation of the type. +/// +/// Guarantees for a valid implementation. +/// 1. N must be equal to `core::mem::sizeo_of::<Self>()` or else it will +/// cause panics. +/// 2. The bit pattern [`NicheBytes::NICHE_BIT_PATTERN`] must not be incorrect as it would lead to +/// weird behaviour. +/// 3. The abstractions built on top of this trait must panic on an invalid N. +/// 4. The abstractions built on this trait that use type punning must ensure that type being +/// punned is [`ULE`]. +pub trait NicheBytes<const N: usize> { + const NICHE_BIT_PATTERN: [u8; N]; +} + +/// [`ULE`] type for [`NichedOption<U,N>`] where U implements [`NicheBytes`]. +/// The invalid bit pattern is used as the niche. +/// +/// This uses 1 byte less than [`crate::ule::OptionULE<U>`] to represent [`NichedOption<U,N>`]. +/// +/// # Example +/// +/// ``` +/// use core::num::NonZeroI8; +/// use zerovec::ule::NichedOption; +/// use zerovec::ZeroVec; +/// +/// let bytes = &[0x00, 0x01, 0x02, 0x00]; +/// let zv_no: ZeroVec<NichedOption<NonZeroI8, 1>> = +/// ZeroVec::parse_byte_slice(bytes) +/// .expect("Unable to parse as NichedOption."); +/// +/// assert_eq!(zv_no.get(0).map(|e| e.0), Some(None)); +/// assert_eq!(zv_no.get(1).map(|e| e.0), Some(NonZeroI8::new(1))); +/// assert_eq!(zv_no.get(2).map(|e| e.0), Some(NonZeroI8::new(2))); +/// assert_eq!(zv_no.get(3).map(|e| e.0), Some(None)); +/// ``` +// Invariants: +// The union stores [`NicheBytes::NICHE_BIT_PATTERN`] when None. +// Any other bit pattern is a valid. +#[repr(C)] +pub union NichedOptionULE<U: NicheBytes<N> + ULE, const N: usize> { + /// Invariant: The value is `niche` only if the bytes equal NICHE_BIT_PATTERN. + niche: [u8; N], + /// Invariant: The value is `valid` if the `niche` field does not match NICHE_BIT_PATTERN. + valid: U, +} + +impl<U: NicheBytes<N> + ULE + core::fmt::Debug, const N: usize> core::fmt::Debug + for NichedOptionULE<U, N> +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.get().fmt(f) + } +} + +impl<U: NicheBytes<N> + ULE, const N: usize> NichedOptionULE<U, N> { + /// New `NichedOptionULE<U, N>` from `Option<U>` + pub fn new(opt: Option<U>) -> Self { + assert!(N == core::mem::size_of::<U>()); + match opt { + Some(u) => Self { valid: u }, + None => Self { + niche: <U as NicheBytes<N>>::NICHE_BIT_PATTERN, + }, + } + } + + /// Convert to an `Option<U>` + pub fn get(self) -> Option<U> { + // Safety: The union stores NICHE_BIT_PATTERN when None otherwise a valid U + unsafe { + if self.niche == <U as NicheBytes<N>>::NICHE_BIT_PATTERN { + None + } else { + Some(self.valid) + } + } + } +} + +impl<U: NicheBytes<N> + ULE, const N: usize> Copy for NichedOptionULE<U, N> {} + +impl<U: NicheBytes<N> + ULE, const N: usize> Clone for NichedOptionULE<U, N> { + fn clone(&self) -> Self { + *self + } +} + +impl<U: NicheBytes<N> + ULE + PartialEq, const N: usize> PartialEq for NichedOptionULE<U, N> { + fn eq(&self, other: &Self) -> bool { + self.get().eq(&other.get()) + } +} + +impl<U: NicheBytes<N> + ULE + Eq, const N: usize> Eq for NichedOptionULE<U, N> {} + +/// Safety for ULE trait +/// 1. NichedOptionULE does not have any padding bytes due to `#[repr(C)]` on a struct +/// containing only ULE fields. +/// NichedOptionULE either contains NICHE_BIT_PATTERN or valid U byte sequences. 
+/// In both cases the data is initialized. +/// 2. NichedOptionULE is aligned to 1 byte due to `#[repr(packed)]` on a struct containing only +/// ULE fields. +/// 3. validate_byte_slice impl returns an error if invalid bytes are encountered. +/// 4. validate_byte_slice impl returns an error there are extra bytes. +/// 5. The other ULE methods are left to their default impl. +/// 6. NichedOptionULE equality is based on ULE equality of the subfield, assuming that NicheBytes +/// has been implemented correctly (this is a correctness but not a safety guarantee). +unsafe impl<U: NicheBytes<N> + ULE, const N: usize> ULE for NichedOptionULE<U, N> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), crate::ZeroVecError> { + let size = size_of::<Self>(); + // The implemention is only correct if NICHE_BIT_PATTERN has same number of bytes as the + // type. + debug_assert!(N == core::mem::size_of::<U>()); + + // The bytes should fully transmute to a collection of Self + if bytes.len() % size != 0 { + return Err(crate::ZeroVecError::length::<Self>(bytes.len())); + } + bytes.chunks(size).try_for_each(|chunk| { + // Associated const cannot be referenced in a pattern + // https://doc.rust-lang.org/error-index.html#E0158 + if chunk == <U as NicheBytes<N>>::NICHE_BIT_PATTERN { + Ok(()) + } else { + U::validate_byte_slice(chunk) + } + }) + } +} + +/// Optional type which uses [`NichedOptionULE<U,N>`] as ULE type. +/// The implementors guarantee that `N == core::mem::sizeo_of::<Self>()` +/// [`repr(transparent)`] guarantees that the layout is same as [`Option<U>`] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[repr(transparent)] +#[non_exhaustive] +pub struct NichedOption<U, const N: usize>(pub Option<U>); + +impl<U, const N: usize> NichedOption<U, N> { + pub const fn new(o: Option<U>) -> Self { + Self(o) + } +} + +impl<U, const N: usize> Default for NichedOption<U, N> { + fn default() -> Self { + Self(None) + } +} + +impl<U, const N: usize> From<Option<U>> for NichedOption<U, N> { + fn from(o: Option<U>) -> Self { + Self(o) + } +} + +impl<U: AsULE, const N: usize> AsULE for NichedOption<U, N> +where + U::ULE: NicheBytes<N>, +{ + type ULE = NichedOptionULE<U::ULE, N>; + + fn to_unaligned(self) -> Self::ULE { + NichedOptionULE::new(self.0.map(U::to_unaligned)) + } + + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self(unaligned.get().map(U::from_unaligned)) + } +} diff --git a/third_party/rust/zerovec/src/ule/option.rs b/third_party/rust/zerovec/src/ule/option.rs new file mode 100644 index 0000000000..9b0dc5b28a --- /dev/null +++ b/third_party/rust/zerovec/src/ule/option.rs @@ -0,0 +1,264 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+
+use super::*;
+use core::cmp::Ordering;
+use core::marker::PhantomData;
+use core::mem::{self, MaybeUninit};
+
+/// This type is the [`ULE`] type for `Option<U>` where `U` is a [`ULE`] type.
+///
+/// # Example
+///
+/// ```rust
+/// use zerovec::ZeroVec;
+///
+/// let z = ZeroVec::alloc_from_slice(&[
+///     Some('a'),
+///     Some('á'),
+///     Some('ø'),
+///     None,
+///     Some('ł'),
+/// ]);
+///
+/// assert_eq!(z.get(2), Some(Some('ø')));
+/// assert_eq!(z.get(3), Some(None));
+/// ```
+// Invariants:
+// The MaybeUninit is zeroed when None (bool = false),
+// and is valid when Some (bool = true)
+#[repr(packed)]
+pub struct OptionULE<U>(bool, MaybeUninit<U>);
+
+impl<U: Copy> OptionULE<U> {
+    /// Obtain this as an `Option<U>`
+    pub fn get(self) -> Option<U> {
+        if self.0 {
+            unsafe {
+                // safety: self.0 is true so the MaybeUninit is valid
+                Some(self.1.assume_init())
+            }
+        } else {
+            None
+        }
+    }
+
+    /// Construct an `OptionULE<U>` from an equivalent `Option<U>`
+    pub fn new(opt: Option<U>) -> Self {
+        if let Some(inner) = opt {
+            Self(true, MaybeUninit::new(inner))
+        } else {
+            Self(false, MaybeUninit::zeroed())
+        }
+    }
+}
+
+impl<U: Copy + core::fmt::Debug> core::fmt::Debug for OptionULE<U> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.get().fmt(f)
+    }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. OptionULE does not include any uninitialized or padding bytes.
+//    (achieved by `#[repr(packed)]` on a struct containing only ULE fields,
+//    in the context of this impl. The MaybeUninit is valid for all byte sequences, and we only generate
+//    zeroed or valid-T byte sequences to fill it)
+// 2. OptionULE is aligned to 1 byte.
+//    (achieved by `#[repr(packed)]` on a struct containing only ULE fields, in the context of this impl)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. OptionULE byte equality is semantic equality by relying on the ULE equality
+//    invariant on the subfields
+unsafe impl<U: ULE> ULE for OptionULE<U> {
+    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+        let size = mem::size_of::<Self>();
+        if bytes.len() % size != 0 {
+            return Err(ZeroVecError::length::<Self>(bytes.len()));
+        }
+        for chunk in bytes.chunks(size) {
+            #[allow(clippy::indexing_slicing)] // `chunk` will have enough bytes to fit Self
+            match chunk[0] {
+                // https://doc.rust-lang.org/reference/types/boolean.html
+                // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+                0 => {
+                    if !chunk[1..].iter().all(|x| *x == 0) {
+                        return Err(ZeroVecError::parse::<Self>());
+                    }
+                }
+                1 => U::validate_byte_slice(&chunk[1..])?,
+                _ => return Err(ZeroVecError::parse::<Self>()),
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<T: AsULE> AsULE for Option<T> {
+    type ULE = OptionULE<T::ULE>;
+    fn to_unaligned(self) -> OptionULE<T::ULE> {
+        OptionULE::new(self.map(T::to_unaligned))
+    }
+
+    fn from_unaligned(other: OptionULE<T::ULE>) -> Self {
+        other.get().map(T::from_unaligned)
+    }
+}
+
+impl<U: Copy> Copy for OptionULE<U> {}
+
+impl<U: Copy> Clone for OptionULE<U> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<U: Copy + PartialEq> PartialEq for OptionULE<U> {
+    fn eq(&self, other: &Self) -> bool {
+        self.get().eq(&other.get())
+    }
+}
+
+impl<U: Copy + Eq> Eq for OptionULE<U> {}
+
+/// A type allowing one to represent `Option<U>` for [`VarULE`] `U` types.
+/// +/// ```rust +/// use zerovec::ule::OptionVarULE; +/// use zerovec::VarZeroVec; +/// +/// let mut zv: VarZeroVec<OptionVarULE<str>> = VarZeroVec::new(); +/// +/// zv.make_mut().push(&None::<&str>); +/// zv.make_mut().push(&Some("hello")); +/// zv.make_mut().push(&Some("world")); +/// zv.make_mut().push(&None::<&str>); +/// +/// assert_eq!(zv.get(0).unwrap().as_ref(), None); +/// assert_eq!(zv.get(1).unwrap().as_ref(), Some("hello")); +/// ``` +// The slice field is empty when None (bool = false), +// and is a valid T when Some (bool = true) +#[repr(packed)] +pub struct OptionVarULE<U: VarULE + ?Sized>(PhantomData<U>, bool, [u8]); + +impl<U: VarULE + ?Sized> OptionVarULE<U> { + /// Obtain this as an `Option<&U>` + pub fn as_ref(&self) -> Option<&U> { + if self.1 { + unsafe { + // Safety: byte field is a valid T if boolean field is true + Some(U::from_byte_slice_unchecked(&self.2)) + } + } else { + None + } + } +} + +impl<U: VarULE + ?Sized + core::fmt::Debug> core::fmt::Debug for OptionVarULE<U> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.as_ref().fmt(f) + } +} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. OptionVarULE<T> does not include any uninitialized or padding bytes +// (achieved by being repr(packed) on ULE types) +// 2. OptionVarULE<T> is aligned to 1 byte (achieved by being repr(packed) on ULE types) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. All other methods are defaulted +// 7. OptionVarULE<T> byte equality is semantic equality (achieved by being an aggregate) +unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> { + #[inline] + fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { + if slice.is_empty() { + return Err(ZeroVecError::length::<Self>(slice.len())); + } + #[allow(clippy::indexing_slicing)] // slice already verified to be nonempty + match slice[0] { + // https://doc.rust-lang.org/reference/types/boolean.html + // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 + 0 => { + if slice.len() != 1 { + Err(ZeroVecError::length::<Self>(slice.len())) + } else { + Ok(()) + } + } + 1 => U::validate_byte_slice(&slice[1..]), + _ => Err(ZeroVecError::parse::<Self>()), + } + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + let entire_struct_as_slice: *const [u8] = + ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1); + &*(entire_struct_as_slice as *const Self) + } +} + +unsafe impl<T, U> EncodeAsVarULE<OptionVarULE<U>> for Option<T> +where + T: EncodeAsVarULE<U>, + U: VarULE + ?Sized, +{ + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { + // unnecessary if the other two are implemented + unreachable!() + } + + #[inline] + fn encode_var_ule_len(&self) -> usize { + if let Some(ref inner) = *self { + // slice + boolean + 1 + inner.encode_var_ule_len() + } else { + // boolean + empty slice + 1 + } + } + + #[allow(clippy::indexing_slicing)] // This method is allowed to panic when lengths are invalid + fn encode_var_ule_write(&self, dst: &mut [u8]) { + if let Some(ref inner) = *self { + debug_assert!( + !dst.is_empty(), + "OptionVarULE must have at least one byte when Some" + ); + dst[0] = 1; + inner.encode_var_ule_write(&mut dst[1..]); + } else { + 
debug_assert!( + dst.len() == 1, + "OptionVarULE must have exactly one byte when None" + ); + dst[0] = 0; + } + } +} + +impl<U: VarULE + ?Sized + PartialEq> PartialEq for OptionVarULE<U> { + fn eq(&self, other: &Self) -> bool { + self.as_ref().eq(&other.as_ref()) + } +} + +impl<U: VarULE + ?Sized + Eq> Eq for OptionVarULE<U> {} + +impl<U: VarULE + ?Sized + PartialOrd> PartialOrd for OptionVarULE<U> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.as_ref().partial_cmp(&other.as_ref()) + } +} + +impl<U: VarULE + ?Sized + Ord> Ord for OptionVarULE<U> { + fn cmp(&self, other: &Self) -> Ordering { + self.as_ref().cmp(&other.as_ref()) + } +} diff --git a/third_party/rust/zerovec/src/ule/plain.rs b/third_party/rust/zerovec/src/ule/plain.rs new file mode 100644 index 0000000000..f244f6b682 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/plain.rs @@ -0,0 +1,366 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] +//! ULE implementation for Plain Old Data types, including all sized integers. + +use super::*; +use crate::impl_ule_from_array; +use crate::ZeroSlice; +use core::num::{NonZeroI8, NonZeroU8}; + +/// A u8 array of little-endian data with infallible conversions to and from &[u8]. +#[repr(transparent)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)] +#[allow(clippy::exhaustive_structs)] // newtype +pub struct RawBytesULE<const N: usize>(pub [u8; N]); + +impl<const N: usize> RawBytesULE<N> { + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + #[inline] + pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { + let data = bytes.as_mut_ptr(); + let len = bytes.len() / N; + // Safe because Self is transparent over [u8; N] + unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) } + } +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. RawBytesULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. RawBytesULE is aligned to 1 byte. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. +// 5. The other ULE methods use the default impl. +// 6. RawBytesULE byte equality is semantic equality +unsafe impl<const N: usize> ULE for RawBytesULE<N> { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % N == 0 { + // Safe because Self is transparent over [u8; N] + Ok(()) + } else { + Err(ZeroVecError::length::<Self>(bytes.len())) + } + } +} + +impl<const N: usize> From<[u8; N]> for RawBytesULE<N> { + #[inline] + fn from(le_bytes: [u8; N]) -> Self { + Self(le_bytes) + } +} + +macro_rules! impl_byte_slice_size { + ($unsigned:ty, $size:literal) => { + impl RawBytesULE<$size> { + #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")] + #[inline] + pub fn as_unsigned_int(&self) -> $unsigned { + <$unsigned as $crate::ule::AsULE>::from_unaligned(*self) + } + + #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. 
This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")] + #[inline] + pub const fn from_aligned(value: $unsigned) -> Self { + Self(value.to_le_bytes()) + } + + impl_ule_from_array!( + $unsigned, + RawBytesULE<$size>, + RawBytesULE([0; $size]) + ); + } + }; +} + +macro_rules! impl_const_constructors { + ($base:ty, $size:literal) => { + impl ZeroSlice<$base> { + /// This function can be used for constructing ZeroVecs in a const context, avoiding + /// parsing checks. + /// + /// This cannot be generic over T because of current limitations in `const`, but if + /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`] + /// instead. + /// + /// See [`ZeroSlice::cast()`] for an example. + pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + let len = bytes.len(); + #[allow(clippy::modulo_one)] + if len % $size == 0 { + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + Err(ZeroVecError::InvalidLength { + ty: concat!("<const construct: ", $size, ">"), + len, + }) + } + } + } + }; +} + +macro_rules! impl_byte_slice_type { + ($single_fn:ident, $type:ty, $size:literal) => { + impl From<$type> for RawBytesULE<$size> { + #[inline] + fn from(value: $type) -> Self { + Self(value.to_le_bytes()) + } + } + impl AsULE for $type { + type ULE = RawBytesULE<$size>; + #[inline] + fn to_unaligned(self) -> Self::ULE { + RawBytesULE(self.to_le_bytes()) + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + <$type>::from_le_bytes(unaligned.0) + } + } + // EqULE is true because $type and RawBytesULE<$size> + // have the same byte sequence on little-endian + unsafe impl EqULE for $type {} + + impl RawBytesULE<$size> { + pub const fn $single_fn(v: $type) -> Self { + RawBytesULE(v.to_le_bytes()) + } + } + }; +} + +macro_rules! impl_byte_slice_unsigned_type { + ($type:ty, $size:literal) => { + impl_byte_slice_type!(from_unsigned, $type, $size); + }; +} + +macro_rules! impl_byte_slice_signed_type { + ($type:ty, $size:literal) => { + impl_byte_slice_type!(from_signed, $type, $size); + }; +} + +impl_byte_slice_size!(u16, 2); +impl_byte_slice_size!(u32, 4); +impl_byte_slice_size!(u64, 8); +impl_byte_slice_size!(u128, 16); + +impl_byte_slice_unsigned_type!(u16, 2); +impl_byte_slice_unsigned_type!(u32, 4); +impl_byte_slice_unsigned_type!(u64, 8); +impl_byte_slice_unsigned_type!(u128, 16); + +impl_byte_slice_signed_type!(i16, 2); +impl_byte_slice_signed_type!(i32, 4); +impl_byte_slice_signed_type!(i64, 8); +impl_byte_slice_signed_type!(i128, 16); + +impl_const_constructors!(u8, 1); +impl_const_constructors!(u16, 2); +impl_const_constructors!(u32, 4); +impl_const_constructors!(u64, 8); +impl_const_constructors!(u128, 16); + +// Note: The f32 and f64 const constructors currently have limited use because +// `f32::to_le_bytes` is not yet const. + +impl_const_constructors!(bool, 1); + +// Safety (based on the safety checklist on the ULE trait): +// 1. u8 does not include any uninitialized or padding bytes. +// 2. u8 is aligned to 1 byte. +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. 
u8 byte equality is semantic equality +unsafe impl ULE for u8 { + #[inline] + fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { + Ok(()) + } +} + +impl AsULE for u8 { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// EqULE is true because u8 is its own ULE. +unsafe impl EqULE for u8 {} + +// Safety (based on the safety checklist on the ULE trait): +// 1. NonZeroU8 does not include any uninitialized or padding bytes. +// 2. NonZeroU8 is aligned to 1 byte. +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (0x00). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. NonZeroU8 byte equality is semantic equality +unsafe impl ULE for NonZeroU8 { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + bytes.iter().try_for_each(|b| { + if *b == 0x00 { + Err(ZeroVecError::parse::<Self>()) + } else { + Ok(()) + } + }) + } +} + +impl AsULE for NonZeroU8 { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +unsafe impl EqULE for NonZeroU8 {} + +impl NicheBytes<1> for NonZeroU8 { + const NICHE_BIT_PATTERN: [u8; 1] = [0x00]; +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. i8 does not include any uninitialized or padding bytes. +// 2. i8 is aligned to 1 byte. +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. i8 byte equality is semantic equality +unsafe impl ULE for i8 { + #[inline] + fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { + Ok(()) + } +} + +impl AsULE for i8 { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// EqULE is true because i8 is its own ULE. +unsafe impl EqULE for i8 {} + +impl AsULE for NonZeroI8 { + type ULE = NonZeroU8; + #[inline] + fn to_unaligned(self) -> Self::ULE { + // Safety: NonZeroU8 and NonZeroI8 have same size + unsafe { core::mem::transmute(self) } + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + // Safety: NonZeroU8 and NonZeroI8 have same size + unsafe { core::mem::transmute(unaligned) } + } +} + +// These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation +// on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits +// +// The only potential problem is that some older platforms treat signaling NaNs differently. This is +// still quite portable, signalingness is not typically super important. 
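+//
+// A minimal sketch of what this enables (reusing the `ZeroVec` API used elsewhere in this
+// crate; the values are arbitrary): floats round-trip through their bit patterns.
+//
+//     use zerovec::ZeroVec;
+//
+//     let zv: ZeroVec<f64> = ZeroVec::alloc_from_slice(&[0.5, -1.25, f64::NAN]);
+//     assert_eq!(zv.get(1), Some(-1.25));
+//     // Still a NaN after the round trip, even though NaN != NaN:
+//     assert!(zv.get(2).unwrap().is_nan());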
+ +impl AsULE for f32 { + type ULE = RawBytesULE<4>; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self.to_bits().to_unaligned() + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self::from_bits(u32::from_unaligned(unaligned)) + } +} + +impl AsULE for f64 { + type ULE = RawBytesULE<8>; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self.to_bits().to_unaligned() + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self::from_bits(u64::from_unaligned(unaligned)) + } +} + +// The from_bits documentation mentions that they have identical byte representations to integers +// and EqULE only cares about LE systems +unsafe impl EqULE for f32 {} +unsafe impl EqULE for f64 {} + +// The bool impl is not as efficient as it could be +// We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking +// for better bitpacking + +// Safety (based on the safety checklist on the ULE trait): +// 1. bool does not include any uninitialized or padding bytes (the remaining 7 bytes in bool are by definition zero) +// 2. bool is aligned to 1 byte. +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). +// 5. The other ULE methods use the default impl. +// 6. bool byte equality is semantic equality +unsafe impl ULE for bool { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + for byte in bytes { + // https://doc.rust-lang.org/reference/types/boolean.html + // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 + if *byte > 1 { + return Err(ZeroVecError::parse::<Self>()); + } + } + Ok(()) + } +} + +impl AsULE for bool { + type ULE = Self; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +// EqULE is true because bool is its own ULE. +unsafe impl EqULE for bool {} diff --git a/third_party/rust/zerovec/src/ule/slices.rs b/third_party/rust/zerovec/src/ule/slices.rs new file mode 100644 index 0000000000..75ea57e02e --- /dev/null +++ b/third_party/rust/zerovec/src/ule/slices.rs @@ -0,0 +1,103 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::*; +use core::str; + +// Safety (based on the safety checklist on the ULE trait): +// 1. [T; N] does not include any uninitialized or padding bytes since T is ULE +// 2. [T; N] is aligned to 1 byte since T is ULE +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. +// 5. The other ULE methods use the default impl. +// 6. 
[T; N] byte equality is semantic equality since T is ULE +unsafe impl<T: ULE, const N: usize> ULE for [T; N] { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + // a slice of multiple Selfs is equivalent to just a larger slice of Ts + T::validate_byte_slice(bytes) + } +} + +impl<T: AsULE, const N: usize> AsULE for [T; N] { + type ULE = [T::ULE; N]; + #[inline] + fn to_unaligned(self) -> Self::ULE { + self.map(T::to_unaligned) + } + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned.map(T::from_unaligned) + } +} + +unsafe impl<T: EqULE, const N: usize> EqULE for [T; N] {} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. str does not include any uninitialized or padding bytes. +// 2. str is aligned to 1 byte. +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. `parse_byte_slice()` is equivalent to `validate_byte_slice()` followed by `from_byte_slice_unchecked()` +// 7. str byte equality is semantic equality +unsafe impl VarULE for str { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>())?; + Ok(()) + } + + #[inline] + fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> { + str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>()) + } + /// Invariant: must be safe to call when called on a slice that previously + /// succeeded with `parse_byte_slice` + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + str::from_utf8_unchecked(bytes) + } +} + +/// Note: VarULE is well-defined for all `[T]` where `T: ULE`, but [`ZeroSlice`] is more ergonomic +/// when `T` is a low-level ULE type. For example: +/// +/// ```no_run +/// # use zerovec::ZeroSlice; +/// # use zerovec::VarZeroVec; +/// # use zerovec::ule::AsULE; +/// // OK: [u8] is a useful type +/// let _: VarZeroVec<[u8]> = unimplemented!(); +/// +/// // Technically works, but [u32::ULE] is not very useful +/// let _: VarZeroVec<[<u32 as AsULE>::ULE]> = unimplemented!(); +/// +/// // Better: ZeroSlice<u32> +/// let _: VarZeroVec<ZeroSlice<u32>> = unimplemented!(); +/// ``` +/// +/// [`ZeroSlice`]: crate::ZeroSlice +// Safety (based on the safety checklist on the VarULE trait): +// 1. [T] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type) +// 2. [T] is aligned to 1 byte (achieved by being a slice of a ULE type) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid. +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. All other methods are defaulted +// 7. 
`[T]` byte equality is semantic equality (achieved by being a slice of a ULE type) +unsafe impl<T> VarULE for [T] +where + T: ULE, +{ + #[inline] + fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { + T::validate_byte_slice(slice) + } + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + T::from_byte_slice_unchecked(bytes) + } +} diff --git a/third_party/rust/zerovec/src/ule/tuple.rs b/third_party/rust/zerovec/src/ule/tuple.rs new file mode 100644 index 0000000000..3e0f291b3f --- /dev/null +++ b/third_party/rust/zerovec/src/ule/tuple.rs @@ -0,0 +1,179 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! ULE impls for tuples. +//! +//! Rust does not guarantee the layout of tuples, so ZeroVec defines its own tuple ULE types. +//! +//! Impls are defined for tuples of up to 6 elements. For longer tuples, use a custom struct +//! with [`#[make_ule]`](crate::make_ule). +//! +//! # Examples +//! +//! ``` +//! use zerovec::ZeroVec; +//! +//! // ZeroVec of tuples! +//! let zerovec: ZeroVec<(u32, char)> = [(1, 'a'), (1234901, 'ๅ'), (100, 'เค
')] +//! .iter() +//! .copied() +//! .collect(); +//! +//! assert_eq!(zerovec.get(1), Some((1234901, 'ๅ'))); +//! ``` + +use super::*; +use core::fmt; +use core::mem; + +macro_rules! tuple_ule { + ($name:ident, $len:literal, [ $($t:ident $i:tt),+ ]) => { + #[doc = concat!("ULE type for tuples with ", $len, " elements.")] + #[repr(packed)] + #[allow(clippy::exhaustive_structs)] // stable + pub struct $name<$($t),+>($(pub $t),+); + + // Safety (based on the safety checklist on the ULE trait): + // 1. TupleULE does not include any uninitialized or padding bytes. + // (achieved by `#[repr(packed)]` on a struct containing only ULE fields) + // 2. TupleULE is aligned to 1 byte. + // (achieved by `#[repr(packed)]` on a struct containing only ULE fields) + // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. + // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. + // 5. The other ULE methods use the default impl. + // 6. TupleULE byte equality is semantic equality by relying on the ULE equality + // invariant on the subfields + unsafe impl<$($t: ULE),+> ULE for $name<$($t),+> { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + // expands to: 0size + mem::size_of::<A>() + mem::size_of::<B>(); + let ule_bytes = 0usize $(+ mem::size_of::<$t>())+; + if bytes.len() % ule_bytes != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + for chunk in bytes.chunks(ule_bytes) { + let mut i = 0; + $( + let j = i; + i += mem::size_of::<$t>(); + #[allow(clippy::indexing_slicing)] // length checked + <$t>::validate_byte_slice(&chunk[j..i])?; + )+ + } + Ok(()) + } + } + + impl<$($t: AsULE),+> AsULE for ($($t),+) { + type ULE = $name<$(<$t>::ULE),+>; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + $name($( + self.$i.to_unaligned() + ),+) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + ($( + <$t>::from_unaligned(unaligned.$i) + ),+) + } + } + + impl<$($t: fmt::Debug + ULE),+> fmt::Debug for $name<$($t),+> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + ($(self.$i),+).fmt(f) + } + } + + // We need manual impls since `#[derive()]` is disallowed on packed types + impl<$($t: PartialEq + ULE),+> PartialEq for $name<$($t),+> { + fn eq(&self, other: &Self) -> bool { + ($(self.$i),+).eq(&($(other.$i),+)) + } + } + + impl<$($t: Eq + ULE),+> Eq for $name<$($t),+> {} + + impl<$($t: PartialOrd + ULE),+> PartialOrd for $name<$($t),+> { + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + ($(self.$i),+).partial_cmp(&($(other.$i),+)) + } + } + + impl<$($t: Ord + ULE),+> Ord for $name<$($t),+> { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + ($(self.$i),+).cmp(&($(other.$i),+)) + } + } + + impl<$($t: ULE),+> Clone for $name<$($t),+> { + fn clone(&self) -> Self { + *self + } + } + + impl<$($t: ULE),+> Copy for $name<$($t),+> {} + + impl<'a, $($t: Ord + AsULE + 'static),+> crate::map::ZeroMapKV<'a> for ($($t),+) { + type Container = crate::ZeroVec<'a, ($($t),+)>; + type Slice = crate::ZeroSlice<($($t),+)>; + type GetType = $name<$(<$t>::ULE),+>; + type OwnedType = ($($t),+); + } + }; +} + +tuple_ule!(Tuple2ULE, "2", [ A 0, B 1 ]); +tuple_ule!(Tuple3ULE, "3", [ A 0, B 1, C 2 ]); +tuple_ule!(Tuple4ULE, "4", [ A 0, B 1, C 2, D 3 ]); +tuple_ule!(Tuple5ULE, "5", [ A 0, B 1, C 2, D 3, E 4 ]); +tuple_ule!(Tuple6ULE, "6", [ A 0, B 1, C 2, D 3, E 4, F 5 ]); + +#[test] +fn test_pairule_validate() { + use crate::ZeroVec; + let vec: Vec<(u32, char)> = vec![(1, 'a'), 
(1234901, '啊'), (100, 'अ')];
+    let zerovec: ZeroVec<(u32, char)> = vec.iter().copied().collect();
+    let bytes = zerovec.as_bytes();
+    let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+    assert_eq!(zerovec, zerovec2);
+
+    // Test failed validation with a correctly sized but differently constrained tuple
+    // Note: 1234901 is not a valid char
+    let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes);
+    assert!(zerovec3.is_err());
+}
+
+#[test]
+fn test_tripleule_validate() {
+    use crate::ZeroVec;
+    let vec: Vec<(u32, char, i8)> = vec![(1, 'a', -5), (1234901, '啊', 3), (100, 'अ', -127)];
+    let zerovec: ZeroVec<(u32, char, i8)> = vec.iter().copied().collect();
+    let bytes = zerovec.as_bytes();
+    let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+    assert_eq!(zerovec, zerovec2);
+
+    // Test failed validation with a correctly sized but differently constrained tuple
+    // Note: 1234901 is not a valid char
+    let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes);
+    assert!(zerovec3.is_err());
+}
+
+#[test]
+fn test_quadule_validate() {
+    use crate::ZeroVec;
+    let vec: Vec<(u32, char, i8, u16)> =
+        vec![(1, 'a', -5, 3), (1234901, '啊', 3, 11), (100, 'अ
', -127, 0)]; + let zerovec: ZeroVec<(u32, char, i8, u16)> = vec.iter().copied().collect(); + let bytes = zerovec.as_bytes(); + let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap(); + assert_eq!(zerovec, zerovec2); + + // Test failed validation with a correctly sized but differently constrained tuple + // Note: 1234901 is not a valid char + let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes); + assert!(zerovec3.is_err()); +} diff --git a/third_party/rust/zerovec/src/ule/unvalidated.rs b/third_party/rust/zerovec/src/ule/unvalidated.rs new file mode 100644 index 0000000000..21cfb0c0d5 --- /dev/null +++ b/third_party/rust/zerovec/src/ule/unvalidated.rs @@ -0,0 +1,527 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::{AsULE, RawBytesULE, VarULE}; +use crate::ule::EqULE; +use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError}; +use alloc::boxed::Box; +use core::cmp::Ordering; +use core::fmt; +use core::ops::Deref; + +/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant. +/// +/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For +/// example, strings that are keys of a map don't need to ever be reified as `str`s. +/// +/// [`UnvalidatedStr`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`]. +/// +/// The main advantage of this type over `[u8]` is that it serializes as a string in +/// human-readable formats like JSON. +/// +/// # Examples +/// +/// Using an [`UnvalidatedStr`] as the key of a [`ZeroMap`]: +/// +/// ``` +/// use zerovec::ule::UnvalidatedStr; +/// use zerovec::ZeroMap; +/// +/// let map: ZeroMap<UnvalidatedStr, usize> = [ +/// (UnvalidatedStr::from_str("abc"), 11), +/// (UnvalidatedStr::from_str("def"), 22), +/// (UnvalidatedStr::from_str("ghi"), 33), +/// ] +/// .into_iter() +/// .collect(); +/// +/// let key = "abc"; +/// let value = map.get_copied_by(|uvstr| uvstr.as_bytes().cmp(key.as_bytes())); +/// assert_eq!(Some(11), value); +/// ``` +/// +/// [`ZeroMap`]: crate::ZeroMap +#[repr(transparent)] +#[derive(PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] // transparent newtype +pub struct UnvalidatedStr([u8]); + +impl fmt::Debug for UnvalidatedStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Debug as a string if possible + match self.try_as_str() { + Ok(s) => fmt::Debug::fmt(s, f), + Err(_) => fmt::Debug::fmt(&self.0, f), + } + } +} + +impl UnvalidatedStr { + /// Create a [`UnvalidatedStr`] from a byte slice. + #[inline] + pub const fn from_bytes(other: &[u8]) -> &Self { + // Safety: UnvalidatedStr is transparent over [u8] + unsafe { core::mem::transmute(other) } + } + + /// Create a [`UnvalidatedStr`] from a string slice. + #[inline] + pub const fn from_str(s: &str) -> &Self { + Self::from_bytes(s.as_bytes()) + } + + /// Create a [`UnvalidatedStr`] from boxed bytes. + #[inline] + pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> { + // Safety: UnvalidatedStr is transparent over [u8] + unsafe { core::mem::transmute(other) } + } + + /// Create a [`UnvalidatedStr`] from a boxed `str`. + #[inline] + pub fn from_boxed_str(other: Box<str>) -> Box<Self> { + Self::from_boxed_bytes(other.into_boxed_bytes()) + } + + /// Get the bytes from a [`UnvalidatedStr]. 
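+    ///
+    /// # Examples
+    ///
+    /// The bytes are returned as stored, with no UTF-8 validation:
+    ///
+    /// ```
+    /// use zerovec::ule::UnvalidatedStr;
+    ///
+    /// let uv = UnvalidatedStr::from_str("abc");
+    /// assert_eq!(uv.as_bytes(), b"abc");
+    /// ```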
+ #[inline] + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Attempt to convert a [`UnvalidatedStr`] to a `str`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedStr; + /// + /// static A: &UnvalidatedStr = UnvalidatedStr::from_bytes(b"abc"); + /// + /// let b = A.try_as_str().unwrap(); + /// assert_eq!(b, "abc"); + /// ``` + // Note: this is const starting in 1.63 + #[inline] + pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> { + core::str::from_utf8(&self.0) + } +} + +impl<'a> From<&'a str> for &'a UnvalidatedStr { + #[inline] + fn from(other: &'a str) -> Self { + UnvalidatedStr::from_str(other) + } +} + +impl From<Box<str>> for Box<UnvalidatedStr> { + #[inline] + fn from(other: Box<str>) -> Self { + UnvalidatedStr::from_boxed_str(other) + } +} + +impl Deref for UnvalidatedStr { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> ZeroMapKV<'a> for UnvalidatedStr { + type Container = VarZeroVec<'a, UnvalidatedStr>; + type Slice = VarZeroSlice<UnvalidatedStr>; + type GetType = UnvalidatedStr; + type OwnedType = Box<UnvalidatedStr>; +} + +// Safety (based on the safety checklist on the VarULE trait): +// 1. UnvalidatedStr does not include any uninitialized or padding bytes (transparent over a ULE) +// 2. UnvalidatedStr is aligned to 1 byte (transparent over a ULE) +// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid (impossible) +// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety (impossible) +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data (returns the argument directly) +// 6. All other methods are defaulted +// 7. `[T]` byte equality is semantic equality (transparent over a ULE) +unsafe impl VarULE for UnvalidatedStr { + #[inline] + fn validate_byte_slice(_: &[u8]) -> Result<(), ZeroVecError> { + Ok(()) + } + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + UnvalidatedStr::from_bytes(bytes) + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl serde::Serialize for UnvalidatedStr { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + use serde::ser::Error; + let s = self + .try_as_str() + .map_err(|_| S::Error::custom("invalid UTF-8 in UnvalidatedStr"))?; + if serializer.is_human_readable() { + serializer.serialize_str(s) + } else { + serializer.serialize_bytes(s.as_bytes()) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for Box<UnvalidatedStr> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + if deserializer.is_human_readable() { + let boxed_str = Box::<str>::deserialize(deserializer)?; + Ok(UnvalidatedStr::from_boxed_str(boxed_str)) + } else { + let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?; + Ok(UnvalidatedStr::from_boxed_bytes(boxed_bytes)) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'de, 'a> serde::Deserialize<'de> for &'a UnvalidatedStr +where + 'de: 'a, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + if deserializer.is_human_readable() { + let s = 
<&str>::deserialize(deserializer)?; + Ok(UnvalidatedStr::from_str(s)) + } else { + let bytes = <&[u8]>::deserialize(deserializer)?; + Ok(UnvalidatedStr::from_bytes(bytes)) + } + } +} + +/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not +/// validated as such. +/// +/// Use this type instead of `char` when you want to deal with data that is expected to be valid +/// Unicode scalar values, but you want control over when or if you validate that assumption. +/// +/// # Examples +/// +/// ``` +/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE}; +/// use zerovec::{ZeroSlice, ZeroVec}; +/// +/// // data known to be little-endian three-byte chunks of valid Unicode scalar values +/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01]; +/// // ground truth expectation +/// let real = ['h', 'i', '๐']; +/// +/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length"); +/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect(); +/// assert_eq!(&parsed, &real); +/// +/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect(); +/// let serialized_data = chars.as_bytes(); +/// assert_eq!(serialized_data, &data); +/// ``` +#[repr(transparent)] +#[derive(PartialEq, Eq, Clone, Copy, Hash)] +pub struct UnvalidatedChar([u8; 3]); + +impl UnvalidatedChar { + /// Create a [`UnvalidatedChar`] from a `char`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedChar; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(a.try_to_char().unwrap(), 'a'); + /// ``` + #[inline] + pub const fn from_char(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + #[inline] + #[doc(hidden)] + pub const fn from_u24(c: u32) -> Self { + let [u0, u1, u2, _u3] = c.to_le_bytes(); + Self([u0, u1, u2]) + } + + /// Attempt to convert a [`UnvalidatedChar`] to a `char`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::{AsULE, UnvalidatedChar}; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(a.try_to_char(), Ok('a')); + /// + /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into()); + /// assert!(matches!(b.try_to_char(), Err(_))); + /// ``` + #[inline] + pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> { + let [u0, u1, u2] = self.0; + char::try_from(u32::from_le_bytes([u0, u1, u2, 0])) + } + + /// Convert a [`UnvalidatedChar`] to a `char', returning [`char::REPLACEMENT_CHARACTER`] + /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::{AsULE, UnvalidatedChar}; + /// + /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into()); + /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER); + /// ``` + #[inline] + pub fn to_char_lossy(self) -> char { + self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER) + } + + /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is + /// a valid Unicode scalar value. + /// + /// # Safety + /// + /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order. 
+ /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedChar; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a'); + /// ``` + #[inline] + pub unsafe fn to_char_unchecked(self) -> char { + let [u0, u1, u2] = self.0; + char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0])) + } +} + +impl RawBytesULE<3> { + /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling + /// [`AsULE::to_unaligned`]. + #[inline] + pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self { + RawBytesULE(uc.0) + } +} + +impl AsULE for UnvalidatedChar { + type ULE = RawBytesULE<3>; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + RawBytesULE(self.0) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self(unaligned.0) + } +} + +// Safety: UnvalidatedChar is always the little-endian representation of a char, +// which corresponds to its AsULE::ULE type +unsafe impl EqULE for UnvalidatedChar {} + +impl fmt::Debug for UnvalidatedChar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Debug as a char if possible + match self.try_to_char() { + Ok(c) => fmt::Debug::fmt(&c, f), + Err(_) => fmt::Debug::fmt(&self.0, f), + } + } +} + +impl PartialOrd for UnvalidatedChar { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for UnvalidatedChar { + // custom implementation, as derived Ord would compare lexicographically + fn cmp(&self, other: &Self) -> Ordering { + let [a0, a1, a2] = self.0; + let a = u32::from_le_bytes([a0, a1, a2, 0]); + let [b0, b1, b2] = other.0; + let b = u32::from_le_bytes([b0, b1, b2, 0]); + a.cmp(&b) + } +} + +impl From<char> for UnvalidatedChar { + #[inline] + fn from(value: char) -> Self { + Self::from_char(value) + } +} + +impl TryFrom<UnvalidatedChar> for char { + type Error = core::char::CharTryFromError; + + #[inline] + fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> { + value.try_to_char() + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl serde::Serialize for UnvalidatedChar { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + use serde::ser::Error; + let c = self + .try_to_char() + .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?; + if serializer.is_human_readable() { + serializer.serialize_char(c) + } else { + self.0.serialize(serializer) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for UnvalidatedChar { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + if deserializer.is_human_readable() { + let c = <char>::deserialize(deserializer)?; + Ok(UnvalidatedChar::from_char(c)) + } else { + let bytes = <[u8; 3]>::deserialize(deserializer)?; + Ok(UnvalidatedChar(bytes)) + } + } +} + +#[cfg(feature = "databake")] +impl databake::Bake for UnvalidatedChar { + fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { + match self.try_to_char() { + Ok(ch) => { + env.insert("zerovec"); + let ch = ch.bake(env); + databake::quote! { + zerovec::ule::UnvalidatedChar::from_char(#ch) + } + } + Err(_) => { + env.insert("zerovec"); + let u24 = u32::from_le_bytes([self.0[0], self.0[1], self.0[2], 0]); + databake::quote! 
{
+                    zerovec::ule::UnvalidatedChar::from_u24(#u24)
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::ZeroVec;
+
+    #[test]
+    fn test_serde_fail() {
+        let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]);
+        serde_json::to_string(&uc).expect_err("serialize invalid char bytes");
+        bincode::serialize(&uc).expect_err("serialize invalid char bytes");
+    }
+
+    #[test]
+    fn test_serde_json() {
+        let c = '🙃';
+        let uc = UnvalidatedChar::from_char(c);
+        let json_ser = serde_json::to_string(&uc).unwrap();
+
+        assert_eq!(json_ser, r#""🙃""#);
+
+        let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap();
+
+        assert_eq!(uc, json_de);
+    }
+
+    #[test]
+    fn test_serde_bincode() {
+        let c = '🙃';
+        let uc = UnvalidatedChar::from_char(c);
+        let bytes_ser = bincode::serialize(&uc).unwrap();
+
+        assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]);
+
+        let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap();
+
+        assert_eq!(uc, bytes_de);
+    }
+
+    #[test]
+    fn test_representation() {
+        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
+
+        // backed by [UnvalidatedChar]
+        let uvchars: Vec<_> = chars
+            .iter()
+            .copied()
+            .map(UnvalidatedChar::from_char)
+            .collect();
+        // backed by [RawBytesULE<3>]
+        let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
+
+        let ule_bytes = zvec.as_bytes();
+        let uvbytes;
+        unsafe {
+            let ptr = &uvchars[..] as *const _ as *const u8;
+            uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len());
+        }
+
+        // UnvalidatedChar is defined as little-endian, so this must be true on all platforms
+        // also asserts that to_unaligned/from_unaligned are no-ops
+        assert_eq!(uvbytes, ule_bytes);
+
+        assert_eq!(
+            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+            ule_bytes
+        );
+    }
+
+    #[test]
+    fn test_char_bake() {
+        databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec);
+        // surrogate code point
+        databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec);
+    }
+}
|