diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-07 05:48:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-07 05:48:48 +0000 |
commit | ef24de24a82fe681581cc130f342363c47c0969a (patch) | |
tree | 0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/zerovec/src/ule | |
parent | Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip |
Merging upstream version 1.75.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/zerovec/src/ule')
-rw-r--r-- | vendor/zerovec/src/ule/chars.rs | 45 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/custom.rs | 4 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/encode.rs | 8 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/macros.rs | 29 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/mod.rs | 10 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/multi.rs | 8 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/option.rs | 3 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/plain.rs | 145 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/tuple.rs | 11 | ||||
-rw-r--r-- | vendor/zerovec/src/ule/unvalidated.rs | 318 |
10 files changed, 487 insertions, 94 deletions
diff --git a/vendor/zerovec/src/ule/chars.rs b/vendor/zerovec/src/ule/chars.rs index 7a4a97a4a..e0ec25240 100644 --- a/vendor/zerovec/src/ule/chars.rs +++ b/vendor/zerovec/src/ule/chars.rs @@ -6,10 +6,11 @@ //! ULE implementation for the `char` type. use super::*; +use crate::impl_ule_from_array; use core::cmp::Ordering; use core::convert::TryFrom; -/// A u8 array of little-endian data corresponding to a Unicode code point. +/// A u8 array of little-endian data corresponding to a Unicode scalar value. /// /// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a /// valid `char` and can be converted without validation. @@ -40,6 +41,20 @@ use core::convert::TryFrom; #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub struct CharULE([u8; 3]); +impl CharULE { + /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling + /// [`AsULE::to_unaligned()`] + /// + /// See the type-level documentation for [`CharULE`] for more information. + #[inline] + pub const fn from_aligned(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + impl_ule_from_array!(char, CharULE, Self([0; 3])); +} + // Safety (based on the safety checklist on the ULE trait): // 1. CharULE does not include any uninitialized or padding bytes. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) @@ -72,13 +87,12 @@ impl AsULE for char { #[inline] fn to_unaligned(self) -> Self::ULE { - let [u0, u1, u2, _u3] = u32::from(self).to_le_bytes(); - CharULE([u0, u1, u2]) + CharULE::from_aligned(self) } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { - // Safe because the bytes of CharULE are defined to represent a valid Unicode code point. + // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. unsafe { Self::from_u32_unchecked(u32::from_le_bytes([ unaligned.0[0], @@ -107,6 +121,25 @@ mod test { use super::*; #[test] + fn test_from_array() { + const CHARS: [char; 2] = ['a', '🙃']; + const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS); + assert_eq!( + CharULE::as_byte_slice(&CHARS_ULE), + &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01] + ); + } + + #[test] + fn test_from_array_zst() { + const CHARS: [char; 0] = []; + const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS); + let bytes = CharULE::as_byte_slice(&CHARS_ULE); + let empty: &[u8] = &[]; + assert_eq!(bytes, empty); + } + + #[test] fn test_parse() { // 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32) let chars = ['w', 'ω', '文', '𑄃', '🙃']; @@ -141,7 +174,7 @@ mod test { .collect(); let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); - assert!(matches!(parsed_ules_result, Err(_))); + assert!(parsed_ules_result.is_err()); // 0x20FFFF is out of range for a char let u32s = [0x20FFFF]; @@ -152,6 +185,6 @@ mod test { .collect(); let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); - assert!(matches!(parsed_ules_result, Err(_))); + assert!(parsed_ules_result.is_err()); } } diff --git a/vendor/zerovec/src/ule/custom.rs b/vendor/zerovec/src/ule/custom.rs index b2e4cb0e5..8cc6e9de4 100644 --- a/vendor/zerovec/src/ule/custom.rs +++ b/vendor/zerovec/src/ule/custom.rs @@ -129,8 +129,8 @@ //! } //! //! fn main() { -//! let mut foos = vec![Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])}, -//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}]; +//! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])}, +//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}]; //! //! let vzv = VarZeroVec::<_>::from(&foos); //! diff --git a/vendor/zerovec/src/ule/encode.rs b/vendor/zerovec/src/ule/encode.rs index 2091cf06b..adea123aa 100644 --- a/vendor/zerovec/src/ule/encode.rs +++ b/vendor/zerovec/src/ule/encode.rs @@ -8,7 +8,7 @@ use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec}; use alloc::borrow::{Cow, ToOwned}; use alloc::boxed::Box; use alloc::string::String; -use alloc::vec::Vec; +use alloc::{vec, vec::Vec}; use core::mem; /// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on @@ -82,16 +82,14 @@ pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> { /// /// This is primarily useful for generating `Deserialize` impls for VarULE types pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> { - let mut vec: Vec<u8> = Vec::new(); // zero-fill the vector to avoid uninitialized data UB - vec.resize(x.encode_var_ule_len(), 0); + let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()]; x.encode_var_ule_write(&mut vec); - let boxed = vec.into_boxed_slice(); + let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice()); unsafe { // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]` // and can be recouped via from_byte_slice_unchecked() let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T; - mem::forget(boxed); // Safety: we can construct an owned version since we have mem::forgotten the older owner Box::from_raw(ptr) diff --git a/vendor/zerovec/src/ule/macros.rs b/vendor/zerovec/src/ule/macros.rs new file mode 100644 index 000000000..955b1eb2e --- /dev/null +++ b/vendor/zerovec/src/ule/macros.rs @@ -0,0 +1,29 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +/// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or +/// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`. +/// +/// The `$default` argument is due to current compiler limitations. +/// Pass any (cheap to construct) value. +#[macro_export] +macro_rules! impl_ule_from_array { + ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => { + #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")] + pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] { + let mut result = [$default; N]; + let mut i = 0; + // Won't panic because i < N and arr has length N + #[allow(clippy::indexing_slicing)] + while i < N { + result[i] = $single(arr[i]); + i += 1; + } + result + } + }; + ($aligned:ty, $unaligned:ty, $default:expr) => { + impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned); + }; +} diff --git a/vendor/zerovec/src/ule/mod.rs b/vendor/zerovec/src/ule/mod.rs index e8ecd26e5..5a6d9cd47 100644 --- a/vendor/zerovec/src/ule/mod.rs +++ b/vendor/zerovec/src/ule/mod.rs @@ -14,6 +14,7 @@ mod chars; #[cfg(doc)] pub mod custom; mod encode; +mod macros; mod multi; mod niche; mod option; @@ -29,7 +30,7 @@ pub use multi::MultiFieldsULE; pub use niche::{NicheBytes, NichedOption, NichedOptionULE}; pub use option::{OptionULE, OptionVarULE}; pub use plain::RawBytesULE; -pub use unvalidated::UnvalidatedStr; +pub use unvalidated::{UnvalidatedChar, UnvalidatedStr}; use alloc::alloc::Layout; use alloc::borrow::ToOwned; @@ -156,7 +157,7 @@ where /// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type. /// -/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_ule) instead. +/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead. pub trait AsULE: Copy { /// The ULE type corresponding to `Self`. /// @@ -356,13 +357,12 @@ pub unsafe trait VarULE: 'static { #[inline] fn to_boxed(&self) -> Box<Self> { let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice(); + let bytesvec = mem::ManuallyDrop::new(bytesvec); unsafe { // Get the pointer representation let ptr: *mut Self = Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self; - assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&*bytesvec)); - // Forget the allocation - mem::forget(bytesvec); + assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec)); // Transmute the pointer to an owned pointer Box::from_raw(ptr) } diff --git a/vendor/zerovec/src/ule/multi.rs b/vendor/zerovec/src/ule/multi.rs index 0ba0aea89..3281b2088 100644 --- a/vendor/zerovec/src/ule/multi.rs +++ b/vendor/zerovec/src/ule/multi.rs @@ -44,7 +44,7 @@ impl MultiFieldsULE { lengths, output, ); debug_assert!( - <VarZeroSlice<[u8]>>::validate_byte_slice(output).is_ok(), + <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(), "Encoded slice must be valid VarZeroSlice" ); // Safe since write_serializable_bytes produces a valid VarZeroSlice buffer @@ -141,12 +141,14 @@ unsafe impl VarULE for MultiFieldsULE { /// This impl exists so that EncodeAsVarULE can work. #[inline] fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> { - <VarZeroSlice<[u8]>>::validate_byte_slice(slice) + <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice) } #[inline] unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { // &Self is transparent over &VZS<..> - mem::transmute(<VarZeroSlice<[u8]>>::from_byte_slice_unchecked(bytes)) + mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked( + bytes, + )) } } diff --git a/vendor/zerovec/src/ule/option.rs b/vendor/zerovec/src/ule/option.rs index 50b193aac..9b0dc5b28 100644 --- a/vendor/zerovec/src/ule/option.rs +++ b/vendor/zerovec/src/ule/option.rs @@ -197,9 +197,8 @@ unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> { #[inline] unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { - let metadata = bytes.len() - 1; let entire_struct_as_slice: *const [u8] = - ::core::slice::from_raw_parts(bytes.as_ptr(), metadata); + ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1); &*(entire_struct_as_slice as *const Self) } } diff --git a/vendor/zerovec/src/ule/plain.rs b/vendor/zerovec/src/ule/plain.rs index 49455d45f..f244f6b68 100644 --- a/vendor/zerovec/src/ule/plain.rs +++ b/vendor/zerovec/src/ule/plain.rs @@ -6,6 +6,7 @@ //! ULE implementation for Plain Old Data types, including all sized integers. use super::*; +use crate::impl_ule_from_array; use crate::ZeroSlice; use core::num::{NonZeroI8, NonZeroU8}; @@ -15,69 +16,69 @@ use core::num::{NonZeroI8, NonZeroU8}; #[allow(clippy::exhaustive_structs)] // newtype pub struct RawBytesULE<const N: usize>(pub [u8; N]); -macro_rules! impl_byte_slice_size { - ($unsigned:ty, $size:literal) => { - impl From<[u8; $size]> for RawBytesULE<$size> { - #[inline] - fn from(le_bytes: [u8; $size]) -> Self { - Self(le_bytes) - } - } - impl RawBytesULE<$size> { - #[inline] - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } - } - // Safety (based on the safety checklist on the ULE trait): - // 1. RawBytesULE does not include any uninitialized or padding bytes. - // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) - // 2. RawBytesULE is aligned to 1 byte. - // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) - // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). - // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. - // 5. The other ULE methods use the default impl. - // 6. RawBytesULE byte equality is semantic equality - unsafe impl ULE for RawBytesULE<$size> { - #[inline] - fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { - if bytes.len() % $size == 0 { - // Safe because Self is transparent over [u8; $size] - Ok(()) - } else { - Err(ZeroVecError::length::<Self>(bytes.len())) - } - } +impl<const N: usize> RawBytesULE<N> { + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + #[inline] + pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { + let data = bytes.as_mut_ptr(); + let len = bytes.len() / N; + // Safe because Self is transparent over [u8; N] + unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) } + } +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. RawBytesULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. RawBytesULE is aligned to 1 byte. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). +// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. +// 5. The other ULE methods use the default impl. +// 6. RawBytesULE byte equality is semantic equality +unsafe impl<const N: usize> ULE for RawBytesULE<N> { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % N == 0 { + // Safe because Self is transparent over [u8; N] + Ok(()) + } else { + Err(ZeroVecError::length::<Self>(bytes.len())) } + } +} - impl RawBytesULE<$size> { - #[inline] - pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { - let data = bytes.as_mut_ptr(); - let len = bytes.len() / $size; - // Safe because Self is transparent over [u8; $size] - unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) } - } +impl<const N: usize> From<[u8; N]> for RawBytesULE<N> { + #[inline] + fn from(le_bytes: [u8; N]) -> Self { + Self(le_bytes) + } +} - /// Gets this RawBytesULE as an unsigned int. This is equivalent to calling - /// [AsULE::from_unaligned()] on the appropriately sized type. +macro_rules! impl_byte_slice_size { + ($unsigned:ty, $size:literal) => { + impl RawBytesULE<$size> { + #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")] #[inline] pub fn as_unsigned_int(&self) -> $unsigned { <$unsigned as $crate::ule::AsULE>::from_unaligned(*self) } - /// Convert an array of native-endian aligned integers to an array of RawBytesULE. - pub const fn from_array<const N: usize>(arr: [$unsigned; N]) -> [Self; N] { - let mut result = [RawBytesULE([0; $size]); N]; - let mut i = 0; - // Won't panic because i < N and arr has length N - #[allow(clippy::indexing_slicing)] - while i < N { - result[i].0 = arr[i].to_le_bytes(); - i += 1; - } - result + #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")] + #[inline] + pub const fn from_aligned(value: $unsigned) -> Self { + Self(value.to_le_bytes()) } + + impl_ule_from_array!( + $unsigned, + RawBytesULE<$size>, + RawBytesULE([0; $size]) + ); } }; } @@ -110,7 +111,7 @@ macro_rules! impl_const_constructors { } macro_rules! impl_byte_slice_type { - ($type:ty, $size:literal) => { + ($single_fn:ident, $type:ty, $size:literal) => { impl From<$type> for RawBytesULE<$size> { #[inline] fn from(value: $type) -> Self { @@ -131,6 +132,24 @@ macro_rules! impl_byte_slice_type { // EqULE is true because $type and RawBytesULE<$size> // have the same byte sequence on little-endian unsafe impl EqULE for $type {} + + impl RawBytesULE<$size> { + pub const fn $single_fn(v: $type) -> Self { + RawBytesULE(v.to_le_bytes()) + } + } + }; +} + +macro_rules! impl_byte_slice_unsigned_type { + ($type:ty, $size:literal) => { + impl_byte_slice_type!(from_unsigned, $type, $size); + }; +} + +macro_rules! impl_byte_slice_signed_type { + ($type:ty, $size:literal) => { + impl_byte_slice_type!(from_signed, $type, $size); }; } @@ -139,15 +158,15 @@ impl_byte_slice_size!(u32, 4); impl_byte_slice_size!(u64, 8); impl_byte_slice_size!(u128, 16); -impl_byte_slice_type!(u16, 2); -impl_byte_slice_type!(u32, 4); -impl_byte_slice_type!(u64, 8); -impl_byte_slice_type!(u128, 16); +impl_byte_slice_unsigned_type!(u16, 2); +impl_byte_slice_unsigned_type!(u32, 4); +impl_byte_slice_unsigned_type!(u64, 8); +impl_byte_slice_unsigned_type!(u128, 16); -impl_byte_slice_type!(i16, 2); -impl_byte_slice_type!(i32, 4); -impl_byte_slice_type!(i64, 8); -impl_byte_slice_type!(i128, 16); +impl_byte_slice_signed_type!(i16, 2); +impl_byte_slice_signed_type!(i32, 4); +impl_byte_slice_signed_type!(i64, 8); +impl_byte_slice_signed_type!(i128, 16); impl_const_constructors!(u8, 1); impl_const_constructors!(u16, 2); diff --git a/vendor/zerovec/src/ule/tuple.rs b/vendor/zerovec/src/ule/tuple.rs index c26567e98..3e0f291b3 100644 --- a/vendor/zerovec/src/ule/tuple.rs +++ b/vendor/zerovec/src/ule/tuple.rs @@ -111,10 +111,7 @@ macro_rules! tuple_ule { impl<$($t: ULE),+> Clone for $name<$($t),+> { fn clone(&self) -> Self { - // copy to the stack to avoid hitting a future incompat error - // https://github.com/rust-lang/rust/issues/82523#issuecomment-947900712 - let stack = ($(self.$i),+); - $name($(stack.$i),+) + *self } } @@ -147,7 +144,7 @@ fn test_pairule_validate() { // Test failed validation with a correctly sized but differently constrained tuple // Note: 1234901 is not a valid char let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes); - assert!(matches!(zerovec3, Err(_))); + assert!(zerovec3.is_err()); } #[test] @@ -162,7 +159,7 @@ fn test_tripleule_validate() { // Test failed validation with a correctly sized but differently constrained tuple // Note: 1234901 is not a valid char let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes); - assert!(matches!(zerovec3, Err(_))); + assert!(zerovec3.is_err()); } #[test] @@ -178,5 +175,5 @@ fn test_quadule_validate() { // Test failed validation with a correctly sized but differently constrained tuple // Note: 1234901 is not a valid char let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes); - assert!(matches!(zerovec3, Err(_))); + assert!(zerovec3.is_err()); } diff --git a/vendor/zerovec/src/ule/unvalidated.rs b/vendor/zerovec/src/ule/unvalidated.rs index 4564c8673..21cfb0c0d 100644 --- a/vendor/zerovec/src/ule/unvalidated.rs +++ b/vendor/zerovec/src/ule/unvalidated.rs @@ -2,9 +2,11 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use super::VarULE; +use super::{AsULE, RawBytesULE, VarULE}; +use crate::ule::EqULE; use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError}; use alloc::boxed::Box; +use core::cmp::Ordering; use core::fmt; use core::ops::Deref; @@ -209,3 +211,317 @@ where } } } + +/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not +/// validated as such. +/// +/// Use this type instead of `char` when you want to deal with data that is expected to be valid +/// Unicode scalar values, but you want control over when or if you validate that assumption. +/// +/// # Examples +/// +/// ``` +/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE}; +/// use zerovec::{ZeroSlice, ZeroVec}; +/// +/// // data known to be little-endian three-byte chunks of valid Unicode scalar values +/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01]; +/// // ground truth expectation +/// let real = ['h', 'i', '👋']; +/// +/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length"); +/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect(); +/// assert_eq!(&parsed, &real); +/// +/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect(); +/// let serialized_data = chars.as_bytes(); +/// assert_eq!(serialized_data, &data); +/// ``` +#[repr(transparent)] +#[derive(PartialEq, Eq, Clone, Copy, Hash)] +pub struct UnvalidatedChar([u8; 3]); + +impl UnvalidatedChar { + /// Create a [`UnvalidatedChar`] from a `char`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedChar; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(a.try_to_char().unwrap(), 'a'); + /// ``` + #[inline] + pub const fn from_char(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + #[inline] + #[doc(hidden)] + pub const fn from_u24(c: u32) -> Self { + let [u0, u1, u2, _u3] = c.to_le_bytes(); + Self([u0, u1, u2]) + } + + /// Attempt to convert a [`UnvalidatedChar`] to a `char`. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::{AsULE, UnvalidatedChar}; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(a.try_to_char(), Ok('a')); + /// + /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into()); + /// assert!(matches!(b.try_to_char(), Err(_))); + /// ``` + #[inline] + pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> { + let [u0, u1, u2] = self.0; + char::try_from(u32::from_le_bytes([u0, u1, u2, 0])) + } + + /// Convert a [`UnvalidatedChar`] to a `char', returning [`char::REPLACEMENT_CHARACTER`] + /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::{AsULE, UnvalidatedChar}; + /// + /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into()); + /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER); + /// ``` + #[inline] + pub fn to_char_lossy(self) -> char { + self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER) + } + + /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is + /// a valid Unicode scalar value. + /// + /// # Safety + /// + /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::UnvalidatedChar; + /// + /// let a = UnvalidatedChar::from_char('a'); + /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a'); + /// ``` + #[inline] + pub unsafe fn to_char_unchecked(self) -> char { + let [u0, u1, u2] = self.0; + char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0])) + } +} + +impl RawBytesULE<3> { + /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling + /// [`AsULE::to_unaligned`]. + #[inline] + pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self { + RawBytesULE(uc.0) + } +} + +impl AsULE for UnvalidatedChar { + type ULE = RawBytesULE<3>; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + RawBytesULE(self.0) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self(unaligned.0) + } +} + +// Safety: UnvalidatedChar is always the little-endian representation of a char, +// which corresponds to its AsULE::ULE type +unsafe impl EqULE for UnvalidatedChar {} + +impl fmt::Debug for UnvalidatedChar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Debug as a char if possible + match self.try_to_char() { + Ok(c) => fmt::Debug::fmt(&c, f), + Err(_) => fmt::Debug::fmt(&self.0, f), + } + } +} + +impl PartialOrd for UnvalidatedChar { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for UnvalidatedChar { + // custom implementation, as derived Ord would compare lexicographically + fn cmp(&self, other: &Self) -> Ordering { + let [a0, a1, a2] = self.0; + let a = u32::from_le_bytes([a0, a1, a2, 0]); + let [b0, b1, b2] = other.0; + let b = u32::from_le_bytes([b0, b1, b2, 0]); + a.cmp(&b) + } +} + +impl From<char> for UnvalidatedChar { + #[inline] + fn from(value: char) -> Self { + Self::from_char(value) + } +} + +impl TryFrom<UnvalidatedChar> for char { + type Error = core::char::CharTryFromError; + + #[inline] + fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> { + value.try_to_char() + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl serde::Serialize for UnvalidatedChar { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + use serde::ser::Error; + let c = self + .try_to_char() + .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?; + if serializer.is_human_readable() { + serializer.serialize_char(c) + } else { + self.0.serialize(serializer) + } + } +} + +/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for UnvalidatedChar { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + if deserializer.is_human_readable() { + let c = <char>::deserialize(deserializer)?; + Ok(UnvalidatedChar::from_char(c)) + } else { + let bytes = <[u8; 3]>::deserialize(deserializer)?; + Ok(UnvalidatedChar(bytes)) + } + } +} + +#[cfg(feature = "databake")] +impl databake::Bake for UnvalidatedChar { + fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { + match self.try_to_char() { + Ok(ch) => { + env.insert("zerovec"); + let ch = ch.bake(env); + databake::quote! { + zerovec::ule::UnvalidatedChar::from_char(#ch) + } + } + Err(_) => { + env.insert("zerovec"); + let u24 = u32::from_le_bytes([self.0[0], self.0[1], self.0[2], 0]); + databake::quote! { + zerovec::ule::UnvalidatedChar::from_u24(#u24) + } + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::ZeroVec; + + #[test] + fn test_serde_fail() { + let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]); + serde_json::to_string(&uc).expect_err("serialize invalid char bytes"); + bincode::serialize(&uc).expect_err("serialize invalid char bytes"); + } + + #[test] + fn test_serde_json() { + let c = '🙃'; + let uc = UnvalidatedChar::from_char(c); + let json_ser = serde_json::to_string(&uc).unwrap(); + + assert_eq!(json_ser, r#""🙃""#); + + let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap(); + + assert_eq!(uc, json_de); + } + + #[test] + fn test_serde_bincode() { + let c = '🙃'; + let uc = UnvalidatedChar::from_char(c); + let bytes_ser = bincode::serialize(&uc).unwrap(); + + assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]); + + let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap(); + + assert_eq!(uc, bytes_de); + } + + #[test] + fn test_representation() { + let chars = ['w', 'ω', '文', '𑄃', '🙃']; + + // backed by [UnvalidatedChar] + let uvchars: Vec<_> = chars + .iter() + .copied() + .map(UnvalidatedChar::from_char) + .collect(); + // backed by [RawBytesULE<3>] + let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect(); + + let ule_bytes = zvec.as_bytes(); + let uvbytes; + unsafe { + let ptr = &uvchars[..] as *const _ as *const u8; + uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len()); + } + + // UnvalidatedChar is defined as little-endian, so this must be true on all platforms + // also asserts that to_unaligned/from_unaligned are no-ops + assert_eq!(uvbytes, ule_bytes); + + assert_eq!( + &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], + ule_bytes + ); + } + + #[test] + fn test_char_bake() { + databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec); + // surrogate code point + databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec); + } +} |