summary refs log tree commit diff stats
path: root/vendor/zerovec/src/ule
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-07 05:48:48 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-07 05:48:48 +0000
commit ef24de24a82fe681581cc130f342363c47c0969a (patch)
tree 0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/zerovec/src/ule
parent Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff)
download rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz
rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip
Merging upstream version 1.75.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/zerovec/src/ule')
-rw-r--r-- vendor/zerovec/src/ule/chars.rs 45
-rw-r--r-- vendor/zerovec/src/ule/custom.rs 4
-rw-r--r-- vendor/zerovec/src/ule/encode.rs 8
-rw-r--r-- vendor/zerovec/src/ule/macros.rs 29
-rw-r--r-- vendor/zerovec/src/ule/mod.rs 10
-rw-r--r-- vendor/zerovec/src/ule/multi.rs 8
-rw-r--r-- vendor/zerovec/src/ule/option.rs 3
-rw-r--r-- vendor/zerovec/src/ule/plain.rs 145
-rw-r--r-- vendor/zerovec/src/ule/tuple.rs 11
-rw-r--r-- vendor/zerovec/src/ule/unvalidated.rs 318
10 files changed, 487 insertions, 94 deletions
diff --git a/vendor/zerovec/src/ule/chars.rs b/vendor/zerovec/src/ule/chars.rs
index 7a4a97a4a..e0ec25240 100644
--- a/vendor/zerovec/src/ule/chars.rs
+++ b/vendor/zerovec/src/ule/chars.rs
@@ -6,10 +6,11 @@
//! ULE implementation for the `char` type.
use super::*;
+use crate::impl_ule_from_array;
use core::cmp::Ordering;
use core::convert::TryFrom;
-/// A u8 array of little-endian data corresponding to a Unicode code point.
+/// A u8 array of little-endian data corresponding to a Unicode scalar value.
///
/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a
/// valid `char` and can be converted without validation.
@@ -40,6 +41,20 @@ use core::convert::TryFrom;
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub struct CharULE([u8; 3]);
+impl CharULE {
+ /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling
+ /// [`AsULE::to_unaligned()`]
+ ///
+ /// See the type-level documentation for [`CharULE`] for more information.
+ #[inline]
+ pub const fn from_aligned(c: char) -> Self {
+ let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ impl_ule_from_array!(char, CharULE, Self([0; 3]));
+}
+
// Safety (based on the safety checklist on the ULE trait):
// 1. CharULE does not include any uninitialized or padding bytes.
// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
@@ -72,13 +87,12 @@ impl AsULE for char {
#[inline]
fn to_unaligned(self) -> Self::ULE {
- let [u0, u1, u2, _u3] = u32::from(self).to_le_bytes();
- CharULE([u0, u1, u2])
+ CharULE::from_aligned(self)
}
#[inline]
fn from_unaligned(unaligned: Self::ULE) -> Self {
- // Safe because the bytes of CharULE are defined to represent a valid Unicode code point.
+ // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
unsafe {
Self::from_u32_unchecked(u32::from_le_bytes([
unaligned.0[0],
@@ -107,6 +121,25 @@ mod test {
use super::*;
#[test]
+ fn test_from_array() {
+ const CHARS: [char; 2] = ['a', '🙃'];
+ const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
+ assert_eq!(
+ CharULE::as_byte_slice(&CHARS_ULE),
+ &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
+ );
+ }
+
+ #[test]
+ fn test_from_array_zst() {
+ const CHARS: [char; 0] = [];
+ const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
+ let bytes = CharULE::as_byte_slice(&CHARS_ULE);
+ let empty: &[u8] = &[];
+ assert_eq!(bytes, empty);
+ }
+
+ #[test]
fn test_parse() {
// 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32)
let chars = ['w', 'ω', '文', '𑄃', '🙃'];
@@ -141,7 +174,7 @@ mod test {
.collect();
let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
- assert!(matches!(parsed_ules_result, Err(_)));
+ assert!(parsed_ules_result.is_err());
// 0x20FFFF is out of range for a char
let u32s = [0x20FFFF];
@@ -152,6 +185,6 @@ mod test {
.collect();
let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
- assert!(matches!(parsed_ules_result, Err(_)));
+ assert!(parsed_ules_result.is_err());
}
}
diff --git a/vendor/zerovec/src/ule/custom.rs b/vendor/zerovec/src/ule/custom.rs
index b2e4cb0e5..8cc6e9de4 100644
--- a/vendor/zerovec/src/ule/custom.rs
+++ b/vendor/zerovec/src/ule/custom.rs
@@ -129,8 +129,8 @@
//! }
//!
//! fn main() {
-//! let mut foos = vec![Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])},
-//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}];
+//! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])},
+//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}];
//!
//! let vzv = VarZeroVec::<_>::from(&foos);
//!
diff --git a/vendor/zerovec/src/ule/encode.rs b/vendor/zerovec/src/ule/encode.rs
index 2091cf06b..adea123aa 100644
--- a/vendor/zerovec/src/ule/encode.rs
+++ b/vendor/zerovec/src/ule/encode.rs
@@ -8,7 +8,7 @@ use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
use alloc::borrow::{Cow, ToOwned};
use alloc::boxed::Box;
use alloc::string::String;
-use alloc::vec::Vec;
+use alloc::{vec, vec::Vec};
use core::mem;
/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
@@ -82,16 +82,14 @@ pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
///
/// This is primarily useful for generating `Deserialize` impls for VarULE types
pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> {
- let mut vec: Vec<u8> = Vec::new();
// zero-fill the vector to avoid uninitialized data UB
- vec.resize(x.encode_var_ule_len(), 0);
+ let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
x.encode_var_ule_write(&mut vec);
- let boxed = vec.into_boxed_slice();
+ let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice());
unsafe {
// Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
// and can be recouped via from_byte_slice_unchecked()
let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T;
- mem::forget(boxed);
// Safety: we can construct an owned version since we have mem::forgotten the older owner
Box::from_raw(ptr)
diff --git a/vendor/zerovec/src/ule/macros.rs b/vendor/zerovec/src/ule/macros.rs
new file mode 100644
index 000000000..955b1eb2e
--- /dev/null
+++ b/vendor/zerovec/src/ule/macros.rs
@@ -0,0 +1,29 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or
+/// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`.
+///
+/// The `$default` argument is due to current compiler limitations.
+/// Pass any (cheap to construct) value.
+#[macro_export]
+macro_rules! impl_ule_from_array {
+ ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => {
+ #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")]
+ pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] {
+ let mut result = [$default; N];
+ let mut i = 0;
+ // Won't panic because i < N and arr has length N
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ result[i] = $single(arr[i]);
+ i += 1;
+ }
+ result
+ }
+ };
+ ($aligned:ty, $unaligned:ty, $default:expr) => {
+ impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned);
+ };
+}
diff --git a/vendor/zerovec/src/ule/mod.rs b/vendor/zerovec/src/ule/mod.rs
index e8ecd26e5..5a6d9cd47 100644
--- a/vendor/zerovec/src/ule/mod.rs
+++ b/vendor/zerovec/src/ule/mod.rs
@@ -14,6 +14,7 @@ mod chars;
#[cfg(doc)]
pub mod custom;
mod encode;
+mod macros;
mod multi;
mod niche;
mod option;
@@ -29,7 +30,7 @@ pub use multi::MultiFieldsULE;
pub use niche::{NicheBytes, NichedOption, NichedOptionULE};
pub use option::{OptionULE, OptionVarULE};
pub use plain::RawBytesULE;
-pub use unvalidated::UnvalidatedStr;
+pub use unvalidated::{UnvalidatedChar, UnvalidatedStr};
use alloc::alloc::Layout;
use alloc::borrow::ToOwned;
@@ -156,7 +157,7 @@ where
/// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type.
///
-/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_ule) instead.
+/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead.
pub trait AsULE: Copy {
/// The ULE type corresponding to `Self`.
///
@@ -356,13 +357,12 @@ pub unsafe trait VarULE: 'static {
#[inline]
fn to_boxed(&self) -> Box<Self> {
let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice();
+ let bytesvec = mem::ManuallyDrop::new(bytesvec);
unsafe {
// Get the pointer representation
let ptr: *mut Self =
Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self;
- assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&*bytesvec));
- // Forget the allocation
- mem::forget(bytesvec);
+ assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec));
// Transmute the pointer to an owned pointer
Box::from_raw(ptr)
}
diff --git a/vendor/zerovec/src/ule/multi.rs b/vendor/zerovec/src/ule/multi.rs
index 0ba0aea89..3281b2088 100644
--- a/vendor/zerovec/src/ule/multi.rs
+++ b/vendor/zerovec/src/ule/multi.rs
@@ -44,7 +44,7 @@ impl MultiFieldsULE {
lengths, output,
);
debug_assert!(
- <VarZeroSlice<[u8]>>::validate_byte_slice(output).is_ok(),
+ <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(),
"Encoded slice must be valid VarZeroSlice"
);
// Safe since write_serializable_bytes produces a valid VarZeroSlice buffer
@@ -141,12 +141,14 @@ unsafe impl VarULE for MultiFieldsULE {
/// This impl exists so that EncodeAsVarULE can work.
#[inline]
fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
- <VarZeroSlice<[u8]>>::validate_byte_slice(slice)
+ <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice)
}
#[inline]
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
// &Self is transparent over &VZS<..>
- mem::transmute(<VarZeroSlice<[u8]>>::from_byte_slice_unchecked(bytes))
+ mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked(
+ bytes,
+ ))
}
}
diff --git a/vendor/zerovec/src/ule/option.rs b/vendor/zerovec/src/ule/option.rs
index 50b193aac..9b0dc5b28 100644
--- a/vendor/zerovec/src/ule/option.rs
+++ b/vendor/zerovec/src/ule/option.rs
@@ -197,9 +197,8 @@ unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> {
#[inline]
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
- let metadata = bytes.len() - 1;
let entire_struct_as_slice: *const [u8] =
- ::core::slice::from_raw_parts(bytes.as_ptr(), metadata);
+ ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1);
&*(entire_struct_as_slice as *const Self)
}
}
diff --git a/vendor/zerovec/src/ule/plain.rs b/vendor/zerovec/src/ule/plain.rs
index 49455d45f..f244f6b68 100644
--- a/vendor/zerovec/src/ule/plain.rs
+++ b/vendor/zerovec/src/ule/plain.rs
@@ -6,6 +6,7 @@
//! ULE implementation for Plain Old Data types, including all sized integers.
use super::*;
+use crate::impl_ule_from_array;
use crate::ZeroSlice;
use core::num::{NonZeroI8, NonZeroU8};
@@ -15,69 +16,69 @@ use core::num::{NonZeroI8, NonZeroU8};
#[allow(clippy::exhaustive_structs)] // newtype
pub struct RawBytesULE<const N: usize>(pub [u8; N]);
-macro_rules! impl_byte_slice_size {
- ($unsigned:ty, $size:literal) => {
- impl From<[u8; $size]> for RawBytesULE<$size> {
- #[inline]
- fn from(le_bytes: [u8; $size]) -> Self {
- Self(le_bytes)
- }
- }
- impl RawBytesULE<$size> {
- #[inline]
- pub fn as_bytes(&self) -> &[u8] {
- &self.0
- }
- }
- // Safety (based on the safety checklist on the ULE trait):
- // 1. RawBytesULE does not include any uninitialized or padding bytes.
- // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
- // 2. RawBytesULE is aligned to 1 byte.
- // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
- // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
- // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
- // 5. The other ULE methods use the default impl.
- // 6. RawBytesULE byte equality is semantic equality
- unsafe impl ULE for RawBytesULE<$size> {
- #[inline]
- fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
- if bytes.len() % $size == 0 {
- // Safe because Self is transparent over [u8; $size]
- Ok(())
- } else {
- Err(ZeroVecError::length::<Self>(bytes.len()))
- }
- }
+impl<const N: usize> RawBytesULE<N> {
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ #[inline]
+ pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
+ let data = bytes.as_mut_ptr();
+ let len = bytes.len() / N;
+ // Safe because Self is transparent over [u8; N]
+ unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
+ }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. RawBytesULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. RawBytesULE is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
+// 5. The other ULE methods use the default impl.
+// 6. RawBytesULE byte equality is semantic equality
+unsafe impl<const N: usize> ULE for RawBytesULE<N> {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ if bytes.len() % N == 0 {
+ // Safe because Self is transparent over [u8; N]
+ Ok(())
+ } else {
+ Err(ZeroVecError::length::<Self>(bytes.len()))
}
+ }
+}
- impl RawBytesULE<$size> {
- #[inline]
- pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
- let data = bytes.as_mut_ptr();
- let len = bytes.len() / $size;
- // Safe because Self is transparent over [u8; $size]
- unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
- }
+impl<const N: usize> From<[u8; N]> for RawBytesULE<N> {
+ #[inline]
+ fn from(le_bytes: [u8; N]) -> Self {
+ Self(le_bytes)
+ }
+}
- /// Gets this RawBytesULE as an unsigned int. This is equivalent to calling
- /// [AsULE::from_unaligned()] on the appropriately sized type.
+macro_rules! impl_byte_slice_size {
+ ($unsigned:ty, $size:literal) => {
+ impl RawBytesULE<$size> {
+ #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")]
#[inline]
pub fn as_unsigned_int(&self) -> $unsigned {
<$unsigned as $crate::ule::AsULE>::from_unaligned(*self)
}
- /// Convert an array of native-endian aligned integers to an array of RawBytesULE.
- pub const fn from_array<const N: usize>(arr: [$unsigned; N]) -> [Self; N] {
- let mut result = [RawBytesULE([0; $size]); N];
- let mut i = 0;
- // Won't panic because i < N and arr has length N
- #[allow(clippy::indexing_slicing)]
- while i < N {
- result[i].0 = arr[i].to_le_bytes();
- i += 1;
- }
- result
+ #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")]
+ #[inline]
+ pub const fn from_aligned(value: $unsigned) -> Self {
+ Self(value.to_le_bytes())
}
+
+ impl_ule_from_array!(
+ $unsigned,
+ RawBytesULE<$size>,
+ RawBytesULE([0; $size])
+ );
}
};
}
@@ -110,7 +111,7 @@ macro_rules! impl_const_constructors {
}
macro_rules! impl_byte_slice_type {
- ($type:ty, $size:literal) => {
+ ($single_fn:ident, $type:ty, $size:literal) => {
impl From<$type> for RawBytesULE<$size> {
#[inline]
fn from(value: $type) -> Self {
@@ -131,6 +132,24 @@ macro_rules! impl_byte_slice_type {
// EqULE is true because $type and RawBytesULE<$size>
// have the same byte sequence on little-endian
unsafe impl EqULE for $type {}
+
+ impl RawBytesULE<$size> {
+ pub const fn $single_fn(v: $type) -> Self {
+ RawBytesULE(v.to_le_bytes())
+ }
+ }
+ };
+}
+
+macro_rules! impl_byte_slice_unsigned_type {
+ ($type:ty, $size:literal) => {
+ impl_byte_slice_type!(from_unsigned, $type, $size);
+ };
+}
+
+macro_rules! impl_byte_slice_signed_type {
+ ($type:ty, $size:literal) => {
+ impl_byte_slice_type!(from_signed, $type, $size);
};
}
@@ -139,15 +158,15 @@ impl_byte_slice_size!(u32, 4);
impl_byte_slice_size!(u64, 8);
impl_byte_slice_size!(u128, 16);
-impl_byte_slice_type!(u16, 2);
-impl_byte_slice_type!(u32, 4);
-impl_byte_slice_type!(u64, 8);
-impl_byte_slice_type!(u128, 16);
+impl_byte_slice_unsigned_type!(u16, 2);
+impl_byte_slice_unsigned_type!(u32, 4);
+impl_byte_slice_unsigned_type!(u64, 8);
+impl_byte_slice_unsigned_type!(u128, 16);
-impl_byte_slice_type!(i16, 2);
-impl_byte_slice_type!(i32, 4);
-impl_byte_slice_type!(i64, 8);
-impl_byte_slice_type!(i128, 16);
+impl_byte_slice_signed_type!(i16, 2);
+impl_byte_slice_signed_type!(i32, 4);
+impl_byte_slice_signed_type!(i64, 8);
+impl_byte_slice_signed_type!(i128, 16);
impl_const_constructors!(u8, 1);
impl_const_constructors!(u16, 2);
diff --git a/vendor/zerovec/src/ule/tuple.rs b/vendor/zerovec/src/ule/tuple.rs
index c26567e98..3e0f291b3 100644
--- a/vendor/zerovec/src/ule/tuple.rs
+++ b/vendor/zerovec/src/ule/tuple.rs
@@ -111,10 +111,7 @@ macro_rules! tuple_ule {
impl<$($t: ULE),+> Clone for $name<$($t),+> {
fn clone(&self) -> Self {
- // copy to the stack to avoid hitting a future incompat error
- // https://github.com/rust-lang/rust/issues/82523#issuecomment-947900712
- let stack = ($(self.$i),+);
- $name($(stack.$i),+)
+ *self
}
}
@@ -147,7 +144,7 @@ fn test_pairule_validate() {
// Test failed validation with a correctly sized but differently constrained tuple
// Note: 1234901 is not a valid char
let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes);
- assert!(matches!(zerovec3, Err(_)));
+ assert!(zerovec3.is_err());
}
#[test]
@@ -162,7 +159,7 @@ fn test_tripleule_validate() {
// Test failed validation with a correctly sized but differently constrained tuple
// Note: 1234901 is not a valid char
let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes);
- assert!(matches!(zerovec3, Err(_)));
+ assert!(zerovec3.is_err());
}
#[test]
@@ -178,5 +175,5 @@ fn test_quadule_validate() {
// Test failed validation with a correctly sized but differently constrained tuple
// Note: 1234901 is not a valid char
let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes);
- assert!(matches!(zerovec3, Err(_)));
+ assert!(zerovec3.is_err());
}
diff --git a/vendor/zerovec/src/ule/unvalidated.rs b/vendor/zerovec/src/ule/unvalidated.rs
index 4564c8673..21cfb0c0d 100644
--- a/vendor/zerovec/src/ule/unvalidated.rs
+++ b/vendor/zerovec/src/ule/unvalidated.rs
@@ -2,9 +2,11 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
-use super::VarULE;
+use super::{AsULE, RawBytesULE, VarULE};
+use crate::ule::EqULE;
use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError};
use alloc::boxed::Box;
+use core::cmp::Ordering;
use core::fmt;
use core::ops::Deref;
@@ -209,3 +211,317 @@ where
}
}
}
+
+/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not
+/// validated as such.
+///
+/// Use this type instead of `char` when you want to deal with data that is expected to be valid
+/// Unicode scalar values, but you want control over when or if you validate that assumption.
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE};
+/// use zerovec::{ZeroSlice, ZeroVec};
+///
+/// // data known to be little-endian three-byte chunks of valid Unicode scalar values
+/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01];
+/// // ground truth expectation
+/// let real = ['h', 'i', '👋'];
+///
+/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length");
+/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect();
+/// assert_eq!(&parsed, &real);
+///
+/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect();
+/// let serialized_data = chars.as_bytes();
+/// assert_eq!(serialized_data, &data);
+/// ```
+#[repr(transparent)]
+#[derive(PartialEq, Eq, Clone, Copy, Hash)]
+pub struct UnvalidatedChar([u8; 3]);
+
+impl UnvalidatedChar {
+ /// Create a [`UnvalidatedChar`] from a `char`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedChar;
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(a.try_to_char().unwrap(), 'a');
+ /// ```
+ #[inline]
+ pub const fn from_char(c: char) -> Self {
+ let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ #[inline]
+ #[doc(hidden)]
+ pub const fn from_u24(c: u32) -> Self {
+ let [u0, u1, u2, _u3] = c.to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ /// Attempt to convert a [`UnvalidatedChar`] to a `char`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, UnvalidatedChar};
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(a.try_to_char(), Ok('a'));
+ ///
+ /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+ /// assert!(matches!(b.try_to_char(), Err(_)));
+ /// ```
+ #[inline]
+ pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
+ let [u0, u1, u2] = self.0;
+ char::try_from(u32::from_le_bytes([u0, u1, u2, 0]))
+ }
+
+ /// Convert a [`UnvalidatedChar`] to a `char`, returning [`char::REPLACEMENT_CHARACTER`]
+ /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, UnvalidatedChar};
+ ///
+ /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+ /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER);
+ /// ```
+ #[inline]
+ pub fn to_char_lossy(self) -> char {
+ self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
+ }
+
+ /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is
+ /// a valid Unicode scalar value.
+ ///
+ /// # Safety
+ ///
+ /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedChar;
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a');
+ /// ```
+ #[inline]
+ pub unsafe fn to_char_unchecked(self) -> char {
+ let [u0, u1, u2] = self.0;
+ char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0]))
+ }
+}
+
+impl RawBytesULE<3> {
+ /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling
+ /// [`AsULE::to_unaligned`].
+ #[inline]
+ pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self {
+ RawBytesULE(uc.0)
+ }
+}
+
+impl AsULE for UnvalidatedChar {
+ type ULE = RawBytesULE<3>;
+
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ RawBytesULE(self.0)
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self(unaligned.0)
+ }
+}
+
+// Safety: UnvalidatedChar holds the same three little-endian bytes as its AsULE::ULE type
+// (RawBytesULE<3>), whether or not those bytes encode a valid char
+unsafe impl EqULE for UnvalidatedChar {}
+
+impl fmt::Debug for UnvalidatedChar {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Debug as a char if possible
+ match self.try_to_char() {
+ Ok(c) => fmt::Debug::fmt(&c, f),
+ Err(_) => fmt::Debug::fmt(&self.0, f),
+ }
+ }
+}
+
+impl PartialOrd for UnvalidatedChar {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for UnvalidatedChar {
+ // custom implementation, as derived Ord would compare lexicographically
+ fn cmp(&self, other: &Self) -> Ordering {
+ let [a0, a1, a2] = self.0;
+ let a = u32::from_le_bytes([a0, a1, a2, 0]);
+ let [b0, b1, b2] = other.0;
+ let b = u32::from_le_bytes([b0, b1, b2, 0]);
+ a.cmp(&b)
+ }
+}
+
+impl From<char> for UnvalidatedChar {
+ #[inline]
+ fn from(value: char) -> Self {
+ Self::from_char(value)
+ }
+}
+
+impl TryFrom<UnvalidatedChar> for char {
+ type Error = core::char::CharTryFromError;
+
+ #[inline]
+ fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> {
+ value.try_to_char()
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl serde::Serialize for UnvalidatedChar {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ use serde::ser::Error;
+ let c = self
+ .try_to_char()
+ .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?;
+ if serializer.is_human_readable() {
+ serializer.serialize_char(c)
+ } else {
+ self.0.serialize(serializer)
+ }
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for UnvalidatedChar {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ if deserializer.is_human_readable() {
+ let c = <char>::deserialize(deserializer)?;
+ Ok(UnvalidatedChar::from_char(c))
+ } else {
+ let bytes = <[u8; 3]>::deserialize(deserializer)?;
+ Ok(UnvalidatedChar(bytes))
+ }
+ }
+}
+
+#[cfg(feature = "databake")]
+impl databake::Bake for UnvalidatedChar {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ match self.try_to_char() {
+ Ok(ch) => {
+ env.insert("zerovec");
+ let ch = ch.bake(env);
+ databake::quote! {
+ zerovec::ule::UnvalidatedChar::from_char(#ch)
+ }
+ }
+ Err(_) => {
+ env.insert("zerovec");
+ let u24 = u32::from_le_bytes([self.0[0], self.0[1], self.0[2], 0]);
+ databake::quote! {
+ zerovec::ule::UnvalidatedChar::from_u24(#u24)
+ }
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::ZeroVec;
+
+ #[test]
+ fn test_serde_fail() {
+ let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]);
+ serde_json::to_string(&uc).expect_err("serialize invalid char bytes");
+ bincode::serialize(&uc).expect_err("serialize invalid char bytes");
+ }
+
+ #[test]
+ fn test_serde_json() {
+ let c = '🙃';
+ let uc = UnvalidatedChar::from_char(c);
+ let json_ser = serde_json::to_string(&uc).unwrap();
+
+ assert_eq!(json_ser, r#""🙃""#);
+
+ let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap();
+
+ assert_eq!(uc, json_de);
+ }
+
+ #[test]
+ fn test_serde_bincode() {
+ let c = '🙃';
+ let uc = UnvalidatedChar::from_char(c);
+ let bytes_ser = bincode::serialize(&uc).unwrap();
+
+ assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]);
+
+ let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap();
+
+ assert_eq!(uc, bytes_de);
+ }
+
+ #[test]
+ fn test_representation() {
+ let chars = ['w', 'ω', '文', '𑄃', '🙃'];
+
+ // backed by [UnvalidatedChar]
+ let uvchars: Vec<_> = chars
+ .iter()
+ .copied()
+ .map(UnvalidatedChar::from_char)
+ .collect();
+ // backed by [RawBytesULE<3>]
+ let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
+
+ let ule_bytes = zvec.as_bytes();
+ let uvbytes;
+ unsafe {
+ let ptr = &uvchars[..] as *const _ as *const u8;
+ uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len());
+ }
+
+ // UnvalidatedChar is defined as little-endian, so this must be true on all platforms
+ // also asserts that to_unaligned/from_unaligned are no-ops
+ assert_eq!(uvbytes, ule_bytes);
+
+ assert_eq!(
+ &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+ ule_bytes
+ );
+ }
+
+ #[test]
+ fn test_char_bake() {
+ databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec);
+ // surrogate code point
+ databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec);
+ }
+}