author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 00:47:55 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 00:47:55 +0000
commit    26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree      f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec/src/ule
parent    Initial commit. (diff)
Adding upstream version 124.0.1. (upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/zerovec/src/ule')
-rw-r--r--  third_party/rust/zerovec/src/ule/chars.rs        | 190
-rw-r--r--  third_party/rust/zerovec/src/ule/custom.rs       | 145
-rw-r--r--  third_party/rust/zerovec/src/ule/encode.rs       | 400
-rw-r--r--  third_party/rust/zerovec/src/ule/macros.rs       |  29
-rw-r--r--  third_party/rust/zerovec/src/ule/mod.rs          | 394
-rw-r--r--  third_party/rust/zerovec/src/ule/multi.rs        | 154
-rw-r--r--  third_party/rust/zerovec/src/ule/niche.rs        | 180
-rw-r--r--  third_party/rust/zerovec/src/ule/option.rs       | 264
-rw-r--r--  third_party/rust/zerovec/src/ule/plain.rs        | 366
-rw-r--r--  third_party/rust/zerovec/src/ule/slices.rs       | 103
-rw-r--r--  third_party/rust/zerovec/src/ule/tuple.rs        | 179
-rw-r--r--  third_party/rust/zerovec/src/ule/unvalidated.rs  | 527
12 files changed, 2931 insertions, 0 deletions
diff --git a/third_party/rust/zerovec/src/ule/chars.rs b/third_party/rust/zerovec/src/ule/chars.rs
new file mode 100644
index 0000000000..e4c1efc4ec
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/chars.rs
@@ -0,0 +1,190 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+//! ULE implementation for the `char` type.
+
+use super::*;
+use crate::impl_ule_from_array;
+use core::cmp::Ordering;
+use core::convert::TryFrom;
+
+/// A u8 array of little-endian data corresponding to a Unicode scalar value.
+///
+/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a
+/// valid `char` and can be converted without validation.
+///
+/// # Examples
+///
+/// Convert a `char` to a `CharULE` and back again:
+///
+/// ```
+/// use zerovec::ule::{AsULE, CharULE, ULE};
+///
+/// let c1 = '๐‘„ƒ';
+/// let ule = c1.to_unaligned();
+/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
+/// let c2 = char::from_unaligned(ule);
+/// assert_eq!(c1, c2);
+/// ```
+///
+/// Attempt to parse invalid bytes to a `CharULE`:
+///
+/// ```
+/// use zerovec::ule::{CharULE, ULE};
+///
+/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF];
+/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
+/// ```
+#[repr(transparent)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
+pub struct CharULE([u8; 3]);
+
+impl CharULE {
+ /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling
+ /// [`AsULE::to_unaligned()`]
+ ///
+ /// See the type-level documentation for [`CharULE`] for more information.
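+ ///
+ /// # Example
+ ///
+ /// A minimal const round-trip sketch:
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, CharULE};
+ ///
+ /// // from_aligned is const, so CharULE values can be built in const contexts
+ /// const A: CharULE = CharULE::from_aligned('a');
+ /// assert_eq!(char::from_unaligned(A), 'a');
+ /// ```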
+ #[inline]
+ pub const fn from_aligned(c: char) -> Self {
+ let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ impl_ule_from_array!(char, CharULE, Self([0; 3]));
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. CharULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. CharULE is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. CharULE byte equality is semantic equality
+unsafe impl ULE for CharULE {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ if bytes.len() % 3 != 0 {
+ return Err(ZeroVecError::length::<Self>(bytes.len()));
+ }
+ // Validate the bytes
+ for chunk in bytes.chunks_exact(3) {
+ // TODO: Use slice::as_chunks() when stabilized
+ #[allow(clippy::indexing_slicing)]
+ // Won't panic because the chunks are always 3 bytes long
+ let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]);
+ char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?;
+ }
+ Ok(())
+ }
+}
+
+impl AsULE for char {
+ type ULE = CharULE;
+
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ CharULE::from_aligned(self)
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
+ unsafe {
+ Self::from_u32_unchecked(u32::from_le_bytes([
+ unaligned.0[0],
+ unaligned.0[1],
+ unaligned.0[2],
+ 0,
+ ]))
+ }
+ }
+}
+
+impl PartialOrd for CharULE {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for CharULE {
+ fn cmp(&self, other: &Self) -> Ordering {
+ char::from_unaligned(*self).cmp(&char::from_unaligned(*other))
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_from_array() {
+ const CHARS: [char; 2] = ['a', '๐Ÿ™ƒ'];
+ const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
+ assert_eq!(
+ CharULE::as_byte_slice(&CHARS_ULE),
+ &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
+ );
+ }
+
+ #[test]
+ fn test_from_array_zst() {
+ const CHARS: [char; 0] = [];
+ const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
+ let bytes = CharULE::as_byte_slice(&CHARS_ULE);
+ let empty: &[u8] = &[];
+ assert_eq!(bytes, empty);
+ }
+
+ #[test]
+ fn test_parse() {
+ // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32)
+ let chars = ['w', 'ฯ‰', 'ๆ–‡', '๐‘„ƒ', '๐Ÿ™ƒ'];
+ let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
+ let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);
+
+ // Check parsing
+ let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
+ assert_eq!(char_ules, parsed_ules);
+ let parsed_chars: Vec<char> = parsed_ules
+ .iter()
+ .copied()
+ .map(char::from_unaligned)
+ .collect();
+ assert_eq!(&chars, parsed_chars.as_slice());
+
+ // Compare to golden expected data
+ assert_eq!(
+ &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+ char_bytes
+ );
+ }
+
+ #[test]
+ fn test_failures() {
+ // 119 and 120 are valid, but not 0xD800 (high surrogate)
+ let u32s = [119, 0xD800, 120];
+ let u32_ules: Vec<RawBytesULE<4>> = u32s
+ .iter()
+ .copied()
+ .map(<u32 as AsULE>::to_unaligned)
+ .collect();
+ let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
+ let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
+ assert!(parsed_ules_result.is_err());
+
+ // 0x20FFFF is out of range for a char
+ let u32s = [0x20FFFF];
+ let u32_ules: Vec<RawBytesULE<4>> = u32s
+ .iter()
+ .copied()
+ .map(<u32 as AsULE>::to_unaligned)
+ .collect();
+ let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
+ let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
+ assert!(parsed_ules_result.is_err());
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/custom.rs b/third_party/rust/zerovec/src/ule/custom.rs
new file mode 100644
index 0000000000..8cc6e9de4e
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/custom.rs
@@ -0,0 +1,145 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Documentation on implementing custom VarULE types.
+//!
+//! This module contains documentation for defining custom VarULE types,
+//! especially those using complex custom dynamically sized types.
+//!
+//! In *most cases* you should be able to create custom VarULE types using
+//! [`#[make_varule]`](crate::make_varule).
+//!
+//! # Example
+//!
+//! For example, if your regular stack type is:
+//!
+//! ```rust
+//! use zerofrom::ZeroFrom;
+//! use zerovec::ule::*;
+//! use zerovec::ZeroVec;
+//!
+//! #[derive(serde::Serialize, serde::Deserialize)]
+//! struct Foo<'a> {
+//! field1: char,
+//! field2: u32,
+//! #[serde(borrow)]
+//! field3: ZeroVec<'a, u32>,
+//! }
+//! ```
+//!
+//! then the ULE type will be implemented as follows. Ideally, you should have
+//! `EncodeAsVarULE` and `ZeroFrom` implementations on `Foo` pertaining to `FooULE`,
+//! as well as a `Serialize` impl on `FooULE` and a `Deserialize` impl on `Box<FooULE>`
+//! to enable human-readable serialization and deserialization.
+//!
+//! ```rust
+//! use zerovec::{ZeroVec, VarZeroVec, ZeroSlice};
+//! use zerovec::ule::*;
+//! use zerofrom::ZeroFrom;
+//! use core::mem;
+//!
+//! # #[derive(serde::Serialize, serde::Deserialize)]
+//! # struct Foo<'a> {
+//! # field1: char,
+//! # field2: u32,
+//! # #[serde(borrow)]
+//! # field3: ZeroVec<'a, u32>
+//! # }
+//!
+//! // Must be repr(packed) for safety of VarULE!
+//! // Must also only contain ULE types
+//! #[repr(packed)]
+//! struct FooULE {
+//! field1: <char as AsULE>::ULE,
+//! field2: <u32 as AsULE>::ULE,
+//! field3: ZeroSlice<u32>,
+//! }
+//!
+//! // Safety (based on the safety checklist on the VarULE trait):
+//! // 1. FooULE does not include any uninitialized or padding bytes. (achieved by `#[repr(packed)]` on
+//! // a struct with only ULE fields)
+//! // 2. FooULE is aligned to 1 byte. (achieved by `#[repr(packed)]` on
+//! // a struct with only ULE fields)
+//! // 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+//! // 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+//! // 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+//! // 6. The other VarULE methods use the default impl.
+//! // 7. FooULE byte equality is semantic equality
+//! unsafe impl VarULE for FooULE {
+//! fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+//! // validate each field
+//! <char as AsULE>::ULE::validate_byte_slice(&bytes[0..3]).map_err(|_| ZeroVecError::parse::<Self>())?;
+//! <u32 as AsULE>::ULE::validate_byte_slice(&bytes[3..7]).map_err(|_| ZeroVecError::parse::<Self>())?;
+//! let _ = ZeroVec::<u32>::parse_byte_slice(&bytes[7..]).map_err(|_| ZeroVecError::parse::<Self>())?;
+//! Ok(())
+//! }
+//! unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+//! let ptr = bytes.as_ptr();
+//! let len = bytes.len();
+//! // subtract the length of the char and u32 to get the length of the array
+//! let len_new = (len - 7) / 4;
+//! // it's hard constructing custom DSTs, we fake a pointer/length construction
+//! // eventually we can use the Pointer::Metadata APIs when they stabilize
+//! let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const <u32 as AsULE>::ULE, len_new);
+//! &*(fake_slice as *const Self)
+//! }
+//! }
+//!
+//! unsafe impl EncodeAsVarULE<FooULE> for Foo<'_> {
+//! fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+//! // take each field, convert to ULE byte slices, and pass them through
+//! cb(&[<char as AsULE>::ULE::as_byte_slice(&[self.field1.to_unaligned()]),
+//! <u32 as AsULE>::ULE::as_byte_slice(&[self.field2.to_unaligned()]),
+//! // the ZeroVec is already in the correct slice format
+//! self.field3.as_bytes()])
+//! }
+//! }
+//!
+//! impl<'a> ZeroFrom<'a, FooULE> for Foo<'a> {
+//! fn zero_from(other: &'a FooULE) -> Self {
+//! Self {
+//! field1: AsULE::from_unaligned(other.field1),
+//! field2: AsULE::from_unaligned(other.field2),
+//! field3: ZeroFrom::zero_from(&other.field3),
+//! }
+//! }
+//! }
+//!
+//!
+//! impl serde::Serialize for FooULE
+//! {
+//! fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+//! where
+//! S: serde::Serializer,
+//! {
+//! Foo::zero_from(self).serialize(serializer)
+//! }
+//! }
+//!
+//! impl<'de> serde::Deserialize<'de> for Box<FooULE>
+//! {
+//! fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+//! where
+//! D: serde::Deserializer<'de>,
+//! {
+//! let mut foo = Foo::deserialize(deserializer)?;
+//! Ok(encode_varule_to_box(&foo))
+//! }
+//! }
+//!
+//! fn main() {
+//! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])},
+//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}];
+//!
+//! let vzv = VarZeroVec::<_>::from(&foos);
+//!
+//! assert_eq!(char::from_unaligned(vzv.get(0).unwrap().field1), 'u');
+//! assert_eq!(u32::from_unaligned(vzv.get(0).unwrap().field2), 983);
+//! assert_eq!(&vzv.get(0).unwrap().field3, &[1212,2309,500,7000][..]);
+//!
+//! assert_eq!(char::from_unaligned(vzv.get(1).unwrap().field1), 'l');
+//! assert_eq!(u32::from_unaligned(vzv.get(1).unwrap().field2), 1010);
+//! assert_eq!(&vzv.get(1).unwrap().field3, &[1932, 0, 8888, 91237][..]);
+//! }
+//! ```
diff --git a/third_party/rust/zerovec/src/ule/encode.rs b/third_party/rust/zerovec/src/ule/encode.rs
new file mode 100644
index 0000000000..adea123aa2
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/encode.rs
@@ -0,0 +1,400 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+use crate::varzerovec::VarZeroVecFormat;
+use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
+use alloc::borrow::{Cow, ToOwned};
+use alloc::boxed::Box;
+use alloc::string::String;
+use alloc::{vec, vec::Vec};
+use core::mem;
+
+/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
+/// custom DSTs where the type cannot be obtained as a reference to some other type.
+///
+/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field
+/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices
+/// to the callback must, when concatenated, be a valid instance of the VarULE type.
+///
+/// See the [custom VarULE documentation](crate::ule::custom) for examples.
+///
+/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`]
+/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to
+/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where
+/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work.
+///
+/// A typical implementation will take each field in the order found in the [`VarULE`] type,
+/// convert it to ULE, call [`ULE::as_byte_slice()`] on them, and pass the slices to `cb` in order.
+/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying
+/// byte representation passed through.
+///
+/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical
+/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the
+/// dynamically-sized part.
+///
+/// # Safety
+///
+/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are:
+/// - It must call `cb` (only once)
+/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type
+/// (i.e. if fed to [`VarULE::validate_byte_slice()`] they must produce a successful result)
+/// - It must return the return value of `cb` to the caller
+///
+/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided.
+/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced
+/// with `unreachable!()`.
+///
+/// The safety invariants of [`Self::encode_var_ule_len()`] are:
+/// - It must return the length of the corresponding VarULE type
+///
+/// The safety invariants of [`Self::encode_var_ule_write()`] are:
+/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type
+pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
+ /// Calls `cb` with a piecewise list of byte slices that when concatenated
+ /// produce the memory pattern of the corresponding instance of `T`.
+ ///
+ /// Do not call this function directly; instead use the other two. Some implementors
+ /// may define this function to panic.
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R;
+
+ /// Return the length, in bytes, of the corresponding [`VarULE`] type
+ fn encode_var_ule_len(&self) -> usize {
+ self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum())
+ }
+
+ /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should
+ /// be the size of [`Self::encode_var_ule_len()`]
+ fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
+ debug_assert_eq!(self.encode_var_ule_len(), dst.len());
+ self.encode_var_ule_as_slices(move |slices| {
+ #[allow(clippy::indexing_slicing)] // by debug_assert
+ for slice in slices {
+ dst[..slice.len()].copy_from_slice(slice);
+ dst = &mut dst[slice.len()..];
+ }
+ });
+ }
+}
+
+/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>`
+///
+/// This is primarily useful for generating `Deserialize` impls for VarULE types
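+///
+/// # Example
+///
+/// A minimal sketch, encoding a `String` into a boxed `str`:
+///
+/// ```
+/// use zerovec::ule::encode_varule_to_box;
+///
+/// // `String` implements `EncodeAsVarULE<str>`, so it can be encoded as a boxed `str`
+/// let boxed: Box<str> = encode_varule_to_box(&String::from("hello"));
+/// assert_eq!(&*boxed, "hello");
+/// ```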
+pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> {
+ // zero-fill the vector to avoid uninitialized data UB
+ let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
+ x.encode_var_ule_write(&mut vec);
+ let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice());
+ unsafe {
+ // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
+ // and can be recouped via from_byte_slice_unchecked()
+ let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T;
+
+ // Safety: we can construct an owned version since we have mem::forgotten the older owner
+ Box::from_raw(ptr)
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self)])
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self)])
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T>
+where
+ T: ToOwned,
+{
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self.as_ref())])
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self)])
+ }
+}
+
+unsafe impl EncodeAsVarULE<str> for String {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[self.as_bytes()])
+ }
+}
+
+// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>`
+// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here.
+unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T>
+where
+ T: ULE,
+{
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[<[T] as VarULE>::as_byte_slice(self)])
+ }
+}
+
+unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T]
+where
+ T: AsULE + 'static,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.len() * core::mem::size_of::<T::ULE>()
+ }
+
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ #[allow(non_snake_case)]
+ let S = core::mem::size_of::<T::ULE>();
+ debug_assert_eq!(self.len() * S, dst.len());
+ for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) {
+ let ule = item.to_unaligned();
+ chunk.copy_from_slice(ULE::as_byte_slice(core::slice::from_ref(&ule)));
+ }
+ }
+}
+
+unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T>
+where
+ T: AsULE + 'static,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.as_slice().encode_var_ule_len()
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ self.as_slice().encode_var_ule_write(dst)
+ }
+}
+
+unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T>
+where
+ T: AsULE + 'static,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.as_bytes().len()
+ }
+
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ debug_assert_eq!(self.as_bytes().len(), dst.len());
+ dst.copy_from_slice(self.as_bytes());
+ }
+}
+
+unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E]
+where
+ T: VarULE + ?Sized,
+ E: EncodeAsVarULE<T>,
+ F: VarZeroVecFormat,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unimplemented!()
+ }
+
+ #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV.
+ fn encode_var_ule_len(&self) -> usize {
+ crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize
+ }
+
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst)
+ }
+}
+
+unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E>
+where
+ T: VarULE + ?Sized,
+ E: EncodeAsVarULE<T>,
+ F: VarZeroVecFormat,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice())
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst)
+ }
+}
+
+unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F>
+where
+ T: VarULE + ?Sized,
+ F: VarZeroVecFormat,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.as_bytes().len()
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ debug_assert_eq!(self.as_bytes().len(), dst.len());
+ dst.copy_from_slice(self.as_bytes());
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ const STRING_ARRAY: [&str; 2] = ["hello", "world"];
+
+ const STRING_SLICE: &[&str] = &STRING_ARRAY;
+
+ const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];
+
+ const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY];
+
+ const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY];
+
+ const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE];
+
+ const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE];
+
+ const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F];
+
+ const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY];
+
+ const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY];
+
+ const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE];
+
+ const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE];
+
+ #[test]
+ fn test_vzv_from() {
+ type VZV<'a, T> = VarZeroVec<'a, T>;
+ type ZS<T> = ZeroSlice<T>;
+ type VZS<T> = VarZeroSlice<T>;
+
+ let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY);
+ let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()];
+ let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()];
+ let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()];
+
+ let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY);
+ let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()];
+ let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()];
+ let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()];
+
+ let a: VZV<str> = VarZeroVec::from(&STRING_ARRAY);
+ let b: VZV<str> = VarZeroVec::from(STRING_SLICE);
+ let c: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE));
+ assert_eq!(a, STRING_SLICE);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+
+ let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY);
+ let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE);
+ let c: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec);
+ assert_eq!(a, U8_2D_SLICE);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ let u8_3d_vzv_brackets = &[a.clone(), a.clone()];
+
+ let a: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY);
+ let b: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE);
+ let c: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec);
+ let d: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec);
+ assert_eq!(a, U8_2D_SLICE);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+ let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()];
+
+ let a: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY);
+ let b: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE);
+ let c: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec);
+ let d: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets);
+ assert_eq!(
+ a.iter()
+ .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>())
+ .collect::<Vec<Vec<Vec<u8>>>>(),
+ u8_3d_vec
+ );
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+
+ let a: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY);
+ let b: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE);
+ let c: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec);
+ let d: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice);
+ assert_eq!(
+ a.iter()
+ .map(|x| x
+ .iter()
+ .map(|y| y.iter().collect::<Vec<u8>>())
+ .collect::<Vec<Vec<u8>>>())
+ .collect::<Vec<Vec<Vec<u8>>>>(),
+ u8_3d_vec
+ );
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+
+ let a: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY);
+ let b: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE);
+ let c: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec);
+ let d: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec);
+ assert_eq!(a, u32_2d_zerovec);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+ let u32_3d_vzv = &[a.clone(), a.clone()];
+
+ let a: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY);
+ let b: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE);
+ let c: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec);
+ let d: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv);
+ assert_eq!(
+ a.iter()
+ .map(|x| x
+ .iter()
+ .map(|y| y.iter().collect::<Vec<u32>>())
+ .collect::<Vec<Vec<u32>>>())
+ .collect::<Vec<Vec<Vec<u32>>>>(),
+ u32_3d_vec
+ );
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/macros.rs b/third_party/rust/zerovec/src/ule/macros.rs
new file mode 100644
index 0000000000..955b1eb2e4
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/macros.rs
@@ -0,0 +1,29 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or
+/// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`.
+///
+/// The `$default` argument is due to current compiler limitations.
+/// Pass any (cheap to construct) value.
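+///
+/// # Example
+///
+/// An invocation sketch, matching how `CharULE` uses this macro (the conversion function
+/// defaults to `Self::from_aligned`):
+///
+/// ```ignore
+/// impl CharULE {
+///     impl_ule_from_array!(char, CharULE, Self([0; 3]));
+/// }
+/// ```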
+#[macro_export]
+macro_rules! impl_ule_from_array {
+ ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => {
+ #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")]
+ pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] {
+ let mut result = [$default; N];
+ let mut i = 0;
+ // Won't panic because i < N and arr has length N
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ result[i] = $single(arr[i]);
+ i += 1;
+ }
+ result
+ }
+ };
+ ($aligned:ty, $unaligned:ty, $default:expr) => {
+ impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned);
+ };
+}
diff --git a/third_party/rust/zerovec/src/ule/mod.rs b/third_party/rust/zerovec/src/ule/mod.rs
new file mode 100644
index 0000000000..5a6d9cd471
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/mod.rs
@@ -0,0 +1,394 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+
+//! Traits over unaligned little-endian data (ULE, pronounced "yule").
+//!
+//! The main traits for this module are [`ULE`], [`AsULE`] and, [`VarULE`].
+//!
+//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits
+//! work under the hood.
+mod chars;
+#[cfg(doc)]
+pub mod custom;
+mod encode;
+mod macros;
+mod multi;
+mod niche;
+mod option;
+mod plain;
+mod slices;
+mod unvalidated;
+
+pub mod tuple;
+pub use super::ZeroVecError;
+pub use chars::CharULE;
+pub use encode::{encode_varule_to_box, EncodeAsVarULE};
+pub use multi::MultiFieldsULE;
+pub use niche::{NicheBytes, NichedOption, NichedOptionULE};
+pub use option::{OptionULE, OptionVarULE};
+pub use plain::RawBytesULE;
+pub use unvalidated::{UnvalidatedChar, UnvalidatedStr};
+
+use alloc::alloc::Layout;
+use alloc::borrow::ToOwned;
+use alloc::boxed::Box;
+use core::{mem, slice};
+
+/// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice.
+///
+/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or
+/// [`#[derive(ULE)]`](macro@ULE) instead.
+///
+/// Types that are not fixed-width can implement [`VarULE`] instead.
+///
+/// "ULE" stands for "Unaligned little-endian"
+///
+/// # Safety
+///
+/// Safety checklist for `ULE`:
+///
+/// 1. The type *must not* include any uninitialized or padding bytes.
+/// 2. The type must have an alignment of 1 byte.
+/// 3. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// would not represent a valid slice of this type.
+/// 4. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// cannot be used in its entirety (if its length is not a multiple of `size_of::<Self>()`).
+/// 5. All other methods *must* be left with their default impl, or else implemented according to
+/// their respective safety guidelines.
+/// 6. Acknowledge the following note about the equality invariant.
+///
+/// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2,
+/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`.
+///
+/// # Equality invariant
+///
+/// A non-safety invariant is that if `Self` implements `PartialEq`, then it *must* be logically
+/// equivalent to byte equality on [`Self::as_byte_slice()`].
+///
+/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not
+/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error
+/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and
+/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form
+/// where only a single digit is allowed before `.`.
+///
+/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
+/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
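+///
+/// # Example
+///
+/// A small sketch that parses raw little-endian bytes through the plain [`RawBytesULE`] impl:
+///
+/// ```
+/// use zerovec::ule::{AsULE, RawBytesULE, ULE};
+///
+/// let bytes = [0x7F, 0x00, 0xFF, 0xFF];
+/// // RawBytesULE<2> is the ULE type of u16, so parsing only checks the length
+/// let ules = RawBytesULE::<2>::parse_byte_slice(&bytes).expect("length is a multiple of 2");
+/// assert_eq!(u16::from_unaligned(ules[0]), 0x007F);
+/// assert_eq!(u16::from_unaligned(ules[1]), 0xFFFF);
+/// ```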
+pub unsafe trait ULE
+where
+ Self: Sized,
+ Self: Copy + 'static,
+{
+ /// Validates a byte slice, `&[u8]`.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
+ /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok`
+ /// should be returned; otherwise, a `ZeroVecError` should be returned.
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError>;
+
+ /// Parses a byte slice, `&[u8]`, and returns it as `&[Self]` with the same lifetime.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated,
+ /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`].
+ ///
+ /// The default implementation executes [`Self::validate_byte_slice()`] followed by
+ /// [`Self::from_byte_slice_unchecked`].
+ ///
+ /// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This
+ /// means that the returned slice can span the entire byte slice.
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], ZeroVecError> {
+ Self::validate_byte_slice(bytes)?;
+ debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
+ Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
+ }
+
+ /// Takes a byte slice, `&[u8]`, and returns it as `&[Self]` with the same lifetime, assuming
+ /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with
+ /// success.
+ ///
+ /// The default implementation performs a pointer cast to the same region of memory.
+ ///
+ /// # Safety
+ ///
+ /// ## Callers
+ ///
+ /// Callers of this method must take care to ensure that `bytes` was previously passed through
+ /// [`Self::validate_byte_slice()`] with success (and was not changed since then).
+ ///
+ /// ## Implementors
+ ///
+ /// Implementations of this method may call unsafe functions to cast the pointer to the correct
+ /// type, assuming the "Callers" invariant above.
+ ///
+ /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths.
+ ///
+ /// Safety checklist:
+ ///
+ /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`].
+ /// 2. This method *must* return a slice to the same region of memory as the argument.
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] {
+ let data = bytes.as_ptr();
+ let len = bytes.len() / mem::size_of::<Self>();
+ debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
+ core::slice::from_raw_parts(data as *const Self, len)
+ }
+
+ /// Given `&[Self]`, returns a `&[u8]` with the same lifetime.
+ ///
+ /// The default implementation performs a pointer cast to the same region of memory.
+ ///
+ /// # Safety
+ ///
+ /// Implementations of this method should call potentially unsafe functions to cast the
+ /// pointer to the correct type.
+ ///
+ /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths.
+ #[inline]
+ #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219
+ fn as_byte_slice(slice: &[Self]) -> &[u8] {
+ unsafe {
+ slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice))
+ }
+ }
+}
+
+/// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type.
+///
+/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead.
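+///
+/// # Example
+///
+/// A minimal round-trip sketch using the `u32` impl:
+///
+/// ```
+/// use zerovec::ule::AsULE;
+///
+/// // to_unaligned converts to the little-endian ULE form; from_unaligned converts back
+/// let ule = 0x12345678u32.to_unaligned();
+/// assert_eq!(u32::from_unaligned(ule), 0x12345678);
+/// ```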
+pub trait AsULE: Copy {
+ /// The ULE type corresponding to `Self`.
+ ///
+ /// Types having infallible conversions from all bit values (Plain Old Data) can use
+ /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`.
+ ///
+ /// Types that are not well-defined for all bit values should implement a custom ULE.
+ type ULE: ULE;
+
+ /// Converts from `Self` to `Self::ULE`.
+ ///
+ /// This function may involve byte order swapping (native-endian to little-endian).
+ ///
+ /// For best performance, mark your implementation of this function `#[inline]`.
+ fn to_unaligned(self) -> Self::ULE;
+
+ /// Converts from `Self::ULE` to `Self`.
+ ///
+ /// This function may involve byte order swapping (little-endian to native-endian).
+ ///
+ /// For best performance, mark your implementation of this function `#[inline]`.
+ ///
+ /// # Safety
+ ///
+ /// This function is infallible because bit validation should have occurred when `Self::ULE`
+ /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like
+ /// `from_bytes_unchecked()`.
+ fn from_unaligned(unaligned: Self::ULE) -> Self;
+}
+
+/// An [`EqULE`] type is one whose byte sequence equals the byte sequence of its ULE type on
+/// little-endian platforms. This enables certain performance optimizations, such as
+/// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice).
+///
+/// # Implementation safety
+///
+/// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`)
+/// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where
+/// `*const T` can be cast to a valid `*const T::ULE`.
+pub unsafe trait EqULE: AsULE {}
+
+/// A trait for a type where aligned slices can be cast to unaligned slices.
+///
+/// Auto-implemented on all types implementing [`EqULE`].
+pub trait SliceAsULE
+where
+ Self: AsULE + Sized,
+{
+ /// Converts from `&[Self]` to `&[Self::ULE]` if possible.
+ ///
+ /// In general, this function returns `Some` on little-endian and `None` on big-endian.
+ fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>;
+}
+
+#[cfg(target_endian = "little")]
+impl<T> SliceAsULE for T
+where
+ T: EqULE,
+{
+ #[inline]
+ fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> {
+ // This is safe because on little-endian platforms, the byte sequence of &[T]
+ // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE,
+ // and &[T::ULE] has equal or looser alignment than &[T].
+ let ule_slice =
+ unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) };
+ Some(ule_slice)
+ }
+}
+
+#[cfg(not(target_endian = "little"))]
+impl<T> SliceAsULE for T
+where
+ T: EqULE,
+{
+ #[inline]
+ fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> {
+ None
+ }
+}
+
+/// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice.
+///
+/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or
+/// [`#[derive(VarULE)]`](macro@VarULE) instead.
+///
+/// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types;
+/// however, it is preferable to use [`ULE`] for that purpose. The [`custom`] module contains
+/// additional documentation on how this type can be implemented on custom types.
+///
+/// If deserialization with `VarZeroVec` is desired, it is recommended to implement `Deserialize` for
+/// `Box<T>` (serde does not do this automatically for unsized `T`).
+///
+/// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom)
+/// on some stack type to convert to and from the ULE type efficiently when necessary.
+///
+/// # Safety
+///
+/// Safety checklist for `VarULE`:
+///
+/// 1. The type *must not* include any uninitialized or padding bytes.
+/// 2. The type must have an alignment of 1 byte.
+/// 3. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// would not represent a valid slice of this type.
+/// 4. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// cannot be used in its entirety.
+/// 5. The impl of [`VarULE::from_byte_slice_unchecked()`] must produce a reference to the same
+/// underlying data assuming that the given bytes previously passed validation.
+/// 6. All other methods *must* be left with their default impl, or else implemented according to
+/// their respective safety guidelines.
+/// 7. Acknowledge the following note about the equality invariant.
+///
+/// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2,
+/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`.
+///
+/// # Equality invariant
+///
+/// A non-safety invariant is that if `Self` implements `PartialEq`, then it *must* be logically
+/// equivalent to byte equality on [`Self::as_byte_slice()`].
+///
+/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not
+/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error
+/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and
+/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form
+/// where only a single digit is allowed before `.`.
+///
+/// There may also be cases where a `VarULE` has multiple canonical forms, such as a faster
+/// version and a smaller version. The cleanest way to handle this case would be separate types.
+/// However, if this is not feasible, then the application should ensure that the data it is
+/// deserializing is in the expected form. For example, if the data is being loaded from an
+/// external source, then requests could carry information about the expected form of the data.
+///
+/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
+/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
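+///
+/// # Example
+///
+/// A small sketch using the `str` impl of [`VarULE`]:
+///
+/// ```
+/// use zerovec::ule::VarULE;
+///
+/// // Validation checks that the bytes are valid UTF-8, then reinterprets them as &str
+/// let bytes: &[u8] = b"hello";
+/// let s = <str as VarULE>::parse_byte_slice(bytes).expect("valid UTF-8");
+/// assert_eq!(s, "hello");
+/// ```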
+pub unsafe trait VarULE: 'static {
+ /// Validates a byte slice, `&[u8]`.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
+ /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should
+ /// be returned; otherwise, a `ZeroVecError` should be returned.
+ fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError>;
+
+ /// Parses a byte slice, `&[u8]`, and returns it as `&Self` with the same lifetime.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated,
+ /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`].
+ ///
+ /// The default implementation executes [`Self::validate_byte_slice()`] followed by
+ /// [`Self::from_byte_slice_unchecked`].
+ ///
+ /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`,
+ /// where `result` is the successful return value of the method. This means that the return
+ /// value spans the entire byte slice.
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ Self::validate_byte_slice(bytes)?;
+ let result = unsafe { Self::from_byte_slice_unchecked(bytes) };
+ debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes));
+ Ok(result)
+ }
+
+ /// Takes a byte slice, `&[u8]`, and returns it as `&Self` with the same lifetime, assuming
+ /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with
+ /// success.
+ ///
+ /// # Safety
+ ///
+ /// ## Callers
+ ///
+ /// Callers of this method must take care to ensure that `bytes` was previously passed through
+ /// [`Self::validate_byte_slice()`] with success (and was not changed since then).
+ ///
+ /// ## Implementors
+ ///
+ /// Implementations of this method may call unsafe functions to cast the pointer to the correct
+ /// type, assuming the "Callers" invariant above.
+ ///
+ /// Safety checklist:
+ ///
+ /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`].
+ /// 2. This method *must* return a slice to the same region of memory as the argument.
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self;
+
+ /// Given `&Self`, returns a `&[u8]` with the same lifetime.
+ ///
+ /// The default implementation performs a pointer cast to the same region of memory.
+ ///
+ /// # Safety
+ ///
+ /// Implementations of this method should call potentially unsafe functions to cast the
+ /// pointer to the correct type.
+ #[inline]
+ fn as_byte_slice(&self) -> &[u8] {
+ unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) }
+ }
+
+ /// Allocate on the heap as a `Box<T>`
+ #[inline]
+ fn to_boxed(&self) -> Box<Self> {
+ let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice();
+ let bytesvec = mem::ManuallyDrop::new(bytesvec);
+ unsafe {
+ // Get the pointer representation
+ let ptr: *mut Self =
+ Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self;
+ assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec));
+ // Transmute the pointer to an owned pointer
+ Box::from_raw(ptr)
+ }
+ }
+}
+
+// Proc macro reexports
+//
+// These exist so that our docs can use intra-doc links.
+// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from
+// a submodule
+
+/// Custom derive for [`ULE`].
+///
+/// This can be attached to [`Copy`] structs containing only [`ULE`] types.
+///
+/// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining
+/// a custom ULE type.
+#[cfg(feature = "derive")]
+pub use zerovec_derive::ULE;
+
+/// Custom derive for [`VarULE`]
+///
+/// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end.
+///
+/// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining
+/// a custom [`VarULE`] type.
+#[cfg(feature = "derive")]
+pub use zerovec_derive::VarULE;
diff --git a/third_party/rust/zerovec/src/ule/multi.rs b/third_party/rust/zerovec/src/ule/multi.rs
new file mode 100644
index 0000000000..3281b20888
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/multi.rs
@@ -0,0 +1,154 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use crate::varzerovec::Index32;
+use crate::VarZeroSlice;
+use core::mem;
+
+/// This type is used by the custom derive to represent multiple [`VarULE`]
+/// fields packed into a single end-of-struct field. It is not recommended
+/// to use this type directly.
+///
+/// Logically, consider it to be `(V1, V2, V3, ..)`
+/// where `V1` etc are potentially different [`VarULE`] types.
+///
+/// Internally, it is represented by a VarZeroSlice.
+#[derive(PartialEq, Eq, Debug)]
+#[repr(transparent)]
+pub struct MultiFieldsULE(VarZeroSlice<[u8], Index32>);
+
+impl MultiFieldsULE {
+ /// Compute the number of bytes needed to support elements with lengths `lengths`
+ #[inline]
+ pub fn compute_encoded_len_for(lengths: &[usize]) -> usize {
+ #[allow(clippy::expect_used)] // See #1410
+ unsafe {
+ // safe since BlankSliceEncoder is transparent over usize
+ let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]);
+ crate::varzerovec::components::compute_serializable_len::<_, _, Index32>(lengths)
+ .expect("Too many bytes to encode") as usize
+ }
+ }
+
+ /// Construct a partially initialized MultiFieldsULE backed by a mutable byte buffer
+ pub fn new_from_lengths_partially_initialized<'a>(
+ lengths: &[usize],
+ output: &'a mut [u8],
+ ) -> &'a mut Self {
+ unsafe {
+ // safe since BlankSliceEncoder is transparent over usize
+ let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]);
+ crate::varzerovec::components::write_serializable_bytes::<_, _, Index32>(
+ lengths, output,
+ );
+ debug_assert!(
+ <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(),
+ "Encoded slice must be valid VarZeroSlice"
+ );
+ // Safe since write_serializable_bytes produces a valid VarZeroSlice buffer
+ let slice = <VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked_mut(output);
+ // safe since `Self` is transparent over VarZeroSlice
+ mem::transmute::<&mut VarZeroSlice<_, Index32>, &mut Self>(slice)
+ }
+ }
+
+ /// Given a buffer of size obtained by [`Self::compute_encoded_len_for()`], write element A to index idx
+ ///
+ /// # Safety
+ /// - `idx` must be in range
+ /// - `T` must be the appropriate type expected by the custom derive in this usage of this type
+ #[inline]
+ pub unsafe fn set_field_at<T: VarULE + ?Sized, A: EncodeAsVarULE<T> + ?Sized>(
+ &mut self,
+ idx: usize,
+ value: &A,
+ ) {
+ value.encode_var_ule_write(self.0.get_bytes_at_mut(idx))
+ }
+
+ /// Validate field at `index` to see if it is a valid `T` VarULE type
+ ///
+ /// # Safety
+ ///
+ /// - `index` must be in range
+ #[inline]
+ pub unsafe fn validate_field<T: VarULE + ?Sized>(
+ &self,
+ index: usize,
+ ) -> Result<(), ZeroVecError> {
+ T::validate_byte_slice(self.0.get_unchecked(index))
+ }
+
+ /// Get field at `index` as a value of type T
+ ///
+ /// # Safety
+ ///
+ /// - `index` must be in range
+ /// - Element at `index` must have been created with the VarULE type T
+ #[inline]
+ pub unsafe fn get_field<T: VarULE + ?Sized>(&self, index: usize) -> &T {
+ T::from_byte_slice_unchecked(self.0.get_unchecked(index))
+ }
+
+ /// Construct from a byte slice
+ ///
+ /// # Safety
+ /// - byte slice must be a valid VarZeroSlice<[u8]>
+ #[inline]
+ pub unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ // &Self is transparent over &VZS<..>
+ mem::transmute(<VarZeroSlice<[u8]>>::from_byte_slice_unchecked(bytes))
+ }
+}
+
+/// This lets us conveniently use the EncodeAsVarULE functionality to create
+/// `VarZeroVec<[u8]>`s that have the right amount of space for elements
+/// without having to duplicate any unsafe code
+#[repr(transparent)]
+struct BlankSliceEncoder(usize);
+
+unsafe impl EncodeAsVarULE<[u8]> for BlankSliceEncoder {
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.0
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, _dst: &mut [u8]) {
+ // do nothing
+ }
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. MultiFieldsULE does not include any uninitialized or padding bytes (achieved by being transparent over a VarULE type)
+// 2. MultiFieldsULE is aligned to 1 byte (achieved by being transparent over a VarULE type)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. All other methods are defaulted
+// 7. `MultiFieldsULE` byte equality is semantic equality (achieved by being transparent over a VarULE type)
+unsafe impl VarULE for MultiFieldsULE {
+ /// Note: MultiFieldsULE is usually used in cases where one should be calling .validate_field() directly for
+ /// each field, rather than using the regular VarULE impl.
+ ///
+ /// This impl exists so that EncodeAsVarULE can work.
+ #[inline]
+ fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
+ <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice)
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ // &Self is transparent over &VZS<..>
+ mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked(
+ bytes,
+ ))
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/niche.rs b/third_party/rust/zerovec/src/ule/niche.rs
new file mode 100644
index 0000000000..ae61faca0b
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/niche.rs
@@ -0,0 +1,180 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::{marker::Copy, mem::size_of};
+
+use super::{AsULE, ULE};
+
+/// The [`ULE`] types implementing this trait guarantee that [`NicheBytes::NICHE_BIT_PATTERN`]
+/// can never occur as a valid byte representation of the type.
+///
+/// Guarantees for a valid implementation.
+/// 1. N must be equal to `core::mem::size_of::<Self>()` or else it will
+/// cause panics.
+/// 2. The bit pattern [`NicheBytes::NICHE_BIT_PATTERN`] must never be a valid byte representation
+/// of the type, otherwise values matching it would be misinterpreted as the niche.
+/// 3. The abstractions built on top of this trait must panic on an invalid N.
+/// 4. The abstractions built on this trait that use type punning must ensure that type being
+/// punned is [`ULE`].
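+///
+/// # Example
+///
+/// A hypothetical sketch for a 4-byte ULE type (`MyULE` is not part of this crate) whose
+/// all-`0xFF` pattern can never be a valid value:
+///
+/// ```ignore
+/// impl NicheBytes<4> for MyULE {
+///     // This pattern is reserved for the niche and never occurs in a valid MyULE
+///     const NICHE_BIT_PATTERN: [u8; 4] = [0xFF; 4];
+/// }
+/// ```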
+pub trait NicheBytes<const N: usize> {
+ const NICHE_BIT_PATTERN: [u8; N];
+}
+
+/// [`ULE`] type for [`NichedOption<U,N>`] where U implements [`NicheBytes`].
+/// The invalid bit pattern is used as the niche.
+///
+/// This uses 1 byte less than [`crate::ule::OptionULE<U>`] to represent [`NichedOption<U,N>`].
+///
+/// # Example
+///
+/// ```
+/// use core::num::NonZeroI8;
+/// use zerovec::ule::NichedOption;
+/// use zerovec::ZeroVec;
+///
+/// let bytes = &[0x00, 0x01, 0x02, 0x00];
+/// let zv_no: ZeroVec<NichedOption<NonZeroI8, 1>> =
+/// ZeroVec::parse_byte_slice(bytes)
+/// .expect("Unable to parse as NichedOption.");
+///
+/// assert_eq!(zv_no.get(0).map(|e| e.0), Some(None));
+/// assert_eq!(zv_no.get(1).map(|e| e.0), Some(NonZeroI8::new(1)));
+/// assert_eq!(zv_no.get(2).map(|e| e.0), Some(NonZeroI8::new(2)));
+/// assert_eq!(zv_no.get(3).map(|e| e.0), Some(None));
+/// ```
+// Invariants:
+// The union stores [`NicheBytes::NICHE_BIT_PATTERN`] when None.
+// Any other bit pattern is a valid `U`.
+#[repr(C)]
+pub union NichedOptionULE<U: NicheBytes<N> + ULE, const N: usize> {
+ /// Invariant: The value is `niche` only if the bytes equal NICHE_BIT_PATTERN.
+ niche: [u8; N],
+ /// Invariant: The value is `valid` if the `niche` field does not match NICHE_BIT_PATTERN.
+ valid: U,
+}
+
+impl<U: NicheBytes<N> + ULE + core::fmt::Debug, const N: usize> core::fmt::Debug
+ for NichedOptionULE<U, N>
+{
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.get().fmt(f)
+ }
+}
+
+impl<U: NicheBytes<N> + ULE, const N: usize> NichedOptionULE<U, N> {
+ /// New `NichedOptionULE<U, N>` from `Option<U>`
+ pub fn new(opt: Option<U>) -> Self {
+ assert!(N == core::mem::size_of::<U>());
+ match opt {
+ Some(u) => Self { valid: u },
+ None => Self {
+ niche: <U as NicheBytes<N>>::NICHE_BIT_PATTERN,
+ },
+ }
+ }
+
+ /// Convert to an `Option<U>`
+ pub fn get(self) -> Option<U> {
+ // Safety: The union stores NICHE_BIT_PATTERN when None otherwise a valid U
+ unsafe {
+ if self.niche == <U as NicheBytes<N>>::NICHE_BIT_PATTERN {
+ None
+ } else {
+ Some(self.valid)
+ }
+ }
+ }
+}
+
+impl<U: NicheBytes<N> + ULE, const N: usize> Copy for NichedOptionULE<U, N> {}
+
+impl<U: NicheBytes<N> + ULE, const N: usize> Clone for NichedOptionULE<U, N> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<U: NicheBytes<N> + ULE + PartialEq, const N: usize> PartialEq for NichedOptionULE<U, N> {
+ fn eq(&self, other: &Self) -> bool {
+ self.get().eq(&other.get())
+ }
+}
+
+impl<U: NicheBytes<N> + ULE + Eq, const N: usize> Eq for NichedOptionULE<U, N> {}
+
+/// Safety for ULE trait
+/// 1. NichedOptionULE does not have any padding bytes: it is a `#[repr(C)]` union whose fields
+/// (`[u8; N]` and a ULE type of the same size) contain no padding.
+/// NichedOptionULE either contains NICHE_BIT_PATTERN or valid U byte sequences.
+/// In both cases the data is initialized.
+/// 2. NichedOptionULE is aligned to 1 byte because both union fields have an alignment of
+/// 1 byte.
+/// 3. validate_byte_slice impl returns an error if invalid bytes are encountered.
+/// 4. validate_byte_slice impl returns an error if there are extra bytes.
+/// 5. The other ULE methods are left to their default impl.
+/// 6. NichedOptionULE equality is based on ULE equality of the subfield, assuming that NicheBytes
+/// has been implemented correctly (this is a correctness but not a safety guarantee).
+unsafe impl<U: NicheBytes<N> + ULE, const N: usize> ULE for NichedOptionULE<U, N> {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), crate::ZeroVecError> {
+ let size = size_of::<Self>();
+ // The implementation is only correct if NICHE_BIT_PATTERN has the same number of bytes as the
+ // type.
+ debug_assert!(N == core::mem::size_of::<U>());
+
+ // The bytes should fully transmute to a collection of Self
+ if bytes.len() % size != 0 {
+ return Err(crate::ZeroVecError::length::<Self>(bytes.len()));
+ }
+ bytes.chunks(size).try_for_each(|chunk| {
+ // Associated const cannot be referenced in a pattern
+ // https://doc.rust-lang.org/error-index.html#E0158
+ if chunk == <U as NicheBytes<N>>::NICHE_BIT_PATTERN {
+ Ok(())
+ } else {
+ U::validate_byte_slice(chunk)
+ }
+ })
+ }
+}
+
+/// Optional type which uses [`NichedOptionULE<U, N>`] as its ULE type.
+/// Implementors guarantee that `N == core::mem::size_of::<U::ULE>()`.
+/// `#[repr(transparent)]` guarantees that the layout is the same as [`Option<U>`].
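+///
+/// # Examples
+///
+/// A minimal round-trip sketch through the ULE type (assuming `NonZeroI8`, whose ULE niche
+/// is the zero byte):
+///
+/// ```
+/// use core::num::NonZeroI8;
+/// use zerovec::ule::{AsULE, NichedOption};
+///
+/// let original = NichedOption::<NonZeroI8, 1>::new(NonZeroI8::new(-3));
+/// let ule = original.to_unaligned();
+/// let restored: NichedOption<NonZeroI8, 1> = NichedOption::from_unaligned(ule);
+/// assert_eq!(restored.0, NonZeroI8::new(-3));
+/// ```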
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[repr(transparent)]
+#[non_exhaustive]
+pub struct NichedOption<U, const N: usize>(pub Option<U>);
+
+impl<U, const N: usize> NichedOption<U, N> {
+ pub const fn new(o: Option<U>) -> Self {
+ Self(o)
+ }
+}
+
+impl<U, const N: usize> Default for NichedOption<U, N> {
+ fn default() -> Self {
+ Self(None)
+ }
+}
+
+impl<U, const N: usize> From<Option<U>> for NichedOption<U, N> {
+ fn from(o: Option<U>) -> Self {
+ Self(o)
+ }
+}
+
+impl<U: AsULE, const N: usize> AsULE for NichedOption<U, N>
+where
+ U::ULE: NicheBytes<N>,
+{
+ type ULE = NichedOptionULE<U::ULE, N>;
+
+ fn to_unaligned(self) -> Self::ULE {
+ NichedOptionULE::new(self.0.map(U::to_unaligned))
+ }
+
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self(unaligned.get().map(U::from_unaligned))
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/option.rs b/third_party/rust/zerovec/src/ule/option.rs
new file mode 100644
index 0000000000..9b0dc5b28a
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/option.rs
@@ -0,0 +1,264 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use core::cmp::Ordering;
+use core::marker::PhantomData;
+use core::mem::{self, MaybeUninit};
+
+/// This type is the [`ULE`] type for `Option<U>` where `U` is a [`ULE`] type
+///
+/// # Example
+///
+/// ```rust
+/// use zerovec::ZeroVec;
+///
+/// let z = ZeroVec::alloc_from_slice(&[
+/// Some('a'),
+/// Some('รก'),
+/// Some('รธ'),
+/// None,
+/// Some('ล‚'),
+/// ]);
+///
+/// assert_eq!(z.get(2), Some(Some('รธ')));
+/// assert_eq!(z.get(3), Some(None));
+/// ```
+// Invariants:
+// The MaybeUninit is zeroed when None (bool = false),
+// and is valid when Some (bool = true)
+#[repr(packed)]
+pub struct OptionULE<U>(bool, MaybeUninit<U>);
+
+impl<U: Copy> OptionULE<U> {
+    /// Obtain this as an `Option<U>`
+ pub fn get(self) -> Option<U> {
+ if self.0 {
+ unsafe {
+ // safety: self.0 is true so the MaybeUninit is valid
+ Some(self.1.assume_init())
+ }
+ } else {
+ None
+ }
+ }
+
+    /// Construct an `OptionULE<U>` from an equivalent `Option<U>`
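+    ///
+    /// # Examples
+    ///
+    /// A minimal round-trip sketch (assuming this type is exported as `zerovec::ule::OptionULE`):
+    ///
+    /// ```
+    /// use zerovec::ule::OptionULE;
+    ///
+    /// let some = OptionULE::new(Some(5u8));
+    /// assert_eq!(some.get(), Some(5u8));
+    ///
+    /// let none = OptionULE::<u8>::new(None);
+    /// assert_eq!(none.get(), None);
+    /// ```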
+ pub fn new(opt: Option<U>) -> Self {
+ if let Some(inner) = opt {
+ Self(true, MaybeUninit::new(inner))
+ } else {
+ Self(false, MaybeUninit::zeroed())
+ }
+ }
+}
+
+impl<U: Copy + core::fmt::Debug> core::fmt::Debug for OptionULE<U> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.get().fmt(f)
+ }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. OptionULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(packed)]` on a struct containing only ULE fields,
+// in the context of this impl. The MaybeUninit is valid for all byte sequences, and we only generate
+//    zeroed or valid-U byte sequences to fill it)
+// 2. OptionULE is aligned to 1 byte.
+// (achieved by `#[repr(packed)]` on a struct containing only ULE fields, in the context of this impl)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. OptionULE byte equality is semantic equality by relying on the ULE equality
+// invariant on the subfields
+unsafe impl<U: ULE> ULE for OptionULE<U> {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ let size = mem::size_of::<Self>();
+ if bytes.len() % size != 0 {
+ return Err(ZeroVecError::length::<Self>(bytes.len()));
+ }
+ for chunk in bytes.chunks(size) {
+ #[allow(clippy::indexing_slicing)] // `chunk` will have enough bytes to fit Self
+ match chunk[0] {
+ // https://doc.rust-lang.org/reference/types/boolean.html
+ // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+ 0 => {
+ if !chunk[1..].iter().all(|x| *x == 0) {
+ return Err(ZeroVecError::parse::<Self>());
+ }
+ }
+ 1 => U::validate_byte_slice(&chunk[1..])?,
+ _ => return Err(ZeroVecError::parse::<Self>()),
+ }
+ }
+ Ok(())
+ }
+}
+
+impl<T: AsULE> AsULE for Option<T> {
+ type ULE = OptionULE<T::ULE>;
+ fn to_unaligned(self) -> OptionULE<T::ULE> {
+ OptionULE::new(self.map(T::to_unaligned))
+ }
+
+ fn from_unaligned(other: OptionULE<T::ULE>) -> Self {
+ other.get().map(T::from_unaligned)
+ }
+}
+
+impl<U: Copy> Copy for OptionULE<U> {}
+
+impl<U: Copy> Clone for OptionULE<U> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<U: Copy + PartialEq> PartialEq for OptionULE<U> {
+ fn eq(&self, other: &Self) -> bool {
+ self.get().eq(&other.get())
+ }
+}
+
+impl<U: Copy + Eq> Eq for OptionULE<U> {}
+
+/// A type allowing one to represent `Option<U>` for [`VarULE`] `U` types.
+///
+/// ```rust
+/// use zerovec::ule::OptionVarULE;
+/// use zerovec::VarZeroVec;
+///
+/// let mut zv: VarZeroVec<OptionVarULE<str>> = VarZeroVec::new();
+///
+/// zv.make_mut().push(&None::<&str>);
+/// zv.make_mut().push(&Some("hello"));
+/// zv.make_mut().push(&Some("world"));
+/// zv.make_mut().push(&None::<&str>);
+///
+/// assert_eq!(zv.get(0).unwrap().as_ref(), None);
+/// assert_eq!(zv.get(1).unwrap().as_ref(), Some("hello"));
+/// ```
+// The slice field is empty when None (bool = false),
+// and is a valid T when Some (bool = true)
+#[repr(packed)]
+pub struct OptionVarULE<U: VarULE + ?Sized>(PhantomData<U>, bool, [u8]);
+
+impl<U: VarULE + ?Sized> OptionVarULE<U> {
+ /// Obtain this as an `Option<&U>`
+ pub fn as_ref(&self) -> Option<&U> {
+ if self.1 {
+ unsafe {
+ // Safety: byte field is a valid T if boolean field is true
+ Some(U::from_byte_slice_unchecked(&self.2))
+ }
+ } else {
+ None
+ }
+ }
+}
+
+impl<U: VarULE + ?Sized + core::fmt::Debug> core::fmt::Debug for OptionVarULE<U> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.as_ref().fmt(f)
+ }
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. OptionVarULE<T> does not include any uninitialized or padding bytes
+// (achieved by being repr(packed) on ULE types)
+// 2. OptionVarULE<T> is aligned to 1 byte (achieved by being repr(packed) on ULE types)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. All other methods are defaulted
+// 7. OptionVarULE<T> byte equality is semantic equality (achieved by being an aggregate)
+unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> {
+ #[inline]
+ fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
+ if slice.is_empty() {
+ return Err(ZeroVecError::length::<Self>(slice.len()));
+ }
+ #[allow(clippy::indexing_slicing)] // slice already verified to be nonempty
+ match slice[0] {
+ // https://doc.rust-lang.org/reference/types/boolean.html
+ // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+ 0 => {
+ if slice.len() != 1 {
+ Err(ZeroVecError::length::<Self>(slice.len()))
+ } else {
+ Ok(())
+ }
+ }
+ 1 => U::validate_byte_slice(&slice[1..]),
+ _ => Err(ZeroVecError::parse::<Self>()),
+ }
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ let entire_struct_as_slice: *const [u8] =
+ ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1);
+ &*(entire_struct_as_slice as *const Self)
+ }
+}
+
+unsafe impl<T, U> EncodeAsVarULE<OptionVarULE<U>> for Option<T>
+where
+ T: EncodeAsVarULE<U>,
+ U: VarULE + ?Sized,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ if let Some(ref inner) = *self {
+ // slice + boolean
+ 1 + inner.encode_var_ule_len()
+ } else {
+ // boolean + empty slice
+ 1
+ }
+ }
+
+ #[allow(clippy::indexing_slicing)] // This method is allowed to panic when lengths are invalid
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ if let Some(ref inner) = *self {
+ debug_assert!(
+ !dst.is_empty(),
+ "OptionVarULE must have at least one byte when Some"
+ );
+ dst[0] = 1;
+ inner.encode_var_ule_write(&mut dst[1..]);
+ } else {
+ debug_assert!(
+ dst.len() == 1,
+ "OptionVarULE must have exactly one byte when None"
+ );
+ dst[0] = 0;
+ }
+ }
+}
+
+impl<U: VarULE + ?Sized + PartialEq> PartialEq for OptionVarULE<U> {
+ fn eq(&self, other: &Self) -> bool {
+ self.as_ref().eq(&other.as_ref())
+ }
+}
+
+impl<U: VarULE + ?Sized + Eq> Eq for OptionVarULE<U> {}
+
+impl<U: VarULE + ?Sized + PartialOrd> PartialOrd for OptionVarULE<U> {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.as_ref().partial_cmp(&other.as_ref())
+ }
+}
+
+impl<U: VarULE + ?Sized + Ord> Ord for OptionVarULE<U> {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.as_ref().cmp(&other.as_ref())
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/plain.rs b/third_party/rust/zerovec/src/ule/plain.rs
new file mode 100644
index 0000000000..f244f6b682
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/plain.rs
@@ -0,0 +1,366 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+//! ULE implementation for Plain Old Data types, including all sized integers.
+
+use super::*;
+use crate::impl_ule_from_array;
+use crate::ZeroSlice;
+use core::num::{NonZeroI8, NonZeroU8};
+
+/// A u8 array of little-endian data with infallible conversions to and from &[u8].
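+///
+/// # Examples
+///
+/// A minimal sketch using the `u32` conversions defined further down in this module:
+///
+/// ```
+/// use zerovec::ule::{AsULE, RawBytesULE};
+///
+/// let ule = 0x0102_0304u32.to_unaligned();
+/// assert_eq!(ule, RawBytesULE([0x04, 0x03, 0x02, 0x01]));
+/// assert_eq!(ule.as_unsigned_int(), 0x0102_0304);
+/// ```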
+#[repr(transparent)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)]
+#[allow(clippy::exhaustive_structs)] // newtype
+pub struct RawBytesULE<const N: usize>(pub [u8; N]);
+
+impl<const N: usize> RawBytesULE<N> {
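+    /// Gets this `RawBytesULE` as a slice of its little-endian bytes.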
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
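+    /// Reinterprets a mutable byte slice as a mutable slice of `RawBytesULE<N>`,
+    /// ignoring any trailing bytes that do not form a complete element.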
+ #[inline]
+ pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
+ let data = bytes.as_mut_ptr();
+ let len = bytes.len() / N;
+ // Safe because Self is transparent over [u8; N]
+ unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
+ }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. RawBytesULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. RawBytesULE is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
+// 5. The other ULE methods use the default impl.
+// 6. RawBytesULE byte equality is semantic equality
+unsafe impl<const N: usize> ULE for RawBytesULE<N> {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ if bytes.len() % N == 0 {
+ // Safe because Self is transparent over [u8; N]
+ Ok(())
+ } else {
+ Err(ZeroVecError::length::<Self>(bytes.len()))
+ }
+ }
+}
+
+impl<const N: usize> From<[u8; N]> for RawBytesULE<N> {
+ #[inline]
+ fn from(le_bytes: [u8; N]) -> Self {
+ Self(le_bytes)
+ }
+}
+
+macro_rules! impl_byte_slice_size {
+ ($unsigned:ty, $size:literal) => {
+ impl RawBytesULE<$size> {
+ #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")]
+ #[inline]
+ pub fn as_unsigned_int(&self) -> $unsigned {
+ <$unsigned as $crate::ule::AsULE>::from_unaligned(*self)
+ }
+
+ #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")]
+ #[inline]
+ pub const fn from_aligned(value: $unsigned) -> Self {
+ Self(value.to_le_bytes())
+ }
+
+ impl_ule_from_array!(
+ $unsigned,
+ RawBytesULE<$size>,
+ RawBytesULE([0; $size])
+ );
+ }
+ };
+}
+
+macro_rules! impl_const_constructors {
+ ($base:ty, $size:literal) => {
+ impl ZeroSlice<$base> {
+ /// This function can be used for constructing ZeroVecs in a const context, avoiding
+ /// parsing checks.
+ ///
+ /// This cannot be generic over T because of current limitations in `const`, but if
+ /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`]
+ /// instead.
+ ///
+ /// See [`ZeroSlice::cast()`] for an example.
+ pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ let len = bytes.len();
+ #[allow(clippy::modulo_one)]
+ if len % $size == 0 {
+ Ok(unsafe { Self::from_bytes_unchecked(bytes) })
+ } else {
+ Err(ZeroVecError::InvalidLength {
+ ty: concat!("<const construct: ", $size, ">"),
+ len,
+ })
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_byte_slice_type {
+ ($single_fn:ident, $type:ty, $size:literal) => {
+ impl From<$type> for RawBytesULE<$size> {
+ #[inline]
+ fn from(value: $type) -> Self {
+ Self(value.to_le_bytes())
+ }
+ }
+ impl AsULE for $type {
+ type ULE = RawBytesULE<$size>;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ RawBytesULE(self.to_le_bytes())
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ <$type>::from_le_bytes(unaligned.0)
+ }
+ }
+ // EqULE is true because $type and RawBytesULE<$size>
+ // have the same byte sequence on little-endian
+ unsafe impl EqULE for $type {}
+
+ impl RawBytesULE<$size> {
+ pub const fn $single_fn(v: $type) -> Self {
+ RawBytesULE(v.to_le_bytes())
+ }
+ }
+ };
+}
+
+macro_rules! impl_byte_slice_unsigned_type {
+ ($type:ty, $size:literal) => {
+ impl_byte_slice_type!(from_unsigned, $type, $size);
+ };
+}
+
+macro_rules! impl_byte_slice_signed_type {
+ ($type:ty, $size:literal) => {
+ impl_byte_slice_type!(from_signed, $type, $size);
+ };
+}
+
+impl_byte_slice_size!(u16, 2);
+impl_byte_slice_size!(u32, 4);
+impl_byte_slice_size!(u64, 8);
+impl_byte_slice_size!(u128, 16);
+
+impl_byte_slice_unsigned_type!(u16, 2);
+impl_byte_slice_unsigned_type!(u32, 4);
+impl_byte_slice_unsigned_type!(u64, 8);
+impl_byte_slice_unsigned_type!(u128, 16);
+
+impl_byte_slice_signed_type!(i16, 2);
+impl_byte_slice_signed_type!(i32, 4);
+impl_byte_slice_signed_type!(i64, 8);
+impl_byte_slice_signed_type!(i128, 16);
+
+impl_const_constructors!(u8, 1);
+impl_const_constructors!(u16, 2);
+impl_const_constructors!(u32, 4);
+impl_const_constructors!(u64, 8);
+impl_const_constructors!(u128, 16);
+
+// Note: The f32 and f64 const constructors currently have limited use because
+// `f32::to_le_bytes` is not yet const.
+
+impl_const_constructors!(bool, 1);
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. u8 does not include any uninitialized or padding bytes.
+// 2. u8 is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. u8 byte equality is semantic equality
+unsafe impl ULE for u8 {
+ #[inline]
+ fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> {
+ Ok(())
+ }
+}
+
+impl AsULE for u8 {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+// EqULE is true because u8 is its own ULE.
+unsafe impl EqULE for u8 {}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. NonZeroU8 does not include any uninitialized or padding bytes.
+// 2. NonZeroU8 is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (0x00).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. NonZeroU8 byte equality is semantic equality
+unsafe impl ULE for NonZeroU8 {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ bytes.iter().try_for_each(|b| {
+ if *b == 0x00 {
+ Err(ZeroVecError::parse::<Self>())
+ } else {
+ Ok(())
+ }
+ })
+ }
+}
+
+impl AsULE for NonZeroU8 {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
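+// EqULE is true because NonZeroU8 is its own ULE.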
+unsafe impl EqULE for NonZeroU8 {}
+
+impl NicheBytes<1> for NonZeroU8 {
+ const NICHE_BIT_PATTERN: [u8; 1] = [0x00];
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. i8 does not include any uninitialized or padding bytes.
+// 2. i8 is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. i8 byte equality is semantic equality
+unsafe impl ULE for i8 {
+ #[inline]
+ fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> {
+ Ok(())
+ }
+}
+
+impl AsULE for i8 {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+// EqULE is true because i8 is its own ULE.
+unsafe impl EqULE for i8 {}
+
+impl AsULE for NonZeroI8 {
+ type ULE = NonZeroU8;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ // Safety: NonZeroU8 and NonZeroI8 have same size
+ unsafe { core::mem::transmute(self) }
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ // Safety: NonZeroU8 and NonZeroI8 have same size
+ unsafe { core::mem::transmute(unaligned) }
+ }
+}
+
+// These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation
+// on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits
+//
+// The only potential problem is that some older platforms treat signaling NaNs differently. This is
+// still quite portable; the signaling property is not typically important.
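+//
+// For example, `1.5f32.to_unaligned()` stores `1.5f32.to_bits().to_le_bytes()`, i.e.
+// [0x00, 0x00, 0xC0, 0x3F], and `f32::from_unaligned` reconstructs exactly 1.5 from those bytes.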
+
+impl AsULE for f32 {
+ type ULE = RawBytesULE<4>;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self.to_bits().to_unaligned()
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self::from_bits(u32::from_unaligned(unaligned))
+ }
+}
+
+impl AsULE for f64 {
+ type ULE = RawBytesULE<8>;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self.to_bits().to_unaligned()
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self::from_bits(u64::from_unaligned(unaligned))
+ }
+}
+
+// The from_bits documentation mentions that they have identical byte representations to integers
+// and EqULE only cares about LE systems
+unsafe impl EqULE for f32 {}
+unsafe impl EqULE for f64 {}
+
+// The bool impl is not as efficient as it could be
+// We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking
+// for better bitpacking
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. bool does not include any uninitialized or padding bytes (the remaining 7 bits in bool are by definition zero)
+// 2. bool is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. bool byte equality is semantic equality
+unsafe impl ULE for bool {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ for byte in bytes {
+ // https://doc.rust-lang.org/reference/types/boolean.html
+ // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+ if *byte > 1 {
+ return Err(ZeroVecError::parse::<Self>());
+ }
+ }
+ Ok(())
+ }
+}
+
+impl AsULE for bool {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+// EqULE is true because bool is its own ULE.
+unsafe impl EqULE for bool {}
diff --git a/third_party/rust/zerovec/src/ule/slices.rs b/third_party/rust/zerovec/src/ule/slices.rs
new file mode 100644
index 0000000000..75ea57e02e
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/slices.rs
@@ -0,0 +1,103 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+use core::str;
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. [T; N] does not include any uninitialized or padding bytes since T is ULE
+// 2. [T; N] is aligned to 1 byte since T is ULE
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
+// 5. The other ULE methods use the default impl.
+// 6. [T; N] byte equality is semantic equality since T is ULE
+unsafe impl<T: ULE, const N: usize> ULE for [T; N] {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ // a slice of multiple Selfs is equivalent to just a larger slice of Ts
+ T::validate_byte_slice(bytes)
+ }
+}
+
+impl<T: AsULE, const N: usize> AsULE for [T; N] {
+ type ULE = [T::ULE; N];
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self.map(T::to_unaligned)
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned.map(T::from_unaligned)
+ }
+}
+
+unsafe impl<T: EqULE, const N: usize> EqULE for [T; N] {}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. str does not include any uninitialized or padding bytes.
+// 2. str is aligned to 1 byte.
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. `parse_byte_slice()` is equivalent to `validate_byte_slice()` followed by `from_byte_slice_unchecked()`
+// 7. str byte equality is semantic equality
+unsafe impl VarULE for str {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>())?;
+ Ok(())
+ }
+
+ #[inline]
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>())
+ }
+ /// Invariant: must be safe to call when called on a slice that previously
+ /// succeeded with `parse_byte_slice`
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ str::from_utf8_unchecked(bytes)
+ }
+}
+
+/// Note: VarULE is well-defined for all `[T]` where `T: ULE`, but [`ZeroSlice`] is more ergonomic
+/// when `T` is a low-level ULE type. For example:
+///
+/// ```no_run
+/// # use zerovec::ZeroSlice;
+/// # use zerovec::VarZeroVec;
+/// # use zerovec::ule::AsULE;
+/// // OK: [u8] is a useful type
+/// let _: VarZeroVec<[u8]> = unimplemented!();
+///
+/// // Technically works, but [u32::ULE] is not very useful
+/// let _: VarZeroVec<[<u32 as AsULE>::ULE]> = unimplemented!();
+///
+/// // Better: ZeroSlice<u32>
+/// let _: VarZeroVec<ZeroSlice<u32>> = unimplemented!();
+/// ```
+///
+/// [`ZeroSlice`]: crate::ZeroSlice
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. [T] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type)
+// 2. [T] is aligned to 1 byte (achieved by being a slice of a ULE type)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. All other methods are defaulted
+// 7. `[T]` byte equality is semantic equality (achieved by being a slice of a ULE type)
+unsafe impl<T> VarULE for [T]
+where
+ T: ULE,
+{
+ #[inline]
+ fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
+ T::validate_byte_slice(slice)
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ T::from_byte_slice_unchecked(bytes)
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/tuple.rs b/third_party/rust/zerovec/src/ule/tuple.rs
new file mode 100644
index 0000000000..3e0f291b3f
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/tuple.rs
@@ -0,0 +1,179 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! ULE impls for tuples.
+//!
+//! Rust does not guarantee the layout of tuples, so ZeroVec defines its own tuple ULE types.
+//!
+//! Impls are defined for tuples of up to 6 elements. For longer tuples, use a custom struct
+//! with [`#[make_ule]`](crate::make_ule).
+//!
+//! # Examples
+//!
+//! ```
+//! use zerovec::ZeroVec;
+//!
+//! // ZeroVec of tuples!
+//! let zerovec: ZeroVec<(u32, char)> = [(1, 'a'), (1234901, 'ๅ•Š'), (100, 'เค…')]
+//! .iter()
+//! .copied()
+//! .collect();
+//!
+//! assert_eq!(zerovec.get(1), Some((1234901, 'ๅ•Š')));
+//! ```
+
+use super::*;
+use core::fmt;
+use core::mem;
+
+macro_rules! tuple_ule {
+ ($name:ident, $len:literal, [ $($t:ident $i:tt),+ ]) => {
+ #[doc = concat!("ULE type for tuples with ", $len, " elements.")]
+ #[repr(packed)]
+ #[allow(clippy::exhaustive_structs)] // stable
+ pub struct $name<$($t),+>($(pub $t),+);
+
+ // Safety (based on the safety checklist on the ULE trait):
+ // 1. TupleULE does not include any uninitialized or padding bytes.
+ // (achieved by `#[repr(packed)]` on a struct containing only ULE fields)
+ // 2. TupleULE is aligned to 1 byte.
+ // (achieved by `#[repr(packed)]` on a struct containing only ULE fields)
+ // 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+ // 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+ // 5. The other ULE methods use the default impl.
+ // 6. TupleULE byte equality is semantic equality by relying on the ULE equality
+ // invariant on the subfields
+ unsafe impl<$($t: ULE),+> ULE for $name<$($t),+> {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+                // expands to: 0usize + mem::size_of::<A>() + mem::size_of::<B>();
+ let ule_bytes = 0usize $(+ mem::size_of::<$t>())+;
+ if bytes.len() % ule_bytes != 0 {
+ return Err(ZeroVecError::length::<Self>(bytes.len()));
+ }
+ for chunk in bytes.chunks(ule_bytes) {
+ let mut i = 0;
+ $(
+ let j = i;
+ i += mem::size_of::<$t>();
+ #[allow(clippy::indexing_slicing)] // length checked
+ <$t>::validate_byte_slice(&chunk[j..i])?;
+ )+
+ }
+ Ok(())
+ }
+ }
+
+ impl<$($t: AsULE),+> AsULE for ($($t),+) {
+ type ULE = $name<$(<$t>::ULE),+>;
+
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ $name($(
+ self.$i.to_unaligned()
+ ),+)
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ ($(
+ <$t>::from_unaligned(unaligned.$i)
+ ),+)
+ }
+ }
+
+ impl<$($t: fmt::Debug + ULE),+> fmt::Debug for $name<$($t),+> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+ ($(self.$i),+).fmt(f)
+ }
+ }
+
+ // We need manual impls since `#[derive()]` is disallowed on packed types
+ impl<$($t: PartialEq + ULE),+> PartialEq for $name<$($t),+> {
+ fn eq(&self, other: &Self) -> bool {
+ ($(self.$i),+).eq(&($(other.$i),+))
+ }
+ }
+
+ impl<$($t: Eq + ULE),+> Eq for $name<$($t),+> {}
+
+ impl<$($t: PartialOrd + ULE),+> PartialOrd for $name<$($t),+> {
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ ($(self.$i),+).partial_cmp(&($(other.$i),+))
+ }
+ }
+
+ impl<$($t: Ord + ULE),+> Ord for $name<$($t),+> {
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ ($(self.$i),+).cmp(&($(other.$i),+))
+ }
+ }
+
+ impl<$($t: ULE),+> Clone for $name<$($t),+> {
+ fn clone(&self) -> Self {
+ *self
+ }
+ }
+
+ impl<$($t: ULE),+> Copy for $name<$($t),+> {}
+
+ impl<'a, $($t: Ord + AsULE + 'static),+> crate::map::ZeroMapKV<'a> for ($($t),+) {
+ type Container = crate::ZeroVec<'a, ($($t),+)>;
+ type Slice = crate::ZeroSlice<($($t),+)>;
+ type GetType = $name<$(<$t>::ULE),+>;
+ type OwnedType = ($($t),+);
+ }
+ };
+}
+
+tuple_ule!(Tuple2ULE, "2", [ A 0, B 1 ]);
+tuple_ule!(Tuple3ULE, "3", [ A 0, B 1, C 2 ]);
+tuple_ule!(Tuple4ULE, "4", [ A 0, B 1, C 2, D 3 ]);
+tuple_ule!(Tuple5ULE, "5", [ A 0, B 1, C 2, D 3, E 4 ]);
+tuple_ule!(Tuple6ULE, "6", [ A 0, B 1, C 2, D 3, E 4, F 5 ]);
+
+#[test]
+fn test_pairule_validate() {
+ use crate::ZeroVec;
+ let vec: Vec<(u32, char)> = vec![(1, 'a'), (1234901, 'ๅ•Š'), (100, 'เค…')];
+ let zerovec: ZeroVec<(u32, char)> = vec.iter().copied().collect();
+ let bytes = zerovec.as_bytes();
+ let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+ assert_eq!(zerovec, zerovec2);
+
+ // Test failed validation with a correctly sized but differently constrained tuple
+ // Note: 1234901 is not a valid char
+ let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes);
+ assert!(zerovec3.is_err());
+}
+
+#[test]
+fn test_tripleule_validate() {
+ use crate::ZeroVec;
+ let vec: Vec<(u32, char, i8)> = vec![(1, 'a', -5), (1234901, 'ๅ•Š', 3), (100, 'เค…', -127)];
+ let zerovec: ZeroVec<(u32, char, i8)> = vec.iter().copied().collect();
+ let bytes = zerovec.as_bytes();
+ let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+ assert_eq!(zerovec, zerovec2);
+
+ // Test failed validation with a correctly sized but differently constrained tuple
+ // Note: 1234901 is not a valid char
+ let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes);
+ assert!(zerovec3.is_err());
+}
+
+#[test]
+fn test_quadule_validate() {
+ use crate::ZeroVec;
+ let vec: Vec<(u32, char, i8, u16)> =
+ vec![(1, 'a', -5, 3), (1234901, 'ๅ•Š', 3, 11), (100, 'เค…', -127, 0)];
+ let zerovec: ZeroVec<(u32, char, i8, u16)> = vec.iter().copied().collect();
+ let bytes = zerovec.as_bytes();
+ let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+ assert_eq!(zerovec, zerovec2);
+
+ // Test failed validation with a correctly sized but differently constrained tuple
+ // Note: 1234901 is not a valid char
+ let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes);
+ assert!(zerovec3.is_err());
+}
diff --git a/third_party/rust/zerovec/src/ule/unvalidated.rs b/third_party/rust/zerovec/src/ule/unvalidated.rs
new file mode 100644
index 0000000000..21cfb0c0d5
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/unvalidated.rs
@@ -0,0 +1,527 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{AsULE, RawBytesULE, VarULE};
+use crate::ule::EqULE;
+use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError};
+use alloc::boxed::Box;
+use core::cmp::Ordering;
+use core::fmt;
+use core::ops::Deref;
+
+/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
+///
+/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
+/// example, strings that are keys of a map don't need to ever be reified as `str`s.
+///
+/// [`UnvalidatedStr`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
+///
+/// The main advantage of this type over `[u8]` is that it serializes as a string in
+/// human-readable formats like JSON.
+///
+/// # Examples
+///
+/// Using an [`UnvalidatedStr`] as the key of a [`ZeroMap`]:
+///
+/// ```
+/// use zerovec::ule::UnvalidatedStr;
+/// use zerovec::ZeroMap;
+///
+/// let map: ZeroMap<UnvalidatedStr, usize> = [
+/// (UnvalidatedStr::from_str("abc"), 11),
+/// (UnvalidatedStr::from_str("def"), 22),
+/// (UnvalidatedStr::from_str("ghi"), 33),
+/// ]
+/// .into_iter()
+/// .collect();
+///
+/// let key = "abc";
+/// let value = map.get_copied_by(|uvstr| uvstr.as_bytes().cmp(key.as_bytes()));
+/// assert_eq!(Some(11), value);
+/// ```
+///
+/// [`ZeroMap`]: crate::ZeroMap
+#[repr(transparent)]
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // transparent newtype
+pub struct UnvalidatedStr([u8]);
+
+impl fmt::Debug for UnvalidatedStr {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Debug as a string if possible
+ match self.try_as_str() {
+ Ok(s) => fmt::Debug::fmt(s, f),
+ Err(_) => fmt::Debug::fmt(&self.0, f),
+ }
+ }
+}
+
+impl UnvalidatedStr {
+ /// Create a [`UnvalidatedStr`] from a byte slice.
+ #[inline]
+ pub const fn from_bytes(other: &[u8]) -> &Self {
+ // Safety: UnvalidatedStr is transparent over [u8]
+ unsafe { core::mem::transmute(other) }
+ }
+
+ /// Create a [`UnvalidatedStr`] from a string slice.
+ #[inline]
+ pub const fn from_str(s: &str) -> &Self {
+ Self::from_bytes(s.as_bytes())
+ }
+
+ /// Create a [`UnvalidatedStr`] from boxed bytes.
+ #[inline]
+ pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
+ // Safety: UnvalidatedStr is transparent over [u8]
+ unsafe { core::mem::transmute(other) }
+ }
+
+ /// Create a [`UnvalidatedStr`] from a boxed `str`.
+ #[inline]
+ pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
+ Self::from_boxed_bytes(other.into_boxed_bytes())
+ }
+
+    /// Get the bytes from a [`UnvalidatedStr`].
+ #[inline]
+ pub const fn as_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ /// Attempt to convert a [`UnvalidatedStr`] to a `str`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedStr;
+ ///
+ /// static A: &UnvalidatedStr = UnvalidatedStr::from_bytes(b"abc");
+ ///
+ /// let b = A.try_as_str().unwrap();
+ /// assert_eq!(b, "abc");
+ /// ```
+ // Note: this is const starting in 1.63
+ #[inline]
+ pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
+ core::str::from_utf8(&self.0)
+ }
+}
+
+impl<'a> From<&'a str> for &'a UnvalidatedStr {
+ #[inline]
+ fn from(other: &'a str) -> Self {
+ UnvalidatedStr::from_str(other)
+ }
+}
+
+impl From<Box<str>> for Box<UnvalidatedStr> {
+ #[inline]
+ fn from(other: Box<str>) -> Self {
+ UnvalidatedStr::from_boxed_str(other)
+ }
+}
+
+impl Deref for UnvalidatedStr {
+ type Target = [u8];
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<'a> ZeroMapKV<'a> for UnvalidatedStr {
+ type Container = VarZeroVec<'a, UnvalidatedStr>;
+ type Slice = VarZeroSlice<UnvalidatedStr>;
+ type GetType = UnvalidatedStr;
+ type OwnedType = Box<UnvalidatedStr>;
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. UnvalidatedStr does not include any uninitialized or padding bytes (transparent over a ULE)
+// 2. UnvalidatedStr is aligned to 1 byte (transparent over a ULE)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid (impossible)
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety (impossible)
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data (returns the argument directly)
+// 6. All other methods are defaulted
+// 7. `[T]` byte equality is semantic equality (transparent over a ULE)
+unsafe impl VarULE for UnvalidatedStr {
+ #[inline]
+ fn validate_byte_slice(_: &[u8]) -> Result<(), ZeroVecError> {
+ Ok(())
+ }
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ UnvalidatedStr::from_bytes(bytes)
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl serde::Serialize for UnvalidatedStr {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ use serde::ser::Error;
+ let s = self
+ .try_as_str()
+ .map_err(|_| S::Error::custom("invalid UTF-8 in UnvalidatedStr"))?;
+ if serializer.is_human_readable() {
+ serializer.serialize_str(s)
+ } else {
+ serializer.serialize_bytes(s.as_bytes())
+ }
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for Box<UnvalidatedStr> {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ if deserializer.is_human_readable() {
+ let boxed_str = Box::<str>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_boxed_str(boxed_str))
+ } else {
+ let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_boxed_bytes(boxed_bytes))
+ }
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de, 'a> serde::Deserialize<'de> for &'a UnvalidatedStr
+where
+ 'de: 'a,
+{
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ if deserializer.is_human_readable() {
+ let s = <&str>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_str(s))
+ } else {
+ let bytes = <&[u8]>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_bytes(bytes))
+ }
+ }
+}
+
+/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not
+/// validated as such.
+///
+/// Use this type instead of `char` when you want to deal with data that is expected to be valid
+/// Unicode scalar values, but you want control over when or if you validate that assumption.
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE};
+/// use zerovec::{ZeroSlice, ZeroVec};
+///
+/// // data known to be little-endian three-byte chunks of valid Unicode scalar values
+/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01];
+/// // ground truth expectation
+/// let real = ['h', 'i', '๐Ÿ‘‹'];
+///
+/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length");
+/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect();
+/// assert_eq!(&parsed, &real);
+///
+/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect();
+/// let serialized_data = chars.as_bytes();
+/// assert_eq!(serialized_data, &data);
+/// ```
+#[repr(transparent)]
+#[derive(PartialEq, Eq, Clone, Copy, Hash)]
+pub struct UnvalidatedChar([u8; 3]);
+
+impl UnvalidatedChar {
+ /// Create a [`UnvalidatedChar`] from a `char`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedChar;
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(a.try_to_char().unwrap(), 'a');
+ /// ```
+ #[inline]
+ pub const fn from_char(c: char) -> Self {
+ let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ #[inline]
+ #[doc(hidden)]
+ pub const fn from_u24(c: u32) -> Self {
+ let [u0, u1, u2, _u3] = c.to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ /// Attempt to convert a [`UnvalidatedChar`] to a `char`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, UnvalidatedChar};
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(a.try_to_char(), Ok('a'));
+ ///
+ /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+ /// assert!(matches!(b.try_to_char(), Err(_)));
+ /// ```
+ #[inline]
+ pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
+ let [u0, u1, u2] = self.0;
+ char::try_from(u32::from_le_bytes([u0, u1, u2, 0]))
+ }
+
+    /// Convert a [`UnvalidatedChar`] to a `char`, returning [`char::REPLACEMENT_CHARACTER`]
+ /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, UnvalidatedChar};
+ ///
+ /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+ /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER);
+ /// ```
+ #[inline]
+ pub fn to_char_lossy(self) -> char {
+ self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
+ }
+
+ /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is
+ /// a valid Unicode scalar value.
+ ///
+ /// # Safety
+ ///
+    /// The bytes of the `UnvalidatedChar` must be the little-endian encoding of a valid Unicode scalar value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedChar;
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a');
+ /// ```
+ #[inline]
+ pub unsafe fn to_char_unchecked(self) -> char {
+ let [u0, u1, u2] = self.0;
+ char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0]))
+ }
+}
+
+impl RawBytesULE<3> {
+ /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling
+ /// [`AsULE::to_unaligned`].
+ #[inline]
+ pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self {
+ RawBytesULE(uc.0)
+ }
+}
+
+impl AsULE for UnvalidatedChar {
+ type ULE = RawBytesULE<3>;
+
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ RawBytesULE(self.0)
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self(unaligned.0)
+ }
+}
+
+// Safety: UnvalidatedChar is always the little-endian representation of a char,
+// which corresponds to its AsULE::ULE type
+unsafe impl EqULE for UnvalidatedChar {}
+
+impl fmt::Debug for UnvalidatedChar {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Debug as a char if possible
+ match self.try_to_char() {
+ Ok(c) => fmt::Debug::fmt(&c, f),
+ Err(_) => fmt::Debug::fmt(&self.0, f),
+ }
+ }
+}
+
+impl PartialOrd for UnvalidatedChar {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for UnvalidatedChar {
+ // custom implementation, as derived Ord would compare lexicographically
+ fn cmp(&self, other: &Self) -> Ordering {
+ let [a0, a1, a2] = self.0;
+ let a = u32::from_le_bytes([a0, a1, a2, 0]);
+ let [b0, b1, b2] = other.0;
+ let b = u32::from_le_bytes([b0, b1, b2, 0]);
+ a.cmp(&b)
+ }
+}
+
+impl From<char> for UnvalidatedChar {
+ #[inline]
+ fn from(value: char) -> Self {
+ Self::from_char(value)
+ }
+}
+
+impl TryFrom<UnvalidatedChar> for char {
+ type Error = core::char::CharTryFromError;
+
+ #[inline]
+ fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> {
+ value.try_to_char()
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl serde::Serialize for UnvalidatedChar {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ use serde::ser::Error;
+ let c = self
+ .try_to_char()
+ .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?;
+ if serializer.is_human_readable() {
+ serializer.serialize_char(c)
+ } else {
+ self.0.serialize(serializer)
+ }
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for UnvalidatedChar {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ if deserializer.is_human_readable() {
+ let c = <char>::deserialize(deserializer)?;
+ Ok(UnvalidatedChar::from_char(c))
+ } else {
+ let bytes = <[u8; 3]>::deserialize(deserializer)?;
+ Ok(UnvalidatedChar(bytes))
+ }
+ }
+}
+
+#[cfg(feature = "databake")]
+impl databake::Bake for UnvalidatedChar {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ match self.try_to_char() {
+ Ok(ch) => {
+ env.insert("zerovec");
+ let ch = ch.bake(env);
+ databake::quote! {
+ zerovec::ule::UnvalidatedChar::from_char(#ch)
+ }
+ }
+ Err(_) => {
+ env.insert("zerovec");
+ let u24 = u32::from_le_bytes([self.0[0], self.0[1], self.0[2], 0]);
+ databake::quote! {
+ zerovec::ule::UnvalidatedChar::from_u24(#u24)
+ }
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::ZeroVec;
+
+ #[test]
+ fn test_serde_fail() {
+ let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]);
+ serde_json::to_string(&uc).expect_err("serialize invalid char bytes");
+ bincode::serialize(&uc).expect_err("serialize invalid char bytes");
+ }
+
+ #[test]
+ fn test_serde_json() {
+ let c = '๐Ÿ™ƒ';
+ let uc = UnvalidatedChar::from_char(c);
+ let json_ser = serde_json::to_string(&uc).unwrap();
+
+ assert_eq!(json_ser, r#""๐Ÿ™ƒ""#);
+
+ let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap();
+
+ assert_eq!(uc, json_de);
+ }
+
+ #[test]
+ fn test_serde_bincode() {
+ let c = '๐Ÿ™ƒ';
+ let uc = UnvalidatedChar::from_char(c);
+ let bytes_ser = bincode::serialize(&uc).unwrap();
+
+ assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]);
+
+ let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap();
+
+ assert_eq!(uc, bytes_de);
+ }
+
+ #[test]
+ fn test_representation() {
+ let chars = ['w', 'ฯ‰', 'ๆ–‡', '๐‘„ƒ', '๐Ÿ™ƒ'];
+
+ // backed by [UnvalidatedChar]
+ let uvchars: Vec<_> = chars
+ .iter()
+ .copied()
+ .map(UnvalidatedChar::from_char)
+ .collect();
+ // backed by [RawBytesULE<3>]
+ let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
+
+ let ule_bytes = zvec.as_bytes();
+ let uvbytes;
+ unsafe {
+ let ptr = &uvchars[..] as *const _ as *const u8;
+ uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len());
+ }
+
+ // UnvalidatedChar is defined as little-endian, so this must be true on all platforms
+ // also asserts that to_unaligned/from_unaligned are no-ops
+ assert_eq!(uvbytes, ule_bytes);
+
+ assert_eq!(
+ &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+ ule_bytes
+ );
+ }
+
+ #[test]
+ fn test_char_bake() {
+ databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec);
+ // surrogate code point
+ databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec);
+ }
+}