Merging upstream version 1.75.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-07 05:48:48 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-07 05:48:48 +0000
commit: ef24de24a82fe681581cc130f342363c47c0969a (patch)
tree: 0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/zerovec/src/ule
parent: Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff)
download: rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz
rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip
10 files changed, 487 insertions, 94 deletions
diff --git a/vendor/zerovec/src/ule/chars.rs b/vendor/zerovec/src/ule/chars.rs
index 7a4a97a4a..e0ec25240 100644
--- a/vendor/zerovec/src/ule/chars.rs
+++ b/vendor/zerovec/src/ule/chars.rs
@@ -6,10 +6,11 @@
 //! ULE implementation for the `char` type.
 
 use super::*;
+use crate::impl_ule_from_array;
 use core::cmp::Ordering;
 use core::convert::TryFrom;
 
-/// A u8 array of little-endian data corresponding to a Unicode code point.
+/// A u8 array of little-endian data corresponding to a Unicode scalar value.
 ///
 /// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a
 /// valid `char` and can be converted without validation.
@@ -40,6 +41,20 @@ use core::convert::TryFrom;
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
 pub struct CharULE([u8; 3]);
 
+impl CharULE {
+    /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling
+    /// [`AsULE::to_unaligned()`]
+    ///
+    /// See the type-level documentation for [`CharULE`] for more information.
+    #[inline]
+    pub const fn from_aligned(c: char) -> Self {
+        let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+        Self([u0, u1, u2])
+    }
+
+    impl_ule_from_array!(char, CharULE, Self([0; 3]));
+}
+
 // Safety (based on the safety checklist on the ULE trait):
 //  1. CharULE does not include any uninitialized or padding bytes.
 //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
@@ -72,13 +87,12 @@ impl AsULE for char {
 
     #[inline]
     fn to_unaligned(self) -> Self::ULE {
-        let [u0, u1, u2, _u3] = u32::from(self).to_le_bytes();
-        CharULE([u0, u1, u2])
+        CharULE::from_aligned(self)
     }
 
     #[inline]
     fn from_unaligned(unaligned: Self::ULE) -> Self {
-        // Safe because the bytes of CharULE are defined to represent a valid Unicode code point.
+        // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
         unsafe {
             Self::from_u32_unchecked(u32::from_le_bytes([
                 unaligned.0[0],
@@ -107,6 +121,25 @@ mod test {
     use super::*;
 
     #[test]
+    fn test_from_array() {
+        const CHARS: [char; 2] = ['a', '🙃'];
+        const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
+        assert_eq!(
+            CharULE::as_byte_slice(&CHARS_ULE),
+            &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
+        );
+    }
+
+    #[test]
+    fn test_from_array_zst() {
+        const CHARS: [char; 0] = [];
+        const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
+        let bytes = CharULE::as_byte_slice(&CHARS_ULE);
+        let empty: &[u8] = &[];
+        assert_eq!(bytes, empty);
+    }
+
+    #[test]
     fn test_parse() {
         // 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32)
         let chars = ['w', 'ω', '文', '𑄃', '🙃'];
@@ -141,7 +174,7 @@ mod test {
             .collect();
         let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
         let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
-        assert!(matches!(parsed_ules_result, Err(_)));
+        assert!(parsed_ules_result.is_err());
 
         // 0x20FFFF is out of range for a char
         let u32s = [0x20FFFF];
@@ -152,6 +185,6 @@ mod test {
             .collect();
         let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
         let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
-        assert!(matches!(parsed_ules_result, Err(_)));
+        assert!(parsed_ules_result.is_err());
     }
 }
diff --git a/vendor/zerovec/src/ule/custom.rs b/vendor/zerovec/src/ule/custom.rs
index b2e4cb0e5..8cc6e9de4 100644
--- a/vendor/zerovec/src/ule/custom.rs
+++ b/vendor/zerovec/src/ule/custom.rs
@@ -129,8 +129,8 @@
 //! }
 //!
 //! fn main() {
-//!     let mut foos = vec![Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])},
-//!                         Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}];
+//!     let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])},
+//!                     Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}];
 //!
 //!     let vzv = VarZeroVec::<_>::from(&foos);
 //!
diff --git a/vendor/zerovec/src/ule/encode.rs b/vendor/zerovec/src/ule/encode.rs
index 2091cf06b..adea123aa 100644
--- a/vendor/zerovec/src/ule/encode.rs
+++ b/vendor/zerovec/src/ule/encode.rs
@@ -8,7 +8,7 @@ use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
 use alloc::borrow::{Cow, ToOwned};
 use alloc::boxed::Box;
 use alloc::string::String;
-use alloc::vec::Vec;
+use alloc::{vec, vec::Vec};
 use core::mem;
 
 /// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
@@ -82,16 +82,14 @@ pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
 ///
 /// This is primarily useful for generating `Deserialize` impls for VarULE types
 pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> {
-    let mut vec: Vec<u8> = Vec::new();
     // zero-fill the vector to avoid uninitialized data UB
-    vec.resize(x.encode_var_ule_len(), 0);
+    let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
     x.encode_var_ule_write(&mut vec);
-    let boxed = vec.into_boxed_slice();
+    let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice());
     unsafe {
         // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
         // and can be recouped via from_byte_slice_unchecked()
         let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T;
-        mem::forget(boxed);
 
         // Safety: we can construct an owned version since we have mem::forgotten the older owner
         Box::from_raw(ptr)
diff --git a/vendor/zerovec/src/ule/macros.rs b/vendor/zerovec/src/ule/macros.rs
new file mode 100644
index 000000000..955b1eb2e
--- /dev/null
+++ b/vendor/zerovec/src/ule/macros.rs
@@ -0,0 +1,29 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
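+/// Given an aligned type (`$aligned`), its ULE type (`$unaligned`), and a conversion function
+/// (`$single`, or `Self::from_aligned` if omitted), implement `from_array`, which converts an
+/// array of `$aligned` to an array of `Self`. Invoke this inside the `impl` block of `$unaligned`.
+/// The `$default` argument is due to current compiler limitations: it only pre-fills the result array, so pass any (cheap to construct) value.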
+/// Pass any (cheap to construct) value.
+#[macro_export]
+macro_rules! impl_ule_from_array {
+    ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => {
+        #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")]
+        pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] {
+            let mut result = [$default; N];
+            let mut i = 0;
+            // Won't panic because i < N and arr has length N
+            #[allow(clippy::indexing_slicing)]
+            while i < N {
+                result[i] = $single(arr[i]);
+                i += 1;
+            }
+            result
+        }
+    };
+    ($aligned:ty, $unaligned:ty, $default:expr) => {
+        impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned);
+    };
+}
diff --git a/vendor/zerovec/src/ule/mod.rs b/vendor/zerovec/src/ule/mod.rs
index e8ecd26e5..5a6d9cd47 100644
--- a/vendor/zerovec/src/ule/mod.rs
+++ b/vendor/zerovec/src/ule/mod.rs
@@ -14,6 +14,7 @@ mod chars;
 #[cfg(doc)]
 pub mod custom;
 mod encode;
+mod macros;
 mod multi;
 mod niche;
 mod option;
@@ -29,7 +30,7 @@ pub use multi::MultiFieldsULE;
 pub use niche::{NicheBytes, NichedOption, NichedOptionULE};
 pub use option::{OptionULE, OptionVarULE};
 pub use plain::RawBytesULE;
-pub use unvalidated::UnvalidatedStr;
+pub use unvalidated::{UnvalidatedChar, UnvalidatedStr};
 
 use alloc::alloc::Layout;
 use alloc::borrow::ToOwned;
@@ -156,7 +157,7 @@ where
 
 /// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type.
 ///
-/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_ule) instead.
+/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead.
 pub trait AsULE: Copy {
     /// The ULE type corresponding to `Self`.
     ///
@@ -356,13 +357,12 @@ pub unsafe trait VarULE: 'static {
     #[inline]
     fn to_boxed(&self) -> Box<Self> {
         let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice();
+        let bytesvec = mem::ManuallyDrop::new(bytesvec);
         unsafe {
             // Get the pointer representation
             let ptr: *mut Self =
                 Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self;
-            assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&*bytesvec));
-            // Forget the allocation
-            mem::forget(bytesvec);
+            assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec));
             // Transmute the pointer to an owned pointer
             Box::from_raw(ptr)
         }
diff --git a/vendor/zerovec/src/ule/multi.rs b/vendor/zerovec/src/ule/multi.rs
index 0ba0aea89..3281b2088 100644
--- a/vendor/zerovec/src/ule/multi.rs
+++ b/vendor/zerovec/src/ule/multi.rs
@@ -44,7 +44,7 @@ impl MultiFieldsULE {
                 lengths, output,
             );
             debug_assert!(
-                <VarZeroSlice<[u8]>>::validate_byte_slice(output).is_ok(),
+                <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(),
                 "Encoded slice must be valid VarZeroSlice"
             );
             // Safe since write_serializable_bytes produces a valid VarZeroSlice buffer
@@ -141,12 +141,14 @@ unsafe impl VarULE for MultiFieldsULE {
     /// This impl exists so that EncodeAsVarULE can work.
     #[inline]
     fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
-        <VarZeroSlice<[u8]>>::validate_byte_slice(slice)
+        <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice)
     }
 
     #[inline]
     unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
         // &Self is transparent over &VZS<..>
-        mem::transmute(<VarZeroSlice<[u8]>>::from_byte_slice_unchecked(bytes))
+        mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked(
+            bytes,
+        ))
     }
 }
diff --git a/vendor/zerovec/src/ule/option.rs b/vendor/zerovec/src/ule/option.rs
index 50b193aac..9b0dc5b28 100644
--- a/vendor/zerovec/src/ule/option.rs
+++ b/vendor/zerovec/src/ule/option.rs
@@ -197,9 +197,8 @@ unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> {
 
     #[inline]
     unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
-        let metadata = bytes.len() - 1;
         let entire_struct_as_slice: *const [u8] =
-            ::core::slice::from_raw_parts(bytes.as_ptr(), metadata);
+            ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1);
         &*(entire_struct_as_slice as *const Self)
     }
 }
diff --git a/vendor/zerovec/src/ule/plain.rs b/vendor/zerovec/src/ule/plain.rs
index 49455d45f..f244f6b68 100644
--- a/vendor/zerovec/src/ule/plain.rs
+++ b/vendor/zerovec/src/ule/plain.rs
@@ -6,6 +6,7 @@
 //! ULE implementation for Plain Old Data types, including all sized integers.
 
 use super::*;
+use crate::impl_ule_from_array;
 use crate::ZeroSlice;
 use core::num::{NonZeroI8, NonZeroU8};
 
@@ -15,69 +16,69 @@ use core::num::{NonZeroI8, NonZeroU8};
 #[allow(clippy::exhaustive_structs)] // newtype
 pub struct RawBytesULE<const N: usize>(pub [u8; N]);
 
-macro_rules! impl_byte_slice_size {
-    ($unsigned:ty, $size:literal) => {
-        impl From<[u8; $size]> for RawBytesULE<$size> {
-            #[inline]
-            fn from(le_bytes: [u8; $size]) -> Self {
-                Self(le_bytes)
-            }
-        }
-        impl RawBytesULE<$size> {
-            #[inline]
-            pub fn as_bytes(&self) -> &[u8] {
-                &self.0
-            }
-        }
-        // Safety (based on the safety checklist on the ULE trait):
-        //  1. RawBytesULE does not include any uninitialized or padding bytes.
-        //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
-        //  2. RawBytesULE is aligned to 1 byte.
-        //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
-        //  3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
-        //  4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
-        //  5. The other ULE methods use the default impl.
-        //  6. RawBytesULE byte equality is semantic equality
-        unsafe impl ULE for RawBytesULE<$size> {
-            #[inline]
-            fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
-                if bytes.len() % $size == 0 {
-                    // Safe because Self is transparent over [u8; $size]
-                    Ok(())
-                } else {
-                    Err(ZeroVecError::length::<Self>(bytes.len()))
-                }
-            }
+impl<const N: usize> RawBytesULE<N> {
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.0
+    }
+
+    #[inline]
+    pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
+        let data = bytes.as_mut_ptr();
+        let len = bytes.len() / N;
+        // Safe because Self is transparent over [u8; N]
+        unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
+    }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+//  1. RawBytesULE does not include any uninitialized or padding bytes.
+//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+//  2. RawBytesULE is aligned to 1 byte.
+//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+//  3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+//  4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
+//  5. The other ULE methods use the default impl.
+//  6. RawBytesULE byte equality is semantic equality
+unsafe impl<const N: usize> ULE for RawBytesULE<N> {
+    #[inline]
+    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+        if bytes.len() % N == 0 {
+            // Safe because Self is transparent over [u8; N]
+            Ok(())
+        } else {
+            Err(ZeroVecError::length::<Self>(bytes.len()))
         }
+    }
+}
 
-        impl RawBytesULE<$size> {
-            #[inline]
-            pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
-                let data = bytes.as_mut_ptr();
-                let len = bytes.len() / $size;
-                // Safe because Self is transparent over [u8; $size]
-                unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
-            }
+impl<const N: usize> From<[u8; N]> for RawBytesULE<N> {
+    #[inline]
+    fn from(le_bytes: [u8; N]) -> Self {
+        Self(le_bytes)
+    }
+}
 
-            /// Gets this RawBytesULE as an unsigned int. This is equivalent to calling
-            /// [AsULE::from_unaligned()] on the appropriately sized type.
+macro_rules! impl_byte_slice_size {
+    ($unsigned:ty, $size:literal) => {
+        impl RawBytesULE<$size> {
+            #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")]
             #[inline]
             pub fn as_unsigned_int(&self) -> $unsigned {
                 <$unsigned as $crate::ule::AsULE>::from_unaligned(*self)
             }
 
-            /// Convert an array of native-endian aligned integers to an array of RawBytesULE.
-            pub const fn from_array<const N: usize>(arr: [$unsigned; N]) -> [Self; N] {
-                let mut result = [RawBytesULE([0; $size]); N];
-                let mut i = 0;
-                // Won't panic because i < N and arr has length N
-                #[allow(clippy::indexing_slicing)]
-                while i < N {
-                    result[i].0 = arr[i].to_le_bytes();
-                    i += 1;
-                }
-                result
+            #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")]
+            #[inline]
+            pub const fn from_aligned(value: $unsigned) -> Self {
+                Self(value.to_le_bytes())
             }
+
+            impl_ule_from_array!(
+                $unsigned,
+                RawBytesULE<$size>,
+                RawBytesULE([0; $size])
+            );
         }
     };
 }
@@ -110,7 +111,7 @@ macro_rules! impl_const_constructors {
 }
 
 macro_rules! impl_byte_slice_type {
-    ($type:ty, $size:literal) => {
+    ($single_fn:ident, $type:ty, $size:literal) => {
         impl From<$type> for RawBytesULE<$size> {
             #[inline]
             fn from(value: $type) -> Self {
@@ -131,6 +132,24 @@ macro_rules! impl_byte_slice_type {
         // EqULE is true because $type and RawBytesULE<$size>
         // have the same byte sequence on little-endian
         unsafe impl EqULE for $type {}
+
+        impl RawBytesULE<$size> {
+            pub const fn $single_fn(v: $type) -> Self {
+                RawBytesULE(v.to_le_bytes())
+            }
+        }
+    };
+}
+
+macro_rules! impl_byte_slice_unsigned_type {
+    ($type:ty, $size:literal) => {
+        impl_byte_slice_type!(from_unsigned, $type, $size);
+    };
+}
+
+macro_rules! impl_byte_slice_signed_type {
+    ($type:ty, $size:literal) => {
+        impl_byte_slice_type!(from_signed, $type, $size);
     };
 }
 
@@ -139,15 +158,15 @@ impl_byte_slice_size!(u32, 4);
 impl_byte_slice_size!(u64, 8);
 impl_byte_slice_size!(u128, 16);
 
-impl_byte_slice_type!(u16, 2);
-impl_byte_slice_type!(u32, 4);
-impl_byte_slice_type!(u64, 8);
-impl_byte_slice_type!(u128, 16);
+impl_byte_slice_unsigned_type!(u16, 2);
+impl_byte_slice_unsigned_type!(u32, 4);
+impl_byte_slice_unsigned_type!(u64, 8);
+impl_byte_slice_unsigned_type!(u128, 16);
 
-impl_byte_slice_type!(i16, 2);
-impl_byte_slice_type!(i32, 4);
-impl_byte_slice_type!(i64, 8);
-impl_byte_slice_type!(i128, 16);
+impl_byte_slice_signed_type!(i16, 2);
+impl_byte_slice_signed_type!(i32, 4);
+impl_byte_slice_signed_type!(i64, 8);
+impl_byte_slice_signed_type!(i128, 16);
 
 impl_const_constructors!(u8, 1);
 impl_const_constructors!(u16, 2);
diff --git a/vendor/zerovec/src/ule/tuple.rs b/vendor/zerovec/src/ule/tuple.rs
index c26567e98..3e0f291b3 100644
--- a/vendor/zerovec/src/ule/tuple.rs
+++ b/vendor/zerovec/src/ule/tuple.rs
@@ -111,10 +111,7 @@ macro_rules! tuple_ule {
 
         impl<$($t: ULE),+> Clone for $name<$($t),+> {
             fn clone(&self) -> Self {
-                // copy to the stack to avoid hitting a future incompat error
-                // https://github.com/rust-lang/rust/issues/82523#issuecomment-947900712
-                let stack = ($(self.$i),+);
-                $name($(stack.$i),+)
+                *self
             }
         }
 
@@ -147,7 +144,7 @@ fn test_pairule_validate() {
     // Test failed validation with a correctly sized but differently constrained tuple
     // Note: 1234901 is not a valid char
     let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes);
-    assert!(matches!(zerovec3, Err(_)));
+    assert!(zerovec3.is_err());
 }
 
 #[test]
@@ -162,7 +159,7 @@ fn test_tripleule_validate() {
     // Test failed validation with a correctly sized but differently constrained tuple
     // Note: 1234901 is not a valid char
     let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes);
-    assert!(matches!(zerovec3, Err(_)));
+    assert!(zerovec3.is_err());
 }
 
 #[test]
@@ -178,5 +175,5 @@ fn test_quadule_validate() {
     // Test failed validation with a correctly sized but differently constrained tuple
     // Note: 1234901 is not a valid char
     let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes);
-    assert!(matches!(zerovec3, Err(_)));
+    assert!(zerovec3.is_err());
 }
diff --git a/vendor/zerovec/src/ule/unvalidated.rs b/vendor/zerovec/src/ule/unvalidated.rs
index 4564c8673..21cfb0c0d 100644
--- a/vendor/zerovec/src/ule/unvalidated.rs
+++ b/vendor/zerovec/src/ule/unvalidated.rs
@@ -2,9 +2,11 @@
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
-use super::VarULE;
+use super::{AsULE, RawBytesULE, VarULE};
+use crate::ule::EqULE;
 use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError};
 use alloc::boxed::Box;
+use core::cmp::Ordering;
 use core::fmt;
 use core::ops::Deref;
 
@@ -209,3 +211,317 @@ where
         }
     }
 }
+
+/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not
+/// validated as such.
+///
+/// Use this type instead of `char` when you want to deal with data that is expected to be valid
+/// Unicode scalar values, but you want control over when or if you validate that assumption.
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE};
+/// use zerovec::{ZeroSlice, ZeroVec};
+///
+/// // data known to be little-endian three-byte chunks of valid Unicode scalar values
+/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01];
+/// // ground truth expectation
+/// let real = ['h', 'i', '👋'];
+///
+/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length");
+/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect();
+/// assert_eq!(&parsed, &real);
+///
+/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect();
+/// let serialized_data = chars.as_bytes();
+/// assert_eq!(serialized_data, &data);
+/// ```
+#[repr(transparent)]
+#[derive(PartialEq, Eq, Clone, Copy, Hash)]
+pub struct UnvalidatedChar([u8; 3]);
+
+impl UnvalidatedChar {
+    /// Create a [`UnvalidatedChar`] from a `char`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::ule::UnvalidatedChar;
+    ///
+    /// let a = UnvalidatedChar::from_char('a');
+    /// assert_eq!(a.try_to_char().unwrap(), 'a');
+    /// ```
+    #[inline]
+    pub const fn from_char(c: char) -> Self {
+        let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+        Self([u0, u1, u2])
+    }
+
+    #[inline]
+    #[doc(hidden)]
+    pub const fn from_u24(c: u32) -> Self {
+        let [u0, u1, u2, _u3] = c.to_le_bytes();
+        Self([u0, u1, u2])
+    }
+
+    /// Attempt to convert a [`UnvalidatedChar`] to a `char`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::ule::{AsULE, UnvalidatedChar};
+    ///
+    /// let a = UnvalidatedChar::from_char('a');
+    /// assert_eq!(a.try_to_char(), Ok('a'));
+    ///
+    /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+    /// assert!(matches!(b.try_to_char(), Err(_)));
+    /// ```
+    #[inline]
+    pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
+        let [u0, u1, u2] = self.0;
+        char::try_from(u32::from_le_bytes([u0, u1, u2, 0]))
+    }
+
+    /// Convert a [`UnvalidatedChar`] to a `char`, returning [`char::REPLACEMENT_CHARACTER`]
+    /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::ule::{AsULE, UnvalidatedChar};
+    ///
+    /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+    /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER);
+    /// ```
+    #[inline]
+    pub fn to_char_lossy(self) -> char {
+        self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
+    }
+
+    /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is
+    /// a valid Unicode scalar value.
+    ///
+    /// # Safety
+    ///
+    /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::ule::UnvalidatedChar;
+    ///
+    /// let a = UnvalidatedChar::from_char('a');
+    /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a');
+    /// ```
+    #[inline]
+    pub unsafe fn to_char_unchecked(self) -> char {
+        let [u0, u1, u2] = self.0;
+        char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0]))
+    }
+}
+
+impl RawBytesULE<3> {
+    /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling
+    /// [`AsULE::to_unaligned`].
+    #[inline]
+    pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self {
+        RawBytesULE(uc.0)
+    }
+}
+
+impl AsULE for UnvalidatedChar {
+    type ULE = RawBytesULE<3>;
+
+    #[inline]
+    fn to_unaligned(self) -> Self::ULE {
+        RawBytesULE(self.0)
+    }
+
+    #[inline]
+    fn from_unaligned(unaligned: Self::ULE) -> Self {
+        Self(unaligned.0)
+    }
+}
+
+// Safety: UnvalidatedChar is a transparent wrapper over three little-endian bytes (whether or not
+// they form a valid char), which is exactly the byte sequence of its AsULE::ULE type, RawBytesULE<3>
+unsafe impl EqULE for UnvalidatedChar {}
+
+impl fmt::Debug for UnvalidatedChar {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Debug as a char if possible
+        match self.try_to_char() {
+            Ok(c) => fmt::Debug::fmt(&c, f),
+            Err(_) => fmt::Debug::fmt(&self.0, f),
+        }
+    }
+}
+
+impl PartialOrd for UnvalidatedChar {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for UnvalidatedChar {
+    // custom implementation, as derived Ord would compare lexicographically
+    fn cmp(&self, other: &Self) -> Ordering {
+        let [a0, a1, a2] = self.0;
+        let a = u32::from_le_bytes([a0, a1, a2, 0]);
+        let [b0, b1, b2] = other.0;
+        let b = u32::from_le_bytes([b0, b1, b2, 0]);
+        a.cmp(&b)
+    }
+}
+
+impl From<char> for UnvalidatedChar {
+    #[inline]
+    fn from(value: char) -> Self {
+        Self::from_char(value)
+    }
+}
+
+impl TryFrom<UnvalidatedChar> for char {
+    type Error = core::char::CharTryFromError;
+
+    #[inline]
+    fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> {
+        value.try_to_char()
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl serde::Serialize for UnvalidatedChar {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::Error;
+        let c = self
+            .try_to_char()
+            .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?;
+        if serializer.is_human_readable() {
+            serializer.serialize_char(c)
+        } else {
+            self.0.serialize(serializer)
+        }
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for UnvalidatedChar {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            let c = <char>::deserialize(deserializer)?;
+            Ok(UnvalidatedChar::from_char(c))
+        } else {
+            let bytes = <[u8; 3]>::deserialize(deserializer)?;
+            Ok(UnvalidatedChar(bytes))
+        }
+    }
+}
+
+#[cfg(feature = "databake")]
+impl databake::Bake for UnvalidatedChar {
+    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+        match self.try_to_char() {
+            Ok(ch) => {
+                env.insert("zerovec");
+                let ch = ch.bake(env);
+                databake::quote! {
+                    zerovec::ule::UnvalidatedChar::from_char(#ch)
+                }
+            }
+            Err(_) => {
+                env.insert("zerovec");
+                let u24 = u32::from_le_bytes([self.0[0], self.0[1], self.0[2], 0]);
+                databake::quote! {
+                    zerovec::ule::UnvalidatedChar::from_u24(#u24)
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::ZeroVec;
+
+    #[test]
+    fn test_serde_fail() {
+        let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]);
+        serde_json::to_string(&uc).expect_err("serialize invalid char bytes");
+        bincode::serialize(&uc).expect_err("serialize invalid char bytes");
+    }
+
+    #[test]
+    fn test_serde_json() {
+        let c = '🙃';
+        let uc = UnvalidatedChar::from_char(c);
+        let json_ser = serde_json::to_string(&uc).unwrap();
+
+        assert_eq!(json_ser, r#""🙃""#);
+
+        let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap();
+
+        assert_eq!(uc, json_de);
+    }
+
+    #[test]
+    fn test_serde_bincode() {
+        let c = '🙃';
+        let uc = UnvalidatedChar::from_char(c);
+        let bytes_ser = bincode::serialize(&uc).unwrap();
+
+        assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]);
+
+        let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap();
+
+        assert_eq!(uc, bytes_de);
+    }
+
+    #[test]
+    fn test_representation() {
+        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
+
+        // backed by [UnvalidatedChar]
+        let uvchars: Vec<_> = chars
+            .iter()
+            .copied()
+            .map(UnvalidatedChar::from_char)
+            .collect();
+        // backed by [RawBytesULE<3>]
+        let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
+
+        let ule_bytes = zvec.as_bytes();
+        let uvbytes;
+        unsafe {
+            let ptr = &uvchars[..] as *const _ as *const u8;
+            uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len());
+        }
+
+        // UnvalidatedChar is defined as little-endian, so this must be true on all platforms
+        // also asserts that to_unaligned/from_unaligned are no-ops
+        assert_eq!(uvbytes, ule_bytes);
+
+        assert_eq!(
+            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+            ule_bytes
+        );
+    }
+
+    #[test]
+    fn test_char_bake() {
+        databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec);
+        // surrogate code point
+        databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec);
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-06-07 05:48:48 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-06-07 05:48:48 +0000
commit	ef24de24a82fe681581cc130f342363c47c0969a (patch)
tree	0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/zerovec/src/ule
parent	Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff)
download	rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip