// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #![allow(clippy::upper_case_acronyms)] //! ULE implementation for Plain Old Data types, including all sized integers. use super::*; use crate::ZeroSlice; use core::num::{NonZeroI8, NonZeroU8}; /// A u8 array of little-endian data with infallible conversions to and from &[u8]. #[repr(transparent)] #[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)] #[allow(clippy::exhaustive_structs)] // newtype pub struct RawBytesULE(pub [u8; N]); macro_rules! impl_byte_slice_size { ($unsigned:ty, $size:literal) => { impl From<[u8; $size]> for RawBytesULE<$size> { #[inline] fn from(le_bytes: [u8; $size]) -> Self { Self(le_bytes) } } impl RawBytesULE<$size> { #[inline] pub fn as_bytes(&self) -> &[u8] { &self.0 } } // Safety (based on the safety checklist on the ULE trait): // 1. RawBytesULE does not include any uninitialized or padding bytes. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 2. RawBytesULE is aligned to 1 byte. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. // 5. The other ULE methods use the default impl. // 6. RawBytesULE byte equality is semantic equality unsafe impl ULE for RawBytesULE<$size> { #[inline] fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { if bytes.len() % $size == 0 { // Safe because Self is transparent over [u8; $size] Ok(()) } else { Err(ZeroVecError::length::(bytes.len())) } } } impl RawBytesULE<$size> { #[inline] pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { let data = bytes.as_mut_ptr(); let len = bytes.len() / $size; // Safe because Self is transparent over [u8; $size] unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) } } /// Gets this RawBytesULE as an unsigned int. This is equivalent to calling /// [AsULE::from_unaligned()] on the appropriately sized type. #[inline] pub fn as_unsigned_int(&self) -> $unsigned { <$unsigned as $crate::ule::AsULE>::from_unaligned(*self) } /// Convert an array of native-endian aligned integers to an array of RawBytesULE. pub const fn from_array(arr: [$unsigned; N]) -> [Self; N] { let mut result = [RawBytesULE([0; $size]); N]; let mut i = 0; // Won't panic because i < N and arr has length N #[allow(clippy::indexing_slicing)] while i < N { result[i].0 = arr[i].to_le_bytes(); i += 1; } result } } }; } macro_rules! impl_const_constructors { ($base:ty, $size:literal) => { impl ZeroSlice<$base> { /// This function can be used for constructing ZeroVecs in a const context, avoiding /// parsing checks. /// /// This cannot be generic over T because of current limitations in `const`, but if /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`] /// instead. /// /// See [`ZeroSlice::cast()`] for an example. pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> { let len = bytes.len(); #[allow(clippy::modulo_one)] if len % $size == 0 { Ok(unsafe { Self::from_bytes_unchecked(bytes) }) } else { Err(ZeroVecError::InvalidLength { ty: concat!(""), len, }) } } } }; } macro_rules! impl_byte_slice_type { ($type:ty, $size:literal) => { impl From<$type> for RawBytesULE<$size> { #[inline] fn from(value: $type) -> Self { Self(value.to_le_bytes()) } } impl AsULE for $type { type ULE = RawBytesULE<$size>; #[inline] fn to_unaligned(self) -> Self::ULE { RawBytesULE(self.to_le_bytes()) } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { <$type>::from_le_bytes(unaligned.0) } } // EqULE is true because $type and RawBytesULE<$size> // have the same byte sequence on little-endian unsafe impl EqULE for $type {} }; } impl_byte_slice_size!(u16, 2); impl_byte_slice_size!(u32, 4); impl_byte_slice_size!(u64, 8); impl_byte_slice_size!(u128, 16); impl_byte_slice_type!(u16, 2); impl_byte_slice_type!(u32, 4); impl_byte_slice_type!(u64, 8); impl_byte_slice_type!(u128, 16); impl_byte_slice_type!(i16, 2); impl_byte_slice_type!(i32, 4); impl_byte_slice_type!(i64, 8); impl_byte_slice_type!(i128, 16); impl_const_constructors!(u8, 1); impl_const_constructors!(u16, 2); impl_const_constructors!(u32, 4); impl_const_constructors!(u64, 8); impl_const_constructors!(u128, 16); // Note: The f32 and f64 const constructors currently have limited use because // `f32::to_le_bytes` is not yet const. impl_const_constructors!(bool, 1); // Safety (based on the safety checklist on the ULE trait): // 1. u8 does not include any uninitialized or padding bytes. // 2. u8 is aligned to 1 byte. // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. u8 byte equality is semantic equality unsafe impl ULE for u8 { #[inline] fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { Ok(()) } } impl AsULE for u8 { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because u8 is its own ULE. unsafe impl EqULE for u8 {} // Safety (based on the safety checklist on the ULE trait): // 1. NonZeroU8 does not include any uninitialized or padding bytes. // 2. NonZeroU8 is aligned to 1 byte. // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (0x00). // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. NonZeroU8 byte equality is semantic equality unsafe impl ULE for NonZeroU8 { #[inline] fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { bytes.iter().try_for_each(|b| { if *b == 0x00 { Err(ZeroVecError::parse::()) } else { Ok(()) } }) } } impl AsULE for NonZeroU8 { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } unsafe impl EqULE for NonZeroU8 {} impl NicheBytes<1> for NonZeroU8 { const NICHE_BIT_PATTERN: [u8; 1] = [0x00]; } // Safety (based on the safety checklist on the ULE trait): // 1. i8 does not include any uninitialized or padding bytes. // 2. i8 is aligned to 1 byte. // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. i8 byte equality is semantic equality unsafe impl ULE for i8 { #[inline] fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { Ok(()) } } impl AsULE for i8 { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because i8 is its own ULE. unsafe impl EqULE for i8 {} impl AsULE for NonZeroI8 { type ULE = NonZeroU8; #[inline] fn to_unaligned(self) -> Self::ULE { // Safety: NonZeroU8 and NonZeroI8 have same size unsafe { core::mem::transmute(self) } } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { // Safety: NonZeroU8 and NonZeroI8 have same size unsafe { core::mem::transmute(unaligned) } } } // These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation // on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits // // The only potential problem is that some older platforms treat signaling NaNs differently. This is // still quite portable, signalingness is not typically super important. impl AsULE for f32 { type ULE = RawBytesULE<4>; #[inline] fn to_unaligned(self) -> Self::ULE { self.to_bits().to_unaligned() } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { Self::from_bits(u32::from_unaligned(unaligned)) } } impl AsULE for f64 { type ULE = RawBytesULE<8>; #[inline] fn to_unaligned(self) -> Self::ULE { self.to_bits().to_unaligned() } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { Self::from_bits(u64::from_unaligned(unaligned)) } } // The from_bits documentation mentions that they have identical byte representations to integers // and EqULE only cares about LE systems unsafe impl EqULE for f32 {} unsafe impl EqULE for f64 {} // The bool impl is not as efficient as it could be // We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking // for better bitpacking // Safety (based on the safety checklist on the ULE trait): // 1. bool does not include any uninitialized or padding bytes (the remaining 7 bytes in bool are by definition zero) // 2. bool is aligned to 1 byte. // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1). // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. bool byte equality is semantic equality unsafe impl ULE for bool { #[inline] fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { for byte in bytes { // https://doc.rust-lang.org/reference/types/boolean.html // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 if *byte > 1 { return Err(ZeroVecError::parse::()); } } Ok(()) } } impl AsULE for bool { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because bool is its own ULE. unsafe impl EqULE for bool {}