diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec/src/ule/chars.rs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1. upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/zerovec/src/ule/chars.rs')
-rw-r--r-- | third_party/rust/zerovec/src/ule/chars.rs | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/third_party/rust/zerovec/src/ule/chars.rs b/third_party/rust/zerovec/src/ule/chars.rs new file mode 100644 index 0000000000..e4c1efc4ec --- /dev/null +++ b/third_party/rust/zerovec/src/ule/chars.rs @@ -0,0 +1,190 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(clippy::upper_case_acronyms)] +//! ULE implementation for the `char` type. + +use super::*; +use crate::impl_ule_from_array; +use core::cmp::Ordering; +use core::convert::TryFrom; + +/// A u8 array of little-endian data corresponding to a Unicode scalar value. +/// +/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a +/// valid `char` and can be converted without validation. +/// +/// # Examples +/// +/// Convert a `char` to a `CharULE` and back again: +/// +/// ``` +/// use zerovec::ule::{AsULE, CharULE, ULE}; +/// +/// let c1 = '𑄃'; +/// let ule = c1.to_unaligned(); +/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]); +/// let c2 = char::from_unaligned(ule); +/// assert_eq!(c1, c2); +/// ``` +/// +/// Attempt to parse invalid bytes to a `CharULE`: +/// +/// ``` +/// use zerovec::ule::{CharULE, ULE}; +/// +/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF]; +/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes"); +/// ``` +#[repr(transparent)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub struct CharULE([u8; 3]); + +impl CharULE { + /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling + /// [`AsULE::to_unaligned()`] + /// + /// See the type-level documentation for [`CharULE`] for more information. 
+ #[inline] + pub const fn from_aligned(c: char) -> Self { + let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); + Self([u0, u1, u2]) + } + + impl_ule_from_array!(char, CharULE, Self([0; 3])); +} + +// Safety (based on the safety checklist on the ULE trait): +// 1. CharULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. CharULE is aligned to 1 byte. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. +// 5. The other ULE methods use the default impl. +// 6. CharULE byte equality is semantic equality +unsafe impl ULE for CharULE { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % 3 != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + // Validate the bytes + for chunk in bytes.chunks_exact(3) { + // TODO: Use slice::as_chunks() when stabilized + #[allow(clippy::indexing_slicing)] + // Won't panic because the chunks are always 3 bytes long + let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]); + char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?; + } + Ok(()) + } +} + +impl AsULE for char { + type ULE = CharULE; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + CharULE::from_aligned(self) + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. 
+ unsafe { + Self::from_u32_unchecked(u32::from_le_bytes([ + unaligned.0[0], + unaligned.0[1], + unaligned.0[2], + 0, + ])) + } + } +} + +impl PartialOrd for CharULE { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for CharULE { + fn cmp(&self, other: &Self) -> Ordering { + char::from_unaligned(*self).cmp(&char::from_unaligned(*other)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_from_array() { + const CHARS: [char; 2] = ['a', '🙃']; + const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS); + assert_eq!( + CharULE::as_byte_slice(&CHARS_ULE), + &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01] + ); + } + + #[test] + fn test_from_array_zst() { + const CHARS: [char; 0] = []; + const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS); + let bytes = CharULE::as_byte_slice(&CHARS_ULE); + let empty: &[u8] = &[]; + assert_eq!(bytes, empty); + } + + #[test] + fn test_parse() { + // 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32) + let chars = ['w', 'ω', '文', '𑄃', '🙃']; + let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect(); + let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules); + + // Check parsing + let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap(); + assert_eq!(char_ules, parsed_ules); + let parsed_chars: Vec<char> = parsed_ules + .iter() + .copied() + .map(char::from_unaligned) + .collect(); + assert_eq!(&chars, parsed_chars.as_slice()); + + // Compare to golden expected data + assert_eq!( + &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], + char_bytes + ); + } + + #[test] + fn test_failures() { + // 119 and 120 are valid, but not 0xD800 (high surrogate) + let u32s = [119, 0xD800, 120]; + let u32_ules: Vec<RawBytesULE<4>> = u32s + .iter() + .copied() + .map(<u32 as AsULE>::to_unaligned) + .collect(); + let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); + let 
parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); + assert!(parsed_ules_result.is_err()); + + // 0x20FFFF is out of range for a char + let u32s = [0x20FFFF]; + let u32_ules: Vec<RawBytesULE<4>> = u32s + .iter() + .copied() + .map(<u32 as AsULE>::to_unaligned) + .collect(); + let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules); + let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes); + assert!(parsed_ules_result.is_err()); + } +} |