Adding upstream version 124.0.1.upstream/124.0.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec/src/ule/chars.rs
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 190 insertions, 0 deletions
diff --git a/third_party/rust/zerovec/src/ule/chars.rs b/third_party/rust/zerovec/src/ule/chars.rs
new file mode 100644
index 0000000000..e4c1efc4ec
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/chars.rs
@@ -0,0 +1,190 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+//! ULE implementation for the `char` type.
+
+use super::*;
+use crate::impl_ule_from_array;
+use core::cmp::Ordering;
+use core::convert::TryFrom;
+
+/// A u8 array of little-endian data corresponding to a Unicode scalar value.
+///
+/// The bytes of a `CharULE` are guaranteed to be the three low-order bytes of a little-endian
+/// u32 that is a valid `char`, so they can be converted back without validation.
+///
+/// # Examples
+///
+/// Convert a `char` to a `CharULE` and back again:
+///
+/// ```
+/// use zerovec::ule::{AsULE, CharULE, ULE};
+///
+/// let c1 = '𑄃';
+/// let ule = c1.to_unaligned();
+/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
+/// let c2 = char::from_unaligned(ule);
+/// assert_eq!(c1, c2);
+/// ```
+///
+/// Attempt to parse invalid bytes to a `CharULE` (here, a length that is not a multiple of 3):
+///
+/// ```
+/// use zerovec::ule::{CharULE, ULE};
+///
+/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF];
+/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
+/// ```
+#[repr(transparent)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
+pub struct CharULE([u8; 3]);
+
+impl CharULE {
+    /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling
+    /// [`AsULE::to_unaligned()`]
+    ///
+    /// See the type-level documentation for [`CharULE`] for more information.
+    #[inline]
+    pub const fn from_aligned(c: char) -> Self {
+        let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+        Self([u0, u1, u2])
+    }
+
+    impl_ule_from_array!(char, CharULE, Self([0; 3]));
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+//  1. CharULE does not include any uninitialized or padding bytes.
+//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+//  2. CharULE is aligned to 1 byte.
+//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+//  3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+//  4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+//  5. The other ULE methods use the default impl.
+//  6. CharULE byte equality is semantic equality
+unsafe impl ULE for CharULE {
+    #[inline]
+    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+        if bytes.len() % 3 != 0 {
+            return Err(ZeroVecError::length::<Self>(bytes.len()));
+        }
+        // Validate that every 3-byte chunk, zero-extended to a u32, is a Unicode scalar value
+        for chunk in bytes.chunks_exact(3) {
+            // TODO: Use slice::as_chunks() when stabilized
+            #[allow(clippy::indexing_slicing)]
+            // Won't panic because the chunks are always 3 bytes long
+            let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]);
+            char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?;
+        }
+        Ok(())
+    }
+}
+
+impl AsULE for char {
+    type ULE = CharULE;
+
+    #[inline]
+    fn to_unaligned(self) -> Self::ULE {
+        CharULE::from_aligned(self)
+    }
+
+    #[inline]
+    fn from_unaligned(unaligned: Self::ULE) -> Self {
+        // SAFETY: the bytes of CharULE are defined to represent a valid Unicode scalar value.
+        unsafe {
+            Self::from_u32_unchecked(u32::from_le_bytes([
+                unaligned.0[0],
+                unaligned.0[1],
+                unaligned.0[2],
+                0,
+            ]))
+        }
+    }
+}
+
+impl PartialOrd for CharULE {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for CharULE {
+    fn cmp(&self, other: &Self) -> Ordering {
+        char::from_unaligned(*self).cmp(&char::from_unaligned(*other))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_from_array() {
+        const CHARS: [char; 2] = ['a', '🙃'];
+        const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
+        assert_eq!(
+            CharULE::as_byte_slice(&CHARS_ULE),
+            &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
+        );
+    }
+
+    #[test]
+    fn test_from_array_zst() {
+        const CHARS: [char; 0] = [];
+        const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
+        let bytes = CharULE::as_byte_slice(&CHARS_ULE);
+        let empty: &[u8] = &[];
+        assert_eq!(bytes, empty);
+    }
+
+    #[test]
+    fn test_parse() {
+        // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32)
+        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
+        let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
+        let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);
+
+        // Check parsing
+        let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
+        assert_eq!(char_ules, parsed_ules);
+        let parsed_chars: Vec<char> = parsed_ules
+            .iter()
+            .copied()
+            .map(char::from_unaligned)
+            .collect();
+        assert_eq!(&chars, parsed_chars.as_slice());
+
+        // Compare to golden expected data
+        assert_eq!(
+            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+            char_bytes
+        );
+    }
+
+    #[test]
+    fn test_failures() {
+        // Encodes each u32 as its three low-order little-endian bytes (the `CharULE` layout),
+        // so that validation is exercised on the scalar value itself rather than on the length.
+        fn to_le3(u32s: &[u32]) -> Vec<u8> {
+            u32s.iter()
+                .flat_map(|u| {
+                    let [b0, b1, b2, _] = u.to_le_bytes();
+                    [b0, b1, b2]
+                })
+                .collect()
+        }
+
+        // 119 and 120 are valid, but not 0xD800 (high surrogate)
+        let bytes = to_le3(&[119, 0xD800, 120]);
+        assert!(CharULE::parse_byte_slice(&bytes).is_err());
+
+        // 0x20FFFF is out of range for a char
+        let bytes = to_le3(&[0x20FFFF]);
+        assert!(CharULE::parse_byte_slice(&bytes).is_err());
+
+        // A valid scalar value followed by a stray byte: length is not a multiple of 3
+        assert!(CharULE::parse_byte_slice(&[119, 0, 0, 0]).is_err());
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec/src/ule/chars.rs
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip