Merging upstream version 1.67.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:32 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:32 +0000
commit: 4547b622d8d29df964fa2914213088b148c498fc (patch)
tree: 9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/tinystr/src/int_ops.rs
parent: Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff)
download: rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz
rustc-4547b622d8d29df964fa2914213088b148c498fc.zip
1 files changed, 315 insertions, 0 deletions
diff --git a/vendor/tinystr/src/int_ops.rs b/vendor/tinystr/src/int_ops.rs
new file mode 100644
index 000000000..102b052f2
--- /dev/null
+++ b/vendor/tinystr/src/int_ops.rs
@@ -0,0 +1,315 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::asciibyte::AsciiByte;
+
+/// Internal helper struct that performs byte-parallel ASCII operations on one aligned `u32`.
+/// Supports strings up to 4 bytes long; shorter strings are zero-padded (see `from_bytes`).
+#[repr(transparent)]
+pub struct Aligned4(u32); // NOTE(review): the checks below assume every byte is < 0x80 — confirm callers
+
+impl Aligned4 {
+    /// # Panics
+    /// Panics if N is greater than 4 (the copy loop would index past the 4-byte buffer).
+    #[inline]
+    pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self {
+        let mut bytes = [0; 4]; // positions N.. are never written and stay NUL
+        let mut i = 0;
+        // The function documentation defines when panics may occur
+        #[allow(clippy::indexing_slicing)]
+        while i < N {
+            bytes[i] = src[i];
+            i += 1;
+        }
+        Self(u32::from_ne_bytes(bytes)) // native byte order: in-memory layout equals `bytes`
+    }
+
+    #[inline]
+    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
+        Self::from_bytes::<N>(unsafe { core::mem::transmute(src) }) // NOTE(review): assumes AsciiByte has u8 layout — confirm
+    }
+
+    #[inline]
+    pub const fn to_bytes(&self) -> [u8; 4] {
+        self.0.to_ne_bytes() // all 4 bytes, including any NUL padding
+    }
+
+    #[inline]
+    pub const fn to_ascii_bytes(&self) -> [AsciiByte; 4] {
+        unsafe { core::mem::transmute(self.to_bytes()) } // NOTE(review): needs every byte to be a valid AsciiByte — confirm
+    }
+
+    pub const fn len(&self) -> usize {
+        let word = self.0; // length = 4 minus the number of whole NUL bytes at the end of the string
+        #[cfg(target_endian = "little")]
+        let len = (4 - word.leading_zeros() / 8) as usize; // last string byte is the most significant
+        #[cfg(target_endian = "big")]
+        let len = (4 - word.trailing_zeros() / 8) as usize; // last string byte is the least significant
+        len
+    }
+
+    pub const fn is_ascii_alphabetic(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or an ASCII letter
+        // Each of the following bitmasks sets *the high bit* (0x80) of a byte to 0 for valid and 1 for invalid.
+        // `mask` sets all NUL bytes to 0 and every other byte to 0x80.
+        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+        // `lower` converts the string to lowercase. It may also change the value of non-alpha
+        // characters, but this does not matter for the alphabetic test that follows.
+        let lower = word | 0x2020_2020;
+        // `alpha` sets all alphabetic bytes to 0. We only need check for lowercase characters.
+        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); // high bit 0 only for b'a'..=b'z'
+        // The overall string is valid if every character passes at least one test.
+        // We performed two tests here: non-NUL (`mask`) and alphabetic (`alpha`).
+        (alpha & mask) == 0
+    }
+
+    pub const fn is_ascii_alphanumeric(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL, an ASCII letter, or an ASCII digit
+        // See explanatory comments in is_ascii_alphabetic; `numeric` sets all ASCII digits to 0.
+        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); // high bit 0 only for b'0'..=b'9'
+        let lower = word | 0x2020_2020;
+        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
+        (alpha & numeric & mask) == 0
+    }
+
+    pub const fn is_ascii_numeric(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or an ASCII digit
+        // See explanatory comments in is_ascii_alphabetic; `numeric` sets all ASCII digits to 0.
+        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); // high bit 0 only for b'0'..=b'9'
+        (numeric & mask) == 0
+    }
+
+    pub const fn is_ascii_lowercase(&self) -> bool {
+        let word = self.0; // NUL bytes and non-letters are accepted; only b'A'..=b'Z' is rejected
+        // For efficiency, this function tests for an invalid string rather than a valid string.
+        // A string is ASCII lowercase iff it contains no uppercase ASCII characters.
+        // `invalid_case` sets the high bit of all uppercase ASCII characters to 0 and of all others to 1.
+        let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
+        // The string is valid if it contains no invalid characters (if all high bits are 1).
+        (invalid_case & 0x8080_8080) == 0x8080_8080
+    }
+
+    pub const fn is_ascii_titlecase(&self) -> bool {
+        let word = self.0;
+        // As in is_ascii_lowercase, but the first byte must not be lowercase and the rest must not be uppercase.
+        let invalid_case = if cfg!(target_endian = "little") {
+            !(word + 0x3f3f_3f1f) | (word + 0x2525_2505) // first string byte is the least significant
+        } else {
+            !(word + 0x1f3f_3f3f) | (word + 0x0525_2525) // first string byte is the most significant
+        };
+        (invalid_case & 0x8080_8080) == 0x8080_8080
+    }
+
+    pub const fn is_ascii_uppercase(&self) -> bool {
+        let word = self.0; // NUL bytes and non-letters are accepted; only b'a'..=b'z' is rejected
+        // See explanatory comments in is_ascii_lowercase; here the high bit is 0 for lowercase letters.
+        let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
+        (invalid_case & 0x8080_8080) == 0x8080_8080
+    }
+
+    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or in b'a'..=b'z'
+        // `mask` sets all NUL bytes to 0.
+        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+        // `lower_alpha` sets the high bit of all lowercase ASCII characters to 0 and of all others to 1.
+        let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
+        // The overall string is valid if every character passes at least one test.
+        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`lower_alpha`).
+        (lower_alpha & mask) == 0
+    }
+
+    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
+        let word = self.0; // first byte: NUL or b'A'..=b'Z'; later bytes: NUL or b'a'..=b'z'
+        // See explanatory comments in is_ascii_alphabetic_lowercase
+        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+        let title_case = if cfg!(target_endian = "little") {
+            !(word + 0x1f1f_1f3f) | (word + 0x0505_0525) // first string byte is the least significant
+        } else {
+            !(word + 0x3f1f_1f1f) | (word + 0x2505_0505) // first string byte is the most significant
+        };
+        (title_case & mask) == 0
+    }
+
+    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or in b'A'..=b'Z'
+        // See explanatory comments in is_ascii_alphabetic_lowercase; the range tested here is uppercase.
+        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+        let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
+        (upper_alpha & mask) == 0
+    }
+
+    pub const fn to_ascii_lowercase(&self) -> Self {
+        let word = self.0; // each uppercase letter yields a 0x80 flag, shifted to 0x20 and OR-ed in
+        let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2);
+        Self(result)
+    }
+
+    pub const fn to_ascii_titlecase(&self) -> Self {
+        let word = self.0.to_le(); // make the first string byte the least significant on every target
+        let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2; // 0x20 per byte to change
+        let result = (word | mask) & !(0x20 & mask); // lowercase the later bytes, uppercase the first
+        Self(u32::from_le(result)) // back to native byte order
+    }
+
+    pub const fn to_ascii_uppercase(&self) -> Self {
+        let word = self.0; // each lowercase letter yields a 0x80 flag, shifted to 0x20 and cleared
+        let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2);
+        Self(result)
+    }
+}
+
+/// Internal helper struct that performs byte-parallel ASCII operations on one aligned `u64`.
+/// Supports strings up to 8 bytes long; shorter strings are zero-padded (see `from_bytes`).
+#[repr(transparent)]
+pub struct Aligned8(u64); // NOTE(review): the checks below assume every byte is < 0x80 — confirm callers
+
+impl Aligned8 {
+    /// # Panics
+    /// Panics if N is greater than 8 (the copy loop would index past the 8-byte buffer).
+    #[inline]
+    pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self {
+        let mut bytes = [0; 8]; // positions N.. are never written and stay NUL
+        let mut i = 0;
+        // The function documentation defines when panics may occur
+        #[allow(clippy::indexing_slicing)]
+        while i < N {
+            bytes[i] = src[i];
+            i += 1;
+        }
+        Self(u64::from_ne_bytes(bytes)) // native byte order: in-memory layout equals `bytes`
+    }
+
+    #[inline]
+    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
+        Self::from_bytes::<N>(unsafe { core::mem::transmute(src) }) // NOTE(review): assumes AsciiByte has u8 layout — confirm
+    }
+
+    #[inline]
+    pub const fn to_bytes(&self) -> [u8; 8] {
+        self.0.to_ne_bytes() // all 8 bytes, including any NUL padding
+    }
+
+    #[inline]
+    pub const fn to_ascii_bytes(&self) -> [AsciiByte; 8] {
+        unsafe { core::mem::transmute(self.to_bytes()) } // NOTE(review): needs every byte to be a valid AsciiByte — confirm
+    }
+
+    pub const fn len(&self) -> usize {
+        let word = self.0; // length = 8 minus the number of whole NUL bytes at the end of the string
+        #[cfg(target_endian = "little")]
+        let len = (8 - word.leading_zeros() / 8) as usize; // last string byte is the most significant
+        #[cfg(target_endian = "big")]
+        let len = (8 - word.trailing_zeros() / 8) as usize; // last string byte is the least significant
+        len
+    }
+
+    pub const fn is_ascii_alphabetic(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or an ASCII letter
+        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; // 0x80 per non-NUL byte
+        let lower = word | 0x2020_2020_2020_2020; // folds uppercase letters onto lowercase
+        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); // high bit 0 per letter
+        (alpha & mask) == 0
+    }
+
+    pub const fn is_ascii_alphanumeric(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL, an ASCII letter, or an ASCII digit
+        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; // 0x80 per non-NUL byte
+        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); // high bit 0 per digit
+        let lower = word | 0x2020_2020_2020_2020;
+        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); // high bit 0 per letter
+        (alpha & numeric & mask) == 0
+    }
+
+    pub const fn is_ascii_numeric(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or an ASCII digit
+        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; // 0x80 per non-NUL byte
+        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); // high bit 0 per digit
+        (numeric & mask) == 0
+    }
+
+    pub const fn is_ascii_lowercase(&self) -> bool {
+        let word = self.0; // NUL bytes and non-letters are accepted; only b'A'..=b'Z' is rejected
+        let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); // high bit 0 per uppercase
+        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
+    }
+
+    pub const fn is_ascii_titlecase(&self) -> bool {
+        let word = self.0; // first byte must not be lowercase; later bytes must not be uppercase
+        let invalid_case = if cfg!(target_endian = "little") {
+            !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505) // first byte is least significant
+        } else {
+            !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525) // first byte is most significant
+        };
+        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
+    }
+
+    pub const fn is_ascii_uppercase(&self) -> bool {
+        let word = self.0; // NUL bytes and non-letters are accepted; only b'a'..=b'z' is rejected
+        let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); // high bit 0 per lowercase
+        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
+    }
+
+    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or in b'a'..=b'z'
+        // `mask` sets all NUL bytes to 0.
+        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+        // `lower_alpha` sets the high bit of all lowercase ASCII characters to 0 and of all others to 1.
+        let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
+        // The overall string is valid if every character passes at least one test.
+        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`lower_alpha`).
+        (lower_alpha & mask) == 0
+    }
+
+    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
+        let word = self.0; // first byte: NUL or b'A'..=b'Z'; later bytes: NUL or b'a'..=b'z'
+        // See explanatory comments in is_ascii_alphabetic_lowercase
+        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+        let title_case = if cfg!(target_endian = "little") {
+            !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525) // first byte is least significant
+        } else {
+            !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505) // first byte is most significant
+        };
+        (title_case & mask) == 0
+    }
+
+    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
+        let word = self.0; // result: true iff every byte is NUL or in b'A'..=b'Z'
+        // See explanatory comments in is_ascii_alphabetic_lowercase; the range tested here is uppercase.
+        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+        let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
+        (upper_alpha & mask) == 0
+    }
+
+    pub const fn to_ascii_lowercase(&self) -> Self {
+        let word = self.0; // each uppercase letter yields a 0x80 flag, shifted to 0x20 and OR-ed in
+        let result = word
+            | (((word + 0x3f3f_3f3f_3f3f_3f3f)
+                & !(word + 0x2525_2525_2525_2525)
+                & 0x8080_8080_8080_8080)
+                >> 2);
+        Self(result)
+    }
+
+    pub const fn to_ascii_titlecase(&self) -> Self {
+        let word = self.0.to_le(); // make the first string byte the least significant on every target
+        let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f)
+            & !(word + 0x2525_2525_2525_2505)
+            & 0x8080_8080_8080_8080)
+            >> 2; // 0x20 per byte to change: a lowercase first byte or an uppercase later byte
+        let result = (word | mask) & !(0x20 & mask); // lowercase the later bytes, uppercase the first
+        Self(u64::from_le(result)) // back to native byte order
+    }
+
+    pub const fn to_ascii_uppercase(&self) -> Self {
+        let word = self.0; // each lowercase letter yields a 0x80 flag, shifted to 0x20 and cleared
+        let result = word
+            & !(((word + 0x1f1f_1f1f_1f1f_1f1f)
+                & !(word + 0x0505_0505_0505_0505)
+                & 0x8080_8080_8080_8080)
+                >> 2);
+        Self(result)
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:18:32 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:18:32 +0000
commit	4547b622d8d29df964fa2914213088b148c498fc (patch)
tree	9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/tinystr/src/int_ops.rs
parent	Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff)
download	rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz rustc-4547b622d8d29df964fa2914213088b148c498fc.zip