diff options
Diffstat (limited to 'vendor/tinystr/src')
-rw-r--r-- | vendor/tinystr/src/helpers.rs | 32 | ||||
-rw-r--r-- | vendor/tinystr/src/lib.rs | 105 | ||||
-rw-r--r-- | vendor/tinystr/src/tinystr16.rs | 327 | ||||
-rw-r--r-- | vendor/tinystr/src/tinystr4.rs | 299 | ||||
-rw-r--r-- | vendor/tinystr/src/tinystr8.rs | 319 | ||||
-rw-r--r-- | vendor/tinystr/src/tinystrauto.rs | 72 |
6 files changed, 1154 insertions, 0 deletions
diff --git a/vendor/tinystr/src/helpers.rs b/vendor/tinystr/src/helpers.rs new file mode 100644 index 000000000..c3d17d028 --- /dev/null +++ b/vendor/tinystr/src/helpers.rs @@ -0,0 +1,32 @@ +use std::num::NonZeroU32; +use std::ptr::copy_nonoverlapping; + +use super::Error; + +#[cfg(any(feature = "std", test))] +pub use std::string::String; + +#[cfg(all(not(feature = "std"), not(test)))] +extern crate alloc; + +#[cfg(all(not(feature = "std"), not(test)))] +pub use alloc::string::String; + +#[inline(always)] +pub(crate) unsafe fn make_4byte_bytes( + bytes: &[u8], + len: usize, + mask: u32, +) -> Result<NonZeroU32, Error> { + // Mask is always supplied as little-endian. + let mask = u32::from_le(mask); + let mut word: u32 = 0; + copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u32 as *mut u8, len); + if (word & mask) != 0 { + return Err(Error::NonAscii); + } + if ((mask - word) & mask) != 0 { + return Err(Error::InvalidNull); + } + Ok(NonZeroU32::new_unchecked(word)) +} diff --git a/vendor/tinystr/src/lib.rs b/vendor/tinystr/src/lib.rs new file mode 100644 index 000000000..6f4c59658 --- /dev/null +++ b/vendor/tinystr/src/lib.rs @@ -0,0 +1,105 @@ +//! `tinystr` is a small ASCII-only bounded length string representation. +//! +//! The crate is meant to be used for scenarios where one needs a fast +//! and memory efficient way to store and manipulate short ASCII-only strings. +//! +//! `tinystr` converts each string into an unsigned integer, and uses bitmasking +//! to compare, convert cases and test for common characteristics of strings. +//! +//! # Details +//! +//! The crate provides three structs and an enum: +//! * `TinyStr4` an ASCII-only string limited to 4 characters. +//! * `TinyStr8` an ASCII-only string limited to 8 characters. +//! * `TinyStr16` an ASCII-only string limited to 16 characters. +//! * `TinyStrAuto` (enum): +//! * `Tiny` when the string is 16 characters or less. +//! * `Heap` when the string is 17 or more characters. +//! +//! 
`TinyStrAuto` stores the string as a TinyStr16 when it is short enough, or else falls back to a +//! standard `String`. You should use TinyStrAuto when you expect most strings to be 16 characters +//! or smaller, but occasionally you receive one that exceeds that length. Unlike the structs, +//! `TinyStrAuto` does not implement `Copy`. +//! +//! # no_std +//! +//! Disable the `std` feature of this crate to make it `#[no_std]`. Doing so disables `TinyStrAuto`. +//! You can re-enable `TinyStrAuto` in `#[no_std]` mode by enabling the `alloc` feature. +//! +//! # Example +//! +//! ``` +//! use tinystr::{TinyStr4, TinyStr8, TinyStr16, TinyStrAuto}; +//! +//! let s1: TinyStr4 = "tEsT".parse() +//! .expect("Failed to parse."); +//! +//! assert_eq!(s1, "tEsT"); +//! assert_eq!(s1.to_ascii_uppercase(), "TEST"); +//! assert_eq!(s1.to_ascii_lowercase(), "test"); +//! assert_eq!(s1.to_ascii_titlecase(), "Test"); +//! assert_eq!(s1.is_ascii_alphanumeric(), true); +//! +//! let s2: TinyStr8 = "New York".parse() +//! .expect("Failed to parse."); +//! +//! assert_eq!(s2, "New York"); +//! assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); +//! assert_eq!(s2.to_ascii_lowercase(), "new york"); +//! assert_eq!(s2.to_ascii_titlecase(), "New york"); +//! assert_eq!(s2.is_ascii_alphanumeric(), false); +//! +//! let s3: TinyStr16 = "metaMoRphosis123".parse() +//! .expect("Failed to parse."); +//! +//! assert_eq!(s3, "metaMoRphosis123"); +//! assert_eq!(s3.to_ascii_uppercase(), "METAMORPHOSIS123"); +//! assert_eq!(s3.to_ascii_lowercase(), "metamorphosis123"); +//! assert_eq!(s3.to_ascii_titlecase(), "Metamorphosis123"); +//! assert_eq!(s3.is_ascii_alphanumeric(), true); +//! +//! let s4: TinyStrAuto = "shortNoAlloc".parse().unwrap(); +//! assert!(matches!(s4, TinyStrAuto::Tiny { .. })); +//! assert_eq!(s4, "shortNoAlloc"); +//! +//! let s5: TinyStrAuto = "longFallbackToHeap".parse().unwrap(); +//! assert!(matches!(s5, TinyStrAuto::Heap { .. })); +//! 
assert_eq!(s5, "longFallbackToHeap"); +//! ``` + +#![no_std] + +#[cfg(any(feature = "std", test))] +extern crate std; + +#[cfg(all(not(feature = "std"), not(test)))] +extern crate core as std; + +mod helpers; +mod tinystr16; +mod tinystr4; +mod tinystr8; + +#[cfg(any(feature = "std", feature = "alloc"))] +mod tinystrauto; + +pub use tinystr16::TinyStr16; +pub use tinystr4::TinyStr4; +pub use tinystr8::TinyStr8; + +#[cfg(any(feature = "std", feature = "alloc"))] +pub use tinystrauto::TinyStrAuto; + +#[cfg(feature = "macros")] +pub use tinystr_macros as macros; + +/// Enum to store the various types of errors that can cause parsing a TinyStr to fail. +#[derive(PartialEq, Eq, Debug)] +pub enum Error { + /// String is empty or too long to store in the target TinyStr type. + InvalidSize, + /// String contains a NUL (`0x00`) byte. + InvalidNull, + /// String contains non-ASCII character(s). + NonAscii, +} diff --git a/vendor/tinystr/src/tinystr16.rs b/vendor/tinystr/src/tinystr16.rs new file mode 100644 index 000000000..7403813f2 --- /dev/null +++ b/vendor/tinystr/src/tinystr16.rs @@ -0,0 +1,327 @@ +use std::cmp::Ordering; +use std::convert::Into; +use std::fmt; +use std::num::NonZeroU128; +use std::ops::Deref; +use std::ptr::copy_nonoverlapping; +use std::str::FromStr; + +use crate::Error; + +/// A tiny string that is from 1 to 16 non-NUL ASCII characters. +/// +/// # Examples +/// +/// ``` +/// use tinystr::TinyStr16; +/// +/// let s1: TinyStr16 = "Metamorphosis".parse() +/// .expect("Failed to parse."); +/// +/// assert_eq!(s1, "Metamorphosis"); +/// assert!(s1.is_ascii_alphabetic()); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct TinyStr16(NonZeroU128); + +impl TinyStr16 { + /// Creates a TinyStr16 from a byte slice. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1 = TinyStr16::from_bytes("Testing".as_bytes()) + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1, "Testing"); + /// ``` + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { + let len = bytes.len(); + if len < 1 || len > 16 { + return Err(Error::InvalidSize); + } + unsafe { + let mut word: u128 = 0; + copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u128 as *mut u8, len); + let mask = 0x80808080_80808080_80808080_80808080u128 >> (8 * (16 - len)); + // TODO: could do this with #cfg(target_endian), but this is clearer and + // more confidence-inspiring. + let mask = u128::from_le(mask); + if (word & mask) != 0 { + return Err(Error::NonAscii); + } + if ((mask - word) & mask) != 0 { + return Err(Error::InvalidNull); + } + Ok(Self(NonZeroU128::new_unchecked(word))) + } + } + + /// An unsafe constructor intended for cases where the consumer + /// guarantees that the input is a little endian integer which + /// is a correct representation of a `TinyStr16` string. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "Metamorphosis".parse() + /// .expect("Failed to parse."); + /// + /// let num: u128 = s1.into(); + /// + /// let s2 = unsafe { TinyStr16::new_unchecked(num) }; + /// + /// assert_eq!(s1, s2); + /// assert_eq!(s2.as_str(), "Metamorphosis"); + /// ``` + /// + /// # Safety + /// + /// The method does not validate the `u128` to be properly encoded + /// value for `TinyStr16`. + /// The value can be retrieved via `Into<u128> for TinyStr16`. + #[inline(always)] + pub const unsafe fn new_unchecked(text: u128) -> Self { + Self(NonZeroU128::new_unchecked(u128::from_le(text))) + } + + /// Extracts a string slice containing the entire `TinyStr16`. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "Metamorphosis".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.as_str(), "Metamorphosis"); + /// ``` + #[inline(always)] + pub fn as_str(&self) -> &str { + self.deref() + } + + /// Checks if the value is composed of ASCII alphabetic characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "Metamorphosis".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr16 = "Met3mo4pho!is".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphabetic()); + /// assert!(!s2.is_ascii_alphabetic()); + /// ``` + pub fn is_ascii_alphabetic(self) -> bool { + let word = self.0.get(); + let mask = + (word + 0x7f7f7f7f_7f7f7f7f_7f7f7f7f_7f7f7f7f) & 0x80808080_80808080_80808080_80808080; + let lower = word | 0x20202020_20202020_20202020_20202020; + let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f_1f1f1f1f_1f1f1f1f) + | (lower + 0x05050505_05050505_05050505_05050505); + (alpha & mask) == 0 + } + + /// Checks if the value is composed of ASCII alphanumeric characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z', or + /// * U+0030 '0' ..= U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "A15bingA1".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr16 = "[3@w00Fs1".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphanumeric()); + /// assert!(!s2.is_ascii_alphanumeric()); + /// ``` + pub fn is_ascii_alphanumeric(self) -> bool { + let word = self.0.get(); + let mask = + (word + 0x7f7f7f7f_7f7f7f7f_7f7f7f7f_7f7f7f7f) & 0x80808080_80808080_80808080_80808080; + let numeric = !(word + 0x50505050_50505050_50505050_50505050) + | (word + 0x46464646_46464646_46464646_46464646); + let lower = word | 0x20202020_20202020_20202020_20202020; + let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f_1f1f1f1f_1f1f1f1f) + | (lower + 0x05050505_05050505_05050505_05050505); + (alpha & numeric & mask) == 0 + } + + /// Checks if the value is composed of ASCII decimal digits: + /// + /// * U+0030 '0' ..= U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "31212314141".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr16 = "3d3d3d3d".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_numeric()); + /// assert!(!s2.is_ascii_numeric()); + /// ``` + pub fn is_ascii_numeric(self) -> bool { + let word = self.0.get(); + let mask = + (word + 0x7f7f7f7f_7f7f7f7f_7f7f7f7f_7f7f7f7f) & 0x80808080_80808080_80808080_80808080; + let numeric = !(word + 0x50505050_50505050_50505050_50505050) + | (word + 0x46464646_46464646_46464646_46464646); + (numeric & mask) == 0 + } + + /// Returns a copy of this value converted to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z'; other characters are unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "MeTAmOrpHo3sis".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_lowercase(), "metamorpho3sis"); + /// ``` + pub fn to_ascii_lowercase(self) -> Self { + let word = self.0.get(); + let result = word + | (((word + 0x3f3f3f3f_3f3f3f3f_3f3f3f3f_3f3f3f3f) + & !(word + 0x25252525_25252525_25252525_25252525) + & 0x80808080_80808080_80808080_80808080) + >> 2); + unsafe { Self(NonZeroU128::new_unchecked(result)) } + } + + /// Returns a copy of this value converted to its ASCII title case equivalent. + /// + /// The first character, if it is an ASCII letter 'a' to 'z', is mapped to 'A' to 'Z'; + /// ASCII letters 'A' to 'Z' in the remaining positions are mapped to 'a' to 'z'. + /// All other characters are unchanged. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "metamorphosis".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_titlecase(), "Metamorphosis"); + /// ``` + pub fn to_ascii_titlecase(self) -> Self { + let word = self.0.get().to_le(); + let mask = ((word + 0x3f3f3f3f_3f3f3f3f_3f3f3f3f_3f3f3f1f) + & !(word + 0x25252525_25252525_25252525_25252505) + & 0x80808080_80808080_80808080_80808080) + >> 2; + let result = (word | mask) & !(0x20 & mask); + unsafe { Self(NonZeroU128::new_unchecked(u128::from_le(result))) } + } + + /// Returns a copy of this value converted to its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z'; other characters are unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1: TinyStr16 = "Met3amorphosis".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_uppercase(), "MET3AMORPHOSIS"); + /// ``` + pub fn to_ascii_uppercase(self) -> Self { + let word = self.0.get(); + let result = word + & !(((word + 0x1f1f1f1f_1f1f1f1f_1f1f1f1f_1f1f1f1f) + & !(word + 0x05050505_05050505_05050505_05050505) + & 0x80808080_80808080_80808080_80808080) + >> 2); + unsafe { Self(NonZeroU128::new_unchecked(result)) } + } +} + +impl fmt::Display for TinyStr16 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.deref()) + } +} + +impl fmt::Debug for TinyStr16 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self.deref()) + } +} + +impl Deref for TinyStr16 { + type Target = str; + + #[inline(always)] + fn deref(&self) -> &str { + // Again, could use #cfg to hand-roll a big-endian implementation. + let word = self.0.get().to_le(); + let len = (16 - word.leading_zeros() / 8) as usize; + unsafe { + let slice = core::slice::from_raw_parts(&self.0 as *const _ as *const u8, len); + std::str::from_utf8_unchecked(slice) + } + } +} + +impl PartialEq<&str> for TinyStr16 { + fn eq(&self, other: &&str) -> bool { + self.deref() == *other + } +} + +impl PartialOrd for TinyStr16 { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for TinyStr16 { + fn cmp(&self, other: &Self) -> Ordering { + self.0.get().to_be().cmp(&other.0.get().to_be()) + } +} + +impl FromStr for TinyStr16 { + type Err = Error; + + #[inline(always)] + fn from_str(text: &str) -> Result<Self, Self::Err> { + Self::from_bytes(text.as_bytes()) + } +} + +impl Into<u128> for TinyStr16 { + fn into(self) -> u128 { + self.0.get().to_le() + } +} diff --git a/vendor/tinystr/src/tinystr4.rs b/vendor/tinystr/src/tinystr4.rs new file mode 100644 index 000000000..c63d25113 --- /dev/null +++ 
b/vendor/tinystr/src/tinystr4.rs @@ -0,0 +1,299 @@ +use std::cmp::Ordering; +use std::convert::Into; +use std::fmt; +use std::num::NonZeroU32; +use std::ops::Deref; +use std::str::FromStr; + +use crate::helpers::make_4byte_bytes; +use crate::Error; + +/// A tiny string that is from 1 to 4 non-NUL ASCII characters. +/// +/// # Examples +/// +/// ``` +/// use tinystr::TinyStr4; +/// +/// let s1: TinyStr4 = "Test".parse() +/// .expect("Failed to parse."); +/// +/// assert_eq!(s1, "Test"); +/// assert!(s1.is_ascii_alphabetic()); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct TinyStr4(NonZeroU32); + +impl TinyStr4 { + /// Creates a TinyStr4 from a byte slice. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1 = TinyStr4::from_bytes("Test".as_bytes()) + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1, "Test"); + /// ``` + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { + unsafe { + match bytes.len() { + 1 => make_4byte_bytes(bytes, 1, 0x80).map(Self), + 2 => make_4byte_bytes(bytes, 2, 0x8080).map(Self), + 3 => make_4byte_bytes(bytes, 3, 0x0080_8080).map(Self), + 4 => make_4byte_bytes(bytes, 4, 0x8080_8080).map(Self), + _ => Err(Error::InvalidSize), + } + } + } + + /// An unsafe constructor intended for cases where the consumer + /// guarantees that the input is a little endian integer which + /// is a correct representation of a `TinyStr4` string. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "Test".parse() + /// .expect("Failed to parse."); + /// + /// let num: u32 = s1.into(); + /// + /// let s2 = unsafe { TinyStr4::new_unchecked(num) }; + /// + /// assert_eq!(s1, s2); + /// assert_eq!(s2.as_str(), "Test"); + /// ``` + /// + /// # Safety + /// + /// The method does not validate the `u32` to be properly encoded + /// value for `TinyStr4`. + /// The value can be retrieved via `Into<u32> for TinyStr4`. 
+ #[inline(always)] + pub const unsafe fn new_unchecked(text: u32) -> Self { + Self(NonZeroU32::new_unchecked(u32::from_le(text))) + } + + /// Extracts a string slice containing the entire `TinyStr4`. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "Test".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.as_str(), "Test"); + /// ``` + #[inline(always)] + pub fn as_str(&self) -> &str { + self.deref() + } + + /// Checks if the value is composed of ASCII alphabetic characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "Test".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr4 = "Te3t".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphabetic()); + /// assert!(!s2.is_ascii_alphabetic()); + /// ``` + pub fn is_ascii_alphabetic(self) -> bool { + let word = self.0.get(); + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let lower = word | 0x2020_2020; + let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); + (alpha & mask) == 0 + } + + /// Checks if the value is composed of ASCII alphanumeric characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z', or + /// * U+0030 '0' ..= U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "A15b".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr4 = "[3@w".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphanumeric()); + /// assert!(!s2.is_ascii_alphanumeric()); + /// ``` + pub fn is_ascii_alphanumeric(self) -> bool { + let word = self.0.get(); + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); + let lower = word | 0x2020_2020; + let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); + (alpha & numeric & mask) == 0 + } + + /// Checks if the value is composed of ASCII decimal digits: + /// + /// * U+0030 '0' ..= U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "312".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr4 = "3d".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_numeric()); + /// assert!(!s2.is_ascii_numeric()); + /// ``` + pub fn is_ascii_numeric(self) -> bool { + let word = self.0.get(); + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); + (numeric & mask) == 0 + } + + /// Returns a copy of this value converted to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z'; other characters are unchanged. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "TeS3".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_lowercase(), "tes3"); + /// ``` + pub fn to_ascii_lowercase(self) -> Self { + let word = self.0.get(); + let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2); + unsafe { Self(NonZeroU32::new_unchecked(result)) } + } + + /// Returns a copy of this value converted to its ASCII title case equivalent. 
+ /// + /// The first character, if it is an ASCII letter 'a' to 'z', is mapped to 'A' to 'Z'; + /// ASCII letters 'A' to 'Z' in the remaining positions are mapped to 'a' to 'z'. + /// All other characters are unchanged. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "test".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_titlecase(), "Test"); + /// ``` + pub fn to_ascii_titlecase(self) -> Self { + let word = self.0.get().to_le(); + let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2; + let result = (word | mask) & !(0x20 & mask); + unsafe { Self(NonZeroU32::new_unchecked(u32::from_le(result))) } + } + + /// Returns a copy of this value converted to its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z'; other characters are unchanged. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1: TinyStr4 = "Tes3".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_uppercase(), "TES3"); + /// ``` + pub fn to_ascii_uppercase(self) -> Self { + let word = self.0.get(); + let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2); + unsafe { Self(NonZeroU32::new_unchecked(result)) } + } +} + +impl fmt::Display for TinyStr4 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.deref()) + } +} + +impl fmt::Debug for TinyStr4 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self.deref()) + } +} + +impl Deref for TinyStr4 { + type Target = str; + + #[inline(always)] + fn deref(&self) -> &str { + // Again, could use #cfg to hand-roll a big-endian implementation. 
+ let word = self.0.get().to_le(); + let len = (4 - word.leading_zeros() / 8) as usize; + unsafe { + let slice = core::slice::from_raw_parts(&self.0 as *const _ as *const u8, len); + std::str::from_utf8_unchecked(slice) + } + } +} + +impl PartialEq<&str> for TinyStr4 { + fn eq(&self, other: &&str) -> bool { + self.deref() == *other + } +} + +impl PartialOrd for TinyStr4 { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for TinyStr4 { + fn cmp(&self, other: &Self) -> Ordering { + self.0.get().to_be().cmp(&other.0.get().to_be()) + } +} + +impl FromStr for TinyStr4 { + type Err = Error; + + #[inline(always)] + fn from_str(text: &str) -> Result<Self, Self::Err> { + Self::from_bytes(text.as_bytes()) + } +} + +impl Into<u32> for TinyStr4 { + fn into(self) -> u32 { + self.0.get().to_le() + } +} diff --git a/vendor/tinystr/src/tinystr8.rs b/vendor/tinystr/src/tinystr8.rs new file mode 100644 index 000000000..e121c519a --- /dev/null +++ b/vendor/tinystr/src/tinystr8.rs @@ -0,0 +1,319 @@ +use std::cmp::Ordering; +use std::convert::Into; +use std::fmt; +use std::num::NonZeroU64; +use std::ops::Deref; +use std::ptr::copy_nonoverlapping; +use std::str::FromStr; + +use crate::Error; + +/// A tiny string that is from 1 to 8 non-NUL ASCII characters. +/// +/// # Examples +/// +/// ``` +/// use tinystr::TinyStr8; +/// +/// let s1: TinyStr8 = "Testing".parse() +/// .expect("Failed to parse."); +/// +/// assert_eq!(s1, "Testing"); +/// assert!(s1.is_ascii_alphabetic()); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct TinyStr8(NonZeroU64); + +impl TinyStr8 { + /// Creates a TinyStr8 from a byte slice. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1 = TinyStr8::from_bytes("Testing".as_bytes()) + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1, "Testing"); + /// ``` + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { + let len = bytes.len(); + if len < 1 || len > 8 { + return Err(Error::InvalidSize); + } + unsafe { + let mut word: u64 = 0; + copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u64 as *mut u8, len); + let mask = 0x80808080_80808080u64 >> (8 * (8 - len)); + // TODO: could do this with #cfg(target_endian), but this is clearer and + // more confidence-inspiring. + let mask = u64::from_le(mask); + if (word & mask) != 0 { + return Err(Error::NonAscii); + } + if ((mask - word) & mask) != 0 { + return Err(Error::InvalidNull); + } + Ok(Self(NonZeroU64::new_unchecked(word))) + } + } + + /// An unsafe constructor intended for cases where the consumer + /// guarantees that the input is a little endian integer which + /// is a correct representation of a `TinyStr8` string. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "Testing".parse() + /// .expect("Failed to parse."); + /// + /// let num: u64 = s1.into(); + /// + /// let s2 = unsafe { TinyStr8::new_unchecked(num) }; + /// + /// assert_eq!(s1, s2); + /// assert_eq!(s2.as_str(), "Testing"); + /// ``` + /// + /// # Safety + /// + /// The method does not validate the `u64` to be properly encoded + /// value for `TinyStr8`. + /// The value can be retrieved via `Into<u64> for TinyStr8`. + #[inline(always)] + pub const unsafe fn new_unchecked(text: u64) -> Self { + Self(NonZeroU64::new_unchecked(u64::from_le(text))) + } + + /// Extracts a string slice containing the entire `TinyStr8`. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "Testing".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.as_str(), "Testing"); + /// ``` + #[inline(always)] + pub fn as_str(&self) -> &str { + self.deref() + } + + /// Checks if the value is composed of ASCII alphabetic characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "Testing".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr8 = "Te3ting".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphabetic()); + /// assert!(!s2.is_ascii_alphabetic()); + /// ``` + pub fn is_ascii_alphabetic(self) -> bool { + let word = self.0.get(); + let mask = (word + 0x7f7f7f7f_7f7f7f7f) & 0x80808080_80808080; + let lower = word | 0x20202020_20202020; + let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f) | (lower + 0x05050505_05050505); + (alpha & mask) == 0 + } + + /// Checks if the value is composed of ASCII alphanumeric characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z', or + /// * U+0030 '0' ..= U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "A15bing".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr8 = "[3@wing".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphanumeric()); + /// assert!(!s2.is_ascii_alphanumeric()); + /// ``` + pub fn is_ascii_alphanumeric(self) -> bool { + let word = self.0.get(); + let mask = (word + 0x7f7f7f7f_7f7f7f7f) & 0x80808080_80808080; + let numeric = !(word + 0x50505050_50505050) | (word + 0x46464646_46464646); + let lower = word | 0x20202020_20202020; + let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f) | (lower + 0x05050505_05050505); + (alpha & numeric & mask) == 0 + } + + /// Checks if the value is composed of ASCII decimal digits: + /// + /// * U+0030 '0' ..= U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "3121029".parse() + /// .expect("Failed to parse."); + /// let s2: TinyStr8 = "3d212d".parse() + /// .expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_numeric()); + /// assert!(!s2.is_ascii_numeric()); + /// ``` + pub fn is_ascii_numeric(self) -> bool { + let word = self.0.get(); + let mask = (word + 0x7f7f7f7f_7f7f7f7f) & 0x80808080_80808080; + let numeric = !(word + 0x50505050_50505050) | (word + 0x46464646_46464646); + (numeric & mask) == 0 + } + + /// Returns a copy of this value converted to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z'; other characters are unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "TeS3ing".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_lowercase(), "tes3ing"); + /// ``` + pub fn to_ascii_lowercase(self) -> Self { + let word = self.0.get(); + let result = word + | (((word + 0x3f3f3f3f_3f3f3f3f) + & !(word + 0x25252525_25252525) + & 0x80808080_80808080) + >> 2); + unsafe { Self(NonZeroU64::new_unchecked(result)) } + } + + /// Returns a copy of this value converted to its ASCII title case equivalent. + /// + /// The first character, if it is an ASCII letter 'a' to 'z', is mapped to 'A' to 'Z'; + /// ASCII letters 'A' to 'Z' in the remaining positions are mapped to 'a' to 'z'. + /// All other characters are unchanged. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "testing".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_titlecase(), "Testing"); + /// ``` + pub fn to_ascii_titlecase(self) -> Self { + let word = self.0.get().to_le(); + let mask = + ((word + 0x3f3f3f3f_3f3f3f1f) & !(word + 0x25252525_25252505) & 0x80808080_80808080) + >> 2; + let result = (word | mask) & !(0x20 & mask); + unsafe { Self(NonZeroU64::new_unchecked(u64::from_le(result))) } + } + + /// Returns a copy of this value converted to its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z'; other characters are unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1: TinyStr8 = "Tes3ing".parse() + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1.to_ascii_uppercase(), "TES3ING"); + /// ``` + pub fn to_ascii_uppercase(self) -> Self { + let word = self.0.get(); + let result = word + & !(((word + 0x1f1f1f1f_1f1f1f1f) + & !(word + 0x05050505_05050505) + & 0x80808080_80808080) + >> 2); + unsafe { Self(NonZeroU64::new_unchecked(result)) } + } +} + +impl fmt::Display for TinyStr8 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.deref()) + } +} + +impl fmt::Debug for TinyStr8 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self.deref()) + } +} + +impl Deref for TinyStr8 { + type Target = str; + + #[inline(always)] + fn deref(&self) -> &str { + // Again, could use #cfg to hand-roll a big-endian implementation. + let word = self.0.get().to_le(); + let len = (8 - word.leading_zeros() / 8) as usize; + unsafe { + let slice = core::slice::from_raw_parts(&self.0 as *const _ as *const u8, len); + std::str::from_utf8_unchecked(slice) + } + } +} + +impl PartialEq<&str> for TinyStr8 { + fn eq(&self, other: &&str) -> bool { + self.deref() == *other + } +} + +impl PartialOrd for TinyStr8 { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for TinyStr8 { + fn cmp(&self, other: &Self) -> Ordering { + self.0.get().to_be().cmp(&other.0.get().to_be()) + } +} + +impl FromStr for TinyStr8 { + type Err = Error; + + #[inline(always)] + fn from_str(text: &str) -> Result<Self, Self::Err> { + TinyStr8::from_bytes(text.as_bytes()) + } +} + +impl Into<u64> for TinyStr8 { + fn into(self) -> u64 { + self.0.get().to_le() + } +} diff --git a/vendor/tinystr/src/tinystrauto.rs b/vendor/tinystr/src/tinystrauto.rs new file mode 100644 index 000000000..9e2387cc1 --- /dev/null +++ b/vendor/tinystr/src/tinystrauto.rs @@ -0,0 +1,72 @@ +use std::fmt; +use 
std::ops::Deref; +use std::str::FromStr; + +use crate::helpers::String; +use crate::Error; +use crate::TinyStr16; + +/// An ASCII string that is tiny when <= 16 chars and a String otherwise. +/// +/// # Examples +/// +/// ``` +/// use tinystr::TinyStrAuto; +/// +/// let s1: TinyStrAuto = "Testing".parse() +/// .expect("Failed to parse."); +/// +/// assert_eq!(s1, "Testing"); +/// ``` +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub enum TinyStrAuto { + /// Up to 16 characters stored on the stack. + Tiny(TinyStr16), + /// 17 or more characters stored on the heap. + Heap(String), +} + +impl fmt::Display for TinyStrAuto { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.deref().fmt(f) + } +} + +impl Deref for TinyStrAuto { + type Target = str; + + fn deref(&self) -> &str { + use TinyStrAuto::*; + match self { + Tiny(value) => value.deref(), + Heap(value) => value.deref(), + } + } +} + +impl PartialEq<&str> for TinyStrAuto { + fn eq(&self, other: &&str) -> bool { + self.deref() == *other + } +} + +impl FromStr for TinyStrAuto { + type Err = Error; + + fn from_str(text: &str) -> Result<Self, Self::Err> { + if text.len() <= 16 { + match TinyStr16::from_str(text) { + Ok(result) => Ok(TinyStrAuto::Tiny(result)), + Err(err) => Err(err), + } + } else { + if !text.is_ascii() { + return Err(Error::NonAscii); + } + match String::from_str(text) { + Ok(result) => Ok(TinyStrAuto::Heap(result)), + Err(_) => unreachable!(), + } + } + } +} |