diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unix_str/src | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unix_str/src')
-rw-r--r-- | third_party/rust/unix_str/src/lib.rs | 1385 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/lossy.rs | 222 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/sys.rs | 256 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/sys_common.rs | 39 | ||||
-rw-r--r-- | third_party/rust/unix_str/src/sys_common/bytestring.rs | 45 |
5 files changed, 1947 insertions, 0 deletions
diff --git a/third_party/rust/unix_str/src/lib.rs b/third_party/rust/unix_str/src/lib.rs new file mode 100644 index 0000000000..bf669233c5 --- /dev/null +++ b/third_party/rust/unix_str/src/lib.rs @@ -0,0 +1,1385 @@ +//! Strings that are compatible with Unix-like operating systems. +//! +//! * [`UnixString`] and [`UnixStr`] are useful when you need to work with Unix strings. +//! Conversions between [`UnixString`], [`UnixStr`] and Rust strings work similarly +//! to those for `CString` and `CStr`. +//! +//! * [`UnixString`] represents an owned string in Unix's preferred +//! representation. +//! +//! * [`UnixStr`] represents a borrowed reference to a string in a format that +//! can be passed to a Unix-like operating system. It can be converted into +//! a UTF-8 Rust string slice in a similar way to [`UnixString`]. +//! +//! # Conversions +//! +//! [`UnixStr`] implements two methods, [`from_bytes`] and [`as_bytes`]. +//! These do inexpensive conversions from and to UTF-8 byte slices. +//! +//! Additionally, [`UnixString`] provides [`from_vec`] and [`into_vec`] methods +//! that consume their arguments, and take or produce vectors of [`u8`]. +//! +//! [`UnixString`]: struct.UnixString.html +//! [`UnixStr`]: struct.UnixStr.html +//! [`from_vec`]: struct.UnixString.html#method.from_vec +//! [`into_vec`]: struct.UnixString.html#method.into_vec +//! [`from_bytes`]: struct.UnixStr.html#method.from_bytes +//! 
[`as_bytes`]: struct.UnixStr.html#method.as_bytes + +#![cfg_attr(feature = "shrink_to", feature(shrink_to))] +#![cfg_attr(feature = "toowned_clone_into", feature(toowned_clone_into))] +#![no_std] + +#[cfg(feature = "alloc")] +extern crate alloc; + +use core::cmp; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::mem; + +#[cfg(feature = "alloc")] +use alloc::borrow::{Borrow, Cow, ToOwned}; +#[cfg(feature = "alloc")] +use alloc::boxed::Box; +#[cfg(feature = "alloc")] +use alloc::rc::Rc; +#[cfg(feature = "alloc")] +use alloc::string::String; +#[cfg(feature = "alloc")] +use alloc::sync::Arc; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +#[cfg(feature = "alloc")] +use core::ops; +#[cfg(feature = "alloc")] +use core::str::FromStr; + +mod lossy; + +mod sys; +#[cfg(feature = "alloc")] +use sys::Buf; +use sys::Slice; + +mod sys_common; +use sys_common::AsInner; +#[cfg(feature = "alloc")] +use sys_common::{FromInner, IntoInner}; + +/// A type that can represent owned, mutable Unix strings, but is cheaply +/// inter-convertible with Rust strings. +/// +/// The need for this type arises from the fact that: +/// +/// * On Unix systems, strings are often arbitrary sequences of non-zero +/// bytes, in many cases interpreted as UTF-8. +/// +/// * In Rust, strings are always valid UTF-8, which may contain zeros. +/// +/// `UnixString` and [`UnixStr`] bridge this gap by simultaneously representing +/// Rust and platform-native string values, and in particular allowing a Rust +/// string to be converted into a “Unix” string with no cost if possible. +/// A consequence of this is that `UnixString` instances are *not* `NULL` +/// terminated; in order to pass one to, e.g., a Unix system call, you should create +/// a `CStr`. +/// +/// `UnixString` is to [`&UnixStr`] as `String` is to `&str`: the former +/// in each pair are owned strings; the latter are borrowed references. 
+/// +/// Note, `UnixString` and [`UnixStr`] internally do not hold strings in the form native +/// to the platform: `UnixString`s are stored as a sequence of 8-bit values. +/// +/// # Creating a `UnixString` +/// +/// **From a Rust string**: `UnixString` implements `From<String>`, so you can +/// use `UnixString::from(my_string)` to create a `UnixString` from a normal Rust string. +/// +/// **From slices:** Just like you can start with an empty Rust [`String`] +/// and then [`push_str`][String.push_str] `&str` sub-string slices into it, +/// you can create an empty `UnixString` with the [`new`] method and then push +/// string slices into it with the [`push`] method. +/// +/// # Extracting a borrowed reference to the whole OS string +/// +/// You can use the [`as_unix_str`] method to get a [`&UnixStr`] from +/// a `UnixString`; this is effectively a borrowed reference to the whole +/// string. +/// +/// # Conversions +/// +/// See the [module's toplevel documentation about conversions][conversions] +/// for a discussion on the traits which `UnixString` implements for +/// [conversions] from/to native representations. +/// +/// [`UnixStr`]: struct.UnixStr.html +/// [`&UnixStr`]: struct.UnixStr.html +/// [`CStr`]: struct.CStr.html +/// [`new`]: #method.new +/// [`push`]: #method.push +/// [`as_unix_str`]: #method.as_unix_str +/// [conversions]: index.html#conversions +#[derive(Clone)] +#[cfg(feature = "alloc")] +pub struct UnixString { + inner: Buf, +} + +/// Borrowed reference to a Unix string (see [`UnixString`]). +/// +/// This type represents a borrowed reference to a string in Unix's preferred +/// representation. +/// +/// `&UnixStr` is to [`UnixString`] as `&str` is to `String`: the former +/// in each pair are borrowed references; the latter are owned strings. +/// +/// See the [module's toplevel documentation about conversions][conversions] +/// for a discussion on the traits which `UnixStr` implements for [conversions] +/// from/to native representations. 
+/// +/// [`UnixString`]: struct.UnixString.html +/// [conversions]: index.html#conversions +// FIXME: +// `UnixStr::from_inner` current implementation relies on `UnixStr` being +// layout-compatible with `Slice`. When attribute privacy is implemented, +// `UnixStr` should be annotated as `#[repr(transparent)]`. Anyway, `UnixStr` +// representation and layout are considered implementation detail, are +// not documented and must not be relied upon. +pub struct UnixStr { + inner: Slice, +} + +#[cfg(feature = "alloc")] +impl UnixString { + /// Constructs a new empty `UnixString`. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let unix_string = UnixString::new(); + /// ``` + pub fn new() -> Self { + Self { + inner: Buf::from_string(String::new()), + } + } + + /// Converts to an [`UnixStr`] slice. + /// + /// [`UnixStr`]: struct.UnixStr.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::{UnixString, UnixStr}; + /// + /// let unix_string = UnixString::from("foo"); + /// let unix_str = UnixStr::new("foo"); + /// assert_eq!(unix_string.as_unix_str(), unix_str); + /// ``` + pub fn as_unix_str(&self) -> &UnixStr { + self + } + + /// Converts the `UnixString` into a `String` if it contains valid Unicode data. + /// + /// On failure, ownership of the original `UnixString` is returned. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let unix_string = UnixString::from("foo"); + /// let string = unix_string.into_string(); + /// assert_eq!(string, Ok(String::from("foo"))); + /// ``` + pub fn into_string(self) -> Result<String, UnixString> { + self.inner + .into_string() + .map_err(|buf| UnixString { inner: buf }) + } + + /// Extends the string with the given [`&UnixStr`] slice. 
+ /// + /// [`&UnixStr`]: struct.UnixStr.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut unix_string = UnixString::from("foo"); + /// unix_string.push("bar"); + /// assert_eq!(&unix_string, "foobar"); + /// ``` + pub fn push<T: AsRef<UnixStr>>(&mut self, s: T) { + self.inner.push_slice(&s.as_ref().inner) + } + + /// Creates a new `UnixString` with the given capacity. + /// + /// The string will be able to hold exactly `capacity` length units of other + /// OS strings without reallocating. If `capacity` is 0, the string will not + /// allocate. + /// + /// See main `UnixString` documentation information about encoding. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut unix_string = UnixString::with_capacity(10); + /// let capacity = unix_string.capacity(); + /// + /// // This push is done without reallocating + /// unix_string.push("foo"); + /// + /// assert_eq!(capacity, unix_string.capacity()); + /// ``` + pub fn with_capacity(capacity: usize) -> Self { + Self { + inner: Buf::with_capacity(capacity), + } + } + + /// Truncates the `UnixString` to zero length. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut unix_string = UnixString::from("foo"); + /// assert_eq!(&unix_string, "foo"); + /// + /// unix_string.clear(); + /// assert_eq!(&unix_string, ""); + /// ``` + pub fn clear(&mut self) { + self.inner.clear() + } + + /// Returns the capacity this `UnixString` can hold without reallocating. + /// + /// See `UnixString` introduction for information about encoding. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let unix_string = UnixString::with_capacity(10); + /// assert!(unix_string.capacity() >= 10); + /// ``` + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Reserves capacity for at least `additional` more capacity to be inserted + /// in the given `UnixString`. 
+ /// + /// The collection may reserve more space to avoid frequent reallocations. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::new(); + /// s.reserve(10); + /// assert!(s.capacity() >= 10); + /// ``` + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + /// Reserves the minimum capacity for exactly `additional` more capacity to + /// be inserted in the given `UnixString`. Does nothing if the capacity is + /// already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer reserve if future insertions are expected. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::new(); + /// s.reserve_exact(10); + /// assert!(s.capacity() >= 10); + /// ``` + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + /// Shrinks the capacity of the `UnixString` to match its length. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Shrinks the capacity of the `UnixString` with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// Panics if the current capacity is smaller than the supplied + /// minimum capacity. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(shrink_to)] + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to(10); + /// assert!(s.capacity() >= 10); + /// s.shrink_to(0); + /// assert!(s.capacity() >= 3); + /// ``` + #[inline] + #[cfg(feature = "shrink_to")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + /// Converts this `UnixString` into a boxed [`UnixStr`]. + /// + /// [`UnixStr`]: struct.UnixStr.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::{UnixString, UnixStr}; + /// + /// let s = UnixString::from("hello"); + /// + /// let b: Box<UnixStr> = s.into_boxed_unix_str(); + /// ``` + pub fn into_boxed_unix_str(self) -> Box<UnixStr> { + let rw = Box::into_raw(self.inner.into_box()) as *mut UnixStr; + unsafe { Box::from_raw(rw) } + } + + /// Creates a `UnixString` from a byte vector. + /// + /// See the module documentation for an example. + /// + pub fn from_vec(vec: Vec<u8>) -> Self { + FromInner::from_inner(Buf { inner: vec }) + } + + /// Yields the underlying byte vector of this `UnixString`. + /// + /// See the module documentation for an example. + pub fn into_vec(self) -> Vec<u8> { + self.into_inner().inner + } +} + +#[cfg(feature = "alloc")] +impl From<String> for UnixString { + /// Converts a `String` into a [`UnixString`]. + /// + /// The conversion copies the data, and includes an allocation on the heap. 
+ /// + /// [`UnixString`]: ../../std/ffi/struct.UnixString.html + fn from(s: String) -> Self { + UnixString { + inner: Buf::from_string(s), + } + } +} + +#[cfg(feature = "alloc")] +impl<T: ?Sized + AsRef<UnixStr>> From<&T> for UnixString { + fn from(s: &T) -> Self { + s.as_ref().to_unix_string() + } +} + +#[cfg(feature = "alloc")] +impl ops::Index<ops::RangeFull> for UnixString { + type Output = UnixStr; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &UnixStr { + UnixStr::from_inner(self.inner.as_slice()) + } +} + +#[cfg(feature = "alloc")] +impl ops::IndexMut<ops::RangeFull> for UnixString { + #[inline] + fn index_mut(&mut self, _index: ops::RangeFull) -> &mut UnixStr { + UnixStr::from_inner_mut(self.inner.as_mut_slice()) + } +} + +#[cfg(feature = "alloc")] +impl ops::Deref for UnixString { + type Target = UnixStr; + + #[inline] + fn deref(&self) -> &UnixStr { + &self[..] + } +} + +#[cfg(feature = "alloc")] +impl ops::DerefMut for UnixString { + #[inline] + fn deref_mut(&mut self) -> &mut UnixStr { + &mut self[..] + } +} + +#[cfg(feature = "alloc")] +impl Default for UnixString { + /// Constructs an empty `UnixString`. 
+ #[inline] + fn default() -> Self { + Self::new() + } +} + +#[cfg(feature = "alloc")] +impl fmt::Debug for UnixString { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, formatter) + } +} + +#[cfg(feature = "alloc")] +impl PartialEq for UnixString { + fn eq(&self, other: &Self) -> bool { + &**self == &**other + } +} + +#[cfg(feature = "alloc")] +impl PartialEq<str> for UnixString { + fn eq(&self, other: &str) -> bool { + &**self == other + } +} + +#[cfg(feature = "alloc")] +impl PartialEq<UnixString> for str { + fn eq(&self, other: &UnixString) -> bool { + &**other == self + } +} + +#[cfg(feature = "alloc")] +impl PartialEq<&str> for UnixString { + fn eq(&self, other: &&str) -> bool { + **self == **other + } +} + +#[cfg(feature = "alloc")] +impl<'a> PartialEq<UnixString> for &'a str { + fn eq(&self, other: &UnixString) -> bool { + **other == **self + } +} + +#[cfg(feature = "alloc")] +impl Eq for UnixString {} + +#[cfg(feature = "alloc")] +impl PartialOrd for UnixString { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + (&**self).partial_cmp(&**other) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + &**self < &**other + } + #[inline] + fn le(&self, other: &Self) -> bool { + &**self <= &**other + } + #[inline] + fn gt(&self, other: &Self) -> bool { + &**self > &**other + } + #[inline] + fn ge(&self, other: &Self) -> bool { + &**self >= &**other + } +} + +#[cfg(feature = "alloc")] +impl PartialOrd<str> for UnixString { + #[inline] + fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> { + (&**self).partial_cmp(other) + } +} + +#[cfg(feature = "alloc")] +impl Ord for UnixString { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + (&**self).cmp(&**other) + } +} + +#[cfg(feature = "alloc")] +impl Hash for UnixString { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + (&**self).hash(state) + } +} + +impl UnixStr { + /// Coerces into an `UnixStr` slice. 
+ /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new("foo"); + /// ``` + #[inline] + pub fn new<S: AsRef<UnixStr> + ?Sized>(s: &S) -> &UnixStr { + s.as_ref() + } + + #[inline] + fn from_inner(inner: &Slice) -> &UnixStr { + // Safety: UnixStr is just a wrapper of Slice, + // therefore converting &Slice to &UnixStr is safe. + unsafe { &*(inner as *const Slice as *const UnixStr) } + } + + #[inline] + #[cfg(feature = "alloc")] + fn from_inner_mut(inner: &mut Slice) -> &mut UnixStr { + // Safety: UnixStr is just a wrapper of Slice, + // therefore converting &mut Slice to &mut UnixStr is safe. + // Any method that mutates UnixStr must be careful not to + // break platform-specific encoding, in particular Wtf8 on Windows. + unsafe { &mut *(inner as *mut Slice as *mut UnixStr) } + } + + /// Yields a `&str` slice if the `UnixStr` is valid Unicode. + /// + /// This conversion may entail doing a check for UTF-8 validity. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new("foo"); + /// assert_eq!(unix_str.to_str(), Some("foo")); + /// ``` + pub fn to_str(&self) -> Option<&str> { + self.inner.to_str() + } + + /// Converts an `UnixStr` to a `Cow<str>`. + /// + /// Any non-Unicode sequences are replaced with + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// + /// # Examples + /// + /// Calling `to_string_lossy` on an `UnixStr` with invalid unicode: + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' + /// // respectively. The value 0x80 is a lone continuation byte, invalid + /// // in a UTF-8 sequence. 
+ /// let source = [0x66, 0x6f, 0x80, 0x6f]; + /// let unix_str = UnixStr::from_bytes(&source[..]); + /// + /// assert_eq!(unix_str.to_string_lossy(), "fo�o"); + /// ``` + #[cfg(feature = "alloc")] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + self.inner.to_string_lossy() + } + + /// Copies the slice into an owned [`UnixString`]. + /// + /// [`UnixString`]: struct.UnixString.html + /// + /// # Examples + /// + /// ``` + /// use unix_str::{UnixStr, UnixString}; + /// + /// let unix_str = UnixStr::new("foo"); + /// let unix_string = unix_str.to_unix_string(); + /// assert_eq!(unix_string, UnixString::from("foo")); + /// ``` + #[cfg(feature = "alloc")] + pub fn to_unix_string(&self) -> UnixString { + UnixString { + inner: self.inner.to_owned(), + } + } + + /// Checks whether the `UnixStr` is empty. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new(""); + /// assert!(unix_str.is_empty()); + /// + /// let unix_str = UnixStr::new("foo"); + /// assert!(!unix_str.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.inner.inner.is_empty() + } + + /// Returns the length of this `UnixStr`. + /// + /// Note that this does **not** return the number of bytes in the string in + /// OS string form. + /// + /// The length returned is that of the underlying storage used by `UnixStr`. + /// As discussed in the [`UnixString`] introduction, [`UnixString`] and + /// `UnixStr` store strings in a form best suited for cheap inter-conversion + /// between native-platform and Rust string forms, which may differ + /// significantly from both of them, including in storage size and encoding. + /// + /// This number is simply useful for passing to other methods, like + /// [`UnixString::with_capacity`] to avoid reallocations. 
+ /// + /// [`UnixString`]: struct.UnixString.html + /// [`UnixString::with_capacity`]: struct.UnixString.html#method.with_capacity + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixStr; + /// + /// let unix_str = UnixStr::new(""); + /// assert_eq!(unix_str.len(), 0); + /// + /// let unix_str = UnixStr::new("foo"); + /// assert_eq!(unix_str.len(), 3); + /// ``` + pub fn len(&self) -> usize { + self.inner.inner.len() + } + + /// Converts a `Box<UnixStr>` into a [`UnixString`] without copying or + /// allocating. + /// + /// [`UnixString`]: struct.UnixString.html + #[cfg(feature = "alloc")] + pub fn into_unix_string(self: Box<UnixStr>) -> UnixString { + let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) }; + UnixString { + inner: Buf::from_box(boxed), + } + } + + /// Gets the underlying byte representation. + /// + /// Note: it is *crucial* that this API is private, to avoid + /// revealing the internal, platform-specific encodings. + #[inline] + fn bytes(&self) -> &[u8] { + unsafe { &*(&self.inner as *const _ as *const [u8]) } + } + + /// Converts this string to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', but non-ASCII letters + /// are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("GRÜßE, JÜRGEN ❤"); + /// + /// s.make_ascii_lowercase(); + /// + /// assert_eq!("grÜße, jÜrgen ❤", s); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + /// Converts this string to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. 
+ /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let mut s = UnixString::from("Grüße, Jürgen ❤"); + /// + /// s.make_ascii_uppercase(); + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// let s = UnixString::from("Grüße, Jürgen ❤"); + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_lowercase(&self) -> UnixString { + UnixString::from_inner(self.inner.to_ascii_lowercase()) + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 
+ /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// let s = UnixString::from("Grüße, Jürgen ❤"); + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_uppercase(&self) -> UnixString { + UnixString::from_inner(self.inner.to_ascii_uppercase()) + } + + /// Checks if all characters in this string are within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// let ascii = UnixString::from("hello!\n"); + /// let non_ascii = UnixString::from("Grüße, Jürgen ❤"); + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + /// Checks that two strings are an ASCII case-insensitive match. + /// + /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, + /// but without allocating and copying temporaries. + /// + /// # Examples + /// + /// ``` + /// use unix_str::UnixString; + /// + /// assert!(UnixString::from("Ferris").eq_ignore_ascii_case("FERRIS")); + /// assert!(UnixString::from("Ferrös").eq_ignore_ascii_case("FERRöS")); + /// assert!(!UnixString::from("Ferrös").eq_ignore_ascii_case("FERRÖS")); + /// ``` + #[cfg(feature = "unixstring_ascii")] + pub fn eq_ignore_ascii_case<S: ?Sized + AsRef<UnixStr>>(&self, other: &S) -> bool { + self.inner.eq_ignore_ascii_case(&other.as_ref().inner) + } + + /// Creates a `UnixStr` from a byte slice. + /// + /// See the module documentation for an example. + pub fn from_bytes(slice: &[u8]) -> &Self { + unsafe { mem::transmute(slice) } + } + + /// Gets the underlying byte view of the `UnixStr` slice. + /// + /// See the module documentation for an example. 
+ pub fn as_bytes(&self) -> &[u8] { + &self.as_inner().inner + } +} + +#[cfg(feature = "alloc")] +impl From<&UnixStr> for Box<UnixStr> { + fn from(s: &UnixStr) -> Self { + let rw = Box::into_raw(s.inner.into_box()) as *mut UnixStr; + unsafe { Box::from_raw(rw) } + } +} + +#[cfg(feature = "alloc")] +impl From<Cow<'_, UnixStr>> for Box<UnixStr> { + #[inline] + fn from(cow: Cow<'_, UnixStr>) -> Self { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[cfg(feature = "alloc")] +impl From<Box<UnixStr>> for UnixString { + /// Converts a `Box<UnixStr>` into a `UnixString` without copying or + /// allocating. + /// + /// [`UnixStr`]: ../ffi/struct.UnixStr.html + fn from(boxed: Box<UnixStr>) -> Self { + boxed.into_unix_string() + } +} + +#[cfg(feature = "alloc")] +impl From<UnixString> for Box<UnixStr> { + /// Converts a [`UnixString`] into a `Box<UnixStr>` without copying or + /// allocating. + /// + /// [`UnixString`]: ../ffi/struct.UnixString.html + fn from(s: UnixString) -> Self { + s.into_boxed_unix_str() + } +} + +#[cfg(feature = "alloc")] +impl Clone for Box<UnixStr> { + #[inline] + fn clone(&self) -> Self { + self.to_unix_string().into_boxed_unix_str() + } +} + +#[cfg(feature = "alloc")] +impl From<UnixString> for Arc<UnixStr> { + /// Converts a [`UnixString`] into a `Arc<UnixStr>` without copying or + /// allocating. + /// + /// [`UnixString`]: ../ffi/struct.UnixString.html + #[inline] + fn from(s: UnixString) -> Self { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl From<&UnixStr> for Arc<UnixStr> { + #[inline] + fn from(s: &UnixStr) -> Self { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl From<UnixString> for Rc<UnixStr> { + /// Converts a [`UnixString`] into a `Rc<UnixStr>` without copying or + /// allocating. 
+ /// + /// [`UnixString`]: ../ffi/struct.UnixString.html + #[inline] + fn from(s: UnixString) -> Self { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl From<&UnixStr> for Rc<UnixStr> { + #[inline] + fn from(s: &UnixStr) -> Self { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const UnixStr) } + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<UnixString> for Cow<'a, UnixStr> { + #[inline] + fn from(s: UnixString) -> Self { + Cow::Owned(s) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a UnixStr> for Cow<'a, UnixStr> { + #[inline] + fn from(s: &'a UnixStr) -> Self { + Cow::Borrowed(s) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a UnixString> for Cow<'a, UnixStr> { + #[inline] + fn from(s: &'a UnixString) -> Self { + Cow::Borrowed(s.as_unix_str()) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<Cow<'a, UnixStr>> for UnixString { + #[inline] + fn from(s: Cow<'a, UnixStr>) -> Self { + s.into_owned() + } +} + +#[cfg(feature = "alloc")] +impl Default for Box<UnixStr> { + fn default() -> Self { + let rw = Box::into_raw(Slice::empty_box()) as *mut UnixStr; + unsafe { Box::from_raw(rw) } + } +} + +impl Default for &UnixStr { + /// Creates an empty `UnixStr`. 
+ #[inline] + fn default() -> Self { + UnixStr::new("") + } +} + +impl PartialEq for UnixStr { + #[inline] + fn eq(&self, other: &UnixStr) -> bool { + self.bytes().eq(other.bytes()) + } +} + +impl PartialEq<str> for UnixStr { + #[inline] + fn eq(&self, other: &str) -> bool { + *self == *UnixStr::new(other) + } +} + +impl PartialEq<UnixStr> for str { + #[inline] + fn eq(&self, other: &UnixStr) -> bool { + *other == *UnixStr::new(self) + } +} + +impl Eq for UnixStr {} + +impl PartialOrd for UnixStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + self.bytes().partial_cmp(other.bytes()) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + self.bytes().lt(other.bytes()) + } + #[inline] + fn le(&self, other: &Self) -> bool { + self.bytes().le(other.bytes()) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + self.bytes().gt(other.bytes()) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + self.bytes().ge(other.bytes()) + } +} + +impl PartialOrd<str> for UnixStr { + #[inline] + fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> { + self.partial_cmp(Self::new(other)) + } +} + +// FIXME (#19470): cannot provide PartialOrd<UnixStr> for str until we +// have more flexible coherence rules. + +impl Ord for UnixStr { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.bytes().cmp(other.bytes()) + } +} + +#[cfg(feature = "alloc")] +macro_rules! 
impl_cmp { + ($lhs:ty, $rhs: ty) => { + impl<'a, 'b> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + <UnixStr as PartialEq>::eq(self, other) + } + } + + impl<'a, 'b> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + <UnixStr as PartialEq>::eq(self, other) + } + } + + impl<'a, 'b> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option<cmp::Ordering> { + <UnixStr as PartialOrd>::partial_cmp(self, other) + } + } + + impl<'a, 'b> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option<cmp::Ordering> { + <UnixStr as PartialOrd>::partial_cmp(self, other) + } + } + }; +} + +#[cfg(feature = "alloc")] +impl_cmp!(UnixString, UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(UnixString, &'a UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(Cow<'a, UnixStr>, UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(Cow<'a, UnixStr>, &'b UnixStr); +#[cfg(feature = "alloc")] +impl_cmp!(Cow<'a, UnixStr>, UnixString); + +impl Hash for UnixStr { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + self.bytes().hash(state) + } +} + +impl fmt::Debug for UnixStr { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, formatter) + } +} + +#[cfg(feature = "alloc")] +impl Borrow<UnixStr> for UnixString { + fn borrow(&self) -> &UnixStr { + &self[..] 
+ } +} + +#[cfg(feature = "alloc")] +impl ToOwned for UnixStr { + type Owned = UnixString; + fn to_owned(&self) -> Self::Owned { + self.to_unix_string() + } + #[cfg(feature = "toowned_clone_into")] + fn clone_into(&self, target: &mut Self::Owned) { + self.inner.clone_into(&mut target.inner) + } +} + +impl AsRef<UnixStr> for UnixStr { + fn as_ref(&self) -> &UnixStr { + self + } +} + +#[cfg(feature = "alloc")] +impl AsRef<UnixStr> for UnixString { + #[inline] + fn as_ref(&self) -> &UnixStr { + self + } +} + +impl AsRef<UnixStr> for str { + #[inline] + fn as_ref(&self) -> &UnixStr { + UnixStr::from_inner(Slice::from_str(self)) + } +} + +#[cfg(feature = "alloc")] +impl AsRef<UnixStr> for String { + #[inline] + fn as_ref(&self) -> &UnixStr { + (&**self).as_ref() + } +} + +#[cfg(feature = "alloc")] +impl FromInner<Buf> for UnixString { + fn from_inner(buf: Buf) -> UnixString { + UnixString { inner: buf } + } +} + +#[cfg(feature = "alloc")] +impl IntoInner<Buf> for UnixString { + fn into_inner(self) -> Buf { + self.inner + } +} + +impl AsInner<Slice> for UnixStr { + #[inline] + fn as_inner(&self) -> &Slice { + &self.inner + } +} + +#[cfg(feature = "alloc")] +impl FromStr for UnixString { + type Err = core::convert::Infallible; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + Ok(UnixString::from(s)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use sys_common::{AsInner, IntoInner}; + + use alloc::rc::Rc; + use alloc::sync::Arc; + + #[test] + fn test_unix_string_with_capacity() { + let unix_string = UnixString::with_capacity(0); + assert_eq!(0, unix_string.inner.into_inner().capacity()); + + let unix_string = UnixString::with_capacity(10); + assert_eq!(10, unix_string.inner.into_inner().capacity()); + + let mut unix_string = UnixString::with_capacity(0); + unix_string.push("abc"); + assert!(unix_string.inner.into_inner().capacity() >= 3); + } + + #[test] + fn test_unix_string_clear() { + let mut unix_string = UnixString::from("abc"); + assert_eq!(3, 
unix_string.inner.as_inner().len()); + + unix_string.clear(); + assert_eq!(&unix_string, ""); + assert_eq!(0, unix_string.inner.as_inner().len()); + } + + #[test] + fn test_unix_string_capacity() { + let unix_string = UnixString::with_capacity(0); + assert_eq!(0, unix_string.capacity()); + + let unix_string = UnixString::with_capacity(10); + assert_eq!(10, unix_string.capacity()); + + let mut unix_string = UnixString::with_capacity(0); + unix_string.push("abc"); + assert!(unix_string.capacity() >= 3); + } + + #[test] + fn test_unix_string_reserve() { + let mut unix_string = UnixString::new(); + assert_eq!(unix_string.capacity(), 0); + + unix_string.reserve(2); + assert!(unix_string.capacity() >= 2); + + for _ in 0..16 { + unix_string.push("a"); + } + + assert!(unix_string.capacity() >= 16); + unix_string.reserve(16); + assert!(unix_string.capacity() >= 32); + + unix_string.push("a"); + + unix_string.reserve(16); + assert!(unix_string.capacity() >= 33) + } + + #[test] + fn test_unix_string_reserve_exact() { + let mut unix_string = UnixString::new(); + assert_eq!(unix_string.capacity(), 0); + + unix_string.reserve_exact(2); + assert!(unix_string.capacity() >= 2); + + for _ in 0..16 { + unix_string.push("a"); + } + + assert!(unix_string.capacity() >= 16); + unix_string.reserve_exact(16); + assert!(unix_string.capacity() >= 32); + + unix_string.push("a"); + + unix_string.reserve_exact(16); + assert!(unix_string.capacity() >= 33) + } + + #[test] + fn test_unix_string_default() { + let unix_string: UnixString = Default::default(); + assert_eq!("", &unix_string); + } + + #[test] + fn test_unix_str_is_empty() { + let mut unix_string = UnixString::new(); + assert!(unix_string.is_empty()); + + unix_string.push("abc"); + assert!(!unix_string.is_empty()); + + unix_string.clear(); + assert!(unix_string.is_empty()); + } + + #[test] + fn test_unix_str_len() { + let mut unix_string = UnixString::new(); + assert_eq!(0, unix_string.len()); + + unix_string.push("abc"); + 
use core::char;
use core::fmt::{self, Write};
use core::mem;
use core::str as core_str;

// Sequence length keyed by the first byte of a UTF-8 sequence, following
// https://tools.ietf.org/html/rfc3629. A width of 0 marks a byte that can
// never start a sequence: continuation bytes (0x80..=0xBF), the overlong
// leads 0xC0/0xC1, and 0xF5..=0xFF.
static UTF8_CHAR_WIDTH: [u8; 256] = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
];

/// Given a first byte, determines how many bytes are in this UTF-8 character.
///
/// Returns 0 when `b` cannot be the first byte of any UTF-8 sequence.
#[inline]
pub fn utf8_char_width(b: u8) -> usize {
    UTF8_CHAR_WIDTH[b as usize] as usize
}

/// Lossy UTF-8 string.
///
/// A view over arbitrary bytes that can be iterated as alternating runs of
/// valid UTF-8 and broken sequences, and formatted with the broken parts
/// replaced (`Display`) or hex-escaped (`Debug`).
// `repr(transparent)` guarantees the same layout as `[u8]`, which
// `from_bytes` depends on when it reinterprets a `&[u8]`.
#[repr(transparent)]
pub struct Utf8Lossy {
    bytes: [u8],
}

impl Utf8Lossy {
    /// Reinterprets a byte slice as a `Utf8Lossy` without copying or validating.
    pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
        // SAFETY: `Utf8Lossy` is `#[repr(transparent)]` over `[u8]`, so
        // `&[u8]` and `&Utf8Lossy` have identical layout, and the type places
        // no UTF-8 validity requirement on its contents.
        unsafe { mem::transmute(bytes) }
    }

    /// Returns an iterator over the valid/broken chunks of the string.
    pub fn chunks(&self) -> Utf8LossyChunksIter<'_> {
        Utf8LossyChunksIter {
            source: &self.bytes,
        }
    }
}

/// Iterator over lossy UTF-8 string
#[allow(missing_debug_implementations)]
pub struct Utf8LossyChunksIter<'a> {
    /// Bytes that have not been yielded yet.
    source: &'a [u8],
}

#[derive(PartialEq, Eq, Debug)]
pub struct Utf8LossyChunk<'a> {
    /// Sequence of valid chars.
    /// Can be empty between broken UTF-8 chars.
    pub valid: &'a str,
    /// Single broken char, empty if none.
    /// Empty iff iterator item is last.
    pub broken: &'a [u8],
}

impl<'a> Iterator for Utf8LossyChunksIter<'a> {
    type Item = Utf8LossyChunk<'a>;

    /// Yields the longest valid prefix of the remaining bytes together with
    /// the broken sequence (if any) that terminated it.
    fn next(&mut self) -> Option<Utf8LossyChunk<'a>> {
        const TAG_CONT_U8: u8 = 128;

        // Out-of-range reads yield 0, which is never a continuation byte, so
        // a sequence truncated by the end of input is reported as broken.
        fn safe_get(xs: &[u8], i: usize) -> u8 {
            *xs.get(i).unwrap_or(&0)
        }

        let source = self.source;
        if source.is_empty() {
            return None;
        }

        let mut i = 0;
        while i < source.len() {
            // Start of the sequence currently being examined.
            let i_ = i;

            // The loop condition guarantees `i` is in bounds.
            let byte = source[i];
            i += 1;

            if byte < 128 {
                // ASCII is always valid on its own.
                continue;
            }

            // Ends the chunk: everything before `i_` is valid, `i_..i` is the
            // broken sequence, and iteration resumes right after it.
            macro_rules! error {
                () => {{
                    let (valid, broken) = (&source[..i_], &source[i_..i]);
                    self.source = &source[i..];
                    return Some(Utf8LossyChunk {
                        // SAFETY: every sequence that ends before `i_` has
                        // been validated by earlier iterations of this loop.
                        valid: unsafe { core_str::from_utf8_unchecked(valid) },
                        broken,
                    });
                }};
            }

            match utf8_char_width(byte) {
                2 => {
                    if safe_get(source, i) & 192 != TAG_CONT_U8 {
                        error!();
                    }
                    i += 1;
                }
                3 => {
                    // The second byte's range depends on the lead byte: it
                    // excludes overlong encodings and UTF-16 surrogates.
                    match (byte, safe_get(source, i)) {
                        (0xE0, 0xA0..=0xBF) => (),
                        (0xE1..=0xEC, 0x80..=0xBF) => (),
                        (0xED, 0x80..=0x9F) => (),
                        (0xEE..=0xEF, 0x80..=0xBF) => (),
                        _ => {
                            error!();
                        }
                    }
                    i += 1;
                    if safe_get(source, i) & 192 != TAG_CONT_U8 {
                        error!();
                    }
                    i += 1;
                }
                4 => {
                    // Excludes overlong encodings and code points above
                    // U+10FFFF.
                    match (byte, safe_get(source, i)) {
                        (0xF0, 0x90..=0xBF) => (),
                        (0xF1..=0xF3, 0x80..=0xBF) => (),
                        (0xF4, 0x80..=0x8F) => (),
                        _ => {
                            error!();
                        }
                    }
                    i += 1;
                    if safe_get(source, i) & 192 != TAG_CONT_U8 {
                        error!();
                    }
                    i += 1;
                    if safe_get(source, i) & 192 != TAG_CONT_U8 {
                        error!();
                    }
                    i += 1;
                }
                _ => {
                    // Width 0: this byte cannot start a sequence.
                    error!();
                }
            }
        }

        self.source = &[];
        Some(Utf8LossyChunk {
            // SAFETY: the loop ran to completion, so all of `source` has been
            // validated as UTF-8.
            valid: unsafe { core_str::from_utf8_unchecked(source) },
            broken: &[],
        })
    }
}

/// Writes the string with each broken sequence replaced by U+FFFD.
impl fmt::Display for Utf8Lossy {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // If we're the empty string then our iterator won't actually yield
        // anything, so perform the formatting manually
        if self.bytes.is_empty() {
            return "".fmt(f);
        }

        for Utf8LossyChunk { valid, broken } in self.chunks() {
            // If we successfully decoded the whole chunk as a valid string then
            // we can return a direct formatting of the string which will also
            // respect various formatting flags if possible.
            if valid.len() == self.bytes.len() {
                assert!(broken.is_empty());
                return valid.fmt(f);
            }

            f.write_str(valid)?;
            if !broken.is_empty() {
                f.write_char(char::REPLACEMENT_CHARACTER)?;
            }
        }
        Ok(())
    }
}

/// Writes the string quoted, with valid characters debug-escaped and every
/// broken byte shown as a lowercase `\xNN` escape.
impl fmt::Debug for Utf8Lossy {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_char('"')?;

        for Utf8LossyChunk { valid, broken } in self.chunks() {
            // Valid part.
            // Here we partially parse UTF-8 again which is suboptimal.
            {
                let mut from = 0;
                for (i, c) in valid.char_indices() {
                    let esc = c.escape_debug();
                    // If char needs escaping, flush backlog so far and write, else skip
                    if esc.len() != 1 {
                        f.write_str(&valid[from..i])?;
                        for c in esc {
                            f.write_char(c)?;
                        }
                        from = i + c.len_utf8();
                    }
                }
                f.write_str(&valid[from..])?;
            }

            // Broken parts of string as hex escape.
            for &b in broken {
                write!(f, "\\x{:02x}", b)?;
            }
        }

        f.write_char('"')
    }
}
+ +use crate::sys_common::bytestring::debug_fmt_bytestring; +#[cfg(feature = "alloc")] +use crate::sys_common::{AsInner, IntoInner}; +use core::fmt; +use core::mem; +use core::str; + +#[cfg(feature = "alloc")] +use alloc::borrow::Cow; +#[cfg(feature = "alloc")] +use alloc::boxed::Box; +#[cfg(feature = "alloc")] +use alloc::rc::Rc; +#[cfg(feature = "alloc")] +use alloc::string::String; +#[cfg(feature = "alloc")] +use alloc::sync::Arc; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; + +#[cfg(all(feature = "alloc", feature = "toowned_clone_into"))] +use alloc::borrow::ToOwned; + +#[cfg(feature = "alloc")] +#[derive(Clone, Hash)] +pub(crate) struct Buf { + pub inner: Vec<u8>, +} + +// FIXME: +// `Buf::as_slice` current implementation relies +// on `Slice` being layout-compatible with `[u8]`. +// When attribute privacy is implemented, `Slice` should be annotated as `#[repr(transparent)]`. +// Anyway, `Slice` representation and layout are considered implementation detail, are +// not documented and must not be relied upon. 
+pub(crate) struct Slice { + pub inner: [u8], +} + +impl fmt::Debug for Slice { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + debug_fmt_bytestring(&self.inner, formatter) + } +} + +#[cfg(feature = "alloc")] +impl IntoInner<Vec<u8>> for Buf { + fn into_inner(self) -> Vec<u8> { + self.inner + } +} + +#[cfg(feature = "alloc")] +impl AsInner<[u8]> for Buf { + fn as_inner(&self) -> &[u8] { + &self.inner + } +} + +#[cfg(feature = "alloc")] +impl Buf { + pub fn from_string(s: String) -> Self { + Self { + inner: s.into_bytes(), + } + } + + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Buf { + inner: Vec::with_capacity(capacity), + } + } + + #[inline] + pub fn clear(&mut self) { + self.inner.clear() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + #[inline] + #[cfg(feature = "shrink_to")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + #[inline] + pub fn as_slice(&self) -> &Slice { + // Safety: Slice just wraps [u8], + // and &*self.inner is &[u8], therefore + // transmuting &[u8] to &Slice is safe. + unsafe { mem::transmute(&*self.inner) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Slice { + // Safety: Slice just wraps [u8], + // and &mut *self.inner is &mut [u8], therefore + // transmuting &mut [u8] to &mut Slice is safe. 
+ unsafe { mem::transmute(&mut *self.inner) } + } + + pub fn into_string(self) -> Result<String, Self> { + String::from_utf8(self.inner).map_err(|p| Self { + inner: p.into_bytes(), + }) + } + + pub fn push_slice(&mut self, s: &Slice) { + self.inner.extend_from_slice(&s.inner) + } + + #[inline] + pub fn into_box(self) -> Box<Slice> { + unsafe { mem::transmute(self.inner.into_boxed_slice()) } + } + + #[inline] + pub fn from_box(boxed: Box<Slice>) -> Self { + let inner: Box<[u8]> = unsafe { mem::transmute(boxed) }; + Self { + inner: inner.into_vec(), + } + } + + #[inline] + pub fn into_arc(&self) -> Arc<Slice> { + self.as_slice().into_arc() + } + + #[inline] + pub fn into_rc(&self) -> Rc<Slice> { + self.as_slice().into_rc() + } +} + +impl Slice { + #[inline] + fn from_u8_slice(s: &[u8]) -> &Self { + unsafe { mem::transmute(s) } + } + + #[inline] + pub fn from_str(s: &str) -> &Self { + Self::from_u8_slice(s.as_bytes()) + } + + pub fn to_str(&self) -> Option<&str> { + str::from_utf8(&self.inner).ok() + } + + #[cfg(feature = "alloc")] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(&self.inner) + } + + #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> Buf { + Buf { + inner: self.inner.to_vec(), + } + } + + #[cfg(all(feature = "alloc", feature = "toowned_clone_into"))] + pub fn clone_into(&self, buf: &mut Buf) { + self.inner.clone_into(&mut buf.inner) + } + + #[inline] + #[cfg(feature = "alloc")] + pub fn into_box(&self) -> Box<Self> { + let boxed: Box<[u8]> = self.inner.into(); + unsafe { mem::transmute(boxed) } + } + + #[cfg(feature = "alloc")] + pub fn empty_box() -> Box<Self> { + let boxed: Box<[u8]> = Default::default(); + unsafe { mem::transmute(boxed) } + } + + #[inline] + #[cfg(feature = "alloc")] + pub fn into_arc(&self) -> Arc<Self> { + let arc: Arc<[u8]> = Arc::from(&self.inner); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Self) } + } + + #[inline] + #[cfg(feature = "alloc")] + pub fn into_rc(&self) -> Rc<Self> { + let 
rc: Rc<[u8]> = Rc::from(&self.inner); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Self) } + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + #[inline] + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_lowercase(&self) -> Buf { + Buf { + inner: self.inner.to_ascii_lowercase(), + } + } + + #[inline] + #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))] + pub fn to_ascii_uppercase(&self) -> Buf { + Buf { + inner: self.inner.to_ascii_uppercase(), + } + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + #[inline] + #[cfg(feature = "unixstring_ascii")] + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + self.inner.eq_ignore_ascii_case(&other.inner) + } +} diff --git a/third_party/rust/unix_str/src/sys_common.rs b/third_party/rust/unix_str/src/sys_common.rs new file mode 100644 index 0000000000..c18994e4a2 --- /dev/null +++ b/third_party/rust/unix_str/src/sys_common.rs @@ -0,0 +1,39 @@ +//! Platform-independent platform abstraction +//! +//! This is the platform-independent portion of the standard library's +//! platform abstraction layer, whereas `std::sys` is the +//! platform-specific portion. +//! +//! The relationship between `std::sys_common`, `std::sys` and the +//! rest of `std` is complex, with dependencies going in all +//! directions: `std` depending on `sys_common`, `sys_common` +//! depending on `sys`, and `sys` depending on `sys_common` and `std`. +//! Ideally `sys_common` would be split into two and the dependencies +//! between them all would form a dag, facilitating the extraction of +//! `std::sys` from the standard library. 
+ +pub mod bytestring; + +/// A trait for viewing representations from std types +#[doc(hidden)] +pub trait AsInner<Inner: ?Sized> { + fn as_inner(&self) -> &Inner; +} + +/// A trait for viewing representations from std types +#[doc(hidden)] +pub trait AsInnerMut<Inner: ?Sized> { + fn as_inner_mut(&mut self) -> &mut Inner; +} + +/// A trait for extracting representations from std types +#[doc(hidden)] +pub trait IntoInner<Inner> { + fn into_inner(self) -> Inner; +} + +/// A trait for creating std types from internal representations +#[doc(hidden)] +pub trait FromInner<Inner> { + fn from_inner(inner: Inner) -> Self; +} diff --git a/third_party/rust/unix_str/src/sys_common/bytestring.rs b/third_party/rust/unix_str/src/sys_common/bytestring.rs new file mode 100644 index 0000000000..ac6b7e893f --- /dev/null +++ b/third_party/rust/unix_str/src/sys_common/bytestring.rs @@ -0,0 +1,45 @@ +use crate::lossy::{Utf8Lossy, Utf8LossyChunk}; +use core::fmt::{Formatter, Result, Write}; + +pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter<'_>) -> Result { + // Writes out a valid unicode string with the correct escape sequences + fn write_str_escaped(f: &mut Formatter<'_>, s: &str) -> Result { + for c in s.chars().flat_map(|c| c.escape_debug()) { + f.write_char(c)? + } + Ok(()) + } + + f.write_str("\"")?; + for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() { + write_str_escaped(f, valid)?; + for b in broken { + write!(f, "\\x{:02X}", b)?; + } + } + f.write_str("\"") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fmt::{Debug, Formatter, Result}; + use alloc::format; + + #[test] + fn smoke() { + struct Helper<'a>(&'a [u8]); + + impl Debug for Helper<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + debug_fmt_bytestring(self.0, f) + } + } + + let input = b"\xF0hello,\tworld"; + let expected = r#""\xF0hello,\tworld""#; + let output = format!("{:?}", Helper(input)); + + assert!(output == expected); + } +} |