Adding upstream version 115.7.0esr.upstream/115.7.0esr

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unix_str/src
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
5 files changed, 1947 insertions, 0 deletions
diff --git a/third_party/rust/unix_str/src/lib.rs b/third_party/rust/unix_str/src/lib.rs
new file mode 100644
index 0000000000..bf669233c5
--- /dev/null
+++ b/third_party/rust/unix_str/src/lib.rs
@@ -0,0 +1,1385 @@
+//! Strings that are compatible with Unix-like operating systems.
+//!
+//! * [`UnixString`] and [`UnixStr`] are useful when you need to work with Unix strings.
+//! Conversions between [`UnixString`], [`UnixStr`] and Rust strings work similarly
+//! to those for `CString` and `CStr`.
+//!
+//! * [`UnixString`] represents an owned string in Unix's preferred
+//! representation.
+//!
+//! * [`UnixStr`] represents a borrowed reference to a string in a format that
+//! can be passed to a Unix-like operating system. It can be converted into
+//! a UTF-8 Rust string slice in a similar way to [`UnixString`].
+//!
+//! # Conversions
+//!
+//! [`UnixStr`] implements two methods, [`from_bytes`] and [`as_bytes`].
+//! These do inexpensive conversions from and to byte slices, which need not be valid UTF-8.
+//!
+//! Additionally, [`UnixString`] provides [`from_vec`] and [`into_vec`] methods
+//! that consume their arguments, and take or produce vectors of [`u8`].
+//!
+//! [`UnixString`]: struct.UnixString.html
+//! [`UnixStr`]: struct.UnixStr.html
+//! [`from_vec`]: struct.UnixString.html#method.from_vec
+//! [`into_vec`]: struct.UnixString.html#method.into_vec
+//! [`from_bytes`]: struct.UnixStr.html#method.from_bytes
+//! [`as_bytes`]: struct.UnixStr.html#method.as_bytes
+
+#![cfg_attr(feature = "shrink_to", feature(shrink_to))]
+#![cfg_attr(feature = "toowned_clone_into", feature(toowned_clone_into))]
+#![no_std]
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+use core::cmp;
+use core::fmt;
+use core::hash::{Hash, Hasher};
+use core::mem;
+
+#[cfg(feature = "alloc")]
+use alloc::borrow::{Borrow, Cow, ToOwned};
+#[cfg(feature = "alloc")]
+use alloc::boxed::Box;
+#[cfg(feature = "alloc")]
+use alloc::rc::Rc;
+#[cfg(feature = "alloc")]
+use alloc::string::String;
+#[cfg(feature = "alloc")]
+use alloc::sync::Arc;
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+#[cfg(feature = "alloc")]
+use core::ops;
+#[cfg(feature = "alloc")]
+use core::str::FromStr;
+
+mod lossy;
+
+mod sys;
+#[cfg(feature = "alloc")]
+use sys::Buf;
+use sys::Slice;
+
+mod sys_common;
+use sys_common::AsInner;
+#[cfg(feature = "alloc")]
+use sys_common::{FromInner, IntoInner};
+
+/// A type that can represent owned, mutable Unix strings, but is cheaply
+/// inter-convertible with Rust strings.
+///
+/// The need for this type arises from the fact that:
+///
+/// * On Unix systems, strings are often arbitrary sequences of non-zero
+///   bytes, in many cases interpreted as UTF-8.
+///
+/// * In Rust, strings are always valid UTF-8, which may contain zeros.
+///
+/// `UnixString` and [`UnixStr`] bridge this gap by simultaneously representing
+/// Rust and platform-native string values, and in particular allowing a Rust
+/// string to be converted into a “Unix” string with no cost if possible.
+/// A consequence of this is that `UnixString` instances are *not* `NUL`
+/// terminated; in order to pass one to e.g. a Unix system call, you should
+/// create a `CStr`.
+///
+/// `UnixString` is to [`&UnixStr`] as `String` is to `&str`: the former
+/// in each pair are owned strings; the latter are borrowed references.
+///
+/// Note, `UnixString` and [`UnixStr`] internally hold their contents as
+/// a plain sequence of 8-bit values.
+///
+/// # Creating an `UnixString`
+///
+/// **From a Rust string**: `UnixString` implements `From<String>`, so you can
+/// use `UnixString::from(my_string)` (or `my_string.into()`) to create an
+/// `UnixString` from a normal Rust string.
+///
+/// **From slices:** Just like you can start with an empty Rust `String`
+/// and then `push_str` `&str` sub-string slices into it,
+/// you can create an empty `UnixString` with the [`new`] method and then push
+/// string slices into it with the [`push`] method.
+///
+/// # Extracting a borrowed reference to the whole Unix string
+///
+/// You can use the [`as_unix_str`] method to get a [`&UnixStr`] from
+/// a `UnixString`; this is effectively a borrowed reference to the whole
+/// string.
+///
+/// # Conversions
+///
+/// See the [module's toplevel documentation about conversions][conversions]
+/// for a discussion on the traits which `UnixString` implements for
+/// [conversions] from/to native representations.
+///
+/// [`UnixStr`]: struct.UnixStr.html
+/// [`&UnixStr`]: struct.UnixStr.html
+/// [`CStr`]: struct.CStr.html
+/// [`new`]: #method.new
+/// [`push`]: #method.push
+/// [`as_unix_str`]: #method.as_unix_str
+/// [conversions]: index.html#conversions
+#[derive(Clone)]
+#[cfg(feature = "alloc")]
+pub struct UnixString {
+    inner: Buf,
+}
+
+/// Borrowed reference to a Unix string (see [`UnixString`]).
+///
+/// This type represents a borrowed reference to a string in Unix's preferred
+/// representation.
+///
+/// `&UnixStr` is to [`UnixString`] as `&str` is to `String`: the former
+/// in each pair are borrowed references; the latter are owned strings.
+///
+/// See the [module's toplevel documentation about conversions][conversions]
+/// for a discussion on the traits which `UnixStr` implements for [conversions]
+/// from/to native representations.
+///
+/// [`UnixString`]: struct.UnixString.html
+/// [conversions]: index.html#conversions
+// `UnixStr::from_inner` and `UnixStr::from_inner_mut` cast `&Slice` /
+// `&mut Slice` pointers to `UnixStr` pointers, which is only sound when
+// `UnixStr` is layout-compatible with its single `Slice` field.
+// `#[repr(transparent)]` makes that a guarantee instead of an assumption.
+// The representation remains an implementation detail: it is not documented
+// and must not be relied upon by users of the crate.
+#[repr(transparent)]
+pub struct UnixStr {
+    inner: Slice,
+}
+
+#[cfg(feature = "alloc")]
+impl UnixString {
+    /// Constructs a new empty `UnixString`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let unix_string = UnixString::new();
+    /// ```
+    pub fn new() -> Self {
+        Self {
+            inner: Buf::from_string(String::new()),
+        }
+    }
+
+    /// Converts to an [`UnixStr`] slice.
+    ///
+    /// [`UnixStr`]: struct.UnixStr.html
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::{UnixString, UnixStr};
+    ///
+    /// let unix_string = UnixString::from("foo");
+    /// let unix_str = UnixStr::new("foo");
+    /// assert_eq!(unix_string.as_unix_str(), unix_str);
+    /// ```
+    pub fn as_unix_str(&self) -> &UnixStr {
+        // `&UnixString` deref-coerces to `&UnixStr`.
+        self
+    }
+
+    /// Converts the `UnixString` into a `String` if it contains valid Unicode data.
+    ///
+    /// On failure, ownership of the original `UnixString` is returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let unix_string = UnixString::from("foo");
+    /// let string = unix_string.into_string();
+    /// assert_eq!(string, Ok(String::from("foo")));
+    /// ```
+    pub fn into_string(self) -> Result<String, UnixString> {
+        self.inner
+            .into_string()
+            .map_err(|buf| UnixString { inner: buf })
+    }
+
+    /// Extends the string with the given [`&UnixStr`] slice.
+    ///
+    /// [`&UnixStr`]: struct.UnixStr.html
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut unix_string = UnixString::from("foo");
+    /// unix_string.push("bar");
+    /// assert_eq!(&unix_string, "foobar");
+    /// ```
+    pub fn push<T: AsRef<UnixStr>>(&mut self, s: T) {
+        self.inner.push_slice(&s.as_ref().inner)
+    }
+
+    /// Creates a new `UnixString` with the given capacity.
+    ///
+    /// The string will be able to hold at least `capacity` length units of
+    /// other Unix strings without reallocating. If `capacity` is 0, the string
+    /// will not allocate.
+    ///
+    /// See the main `UnixString` documentation for information about encoding.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut unix_string = UnixString::with_capacity(10);
+    /// let capacity = unix_string.capacity();
+    ///
+    /// // This push is done without reallocating
+    /// unix_string.push("foo");
+    ///
+    /// assert_eq!(capacity, unix_string.capacity());
+    /// ```
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            inner: Buf::with_capacity(capacity),
+        }
+    }
+
+    /// Truncates the `UnixString` to zero length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut unix_string = UnixString::from("foo");
+    /// assert_eq!(&unix_string, "foo");
+    ///
+    /// unix_string.clear();
+    /// assert_eq!(&unix_string, "");
+    /// ```
+    pub fn clear(&mut self) {
+        self.inner.clear()
+    }
+
+    /// Returns the capacity this `UnixString` can hold without reallocating.
+    ///
+    /// See the `UnixString` introduction for information about encoding.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let unix_string = UnixString::with_capacity(10);
+    /// assert!(unix_string.capacity() >= 10);
+    /// ```
+    pub fn capacity(&self) -> usize {
+        self.inner.capacity()
+    }
+
+    /// Reserves capacity for at least `additional` more length units to be
+    /// inserted in the given `UnixString`.
+    ///
+    /// The collection may reserve more space to avoid frequent reallocations.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut s = UnixString::new();
+    /// s.reserve(10);
+    /// assert!(s.capacity() >= 10);
+    /// ```
+    pub fn reserve(&mut self, additional: usize) {
+        self.inner.reserve(additional)
+    }
+
+    /// Reserves the minimum capacity for exactly `additional` more length
+    /// units to be inserted in the given `UnixString`. Does nothing if the
+    /// capacity is already sufficient.
+    ///
+    /// Note that the allocator may give the collection more space than it
+    /// requests. Therefore, capacity can not be relied upon to be precisely
+    /// minimal. Prefer [`reserve`] if future insertions are expected.
+    ///
+    /// [`reserve`]: #method.reserve
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut s = UnixString::new();
+    /// s.reserve_exact(10);
+    /// assert!(s.capacity() >= 10);
+    /// ```
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.inner.reserve_exact(additional)
+    }
+
+    /// Shrinks the capacity of the `UnixString` to match its length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut s = UnixString::from("foo");
+    ///
+    /// s.reserve(100);
+    /// assert!(s.capacity() >= 100);
+    ///
+    /// s.shrink_to_fit();
+    /// assert_eq!(3, s.capacity());
+    /// ```
+    pub fn shrink_to_fit(&mut self) {
+        self.inner.shrink_to_fit()
+    }
+
+    /// Shrinks the capacity of the `UnixString` with a lower bound.
+    ///
+    /// The capacity will remain at least as large as both the length
+    /// and the supplied value.
+    ///
+    /// Panics if the current capacity is smaller than the supplied
+    /// minimum capacity.
+    ///
+    /// Only available when the crate's `shrink_to` feature is enabled.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(shrink_to)]
+    /// use unix_str::UnixString;
+    ///
+    /// let mut s = UnixString::from("foo");
+    ///
+    /// s.reserve(100);
+    /// assert!(s.capacity() >= 100);
+    ///
+    /// s.shrink_to(10);
+    /// assert!(s.capacity() >= 10);
+    /// s.shrink_to(0);
+    /// assert!(s.capacity() >= 3);
+    /// ```
+    #[inline]
+    #[cfg(feature = "shrink_to")]
+    pub fn shrink_to(&mut self, min_capacity: usize) {
+        self.inner.shrink_to(min_capacity)
+    }
+
+    /// Converts this `UnixString` into a boxed [`UnixStr`].
+    ///
+    /// [`UnixStr`]: struct.UnixStr.html
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::{UnixString, UnixStr};
+    ///
+    /// let s = UnixString::from("hello");
+    ///
+    /// let b: Box<UnixStr> = s.into_boxed_unix_str();
+    /// ```
+    pub fn into_boxed_unix_str(self) -> Box<UnixStr> {
+        let rw = Box::into_raw(self.inner.into_box()) as *mut UnixStr;
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `rw` comes straight from `Box::into_raw`.
+        unsafe { Box::from_raw(rw) }
+    }
+
+    /// Creates a `UnixString` from a byte vector.
+    ///
+    /// The vector is moved into the new string as its backing buffer.
+    ///
+    /// See the [module's toplevel documentation about conversions][conversions].
+    ///
+    /// [conversions]: index.html#conversions
+    pub fn from_vec(vec: Vec<u8>) -> Self {
+        FromInner::from_inner(Buf { inner: vec })
+    }
+
+    /// Yields the underlying byte vector of this `UnixString`.
+    ///
+    /// See the [module's toplevel documentation about conversions][conversions].
+    ///
+    /// [conversions]: index.html#conversions
+    pub fn into_vec(self) -> Vec<u8> {
+        self.into_inner().inner
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<String> for UnixString {
+    /// Converts a `String` into a [`UnixString`].
+    ///
+    /// The string is handed to `Buf::from_string`.
+    // NOTE(review): whether this copies or merely moves the bytes depends on
+    // `Buf::from_string`, which is defined in `sys` -- confirm there.
+    ///
+    /// [`UnixString`]: struct.UnixString.html
+    fn from(s: String) -> Self {
+        let inner = Buf::from_string(s);
+        Self { inner }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<T: ?Sized + AsRef<UnixStr>> From<&T> for UnixString {
+    /// Copies anything viewable as a `UnixStr` into an owned `UnixString`.
+    fn from(s: &T) -> Self {
+        let borrowed: &UnixStr = s.as_ref();
+        borrowed.to_unix_string()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl ops::Index<ops::RangeFull> for UnixString {
+    type Output = UnixStr;
+
+    /// `string[..]` borrows the whole buffer as a `UnixStr`.
+    #[inline]
+    fn index(&self, _: ops::RangeFull) -> &UnixStr {
+        let slice = self.inner.as_slice();
+        UnixStr::from_inner(slice)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl ops::IndexMut<ops::RangeFull> for UnixString {
+    /// `string[..]` mutably borrows the whole buffer as a `UnixStr`.
+    #[inline]
+    fn index_mut(&mut self, _: ops::RangeFull) -> &mut UnixStr {
+        let slice = self.inner.as_mut_slice();
+        UnixStr::from_inner_mut(slice)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl ops::Deref for UnixString {
+    type Target = UnixStr;
+
+    /// Borrows the whole buffer as a `UnixStr`, exactly like `self[..]`.
+    #[inline]
+    fn deref(&self) -> &UnixStr {
+        UnixStr::from_inner(self.inner.as_slice())
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl ops::DerefMut for UnixString {
+    /// Mutably borrows the whole buffer as a `UnixStr`, exactly like `self[..]`.
+    #[inline]
+    fn deref_mut(&mut self) -> &mut UnixStr {
+        UnixStr::from_inner_mut(self.inner.as_mut_slice())
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl Default for UnixString {
+    /// Returns the same empty string as [`UnixString::new`].
+    ///
+    /// [`UnixString::new`]: struct.UnixString.html#method.new
+    #[inline]
+    fn default() -> Self {
+        UnixString::new()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl fmt::Debug for UnixString {
+    /// Formats the string through the `Debug` impl of the borrowed `UnixStr`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let borrowed = self.as_unix_str();
+        fmt::Debug::fmt(borrowed, f)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl PartialEq for UnixString {
+    /// Two owned strings are equal when their borrowed forms are equal.
+    fn eq(&self, other: &Self) -> bool {
+        self.as_unix_str() == other.as_unix_str()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl PartialEq<str> for UnixString {
+    /// Compares against a `str` via the borrowed `UnixStr`.
+    fn eq(&self, other: &str) -> bool {
+        self.as_unix_str() == other
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl PartialEq<UnixString> for str {
+    /// Mirror of `UnixString == str`, with the operands swapped.
+    fn eq(&self, other: &UnixString) -> bool {
+        other.as_unix_str() == self
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl PartialEq<&str> for UnixString {
+    /// Compares against a `&str` by looking through both references.
+    fn eq(&self, other: &&str) -> bool {
+        let rhs: &str = other;
+        *self.as_unix_str() == *rhs
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> PartialEq<UnixString> for &'a str {
+    /// Mirror of `UnixString == &str`, with the operands swapped.
+    fn eq(&self, other: &UnixString) -> bool {
+        let lhs: &str = self;
+        *other.as_unix_str() == *lhs
+    }
+}
+
+// Marker impl: declares the `PartialEq` impl for `UnixString` to be a full
+// equivalence relation.
+#[cfg(feature = "alloc")]
+impl Eq for UnixString {}
+
+// Every comparison is forwarded to the borrowed `UnixStr` views.
+#[cfg(feature = "alloc")]
+impl PartialOrd for UnixString {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        self.as_unix_str().partial_cmp(other.as_unix_str())
+    }
+    #[inline]
+    fn lt(&self, other: &Self) -> bool {
+        self.as_unix_str() < other.as_unix_str()
+    }
+    #[inline]
+    fn le(&self, other: &Self) -> bool {
+        self.as_unix_str() <= other.as_unix_str()
+    }
+    #[inline]
+    fn gt(&self, other: &Self) -> bool {
+        self.as_unix_str() > other.as_unix_str()
+    }
+    #[inline]
+    fn ge(&self, other: &Self) -> bool {
+        self.as_unix_str() >= other.as_unix_str()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl PartialOrd<str> for UnixString {
+    /// Orders against a `str` via the borrowed `UnixStr`.
+    #[inline]
+    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
+        let lhs = self.as_unix_str();
+        lhs.partial_cmp(other)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl Ord for UnixString {
+    /// Total order, taken from the borrowed `UnixStr` views.
+    #[inline]
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        Ord::cmp(self.as_unix_str(), other.as_unix_str())
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl Hash for UnixString {
+    /// Hashes through the borrowed `UnixStr`, so both types hash alike.
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        Hash::hash(self.as_unix_str(), state)
+    }
+}
+
+impl UnixStr {
+    /// Coerces into an `UnixStr` slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixStr;
+    ///
+    /// let unix_str = UnixStr::new("foo");
+    /// ```
+    #[inline]
+    pub fn new<S: AsRef<UnixStr> + ?Sized>(s: &S) -> &UnixStr {
+        s.as_ref()
+    }
+
+    /// Reinterprets a borrowed `Slice` as a borrowed `UnixStr`.
+    #[inline]
+    fn from_inner(inner: &Slice) -> &UnixStr {
+        // SAFETY: `UnixStr` is just a wrapper of `Slice`, so the cast relies
+        // on both types sharing a layout; the lifetime is carried over
+        // unchanged from `inner`.
+        unsafe { &*(inner as *const Slice as *const UnixStr) }
+    }
+
+    /// Reinterprets a mutably borrowed `Slice` as a mutably borrowed `UnixStr`.
+    #[inline]
+    #[cfg(feature = "alloc")]
+    fn from_inner_mut(inner: &mut Slice) -> &mut UnixStr {
+        // SAFETY: `UnixStr` is just a wrapper of `Slice`, so the cast relies
+        // on both types sharing a layout; exclusivity is carried over
+        // unchanged from `inner`.
+        unsafe { &mut *(inner as *mut Slice as *mut UnixStr) }
+    }
+
+    /// Yields a `&str` slice if the `UnixStr` is valid Unicode.
+    ///
+    /// This conversion may entail doing a check for UTF-8 validity.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixStr;
+    ///
+    /// let unix_str = UnixStr::new("foo");
+    /// assert_eq!(unix_str.to_str(), Some("foo"));
+    /// ```
+    pub fn to_str(&self) -> Option<&str> {
+        self.inner.to_str()
+    }
+
+    /// Converts an `UnixStr` to a `Cow<str>`.
+    ///
+    /// Any non-Unicode sequences are replaced with
+    /// `U+FFFD REPLACEMENT CHARACTER`.
+    ///
+    /// # Examples
+    ///
+    /// Calling `to_string_lossy` on an `UnixStr` with invalid unicode:
+    ///
+    /// ```
+    /// use unix_str::UnixStr;
+    ///
+    /// // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+    /// // respectively. The value 0x80 is a lone continuation byte, invalid
+    /// // in a UTF-8 sequence.
+    /// let source = [0x66, 0x6f, 0x80, 0x6f];
+    /// let unix_str = UnixStr::from_bytes(&source[..]);
+    ///
+    /// assert_eq!(unix_str.to_string_lossy(), "fo�o");
+    /// ```
+    #[cfg(feature = "alloc")]
+    pub fn to_string_lossy(&self) -> Cow<'_, str> {
+        self.inner.to_string_lossy()
+    }
+
+    /// Copies the slice into an owned [`UnixString`].
+    ///
+    /// [`UnixString`]: struct.UnixString.html
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::{UnixStr, UnixString};
+    ///
+    /// let unix_str = UnixStr::new("foo");
+    /// let unix_string = unix_str.to_unix_string();
+    /// assert_eq!(unix_string, UnixString::from("foo"));
+    /// ```
+    #[cfg(feature = "alloc")]
+    pub fn to_unix_string(&self) -> UnixString {
+        UnixString {
+            inner: self.inner.to_owned(),
+        }
+    }
+
+    /// Checks whether the `UnixStr` is empty.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixStr;
+    ///
+    /// let unix_str = UnixStr::new("");
+    /// assert!(unix_str.is_empty());
+    ///
+    /// let unix_str = UnixStr::new("foo");
+    /// assert!(!unix_str.is_empty());
+    /// ```
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.inner.inner.is_empty()
+    }
+
+    /// Returns the length of this `UnixStr`.
+    ///
+    /// The length returned is that of the underlying byte storage used by
+    /// `UnixStr`; it is not a count of characters.
+    ///
+    /// This number is useful for passing to other methods, like
+    /// [`UnixString::with_capacity`] to avoid reallocations.
+    ///
+    /// [`UnixString::with_capacity`]: struct.UnixString.html#method.with_capacity
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixStr;
+    ///
+    /// let unix_str = UnixStr::new("");
+    /// assert_eq!(unix_str.len(), 0);
+    ///
+    /// let unix_str = UnixStr::new("foo");
+    /// assert_eq!(unix_str.len(), 3);
+    /// ```
+    pub fn len(&self) -> usize {
+        self.inner.inner.len()
+    }
+
+    /// Converts a `Box<UnixStr>` into an [`UnixString`] without copying or
+    /// allocating.
+    ///
+    /// [`UnixString`]: struct.UnixString.html
+    #[cfg(feature = "alloc")]
+    pub fn into_unix_string(self: Box<UnixStr>) -> UnixString {
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; the raw pointer comes straight from `Box::into_raw`.
+        let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) };
+        UnixString {
+            inner: Buf::from_box(boxed),
+        }
+    }
+
+    /// Gets the underlying byte representation.
+    ///
+    /// Private helper used by the comparison and hashing impls.
+    #[inline]
+    fn bytes(&self) -> &[u8] {
+        // Plain field access instead of a raw-pointer cast: no `unsafe`
+        // is needed to reach the byte slice wrapped by `Slice`.
+        &self.inner.inner
+    }
+
+    /// Converts this string to its ASCII lower case equivalent in-place.
+    ///
+    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', but non-ASCII letters
+    /// are unchanged.
+    ///
+    /// To return a new lowercased value without modifying the existing one, use
+    /// [`to_ascii_lowercase`].
+    ///
+    /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut s = UnixString::from("GRÜßE, JÜRGEN ❤");
+    ///
+    /// s.make_ascii_lowercase();
+    ///
+    /// assert_eq!("grÜße, jÜrgen ❤", s);
+    /// ```
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn make_ascii_lowercase(&mut self) {
+        self.inner.make_ascii_lowercase()
+    }
+
+    /// Converts this string to its ASCII upper case equivalent in-place.
+    ///
+    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To return a new uppercased value without modifying the existing one, use
+    /// [`to_ascii_uppercase`].
+    ///
+    /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let mut s = UnixString::from("Grüße, Jürgen ❤");
+    ///
+    /// s.make_ascii_uppercase();
+    ///
+    /// assert_eq!("GRüßE, JüRGEN ❤", s);
+    /// ```
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn make_ascii_uppercase(&mut self) {
+        self.inner.make_ascii_uppercase()
+    }
+
+    /// Returns a copy of this string where each character is mapped to its
+    /// ASCII lower case equivalent.
+    ///
+    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To lowercase the value in-place, use [`make_ascii_lowercase`].
+    ///
+    /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    /// let s = UnixString::from("Grüße, Jürgen ❤");
+    ///
+    /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase());
+    /// ```
+    #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))]
+    pub fn to_ascii_lowercase(&self) -> UnixString {
+        UnixString::from_inner(self.inner.to_ascii_lowercase())
+    }
+
+    /// Returns a copy of this string where each character is mapped to its
+    /// ASCII upper case equivalent.
+    ///
+    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To uppercase the value in-place, use [`make_ascii_uppercase`].
+    ///
+    /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    /// let s = UnixString::from("Grüße, Jürgen ❤");
+    ///
+    /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase());
+    /// ```
+    #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))]
+    pub fn to_ascii_uppercase(&self) -> UnixString {
+        UnixString::from_inner(self.inner.to_ascii_uppercase())
+    }
+
+    /// Checks if all characters in this string are within the ASCII range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// let ascii = UnixString::from("hello!\n");
+    /// let non_ascii = UnixString::from("Grüße, Jürgen ❤");
+    ///
+    /// assert!(ascii.is_ascii());
+    /// assert!(!non_ascii.is_ascii());
+    /// ```
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn is_ascii(&self) -> bool {
+        self.inner.is_ascii()
+    }
+
+    /// Checks that two strings are an ASCII case-insensitive match.
+    ///
+    /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
+    /// but without allocating and copying temporaries.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use unix_str::UnixString;
+    ///
+    /// assert!(UnixString::from("Ferris").eq_ignore_ascii_case("FERRIS"));
+    /// assert!(UnixString::from("Ferrös").eq_ignore_ascii_case("FERRöS"));
+    /// assert!(!UnixString::from("Ferrös").eq_ignore_ascii_case("FERRÖS"));
+    /// ```
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn eq_ignore_ascii_case<S: ?Sized + AsRef<UnixStr>>(&self, other: &S) -> bool {
+        self.inner.eq_ignore_ascii_case(&other.as_ref().inner)
+    }
+
+    /// Creates a `UnixStr` from a byte slice.
+    ///
+    /// The bytes are not validated; see the `to_string_lossy` example for a
+    /// slice that is not valid UTF-8.
+    pub fn from_bytes(slice: &[u8]) -> &Self {
+        // SAFETY: relies on `&UnixStr` having the same representation as
+        // `&[u8]`. NOTE(review): that in turn needs `Slice` to be a
+        // transparent wrapper around `[u8]` -- confirm in `sys`.
+        unsafe { mem::transmute(slice) }
+    }
+
+    /// Gets the underlying byte view of the `UnixStr` slice.
+    ///
+    /// The returned slice borrows from `self`; nothing is copied.
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.as_inner().inner
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<&UnixStr> for Box<UnixStr> {
+    /// Boxes the borrowed string via `Slice::into_box`.
+    fn from(s: &UnixStr) -> Self {
+        let raw = Box::into_raw(s.inner.into_box());
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `raw` comes straight from `Box::into_raw`.
+        unsafe { Box::from_raw(raw as *mut UnixStr) }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<Cow<'_, UnixStr>> for Box<UnixStr> {
+    /// Boxes either variant of the `Cow` using the matching `From` conversion.
+    #[inline]
+    fn from(cow: Cow<'_, UnixStr>) -> Self {
+        match cow {
+            Cow::Owned(owned) => owned.into_boxed_unix_str(),
+            Cow::Borrowed(borrowed) => Box::from(borrowed),
+        }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<Box<UnixStr>> for UnixString {
+    /// Converts a `Box<`[`UnixStr`]`>` into a `UnixString` by delegating to
+    /// `UnixStr::into_unix_string`.
+    ///
+    /// [`UnixStr`]: struct.UnixStr.html
+    fn from(boxed: Box<UnixStr>) -> Self {
+        UnixStr::into_unix_string(boxed)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<UnixString> for Box<UnixStr> {
+    /// Converts a [`UnixString`] into a `Box<UnixStr>` by delegating to
+    /// `UnixString::into_boxed_unix_str`.
+    ///
+    /// [`UnixString`]: struct.UnixString.html
+    fn from(s: UnixString) -> Self {
+        UnixString::into_boxed_unix_str(s)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl Clone for Box<UnixStr> {
+    /// Clones by copying into an owned `UnixString` and boxing that.
+    #[inline]
+    fn clone(&self) -> Self {
+        let owned = self.to_unix_string();
+        owned.into_boxed_unix_str()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<UnixString> for Arc<UnixStr> {
+    /// Converts a [`UnixString`] into an `Arc<UnixStr>` via `Buf::into_arc`.
+    ///
+    /// [`UnixString`]: struct.UnixString.html
+    #[inline]
+    fn from(s: UnixString) -> Self {
+        let raw = Arc::into_raw(s.inner.into_arc()) as *const UnixStr;
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `raw` comes straight from `Arc::into_raw`.
+        unsafe { Arc::from_raw(raw) }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<&UnixStr> for Arc<UnixStr> {
+    /// Builds an `Arc<UnixStr>` from a borrowed string via `Slice::into_arc`.
+    #[inline]
+    fn from(s: &UnixStr) -> Self {
+        let raw = Arc::into_raw(s.inner.into_arc()) as *const UnixStr;
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `raw` comes straight from `Arc::into_raw`.
+        unsafe { Arc::from_raw(raw) }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<UnixString> for Rc<UnixStr> {
+    /// Converts a [`UnixString`] into an `Rc<UnixStr>` via `Buf::into_rc`.
+    ///
+    /// [`UnixString`]: struct.UnixString.html
+    #[inline]
+    fn from(s: UnixString) -> Self {
+        let raw = Rc::into_raw(s.inner.into_rc()) as *const UnixStr;
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `raw` comes straight from `Rc::into_raw`.
+        unsafe { Rc::from_raw(raw) }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<&UnixStr> for Rc<UnixStr> {
+    /// Builds an `Rc<UnixStr>` from a borrowed string via `Slice::into_rc`.
+    #[inline]
+    fn from(s: &UnixStr) -> Self {
+        let raw = Rc::into_raw(s.inner.into_rc()) as *const UnixStr;
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `raw` comes straight from `Rc::into_raw`.
+        unsafe { Rc::from_raw(raw) }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> From<UnixString> for Cow<'a, UnixStr> {
+    /// Wraps the owned string in `Cow::Owned`.
+    #[inline]
+    fn from(owned: UnixString) -> Self {
+        Cow::Owned(owned)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> From<&'a UnixStr> for Cow<'a, UnixStr> {
+    /// Wraps the borrowed string in `Cow::Borrowed`.
+    #[inline]
+    fn from(borrowed: &'a UnixStr) -> Self {
+        Cow::Borrowed(borrowed)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> From<&'a UnixString> for Cow<'a, UnixStr> {
+    /// Borrows the owned string as a `UnixStr` and wraps it in `Cow::Borrowed`.
+    #[inline]
+    fn from(s: &'a UnixString) -> Self {
+        let borrowed: &'a UnixStr = s.as_unix_str();
+        Cow::Borrowed(borrowed)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> From<Cow<'a, UnixStr>> for UnixString {
+    /// Extracts the owned string from the `Cow` via `Cow::into_owned`.
+    #[inline]
+    fn from(cow: Cow<'a, UnixStr>) -> Self {
+        Cow::into_owned(cow)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl Default for Box<UnixStr> {
+    /// Creates a boxed `UnixStr` from `Slice::empty_box`.
+    fn default() -> Self {
+        let raw = Box::into_raw(Slice::empty_box());
+        // SAFETY: relies on `UnixStr` being layout-compatible with `Slice`,
+        // its only field; `raw` comes straight from `Box::into_raw`.
+        unsafe { Box::from_raw(raw as *mut UnixStr) }
+    }
+}
+
+impl Default for &UnixStr {
+    /// Returns a `UnixStr` view of the empty string literal.
+    #[inline]
+    fn default() -> Self {
+        let empty: &str = "";
+        UnixStr::new(empty)
+    }
+}
+
+impl PartialEq for UnixStr {
+    /// Two strings are equal when their underlying bytes are equal.
+    #[inline]
+    fn eq(&self, other: &UnixStr) -> bool {
+        self.bytes() == other.bytes()
+    }
+}
+
+impl PartialEq<str> for UnixStr {
+    /// Views the `str` as a `UnixStr` and compares the two.
+    #[inline]
+    fn eq(&self, other: &str) -> bool {
+        <UnixStr as PartialEq>::eq(self, UnixStr::new(other))
+    }
+}
+
+impl PartialEq<UnixStr> for str {
+    /// Mirror of `UnixStr == str`, with the operands swapped.
+    #[inline]
+    fn eq(&self, other: &UnixStr) -> bool {
+        <UnixStr as PartialEq>::eq(other, UnixStr::new(self))
+    }
+}
+
+// Marker impl: the byte-wise `PartialEq` above is a full equivalence relation.
+impl Eq for UnixStr {}
+
+// All comparisons are performed on the underlying byte slices.
+impl PartialOrd for UnixStr {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        let (lhs, rhs) = (self.bytes(), other.bytes());
+        lhs.partial_cmp(rhs)
+    }
+    #[inline]
+    fn lt(&self, other: &Self) -> bool {
+        self.bytes() < other.bytes()
+    }
+    #[inline]
+    fn le(&self, other: &Self) -> bool {
+        self.bytes() <= other.bytes()
+    }
+    #[inline]
+    fn gt(&self, other: &Self) -> bool {
+        self.bytes() > other.bytes()
+    }
+    #[inline]
+    fn ge(&self, other: &Self) -> bool {
+        self.bytes() >= other.bytes()
+    }
+}
+
+impl PartialOrd<str> for UnixStr {
+    /// Views the `str` as a `UnixStr` and orders the two.
+    #[inline]
+    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
+        <UnixStr as PartialOrd>::partial_cmp(self, UnixStr::new(other))
+    }
+}
+
+// FIXME (#19470): cannot provide PartialOrd<UnixStr> for str until we
+// have more flexible coherence rules.
+
+impl Ord for UnixStr {
+    /// Total order obtained by comparing the `bytes()` of both operands.
+    #[inline]
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        self.bytes().cmp(other.bytes())
+    }
+}
+
+// Generates the four symmetric comparison impls between `$lhs` and `$rhs`:
+// `PartialEq` and `PartialOrd` in both directions. Every generated method
+// forwards to the `UnixStr` implementation, so both types must be usable where
+// a `&UnixStr` is expected. The `'a` and `'b` lifetimes are declared on each
+// impl so that invocations may mention them in the type arguments.
+#[cfg(feature = "alloc")]
+macro_rules! impl_cmp {
+    ($lhs:ty, $rhs: ty) => {
+        // `$lhs == $rhs`
+        impl<'a, 'b> PartialEq<$rhs> for $lhs {
+            #[inline]
+            fn eq(&self, other: &$rhs) -> bool {
+                <UnixStr as PartialEq>::eq(self, other)
+            }
+        }
+
+        // `$rhs == $lhs`
+        impl<'a, 'b> PartialEq<$lhs> for $rhs {
+            #[inline]
+            fn eq(&self, other: &$lhs) -> bool {
+                <UnixStr as PartialEq>::eq(self, other)
+            }
+        }
+
+        // `$lhs` ordered against `$rhs`
+        impl<'a, 'b> PartialOrd<$rhs> for $lhs {
+            #[inline]
+            fn partial_cmp(&self, other: &$rhs) -> Option<cmp::Ordering> {
+                <UnixStr as PartialOrd>::partial_cmp(self, other)
+            }
+        }
+
+        // `$rhs` ordered against `$lhs`
+        impl<'a, 'b> PartialOrd<$lhs> for $rhs {
+            #[inline]
+            fn partial_cmp(&self, other: &$lhs) -> Option<cmp::Ordering> {
+                <UnixStr as PartialOrd>::partial_cmp(self, other)
+            }
+        }
+    };
+}
+
+// Cross-type comparisons between the owned, borrowed and `Cow` forms. All of
+// them need `alloc` because `UnixString` or `Cow` appears in every pair.
+#[cfg(feature = "alloc")]
+impl_cmp!(UnixString, UnixStr);
+#[cfg(feature = "alloc")]
+impl_cmp!(UnixString, &'a UnixStr);
+#[cfg(feature = "alloc")]
+impl_cmp!(Cow<'a, UnixStr>, UnixStr);
+#[cfg(feature = "alloc")]
+impl_cmp!(Cow<'a, UnixStr>, &'b UnixStr);
+#[cfg(feature = "alloc")]
+impl_cmp!(Cow<'a, UnixStr>, UnixString);
+
+impl Hash for UnixStr {
+    /// Feeds the value returned by `bytes()` into the hasher, the same data
+    /// that the `PartialEq` impl compares.
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.bytes().hash(state)
+    }
+}
+
+impl fmt::Debug for UnixStr {
+    /// Delegates to the `Debug` implementation of the inner value.
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&self.inner, formatter)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl Borrow<UnixStr> for UnixString {
+    /// Borrows the whole string as a `UnixStr` by indexing with the full
+    /// range.
+    fn borrow(&self) -> &UnixStr {
+        &self[..]
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl ToOwned for UnixStr {
+    type Owned = UnixString;
+    /// Creates an owned `UnixString` copy through `to_unix_string`.
+    fn to_owned(&self) -> Self::Owned {
+        self.to_unix_string()
+    }
+    /// Overwrites `target` with a copy of `self` by forwarding to the inner
+    /// `clone_into`. Only compiled with the `toowned_clone_into` feature;
+    /// `test_unix_str_clone_into` checks that the target keeps its capacity.
+    #[cfg(feature = "toowned_clone_into")]
+    fn clone_into(&self, target: &mut Self::Owned) {
+        self.inner.clone_into(&mut target.inner)
+    }
+}
+
+impl AsRef<UnixStr> for UnixStr {
+    /// Identity conversion: returns `self`.
+    fn as_ref(&self) -> &UnixStr {
+        self
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl AsRef<UnixStr> for UnixString {
+    /// Views the owned string as a `UnixStr`; the `&UnixString` is coerced to
+    /// `&UnixStr` at the return site.
+    #[inline]
+    fn as_ref(&self) -> &UnixStr {
+        self
+    }
+}
+
+impl AsRef<UnixStr> for str {
+    /// Views the string slice as a `UnixStr` by wrapping it in a `Slice`
+    /// (`Slice::from_str`) and then in a `UnixStr` (`UnixStr::from_inner`).
+    #[inline]
+    fn as_ref(&self) -> &UnixStr {
+        UnixStr::from_inner(Slice::from_str(self))
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl AsRef<UnixStr> for String {
+    /// Views the `String` as a `UnixStr` by going through its `&str` view and
+    /// the `AsRef<UnixStr>` implementation for `str`.
+    #[inline]
+    fn as_ref(&self) -> &UnixStr {
+        let text: &str = self;
+        text.as_ref()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl FromInner<Buf> for UnixString {
+    /// Wraps a `Buf` in a `UnixString` without touching its contents.
+    fn from_inner(buf: Buf) -> UnixString {
+        UnixString { inner: buf }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl IntoInner<Buf> for UnixString {
+    /// Consumes the `UnixString` and returns the `Buf` it wraps.
+    fn into_inner(self) -> Buf {
+        self.inner
+    }
+}
+
+impl AsInner<Slice> for UnixStr {
+    /// Borrows the `Slice` wrapped by this `UnixStr`.
+    #[inline]
+    fn as_inner(&self) -> &Slice {
+        &self.inner
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl FromStr for UnixString {
+    // Parsing cannot fail, hence the uninhabited error type.
+    type Err = core::convert::Infallible;
+
+    /// Builds a `UnixString` from `s` via `UnixString::from`; always returns
+    /// `Ok`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(UnixString::from(s))
+    }
+}
+
+// Unit tests for `UnixString` / `UnixStr`.
+// NOTE(review): the tests use `UnixString`, `Rc` and `Arc`, so they presumably
+// only build when the `alloc` feature is enabled -- confirm.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use sys_common::{AsInner, IntoInner};
+
+    use alloc::rc::Rc;
+    use alloc::sync::Arc;
+
+    // `with_capacity` is checked through the capacity of the innermost buffer.
+    #[test]
+    fn test_unix_string_with_capacity() {
+        let unix_string = UnixString::with_capacity(0);
+        assert_eq!(0, unix_string.inner.into_inner().capacity());
+
+        let unix_string = UnixString::with_capacity(10);
+        assert_eq!(10, unix_string.inner.into_inner().capacity());
+
+        let mut unix_string = UnixString::with_capacity(0);
+        unix_string.push("abc");
+        assert!(unix_string.inner.into_inner().capacity() >= 3);
+    }
+
+    // `clear` empties the string.
+    #[test]
+    fn test_unix_string_clear() {
+        let mut unix_string = UnixString::from("abc");
+        assert_eq!(3, unix_string.inner.as_inner().len());
+
+        unix_string.clear();
+        assert_eq!(&unix_string, "");
+        assert_eq!(0, unix_string.inner.as_inner().len());
+    }
+
+    // Same checks as `test_unix_string_with_capacity`, but through the public
+    // `capacity` accessor.
+    #[test]
+    fn test_unix_string_capacity() {
+        let unix_string = UnixString::with_capacity(0);
+        assert_eq!(0, unix_string.capacity());
+
+        let unix_string = UnixString::with_capacity(10);
+        assert_eq!(10, unix_string.capacity());
+
+        let mut unix_string = UnixString::with_capacity(0);
+        unix_string.push("abc");
+        assert!(unix_string.capacity() >= 3);
+    }
+
+    // `reserve(n)` must leave room for at least `len + n` bytes.
+    #[test]
+    fn test_unix_string_reserve() {
+        let mut unix_string = UnixString::new();
+        assert_eq!(unix_string.capacity(), 0);
+
+        unix_string.reserve(2);
+        assert!(unix_string.capacity() >= 2);
+
+        for _ in 0..16 {
+            unix_string.push("a");
+        }
+
+        assert!(unix_string.capacity() >= 16);
+        unix_string.reserve(16);
+        assert!(unix_string.capacity() >= 32);
+
+        unix_string.push("a");
+
+        unix_string.reserve(16);
+        assert!(unix_string.capacity() >= 33)
+    }
+
+    // Same scenario as `test_unix_string_reserve`, using `reserve_exact`.
+    #[test]
+    fn test_unix_string_reserve_exact() {
+        let mut unix_string = UnixString::new();
+        assert_eq!(unix_string.capacity(), 0);
+
+        unix_string.reserve_exact(2);
+        assert!(unix_string.capacity() >= 2);
+
+        for _ in 0..16 {
+            unix_string.push("a");
+        }
+
+        assert!(unix_string.capacity() >= 16);
+        unix_string.reserve_exact(16);
+        assert!(unix_string.capacity() >= 32);
+
+        unix_string.push("a");
+
+        unix_string.reserve_exact(16);
+        assert!(unix_string.capacity() >= 33)
+    }
+
+    // The default `UnixString` is the empty string.
+    #[test]
+    fn test_unix_string_default() {
+        let unix_string: UnixString = Default::default();
+        assert_eq!("", &unix_string);
+    }
+
+    // `is_empty` follows pushes and clears.
+    #[test]
+    fn test_unix_str_is_empty() {
+        let mut unix_string = UnixString::new();
+        assert!(unix_string.is_empty());
+
+        unix_string.push("abc");
+        assert!(!unix_string.is_empty());
+
+        unix_string.clear();
+        assert!(unix_string.is_empty());
+    }
+
+    // `len` follows pushes and clears.
+    #[test]
+    fn test_unix_str_len() {
+        let mut unix_string = UnixString::new();
+        assert_eq!(0, unix_string.len());
+
+        unix_string.push("abc");
+        assert_eq!(3, unix_string.len());
+
+        unix_string.clear();
+        assert_eq!(0, unix_string.len());
+    }
+
+    // The default `&UnixStr` is the empty string.
+    #[test]
+    fn test_unix_str_default() {
+        let unix_str: &UnixStr = Default::default();
+        assert_eq!("", unix_str);
+    }
+
+    // Round trip: borrowed -> boxed, and owned -> boxed -> owned.
+    #[test]
+    fn into_boxed() {
+        let orig = "Hello, world!";
+        let unix_str = UnixStr::new(orig);
+        let boxed: Box<UnixStr> = Box::from(unix_str);
+        let unix_string = unix_str.to_owned().into_boxed_unix_str().into_unix_string();
+        assert_eq!(unix_str, &*boxed);
+        assert_eq!(&*boxed, &*unix_string);
+        assert_eq!(&*unix_string, unix_str);
+    }
+
+    // The default `Box<UnixStr>` is empty.
+    #[test]
+    fn boxed_default() {
+        let boxed = <Box<UnixStr>>::default();
+        assert!(boxed.is_empty());
+    }
+
+    // `clone_into` replaces the contents and keeps the target's capacity.
+    #[test]
+    #[cfg(feature = "toowned_clone_into")]
+    fn test_unix_str_clone_into() {
+        let mut unix_string = UnixString::with_capacity(123);
+        unix_string.push("hello");
+        let unix_str = UnixStr::new("bonjour");
+        unix_str.clone_into(&mut unix_string);
+        assert_eq!(unix_str, unix_string);
+        assert!(unix_string.capacity() >= 123);
+    }
+
+    // `Rc` / `Arc` can be built from both the borrowed and the owned form.
+    #[test]
+    fn into_rc() {
+        let orig = "Hello, world!";
+        let unix_str = UnixStr::new(orig);
+        let rc: Rc<UnixStr> = Rc::from(unix_str);
+        let arc: Arc<UnixStr> = Arc::from(unix_str);
+
+        assert_eq!(&*rc, unix_str);
+        assert_eq!(&*arc, unix_str);
+
+        let rc2: Rc<UnixStr> = Rc::from(unix_str.to_owned());
+        let arc2: Arc<UnixStr> = Arc::from(unix_str.to_owned());
+
+        assert_eq!(&*rc2, unix_str);
+        assert_eq!(&*arc2, unix_str);
+    }
+}
diff --git a/third_party/rust/unix_str/src/lossy.rs b/third_party/rust/unix_str/src/lossy.rs
new file mode 100644
index 0000000000..270ae30d71
--- /dev/null
+++ b/third_party/rust/unix_str/src/lossy.rs
@@ -0,0 +1,222 @@
+use core::char;
+use core::fmt::{self, Write};
+use core::mem;
+use core::str as core_str;
+
+// https://tools.ietf.org/html/rfc3629
+//
+// Lookup table indexed by the first byte of a UTF-8 sequence, giving the total
+// length of that sequence in bytes:
+//   0x00..=0x7F -> 1 (ASCII)
+//   0x80..=0xC1 -> 0 (cannot start a sequence)
+//   0xC2..=0xDF -> 2
+//   0xE0..=0xEF -> 3
+//   0xF0..=0xF4 -> 4
+//   0xF5..=0xFF -> 0 (cannot start a sequence)
+static UTF8_CHAR_WIDTH: [u8; 256] = [
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, // 0x1F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, // 0x3F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, // 0x5F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, // 0x7F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, // 0x9F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, // 0xBF
+    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, // 0xDF
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
+    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
+];
+
+/// Given a first byte, determines how many bytes are in this UTF-8 character.
+///
+/// The answer comes straight from the `UTF8_CHAR_WIDTH` table, so the result
+/// is `0` for a byte that cannot start a UTF-8 sequence.
+#[inline]
+pub fn utf8_char_width(b: u8) -> usize {
+    UTF8_CHAR_WIDTH[b as usize] as usize
+}
+
+/// Lossy UTF-8 string.
+///
+/// A thin wrapper around a byte slice that is not required to contain valid
+/// UTF-8.
+// `repr(transparent)` guarantees that `Utf8Lossy` has exactly the layout of
+// its single `[u8]` field, which `Utf8Lossy::from_bytes` relies on when it
+// reinterprets a `&[u8]` as a `&Utf8Lossy`.
+#[repr(transparent)]
+pub struct Utf8Lossy {
+    bytes: [u8],
+}
+
+impl Utf8Lossy {
+    /// Views `bytes` as a `Utf8Lossy` without copying or validating them.
+    pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
+        // SAFETY: Both use the same memory layout, and UTF-8 correctness isn't required.
+        unsafe { mem::transmute(bytes) }
+    }
+
+    /// Returns an iterator over the chunks of this string, starting at the
+    /// first byte.
+    pub fn chunks(&self) -> Utf8LossyChunksIter<'_> {
+        Utf8LossyChunksIter {
+            source: &self.bytes,
+        }
+    }
+}
+
+/// Iterator over lossy UTF-8 string
+///
+/// Created by `Utf8Lossy::chunks`; yields `Utf8LossyChunk` items.
+#[allow(missing_debug_implementations)]
+pub struct Utf8LossyChunksIter<'a> {
+    // Bytes that have not been yielded yet.
+    source: &'a [u8],
+}
+
+/// One item of `Utf8LossyChunksIter`: a run of valid UTF-8 followed by the
+/// invalid byte sequence (if any) that ended the run.
+#[derive(PartialEq, Eq, Debug)]
+pub struct Utf8LossyChunk<'a> {
+    /// Sequence of valid chars.
+    /// Can be empty between broken UTF-8 chars.
+    pub valid: &'a str,
+    /// Single broken char, empty if none.
+    /// An empty `broken` means this is the last item of the iterator; the
+    /// last item may still carry a non-empty `broken` when the input ends
+    /// with an invalid sequence.
+    pub broken: &'a [u8],
+}
+
+impl<'a> Iterator for Utf8LossyChunksIter<'a> {
+    type Item = Utf8LossyChunk<'a>;
+
+    /// Yields the next chunk: the longest valid UTF-8 prefix of the remaining
+    /// bytes, plus the invalid byte sequence (if any) that terminated it.
+    /// Returns `None` once all bytes have been consumed.
+    fn next(&mut self) -> Option<Utf8LossyChunk<'a>> {
+        if self.source.is_empty() {
+            return None;
+        }
+
+        // Continuation bytes look like `10xx_xxxx`: masking with 192 (the top
+        // two bits) must give 128.
+        const TAG_CONT_U8: u8 = 128;
+        // Out-of-range reads yield 0, which matches neither the continuation
+        // pattern nor any second-byte range below, so a truncated sequence
+        // takes the error path.
+        fn safe_get(xs: &[u8], i: usize) -> u8 {
+            *xs.get(i).unwrap_or(&0)
+        }
+
+        let mut i = 0;
+        while i < self.source.len() {
+            // Start index of the sequence currently being validated.
+            let i_ = i;
+
+            // SAFETY: `i` starts at `0`, is less than `self.source.len()`, and
+            // only increases, so `0 <= i < self.source.len()`.
+            let byte = unsafe { *self.source.get_unchecked(i) };
+            i += 1;
+
+            if byte < 128 {
+                // ASCII byte: nothing further to validate.
+            } else {
+                let w = utf8_char_width(byte);
+
+                // Ends the chunk at the first invalid sequence: bytes `0..i_`
+                // become `valid`, bytes `i_..i` become `broken`, and the
+                // iterator resumes after them on the next call.
+                macro_rules! error {
+                    () => {{
+                        // SAFETY: We have checked up to `i_` that source is valid UTF-8.
+                        unsafe {
+                            let r = Utf8LossyChunk {
+                                valid: core_str::from_utf8_unchecked(&self.source[0..i_]),
+                                broken: &self.source[i_..i],
+                            };
+                            self.source = &self.source[i..];
+                            return Some(r);
+                        }
+                    }};
+                }
+
+                match w {
+                    2 => {
+                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
+                            error!();
+                        }
+                        i += 1;
+                    }
+                    3 => {
+                        // The second byte's allowed range depends on the first
+                        // byte (RFC 3629): this excludes overlong encodings
+                        // (0xE0) and UTF-16 surrogates (0xED).
+                        match (byte, safe_get(self.source, i)) {
+                            (0xE0, 0xA0..=0xBF) => (),
+                            (0xE1..=0xEC, 0x80..=0xBF) => (),
+                            (0xED, 0x80..=0x9F) => (),
+                            (0xEE..=0xEF, 0x80..=0xBF) => (),
+                            _ => {
+                                error!();
+                            }
+                        }
+                        i += 1;
+                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
+                            error!();
+                        }
+                        i += 1;
+                    }
+                    4 => {
+                        // As above: excludes overlong encodings (0xF0) and
+                        // code points above U+10FFFF (0xF4).
+                        match (byte, safe_get(self.source, i)) {
+                            (0xF0, 0x90..=0xBF) => (),
+                            (0xF1..=0xF3, 0x80..=0xBF) => (),
+                            (0xF4, 0x80..=0x8F) => (),
+                            _ => {
+                                error!();
+                            }
+                        }
+                        i += 1;
+                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
+                            error!();
+                        }
+                        i += 1;
+                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
+                            error!();
+                        }
+                        i += 1;
+                    }
+                    // Width 0: the byte cannot start a UTF-8 sequence.
+                    _ => {
+                        error!();
+                    }
+                }
+            }
+        }
+
+        // No invalid sequence was found: everything left is one valid chunk.
+        let r = Utf8LossyChunk {
+            // SAFETY: We have checked that the entire source is valid UTF-8.
+            valid: unsafe { core_str::from_utf8_unchecked(self.source) },
+            broken: &[],
+        };
+        self.source = &[];
+        Some(r)
+    }
+}
+
+impl fmt::Display for Utf8Lossy {
+    /// Writes the string, substituting U+FFFD REPLACEMENT CHARACTER for every
+    /// broken sequence.
+    ///
+    /// When the whole input is valid UTF-8 (including the empty input) it is
+    /// formatted through `str`'s own `Display`, so formatter flags such as
+    /// width and padding are honoured.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let total_len = self.bytes.len();
+
+        // The chunk iterator yields nothing for empty input, so format the
+        // empty string by hand.
+        if total_len == 0 {
+            return "".fmt(f);
+        }
+
+        for chunk in self.chunks() {
+            let Utf8LossyChunk { valid, broken } = chunk;
+
+            // A chunk covering the entire input means there was nothing
+            // broken at all: hand it to `str` formatting directly.
+            if valid.len() == total_len {
+                assert!(broken.is_empty());
+                return valid.fmt(f);
+            }
+
+            f.write_str(valid)?;
+            if !broken.is_empty() {
+                f.write_char(char::REPLACEMENT_CHARACTER)?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl fmt::Debug for Utf8Lossy {
+    /// Writes the string in double quotes, escaping valid characters with
+    /// `char::escape_debug` and broken bytes as lower-case `\xNN` hex escapes.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_char('"')?;
+
+        for Utf8LossyChunk { valid, broken } in self.chunks() {
+            // Valid part.
+            // Here we partially parse UTF-8 again which is suboptimal.
+            {
+                // Start of the not-yet-written run of characters that need no
+                // escaping.
+                let mut from = 0;
+                for (i, c) in valid.char_indices() {
+                    let esc = c.escape_debug();
+                    // If char needs escaping, flush backlog so far and write, else skip
+                    if esc.len() != 1 {
+                        f.write_str(&valid[from..i])?;
+                        for c in esc {
+                            f.write_char(c)?;
+                        }
+                        from = i + c.len_utf8();
+                    }
+                }
+                // Flush whatever follows the last escaped character.
+                f.write_str(&valid[from..])?;
+            }
+
+            // Broken parts of string as hex escape.
+            for &b in broken {
+                write!(f, "\\x{:02x}", b)?;
+            }
+        }
+
+        f.write_char('"')
+    }
+}
diff --git a/third_party/rust/unix_str/src/sys.rs b/third_party/rust/unix_str/src/sys.rs
new file mode 100644
index 0000000000..cf1de79ee4
--- /dev/null
+++ b/third_party/rust/unix_str/src/sys.rs
@@ -0,0 +1,256 @@
+//! The underlying UnixString/UnixStr implementation: just a `Vec<u8>`/`[u8]`.
+
+use crate::sys_common::bytestring::debug_fmt_bytestring;
+#[cfg(feature = "alloc")]
+use crate::sys_common::{AsInner, IntoInner};
+use core::fmt;
+use core::mem;
+use core::str;
+
+#[cfg(feature = "alloc")]
+use alloc::borrow::Cow;
+#[cfg(feature = "alloc")]
+use alloc::boxed::Box;
+#[cfg(feature = "alloc")]
+use alloc::rc::Rc;
+#[cfg(feature = "alloc")]
+use alloc::string::String;
+#[cfg(feature = "alloc")]
+use alloc::sync::Arc;
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+#[cfg(all(feature = "alloc", feature = "toowned_clone_into"))]
+use alloc::borrow::ToOwned;
+
+// Owned, growable byte buffer backing `UnixString`.
+#[cfg(feature = "alloc")]
+#[derive(Clone, Hash)]
+pub(crate) struct Buf {
+    pub inner: Vec<u8>,
+}
+
+// Borrowed byte-slice counterpart of `Buf`, backing `UnixStr`.
+//
+// `Buf::as_slice` and the other conversions in this file reinterpret `[u8]`
+// values (behind references, boxes, `Rc` and `Arc`) as `Slice`. That is only
+// sound when `Slice` is layout-compatible with `[u8]`, which
+// `#[repr(transparent)]` guarantees.
+// Anyway, `Slice` representation and layout are considered implementation detail, are
+// not documented and must not be relied upon.
+#[repr(transparent)]
+pub(crate) struct Slice {
+    pub inner: [u8],
+}
+
+impl fmt::Debug for Slice {
+    /// Formats the bytes with `debug_fmt_bytestring`.
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        debug_fmt_bytestring(&self.inner, formatter)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl IntoInner<Vec<u8>> for Buf {
+    /// Consumes the buffer and returns the underlying `Vec<u8>`.
+    fn into_inner(self) -> Vec<u8> {
+        self.inner
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl AsInner<[u8]> for Buf {
+    /// Borrows the buffer's contents as a byte slice.
+    fn as_inner(&self) -> &[u8] {
+        &self.inner
+    }
+}
+
+// Most methods forward directly to the wrapped `Vec<u8>`; the conversions to
+// and from `Slice` rely on `Slice` being layout-compatible with `[u8]`.
+#[cfg(feature = "alloc")]
+impl Buf {
+    /// Takes over the bytes of `s` without copying.
+    pub fn from_string(s: String) -> Self {
+        Self {
+            inner: s.into_bytes(),
+        }
+    }
+
+    /// Creates an empty buffer through `Vec::with_capacity(capacity)`.
+    #[inline]
+    pub fn with_capacity(capacity: usize) -> Self {
+        Buf {
+            inner: Vec::with_capacity(capacity),
+        }
+    }
+
+    /// Removes all bytes (forwards to `Vec::clear`).
+    #[inline]
+    pub fn clear(&mut self) {
+        self.inner.clear()
+    }
+
+    /// Returns the capacity of the underlying vector.
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.inner.capacity()
+    }
+
+    /// Forwards to `Vec::reserve`.
+    #[inline]
+    pub fn reserve(&mut self, additional: usize) {
+        self.inner.reserve(additional)
+    }
+
+    /// Forwards to `Vec::reserve_exact`.
+    #[inline]
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.inner.reserve_exact(additional)
+    }
+
+    /// Forwards to `Vec::shrink_to_fit`.
+    #[inline]
+    pub fn shrink_to_fit(&mut self) {
+        self.inner.shrink_to_fit()
+    }
+
+    /// Forwards to `Vec::shrink_to`; only compiled with the `shrink_to`
+    /// feature.
+    #[inline]
+    #[cfg(feature = "shrink_to")]
+    pub fn shrink_to(&mut self, min_capacity: usize) {
+        self.inner.shrink_to(min_capacity)
+    }
+
+    /// Borrows the contents as a `Slice`.
+    #[inline]
+    pub fn as_slice(&self) -> &Slice {
+        // Safety: Slice just wraps [u8],
+        // and &*self.inner is &[u8], therefore
+        // transmuting &[u8] to &Slice is safe.
+        unsafe { mem::transmute(&*self.inner) }
+    }
+
+    /// Mutably borrows the contents as a `Slice`.
+    #[inline]
+    pub fn as_mut_slice(&mut self) -> &mut Slice {
+        // Safety: Slice just wraps [u8],
+        // and &mut *self.inner is &mut [u8], therefore
+        // transmuting &mut [u8] to &mut Slice is safe.
+        unsafe { mem::transmute(&mut *self.inner) }
+    }
+
+    /// Converts the buffer into a `String` if it holds valid UTF-8; otherwise
+    /// gives the original bytes back as the `Err` value.
+    pub fn into_string(self) -> Result<String, Self> {
+        String::from_utf8(self.inner).map_err(|p| Self {
+            inner: p.into_bytes(),
+        })
+    }
+
+    /// Appends the bytes of `s` to the buffer.
+    pub fn push_slice(&mut self, s: &Slice) {
+        self.inner.extend_from_slice(&s.inner)
+    }
+
+    /// Converts the buffer into a boxed `Slice`.
+    #[inline]
+    pub fn into_box(self) -> Box<Slice> {
+        // Safety: relies on `Slice` wrapping `[u8]` with the same layout, so
+        // that `Box<[u8]>` can be reinterpreted as `Box<Slice>`.
+        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
+    }
+
+    /// Converts a boxed `Slice` back into a buffer.
+    #[inline]
+    pub fn from_box(boxed: Box<Slice>) -> Self {
+        // Safety: inverse of `into_box`; relies on the same layout
+        // compatibility between `Slice` and `[u8]`.
+        let inner: Box<[u8]> = unsafe { mem::transmute(boxed) };
+        Self {
+            inner: inner.into_vec(),
+        }
+    }
+
+    /// Creates an `Arc<Slice>` from the contents via `Slice::into_arc`; the
+    /// buffer itself is only borrowed.
+    #[inline]
+    pub fn into_arc(&self) -> Arc<Slice> {
+        self.as_slice().into_arc()
+    }
+
+    /// Creates an `Rc<Slice>` from the contents via `Slice::into_rc`; the
+    /// buffer itself is only borrowed.
+    #[inline]
+    pub fn into_rc(&self) -> Rc<Slice> {
+        self.as_slice().into_rc()
+    }
+}
+
+// The unsafe conversions below rely on `Slice` wrapping `[u8]` with the same
+// layout.
+impl Slice {
+    /// Views a byte slice as a `Slice` without copying.
+    #[inline]
+    fn from_u8_slice(s: &[u8]) -> &Self {
+        // Safety: relies on `Slice` being layout-compatible with `[u8]`.
+        unsafe { mem::transmute(s) }
+    }
+
+    /// Views the bytes of a string slice as a `Slice`.
+    #[inline]
+    pub fn from_str(s: &str) -> &Self {
+        Self::from_u8_slice(s.as_bytes())
+    }
+
+    /// Returns the contents as `&str` when they are valid UTF-8, `None`
+    /// otherwise.
+    pub fn to_str(&self) -> Option<&str> {
+        str::from_utf8(&self.inner).ok()
+    }
+
+    /// Forwards to `String::from_utf8_lossy`.
+    #[cfg(feature = "alloc")]
+    pub fn to_string_lossy(&self) -> Cow<'_, str> {
+        String::from_utf8_lossy(&self.inner)
+    }
+
+    /// Copies the bytes into a new `Buf`.
+    #[cfg(feature = "alloc")]
+    pub fn to_owned(&self) -> Buf {
+        Buf {
+            inner: self.inner.to_vec(),
+        }
+    }
+
+    /// Overwrites `buf` with a copy of these bytes by forwarding to the slice
+    /// `clone_into`.
+    #[cfg(all(feature = "alloc", feature = "toowned_clone_into"))]
+    pub fn clone_into(&self, buf: &mut Buf) {
+        self.inner.clone_into(&mut buf.inner)
+    }
+
+    /// Copies the bytes into a new boxed `Slice`.
+    #[inline]
+    #[cfg(feature = "alloc")]
+    pub fn into_box(&self) -> Box<Self> {
+        let boxed: Box<[u8]> = self.inner.into();
+        // Safety: relies on `Slice` being layout-compatible with `[u8]`.
+        unsafe { mem::transmute(boxed) }
+    }
+
+    /// Creates an empty boxed `Slice`.
+    #[cfg(feature = "alloc")]
+    pub fn empty_box() -> Box<Self> {
+        let boxed: Box<[u8]> = Default::default();
+        // Safety: relies on `Slice` being layout-compatible with `[u8]`.
+        unsafe { mem::transmute(boxed) }
+    }
+
+    /// Copies the bytes into a new `Arc<Slice>`.
+    #[inline]
+    #[cfg(feature = "alloc")]
+    pub fn into_arc(&self) -> Arc<Self> {
+        let arc: Arc<[u8]> = Arc::from(&self.inner);
+        // Safety: the pointer comes straight from `Arc::into_raw`; the cast
+        // relies on `Slice` being layout-compatible with `[u8]`.
+        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Self) }
+    }
+
+    /// Copies the bytes into a new `Rc<Slice>`.
+    #[inline]
+    #[cfg(feature = "alloc")]
+    pub fn into_rc(&self) -> Rc<Self> {
+        let rc: Rc<[u8]> = Rc::from(&self.inner);
+        // Safety: the pointer comes straight from `Rc::into_raw`; the cast
+        // relies on `Slice` being layout-compatible with `[u8]`.
+        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Self) }
+    }
+
+    /// Forwards to `<[u8]>::make_ascii_lowercase`.
+    #[inline]
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn make_ascii_lowercase(&mut self) {
+        self.inner.make_ascii_lowercase()
+    }
+
+    /// Forwards to `<[u8]>::make_ascii_uppercase`.
+    #[inline]
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn make_ascii_uppercase(&mut self) {
+        self.inner.make_ascii_uppercase()
+    }
+
+    /// Returns a `Buf` holding the result of `<[u8]>::to_ascii_lowercase`.
+    #[inline]
+    #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))]
+    pub fn to_ascii_lowercase(&self) -> Buf {
+        Buf {
+            inner: self.inner.to_ascii_lowercase(),
+        }
+    }
+
+    /// Returns a `Buf` holding the result of `<[u8]>::to_ascii_uppercase`.
+    #[inline]
+    #[cfg(all(feature = "alloc", feature = "unixstring_ascii"))]
+    pub fn to_ascii_uppercase(&self) -> Buf {
+        Buf {
+            inner: self.inner.to_ascii_uppercase(),
+        }
+    }
+
+    /// Forwards to `<[u8]>::is_ascii`.
+    #[inline]
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn is_ascii(&self) -> bool {
+        self.inner.is_ascii()
+    }
+
+    /// Forwards to `<[u8]>::eq_ignore_ascii_case`.
+    #[inline]
+    #[cfg(feature = "unixstring_ascii")]
+    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
+        self.inner.eq_ignore_ascii_case(&other.inner)
+    }
+}
diff --git a/third_party/rust/unix_str/src/sys_common.rs b/third_party/rust/unix_str/src/sys_common.rs
new file mode 100644
index 0000000000..c18994e4a2
--- /dev/null
+++ b/third_party/rust/unix_str/src/sys_common.rs
@@ -0,0 +1,39 @@
+//! Platform-independent platform abstraction
+//!
+//! This is the platform-independent portion of the standard library's
+//! platform abstraction layer, whereas `std::sys` is the
+//! platform-specific portion.
+//!
+//! The relationship between `std::sys_common`, `std::sys` and the
+//! rest of `std` is complex, with dependencies going in all
+//! directions: `std` depending on `sys_common`, `sys_common`
+//! depending on `sys`, and `sys` depending on `sys_common` and `std`.
+//! Ideally `sys_common` would be split into two and the dependencies
+//! between them all would form a dag, facilitating the extraction of
+//! `std::sys` from the standard library.
+
+pub mod bytestring;
+
+/// A trait for viewing representations from std types
+///
+/// Gives shared access to the inner representation (`Inner`) that a wrapper
+/// type is built on.
+#[doc(hidden)]
+pub trait AsInner<Inner: ?Sized> {
+    /// Borrows the inner representation.
+    fn as_inner(&self) -> &Inner;
+}
+
+/// A trait for mutably viewing representations from std types
+///
+/// Mutable counterpart of `AsInner`.
+#[doc(hidden)]
+pub trait AsInnerMut<Inner: ?Sized> {
+    /// Mutably borrows the inner representation.
+    fn as_inner_mut(&mut self) -> &mut Inner;
+}
+
+/// A trait for extracting representations from std types
+///
+/// Consumes the wrapper and hands out the inner representation by value.
+#[doc(hidden)]
+pub trait IntoInner<Inner> {
+    /// Consumes `self` and returns the inner representation.
+    fn into_inner(self) -> Inner;
+}
+
+/// A trait for creating std types from internal representations
+///
+/// Inverse of `IntoInner`: wraps an inner representation in the outer type.
+#[doc(hidden)]
+pub trait FromInner<Inner> {
+    /// Builds `Self` from the given inner representation.
+    fn from_inner(inner: Inner) -> Self;
+}
diff --git a/third_party/rust/unix_str/src/sys_common/bytestring.rs b/third_party/rust/unix_str/src/sys_common/bytestring.rs
new file mode 100644
index 0000000000..ac6b7e893f
--- /dev/null
+++ b/third_party/rust/unix_str/src/sys_common/bytestring.rs
@@ -0,0 +1,45 @@
+use crate::lossy::{Utf8Lossy, Utf8LossyChunk};
+use core::fmt::{Formatter, Result, Write};
+
+/// Writes `slice` to `f` as a double-quoted, debug-escaped string.
+///
+/// Runs of valid UTF-8 are written character by character through
+/// `char::escape_debug`; every byte of an invalid sequence is written as an
+/// upper-case `\xNN` hex escape.
+pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter<'_>) -> Result {
+    f.write_str("\"")?;
+    for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() {
+        // Valid text: emit each character's escape sequence.
+        for ch in valid.chars() {
+            for piece in ch.escape_debug() {
+                f.write_char(piece)?;
+            }
+        }
+        // Invalid bytes: emit as hex escapes.
+        for byte in broken.iter() {
+            write!(f, "\\x{:02X}", byte)?;
+        }
+    }
+    f.write_str("\"")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::fmt::{Debug, Formatter, Result};
+    use alloc::format;
+
+    // Formats a byte string that starts with an invalid byte and contains a
+    // character needing escaping, and checks the exact output.
+    #[test]
+    fn smoke() {
+        // Adapter that routes `{:?}` formatting to `debug_fmt_bytestring`.
+        struct Helper<'a>(&'a [u8]);
+
+        impl Debug for Helper<'_> {
+            fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+                debug_fmt_bytestring(self.0, f)
+            }
+        }
+
+        let input = b"\xF0hello,\tworld";
+        let expected = r#""\xF0hello,\tworld""#;
+        let output = format!("{:?}", Helper(input));
+
+        // `assert_eq!` prints both strings on failure, unlike a bare
+        // `assert!(a == b)`.
+        assert_eq!(output, expected);
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
commit	36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree	105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/unix_str/src
parent	Initial commit. (diff)
download	firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip