summaryrefslogtreecommitdiffstats
path: root/third_party/rust/os_str_bytes/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/os_str_bytes/src
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/os_str_bytes/src')
-rw-r--r--third_party/rust/os_str_bytes/src/common/mod.rs43
-rw-r--r--third_party/rust/os_str_bytes/src/common/raw.rs45
-rw-r--r--third_party/rust/os_str_bytes/src/iter.rs111
-rw-r--r--third_party/rust/os_str_bytes/src/lib.rs623
-rw-r--r--third_party/rust/os_str_bytes/src/pattern.rs71
-rw-r--r--third_party/rust/os_str_bytes/src/raw_str.rs1547
-rw-r--r--third_party/rust/os_str_bytes/src/util.rs9
-rw-r--r--third_party/rust/os_str_bytes/src/wasm/mod.rs58
-rw-r--r--third_party/rust/os_str_bytes/src/wasm/raw.rs34
-rw-r--r--third_party/rust/os_str_bytes/src/windows/mod.rs113
-rw-r--r--third_party/rust/os_str_bytes/src/windows/raw.rs46
-rw-r--r--third_party/rust/os_str_bytes/src/windows/wtf8/code_points.rs129
-rw-r--r--third_party/rust/os_str_bytes/src/windows/wtf8/convert.rs181
-rw-r--r--third_party/rust/os_str_bytes/src/windows/wtf8/mod.rs18
-rw-r--r--third_party/rust/os_str_bytes/src/windows/wtf8/string.rs67
15 files changed, 3095 insertions, 0 deletions
diff --git a/third_party/rust/os_str_bytes/src/common/mod.rs b/third_party/rust/os_str_bytes/src/common/mod.rs
new file mode 100644
index 0000000000..e28aba6696
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/common/mod.rs
@@ -0,0 +1,43 @@
+use std::borrow::Cow;
+use std::convert::Infallible;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::result;
+
+#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))]
+use std::os::fortanix_sgx as os;
+#[cfg(target_os = "solid_asp3")]
+use std::os::solid as os;
+#[cfg(any(target_os = "hermit", unix))]
+use std::os::unix as os;
+#[cfg(target_os = "wasi")]
+use std::os::wasi as os;
+#[cfg(target_os = "xous")]
+use std::os::xous as os;
+
+use os::ffi::OsStrExt;
+use os::ffi::OsStringExt;
+
+if_raw_str! {
+ pub(super) mod raw;
+}
+
+pub(super) type EncodingError = Infallible;
+
+type Result<T> = result::Result<T, EncodingError>;
+
+/// Views `string` as an [`OsStr`] without copying.
+///
+/// The result is always `Ok` and always the borrowed variant of [`Cow`].
+pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
+    let borrowed: &OsStr = OsStrExt::from_bytes(string);
+    Ok(Cow::Borrowed(borrowed))
+}
+
+/// Exposes the bytes of `os_string` without copying.
+///
+/// The result is always the borrowed variant of [`Cow`].
+pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
+    let bytes: &[u8] = os_string.as_bytes();
+    Cow::Borrowed(bytes)
+}
+
+/// Turns an owned byte vector into an [`OsString`].
+///
+/// The result is always `Ok`.
+pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
+    let converted: OsString = OsStringExt::from_vec(string);
+    Ok(converted)
+}
+
+/// Consumes `os_string` and returns its bytes as a vector.
+pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
+    os_string.into_vec()
+}
diff --git a/third_party/rust/os_str_bytes/src/common/raw.rs b/third_party/rust/os_str_bytes/src/common/raw.rs
new file mode 100644
index 0000000000..97d0353d7e
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/common/raw.rs
@@ -0,0 +1,45 @@
+use std::fmt;
+use std::fmt::Formatter;
+
+use super::Result;
+
+/// Reports whether a byte is a continuation byte.
+///
+/// This backend answers `false` for every input.
+#[inline(always)]
+pub(crate) const fn is_continuation(_byte: u8) -> bool {
+    false
+}
+
+/// Validates a byte string for this backend.
+///
+/// The input is not inspected; the result is always `Ok(())`.
+#[inline(always)]
+pub(crate) fn validate_bytes(_bytes: &[u8]) -> Result<()> {
+    Ok(())
+}
+
+/// Placeholder for code point decoding in this backend.
+///
+/// # Panics
+///
+/// Always panics via [`unreachable!`]; the argument is ignored.
+#[inline(always)]
+pub(crate) fn decode_code_point(_bytes: &[u8]) -> u32 {
+    unreachable!()
+}
+
+/// Returns `true` when `suffix` matches the final bytes of `string`.
+pub(crate) fn ends_with(string: &[u8], suffix: &[u8]) -> bool {
+    string.strip_suffix(suffix).is_some()
+}
+
+/// Returns `true` when `prefix` matches the leading bytes of `string`.
+pub(crate) fn starts_with(string: &[u8], prefix: &[u8]) -> bool {
+    string.strip_prefix(prefix).is_some()
+}
+
+/// Writes every byte of `string` to `f` as a `\xNN` escape with two
+/// uppercase hexadecimal digits.
+///
+/// Stops at, and returns, the first formatting error.
+pub(crate) fn debug(string: &[u8], f: &mut Formatter<'_>) -> fmt::Result {
+    string
+        .iter()
+        .try_for_each(|byte| write!(f, "\\x{:02X}", byte))
+}
+
+// Quoting support, compiled only when the "uniquote" feature is enabled.
+#[cfg(feature = "uniquote")]
+pub(crate) mod uniquote {
+    use uniquote::Formatter;
+    use uniquote::Quote;
+    use uniquote::Result;
+
+    /// Escapes `string` into `f` by delegating to the [`Quote`]
+    /// implementation for byte slices.
+    pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result {
+        Quote::escape(string, f)
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/iter.rs b/third_party/rust/os_str_bytes/src/iter.rs
new file mode 100644
index 0000000000..03ff982412
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/iter.rs
@@ -0,0 +1,111 @@
+//! Iterators provided by this crate.
+
+#![cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+
+use std::convert;
+use std::fmt;
+use std::fmt::Debug;
+use std::fmt::Formatter;
+use std::iter::FusedIterator;
+
+use super::pattern::Encoded;
+use super::Pattern;
+use super::RawOsStr;
+
+// This struct does not wrap [memchr::memmem::FindIter], because doing so
+// would make it self-referential. That iterator also lacks a
+// [DoubleEndedIterator] implementation, and adding one would likely require
+// significant changes.
+/// The iterator returned by [`RawOsStr::split`].
+pub struct Split<'a, P>
+where
+    P: Pattern,
+{
+    // Portion of the input not yet yielded; `None` once iteration finishes.
+    string: Option<&'a RawOsStr>,
+    // Encoded form of the separator pattern.
+    pat: P::__Encoded,
+}
+
+impl<'a, P> Split<'a, P>
+where
+    P: Pattern,
+{
+    /// Creates an iterator over the pieces of `string` separated by `pat`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the encoded pattern is empty.
+    #[track_caller]
+    pub(super) fn new(string: &'a RawOsStr, pat: P) -> Self {
+        let encoded = pat.__encode();
+        if encoded.__get().is_empty() {
+            panic!("cannot split using an empty pattern");
+        }
+        Self {
+            string: Some(string),
+            pat: encoded,
+        }
+    }
+}
+
+// Shared body of `next` and `next_back`.
+//
+// Returns `None` (via `?`) when no input remains. Otherwise splits the
+// remaining input with `$split_method`; on a match, `$swap_fn` orders the pair
+// as (piece to yield, remainder to keep). Without a match, the remaining
+// input is yielded as the final piece and the iterator is left empty.
+macro_rules! impl_next {
+    ( $self:ident , $split_method:ident , $swap_fn:expr ) => {{
+        match $self.string?.$split_method(&$self.pat) {
+            Some(parts) => {
+                let (piece, rest) = $swap_fn(parts);
+                $self.string = Some(rest);
+                Some(piece)
+            }
+            None => $self.string.take(),
+        }
+    }};
+}
+
+// Implemented by hand: the fields are cloned individually, so only
+// `P::__Encoded` needs to be `Clone`, not `P` itself.
+impl<P> Clone for Split<'_, P>
+where
+    P: Pattern,
+{
+    #[inline]
+    fn clone(&self) -> Self {
+        let Self { string, pat } = self;
+        Self {
+            string: *string,
+            pat: pat.clone(),
+        }
+    }
+}
+
+// Formats as a struct named "Split" with its `string` and `pat` fields.
+impl<P> Debug for Split<'_, P>
+where
+    P: Pattern,
+{
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let mut output = f.debug_struct("Split");
+        let _ = output.field("string", &self.string);
+        let _ = output.field("pat", &self.pat);
+        output.finish()
+    }
+}
+
+impl<P> DoubleEndedIterator for Split<'_, P>
+where
+    P: Pattern,
+{
+    /// Yields the piece after the last separator, keeping everything before
+    /// it for later calls.
+    fn next_back(&mut self) -> Option<Self::Item> {
+        impl_next!(self, rsplit_once_raw, |(before, after)| (after, before))
+    }
+}
+
+// Once the stored string is `None`, `next` and `next_back` keep returning
+// `None`.
+impl<P> FusedIterator for Split<'_, P> where P: Pattern {}
+
+impl<'a, P> Iterator for Split<'a, P>
+where
+    P: Pattern,
+{
+    type Item = &'a RawOsStr;
+
+    /// Yields the piece before the first separator, keeping everything after
+    /// it for later calls.
+    fn next(&mut self) -> Option<Self::Item> {
+        impl_next!(self, split_once_raw, convert::identity)
+    }
+
+    /// Takes the final piece directly from the back instead of walking the
+    /// whole iterator.
+    #[inline]
+    fn last(mut self) -> Option<Self::Item> {
+        self.next_back()
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/lib.rs b/third_party/rust/os_str_bytes/src/lib.rs
new file mode 100644
index 0000000000..40154c99be
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/lib.rs
@@ -0,0 +1,623 @@
+//! This crate allows interacting with the data stored by [`OsStr`] and
+//! [`OsString`], without resorting to panics or corruption for invalid UTF-8.
+//! Thus, methods can be used that are already defined on [`[u8]`][slice] and
+//! [`Vec<u8>`].
+//!
+//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`]
+//! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which
+//! requires the bytes to be valid in UTF-8. However, since this crate makes
+//! conversions directly between the platform encoding and raw bytes, even some
+//! strings invalid in UTF-8 can be converted.
+//!
+//! In most cases, [`RawOsStr`] and [`RawOsString`] should be used.
+//! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are
+//! easier to misuse.
+//!
+//! # Encoding
+//!
+//! The encoding of bytes returned or accepted by methods of this crate is
+//! intentionally left unspecified. It may vary for different platforms, so
+//! defining it would run contrary to the goal of generic string handling.
+//! However, the following invariants will always be upheld:
+//!
+//! - The encoding will be compatible with UTF-8. In particular, splitting an
+//! encoded byte sequence by a UTF-8&ndash;encoded character always produces
+//! other valid byte sequences. They can be re-encoded without error using
+//! [`RawOsString::into_os_string`] and similar methods.
+//!
+//! - All characters valid in platform strings are representable. [`OsStr`] and
+//! [`OsString`] can always be losslessly reconstructed from extracted bytes.
+//!
+//! Note that the chosen encoding may not match how Rust stores these strings
+//! internally, which is undocumented. For instance, the result of calling
+//! [`OsStr::len`] will not necessarily match the number of bytes this crate
+//! uses to represent the same string.
+//!
+//! Additionally, concatenation may yield unexpected results without a UTF-8
+//! separator. If two platform strings need to be concatenated, the only safe
+//! way to do so is using [`OsString::push`]. This limitation also makes it
+//! undesirable to use the bytes in interchange.
+//!
+//! Since this encoding can change between versions and platforms, it should
+//! not be used for storage. The standard library provides implementations of
+//! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be
+//! preferred for that use case.
+//!
+//! # User Input
+//!
+//! Traits in this crate should ideally not be used to convert byte sequences
+//! that did not originate from [`OsStr`] or a related struct. The encoding
+//! used by this crate is an implementation detail, so it does not make sense
+//! to expose it to users.
+//!
+//! Crate [bstr] offers some useful alternative methods, such as
+//! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant
+//! for user input. But, they reject some byte sequences used to represent
+//! valid platform strings, which would be undesirable for reliable path
+//! handling. They are best used only when accepting unknown input.
+//!
+//! This crate is meant to help when you already have an instance of [`OsStr`]
+//! and need to modify the data in a lossless way.
+//!
+//! # Features
+//!
+//! These features are optional and can be enabled or disabled in a
+//! "Cargo.toml" file.
+//!
+//! ### Default Features
+//!
+//! - **memchr** -
+//! Changes the implementation to use crate [memchr] for better performance.
+//! This feature is useless when "raw\_os\_str" is disabled.
+//!
+//! For more information, see [`RawOsStr`][memchr complexity].
+//!
+//! - **raw\_os\_str** -
+//! Provides:
+//! - [`iter`]
+//! - [`Pattern`]
+//! - [`RawOsStr`]
+//! - [`RawOsStrCow`]
+//! - [`RawOsString`]
+//!
+//! ### Optional Features
+//!
+//! - **checked\_conversions** -
+//! Provides:
+//! - [`EncodingError`]
+//! - [`OsStrBytes::from_raw_bytes`]
+//! - [`OsStringBytes::from_raw_vec`]
+//! - [`RawOsStr::from_raw_bytes`]
+//! - [`RawOsString::from_raw_vec`]
+//!
+//! Because this feature should not be used in libraries, the
+//! "OS_STR_BYTES_CHECKED_CONVERSIONS" environment variable must be defined
+//! during compilation.
+//!
+//! - **print\_bytes** -
+//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and
+//! [`RawOsString`].
+//!
+//! - **uniquote** -
+//! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and
+//! [`RawOsString`].
+//!
+//! # Implementation
+//!
+//! Some methods return [`Cow`] to account for platform differences. However,
+//! no guarantee is made that the same variant of that enum will always be
+//! returned for the same platform. Whichever can be constructed most
+//! efficiently will be returned.
+//!
+//! All traits are [sealed], meaning that they can only be implemented by this
+//! crate. Otherwise, backward compatibility would be more difficult to
+//! maintain for new features.
+//!
+//! # Complexity
+//!
+//! Conversion method complexities will vary based on what functionality is
+//! available for the platform. At worst, they will all be linear, but some can
+//! take constant time. For example, [`RawOsString::into_os_string`] might be
+//! able to reuse its allocation.
+//!
+//! # Examples
+//!
+//! ```
+//! # use std::io;
+//! #
+//! # #[cfg(feature = "raw_os_str")]
+//! # {
+//! # #[cfg(any())]
+//! use std::env;
+//! use std::fs;
+//!
+//! use os_str_bytes::RawOsStr;
+//!
+//! # mod env {
+//! # use std::env;
+//! # use std::ffi::OsString;
+//! #
+//! # pub fn args_os() -> impl Iterator<Item = OsString> {
+//! # let mut file = env::temp_dir();
+//! # file.push("os_str_bytes\u{E9}.txt");
+//! # return vec![OsString::new(), file.into_os_string()].into_iter();
+//! # }
+//! # }
+//! #
+//! for file in env::args_os().skip(1) {
+//! if !RawOsStr::new(&file).starts_with('-') {
+//! let string = "Hello, world!";
+//! fs::write(&file, string)?;
+//! assert_eq!(string, fs::read_to_string(file)?);
+//! }
+//! }
+//! # }
+//! #
+//! # Ok::<_, io::Error>(())
+//! ```
+//!
+//! [bstr]: https://crates.io/crates/bstr
+//! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str
+//! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string
+//! [memchr complexity]: RawOsStr#complexity
+//! [memchr]: https://crates.io/crates/memchr
+//! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+//! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed
+//! [print\_bytes]: https://crates.io/crates/print_bytes
+
+#![cfg_attr(not(feature = "checked_conversions"), allow(deprecated))]
+// Only require a nightly compiler when building documentation for docs.rs.
+// This is a private option that should not be used.
+// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407
+// https://github.com/dylni/os_str_bytes/issues/2
+#![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))]
+// Nightly is also currently required for the SGX platform.
+#![cfg_attr(
+ all(target_vendor = "fortanix", target_env = "sgx"),
+ feature(sgx_platform)
+)]
+#![warn(unsafe_op_in_unsafe_fn)]
+#![warn(unused_results)]
+
+use std::borrow::Cow;
+use std::error::Error;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fmt;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::path::Path;
+use std::path::PathBuf;
+use std::result;
+
+// Emits each given item only when the "checked_conversions" feature is
+// enabled.
+macro_rules! if_checked_conversions {
+    ( $($entry:item)+ ) => {
+        $(
+            #[cfg(feature = "checked_conversions")]
+            $entry
+        )+
+    };
+}
+
+// Compile-time guard: with the feature enabled (and outside docs.rs builds),
+// compilation fails unless the environment variable below is defined.
+#[cfg(not(os_str_bytes_docs_rs))]
+if_checked_conversions! {
+    const _: &str = env!(
+        "OS_STR_BYTES_CHECKED_CONVERSIONS",
+        "The 'OS_STR_BYTES_CHECKED_CONVERSIONS' environment variable must be \
+         defined to use the 'checked_conversions' feature.",
+    );
+}
+
+// Marks an item as deprecated, with the given note, unless the
+// "checked_conversions" feature is enabled.
+#[rustfmt::skip]
+macro_rules! deprecated_checked_conversion {
+    ( $note:expr , $target:item ) => {
+        #[cfg_attr(
+            not(feature = "checked_conversions"),
+            deprecated = $note
+        )]
+        $target
+    };
+}
+
+// Unwraps a conversion result, panicking with a fixed message on failure.
+macro_rules! expect_encoded {
+    ( $conversion:expr ) => {
+        $conversion.expect("invalid raw bytes")
+    };
+}
+
+// Emits each given item only when the "raw_os_str" feature is enabled.
+macro_rules! if_raw_str {
+    ( $($entry:item)+ ) => {
+        $(
+            #[cfg(feature = "raw_os_str")]
+            $entry
+        )+
+    };
+}
+
+#[cfg_attr(
+ all(target_family = "wasm", target_os = "unknown"),
+ path = "wasm/mod.rs"
+)]
+#[cfg_attr(windows, path = "windows/mod.rs")]
+#[cfg_attr(
+ not(any(all(target_family = "wasm", target_os = "unknown"), windows)),
+ path = "common/mod.rs"
+)]
+mod imp;
+
+#[cfg(any(
+ all(
+ feature = "raw_os_str",
+ target_family = "wasm",
+ target_os = "unknown",
+ ),
+ windows,
+))]
+mod util;
+
+if_raw_str! {
+ pub mod iter;
+
+ mod pattern;
+ pub use pattern::Pattern;
+
+ mod raw_str;
+ pub use raw_str::RawOsStr;
+ pub use raw_str::RawOsStrCow;
+ pub use raw_str::RawOsString;
+}
+
+deprecated_checked_conversion! {
+    "use `OsStrBytes::assert_from_raw_bytes` or \
+     `OsStringBytes::assert_from_raw_vec` instead, or enable the \
+     'checked_conversions' feature",
+    /// The error produced when a byte sequence cannot be represented in the
+    /// platform encoding.
+    ///
+    /// Results carrying this error should almost always be passed to
+    /// [`Result::unwrap`]. Because [the module-level documentation][encoding]
+    /// discourages using encoded bytes in interchange, callers should already
+    /// know whether their byte sequences are properly encoded for the
+    /// platform. Results are returned mainly so that panicking behavior is
+    /// explicit.
+    ///
+    /// This error is never returned on Unix. If that needs to be guaranteed,
+    /// use [`OsStrExt`] or [`OsStringExt`] instead.
+    ///
+    /// [encoding]: self#encoding
+    /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+    /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+    /// [`Result::unwrap`]: ::std::result::Result::unwrap
+    #[derive(Clone, Debug, Eq, PartialEq)]
+    #[cfg_attr(
+        os_str_bytes_docs_rs,
+        doc(cfg(feature = "checked_conversions"))
+    )]
+    pub struct EncodingError(imp::EncodingError);
+}
+
+impl Display for EncodingError {
+ #[inline]
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+// No methods are overridden; the trait's defaults are used.
+impl Error for EncodingError {}
+
+// Result alias for the public API, using the public `EncodingError` wrapper.
+type Result<T> = result::Result<T, EncodingError>;
+
+/// Converts raw bytes into a platform string, preserving ownership.
+///
+/// Borrowed input goes through `imp::os_str_from_bytes`; owned input goes
+/// through `imp::os_string_from_vec` and is returned as the owned variant.
+/// The platform-specific error is returned unchanged.
+fn from_raw_bytes<'a, S>(
+    string: S,
+) -> result::Result<Cow<'a, OsStr>, imp::EncodingError>
+where
+    S: Into<Cow<'a, [u8]>>,
+{
+    let bytes: Cow<'a, [u8]> = string.into();
+    match bytes {
+        Cow::Owned(vec) => Ok(Cow::Owned(imp::os_string_from_vec(vec)?)),
+        Cow::Borrowed(slice) => imp::os_str_from_bytes(slice),
+    }
+}
+
+/// Converts a possibly-owned [`OsStr`] into a possibly-owned [`Path`],
+/// keeping the same [`Cow`] variant.
+fn cow_os_str_into_path(string: Cow<'_, OsStr>) -> Cow<'_, Path> {
+    match string {
+        Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)),
+        Cow::Borrowed(borrowed) => Cow::Borrowed(Path::new(borrowed)),
+    }
+}
+
+/// A platform-independent counterpart to [`OsStrExt`].
+///
+/// See [the module-level documentation][module] for more information.
+///
+/// [module]: self
+/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+pub trait OsStrBytes: private::Sealed + ToOwned {
+    /// Builds the platform-native string equivalent to a byte string.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the bytes are not valid for the [unspecified encoding]
+    /// that this crate uses.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::env;
+    /// use std::ffi::OsStr;
+    /// # use std::io;
+    ///
+    /// use os_str_bytes::OsStrBytes;
+    ///
+    /// let os_string = env::current_exe()?;
+    /// let os_bytes = os_string.to_raw_bytes();
+    /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes));
+    /// #
+    /// # Ok::<_, io::Error>(())
+    /// ```
+    ///
+    /// [unspecified encoding]: self#encoding
+    #[must_use = "method should not be used for validation"]
+    #[track_caller]
+    fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+    where
+        S: Into<Cow<'a, [u8]>>;
+
+    deprecated_checked_conversion! {
+        "use `assert_from_raw_bytes` instead, or enable the \
+         'checked_conversions' feature",
+        /// Builds the platform-native string equivalent to a byte string,
+        /// reporting invalid input as an error.
+        ///
+        /// Prefer [`assert_from_raw_bytes`] in almost all cases; see
+        /// [`EncodingError`] for the reasoning.
+        ///
+        /// # Errors
+        ///
+        /// See documentation for [`EncodingError`].
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// use std::env;
+        /// use std::ffi::OsStr;
+        /// # use std::io;
+        ///
+        /// use os_str_bytes::OsStrBytes;
+        ///
+        /// let os_string = env::current_exe()?;
+        /// let os_bytes = os_string.to_raw_bytes();
+        /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
+        /// #
+        /// # Ok::<_, io::Error>(())
+        /// ```
+        ///
+        /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
+        #[cfg_attr(
+            os_str_bytes_docs_rs,
+            doc(cfg(feature = "checked_conversions"))
+        )]
+        fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+        where
+            S: Into<Cow<'a, [u8]>>;
+    }
+
+    /// Produces the byte string equivalent to this platform-native string.
+    ///
+    /// The bytes are in an [unspecified encoding].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsStr;
+    ///
+    /// use os_str_bytes::OsStrBytes;
+    ///
+    /// let string = "foobar";
+    /// let os_string = OsStr::new(string);
+    /// assert_eq!(string.as_bytes(), &*os_string.to_raw_bytes());
+    /// ```
+    ///
+    /// [unspecified encoding]: self#encoding
+    #[must_use]
+    fn to_raw_bytes(&self) -> Cow<'_, [u8]>;
+}
+
+// Each method defers to the crate-level `from_raw_bytes` helper or to the
+// platform module `imp`.
+impl OsStrBytes for OsStr {
+    #[inline]
+    fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+    where
+        S: Into<Cow<'a, [u8]>>,
+    {
+        let converted = from_raw_bytes(string);
+        expect_encoded!(converted)
+    }
+
+    #[inline]
+    fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+    where
+        S: Into<Cow<'a, [u8]>>,
+    {
+        // Wrap the platform error in the public error type.
+        let converted = from_raw_bytes(string);
+        converted.map_err(EncodingError)
+    }
+
+    #[inline]
+    fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
+        imp::os_str_to_bytes(self)
+    }
+}
+
+// Paths reuse the `OsStr` implementation and convert the result.
+impl OsStrBytes for Path {
+    #[inline]
+    fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+    where
+        S: Into<Cow<'a, [u8]>>,
+    {
+        let os_string = OsStr::assert_from_raw_bytes(string);
+        cow_os_str_into_path(os_string)
+    }
+
+    #[inline]
+    fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+    where
+        S: Into<Cow<'a, [u8]>>,
+    {
+        let os_string = OsStr::from_raw_bytes(string)?;
+        Ok(cow_os_str_into_path(os_string))
+    }
+
+    #[inline]
+    fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
+        OsStrBytes::to_raw_bytes(self.as_os_str())
+    }
+}
+
+/// A platform-independent counterpart to [`OsStringExt`].
+///
+/// See [the module-level documentation][module] for more information.
+///
+/// [module]: self
+/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+pub trait OsStringBytes: private::Sealed + Sized {
+    /// Builds the platform-native string equivalent to a byte string.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the bytes are not valid for the [unspecified encoding]
+    /// that this crate uses.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::env;
+    /// use std::ffi::OsString;
+    /// # use std::io;
+    ///
+    /// use os_str_bytes::OsStringBytes;
+    ///
+    /// let os_string = env::current_exe()?;
+    /// let os_bytes = os_string.clone().into_raw_vec();
+    /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes));
+    /// #
+    /// # Ok::<_, io::Error>(())
+    /// ```
+    ///
+    /// [unspecified encoding]: self#encoding
+    #[must_use = "method should not be used for validation"]
+    #[track_caller]
+    fn assert_from_raw_vec(string: Vec<u8>) -> Self;
+
+    deprecated_checked_conversion! {
+        "use `assert_from_raw_vec` instead, or enable the \
+         'checked_conversions' feature",
+        /// Builds the platform-native string equivalent to a byte string,
+        /// reporting invalid input as an error.
+        ///
+        /// Prefer [`assert_from_raw_vec`] in almost all cases; see
+        /// [`EncodingError`] for the reasoning.
+        ///
+        /// # Errors
+        ///
+        /// See documentation for [`EncodingError`].
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// use std::env;
+        /// use std::ffi::OsString;
+        /// # use std::io;
+        ///
+        /// use os_str_bytes::OsStringBytes;
+        ///
+        /// let os_string = env::current_exe()?;
+        /// let os_bytes = os_string.clone().into_raw_vec();
+        /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
+        /// #
+        /// # Ok::<_, io::Error>(())
+        /// ```
+        ///
+        /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
+        #[cfg_attr(
+            os_str_bytes_docs_rs,
+            doc(cfg(feature = "checked_conversions"))
+        )]
+        fn from_raw_vec(string: Vec<u8>) -> Result<Self>;
+    }
+
+    /// Produces the byte string equivalent to this platform-native string.
+    ///
+    /// The bytes are in an [unspecified encoding].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    ///
+    /// use os_str_bytes::OsStringBytes;
+    ///
+    /// let string = "foobar".to_owned();
+    /// let os_string: OsString = string.clone().into();
+    /// assert_eq!(string.into_bytes(), os_string.into_raw_vec());
+    /// ```
+    ///
+    /// [unspecified encoding]: self#encoding
+    #[must_use]
+    fn into_raw_vec(self) -> Vec<u8>;
+}
+
+// Each method defers to the platform module `imp`.
+impl OsStringBytes for OsString {
+    #[inline]
+    fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+        let converted = imp::os_string_from_vec(string);
+        expect_encoded!(converted)
+    }
+
+    #[inline]
+    fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
+        // Wrap the platform error in the public error type.
+        let converted = imp::os_string_from_vec(string);
+        converted.map_err(EncodingError)
+    }
+
+    #[inline]
+    fn into_raw_vec(self) -> Vec<u8> {
+        imp::os_string_into_vec(self)
+    }
+}
+
+// Path buffers reuse the `OsString` implementation and convert the result.
+impl OsStringBytes for PathBuf {
+    #[inline]
+    fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+        PathBuf::from(OsString::assert_from_raw_vec(string))
+    }
+
+    #[inline]
+    fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
+        let os_string = OsString::from_raw_vec(string)?;
+        Ok(PathBuf::from(os_string))
+    }
+
+    #[inline]
+    fn into_raw_vec(self) -> Vec<u8> {
+        let os_string = self.into_os_string();
+        os_string.into_raw_vec()
+    }
+}
+
+// Home of the sealing trait: the public traits of this crate require
+// `Sealed`, so only the types listed here can implement them.
+mod private {
+    use std::ffi::OsStr;
+    use std::ffi::OsString;
+    use std::path::Path;
+    use std::path::PathBuf;
+
+    if_raw_str! {
+        use std::borrow::Cow;
+
+        use super::RawOsStr;
+    }
+
+    pub trait Sealed {}
+
+    // Platform string and path types.
+    impl Sealed for OsStr {}
+    impl Sealed for OsString {}
+    impl Sealed for Path {}
+    impl Sealed for PathBuf {}
+
+    // Character and string types.
+    impl Sealed for char {}
+    impl Sealed for &str {}
+    impl Sealed for &String {}
+
+    if_raw_str! {
+        impl Sealed for Cow<'_, RawOsStr> {}
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/pattern.rs b/third_party/rust/os_str_bytes/src/pattern.rs
new file mode 100644
index 0000000000..11f86bf31d
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/pattern.rs
@@ -0,0 +1,71 @@
+use std::fmt::Debug;
+
+use super::private;
+
+// Access to the encoded bytes of a search pattern.
+pub trait Encoded {
+    // Returns the pattern's bytes.
+    fn __get(&self) -> &[u8];
+}
+
+// A `char` pattern stored as bytes in a fixed-size buffer.
+#[derive(Clone, Debug)]
+pub struct EncodedChar {
+    // Storage for the encoded bytes; only the first `length` are meaningful.
+    buffer: [u8; 4],
+    // Number of bytes of `buffer` in use.
+    length: usize,
+}
+
+impl Encoded for EncodedChar {
+    // Returns only the used prefix of the buffer.
+    fn __get(&self) -> &[u8] {
+        let Self { buffer, length } = self;
+        &buffer[..*length]
+    }
+}
+
+impl Encoded for &str {
+    // A string pattern is used as its own bytes.
+    fn __get(&self) -> &[u8] {
+        (*self).as_bytes()
+    }
+}
+
+/// Marks a type as usable for searching within [`RawOsStr`] and
+/// [`RawOsString`].
+///
+/// This trait closely resembles [`str::pattern::Pattern`]. It differs in that
+/// its methods are private and it is implemented for a different set of
+/// types.
+///
+/// [`RawOsStr`]: super::RawOsStr
+/// [`RawOsString`]: super::RawOsString
+/// [`str::pattern::Pattern`]: ::std::str::pattern::Pattern
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+pub trait Pattern: private::Sealed {
+    // Encoded form of the pattern.
+    #[doc(hidden)]
+    type __Encoded: Clone + Debug + Encoded;
+
+    // Consumes the pattern and returns its encoded form.
+    #[doc(hidden)]
+    fn __encode(self) -> Self::__Encoded;
+}
+
+impl Pattern for char {
+    type __Encoded = EncodedChar;
+
+    // Encodes the character as UTF-8 into a zeroed buffer and records how
+    // many bytes were written.
+    fn __encode(self) -> Self::__Encoded {
+        let mut buffer = [0; 4];
+        let length = self.encode_utf8(&mut buffer).len();
+        EncodedChar { buffer, length }
+    }
+}
+
+impl Pattern for &str {
+    type __Encoded = Self;
+
+    // A string slice needs no conversion; it is its own encoded form.
+    fn __encode(self) -> Self::__Encoded {
+        self
+    }
+}
+
+impl<'a> Pattern for &'a String {
+    type __Encoded = <&'a str as Pattern>::__Encoded;
+
+    // Borrows the string as a slice and reuses the `&str` implementation.
+    fn __encode(self) -> Self::__Encoded {
+        Pattern::__encode(self.as_str())
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/raw_str.rs b/third_party/rust/os_str_bytes/src/raw_str.rs
new file mode 100644
index 0000000000..659b34d9cb
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/raw_str.rs
@@ -0,0 +1,1547 @@
+use std::borrow::Borrow;
+use std::borrow::Cow;
+use std::borrow::ToOwned;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fmt;
+use std::fmt::Debug;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::mem;
+use std::ops::Deref;
+use std::ops::Index;
+use std::ops::Range;
+use std::ops::RangeFrom;
+use std::ops::RangeFull;
+use std::ops::RangeInclusive;
+use std::ops::RangeTo;
+use std::ops::RangeToInclusive;
+use std::result;
+use std::str;
+
+#[cfg(feature = "memchr")]
+use memchr::memmem::find;
+#[cfg(feature = "memchr")]
+use memchr::memmem::rfind;
+
+use super::imp;
+use super::imp::raw;
+use super::iter::Split;
+use super::pattern::Encoded as EncodedPattern;
+use super::private;
+use super::Pattern;
+
+if_checked_conversions! {
+ use super::EncodingError;
+ use super::Result;
+}
+
+#[cfg(not(feature = "memchr"))] // fallback; with the feature on, `memchr::memmem::find` is imported instead
+fn find(string: &[u8], pat: &[u8]) -> Option<usize> {
+ (0..=string.len().checked_sub(pat.len())?) // `?`: returns `None` early when `pat` is longer than `string`
+ .find(|&x| string[x..].starts_with(pat)) // lowest start offset at which `pat` occurs
+}
+
+#[cfg(not(feature = "memchr"))] // fallback; with the feature on, `memchr::memmem::rfind` is imported instead
+fn rfind(string: &[u8], pat: &[u8]) -> Option<usize> {
+ (pat.len()..=string.len()) // candidate END offsets; the range is empty when `pat` is longer than `string`
+ .rfind(|&x| string[..x].ends_with(pat)) // highest end offset at which `pat` occurs
+ .map(|x| x - pat.len()) // turn the end offset back into a start offset
+}
+
+#[allow(clippy::missing_safety_doc)] // private trait; its contract is stated at the `unsafe impl`s
+unsafe trait TransmuteBox {
+ fn transmute_box<R>(self: Box<Self>) -> Box<R>
+ where
+ R: ?Sized + TransmuteBox,
+ {
+ let value = Box::into_raw(self); // release ownership so the allocation is handed to the new box
+ // SAFETY: This trait is only implemented for types that can be
+ // transmuted into each other (`RawOsStr` and its inner `[u8]`).
+ unsafe { Box::from_raw(mem::transmute_copy(&value)) }
+ }
+}
+
+// SAFETY: `RawOsStr` is a `#[repr(transparent)]` wrapper around `[u8]`.
+unsafe impl TransmuteBox for RawOsStr {}
+unsafe impl TransmuteBox for [u8] {} // SAFETY: the wrapped type of `RawOsStr`
+
+/// A container for borrowed byte strings converted by this crate.
+///
+/// This wrapper is intended to prevent violating the invariants of the
+/// [unspecified encoding] used by this crate and minimize encoding
+/// conversions.
+///
+/// # Indices
+///
+/// Methods of this struct that accept indices require that the index lie on a
+/// UTF-8 boundary. Although it is possible to manipulate platform strings
+/// based on other indices, this crate currently does not support them for
+/// slicing methods. They would add significant complication to the
+/// implementation and are generally not necessary. However, all indices
+/// returned by this struct can be used for slicing.
+///
+/// On Unix, all indices are permitted, to avoid false positives. However,
+/// relying on this implementation detail is discouraged. Platform-specific
+/// indices are error-prone.
+///
+/// # Complexity
+///
+/// All searching methods have worst-case multiplicative time complexity (i.e.,
+/// `O(self.raw_len() * pat.len())`). Enabling the "memchr" feature allows
+/// these methods to instead run in linear time in the worst case (documented
+/// for [`memchr::memmem::find`][memchr complexity]).
+///
+/// # Safety
+///
+/// Although this type is annotated with `#[repr(transparent)]`, the inner
+/// representation is not stable. Transmuting between this type and any other
+/// causes immediate undefined behavior.
+///
+/// [memchr complexity]: memchr::memmem::find#complexity
+/// [unspecified encoding]: super#encoding
+#[derive(Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+#[repr(transparent)]
+pub struct RawOsStr([u8]);
+
+impl RawOsStr {
+ const fn from_inner(string: &[u8]) -> &Self { // performs no validation of `string`
+ // SAFETY: `Self` is `#[repr(transparent)]` over `[u8]`.
+ unsafe { mem::transmute(string) }
+ }
+
+ /// Converts a platform-native string into a representation that can be
+ /// more easily manipulated.
+ ///
+ /// This method performs the necessary conversion immediately, so it can be
+ /// expensive to call. It is recommended to continue using the returned
+ /// instance as long as possible (instead of the original [`OsStr`]), to
+ /// avoid repeated conversions.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// println!("{:?}", RawOsStr::new(&os_string));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn new(string: &OsStr) -> Cow<'_, Self> {
+ match imp::os_str_to_bytes(string) {
+ Cow::Borrowed(string) => Cow::Borrowed(Self::from_inner(string)), // borrowed bytes: wrap in place
+ Cow::Owned(string) => Cow::Owned(RawOsString(string)), // `imp` returned a new buffer: keep it as the owned type
+ }
+ }
+
+ /// Wraps a string, without copying or encoding conversion.
+ ///
+ /// This method is much more efficient than [`RawOsStr::new`], since the
+ /// [encoding] used by this crate is compatible with UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let string = "foobar";
+ /// let raw = RawOsStr::from_str(string);
+ /// assert_eq!(string, raw);
+ /// ```
+ ///
+ /// [encoding]: super#encoding
+ #[allow(clippy::should_implement_trait)]
+ #[inline]
+ #[must_use]
+ pub fn from_str(string: &str) -> &Self {
+ Self::from_inner(string.as_bytes())
+ }
+
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// let raw_bytes = raw.as_raw_bytes();
+ /// assert_eq!(&*raw, RawOsStr::assert_from_raw_bytes(raw_bytes));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ pub fn assert_from_raw_bytes(string: &[u8]) -> &Self {
+ expect_encoded!(raw::validate_bytes(string));
+
+ Self::from_inner(string)
+ }
+
+ if_checked_conversions! {
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// [`assert_from_raw_bytes`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// assert_eq!(Ok(&*raw), RawOsStr::from_raw_bytes(raw.as_raw_bytes()));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ #[inline]
+ pub fn from_raw_bytes(string: &[u8]) -> Result<&Self> {
+ raw::validate_bytes(string)
+ .map(|()| Self::from_inner(string))
+ .map_err(EncodingError)
+ }
+ }
+
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Safety
+ ///
+ /// The string must be valid for the [unspecified encoding] used by this
+ /// crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// let raw_bytes = raw.as_raw_bytes();
+ /// assert_eq!(&*raw, unsafe {
+ /// RawOsStr::from_raw_bytes_unchecked(raw_bytes)
+ /// });
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub unsafe fn from_raw_bytes_unchecked(string: &[u8]) -> &Self {
+ if cfg!(debug_assertions) { // the input is only validated in builds with debug assertions
+ expect_encoded!(raw::validate_bytes(string));
+ }
+
+ Self::from_inner(string)
+ }
+
+ /// Returns the byte string stored by this container.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let string = "foobar";
+ /// let raw = RawOsStr::from_str(string);
+ /// assert_eq!(string.as_bytes(), raw.as_raw_bytes());
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use]
+ pub fn as_raw_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ /// Equivalent to [`str::contains`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert!(raw.contains("oo"));
+ /// assert!(!raw.contains("of"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn contains<P>(&self, pat: P) -> bool
+ where
+ P: Pattern,
+ {
+ self.find(pat).is_some()
+ }
+
+ /// Equivalent to [`str::ends_with`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert!(raw.ends_with("bar"));
+ /// assert!(!raw.ends_with("foo"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn ends_with<P>(&self, pat: P) -> bool
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ let pat = pat.__get();
+
+ self.0.ends_with(pat)
+ }
+
+ /// Equivalent to [`str::ends_with`] but accepts this type for the pattern.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert!(raw.ends_with_os(RawOsStr::from_str("bar")));
+ /// assert!(!raw.ends_with_os(RawOsStr::from_str("foo")));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn ends_with_os(&self, pat: &Self) -> bool {
+ raw::ends_with(&self.0, &pat.0) // platform helper, not `<[u8]>::ends_with`; NOTE(review): rationale lives in `imp::raw` — confirm there
+ }
+
+ /// Equivalent to [`str::find`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert_eq!(Some(1), raw.find("o"));
+ /// assert_eq!(None, raw.find("of"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn find<P>(&self, pat: P) -> Option<usize>
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ let pat = pat.__get();
+
+ find(&self.0, pat)
+ }
+
+ /// Equivalent to [`str::is_empty`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// assert!(RawOsStr::from_str("").is_empty());
+ /// assert!(!RawOsStr::from_str("foobar").is_empty());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Returns the length of the byte string stored by this container.
+ ///
+ /// Only the following assumptions can be made about the result:
+ /// - The length of any Unicode character is the length of its UTF-8
+ /// representation (i.e., [`char::len_utf8`]).
+ /// - Splitting a string at a UTF-8 boundary will return two strings with
+ /// lengths that sum to the length of the original string.
+ ///
+ /// This method may return a different result than would [`OsStr::len`]
+ /// when called on the same string, since [`OsStr`] uses an unspecified
+ /// encoding.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// assert_eq!(6, RawOsStr::from_str("foobar").raw_len());
+ /// assert_eq!(0, RawOsStr::from_str("").raw_len());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn raw_len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Equivalent to [`str::rfind`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert_eq!(Some(2), raw.rfind("o"));
+ /// assert_eq!(None, raw.rfind("of"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn rfind<P>(&self, pat: P) -> Option<usize>
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ let pat = pat.__get();
+
+ rfind(&self.0, pat)
+ }
+
+ fn split_once_raw_with<P, F>(
+ &self,
+ pat: &P,
+ find_fn: F, // `find` or `rfind`: chooses whether the first or the last match is the split point
+ ) -> Option<(&Self, &Self)>
+ where
+ F: FnOnce(&[u8], &[u8]) -> Option<usize>,
+ P: EncodedPattern,
+ {
+ let pat = pat.__get();
+
+ let index = find_fn(&self.0, pat)?; // `None` when the pattern does not occur
+ let prefix = &self.0[..index];
+ let suffix = &self.0[index + pat.len()..]; // the matched pattern itself is dropped
+ Some((Self::from_inner(prefix), Self::from_inner(suffix)))
+ }
+
+ pub(super) fn rsplit_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
+ where
+ P: EncodedPattern,
+ {
+ self.split_once_raw_with(pat, rfind)
+ }
+
+ /// Equivalent to [`str::rsplit_once`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert_eq!(
+ /// Some((RawOsStr::from_str("fo"), RawOsStr::from_str("bar"))),
+ /// raw.rsplit_once("o"),
+ /// );
+ /// assert_eq!(None, raw.rsplit_once("of"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn rsplit_once<P>(&self, pat: P) -> Option<(&Self, &Self)>
+ where
+ P: Pattern,
+ {
+ self.rsplit_once_raw(&pat.__encode())
+ }
+
+ // Modeled on: https://github.com/rust-lang/rust/blob/49c68bd53f90e375bfb3cbba8c1c67a9e0adb9c0/src/libcore/str/mod.rs#L2184-L2221
+ #[cold]
+ #[inline(never)]
+ #[track_caller]
+ fn index_boundary_error(&self, index: usize) -> ! {
+ debug_assert!(raw::is_continuation(self.0[index])); // only meant to be called for a byte inside a sequence
+
+ let start = expect_encoded!(self.0[..index] // search backwards for the byte that begins the sequence
+ .iter()
+ .rposition(|&x| !raw::is_continuation(x)));
+ let mut end = index + 1;
+ end += self.0[end..] // extend past the continuation bytes that follow `index`
+ .iter()
+ .take_while(|&&x| raw::is_continuation(x))
+ .count();
+ let code_point = raw::decode_code_point(&self.0[start..end]); // decoded only to name it in the message
+ panic!(
+ "byte index {} is not a valid boundary; it is inside U+{:04X} \
+ (bytes {}..{})",
+ index, code_point, start, end,
+ );
+ }
+
+ #[track_caller]
+ fn check_bound(&self, index: usize) {
+ if let Some(&byte) = self.0.get(index) { // indices at or past the end are not checked here
+ if raw::is_continuation(byte) { // `index` points into the middle of a multi-byte sequence
+ self.index_boundary_error(index);
+ }
+ }
+ }
+
+ /// Equivalent to [`str::split`], but empty patterns are not accepted.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the pattern is empty.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert_eq!(["f", "", "bar"], *raw.split("o").collect::<Vec<_>>());
+ /// ```
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub fn split<P>(&self, pat: P) -> Split<'_, P>
+ where
+ P: Pattern,
+ {
+ Split::new(self, pat)
+ }
+
+ /// Equivalent to [`str::split_at`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if the index is not a [valid boundary] or is past the end.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert_eq!(
+ /// (RawOsStr::from_str("fo"), RawOsStr::from_str("obar")),
+ /// raw.split_at(2),
+ /// );
+ /// ```
+ ///
+ /// [valid boundary]: #indices
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
+ self.check_bound(mid); // rejects an index inside a multi-byte sequence
+
+ let (prefix, suffix) = self.0.split_at(mid); // the slice method panics when `mid > len`
+ (Self::from_inner(prefix), Self::from_inner(suffix))
+ }
+
+ pub(super) fn split_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
+ where
+ P: EncodedPattern,
+ {
+ self.split_once_raw_with(pat, find)
+ }
+
+ /// Equivalent to [`str::split_once`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert_eq!(
+ /// Some((RawOsStr::from_str("f"), RawOsStr::from_str("obar"))),
+ /// raw.split_once("o"),
+ /// );
+ /// assert_eq!(None, raw.split_once("of"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn split_once<P>(&self, pat: P) -> Option<(&Self, &Self)>
+ where
+ P: Pattern,
+ {
+ self.split_once_raw(&pat.__encode())
+ }
+
+ /// Equivalent to [`str::starts_with`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert!(raw.starts_with("foo"));
+ /// assert!(!raw.starts_with("bar"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn starts_with<P>(&self, pat: P) -> bool
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ let pat = pat.__get();
+
+ self.0.starts_with(pat)
+ }
+
+ /// Equivalent to [`str::starts_with`] but accepts this type for the
+ /// pattern.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("foobar");
+ /// assert!(raw.starts_with_os(RawOsStr::from_str("foo")));
+ /// assert!(!raw.starts_with_os(RawOsStr::from_str("bar")));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn starts_with_os(&self, pat: &Self) -> bool {
+ raw::starts_with(&self.0, &pat.0) // platform helper, not `<[u8]>::starts_with`; NOTE(review): rationale lives in `imp::raw` — confirm there
+ }
+
+ /// Equivalent to [`str::strip_prefix`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("111foo1bar111");
+ /// assert_eq!(
+ /// Some(RawOsStr::from_str("11foo1bar111")),
+ /// raw.strip_prefix("1"),
+ /// );
+ /// assert_eq!(None, raw.strip_prefix("o"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn strip_prefix<P>(&self, pat: P) -> Option<&Self>
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ let pat = pat.__get();
+
+ self.0.strip_prefix(pat).map(Self::from_inner)
+ }
+
+ /// Equivalent to [`str::strip_suffix`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("111foo1bar111");
+ /// assert_eq!(
+ /// Some(RawOsStr::from_str("111foo1bar11")),
+ /// raw.strip_suffix("1"),
+ /// );
+ /// assert_eq!(None, raw.strip_suffix("o"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn strip_suffix<P>(&self, pat: P) -> Option<&Self>
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ let pat = pat.__get();
+
+ self.0.strip_suffix(pat).map(Self::from_inner)
+ }
+
+ /// Converts this representation back to a platform-native string.
+ ///
+ /// When possible, use [`RawOsStrCow::into_os_str`] for a more efficient
+ /// conversion on some platforms.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// assert_eq!(os_string, raw.to_os_str());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn to_os_str(&self) -> Cow<'_, OsStr> {
+ expect_encoded!(imp::os_str_from_bytes(&self.0))
+ }
+
+ /// Equivalent to [`OsStr::to_str`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let string = "foobar";
+ /// let raw = RawOsStr::from_str(string);
+ /// assert_eq!(Some(string), raw.to_str());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn to_str(&self) -> Option<&str> {
+ str::from_utf8(&self.0).ok()
+ }
+
+ /// Converts this string to the best UTF-8 representation possible.
+ ///
+ /// Invalid sequences will be replaced with
+ /// [`char::REPLACEMENT_CHARACTER`].
+ ///
+ /// This method may return a different result than would
+ /// [`OsStr::to_string_lossy`] when called on the same string, since
+ /// [`OsStr`] uses an unspecified encoding.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// println!("{}", raw.to_str_lossy());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn to_str_lossy(&self) -> Cow<'_, str> {
+ String::from_utf8_lossy(&self.0)
+ }
+
+ fn trim_matches_raw_with<P, F>(&self, pat: &P, strip_fn: F) -> &Self
+ where
+ F: for<'a> Fn(&'a [u8], &[u8]) -> Option<&'a [u8]>,
+ P: EncodedPattern,
+ {
+ let pat = pat.__get();
+ if pat.is_empty() { // stripping an empty pattern always succeeds, so the loop below would never end
+ return self;
+ }
+
+ let mut string = &self.0;
+ while let Some(substring) = strip_fn(string, pat) { // remove one occurrence per iteration until none is left
+ string = substring;
+ }
+ Self::from_inner(string)
+ }
+
+ fn trim_end_matches_raw<P>(&self, pat: &P) -> &Self
+ where
+ P: EncodedPattern,
+ {
+ self.trim_matches_raw_with(pat, <[_]>::strip_suffix)
+ }
+
+ /// Equivalent to [`str::trim_end_matches`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("111foo1bar111");
+ /// assert_eq!("111foo1bar", raw.trim_end_matches("1"));
+ /// assert_eq!("111foo1bar111", raw.trim_end_matches("o"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn trim_end_matches<P>(&self, pat: P) -> &Self
+ where
+ P: Pattern,
+ {
+ self.trim_end_matches_raw(&pat.__encode())
+ }
+
+ /// Equivalent to [`str::trim_matches`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("111foo1bar111");
+ /// assert_eq!("foo1bar", raw.trim_matches("1"));
+ /// assert_eq!("111foo1bar111", raw.trim_matches("o"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn trim_matches<P>(&self, pat: P) -> &Self
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode(); // encode once and reuse it for both ends
+ self.trim_start_matches_raw(&pat).trim_end_matches_raw(&pat)
+ }
+
+ fn trim_start_matches_raw<P>(&self, pat: &P) -> &Self
+ where
+ P: EncodedPattern,
+ {
+ self.trim_matches_raw_with(pat, <[_]>::strip_prefix)
+ }
+
+ /// Equivalent to [`str::trim_start_matches`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("111foo1bar111");
+ /// assert_eq!("foo1bar111", raw.trim_start_matches("1"));
+ /// assert_eq!("111foo1bar111", raw.trim_start_matches("o"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn trim_start_matches<P>(&self, pat: P) -> &Self
+ where
+ P: Pattern,
+ {
+ self.trim_start_matches_raw(&pat.__encode())
+ }
+}
+
+impl AsRef<Self> for RawOsStr {
+ #[inline]
+ fn as_ref(&self) -> &Self {
+ self
+ }
+}
+
+impl AsRef<RawOsStr> for str {
+ #[inline]
+ fn as_ref(&self) -> &RawOsStr {
+ RawOsStr::from_str(self)
+ }
+}
+
+impl AsRef<RawOsStr> for String {
+ #[inline]
+ fn as_ref(&self) -> &RawOsStr {
+ (**self).as_ref()
+ }
+}
+
+impl Default for &RawOsStr {
+ #[inline]
+ fn default() -> Self {
+ RawOsStr::from_str("")
+ }
+}
+
+impl<'a> From<&'a RawOsStr> for Cow<'a, RawOsStr> {
+ #[inline]
+ fn from(value: &'a RawOsStr) -> Self {
+ Cow::Borrowed(value)
+ }
+}
+
+impl From<Box<str>> for Box<RawOsStr> {
+ #[inline]
+ fn from(value: Box<str>) -> Self {
+ value.into_boxed_bytes().transmute_box() // Box<str> -> Box<[u8]> -> Box<RawOsStr>
+ }
+}
+
+impl ToOwned for RawOsStr {
+ type Owned = RawOsString;
+
+ #[inline]
+ fn to_owned(&self) -> Self::Owned {
+ RawOsString(self.0.to_owned())
+ }
+}
+
+/// Extensions to [`Cow<RawOsStr>`] for additional conversions.
+///
+/// [`Cow<RawOsStr>`]: Cow
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+pub trait RawOsStrCow<'a>: private::Sealed {
+ /// Converts this representation back to a platform-native string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ /// use os_str_bytes::RawOsStrCow;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// assert_eq!(os_string, raw.into_os_str());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[must_use]
+ fn into_os_str(self) -> Cow<'a, OsStr>;
+
+ /// Returns the byte string stored by this container.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ /// use os_str_bytes::RawOsStrCow;
+ ///
+ /// let string = "foobar";
+ /// let raw = Cow::Borrowed(RawOsStr::from_str(string));
+ /// assert_eq!(string.as_bytes(), &*raw.into_raw_bytes());
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[must_use]
+ fn into_raw_bytes(self) -> Cow<'a, [u8]>;
+}
+
+impl<'a> RawOsStrCow<'a> for Cow<'a, RawOsStr> {
+ #[inline]
+ fn into_os_str(self) -> Cow<'a, OsStr> {
+ match self {
+ Cow::Borrowed(string) => string.to_os_str(),
+ Cow::Owned(string) => Cow::Owned(string.into_os_string()), // hands over the owned buffer rather than borrowing it
+ }
+ }
+
+ #[inline]
+ fn into_raw_bytes(self) -> Cow<'a, [u8]> {
+ match self {
+ Cow::Borrowed(string) => Cow::Borrowed(&string.0),
+ Cow::Owned(string) => Cow::Owned(string.0), // moves the inner `Vec<u8>` out
+ }
+ }
+}
+
+/// A container for owned byte strings converted by this crate.
+///
+/// For more information, see [`RawOsStr`].
+#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+pub struct RawOsString(Vec<u8>);
+
+impl RawOsString {
+ /// Converts a platform-native string into a representation that can be
+ /// more easily manipulated.
+ ///
+ /// For more information, see [`RawOsStr::new`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// println!("{:?}", RawOsString::new(os_string));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn new(string: OsString) -> Self {
+ Self(imp::os_string_into_vec(string))
+ }
+
+ /// Wraps a string, without copying or encoding conversion.
+ ///
+ /// This method is much more efficient than [`RawOsString::new`], since the
+ /// [encoding] used by this crate is compatible with UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let raw = RawOsString::from_string(string.clone());
+ /// assert_eq!(string, raw);
+ /// ```
+ ///
+ /// [encoding]: super#encoding
+ #[inline]
+ #[must_use]
+ pub fn from_string(string: String) -> Self {
+ Self(string.into_bytes())
+ }
+
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsString::new(os_string);
+ /// let raw_bytes = raw.clone().into_raw_vec();
+ /// assert_eq!(raw, RawOsString::assert_from_raw_vec(raw_bytes));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ pub fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+ expect_encoded!(raw::validate_bytes(&string));
+
+ Self(string)
+ }
+
+ if_checked_conversions! {
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// [`assert_from_raw_vec`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsString::new(os_string);
+ /// let raw_clone = raw.clone();
+ /// assert_eq!(Ok(raw), RawOsString::from_raw_vec(raw_clone.into_raw_vec()));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ #[inline]
+ pub fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
+ raw::validate_bytes(&string)
+ .map(|()| Self(string))
+ .map_err(EncodingError)
+ }
+ }
+
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Safety
+ ///
+ /// The string must be valid for the [unspecified encoding] used by this
+ /// crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsString::new(os_string);
+ /// let raw_bytes = raw.clone().into_raw_vec();
+ /// assert_eq!(raw, unsafe {
+ /// RawOsString::from_raw_vec_unchecked(raw_bytes)
+ /// });
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub unsafe fn from_raw_vec_unchecked(string: Vec<u8>) -> Self {
+ if cfg!(debug_assertions) { // the input is only validated in builds with debug assertions
+ expect_encoded!(raw::validate_bytes(&string));
+ }
+
+ Self(string)
+ }
+
+ /// Equivalent to [`String::clear`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let mut raw = RawOsString::new(os_string);
+ /// raw.clear();
+ /// assert!(raw.is_empty());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ self.0.clear();
+ }
+
+ /// Equivalent to [`String::into_boxed_str`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let raw = RawOsString::from_string(string.clone());
+ /// assert_eq!(string, *raw.into_box());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn into_box(self) -> Box<RawOsStr> {
+ self.0.into_boxed_slice().transmute_box()
+ }
+
+ /// Converts this representation back to a platform-native string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsString::new(os_string.clone());
+ /// assert_eq!(os_string, raw.into_os_string());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn into_os_string(self) -> OsString {
+ expect_encoded!(imp::os_string_from_vec(self.0))
+ }
+
+ /// Returns the byte string stored by this container.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let raw = RawOsString::from_string(string.clone());
+ /// assert_eq!(string.into_bytes(), raw.into_raw_vec());
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use]
+ pub fn into_raw_vec(self) -> Vec<u8> {
+ self.0
+ }
+
+ /// Equivalent to [`OsString::into_string`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let raw = RawOsString::from_string(string.clone());
+ /// assert_eq!(Ok(string), raw.into_string());
+ /// ```
+ #[inline]
+ pub fn into_string(self) -> result::Result<String, Self> {
+ String::from_utf8(self.0).map_err(|x| Self(x.into_bytes())) // on failure, take the bytes back out of the error
+ }
+
+ /// Equivalent to [`String::shrink_to_fit`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let mut raw = RawOsString::from_string(string.clone());
+ /// raw.shrink_to_fit();
+ /// assert_eq!(string, raw);
+ /// ```
+ #[inline]
+ pub fn shrink_to_fit(&mut self) {
+ self.0.shrink_to_fit();
+ }
+
+ /// Equivalent to [`String::split_off`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if the index is not a [valid boundary].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let mut raw = RawOsString::from_string("foobar".to_owned());
+ /// assert_eq!("bar", raw.split_off(3));
+ /// assert_eq!("foo", raw);
+ /// ```
+ ///
+ /// [valid boundary]: RawOsStr#indices
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub fn split_off(&mut self, at: usize) -> Self {
+ self.check_bound(at); // `RawOsStr` method reached through `Deref`; rejects an index inside a multi-byte sequence
+
+ Self(self.0.split_off(at)) // `Vec::split_off` panics when `at > len`
+ }
+
+ /// Equivalent to [`String::truncate`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if the index is not a [valid boundary].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let mut raw = RawOsString::from_string("foobar".to_owned());
+ /// raw.truncate(3);
+ /// assert_eq!("foo", raw);
+ /// ```
+ ///
+ /// [valid boundary]: RawOsStr#indices
+ #[inline]
+ #[track_caller]
+ pub fn truncate(&mut self, new_len: usize) {
+ self.check_bound(new_len); // `RawOsStr` method reached through `Deref`; rejects an index inside a multi-byte sequence
+
+ self.0.truncate(new_len); // `Vec::truncate` does nothing when `new_len` exceeds the current length
+ }
+}
+
+impl AsRef<RawOsStr> for RawOsString {
+ #[inline]
+ fn as_ref(&self) -> &RawOsStr {
+ self
+ }
+}
+
+impl Borrow<RawOsStr> for RawOsString {
+ #[inline]
+ fn borrow(&self) -> &RawOsStr {
+ self
+ }
+}
+
+impl Deref for RawOsString {
+ type Target = RawOsStr;
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ RawOsStr::from_inner(&self.0)
+ }
+}
+
+// Delegates to `RawOsString::into_box`.
+impl From<RawOsString> for Box<RawOsStr> {
+    #[inline]
+    fn from(value: RawOsString) -> Self {
+        RawOsString::into_box(value)
+    }
+}
+
+// Reinterprets the boxed string as a boxed slice and takes over its
+// allocation as a vector.
+impl From<Box<RawOsStr>> for RawOsString {
+    #[inline]
+    fn from(value: Box<RawOsStr>) -> Self {
+        let bytes = value.transmute_box::<[_]>();
+        Self(bytes.into_vec())
+    }
+}
+
+// Wraps the owned string in the `Owned` variant.
+impl From<RawOsString> for Cow<'_, RawOsStr> {
+    #[inline]
+    fn from(value: RawOsString) -> Self {
+        Self::Owned(value)
+    }
+}
+
+// Delegates to `RawOsString::from_string`.
+impl From<String> for RawOsString {
+    #[inline]
+    fn from(value: String) -> Self {
+        RawOsString::from_string(value)
+    }
+}
+
+/// Wrapper that renders a raw byte string for `Debug` output.
+struct DebugBuffer<'a>(&'a [u8]);
+
+impl Debug for DebugBuffer<'_> {
+    /// Writes the bytes between double quotes. Portions that are valid UTF-8
+    /// are written with `escape_debug`; the invalid bytes that precede each
+    /// such portion are handed to `raw::debug`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_str("\"")?;
+
+        let mut remaining = self.0;
+        // Number of bytes at the front of `remaining` that were already found
+        // to be invalid UTF-8 but have not been written yet.
+        let mut pending_invalid = 0;
+        while !remaining.is_empty() {
+            let (invalid, rest) = remaining.split_at(pending_invalid);
+
+            let valid = match str::from_utf8(rest) {
+                Ok(text) => {
+                    // Everything left is valid, so this is the last pass.
+                    remaining = &[];
+                    text
+                }
+                Err(error) => {
+                    let (prefix, tail) = rest.split_at(error.valid_up_to());
+
+                    let bad_length =
+                        error.error_len().unwrap_or_else(|| tail.len());
+                    if prefix.is_empty() {
+                        // Another invalid sequence directly follows the
+                        // pending one: extend the run and look again.
+                        pending_invalid += bad_length;
+                        continue;
+                    }
+                    remaining = tail;
+                    pending_invalid = bad_length;
+
+                    // SAFETY: This slice was validated to be UTF-8.
+                    unsafe { str::from_utf8_unchecked(prefix) }
+                }
+            };
+
+            raw::debug(invalid, f)?;
+            Display::fmt(&valid.escape_debug(), f)?;
+        }
+
+        f.write_str("\"")
+    }
+}
+
+// Implements `Debug` for a raw string type as a tuple struct named after the
+// type, whose single field is the `DebugBuffer` rendering of its bytes.
+macro_rules! r#impl {
+    ( $name:ty ) => {
+        impl Debug for $name {
+            #[inline]
+            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+                let mut tuple = f.debug_tuple(stringify!($name));
+                tuple.field(&DebugBuffer(&self.0));
+                tuple.finish()
+            }
+        }
+    };
+}
+r#impl!(RawOsStr);
+r#impl!(RawOsString);
+
+// Implements `Index` with the given range type for both raw string types.
+// The optional arguments name a binding for the borrowed range and list the
+// indices that are passed to `check_bound` before slicing.
+macro_rules! r#impl {
+    ( $range_type:ty $(, $range_var:ident , $($index:expr),+)? ) => {
+        impl Index<$range_type> for RawOsStr {
+            type Output = Self;
+
+            #[inline]
+            fn index(&self, range: $range_type) -> &Self::Output {
+                $(
+                    let $range_var = &range;
+                    $(self.check_bound($index);)+
+                )?
+
+                Self::from_inner(&self.0[range])
+            }
+        }
+
+        impl Index<$range_type> for RawOsString {
+            type Output = RawOsStr;
+
+            #[allow(clippy::indexing_slicing)]
+            #[inline]
+            fn index(&self, range: $range_type) -> &Self::Output {
+                // Forward to the slice type's implementation.
+                let raw: &RawOsStr = self;
+                &raw[range]
+            }
+        }
+    };
+}
+r#impl!(Range<usize>, x, x.start, x.end);
+r#impl!(RangeFrom<usize>, x, x.start);
+r#impl!(RangeFull);
+// [usize::MAX] will always be a valid inclusive end index.
+#[rustfmt::skip]
+r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1));
+r#impl!(RangeTo<usize>, x, x.end);
+r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1));
+
+// Implements `PartialEq` in both directions between a raw string type and
+// another string type. Both operands are viewed as `RawOsStr` for the
+// comparison.
+macro_rules! r#impl {
+    ( $type:ty , $other_type:ty ) => {
+        impl PartialEq<$other_type> for $type {
+            #[inline]
+            fn eq(&self, other: &$other_type) -> bool {
+                let lhs: &RawOsStr = self;
+                let rhs: &RawOsStr = other.as_ref();
+                lhs == rhs
+            }
+        }
+
+        impl PartialEq<$type> for $other_type {
+            #[inline]
+            fn eq(&self, other: &$type) -> bool {
+                // Reuse the implementation above with the operands swapped.
+                <$type as PartialEq<$other_type>>::eq(other, self)
+            }
+        }
+    };
+}
+r#impl!(RawOsStr, RawOsString);
+r#impl!(&RawOsStr, RawOsString);
+r#impl!(RawOsStr, str);
+r#impl!(RawOsStr, String);
+r#impl!(&RawOsStr, String);
+r#impl!(RawOsString, str);
+r#impl!(RawOsString, &str);
+r#impl!(RawOsString, String);
+
+// `print_bytes::ToBytes` implementations for the raw string types.
+#[cfg(feature = "print_bytes")]
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "print_bytes")))]
+mod print_bytes {
+    use print_bytes::ByteStr;
+    use print_bytes::ToBytes;
+    #[cfg(windows)]
+    use print_bytes::WideStr;
+
+    #[cfg(windows)]
+    use crate::imp::raw;
+
+    use super::RawOsStr;
+    use super::RawOsString;
+
+    impl ToBytes for RawOsStr {
+        #[inline]
+        fn to_bytes(&self) -> ByteStr<'_> {
+            let bytes = &self.0;
+            bytes.to_bytes()
+        }
+
+        #[cfg(windows)]
+        #[inline]
+        fn to_wide(&self) -> Option<WideStr> {
+            let wide = raw::encode_wide_unchecked(&self.0).collect();
+            Some(WideStr::new(wide))
+        }
+    }
+
+    // The owned type forwards to the slice implementation.
+    impl ToBytes for RawOsString {
+        #[inline]
+        fn to_bytes(&self) -> ByteStr<'_> {
+            let raw: &RawOsStr = self;
+            raw.to_bytes()
+        }
+
+        #[cfg(windows)]
+        #[inline]
+        fn to_wide(&self) -> Option<WideStr> {
+            let raw: &RawOsStr = self;
+            raw.to_wide()
+        }
+    }
+}
+
+// `uniquote::Quote` implementations for the raw string types.
+#[cfg(feature = "uniquote")]
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "uniquote")))]
+mod uniquote {
+    use uniquote::Formatter;
+    use uniquote::Quote;
+    use uniquote::Result;
+
+    use crate::imp::raw;
+
+    use super::RawOsStr;
+    use super::RawOsString;
+
+    impl Quote for RawOsStr {
+        #[inline]
+        fn escape(&self, f: &mut Formatter<'_>) -> Result {
+            let bytes = &self.0;
+            raw::uniquote::escape(bytes, f)
+        }
+    }
+
+    // The owned type forwards to the slice implementation.
+    impl Quote for RawOsString {
+        #[inline]
+        fn escape(&self, f: &mut Formatter<'_>) -> Result {
+            let raw: &RawOsStr = self;
+            raw.escape(f)
+        }
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/util.rs b/third_party/rust/os_str_bytes/src/util.rs
new file mode 100644
index 0000000000..f931969c52
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/util.rs
@@ -0,0 +1,9 @@
+/// Number of payload bits stored in one continuation byte.
+pub(super) const BYTE_SHIFT: u8 = 6;
+
+/// Mask selecting the low `BYTE_SHIFT` bits of a byte.
+pub(super) const CONT_MASK: u8 = (1 << BYTE_SHIFT) - 1;
+
+/// Tag carried in the bits above the payload of a continuation byte.
+pub(super) const CONT_TAG: u8 = 0b1000_0000;
+
+/// Returns `true` when the bits of `byte` above the payload equal
+/// [`CONT_TAG`], i.e. when the byte has the form `0b10xx_xxxx`.
+pub(super) const fn is_continuation(byte: u8) -> bool {
+    (byte >> BYTE_SHIFT) == (CONT_TAG >> BYTE_SHIFT)
+}
diff --git a/third_party/rust/os_str_bytes/src/wasm/mod.rs b/third_party/rust/os_str_bytes/src/wasm/mod.rs
new file mode 100644
index 0000000000..a8a2996018
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/wasm/mod.rs
@@ -0,0 +1,58 @@
+use std::borrow::Cow;
+use std::error::Error;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fmt;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::result;
+use std::str;
+use std::str::Utf8Error;
+
+if_raw_str! {
+ pub(super) mod raw;
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(super) struct EncodingError(Utf8Error);
+
+impl Display for EncodingError {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(f, "os_str_bytes: {}", self.0)
+ }
+}
+
+impl Error for EncodingError {}
+
+type Result<T> = result::Result<T, EncodingError>;
+
+// Unwraps the given `Option` or `Result`, panicking with a fixed message when
+// a platform string turns out not to be UTF-8.
+macro_rules! expect_utf8 {
+    ( $value:expr ) => {
+        $value.expect(
+            "platform string contains invalid UTF-8, which should not be \
+             possible",
+        )
+    };
+}
+
+/// Validates `string` as UTF-8 and returns it as a `str`, wrapping any
+/// failure in [`EncodingError`].
+fn from_bytes(string: &[u8]) -> Result<&str> {
+    match str::from_utf8(string) {
+        Ok(text) => Ok(text),
+        Err(error) => Err(EncodingError(error)),
+    }
+}
+
+/// Borrows `string` as an `OsStr` after validating it as UTF-8.
+pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
+    let text = from_bytes(string)?;
+    Ok(Cow::Borrowed(OsStr::new(text)))
+}
+
+/// Borrows the bytes of `os_string`.
+///
+/// # Panics
+///
+/// Panics if `os_string` is not valid UTF-8.
+pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
+    let text = expect_utf8!(os_string.to_str());
+    Cow::Borrowed(text.as_bytes())
+}
+
+/// Converts `string` into an `OsString` after validating it as UTF-8.
+pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
+    match String::from_utf8(string) {
+        Ok(text) => Ok(text.into()),
+        Err(error) => Err(EncodingError(error.utf8_error())),
+    }
+}
+
+/// Converts `os_string` into its bytes.
+///
+/// # Panics
+///
+/// Panics if `os_string` is not valid UTF-8.
+pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
+    let text = expect_utf8!(os_string.into_string());
+    text.into_bytes()
+}
diff --git a/third_party/rust/os_str_bytes/src/wasm/raw.rs b/third_party/rust/os_str_bytes/src/wasm/raw.rs
new file mode 100644
index 0000000000..fb291a65fa
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/wasm/raw.rs
@@ -0,0 +1,34 @@
+use std::fmt;
+use std::fmt::Formatter;
+use std::str;
+
+pub(crate) use crate::util::is_continuation;
+
+use super::Result;
+
+#[allow(dead_code)]
+#[path = "../common/raw.rs"]
+mod common_raw;
+pub(crate) use common_raw::ends_with;
+pub(crate) use common_raw::starts_with;
+#[cfg(feature = "uniquote")]
+pub(crate) use common_raw::uniquote;
+
+/// Checks that `string` is accepted by `super::from_bytes`, discarding the
+/// decoded value.
+pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> {
+    super::from_bytes(string)?;
+    Ok(())
+}
+
+/// Decodes `string` as exactly one code point.
+///
+/// # Panics
+///
+/// Panics if `string` is empty or holds more than one code point.
+pub(crate) fn decode_code_point(string: &[u8]) -> u32 {
+    let string = expect_encoded!(str::from_utf8(string));
+    let mut rest = string.chars();
+    let first = rest
+        .next()
+        .expect("cannot parse code point from empty string");
+    assert_eq!(None, rest.next(), "multiple code points found");
+    u32::from(first)
+}
+
+/// Formats a run of bytes that is not valid UTF-8. This implementation only
+/// accepts an empty run and writes nothing.
+///
+/// # Panics
+///
+/// Panics if `string` is not empty.
+pub(crate) fn debug(string: &[u8], _: &mut Formatter<'_>) -> fmt::Result {
+    assert!(string.is_empty());
+    Ok(())
+}
diff --git a/third_party/rust/os_str_bytes/src/windows/mod.rs b/third_party/rust/os_str_bytes/src/windows/mod.rs
new file mode 100644
index 0000000000..ed9e60b050
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/windows/mod.rs
@@ -0,0 +1,113 @@
+// These functions are necessarily inefficient, because they must revert
+// encoding conversions performed by the standard library. However, there is
+// currently no better alternative.
+
+use std::borrow::Cow;
+use std::error::Error;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fmt;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::ops::Not;
+use std::os::windows::ffi::OsStrExt;
+use std::os::windows::ffi::OsStringExt;
+use std::result;
+use std::str;
+
+if_raw_str! {
+ pub(super) mod raw;
+}
+
+mod wtf8;
+use wtf8::DecodeWide;
+
+#[cfg(test)]
+mod tests;
+
+/// Reason a byte sequence could not be converted to the platform encoding.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(super) enum EncodingError {
+    /// The given byte was not acceptable at its position.
+    Byte(u8),
+    /// The given decoded code point was rejected.
+    CodePoint(u32),
+    /// The input ended where another byte was required.
+    End(),
+}
+
+impl EncodingError {
+    /// Describes where the error occurred, for use in the `Display` message.
+    fn position(&self) -> Cow<'_, str> {
+        let description = match *self {
+            Self::Byte(byte) => format!("byte b'\\x{:02X}'", byte),
+            Self::CodePoint(code_point) => {
+                format!("code point U+{:04X}", code_point)
+            }
+            // The only variant that needs no allocation.
+            Self::End() => return Cow::Borrowed("end of string"),
+        };
+        Cow::Owned(description)
+    }
+}
+
+impl Display for EncodingError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let position = self.position();
+        write!(
+            f,
+            "byte sequence is not representable in the platform encoding; \
+             error at {}",
+            position,
+        )
+    }
+}
+
+impl Error for EncodingError {}
+
+/// Result type whose error is this module's [`EncodingError`].
+type Result<T> = result::Result<T, EncodingError>;
+
+/// Encodes `string` to wide characters.
+///
+/// Returns `Ok(None)` when the encoder reports that the input was plain
+/// UTF-8, so the caller can reuse the original bytes, and `Ok(Some(_))` with
+/// the converted string otherwise.
+fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
+    let mut encoder = wtf8::encode_wide(string);
+
+    // Collecting an iterator into a result ignores the size hint:
+    // https://github.com/rust-lang/rust/issues/48994
+    let mut wide = Vec::with_capacity(encoder.size_hint().0);
+    encoder
+        .by_ref()
+        .try_for_each(|wchar| wchar.map(|wchar| wide.push(wchar)))?;
+
+    let still_utf8 = encoder.is_still_utf8();
+    debug_assert_eq!(str::from_utf8(string).is_ok(), still_utf8);
+    Ok(still_utf8.not().then(|| OsStringExt::from_wide(&wide)))
+}
+
+/// Converts the wide characters of `os_string` to bytes through
+/// [`DecodeWide`].
+fn to_bytes(os_string: &OsStr) -> Vec<u8> {
+    let wide = os_string.encode_wide();
+
+    // Reserve at least the lower bound reported by the wide iterator.
+    let (capacity, _) = wide.size_hint();
+    let mut bytes = Vec::with_capacity(capacity);
+    bytes.extend(DecodeWide::new(wide));
+    bytes
+}
+
+/// Converts `string` to an `OsStr`, borrowing the input when `from_bytes`
+/// reports that no conversion was needed.
+pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
+    Ok(match from_bytes(string)? {
+        Some(os_string) => Cow::Owned(os_string),
+        None => {
+            // SAFETY: This slice was validated to be UTF-8.
+            Cow::Borrowed(OsStr::new(unsafe {
+                str::from_utf8_unchecked(string)
+            }))
+        }
+    })
+}
+
+/// Converts `os_string` to bytes; the result is always owned.
+pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
+    let bytes = to_bytes(os_string);
+    Cow::Owned(bytes)
+}
+
+/// Converts `string` to an `OsString`, reusing the vector's allocation when
+/// `from_bytes` reports that no conversion was needed.
+pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
+    Ok(match from_bytes(&string)? {
+        Some(os_string) => os_string,
+        None => {
+            // SAFETY: This slice was validated to be UTF-8.
+            unsafe { String::from_utf8_unchecked(string) }.into()
+        }
+    })
+}
+
+/// Converts `os_string` to bytes.
+pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
+    to_bytes(os_string.as_os_str())
+}
diff --git a/third_party/rust/os_str_bytes/src/windows/raw.rs b/third_party/rust/os_str_bytes/src/windows/raw.rs
new file mode 100644
index 0000000000..80953dea79
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/windows/raw.rs
@@ -0,0 +1,46 @@
+use std::fmt;
+use std::fmt::Formatter;
+
+pub(crate) use crate::util::is_continuation;
+
+use super::wtf8;
+pub(crate) use super::wtf8::ends_with;
+pub(crate) use super::wtf8::starts_with;
+use super::wtf8::CodePoints;
+use super::Result;
+
+pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> {
+ wtf8::encode_wide(string).try_for_each(|x| x.map(drop))
+}
+
+/// Encodes `string` to wide characters, passing every item through
+/// `expect_encoded!` instead of returning errors.
+pub(crate) fn encode_wide_unchecked(
+    string: &[u8],
+) -> impl '_ + Iterator<Item = u16> {
+    let encoder = wtf8::encode_wide(string);
+    encoder.map(|x| expect_encoded!(x))
+}
+
+/// Decodes `string` as exactly one code point.
+///
+/// # Panics
+///
+/// Panics if `string` is empty or holds more than one code point.
+pub(crate) fn decode_code_point(string: &[u8]) -> u32 {
+    let bytes = string.iter().copied();
+    let mut code_points = CodePoints::new(bytes);
+    let code_point = expect_encoded!(code_points
+        .next()
+        .expect("cannot parse code point from empty string"));
+    assert_eq!(None, code_points.next(), "multiple code points found");
+    code_point
+}
+
+/// Writes each wide character encoded from `string` as a `\u{...}` escape
+/// with uppercase hexadecimal digits.
+pub(crate) fn debug(string: &[u8], f: &mut Formatter<'_>) -> fmt::Result {
+    encode_wide_unchecked(string)
+        .try_for_each(|wchar| write!(f, "\\u{{{:X}}}", wchar))
+}
+
+#[cfg(feature = "uniquote")]
+pub(crate) mod uniquote {
+    use uniquote::Formatter;
+    use uniquote::Result;
+
+    /// Escapes `string` by feeding its wide-character encoding to
+    /// `Formatter::escape_utf16`.
+    pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result {
+        let wide = super::encode_wide_unchecked(string);
+        f.escape_utf16(wide)
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/windows/wtf8/code_points.rs b/third_party/rust/os_str_bytes/src/windows/wtf8/code_points.rs
new file mode 100644
index 0000000000..9800d781fc
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/windows/wtf8/code_points.rs
@@ -0,0 +1,129 @@
+use std::iter::FusedIterator;
+use std::iter::Peekable;
+use std::mem;
+
+use crate::util::is_continuation;
+use crate::util::BYTE_SHIFT;
+use crate::util::CONT_MASK;
+
+use super::EncodingError;
+use super::Result;
+
+/// Iterator that decodes a stream of bytes into code points, reporting an
+/// [`EncodingError`] for sequences it rejects.
+pub(in super::super) struct CodePoints<I>
+where
+    I: Iterator<Item = u8>,
+{
+    iter: Peekable<I>,
+    // Set by `next` when the code point it just decoded matched the surrogate
+    // pattern with bit 0x10 clear; reset at the start of every `next` call.
+    surrogate: bool,
+    // Cleared by `next` once any code point matching the surrogate pattern
+    // has been decoded.
+    still_utf8: bool,
+}
+
+impl<I> CodePoints<I>
+where
+    I: Iterator<Item = u8>,
+{
+    /// Creates a decoder over the bytes yielded by `string`.
+    pub(in super::super) fn new<S>(string: S) -> Self
+    where
+        S: IntoIterator<IntoIter = I>,
+    {
+        let iter = string.into_iter().peekable();
+        Self {
+            iter,
+            surrogate: false,
+            still_utf8: true,
+        }
+    }
+
+    /// Returns `false` once a surrogate code point has been decoded.
+    pub(super) fn is_still_utf8(&self) -> bool {
+        self.still_utf8
+    }
+
+    /// Appends the payload of the next byte to `code_point`.
+    ///
+    /// The byte is only consumed when it is a continuation byte. Otherwise
+    /// `EncodingError::Byte` is returned and the surrogate flag is cleared.
+    /// `EncodingError::End` is returned when no byte is left.
+    fn consume_next(&mut self, code_point: &mut u32) -> Result<()> {
+        let byte = match self.iter.peek() {
+            Some(&byte) => byte,
+            None => return Err(EncodingError::End()),
+        };
+
+        if !is_continuation(byte) {
+            self.surrogate = false;
+            // Not consuming this byte will be useful if this crate ever offers
+            // a way to encode lossily.
+            return Err(EncodingError::Byte(byte));
+        }
+        let payload = u32::from(byte & CONT_MASK);
+        *code_point = (*code_point << BYTE_SHIFT) | payload;
+
+        let removed = self.iter.next();
+        debug_assert_eq!(Some(byte), removed);
+
+        Ok(())
+    }
+
+    /// Size hint of the underlying byte iterator.
+    pub(super) fn inner_size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
+
+impl<I> FusedIterator for CodePoints<I> where
+    I: FusedIterator + Iterator<Item = u8>
+{
+}
+
+impl<I> Iterator for CodePoints<I>
+where
+    I: Iterator<Item = u8>,
+{
+    type Item = Result<u32>;
+
+    /// Decodes the next code point from the byte stream.
+    ///
+    /// Yields `EncodingError::Byte` for a rejected lead or continuation byte,
+    /// `EncodingError::End` when the input stops inside a sequence, and
+    /// `EncodingError::CodePoint` when the fully decoded value is rejected.
+    fn next(&mut self) -> Option<Self::Item> {
+        let lead = self.iter.next()?;
+        let mut code_point = u32::from(lead);
+
+        // Reads one continuation byte into `code_point`, or returns the error
+        // from the enclosing function.
+        macro_rules! consume_next {
+            () => {{
+                if let Err(error) = self.consume_next(&mut code_point) {
+                    return Some(Err(error));
+                }
+            }};
+        }
+
+        let prev_surrogate = mem::replace(&mut self.surrogate, false);
+
+        let mut invalid = false;
+        if !lead.is_ascii() {
+            // Non-ASCII bytes below 0xC2 are never accepted as a lead byte.
+            if lead < 0xC2 {
+                return Some(Err(EncodingError::Byte(lead)));
+            }
+
+            if lead < 0xE0 {
+                // Two-byte sequence: only the final byte remains.
+                code_point &= 0x1F;
+            } else {
+                code_point &= 0x0F;
+                consume_next!();
+
+                if lead >= 0xF0 {
+                    // Four-byte sequence: after the second byte the partial
+                    // value must lie in 0x10..0x110.
+                    if code_point.wrapping_sub(0x10) >= 0x100 {
+                        invalid = true;
+                    }
+                    consume_next!();
+
+                // This condition is optimized to detect surrogate code points.
+                } else if (code_point & 0xFE0) == 0x360 {
+                    self.still_utf8 = false;
+                    if (code_point & 0x10) == 0 {
+                        self.surrogate = true;
+                    } else if prev_surrogate {
+                        // Decoding a broken surrogate pair would be lossy.
+                        invalid = true;
+                    }
+                }
+
+                // Partial values below 0x20 are rejected at this stage.
+                if code_point < 0x20 {
+                    invalid = true;
+                }
+            }
+            // Every multi-byte sequence ends with one more continuation byte.
+            consume_next!();
+        }
+
+        if invalid {
+            Some(Err(EncodingError::CodePoint(code_point)))
+        } else {
+            Some(Ok(code_point))
+        }
+    }
+}
diff --git a/third_party/rust/os_str_bytes/src/windows/wtf8/convert.rs b/third_party/rust/os_str_bytes/src/windows/wtf8/convert.rs
new file mode 100644
index 0000000000..70a8a9f58c
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/windows/wtf8/convert.rs
@@ -0,0 +1,181 @@
+use std::char;
+use std::char::DecodeUtf16;
+use std::iter::FusedIterator;
+use std::num::NonZeroU16;
+
+use crate::util::BYTE_SHIFT;
+use crate::util::CONT_MASK;
+use crate::util::CONT_TAG;
+
+use super::CodePoints;
+use super::Result;
+
+/// Value OR-ed into the first unit of an encoded surrogate pair.
+const MIN_HIGH_SURROGATE: u16 = 0xD800;
+
+/// Value OR-ed into the second unit of an encoded surrogate pair.
+const MIN_LOW_SURROGATE: u16 = 0xDC00;
+
+/// First code point that does not fit in a single `u16`.
+const MIN_SURROGATE_CODE: u32 = (u16::MAX as u32) + 1;
+
+// Evaluates the condition in a constant, so a false condition fails the
+// build.
+macro_rules! static_assert {
+    ( $check:expr ) => {
+        const _: () = assert!($check, "static assertion failed");
+    };
+}
+
+/// Iterator that turns wide characters into bytes, one byte per `next` call.
+pub(in super::super) struct DecodeWide<I>
+where
+    I: Iterator<Item = u16>,
+{
+    iter: DecodeUtf16<I>,
+    // Code point whose bytes are currently being emitted.
+    code_point: u32,
+    // Number of continuation bytes of `code_point` still to be emitted.
+    shifts: u8,
+}
+
+impl<I> DecodeWide<I>
+where
+    I: Iterator<Item = u16>,
+{
+    /// Creates a decoder over the wide characters yielded by `string`.
+    pub(in super::super) fn new<S>(string: S) -> Self
+    where
+        S: IntoIterator<IntoIter = I, Item = I::Item>,
+    {
+        let iter = char::decode_utf16(string);
+        Self {
+            iter,
+            code_point: 0,
+            shifts: 0,
+        }
+    }
+
+    /// Returns the bits of the current code point at the position selected by
+    /// `shifts`, truncated to a byte and without any tag applied.
+    #[inline(always)]
+    fn get_raw_byte(&self) -> u8 {
+        let offset = self.shifts * BYTE_SHIFT;
+        (self.code_point >> offset) as u8
+    }
+}
+
+impl<I> Iterator for DecodeWide<I>
+where
+    I: Iterator<Item = u16>,
+{
+    type Item = u8;
+
+    /// Emits the next byte: a pending continuation byte of the current code
+    /// point if there is one, otherwise the lead byte of the next code point.
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(shifts) = self.shifts.checked_sub(1) {
+            self.shifts = shifts;
+            return Some((self.get_raw_byte() & CONT_MASK) | CONT_TAG);
+        }
+
+        // An unpaired surrogate is passed through as its own code point.
+        self.code_point = match self.iter.next()? {
+            Ok(ch) => u32::from(ch),
+            Err(error) => u32::from(error.unpaired_surrogate()),
+        };
+
+        // `shifts` is zero here. Pick the lead-byte tag and the number of
+        // continuation bytes from the magnitude of the code point.
+        let (tag, continuation_bytes): (u8, u8) = if self.code_point < 0x80 {
+            (0, 0)
+        } else if self.code_point < 0x800 {
+            (0xC0, 1)
+        } else if self.code_point < MIN_SURROGATE_CODE {
+            (0xE0, 2)
+        } else {
+            (0xF0, 3)
+        };
+        self.shifts = continuation_bytes;
+        Some(self.get_raw_byte() | tag)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (low, high) = self.iter.size_hint();
+        let pending = usize::from(self.shifts);
+
+        // Every remaining code point yields between one and four bytes, in
+        // addition to the continuation bytes still pending.
+        let lower = low.saturating_add(pending);
+        let upper = match high {
+            Some(high) => high
+                .checked_mul(4)
+                .and_then(|bytes| bytes.checked_add(pending)),
+            None => None,
+        };
+        (lower, upper)
+    }
+}
+
+/// Iterator that turns bytes into wide characters.
+pub(in super::super) struct EncodeWide<I>
+where
+    I: Iterator<Item = u8>,
+{
+    iter: CodePoints<I>,
+    // Second unit of a surrogate pair, held back until the next call.
+    surrogate: Option<NonZeroU16>,
+}
+
+impl<I> EncodeWide<I>
+where
+    I: Iterator<Item = u8>,
+{
+    /// Creates an encoder over the bytes yielded by `string`.
+    fn new<S>(string: S) -> Self
+    where
+        S: IntoIterator<IntoIter = I>,
+    {
+        let iter = CodePoints::new(string);
+        Self {
+            iter,
+            surrogate: None,
+        }
+    }
+
+    /// Forwards to `CodePoints::is_still_utf8`.
+    pub(in super::super) fn is_still_utf8(&self) -> bool {
+        let code_points = &self.iter;
+        code_points.is_still_utf8()
+    }
+}
+
+impl<I> FusedIterator for EncodeWide<I> where
+    I: FusedIterator + Iterator<Item = u8>
+{
+}
+
+impl<I> Iterator for EncodeWide<I>
+where
+    I: Iterator<Item = u8>,
+{
+    type Item = Result<u16>;
+
+    /// Yields the next wide character. A code point at or above
+    /// `MIN_SURROGATE_CODE` is split into two units; the second one is stored
+    /// and returned by the following call.
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(surrogate) = self.surrogate.take() {
+            return Some(Ok(surrogate.get()));
+        }
+
+        let code_point = match self.iter.next()? {
+            Ok(code_point) => code_point,
+            Err(error) => return Some(Err(error)),
+        };
+
+        let wchar = match code_point.checked_sub(MIN_SURROGATE_CODE) {
+            Some(offset) => {
+                static_assert!(MIN_LOW_SURROGATE != 0);
+
+                // SAFETY: The above static assertion guarantees that
+                // this value will not be zero.
+                self.surrogate = Some(unsafe {
+                    NonZeroU16::new_unchecked(
+                        (offset & 0x3FF) as u16 | MIN_LOW_SURROGATE,
+                    )
+                });
+                (offset >> 10) as u16 | MIN_HIGH_SURROGATE
+            }
+            // The code point is below `MIN_SURROGATE_CODE`, so it fits in
+            // one unit.
+            None => code_point as u16,
+        };
+        Some(Ok(wchar))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (low, high) = self.iter.inner_size_hint();
+        let additional = usize::from(self.surrogate.is_some());
+
+        // Lower bound: one unit per three input bytes, rounded up. Upper
+        // bound: one unit per input byte. Both include a pending unit.
+        let lower = (low.saturating_add(2) / 3).saturating_add(additional);
+        let upper = high.and_then(|bytes| bytes.checked_add(additional));
+        (lower, upper)
+    }
+}
+
+/// Creates an [`EncodeWide`] iterator over a copy of each byte of `string`.
+pub(in super::super) fn encode_wide(
+    string: &[u8],
+) -> EncodeWide<impl '_ + Iterator<Item = u8>> {
+    let bytes = string.iter().copied();
+    EncodeWide::new(bytes)
+}
diff --git a/third_party/rust/os_str_bytes/src/windows/wtf8/mod.rs b/third_party/rust/os_str_bytes/src/windows/wtf8/mod.rs
new file mode 100644
index 0000000000..d8b0dc4a7f
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/windows/wtf8/mod.rs
@@ -0,0 +1,18 @@
+// This module implements the WTF-8 encoding specification:
+// https://simonsapin.github.io/wtf-8/
+
+use super::EncodingError;
+use super::Result;
+
+mod code_points;
+pub(super) use code_points::CodePoints;
+
+mod convert;
+pub(super) use convert::encode_wide;
+pub(super) use convert::DecodeWide;
+
+if_raw_str! {
+ mod string;
+ pub(crate) use string::ends_with;
+ pub(crate) use string::starts_with;
+}
diff --git a/third_party/rust/os_str_bytes/src/windows/wtf8/string.rs b/third_party/rust/os_str_bytes/src/windows/wtf8/string.rs
new file mode 100644
index 0000000000..b3523a2eff
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/windows/wtf8/string.rs
@@ -0,0 +1,67 @@
+use crate::util;
+
+/// Number of bytes inspected as one encoded surrogate by the functions in
+/// this module.
+const SURROGATE_LENGTH: usize = 3;
+
+/// Returns `true` if `string` ends with `suffix`.
+///
+/// When the suffix would start on a continuation byte of `string`, a plain
+/// byte comparison is not enough. The first `SURROGATE_LENGTH` bytes of the
+/// suffix must then encode to exactly one wide character, equal to the second
+/// wide character encoded from `string` starting one byte before that
+/// position. Only the rest of the suffix is compared byte-wise.
+pub(crate) fn ends_with(string: &[u8], mut suffix: &[u8]) -> bool {
+    let index = match string.len().checked_sub(suffix.len()) {
+        Some(index) => index,
+        // The suffix is longer than the string.
+        None => return false,
+    };
+    match string.get(index) {
+        Some(&byte) if util::is_continuation(byte) => {
+            let index = expect_encoded!(index.checked_sub(1));
+            let mut wide_surrogate = match suffix.get(..SURROGATE_LENGTH) {
+                Some(surrogate) => super::encode_wide(surrogate),
+                None => return false,
+            };
+            let surrogate_wchar = wide_surrogate
+                .next()
+                .expect("failed decoding non-empty suffix");
+
+            // The leading bytes must form a single wide character.
+            if wide_surrogate.next().is_some() {
+                return false;
+            }
+            let second_wchar = super::encode_wide(&string[index..])
+                .take_while(Result::is_ok)
+                .nth(1);
+            if second_wchar != Some(surrogate_wchar) {
+                return false;
+            }
+            suffix = &suffix[SURROGATE_LENGTH..];
+        }
+        _ => {}
+    }
+    string.ends_with(suffix)
+}
+
+/// Returns `true` if `string` starts with `prefix`.
+///
+/// When the byte of `string` right after the prefix is a continuation byte, a
+/// plain byte comparison is not enough. The last `SURROGATE_LENGTH` bytes of
+/// the prefix must then encode to exactly one wide character, equal to the
+/// first wide character encoded from `string` at the same offset. Only the
+/// part of the prefix before those bytes is compared byte-wise.
+pub(crate) fn starts_with(string: &[u8], mut prefix: &[u8]) -> bool {
+    match string.get(prefix.len()) {
+        Some(&byte) if util::is_continuation(byte) => {
+            let index = match prefix.len().checked_sub(SURROGATE_LENGTH) {
+                Some(index) => index,
+                // The prefix is too short to end with a surrogate.
+                None => return false,
+            };
+            let (substring, surrogate) = prefix.split_at(index);
+            let mut wide_surrogate = super::encode_wide(surrogate);
+            let surrogate_wchar = wide_surrogate
+                .next()
+                .expect("failed decoding non-empty prefix");
+
+            // The trailing bytes must form a single, valid wide character.
+            if surrogate_wchar.is_err() || wide_surrogate.next().is_some() {
+                return false;
+            }
+            let first_wchar = super::encode_wide(&string[index..])
+                .next()
+                .expect("failed decoding non-empty substring");
+            if first_wchar != surrogate_wchar {
+                return false;
+            }
+            prefix = substring;
+        }
+        _ => {}
+    }
+    string.starts_with(prefix)
+}