summaryrefslogtreecommitdiffstats
path: root/third_party/rust/os_str_bytes/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/os_str_bytes/src/lib.rs')
-rw-r--r--third_party/rust/os_str_bytes/src/lib.rs623
1 files changed, 623 insertions, 0 deletions
diff --git a/third_party/rust/os_str_bytes/src/lib.rs b/third_party/rust/os_str_bytes/src/lib.rs
new file mode 100644
index 0000000000..40154c99be
--- /dev/null
+++ b/third_party/rust/os_str_bytes/src/lib.rs
@@ -0,0 +1,623 @@
+//! This crate allows interacting with the data stored by [`OsStr`] and
+//! [`OsString`], without resorting to panics or corruption for invalid UTF-8.
+//! Thus, methods can be used that are already defined on [`[u8]`][slice] and
+//! [`Vec<u8>`].
+//!
+//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`]
+//! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which
+//! requires the bytes to be valid in UTF-8. However, since this crate makes
+//! conversions directly between the platform encoding and raw bytes, even some
+//! strings invalid in UTF-8 can be converted.
+//!
+//! In most cases, [`RawOsStr`] and [`RawOsString`] should be used.
+//! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are
+//! easier to misuse.
+//!
+//! # Encoding
+//!
+//! The encoding of bytes returned or accepted by methods of this crate is
+//! intentionally left unspecified. It may vary for different platforms, so
+//! defining it would run contrary to the goal of generic string handling.
+//! However, the following invariants will always be upheld:
+//!
+//! - The encoding will be compatible with UTF-8. In particular, splitting an
+//! encoded byte sequence by a UTF-8&ndash;encoded character always produces
+//! other valid byte sequences. They can be re-encoded without error using
+//! [`RawOsString::into_os_string`] and similar methods.
+//!
+//! - All characters valid in platform strings are representable. [`OsStr`] and
+//! [`OsString`] can always be losslessly reconstructed from extracted bytes.
+//!
+//! Note that the chosen encoding may not match how Rust stores these strings
+//! internally, which is undocumented. For instance, the result of calling
+//! [`OsStr::len`] will not necessarily match the number of bytes this crate
+//! uses to represent the same string.
+//!
+//! Additionally, concatenation may yield unexpected results without a UTF-8
+//! separator. If two platform strings need to be concatenated, the only safe
+//! way to do so is using [`OsString::push`]. This limitation also makes it
+//! undesirable to use the bytes in interchange.
+//!
+//! Since this encoding can change between versions and platforms, it should
+//! not be used for storage. The standard library provides implementations of
+//! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be
+//! preferred for that use case.
+//!
+//! # User Input
+//!
+//! Traits in this crate should ideally not be used to convert byte sequences
+//! that did not originate from [`OsStr`] or a related struct. The encoding
+//! used by this crate is an implementation detail, so it does not make sense
+//! to expose it to users.
+//!
+//! Crate [bstr] offers some useful alternative methods, such as
+//! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant
+//! for user input. But, they reject some byte sequences used to represent
+//! valid platform strings, which would be undesirable for reliable path
+//! handling. They are best used only when accepting unknown input.
+//!
+//! This crate is meant to help when you already have an instance of [`OsStr`]
+//! and need to modify the data in a lossless way.
+//!
+//! # Features
+//!
+//! These features are optional and can be enabled or disabled in a
+//! "Cargo.toml" file.
+//!
+//! ### Default Features
+//!
+//! - **memchr** -
+//! Changes the implementation to use crate [memchr] for better performance.
+//! This feature is useless when "raw\_os\_str" is disabled.
+//!
+//! For more information, see [`RawOsStr`][memchr complexity].
+//!
+//! - **raw\_os\_str** -
+//! Provides:
+//! - [`iter`]
+//! - [`Pattern`]
+//! - [`RawOsStr`]
+//! - [`RawOsStrCow`]
+//! - [`RawOsString`]
+//!
+//! ### Optional Features
+//!
+//! - **checked\_conversions** -
+//! Provides:
+//! - [`EncodingError`]
+//! - [`OsStrBytes::from_raw_bytes`]
+//! - [`OsStringBytes::from_raw_vec`]
+//! - [`RawOsStr::from_raw_bytes`]
+//! - [`RawOsString::from_raw_vec`]
+//!
+//! Because this feature should not be used in libraries, the
+//! "OS_STR_BYTES_CHECKED_CONVERSIONS" environment variable must be defined
+//! during compilation.
+//!
+//! - **print\_bytes** -
+//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and
+//! [`RawOsString`].
+//!
+//! - **uniquote** -
+//! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and
+//! [`RawOsString`].
+//!
+//! # Implementation
+//!
+//! Some methods return [`Cow`] to account for platform differences. However,
+//! no guarantee is made that the same variant of that enum will always be
+//! returned for the same platform. Whichever can be constructed most
+//! efficiently will be returned.
+//!
+//! All traits are [sealed], meaning that they can only be implemented by this
+//! crate. Otherwise, backward compatibility would be more difficult to
+//! maintain for new features.
+//!
+//! # Complexity
+//!
+//! Conversion method complexities will vary based on what functionality is
+//! available for the platform. At worst, they will all be linear, but some can
+//! take constant time. For example, [`RawOsString::into_os_string`] might be
+//! able to reuse its allocation.
+//!
+//! # Examples
+//!
+//! ```
+//! # use std::io;
+//! #
+//! # #[cfg(feature = "raw_os_str")]
+//! # {
+//! # #[cfg(any())]
+//! use std::env;
+//! use std::fs;
+//!
+//! use os_str_bytes::RawOsStr;
+//!
+//! # mod env {
+//! # use std::env;
+//! # use std::ffi::OsString;
+//! #
+//! # pub fn args_os() -> impl Iterator<Item = OsString> {
+//! # let mut file = env::temp_dir();
+//! # file.push("os_str_bytes\u{E9}.txt");
+//! # return vec![OsString::new(), file.into_os_string()].into_iter();
+//! # }
+//! # }
+//! #
+//! for file in env::args_os().skip(1) {
+//! if !RawOsStr::new(&file).starts_with('-') {
+//! let string = "Hello, world!";
+//! fs::write(&file, string)?;
+//! assert_eq!(string, fs::read_to_string(file)?);
+//! }
+//! }
+//! # }
+//! #
+//! # Ok::<_, io::Error>(())
+//! ```
+//!
+//! [bstr]: https://crates.io/crates/bstr
+//! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str
+//! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string
+//! [memchr complexity]: RawOsStr#complexity
+//! [memchr]: https://crates.io/crates/memchr
+//! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+//! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed
+//! [print\_bytes]: https://crates.io/crates/print_bytes
+
+#![cfg_attr(not(feature = "checked_conversions"), allow(deprecated))]
+// Only require a nightly compiler when building documentation for docs.rs.
+// This is a private option that should not be used.
+// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407
+// https://github.com/dylni/os_str_bytes/issues/2
+#![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))]
+// Nightly is also currently required for the SGX platform.
+#![cfg_attr(
+ all(target_vendor = "fortanix", target_env = "sgx"),
+ feature(sgx_platform)
+)]
+#![warn(unsafe_op_in_unsafe_fn)]
+#![warn(unused_results)]
+
+use std::borrow::Cow;
+use std::error::Error;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fmt;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::path::Path;
+use std::path::PathBuf;
+use std::result;
+
+macro_rules! if_checked_conversions {
+ ( $($item:item)+ ) => {
+ $(
+ #[cfg(feature = "checked_conversions")]
+ $item
+ )+
+ };
+}
+
+#[cfg(not(os_str_bytes_docs_rs))]
+if_checked_conversions! {
+ const _: &str = env!(
+ "OS_STR_BYTES_CHECKED_CONVERSIONS",
+ "The 'OS_STR_BYTES_CHECKED_CONVERSIONS' environment variable must be \
+ defined to use the 'checked_conversions' feature.",
+ );
+}
+
+#[rustfmt::skip]
+macro_rules! deprecated_checked_conversion {
+ ( $message:expr , $item:item ) => {
+ #[cfg_attr(
+ not(feature = "checked_conversions"),
+ deprecated = $message
+ )]
+ $item
+ };
+}
+
+macro_rules! expect_encoded {
+ ( $result:expr ) => {
+ $result.expect("invalid raw bytes")
+ };
+}
+
+macro_rules! if_raw_str {
+ ( $($item:item)+ ) => {
+ $(
+ #[cfg(feature = "raw_os_str")]
+ $item
+ )+
+ };
+}
+
+#[cfg_attr(
+ all(target_family = "wasm", target_os = "unknown"),
+ path = "wasm/mod.rs"
+)]
+#[cfg_attr(windows, path = "windows/mod.rs")]
+#[cfg_attr(
+ not(any(all(target_family = "wasm", target_os = "unknown"), windows)),
+ path = "common/mod.rs"
+)]
+mod imp;
+
+#[cfg(any(
+ all(
+ feature = "raw_os_str",
+ target_family = "wasm",
+ target_os = "unknown",
+ ),
+ windows,
+))]
+mod util;
+
+if_raw_str! {
+ pub mod iter;
+
+ mod pattern;
+ pub use pattern::Pattern;
+
+ mod raw_str;
+ pub use raw_str::RawOsStr;
+ pub use raw_str::RawOsStrCow;
+ pub use raw_str::RawOsString;
+}
+
+deprecated_checked_conversion! {
+ "use `OsStrBytes::assert_from_raw_bytes` or \
+ `OsStringBytes::assert_from_raw_vec` instead, or enable the \
+ 'checked_conversions' feature",
+ /// The error that occurs when a byte sequence is not representable in the
+ /// platform encoding.
+ ///
+ /// [`Result::unwrap`] should almost always be called on results containing
+ /// this error. It should be known whether or not byte sequences are
+ /// properly encoded for the platform, since [the module-level
+ /// documentation][encoding] discourages using encoded bytes in
+ /// interchange. Results are returned primarily to make panicking behavior
+ /// explicit.
+ ///
+ /// On Unix, this error is never returned, but [`OsStrExt`] or
+ /// [`OsStringExt`] should be used instead if that needs to be guaranteed.
+ ///
+ /// [encoding]: self#encoding
+ /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+ /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+ /// [`Result::unwrap`]: ::std::result::Result::unwrap
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ pub struct EncodingError(imp::EncodingError);
+}
+
+impl Display for EncodingError {
+ #[inline]
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+impl Error for EncodingError {}
+
+type Result<T> = result::Result<T, EncodingError>;
+
+fn from_raw_bytes<'a, S>(
+ string: S,
+) -> result::Result<Cow<'a, OsStr>, imp::EncodingError>
+where
+ S: Into<Cow<'a, [u8]>>,
+{
+ match string.into() {
+ Cow::Borrowed(string) => imp::os_str_from_bytes(string),
+ Cow::Owned(string) => imp::os_string_from_vec(string).map(Cow::Owned),
+ }
+}
+
+fn cow_os_str_into_path(string: Cow<'_, OsStr>) -> Cow<'_, Path> {
+ match string {
+ Cow::Borrowed(string) => Cow::Borrowed(Path::new(string)),
+ Cow::Owned(string) => Cow::Owned(string.into()),
+ }
+}
+
+/// A platform agnostic variant of [`OsStrExt`].
+///
+/// For more information, see [the module-level documentation][module].
+///
+/// [module]: self
+/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+pub trait OsStrBytes: private::Sealed + ToOwned {
+ /// Converts a byte string into an equivalent platform-native string.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// use std::ffi::OsStr;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::OsStrBytes;
+ ///
+ /// let os_string = env::current_exe()?;
+ /// let os_bytes = os_string.to_raw_bytes();
+ /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: self#encoding
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+ where
+ S: Into<Cow<'a, [u8]>>;
+
+ deprecated_checked_conversion! {
+ "use `assert_from_raw_bytes` instead, or enable the \
+ 'checked_conversions' feature",
+ /// Converts a byte string into an equivalent platform-native string.
+ ///
+ /// [`assert_from_raw_bytes`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// use std::ffi::OsStr;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::OsStrBytes;
+ ///
+ /// let os_string = env::current_exe()?;
+ /// let os_bytes = os_string.to_raw_bytes();
+ /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+ where
+ S: Into<Cow<'a, [u8]>>;
+ }
+
+ /// Converts a platform-native string into an equivalent byte string.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use os_str_bytes::OsStrBytes;
+ ///
+ /// let string = "foobar";
+ /// let os_string = OsStr::new(string);
+ /// assert_eq!(string.as_bytes(), &*os_string.to_raw_bytes());
+ /// ```
+ ///
+ /// [unspecified encoding]: self#encoding
+ #[must_use]
+ fn to_raw_bytes(&self) -> Cow<'_, [u8]>;
+}
+
+impl OsStrBytes for OsStr {
+ #[inline]
+ fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+ where
+ S: Into<Cow<'a, [u8]>>,
+ {
+ expect_encoded!(from_raw_bytes(string))
+ }
+
+ #[inline]
+ fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+ where
+ S: Into<Cow<'a, [u8]>>,
+ {
+ from_raw_bytes(string).map_err(EncodingError)
+ }
+
+ #[inline]
+ fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
+ imp::os_str_to_bytes(self)
+ }
+}
+
+impl OsStrBytes for Path {
+ #[inline]
+ fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+ where
+ S: Into<Cow<'a, [u8]>>,
+ {
+ cow_os_str_into_path(OsStr::assert_from_raw_bytes(string))
+ }
+
+ #[inline]
+ fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+ where
+ S: Into<Cow<'a, [u8]>>,
+ {
+ OsStr::from_raw_bytes(string).map(cow_os_str_into_path)
+ }
+
+ #[inline]
+ fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
+ self.as_os_str().to_raw_bytes()
+ }
+}
+
+/// A platform agnostic variant of [`OsStringExt`].
+///
+/// For more information, see [the module-level documentation][module].
+///
+/// [module]: self
+/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+pub trait OsStringBytes: private::Sealed + Sized {
+ /// Converts a byte string into an equivalent platform-native string.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// use std::ffi::OsString;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::OsStringBytes;
+ ///
+ /// let os_string = env::current_exe()?;
+ /// let os_bytes = os_string.clone().into_raw_vec();
+ /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: self#encoding
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ fn assert_from_raw_vec(string: Vec<u8>) -> Self;
+
+ deprecated_checked_conversion! {
+ "use `assert_from_raw_vec` instead, or enable the \
+ 'checked_conversions' feature",
+ /// Converts a byte string into an equivalent platform-native string.
+ ///
+ /// [`assert_from_raw_vec`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// use std::ffi::OsString;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::OsStringBytes;
+ ///
+ /// let os_string = env::current_exe()?;
+ /// let os_bytes = os_string.clone().into_raw_vec();
+ /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ fn from_raw_vec(string: Vec<u8>) -> Result<Self>;
+ }
+
+ /// Converts a platform-native string into an equivalent byte string.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// use os_str_bytes::OsStringBytes;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let os_string: OsString = string.clone().into();
+ /// assert_eq!(string.into_bytes(), os_string.into_raw_vec());
+ /// ```
+ ///
+ /// [unspecified encoding]: self#encoding
+ #[must_use]
+ fn into_raw_vec(self) -> Vec<u8>;
+}
+
+impl OsStringBytes for OsString {
+ #[inline]
+ fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+ expect_encoded!(imp::os_string_from_vec(string))
+ }
+
+ #[inline]
+ fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
+ imp::os_string_from_vec(string).map_err(EncodingError)
+ }
+
+ #[inline]
+ fn into_raw_vec(self) -> Vec<u8> {
+ imp::os_string_into_vec(self)
+ }
+}
+
+impl OsStringBytes for PathBuf {
+ #[inline]
+ fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+ OsString::assert_from_raw_vec(string).into()
+ }
+
+ #[inline]
+ fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
+ OsString::from_raw_vec(string).map(Into::into)
+ }
+
+ #[inline]
+ fn into_raw_vec(self) -> Vec<u8> {
+ self.into_os_string().into_raw_vec()
+ }
+}
+
+mod private {
+ use std::ffi::OsStr;
+ use std::ffi::OsString;
+ use std::path::Path;
+ use std::path::PathBuf;
+
+ if_raw_str! {
+ use std::borrow::Cow;
+
+ use super::RawOsStr;
+ }
+
+ pub trait Sealed {}
+
+ impl Sealed for char {}
+ impl Sealed for OsStr {}
+ impl Sealed for OsString {}
+ impl Sealed for Path {}
+ impl Sealed for PathBuf {}
+ impl Sealed for &str {}
+ impl Sealed for &String {}
+
+ if_raw_str! {
+ impl Sealed for Cow<'_, RawOsStr> {}
+ }
+}