diff options
Diffstat (limited to 'vendor/os_str_bytes/src/lib.rs')
-rw-r--r-- | vendor/os_str_bytes/src/lib.rs | 432 |
1 files changed, 432 insertions, 0 deletions
diff --git a/vendor/os_str_bytes/src/lib.rs b/vendor/os_str_bytes/src/lib.rs new file mode 100644 index 000000000..9a99059c6 --- /dev/null +++ b/vendor/os_str_bytes/src/lib.rs @@ -0,0 +1,432 @@ +//! This crate allows interacting with the data stored by [`OsStr`] and +//! [`OsString`], without resorting to panics or corruption for invalid UTF-8. +//! Thus, methods can be used that are already defined on [`[u8]`][slice] and +//! [`Vec<u8>`]. +//! +//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`] +//! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which +//! requires the bytes to be valid in UTF-8. However, since this crate makes +//! conversions directly between the platform encoding and raw bytes, even some +//! strings invalid in UTF-8 can be converted. +//! +//! In most cases, [`RawOsStr`] and [`RawOsString`] should be used. +//! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are +//! easier to misuse. +//! +//! # Encoding +//! +//! The encoding of bytes returned or accepted by methods of this crate is +//! intentionally left unspecified. It may vary for different platforms, so +//! defining it would run contrary to the goal of generic string handling. +//! However, the following invariants will always be upheld: +//! +//! - The encoding will be compatible with UTF-8. In particular, splitting an +//! encoded byte sequence by a UTF-8–encoded character always produces other +//! valid byte sequences. They can be re-encoded without error using +//! [`OsStrBytes::from_raw_bytes`] and similar methods. +//! +//! - All characters valid in platform strings are representable. [`OsStr`] and +//! [`OsString`] can always be losslessly reconstructed from extracted bytes. +//! +//! Note that the chosen encoding may not match how Rust stores these strings +//! internally, which is undocumented. For instance, the result of calling +//! [`OsStr::len`] will not necessarily match the number of bytes this crate +//! uses to represent the same string. +//! +//! Additionally, concatenation may yield unexpected results without a UTF-8 +//! separator. If two platform strings need to be concatenated, the only safe +//! way to do so is using [`OsString::push`]. This limitation also makes it +//! undesirable to use the bytes in interchange. +//! +//! Since this encoding can change between versions and platforms, it should +//! not be used for storage. The standard library provides implementations of +//! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be +//! preferred for that use case. +//! +//! # User Input +//! +//! Traits in this crate should ideally not be used to convert byte sequences +//! that did not originate from [`OsStr`] or a related struct. The encoding +//! used by this crate is an implementation detail, so it does not make sense +//! to expose it to users. +//! +//! Crate [bstr] offers some useful alternative methods, such as +//! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant +//! for user input. But, they reject some byte sequences used to represent +//! valid platform strings, which would be undesirable for reliable path +//! handling. They are best used only when accepting unknown input. +//! +//! This crate is meant to help when you already have an instance of [`OsStr`] +//! and need to modify the data in a lossless way. +//! +//! # Features +//! +//! These features are optional and can be enabled or disabled in a +//! "Cargo.toml" file. +//! +//! ### Default Features +//! +//! - **memchr** - +//! Changes the implementation to use crate [memchr] for better performance. +//! This feature is useless when "raw\_os\_str" is disabled. +//! +//! For more information, see [`RawOsStr`][memchr complexity]. +//! +//! - **raw\_os\_str** - +//! Enables use of [`RawOsStr`] and [`RawOsString`]. +//! +//! ### Optional Features +//! +//! - **print\_bytes** - +//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and +//! [`RawOsString`]. +//! +//! - **uniquote** - +//! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and +//! [`RawOsString`]. +//! +//! # Implementation +//! +//! Some methods return [`Cow`] to account for platform differences. However, +//! no guarantee is made that the same variant of that enum will always be +//! returned for the same platform. Whichever can be constructed most +//! efficiently will be returned. +//! +//! All traits are [sealed], meaning that they can only be implemented by this +//! crate. Otherwise, backward compatibility would be more difficult to +//! maintain for new features. +//! +//! # Complexity +//! +//! The time complexities of trait methods will vary based on what +//! functionality is available for the platform. At worst, they will all be +//! linear, but some can take constant time. For example, +//! [`OsStringBytes::from_raw_vec`] might be able to reuse the allocation for +//! its argument. +//! +//! # Examples +//! +//! ``` +//! # #[cfg(any())] +//! use std::env; +//! use std::fs; +//! # use std::io; +//! +//! use os_str_bytes::OsStrBytes; +//! +//! # mod env { +//! # use std::env; +//! # use std::ffi::OsString; +//! # +//! # pub fn args_os() -> impl Iterator<Item = OsString> { +//! # let mut file = env::temp_dir(); +//! # file.push("os_str_bytes\u{E9}.txt"); +//! # return vec![OsString::new(), file.into_os_string()].into_iter(); +//! # } +//! # } +//! # +//! for file in env::args_os().skip(1) { +//! if file.to_raw_bytes().first() != Some(&b'-') { +//! let string = "Hello, world!"; +//! fs::write(&file, string)?; +//! assert_eq!(string, fs::read_to_string(file)?); +//! } +//! } +//! # +//! # Ok::<_, io::Error>(()) +//! ``` +//! +//! [bstr]: https://crates.io/crates/bstr +//! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str +//! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string +//! [memchr complexity]: RawOsStr#complexity +//! [memchr]: https://crates.io/crates/memchr +//! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt +//! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt +//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed +//! [print\_bytes]: https://crates.io/crates/print_bytes + +// Only require a nightly compiler when building documentation for docs.rs. +// This is a private option that should not be used. +// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407 +// https://github.com/dylni/os_str_bytes/issues/2 +#![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))] +// Nightly is also currently required for the SGX platform. +#![cfg_attr( + all(target_vendor = "fortanix", target_env = "sgx"), + feature(sgx_platform) +)] +#![forbid(unsafe_op_in_unsafe_fn)] +#![warn(unused_results)] + +use std::borrow::Cow; +use std::error::Error; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fmt; +use std::fmt::Display; +use std::fmt::Formatter; +use std::path::Path; +use std::path::PathBuf; +use std::result; + +macro_rules! if_raw_str { + ( $($item:item)+ ) => { + $( + #[cfg(feature = "raw_os_str")] + $item + )+ + }; +} + +#[cfg_attr( + all(target_arch = "wasm32", target_os = "unknown"), + path = "wasm32/mod.rs" +)] +#[cfg_attr(windows, path = "windows/mod.rs")] +#[cfg_attr( + not(any(all(target_arch = "wasm32", target_os = "unknown"), windows)), + path = "common/mod.rs" +)] +mod imp; + +mod util; + +if_raw_str! { + pub mod iter; + + mod pattern; + pub use pattern::Pattern; + + mod raw_str; + pub use raw_str::RawOsStr; + pub use raw_str::RawOsString; +} + +/// The error that occurs when a byte sequence is not representable in the +/// platform encoding. +/// +/// [`Result::unwrap`] should almost always be called on results containing +/// this error. It should be known whether or not byte sequences are properly +/// encoded for the platform, since [the module-level documentation][encoding] +/// discourages using encoded bytes in interchange. Results are returned +/// primarily to make panicking behavior explicit. +/// +/// On Unix, this error is never returned, but [`OsStrExt`] or [`OsStringExt`] +/// should be used instead if that needs to be guaranteed. +/// +/// [encoding]: self#encoding +/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt +/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt +/// [`Result::unwrap`]: ::std::result::Result::unwrap +#[derive(Debug, Eq, PartialEq)] +pub struct EncodingError(imp::EncodingError); + +impl Display for EncodingError { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(formatter) + } +} + +impl Error for EncodingError {} + +type Result<T> = result::Result<T, EncodingError>; + +/// A platform agnostic variant of [`OsStrExt`]. +/// +/// For more information, see [the module-level documentation][module]. +/// +/// [module]: self +/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt +pub trait OsStrBytes: private::Sealed + ToOwned { + /// Converts a byte slice into an equivalent platform-native string. + /// + /// Provided byte strings should always be valid for the [unspecified + /// encoding] used by this crate. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsStr; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.to_raw_bytes(); + /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> + where + S: Into<Cow<'a, [u8]>>; + + /// Converts a platform-native string into an equivalent byte slice. + /// + /// The returned bytes string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// + /// let os_string = env::current_exe()?; + /// println!("{:?}", os_string.to_raw_bytes()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + #[must_use] + fn to_raw_bytes(&self) -> Cow<'_, [u8]>; +} + +impl OsStrBytes for OsStr { + #[inline] + fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> + where + S: Into<Cow<'a, [u8]>>, + { + match string.into() { + Cow::Borrowed(string) => { + imp::os_str_from_bytes(string).map_err(EncodingError) + } + Cow::Owned(string) => { + OsStringBytes::from_raw_vec(string).map(Cow::Owned) + } + } + } + + #[inline] + fn to_raw_bytes(&self) -> Cow<'_, [u8]> { + imp::os_str_to_bytes(self) + } +} + +impl OsStrBytes for Path { + #[inline] + fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> + where + S: Into<Cow<'a, [u8]>>, + { + OsStr::from_raw_bytes(string).map(|os_string| match os_string { + Cow::Borrowed(os_string) => Cow::Borrowed(Self::new(os_string)), + Cow::Owned(os_string) => Cow::Owned(os_string.into()), + }) + } + + #[inline] + fn to_raw_bytes(&self) -> Cow<'_, [u8]> { + self.as_os_str().to_raw_bytes() + } +} + +/// A platform agnostic variant of [`OsStringExt`]. +/// +/// For more information, see [the module-level documentation][module]. +/// +/// [module]: self +/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt +pub trait OsStringBytes: private::Sealed + Sized { + /// Converts a byte vector into an equivalent platform-native string. + /// + /// Provided byte strings should always be valid for the [unspecified + /// encoding] used by this crate. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsString; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.clone().into_raw_vec(); + /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + fn from_raw_vec(string: Vec<u8>) -> Result<Self>; + + /// Converts a platform-native string into an equivalent byte vector. + /// + /// The returned byte string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// + /// let os_string = env::current_exe()?; + /// println!("{:?}", os_string.into_raw_vec()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + #[must_use] + fn into_raw_vec(self) -> Vec<u8>; +} + +impl OsStringBytes for OsString { + #[inline] + fn from_raw_vec(string: Vec<u8>) -> Result<Self> { + imp::os_string_from_vec(string).map_err(EncodingError) + } + + #[inline] + fn into_raw_vec(self) -> Vec<u8> { + imp::os_string_into_vec(self) + } +} + +impl OsStringBytes for PathBuf { + #[inline] + fn from_raw_vec(string: Vec<u8>) -> Result<Self> { + OsString::from_raw_vec(string).map(Into::into) + } + + #[inline] + fn into_raw_vec(self) -> Vec<u8> { + self.into_os_string().into_raw_vec() + } +} + +mod private { + use std::ffi::OsStr; + use std::ffi::OsString; + use std::path::Path; + use std::path::PathBuf; + + pub trait Sealed {} + impl Sealed for char {} + impl Sealed for OsStr {} + impl Sealed for OsString {} + impl Sealed for Path {} + impl Sealed for PathBuf {} + impl Sealed for &str {} + impl Sealed for &String {} +} |