diff options
Diffstat (limited to 'library/std/src/ffi')
-rw-r--r-- | library/std/src/ffi/mod.rs | 8 | ||||
-rw-r--r-- | library/std/src/ffi/os_str.rs | 102 |
2 files changed, 96 insertions, 14 deletions
diff --git a/library/std/src/ffi/mod.rs b/library/std/src/ffi/mod.rs index d987bf69b..3ddb87487 100644 --- a/library/std/src/ffi/mod.rs +++ b/library/std/src/ffi/mod.rs @@ -127,6 +127,14 @@ //! trait, which provides a [`from_wide`] method to convert a native Windows //! string (without the terminating nul character) to an [`OsString`]. //! +//! ## On all platforms +//! +//! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of +//! UTF-8; see [`OsString`] for more details on its encoding on different platforms. +//! +//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_os_str_bytes`] and +//! [`OsStr::from_os_str_bytes_unchecked`]. +//! //! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value //! [Unicode code point]: https://www.unicode.org/glossary/#code_point //! [`env::set_var()`]: crate::env::set_var "env::set_var" diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 5c0541d3c..e7bad9d54 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -667,6 +667,51 @@ impl OsStr { s.as_ref() } + /// Converts a slice of bytes to an OS string slice without checking that the string contains + /// valid `OsStr`-encoded data. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// See the [module's toplevel documentation about conversions][conversions] for safe, + /// cross-platform [conversions] from/to native representations. + /// + /// # Safety + /// + /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of + /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version + /// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent + /// over the network or stored in a file will likely violate these safety rules. + /// + /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be + /// split either immediately before or immediately after any valid non-empty UTF-8 substring. + /// + /// # Example + /// + /// ``` + /// #![feature(os_str_bytes)] + /// + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("Mary had a little lamb"); + /// let bytes = os_str.as_os_str_bytes(); + /// let words = bytes.split(|b| *b == b' '); + /// let words: Vec<&OsStr> = words.map(|word| { + /// // SAFETY: + /// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes` + /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring + /// unsafe { OsStr::from_os_str_bytes_unchecked(word) } + /// }).collect(); + /// ``` + /// + /// [conversions]: super#conversions + #[inline] + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub unsafe fn from_os_str_bytes_unchecked(bytes: &[u8]) -> &Self { + Self::from_inner(Slice::from_os_str_bytes_unchecked(bytes)) + } + #[inline] fn from_inner(inner: &Slice) -> &OsStr { // SAFETY: OsStr is just a wrapper of Slice, @@ -700,7 +745,7 @@ impl OsStr { without modifying the original"] #[inline] pub fn to_str(&self) -> Option<&str> { - self.inner.to_str() + self.inner.to_str().ok() } /// Converts an `OsStr` to a <code>[Cow]<[str]></code>. @@ -837,13 +882,24 @@ impl OsStr { OsString { inner: Buf::from_box(boxed) } } - /// Gets the underlying byte representation. + /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS + /// string slice, use the [`OsStr::from_os_str_bytes_unchecked`] function. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should + /// be treated as opaque and only comparable within the same rust version built for the same + /// target platform. For example, sending the slice over the network or storing it in a file + /// will likely result in incompatible byte slices. See [`OsString`] for more encoding details + /// and [`std::ffi`] for platform-specific, specified conversions. /// - /// Note: it is *crucial* that this API is not externally public, to avoid - /// revealing the internal, platform-specific encodings. + /// [`std::ffi`]: crate::ffi #[inline] - pub(crate) fn bytes(&self) -> &[u8] { - unsafe { &*(&self.inner as *const _ as *const [u8]) } + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub fn as_os_str_bytes(&self) -> &[u8] { + self.inner.as_os_str_bytes() } /// Converts this string to its ASCII lower case equivalent in-place. @@ -1109,6 +1165,24 @@ impl<'a> From<Cow<'a, OsStr>> for OsString { } } +#[stable(feature = "str_tryfrom_osstr_impl", since = "1.72.0")] +impl<'a> TryFrom<&'a OsStr> for &'a str { + type Error = crate::str::Utf8Error; + + /// Tries to convert an `&OsStr` to a `&str`. + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo"); + /// let as_str = <&str>::try_from(os_str).unwrap(); + /// assert_eq!(as_str, "foo"); + /// ``` + fn try_from(value: &'a OsStr) -> Result<Self, Self::Error> { + value.inner.to_str() + } +} + #[stable(feature = "box_default_extra", since = "1.17.0")] impl Default for Box<OsStr> { #[inline] @@ -1131,7 +1205,7 @@ impl Default for &OsStr { impl PartialEq for OsStr { #[inline] fn eq(&self, other: &OsStr) -> bool { - self.bytes().eq(other.bytes()) + self.as_os_str_bytes().eq(other.as_os_str_bytes()) } } @@ -1158,23 +1232,23 @@ impl Eq for OsStr {} impl PartialOrd for OsStr { #[inline] fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> { - self.bytes().partial_cmp(other.bytes()) + self.as_os_str_bytes().partial_cmp(other.as_os_str_bytes()) } #[inline] fn lt(&self, other: &OsStr) -> bool { - self.bytes().lt(other.bytes()) + self.as_os_str_bytes().lt(other.as_os_str_bytes()) } #[inline] fn le(&self, other: &OsStr) -> bool { - self.bytes().le(other.bytes()) + self.as_os_str_bytes().le(other.as_os_str_bytes()) } #[inline] fn gt(&self, other: &OsStr) -> bool { - self.bytes().gt(other.bytes()) + self.as_os_str_bytes().gt(other.as_os_str_bytes()) } #[inline] fn ge(&self, other: &OsStr) -> bool { - self.bytes().ge(other.bytes()) + self.as_os_str_bytes().ge(other.as_os_str_bytes()) } } @@ -1193,7 +1267,7 @@ impl PartialOrd<str> for OsStr { impl Ord for OsStr { #[inline] fn cmp(&self, other: &OsStr) -> cmp::Ordering { - self.bytes().cmp(other.bytes()) + self.as_os_str_bytes().cmp(other.as_os_str_bytes()) } } @@ -1243,7 +1317,7 @@ impl_cmp!(Cow<'a, OsStr>, OsString); impl Hash for OsStr { #[inline] fn hash<H: Hasher>(&self, state: &mut H) { - self.bytes().hash(state) + self.as_os_str_bytes().hash(state) } } |