diff options
Diffstat (limited to 'library/std/src/ffi')
-rw-r--r-- | library/std/src/ffi/mod.rs | 2 | ||||
-rw-r--r-- | library/std/src/ffi/os_str.rs | 73 |
2 files changed, 71 insertions, 4 deletions
diff --git a/library/std/src/ffi/mod.rs b/library/std/src/ffi/mod.rs index 3ddb87487..ee9f6ed08 100644 --- a/library/std/src/ffi/mod.rs +++ b/library/std/src/ffi/mod.rs @@ -156,6 +156,8 @@ #[stable(feature = "alloc_c_string", since = "1.64.0")] pub use alloc::ffi::{CString, FromVecWithNulError, IntoStringError, NulError}; +#[stable(feature = "cstr_from_bytes_until_nul", since = "1.73.0")] +pub use core::ffi::FromBytesUntilNulError; #[stable(feature = "core_c_str", since = "1.64.0")] pub use core::ffi::{CStr, FromBytesWithNulError}; diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index e7bad9d54..43cecb19b 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -110,12 +110,12 @@ impl crate::sealed::Sealed for OsString {} /// [conversions]: super#conversions #[cfg_attr(not(test), rustc_diagnostic_item = "OsStr")] #[stable(feature = "rust1", since = "1.0.0")] -// FIXME: // `OsStr::from_inner` current implementation relies // on `OsStr` being layout-compatible with `Slice`. -// When attribute privacy is implemented, `OsStr` should be annotated as `#[repr(transparent)]`. -// Anyway, `OsStr` representation and layout are considered implementation details, are -// not documented and must not be relied upon. +// However, `OsStr` layout is considered an implementation detail and must not be relied upon. We +// want `repr(transparent)` but we don't want it to show up in rustdoc, so we hide it under +// `cfg(doc)`. This is an ad-hoc implementation of attribute privacy. +#[cfg_attr(not(doc), repr(transparent))] pub struct OsStr { inner: Slice, } @@ -141,6 +141,51 @@ impl OsString { OsString { inner: Buf::from_string(String::new()) } } + /// Converts bytes to an `OsString` without checking that the bytes contains + /// valid [`OsStr`]-encoded data. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// See the [module's toplevel documentation about conversions][conversions] for safe, + /// cross-platform [conversions] from/to native representations. + /// + /// # Safety + /// + /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of + /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version + /// built for the same target platform. For example, reconstructing an `OsString` from bytes sent + /// over the network or stored in a file will likely violate these safety rules. + /// + /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be + /// split either immediately before or immediately after any valid non-empty UTF-8 substring. + /// + /// # Example + /// + /// ``` + /// #![feature(os_str_bytes)] + /// + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("Mary had a little lamb"); + /// let bytes = os_str.as_os_str_bytes(); + /// let words = bytes.split(|b| *b == b' '); + /// let words: Vec<&OsStr> = words.map(|word| { + /// // SAFETY: + /// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes` + /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring + /// unsafe { OsStr::from_os_str_bytes_unchecked(word) } + /// }).collect(); + /// ``` + /// + /// [conversions]: super#conversions + #[inline] + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self { + OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) } + } + /// Converts to an [`OsStr`] slice. /// /// # Examples @@ -159,6 +204,26 @@ impl OsString { self } + /// Converts the `OsString` into a byte slice. To convert the byte slice back into an + /// `OsString`, use the [`OsStr::from_os_str_bytes_unchecked`] function. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should + /// be treated as opaque and only comparable within the same rust version built for the same + /// target platform. For example, sending the bytes over the network or storing it in a file + /// will likely result in incompatible data. See [`OsString`] for more encoding details + /// and [`std::ffi`] for platform-specific, specified conversions. + /// + /// [`std::ffi`]: crate::ffi + #[inline] + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub fn into_os_str_bytes(self) -> Vec<u8> { + self.inner.into_os_str_bytes() + } + /// Converts the `OsString` into a [`String`] if it contains valid Unicode data. /// /// On failure, ownership of the original `OsString` is returned. |