summary | refs | log | tree | commit | diff | stats
path: root/library/std/src/ffi
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
commit: dc0db358abe19481e475e10c32149b53370f1a1c (patch)
tree: ab8ce99c4b255ce46f99ef402c27916055b899ee /library/std/src/ffi
parent: Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff)
download: rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz
download: rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/std/src/ffi')
-rw-r--r-- library/std/src/ffi/mod.rs 8
-rw-r--r-- library/std/src/ffi/os_str.rs 102
2 files changed, 96 insertions, 14 deletions
diff --git a/library/std/src/ffi/mod.rs b/library/std/src/ffi/mod.rs
index d987bf69b..3ddb87487 100644
--- a/library/std/src/ffi/mod.rs
+++ b/library/std/src/ffi/mod.rs
@@ -127,6 +127,14 @@
//! trait, which provides a [`from_wide`] method to convert a native Windows
//! string (without the terminating nul character) to an [`OsString`].
//!
+//! ## On all platforms
+//!
+//! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of
+//! UTF-8; see [`OsString`] for more details on its encoding on different platforms.
+//!
+//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_os_str_bytes`] and
+//! [`OsStr::from_os_str_bytes_unchecked`].
+//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//! [`env::set_var()`]: crate::env::set_var "env::set_var"
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index 5c0541d3c..e7bad9d54 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -667,6 +667,51 @@ impl OsStr {
s.as_ref()
}
+ /// Converts a slice of bytes to an OS string slice without checking that the string contains
+ /// valid `OsStr`-encoded data.
+ ///
+ /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
+ /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
+ /// ASCII.
+ ///
+ /// See the [module's toplevel documentation about conversions][conversions] for safe,
+ /// cross-platform [conversions] from/to native representations.
+ ///
+ /// # Safety
+ ///
+ /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
+ /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same Rust version
+ /// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent
+ /// over the network or stored in a file will likely violate these safety rules.
+ ///
+ /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
+ /// split either immediately before or immediately after any valid non-empty UTF-8 substring.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// #![feature(os_str_bytes)]
+ ///
+ /// use std::ffi::OsStr;
+ ///
+ /// let os_str = OsStr::new("Mary had a little lamb");
+ /// let bytes = os_str.as_os_str_bytes();
+ /// let words = bytes.split(|b| *b == b' ');
+ /// let words: Vec<&OsStr> = words.map(|word| {
+ /// // SAFETY:
+ /// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
+ /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
+ /// unsafe { OsStr::from_os_str_bytes_unchecked(word) }
+ /// }).collect();
+ /// ```
+ ///
+ /// [conversions]: super#conversions
+ #[inline]
+ #[unstable(feature = "os_str_bytes", issue = "111544")]
+ pub unsafe fn from_os_str_bytes_unchecked(bytes: &[u8]) -> &Self {
+ Self::from_inner(Slice::from_os_str_bytes_unchecked(bytes))
+ }
+
#[inline]
fn from_inner(inner: &Slice) -> &OsStr {
// SAFETY: OsStr is just a wrapper of Slice,
@@ -700,7 +745,7 @@ impl OsStr {
without modifying the original"]
#[inline]
pub fn to_str(&self) -> Option<&str> {
- self.inner.to_str()
+ self.inner.to_str().ok()
}
/// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
@@ -837,13 +882,24 @@ impl OsStr {
OsString { inner: Buf::from_box(boxed) }
}
- /// Gets the underlying byte representation.
+ /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS
+ /// string slice, use the [`OsStr::from_os_str_bytes_unchecked`] function.
+ ///
+ /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
+ /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
+ /// ASCII.
+ ///
+ /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
+ /// be treated as opaque and only comparable within the same Rust version built for the same
+ /// target platform. For example, sending the slice over the network or storing it in a file
+ /// will likely result in incompatible byte slices. See [`OsString`] for more encoding details
+ /// and [`std::ffi`] for platform-specific, specified conversions.
///
- /// Note: it is *crucial* that this API is not externally public, to avoid
- /// revealing the internal, platform-specific encodings.
+ /// [`std::ffi`]: crate::ffi
#[inline]
- pub(crate) fn bytes(&self) -> &[u8] {
- unsafe { &*(&self.inner as *const _ as *const [u8]) }
+ #[unstable(feature = "os_str_bytes", issue = "111544")]
+ pub fn as_os_str_bytes(&self) -> &[u8] {
+ self.inner.as_os_str_bytes()
}
/// Converts this string to its ASCII lower case equivalent in-place.
@@ -1109,6 +1165,24 @@ impl<'a> From<Cow<'a, OsStr>> for OsString {
}
}
+#[stable(feature = "str_tryfrom_osstr_impl", since = "1.72.0")]
+impl<'a> TryFrom<&'a OsStr> for &'a str {
+ type Error = crate::str::Utf8Error;
+
+ /// Tries to convert an `&OsStr` to a `&str`.
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let as_str = <&str>::try_from(os_str).unwrap();
+ /// assert_eq!(as_str, "foo");
+ /// ```
+ fn try_from(value: &'a OsStr) -> Result<Self, Self::Error> {
+ value.inner.to_str()
+ }
+}
+
#[stable(feature = "box_default_extra", since = "1.17.0")]
impl Default for Box<OsStr> {
#[inline]
@@ -1131,7 +1205,7 @@ impl Default for &OsStr {
impl PartialEq for OsStr {
#[inline]
fn eq(&self, other: &OsStr) -> bool {
- self.bytes().eq(other.bytes())
+ self.as_os_str_bytes().eq(other.as_os_str_bytes())
}
}
@@ -1158,23 +1232,23 @@ impl Eq for OsStr {}
impl PartialOrd for OsStr {
#[inline]
fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> {
- self.bytes().partial_cmp(other.bytes())
+ self.as_os_str_bytes().partial_cmp(other.as_os_str_bytes())
}
#[inline]
fn lt(&self, other: &OsStr) -> bool {
- self.bytes().lt(other.bytes())
+ self.as_os_str_bytes().lt(other.as_os_str_bytes())
}
#[inline]
fn le(&self, other: &OsStr) -> bool {
- self.bytes().le(other.bytes())
+ self.as_os_str_bytes().le(other.as_os_str_bytes())
}
#[inline]
fn gt(&self, other: &OsStr) -> bool {
- self.bytes().gt(other.bytes())
+ self.as_os_str_bytes().gt(other.as_os_str_bytes())
}
#[inline]
fn ge(&self, other: &OsStr) -> bool {
- self.bytes().ge(other.bytes())
+ self.as_os_str_bytes().ge(other.as_os_str_bytes())
}
}
@@ -1193,7 +1267,7 @@ impl PartialOrd<str> for OsStr {
impl Ord for OsStr {
#[inline]
fn cmp(&self, other: &OsStr) -> cmp::Ordering {
- self.bytes().cmp(other.bytes())
+ self.as_os_str_bytes().cmp(other.as_os_str_bytes())
}
}
@@ -1243,7 +1317,7 @@ impl_cmp!(Cow<'a, OsStr>, OsString);
impl Hash for OsStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
- self.bytes().hash(state)
+ self.as_os_str_bytes().hash(state)
}
}