summary | refs | log | tree | commit | diff | stats
path: root/library/std/src/ffi
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:26:03 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:26:03 +0000
commit: 9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree: 21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /library/std/src/ffi
parent: Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
download: rustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz
download: rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/std/src/ffi')
-rw-r--r-- library/std/src/ffi/os_str.rs 82
-rw-r--r-- library/std/src/ffi/os_str/tests.rs 54
2 files changed, 130 insertions, 6 deletions
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index fa9d48771..819731821 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -6,9 +6,10 @@ use crate::cmp;
use crate::collections::TryReserveError;
use crate::fmt;
use crate::hash::{Hash, Hasher};
-use crate::ops;
+use crate::ops::{self, Range};
use crate::rc::Rc;
-use crate::str::FromStr;
+use crate::slice;
+use crate::str::{from_utf8 as str_from_utf8, FromStr};
use crate::sync::Arc;
use crate::sys::os_str::{Buf, Slice};
@@ -963,6 +964,83 @@ impl OsStr {
self.inner.as_encoded_bytes()
}
+ /// Borrows the substring selected by `range`, where `range` indexes into the bytes
+ /// returned by [`OsStr::as_encoded_bytes`].
+ ///
+ /// The range's start and end must both lie on valid `OsStr` boundaries.
+ /// A valid `OsStr` boundary is one of:
+ /// - The start of the string
+ /// - The end of the string
+ /// - Immediately before a valid non-empty UTF-8 substring
+ /// - Immediately after a valid non-empty UTF-8 substring
+ ///
+ /// # Panics
+ ///
+ /// Panics if `range` does not lie on valid `OsStr` boundaries or if it
+ /// exceeds the end of the string.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// #![feature(os_str_slice)]
+ ///
+ /// use std::ffi::OsStr;
+ ///
+ /// let os_str = OsStr::new("foo=bar");
+ /// let bytes = os_str.as_encoded_bytes();
+ /// if let Some(index) = bytes.iter().position(|b| *b == b'=') {
+ /// let key = os_str.slice_encoded_bytes(..index);
+ /// let value = os_str.slice_encoded_bytes(index + 1..);
+ /// assert_eq!(key, "foo");
+ /// assert_eq!(value, "bar");
+ /// }
+ /// ```
+ #[unstable(feature = "os_str_slice", issue = "118485")]
+ pub fn slice_encoded_bytes<R: ops::RangeBounds<usize>>(&self, range: R) -> &Self {
+ #[track_caller]
+ fn check_valid_boundary(bytes: &[u8], index: usize) { // panics unless `index` is an `OsStr` boundary of `bytes`
+ if index == 0 || index == bytes.len() { // the start and the end of the string are always boundaries
+ return;
+ }
+
+ // Fast path: an ASCII byte directly before or after `index` is itself a valid one-byte UTF-8 substring
+ if bytes[index - 1].is_ascii() || bytes[index].is_ascii() {
+ return;
+ }
+
+ let (before, after) = bytes.split_at(index); // `before` = bytes[..index], `after` = bytes[index..]
+
+ // UTF-8 takes at most 4 bytes per codepoint, so we don't
+ // need to check more than that.
+ let after = after.get(..4).unwrap_or(after); // first 4 bytes of `after`, or all of it if shorter
+ match str_from_utf8(after) {
+ Ok(_) => return, // valid UTF-8 starts at `index`
+ Err(err) if err.valid_up_to() != 0 => return, // invalid further on, but a non-empty valid prefix starts at `index`
+ Err(_) => (), // nothing valid starts at `index`; fall through and look at what ends there
+ }
+
+ for len in 2..=4.min(index) { // length 1 would be an ASCII byte, already handled by the fast path
+ let before = &before[index - len..]; // the last `len` bytes before `index`
+ if str_from_utf8(before).is_ok() { // a valid UTF-8 substring ends exactly at `index`
+ return;
+ }
+ }
+
+ panic!("byte index {index} is not an OsStr boundary");
+ }
+
+ let encoded_bytes = self.as_encoded_bytes();
+ let Range { start, end } = slice::range(range, ..encoded_bytes.len()); // resolve `range` against the encoded length
+ check_valid_boundary(encoded_bytes, start);
+ check_valid_boundary(encoded_bytes, end);
+
+ // SAFETY: `slice::range` was given `..encoded_bytes.len()` as bounds, so it ensures that `start` and `end` are valid
+ let slice = unsafe { encoded_bytes.get_unchecked(start..end) };
+
+ // SAFETY: `slice` comes from `self` and we validated the boundaries
+ unsafe { Self::from_encoded_bytes_unchecked(slice) }
+ }
+
/// Converts this string to its ASCII lower case equivalent in-place.
///
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
diff --git a/library/std/src/ffi/os_str/tests.rs b/library/std/src/ffi/os_str/tests.rs
index d7926749a..60cde376d 100644
--- a/library/std/src/ffi/os_str/tests.rs
+++ b/library/std/src/ffi/os_str/tests.rs
@@ -1,8 +1,4 @@
use super::*;
-use crate::sys_common::{AsInner, IntoInner};
-
-use crate::rc::Rc;
-use crate::sync::Arc;
#[test]
fn test_os_string_with_capacity() {
@@ -177,3 +173,53 @@ fn into_rc() {
assert_eq!(&*rc2, os_str);
assert_eq!(&*arc2, os_str);
}
+
+#[test]
+fn slice_encoded_bytes() {
+    // Encoded layout: "123" is bytes 0..3, "θ" is 3..5, "გ" is 5..8 and "🦀" is 8..12,
+    // so every range below starts and ends on a character boundary.
+    let os_str = OsStr::new("123θგ🦀");
+    // ASCII
+    let digits = os_str.slice_encoded_bytes(..3);
+    assert_eq!(digits, "123");
+    let three = os_str.slice_encoded_bytes(2..3);
+    assert_eq!(three, "3");
+    // 2-byte UTF-8
+    let theta = os_str.slice_encoded_bytes(3..5);
+    assert_eq!(theta, "θ");
+    // 3-byte UTF-8
+    let gani = os_str.slice_encoded_bytes(5..8);
+    assert_eq!(gani, "გ");
+    // 4-byte UTF-8
+    let crab = os_str.slice_encoded_bytes(8..);
+    assert_eq!(crab, "🦀");
+}
+
+#[test]
+#[should_panic(expected = "byte index 2 is not an OsStr boundary")]
+fn slice_mid_char() {
+    // The crab is a single 4-byte UTF-8 character, so byte index 2 falls
+    // inside it and slicing there must panic.
+    let crab = OsStr::new("🦀");
+    let _ = crab.slice_encoded_bytes(..2);
+}
+
+#[cfg(windows)]
+#[test]
+#[should_panic(expected = "byte index 3 is not an OsStr boundary")]
+fn slice_between_surrogates() {
+    use crate::os::windows::ffi::OsStringExt;
+
+    // Two identical surrogate code units; the assertion below pins their
+    // encoded form to three bytes each.
+    let wide_units: [u16; 2] = [0xD800; 2];
+    let surrogate_pair = OsString::from_wide(&wide_units);
+    let expected_encoding: &[u8] = &[0xED, 0xA0, 0x80, 0xED, 0xA0, 0x80];
+    assert_eq!(surrogate_pair.as_encoded_bytes(), expected_encoding);
+
+    // Index 3 sits between the two surrogates; the slice is expected to be
+    // rejected with the panic message named in `should_panic`.
+    let _ = surrogate_pair.slice_encoded_bytes(..3);
+}
+
+#[cfg(windows)]
+#[test]
+fn slice_surrogate_edge() {
+    use crate::os::windows::ffi::OsStringExt;
+
+    // A lone surrogate code unit followed by a valid UTF-8 character.
+    let os_string = OsString::from_wide(&[0xD800]);
+    let mut with_crab = os_string.clone();
+    with_crab.push("🦀");
+
+    // Index 3 is where the pushed crab begins, i.e. immediately before a
+    // valid non-empty UTF-8 substring, so both halves can be sliced off.
+    assert_eq!(with_crab.slice_encoded_bytes(..3), os_string);
+    assert_eq!(with_crab.slice_encoded_bytes(3..), "🦀");
+}