summary refs log tree commit diff stats
path: root/library/core/src/str
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:26:03 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:26:03 +0000
commit 9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree 21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /library/core/src/str
parent Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
download rustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz
rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/str')
-rw-r--r-- library/core/src/str/iter.rs 50
-rw-r--r-- library/core/src/str/mod.rs 79
-rw-r--r-- library/core/src/str/pattern.rs 20
-rw-r--r-- library/core/src/str/traits.rs 46
4 files changed, 160 insertions, 35 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index c30f01b3c..dd2efb005 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -8,6 +8,7 @@ use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce};
use crate::ops::Try;
use crate::option;
use crate::slice::{self, Split as SliceSplit};
+use core::num::NonZeroUsize;
use super::from_utf8_unchecked;
use super::pattern::Pattern;
@@ -50,6 +51,55 @@ impl<'a> Iterator for Chars<'a> {
}
#[inline]
+ fn advance_by(&mut self, mut remainder: usize) -> Result<(), NonZeroUsize> {
+ const CHUNK_SIZE: usize = 32;
+
+ if remainder >= CHUNK_SIZE {
+ let mut chunks = self.iter.as_slice().array_chunks::<CHUNK_SIZE>();
+ let mut bytes_skipped: usize = 0;
+
+ while remainder > CHUNK_SIZE
+ && let Some(chunk) = chunks.next()
+ {
+ bytes_skipped += CHUNK_SIZE;
+
+ let mut start_bytes = [false; CHUNK_SIZE];
+
+ for i in 0..CHUNK_SIZE {
+ start_bytes[i] = !super::validations::utf8_is_cont_byte(chunk[i]);
+ }
+
+ remainder -= start_bytes.into_iter().map(|i| i as u8).sum::<u8>() as usize;
+ }
+
+ // SAFETY: The amount of bytes exists since we just iterated over them,
+ // so advance_by will succeed.
+ unsafe { self.iter.advance_by(bytes_skipped).unwrap_unchecked() };
+
+ // skip trailing continuation bytes
+ while self.iter.len() > 0 {
+ let b = self.iter.as_slice()[0];
+ if !super::validations::utf8_is_cont_byte(b) {
+ break;
+ }
+ // SAFETY: We just peeked at the byte, therefore it exists
+ unsafe { self.iter.advance_by(1).unwrap_unchecked() };
+ }
+ }
+
+ while (remainder > 0) && (self.iter.len() > 0) {
+ remainder -= 1;
+ let b = self.iter.as_slice()[0];
+ let slurp = super::validations::utf8_char_width(b);
+ // SAFETY: utf8 validity requires that the string must contain
+ // the continuation bytes (if any)
+ unsafe { self.iter.advance_by(slurp).unwrap_unchecked() };
+ }
+
+ NonZeroUsize::new(remainder).map_or(Ok(()), Err)
+ }
+
+ #[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.iter.len();
// `(len + 3)` can't overflow, because we know that the `slice::Iter`
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 27178328b..a22c46edc 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -2423,6 +2423,85 @@ impl str {
me.make_ascii_lowercase()
}
+ /// Returns a string slice with leading ASCII whitespace removed.
+ ///
+ /// 'Whitespace' refers to the definition used by
+ /// [`u8::is_ascii_whitespace`].
+ ///
+ /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(byte_slice_trim_ascii)]
+ ///
+ /// assert_eq!(" \t \u{3000}hello world\n".trim_ascii_start(), "\u{3000}hello world\n");
+ /// assert_eq!(" ".trim_ascii_start(), "");
+ /// assert_eq!("".trim_ascii_start(), "");
+ /// ```
+ #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+ #[must_use = "this returns the trimmed string as a new slice, \
+ without modifying the original"]
+ #[inline]
+ pub const fn trim_ascii_start(&self) -> &str {
+ // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+ // UTF-8.
+ unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_start()) }
+ }
+
+ /// Returns a string slice with trailing ASCII whitespace removed.
+ ///
+ /// 'Whitespace' refers to the definition used by
+ /// [`u8::is_ascii_whitespace`].
+ ///
+ /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(byte_slice_trim_ascii)]
+ ///
+ /// assert_eq!("\r hello world\u{3000}\n ".trim_ascii_end(), "\r hello world\u{3000}");
+ /// assert_eq!(" ".trim_ascii_end(), "");
+ /// assert_eq!("".trim_ascii_end(), "");
+ /// ```
+ #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+ #[must_use = "this returns the trimmed string as a new slice, \
+ without modifying the original"]
+ #[inline]
+ pub const fn trim_ascii_end(&self) -> &str {
+ // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+ // UTF-8.
+ unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_end()) }
+ }
+
+ /// Returns a string slice with leading and trailing ASCII whitespace
+ /// removed.
+ ///
+ /// 'Whitespace' refers to the definition used by
+ /// [`u8::is_ascii_whitespace`].
+ ///
+ /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(byte_slice_trim_ascii)]
+ ///
+ /// assert_eq!("\r hello world\n ".trim_ascii(), "hello world");
+ /// assert_eq!(" ".trim_ascii(), "");
+ /// assert_eq!("".trim_ascii(), "");
+ /// ```
+ #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+ #[must_use = "this returns the trimmed string as a new slice, \
+ without modifying the original"]
+ #[inline]
+ pub const fn trim_ascii(&self) -> &str {
+ // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+ // UTF-8.
+ unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii()) }
+ }
+
/// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
///
/// Note: only extended grapheme codepoints that begin the string will be
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index 701e61e66..caa54e00f 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -1740,9 +1740,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
debug_assert!(needle.len() > 1);
use crate::ops::BitAnd;
+ use crate::simd::cmp::SimdPartialEq;
use crate::simd::mask8x16 as Mask;
use crate::simd::u8x16 as Block;
- use crate::simd::{SimdPartialEq, ToBitMask};
let first_probe = needle[0];
let last_byte_offset = needle.len() - 1;
@@ -1765,7 +1765,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
};
// do a naive search if the haystack is too small to fit
- if haystack.len() < Block::LANES + last_byte_offset {
+ if haystack.len() < Block::LEN + last_byte_offset {
return Some(haystack.windows(needle.len()).any(|c| c == needle));
}
@@ -1812,7 +1812,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
let eq_first: Mask = a.simd_eq(first_probe);
let eq_last: Mask = b.simd_eq(second_probe);
let both = eq_first.bitand(eq_last);
- let mask = both.to_bitmask();
+ let mask = both.to_bitmask() as u16;
return mask;
};
@@ -1822,32 +1822,32 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
// The loop condition must ensure that there's enough headroom to read LANE bytes,
// and not only at the current index but also at the index shifted by block_offset
const UNROLL: usize = 4;
- while i + last_byte_offset + UNROLL * Block::LANES < haystack.len() && !result {
+ while i + last_byte_offset + UNROLL * Block::LEN < haystack.len() && !result {
let mut masks = [0u16; UNROLL];
for j in 0..UNROLL {
- masks[j] = test_chunk(i + j * Block::LANES);
+ masks[j] = test_chunk(i + j * Block::LEN);
}
for j in 0..UNROLL {
let mask = masks[j];
if mask != 0 {
- result |= check_mask(i + j * Block::LANES, mask, result);
+ result |= check_mask(i + j * Block::LEN, mask, result);
}
}
- i += UNROLL * Block::LANES;
+ i += UNROLL * Block::LEN;
}
- while i + last_byte_offset + Block::LANES < haystack.len() && !result {
+ while i + last_byte_offset + Block::LEN < haystack.len() && !result {
let mask = test_chunk(i);
if mask != 0 {
result |= check_mask(i, mask, result);
}
- i += Block::LANES;
+ i += Block::LEN;
}
// Process the tail that didn't fit into LANES-sized steps.
// This simply repeats the same procedure but as right-aligned chunk instead
// of a left-aligned one. The last byte must be exactly flush with the string end so
// we don't miss a single byte or read out of bounds.
- let i = haystack.len() - last_byte_offset - Block::LANES;
+ let i = haystack.len() - last_byte_offset - Block::LEN;
let mask = test_chunk(i);
if mask != 0 {
result |= check_mask(i, mask, result);
diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs
index 16fb1dad7..777ad0d81 100644
--- a/library/core/src/str/traits.rs
+++ b/library/core/src/str/traits.rs
@@ -1,8 +1,8 @@
//! Trait implementations for `str`.
use crate::cmp::Ordering;
-use crate::intrinsics::assert_unsafe_precondition;
use crate::ops;
+use crate::panic::debug_assert_nounwind;
use crate::ptr;
use crate::slice::SliceIndex;
@@ -191,39 +191,35 @@ unsafe impl SliceIndex<str> for ops::Range<usize> {
#[inline]
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
let slice = slice as *const [u8];
+
+ debug_assert_nounwind!(
+ // We'd like to check that the bounds are on char boundaries,
+ // but there's not really a way to do so without reading
+ // behind the pointer, which has aliasing implications.
+ // It's also not possible to move this check up to
+ // `str::get_unchecked` without adding a special function
+ // to `SliceIndex` just for this.
+ self.end >= self.start && self.end <= slice.len(),
+ "str::get_unchecked requires that the range is within the string slice",
+ );
+
// SAFETY: the caller guarantees that `self` is in bounds of `slice`
// which satisfies all the conditions for `add`.
- let ptr = unsafe {
- let this = ops::Range { ..self };
- assert_unsafe_precondition!(
- "str::get_unchecked requires that the range is within the string slice",
- (this: ops::Range<usize>, slice: *const [u8]) =>
- // We'd like to check that the bounds are on char boundaries,
- // but there's not really a way to do so without reading
- // behind the pointer, which has aliasing implications.
- // It's also not possible to move this check up to
- // `str::get_unchecked` without adding a special function
- // to `SliceIndex` just for this.
- this.end >= this.start && this.end <= slice.len()
- );
- slice.as_ptr().add(self.start)
- };
+ let ptr = unsafe { slice.as_ptr().add(self.start) };
let len = self.end - self.start;
ptr::slice_from_raw_parts(ptr, len) as *const str
}
#[inline]
unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
let slice = slice as *mut [u8];
+
+ debug_assert_nounwind!(
+ self.end >= self.start && self.end <= slice.len(),
+ "str::get_unchecked_mut requires that the range is within the string slice",
+ );
+
// SAFETY: see comments for `get_unchecked`.
- let ptr = unsafe {
- let this = ops::Range { ..self };
- assert_unsafe_precondition!(
- "str::get_unchecked_mut requires that the range is within the string slice",
- (this: ops::Range<usize>, slice: *mut [u8]) =>
- this.end >= this.start && this.end <= slice.len()
- );
- slice.as_mut_ptr().add(self.start)
- };
+ let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
let len = self.end - self.start;
ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
}