Merging upstream version 1.76.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:26:03 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:26:03 +0000
commit: 9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree: 21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /library/core/src/str
parent: Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
download: rustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip
4 files changed, 160 insertions, 35 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index c30f01b3c..dd2efb005 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -8,6 +8,7 @@ use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce};
 use crate::ops::Try;
 use crate::option;
 use crate::slice::{self, Split as SliceSplit};
+use core::num::NonZeroUsize;
 
 use super::from_utf8_unchecked;
 use super::pattern::Pattern;
@@ -50,6 +51,55 @@ impl<'a> Iterator for Chars<'a> {
     }
 
     #[inline]
+    fn advance_by(&mut self, mut remainder: usize) -> Result<(), NonZeroUsize> {
+        const CHUNK_SIZE: usize = 32;
+
+        if remainder >= CHUNK_SIZE {
+            let mut chunks = self.iter.as_slice().array_chunks::<CHUNK_SIZE>();
+            let mut bytes_skipped: usize = 0;
+
+            while remainder > CHUNK_SIZE
+                && let Some(chunk) = chunks.next()
+            {
+                bytes_skipped += CHUNK_SIZE;
+
+                let mut start_bytes = [false; CHUNK_SIZE];
+
+                for i in 0..CHUNK_SIZE {
+                    start_bytes[i] = !super::validations::utf8_is_cont_byte(chunk[i]);
+                }
+
+                remainder -= start_bytes.into_iter().map(|i| i as u8).sum::<u8>() as usize;
+            }
+
+            // SAFETY: `bytes_skipped` bytes exist in the slice because we just iterated
+            // over them in whole chunks, so advance_by will succeed.
+            unsafe { self.iter.advance_by(bytes_skipped).unwrap_unchecked() };
+
+            // Skip the continuation bytes of a char that straddles the end of the last chunk.
+            while self.iter.len() > 0 {
+                let b = self.iter.as_slice()[0];
+                if !super::validations::utf8_is_cont_byte(b) {
+                    break;
+                }
+                // SAFETY: We just peeked at the byte, therefore it exists
+                unsafe { self.iter.advance_by(1).unwrap_unchecked() };
+            }
+        }
+
+        while (remainder > 0) && (self.iter.len() > 0) {
+            remainder -= 1;
+            let b = self.iter.as_slice()[0];
+            let slurp = super::validations::utf8_char_width(b);
+            // SAFETY: UTF-8 validity guarantees that the continuation bytes (if any)
+            // of the char starting at `b` are present, so `slurp` bytes remain.
+            unsafe { self.iter.advance_by(slurp).unwrap_unchecked() };
+        }
+
+        NonZeroUsize::new(remainder).map_or(Ok(()), Err)
+    }
+
+    #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
         let len = self.iter.len();
         // `(len + 3)` can't overflow, because we know that the `slice::Iter`
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 27178328b..a22c46edc 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -2423,6 +2423,85 @@ impl str {
         me.make_ascii_lowercase()
     }
 
+    /// Returns a string slice with leading ASCII whitespace removed.
+    ///
+    /// 'Whitespace' refers to the definition used by
+    /// [`u8::is_ascii_whitespace`].
+    ///
+    /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(byte_slice_trim_ascii)]
+    ///
+    /// assert_eq!(" \t \u{3000}hello world\n".trim_ascii_start(), "\u{3000}hello world\n");
+    /// assert_eq!("  ".trim_ascii_start(), "");
+    /// assert_eq!("".trim_ascii_start(), "");
+    /// ```
+    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
+    #[inline]
+    pub const fn trim_ascii_start(&self) -> &str {
+        // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+        // UTF-8.
+        unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_start()) }
+    }
+
+    /// Returns a string slice with trailing ASCII whitespace removed.
+    ///
+    /// 'Whitespace' refers to the definition used by
+    /// [`u8::is_ascii_whitespace`].
+    ///
+    /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(byte_slice_trim_ascii)]
+    ///
+    /// assert_eq!("\r hello world\u{3000}\n ".trim_ascii_end(), "\r hello world\u{3000}");
+    /// assert_eq!("  ".trim_ascii_end(), "");
+    /// assert_eq!("".trim_ascii_end(), "");
+    /// ```
+    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
+    #[inline]
+    pub const fn trim_ascii_end(&self) -> &str {
+        // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+        // UTF-8.
+        unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_end()) }
+    }
+
+    /// Returns a string slice with leading and trailing ASCII whitespace
+    /// removed.
+    ///
+    /// 'Whitespace' refers to the definition used by
+    /// [`u8::is_ascii_whitespace`].
+    ///
+    /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(byte_slice_trim_ascii)]
+    ///
+    /// assert_eq!("\r hello world\n ".trim_ascii(), "hello world");
+    /// assert_eq!("  ".trim_ascii(), "");
+    /// assert_eq!("".trim_ascii(), "");
+    /// ```
+    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
+    #[inline]
+    pub const fn trim_ascii(&self) -> &str {
+        // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+        // UTF-8.
+        unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii()) }
+    }
+
     /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
     ///
     /// Note: only extended grapheme codepoints that begin the string will be
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index 701e61e66..caa54e00f 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -1740,9 +1740,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
     debug_assert!(needle.len() > 1);
 
     use crate::ops::BitAnd;
+    use crate::simd::cmp::SimdPartialEq;
     use crate::simd::mask8x16 as Mask;
     use crate::simd::u8x16 as Block;
-    use crate::simd::{SimdPartialEq, ToBitMask};
 
     let first_probe = needle[0];
     let last_byte_offset = needle.len() - 1;
@@ -1765,7 +1765,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
     };
 
     // do a naive search if the haystack is too small to fit
-    if haystack.len() < Block::LANES + last_byte_offset {
+    if haystack.len() < Block::LEN + last_byte_offset {
         return Some(haystack.windows(needle.len()).any(|c| c == needle));
     }
 
@@ -1812,7 +1812,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
         let eq_first: Mask = a.simd_eq(first_probe);
         let eq_last: Mask = b.simd_eq(second_probe);
         let both = eq_first.bitand(eq_last);
-        let mask = both.to_bitmask();
+        let mask = both.to_bitmask() as u16;
 
         return mask;
     };
@@ -1822,32 +1822,32 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
     // The loop condition must ensure that there's enough headroom to read LANE bytes,
     // and not only at the current index but also at the index shifted by block_offset
     const UNROLL: usize = 4;
-    while i + last_byte_offset + UNROLL * Block::LANES < haystack.len() && !result {
+    while i + last_byte_offset + UNROLL * Block::LEN < haystack.len() && !result {
         let mut masks = [0u16; UNROLL];
         for j in 0..UNROLL {
-            masks[j] = test_chunk(i + j * Block::LANES);
+            masks[j] = test_chunk(i + j * Block::LEN);
         }
         for j in 0..UNROLL {
             let mask = masks[j];
             if mask != 0 {
-                result |= check_mask(i + j * Block::LANES, mask, result);
+                result |= check_mask(i + j * Block::LEN, mask, result);
             }
         }
-        i += UNROLL * Block::LANES;
+        i += UNROLL * Block::LEN;
     }
-    while i + last_byte_offset + Block::LANES < haystack.len() && !result {
+    while i + last_byte_offset + Block::LEN < haystack.len() && !result {
         let mask = test_chunk(i);
         if mask != 0 {
             result |= check_mask(i, mask, result);
         }
-        i += Block::LANES;
+        i += Block::LEN;
     }
 
     // Process the tail that didn't fit into LANES-sized steps.
     // This simply repeats the same procedure but as right-aligned chunk instead
     // of a left-aligned one. The last byte must be exactly flush with the string end so
     // we don't miss a single byte or read out of bounds.
-    let i = haystack.len() - last_byte_offset - Block::LANES;
+    let i = haystack.len() - last_byte_offset - Block::LEN;
     let mask = test_chunk(i);
     if mask != 0 {
         result |= check_mask(i, mask, result);
diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs
index 16fb1dad7..777ad0d81 100644
--- a/library/core/src/str/traits.rs
+++ b/library/core/src/str/traits.rs
@@ -1,8 +1,8 @@
 //! Trait implementations for `str`.
 
 use crate::cmp::Ordering;
-use crate::intrinsics::assert_unsafe_precondition;
 use crate::ops;
+use crate::panic::debug_assert_nounwind;
 use crate::ptr;
 use crate::slice::SliceIndex;
 
@@ -191,39 +191,35 @@ unsafe impl SliceIndex<str> for ops::Range<usize> {
     #[inline]
     unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
         let slice = slice as *const [u8];
+
+        debug_assert_nounwind!(
+            // We'd like to check that the bounds are on char boundaries,
+            // but there's not really a way to do so without reading
+            // behind the pointer, which has aliasing implications.
+            // It's also not possible to move this check up to
+            // `str::get_unchecked` without adding a special function
+            // to `SliceIndex` just for this.
+            self.end >= self.start && self.end <= slice.len(),
+            "str::get_unchecked requires that the range is within the string slice",
+        );
+
         // SAFETY: the caller guarantees that `self` is in bounds of `slice`
         // which satisfies all the conditions for `add`.
-        let ptr = unsafe {
-            let this = ops::Range { ..self };
-            assert_unsafe_precondition!(
-                "str::get_unchecked requires that the range is within the string slice",
-                (this: ops::Range<usize>, slice: *const [u8]) =>
-                // We'd like to check that the bounds are on char boundaries,
-                // but there's not really a way to do so without reading
-                // behind the pointer, which has aliasing implications.
-                // It's also not possible to move this check up to
-                // `str::get_unchecked` without adding a special function
-                // to `SliceIndex` just for this.
-                this.end >= this.start && this.end <= slice.len()
-            );
-            slice.as_ptr().add(self.start)
-        };
+        let ptr = unsafe { slice.as_ptr().add(self.start) };
         let len = self.end - self.start;
         ptr::slice_from_raw_parts(ptr, len) as *const str
     }
     #[inline]
     unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
         let slice = slice as *mut [u8];
+
+        debug_assert_nounwind!(
+            self.end >= self.start && self.end <= slice.len(),
+            "str::get_unchecked_mut requires that the range is within the string slice",
+        );
+
         // SAFETY: see comments for `get_unchecked`.
-        let ptr = unsafe {
-            let this = ops::Range { ..self };
-            assert_unsafe_precondition!(
-                "str::get_unchecked_mut requires that the range is within the string slice",
-                (this: ops::Range<usize>, slice: *mut [u8]) =>
-                this.end >= this.start && this.end <= slice.len()
-            );
-            slice.as_mut_ptr().add(self.start)
-        };
+        let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
         let len = self.end - self.start;
         ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
     }
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-06-19 09:26:03 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-06-19 09:26:03 +0000
commit	9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree	21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /library/core/src/str
parent	Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
download	rustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip