Merging upstream version 1.73.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:59:35 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:59:35 +0000
commit: d1b2d29528b7794b41e66fc2136e395a02f8529b (patch)
tree: a4a17504b260206dec3cf55b2dca82929a348ac2 /library/core/src/str
parent: Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff)
download: rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz
rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip
4 files changed, 83 insertions, 14 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 772c36055..cd16810c4 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -1439,11 +1439,22 @@ impl<'a> Iterator for EncodeUtf16<'a> {
 
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
-        let (low, high) = self.chars.size_hint();
-        // every char gets either one u16 or two u16,
-        // so this iterator is between 1 or 2 times as
-        // long as the underlying iterator.
-        (low, high.and_then(|n| n.checked_mul(2)))
+        let len = self.chars.iter.len();
+        // The highest bytes:code units ratio occurs for 3-byte sequences,
+        // since a 4-byte sequence results in 2 code units. The lower bound
+        // is therefore determined by assuming the remaining bytes contain as
+        // many 3-byte sequences as possible. The lowest bytes:code units
+        // ratio is for 1-byte sequences, so use this for the upper bound.
+        // `(len + 2)` can't overflow, because we know that the `slice::Iter`
+        // belongs to a slice in memory which has a maximum length of
+        // `isize::MAX` (that's well below `usize::MAX`)
+        if self.extra == 0 {
+            ((len + 2) / 3, Some(len))
+        } else {
+            // We're in the middle of a surrogate pair, so add the remaining
+            // surrogate to the bounds.
+            ((len + 2) / 3 + 1, Some(len + 1))
+        }
     }
 }
 
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 9a93bb729..e5f34952c 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -267,14 +267,13 @@ impl str {
 
     /// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`.
     ///
+    /// If `index` is greater than the length of the string, this returns the length of the string.
+    ///
     /// This method is the natural complement to [`floor_char_boundary`]. See that method
     /// for more details.
     ///
     /// [`floor_char_boundary`]: str::floor_char_boundary
     ///
-    /// # Panics
-    ///
-    /// Panics if `index > self.len()`.
     ///
     /// # Examples
     ///
@@ -292,7 +291,7 @@ impl str {
     #[inline]
     pub fn ceil_char_boundary(&self, index: usize) -> usize {
         if index > self.len() {
-            slice_error_fail(self, index, index)
+            self.len()
         } else {
             let upper_bound = Ord::min(index + 4, self.len());
             self.as_bytes()[index..upper_bound]
@@ -952,6 +951,10 @@ impl str {
     ///
     /// Line terminators are not included in the lines returned by the iterator.
     ///
+    /// Note that any carriage return (`\r`) not immediately followed by a
+    /// line feed (`\n`) does not split a line. These carriage returns are
+    /// thereby included in the produced lines.
+    ///
     /// The final line ending is optional. A string that ends with a final line
     /// ending will return the same lines as an otherwise identical string
     /// without a final line ending.
@@ -961,18 +964,19 @@ impl str {
     /// Basic usage:
     ///
     /// ```
-    /// let text = "foo\r\nbar\n\nbaz\n";
+    /// let text = "foo\r\nbar\n\nbaz\r";
     /// let mut lines = text.lines();
     ///
     /// assert_eq!(Some("foo"), lines.next());
     /// assert_eq!(Some("bar"), lines.next());
     /// assert_eq!(Some(""), lines.next());
-    /// assert_eq!(Some("baz"), lines.next());
+    /// // Trailing carriage return is included in the last line
+    /// assert_eq!(Some("baz\r"), lines.next());
     ///
     /// assert_eq!(None, lines.next());
     /// ```
     ///
-    /// The final line ending isn't required:
+    /// The final line does not require any ending:
     ///
     /// ```
     /// let text = "foo\nbar\n\r\nbaz";
@@ -1666,7 +1670,7 @@ impl str {
     /// If the pattern allows a reverse search but its results might differ
     /// from a forward search, the [`rmatches`] method can be used.
     ///
-    /// [`rmatches`]: str::matches
+    /// [`rmatches`]: str::rmatches
     ///
     /// # Examples
     ///
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index 91ee2903a..d5d6d60ac 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -1750,7 +1750,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
         1
     } else {
         // try a few bytes in case first and last byte of the needle are the same
-        let Some(second_probe_offset) = (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe) else {
+        let Some(second_probe_offset) =
+            (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe)
+        else {
             // fall back to other search methods if we can't find any different bytes
             // since we could otherwise hit some degenerate cases
             return None;
diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs
index 1d52335f2..2b37af66b 100644
--- a/library/core/src/str/traits.rs
+++ b/library/core/src/str/traits.rs
@@ -252,6 +252,58 @@ unsafe impl SliceIndex<str> for ops::Range<usize> {
     }
 }
 
+/// Implements substring slicing for arbitrary bounds.
+///
+/// Returns a slice of the given string bounded by the byte indices
+/// provided by each bound.
+///
+/// This operation is *O*(1).
+///
+/// # Panics
+///
+/// Panics if `begin` or `end` (if it exists and once adjusted for
+/// inclusion/exclusion) does not point to the starting byte offset of
+/// a character (as defined by `is_char_boundary`), if `begin > end`, or if
+/// `end > len`.
+#[stable(feature = "slice_index_str_with_ops_bound_pair", since = "1.73.0")]
+unsafe impl SliceIndex<str> for (ops::Bound<usize>, ops::Bound<usize>) {
+    type Output = str;
+
+    #[inline]
+    fn get(self, slice: &str) -> Option<&str> {
+        crate::slice::index::into_range(slice.len(), self)?.get(slice)
+    }
+
+    #[inline]
+    fn get_mut(self, slice: &mut str) -> Option<&mut str> {
+        crate::slice::index::into_range(slice.len(), self)?.get_mut(slice)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, slice: *const str) -> *const str {
+        let len = (slice as *const [u8]).len();
+        // SAFETY: the caller has to uphold the safety contract for `get_unchecked`.
+        unsafe { crate::slice::index::into_range_unchecked(len, self).get_unchecked(slice) }
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut str {
+        let len = (slice as *mut [u8]).len();
+        // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`.
+        unsafe { crate::slice::index::into_range_unchecked(len, self).get_unchecked_mut(slice) }
+    }
+
+    #[inline]
+    fn index(self, slice: &str) -> &str {
+        crate::slice::index::into_slice_range(slice.len(), self).index(slice)
+    }
+
+    #[inline]
+    fn index_mut(self, slice: &mut str) -> &mut str {
+        crate::slice::index::into_slice_range(slice.len(), self).index_mut(slice)
+    }
+}
+
 /// Implements substring slicing with syntax `&self[.. end]` or `&mut
 /// self[.. end]`.
 ///
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 03:59:35 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 03:59:35 +0000
commit	d1b2d29528b7794b41e66fc2136e395a02f8529b (patch)
tree	a4a17504b260206dec3cf55b2dca82929a348ac2 /library/core/src/str
parent	Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff)
download	rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip