diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:59:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:59:35 +0000 |
commit | d1b2d29528b7794b41e66fc2136e395a02f8529b (patch) | |
tree | a4a17504b260206dec3cf55b2dca82929a348ac2 /library/core/src/str | |
parent | Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip |
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/str')
-rw-r--r-- | library/core/src/str/iter.rs | 21 | ||||
-rw-r--r-- | library/core/src/str/mod.rs | 20 | ||||
-rw-r--r-- | library/core/src/str/pattern.rs | 4 | ||||
-rw-r--r-- | library/core/src/str/traits.rs | 52 |
4 files changed, 83 insertions, 14 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 772c36055..cd16810c4 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1439,11 +1439,22 @@ impl<'a> Iterator for EncodeUtf16<'a> { #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - let (low, high) = self.chars.size_hint(); - // every char gets either one u16 or two u16, - // so this iterator is between 1 or 2 times as - // long as the underlying iterator. - (low, high.and_then(|n| n.checked_mul(2))) + let len = self.chars.iter.len(); + // The highest bytes:code units ratio occurs for 3-byte sequences, + // since a 4-byte sequence results in 2 code units. The lower bound + // is therefore determined by assuming the remaining bytes contain as + // many 3-byte sequences as possible. The highest bytes:code units + // ratio is for 1-byte sequences, so use this for the upper bound. + // `(len + 2)` can't overflow, because we know that the `slice::Iter` + // belongs to a slice in memory which has a maximum length of + // `isize::MAX` (that's well below `usize::MAX`) + if self.extra == 0 { + ((len + 2) / 3, Some(len)) + } else { + // We're in the middle of a surrogate pair, so add the remaining + // surrogate to the bounds. + ((len + 2) / 3 + 1, Some(len + 1)) + } } } diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 9a93bb729..e5f34952c 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -267,14 +267,13 @@ impl str { /// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`. /// + /// If `index` is greater than the length of the string, this returns the length of the string. + /// /// This method is the natural complement to [`floor_char_boundary`]. See that method /// for more details. /// /// [`floor_char_boundary`]: str::floor_char_boundary /// - /// # Panics - /// - /// Panics if `index > self.len()`. /// /// # Examples /// @@ -292,7 +291,7 @@ impl str { #[inline] pub fn ceil_char_boundary(&self, index: usize) -> usize { if index > self.len() { - slice_error_fail(self, index, index) + self.len() } else { let upper_bound = Ord::min(index + 4, self.len()); self.as_bytes()[index..upper_bound] @@ -952,6 +951,10 @@ impl str { /// /// Line terminators are not included in the lines returned by the iterator. /// + /// Note that any carriage return (`\r`) not immediately followed by a + /// line feed (`\n`) does not split a line. These carriage returns are + /// thereby included in the produced lines. + /// /// The final line ending is optional. A string that ends with a final line /// ending will return the same lines as an otherwise identical string /// without a final line ending. @@ -961,18 +964,19 @@ impl str { /// Basic usage: /// /// ``` - /// let text = "foo\r\nbar\n\nbaz\n"; + /// let text = "foo\r\nbar\n\nbaz\r"; /// let mut lines = text.lines(); /// /// assert_eq!(Some("foo"), lines.next()); /// assert_eq!(Some("bar"), lines.next()); /// assert_eq!(Some(""), lines.next()); - /// assert_eq!(Some("baz"), lines.next()); + /// // Trailing carriage return is included in the last line + /// assert_eq!(Some("baz\r"), lines.next()); /// /// assert_eq!(None, lines.next()); /// ``` /// - /// The final line ending isn't required: + /// The final line does not require any ending: /// /// ``` /// let text = "foo\nbar\n\r\nbaz"; @@ -1666,7 +1670,7 @@ impl str { /// If the pattern allows a reverse search but its results might differ /// from a forward search, the [`rmatches`] method can be used. /// - /// [`rmatches`]: str::matches + /// [`rmatches`]: str::rmatches /// /// # Examples /// diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 91ee2903a..d5d6d60ac 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -1750,7 +1750,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> { 1 } else { // try a few bytes in case first and last byte of the needle are the same - let Some(second_probe_offset) = (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe) else { + let Some(second_probe_offset) = + (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe) + else { // fall back to other search methods if we can't find any different bytes // since we could otherwise hit some degenerate cases return None; diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs index 1d52335f2..2b37af66b 100644 --- a/library/core/src/str/traits.rs +++ b/library/core/src/str/traits.rs @@ -252,6 +252,58 @@ unsafe impl SliceIndex<str> for ops::Range<usize> { } } +/// Implements substring slicing for arbitrary bounds. +/// +/// Returns a slice of the given string bounded by the byte indices +/// provided by each bound. +/// +/// This operation is *O*(1). +/// +/// # Panics +/// +/// Panics if `begin` or `end` (if it exists and once adjusted for +/// inclusion/exclusion) does not point to the starting byte offset of +/// a character (as defined by `is_char_boundary`), if `begin > end`, or if +/// `end > len`. +#[stable(feature = "slice_index_str_with_ops_bound_pair", since = "1.73.0")] +unsafe impl SliceIndex<str> for (ops::Bound<usize>, ops::Bound<usize>) { + type Output = str; + + #[inline] + fn get(self, slice: &str) -> Option<&str> { + crate::slice::index::into_range(slice.len(), self)?.get(slice) + } + + #[inline] + fn get_mut(self, slice: &mut str) -> Option<&mut str> { + crate::slice::index::into_range(slice.len(), self)?.get_mut(slice) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const str) -> *const str { + let len = (slice as *const [u8]).len(); + // SAFETY: the caller has to uphold the safety contract for `get_unchecked`. + unsafe { crate::slice::index::into_range_unchecked(len, self).get_unchecked(slice) } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut str { + let len = (slice as *mut [u8]).len(); + // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`. + unsafe { crate::slice::index::into_range_unchecked(len, self).get_unchecked_mut(slice) } + } + + #[inline] + fn index(self, slice: &str) -> &str { + crate::slice::index::into_slice_range(slice.len(), self).index(slice) + } + + #[inline] + fn index_mut(self, slice: &mut str) -> &mut str { + crate::slice::index::into_slice_range(slice.len(), self).index_mut(slice) + } +} + /// Implements substring slicing with syntax `&self[.. end]` or `&mut /// self[.. end]`. /// |