summary | refs | log | tree | commit | diff | stats
path: root/library/core/src/str
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:59:35 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:59:35 +0000
commit: d1b2d29528b7794b41e66fc2136e395a02f8529b (patch)
tree: a4a17504b260206dec3cf55b2dca82929a348ac2 /library/core/src/str
parent: Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff)
download: rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz
rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/str')
-rw-r--r-- library/core/src/str/iter.rs 21
-rw-r--r-- library/core/src/str/mod.rs 20
-rw-r--r-- library/core/src/str/pattern.rs 4
-rw-r--r-- library/core/src/str/traits.rs 52
4 files changed, 83 insertions, 14 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 772c36055..cd16810c4 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -1439,11 +1439,22 @@ impl<'a> Iterator for EncodeUtf16<'a> {
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
- let (low, high) = self.chars.size_hint();
- // every char gets either one u16 or two u16,
- // so this iterator is between 1 or 2 times as
- // long as the underlying iterator.
- (low, high.and_then(|n| n.checked_mul(2)))
+ let len = self.chars.iter.len();
+ // The highest bytes:code units ratio occurs for 3-byte sequences,
+ // since a 4-byte sequence results in 2 code units. The lower bound
+ // is therefore determined by assuming the remaining bytes contain as
+ // many 3-byte sequences as possible. The lowest bytes:code units
+ // ratio is for 1-byte sequences, so use this for the upper bound.
+ // `(len + 2)` can't overflow, because we know that the `slice::Iter`
+ // belongs to a slice in memory which has a maximum length of
+ // `isize::MAX` (that's well below `usize::MAX`)
+ if self.extra == 0 {
+ ((len + 2) / 3, Some(len))
+ } else {
+ // We're in the middle of a surrogate pair, so add the remaining
+ // surrogate to the bounds.
+ ((len + 2) / 3 + 1, Some(len + 1))
+ }
}
}
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 9a93bb729..e5f34952c 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -267,14 +267,13 @@ impl str {
/// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`.
///
+ /// If `index` is greater than the length of the string, this returns the length of the string.
+ ///
/// This method is the natural complement to [`floor_char_boundary`]. See that method
/// for more details.
///
/// [`floor_char_boundary`]: str::floor_char_boundary
///
- /// # Panics
- ///
- /// Panics if `index > self.len()`.
///
/// # Examples
///
@@ -292,7 +291,7 @@ impl str {
#[inline]
pub fn ceil_char_boundary(&self, index: usize) -> usize {
if index > self.len() {
- slice_error_fail(self, index, index)
+ self.len()
} else {
let upper_bound = Ord::min(index + 4, self.len());
self.as_bytes()[index..upper_bound]
@@ -952,6 +951,10 @@ impl str {
///
/// Line terminators are not included in the lines returned by the iterator.
///
+ /// Note that any carriage return (`\r`) not immediately followed by a
+ /// line feed (`\n`) does not split a line. These carriage returns are
+ /// thereby included in the produced lines.
+ ///
/// The final line ending is optional. A string that ends with a final line
/// ending will return the same lines as an otherwise identical string
/// without a final line ending.
@@ -961,18 +964,19 @@ impl str {
/// Basic usage:
///
/// ```
- /// let text = "foo\r\nbar\n\nbaz\n";
+ /// let text = "foo\r\nbar\n\nbaz\r";
/// let mut lines = text.lines();
///
/// assert_eq!(Some("foo"), lines.next());
/// assert_eq!(Some("bar"), lines.next());
/// assert_eq!(Some(""), lines.next());
- /// assert_eq!(Some("baz"), lines.next());
+ /// // Trailing carriage return is included in the last line
+ /// assert_eq!(Some("baz\r"), lines.next());
///
/// assert_eq!(None, lines.next());
/// ```
///
- /// The final line ending isn't required:
+ /// The final line does not require any ending:
///
/// ```
/// let text = "foo\nbar\n\r\nbaz";
@@ -1666,7 +1670,7 @@ impl str {
/// If the pattern allows a reverse search but its results might differ
/// from a forward search, the [`rmatches`] method can be used.
///
- /// [`rmatches`]: str::matches
+ /// [`rmatches`]: str::rmatches
///
/// # Examples
///
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index 91ee2903a..d5d6d60ac 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -1750,7 +1750,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
1
} else {
// try a few bytes in case first and last byte of the needle are the same
- let Some(second_probe_offset) = (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe) else {
+ let Some(second_probe_offset) =
+ (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe)
+ else {
// fall back to other search methods if we can't find any different bytes
// since we could otherwise hit some degenerate cases
return None;
diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs
index 1d52335f2..2b37af66b 100644
--- a/library/core/src/str/traits.rs
+++ b/library/core/src/str/traits.rs
@@ -252,6 +252,58 @@ unsafe impl SliceIndex<str> for ops::Range<usize> {
}
}
+/// Implements substring slicing for arbitrary bounds.
+///
+/// Returns a slice of the given string bounded by the byte indices
+/// provided by each bound.
+///
+/// This operation is *O*(1).
+///
+/// # Panics
+///
+/// Panics if `begin` or `end` (if it exists and once adjusted for
+/// inclusion/exclusion) does not point to the starting byte offset of
+/// a character (as defined by `is_char_boundary`), if `begin > end`, or if
+/// `end > len`.
+#[stable(feature = "slice_index_str_with_ops_bound_pair", since = "1.73.0")]
+unsafe impl SliceIndex<str> for (ops::Bound<usize>, ops::Bound<usize>) {
+ type Output = str;
+
+ #[inline]
+ fn get(self, slice: &str) -> Option<&str> {
+ crate::slice::index::into_range(slice.len(), self)?.get(slice)
+ }
+
+ #[inline]
+ fn get_mut(self, slice: &mut str) -> Option<&mut str> {
+ crate::slice::index::into_range(slice.len(), self)?.get_mut(slice)
+ }
+
+ #[inline]
+ unsafe fn get_unchecked(self, slice: *const str) -> *const str {
+ let len = (slice as *const [u8]).len();
+ // SAFETY: the caller has to uphold the safety contract for `get_unchecked`.
+ unsafe { crate::slice::index::into_range_unchecked(len, self).get_unchecked(slice) }
+ }
+
+ #[inline]
+ unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut str {
+ let len = (slice as *mut [u8]).len();
+ // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`.
+ unsafe { crate::slice::index::into_range_unchecked(len, self).get_unchecked_mut(slice) }
+ }
+
+ #[inline]
+ fn index(self, slice: &str) -> &str {
+ crate::slice::index::into_slice_range(slice.len(), self).index(slice)
+ }
+
+ #[inline]
+ fn index_mut(self, slice: &mut str) -> &mut str {
+ crate::slice::index::into_slice_range(slice.len(), self).index_mut(slice)
+ }
+}
+
/// Implements substring slicing with syntax `&self[.. end]` or `&mut
/// self[.. end]`.
///