From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- third_party/rust/rayon/src/str.rs | 848 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 848 insertions(+) create mode 100644 third_party/rust/rayon/src/str.rs (limited to 'third_party/rust/rayon/src/str.rs') diff --git a/third_party/rust/rayon/src/str.rs b/third_party/rust/rayon/src/str.rs new file mode 100644 index 0000000000..c85754e3d8 --- /dev/null +++ b/third_party/rust/rayon/src/str.rs @@ -0,0 +1,848 @@ +//! Parallel iterator types for [strings][std::str] +//! +//! You will rarely need to interact with this module directly unless you need +//! to name one of the iterator types. +//! +//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`] +//! reference a `Pattern` trait which is not visible outside this crate. +//! This trait is intentionally kept private, for use only by Rayon itself. +//! It is implemented for `char`, `&[char]`, and any function or closure +//! `F: Fn(char) -> bool + Sync + Send`. +//! +//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split +//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator +//! +//! [std::str]: https://doc.rust-lang.org/stable/std/str/ + +use crate::iter::plumbing::*; +use crate::iter::*; +use crate::split_producer::*; + +/// Test if a byte is the start of a UTF-8 character. +/// (extracted from `str::is_char_boundary`) +#[inline] +fn is_char_boundary(b: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (b as i8) >= -0x40 +} + +/// Find the index of a character boundary near the midpoint. +#[inline] +fn find_char_midpoint(chars: &str) -> usize { + let mid = chars.len() / 2; + + // We want to split near the midpoint, but we need to find an actual + // character boundary. So we look at the raw bytes, first scanning + // forward from the midpoint for a boundary, then trying backward. + let (left, right) = chars.as_bytes().split_at(mid); + match right.iter().copied().position(is_char_boundary) { + Some(i) => mid + i, + None => left + .iter() + .copied() + .rposition(is_char_boundary) + .unwrap_or(0), + } +} + +/// Try to split a string near the midpoint. +#[inline] +fn split(chars: &str) -> Option<(&str, &str)> { + let index = find_char_midpoint(chars); + if index > 0 { + Some(chars.split_at(index)) + } else { + None + } +} + +/// Parallel extensions for strings. +pub trait ParallelString { + /// Returns a plain string slice, which is used to implement the rest of + /// the parallel methods. + fn as_parallel_string(&self) -> &str; + + /// Returns a parallel iterator over the characters of a string. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let max = "hello".par_chars().max_by_key(|c| *c as i32); + /// assert_eq!(Some('o'), max); + /// ``` + fn par_chars(&self) -> Chars<'_> { + Chars { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over the characters of a string, with their positions. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32); + /// assert_eq!(Some((1, 'e')), min); + /// ``` + fn par_char_indices(&self) -> CharIndices<'_> { + CharIndices { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over the bytes of a string. + /// + /// Note that multi-byte sequences (for code points greater than `U+007F`) + /// are produced as separate items, but will not be split across threads. + /// If you would prefer an indexed iterator without that guarantee, consider + /// `string.as_bytes().par_iter().copied()` instead. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let max = "hello".par_bytes().max(); + /// assert_eq!(Some(b'o'), max); + /// ``` + fn par_bytes(&self) -> Bytes<'_> { + Bytes { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over a string encoded as UTF-16. + /// + /// Note that surrogate pairs (for code points greater than `U+FFFF`) are + /// produced as separate items, but will not be split across threads. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let max = "hello".par_encode_utf16().max(); + /// assert_eq!(Some(b'o' as u16), max); + /// + /// let text = "Zażółć gęślą jaźń"; + /// let utf8_len = text.len(); + /// let utf16_len = text.par_encode_utf16().count(); + /// assert!(utf16_len <= utf8_len); + /// ``` + fn par_encode_utf16(&self) -> EncodeUtf16<'_> { + EncodeUtf16 { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over substrings separated by a + /// given character or predicate, similar to `str::split`. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char`, `&[char]`, and any function or closure + /// `F: Fn(char) -> bool + Sync + Send`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let total = "1, 2, buckle, 3, 4, door" + /// .par_split(',') + /// .filter_map(|s| s.trim().parse::().ok()) + /// .sum(); + /// assert_eq!(10, total); + /// ``` + fn par_split(&self, separator: P) -> Split<'_, P> { + Split::new(self.as_parallel_string(), separator) + } + + /// Returns a parallel iterator over substrings terminated by a + /// given character or predicate, similar to `str::split_terminator`. + /// It's equivalent to `par_split`, except it doesn't produce an empty + /// substring after a trailing terminator. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char`, `&[char]`, and any function or closure + /// `F: Fn(char) -> bool + Sync + Send`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let parts: Vec<_> = "((1 + 3) * 2)" + /// .par_split_terminator(|c| c == '(' || c == ')') + /// .collect(); + /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts); + /// ``` + fn par_split_terminator(&self, terminator: P) -> SplitTerminator<'_, P> { + SplitTerminator::new(self.as_parallel_string(), terminator) + } + + /// Returns a parallel iterator over the lines of a string, ending with an + /// optional carriage return and with a newline (`\r\n` or just `\n`). + /// The final line ending is optional, and line endings are not included in + /// the output strings. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let lengths: Vec<_> = "hello world\nfizbuzz" + /// .par_lines() + /// .map(|l| l.len()) + /// .collect(); + /// assert_eq!(vec![11, 7], lengths); + /// ``` + fn par_lines(&self) -> Lines<'_> { + Lines(self.as_parallel_string()) + } + + /// Returns a parallel iterator over the sub-slices of a string that are + /// separated by any amount of whitespace. + /// + /// As with `str::split_whitespace`, 'whitespace' is defined according to + /// the terms of the Unicode Derived Core Property `White_Space`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let longest = "which is the longest word?" + /// .par_split_whitespace() + /// .max_by_key(|word| word.len()); + /// assert_eq!(Some("longest"), longest); + /// ``` + fn par_split_whitespace(&self) -> SplitWhitespace<'_> { + SplitWhitespace(self.as_parallel_string()) + } + + /// Returns a parallel iterator over substrings that match a + /// given character or predicate, similar to `str::matches`. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char`, `&[char]`, and any function or closure + /// `F: Fn(char) -> bool + Sync + Send`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let total = "1, 2, buckle, 3, 4, door" + /// .par_matches(char::is_numeric) + /// .map(|s| s.parse::().expect("digit")) + /// .sum(); + /// assert_eq!(10, total); + /// ``` + fn par_matches(&self, pattern: P) -> Matches<'_, P> { + Matches { + chars: self.as_parallel_string(), + pattern, + } + } + + /// Returns a parallel iterator over substrings that match a given character + /// or predicate, with their positions, similar to `str::match_indices`. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char`, `&[char]`, and any function or closure + /// `F: Fn(char) -> bool + Sync + Send`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door" + /// .par_match_indices(char::is_numeric) + /// .collect(); + /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]); + /// ``` + fn par_match_indices(&self, pattern: P) -> MatchIndices<'_, P> { + MatchIndices { + chars: self.as_parallel_string(), + pattern, + } + } +} + +impl ParallelString for str { + #[inline] + fn as_parallel_string(&self) -> &str { + self + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// We hide the `Pattern` trait in a private module, as its API is not meant +/// for general consumption. If we could have privacy on trait items, then it +/// would be nicer to have its basic existence and implementors public while +/// keeping all of the methods private. +mod private { + use crate::iter::plumbing::Folder; + + /// Pattern-matching trait for `ParallelString`, somewhat like a mix of + /// `std::str::pattern::{Pattern, Searcher}`. + /// + /// Implementing this trait is not permitted outside of `rayon`. + pub trait Pattern: Sized + Sync + Send { + private_decl! {} + fn find_in(&self, haystack: &str) -> Option; + fn rfind_in(&self, haystack: &str) -> Option; + fn is_suffix_of(&self, haystack: &str) -> bool; + fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F + where + F: Folder<&'ch str>; + fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F + where + F: Folder<&'ch str>; + fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F + where + F: Folder<(usize, &'ch str)>; + } +} +use self::private::Pattern; + +#[inline] +fn offset(base: usize) -> impl Fn((usize, T)) -> (usize, T) { + move |(i, x)| (base + i, x) +} + +macro_rules! impl_pattern { + (&$self:ident => $pattern:expr) => { + private_impl! {} + + #[inline] + fn find_in(&$self, chars: &str) -> Option { + chars.find($pattern) + } + + #[inline] + fn rfind_in(&$self, chars: &str) -> Option { + chars.rfind($pattern) + } + + #[inline] + fn is_suffix_of(&$self, chars: &str) -> bool { + chars.ends_with($pattern) + } + + fn fold_splits<'ch, F>(&$self, chars: &'ch str, folder: F, skip_last: bool) -> F + where + F: Folder<&'ch str>, + { + let mut split = chars.split($pattern); + if skip_last { + split.next_back(); + } + folder.consume_iter(split) + } + + fn fold_matches<'ch, F>(&$self, chars: &'ch str, folder: F) -> F + where + F: Folder<&'ch str>, + { + folder.consume_iter(chars.matches($pattern)) + } + + fn fold_match_indices<'ch, F>(&$self, chars: &'ch str, folder: F, base: usize) -> F + where + F: Folder<(usize, &'ch str)>, + { + folder.consume_iter(chars.match_indices($pattern).map(offset(base))) + } + } +} + +impl Pattern for char { + impl_pattern!(&self => *self); +} + +impl Pattern for &[char] { + impl_pattern!(&self => *self); +} + +impl bool> Pattern for FN { + impl_pattern!(&self => self); +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over the characters of a string +#[derive(Debug, Clone)] +pub struct Chars<'ch> { + chars: &'ch str, +} + +struct CharsProducer<'ch> { + chars: &'ch str, +} + +impl<'ch> ParallelIterator for Chars<'ch> { + type Item = char; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(CharsProducer { chars: self.chars }, consumer) + } +} + +impl<'ch> UnindexedProducer for CharsProducer<'ch> { + type Item = char; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + CharsProducer { chars: left }, + Some(CharsProducer { chars: right }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.chars.chars()) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over the characters of a string, with their positions +#[derive(Debug, Clone)] +pub struct CharIndices<'ch> { + chars: &'ch str, +} + +struct CharIndicesProducer<'ch> { + index: usize, + chars: &'ch str, +} + +impl<'ch> ParallelIterator for CharIndices<'ch> { + type Item = (usize, char); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = CharIndicesProducer { + index: 0, + chars: self.chars, + }; + bridge_unindexed(producer, consumer) + } +} + +impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> { + type Item = (usize, char); + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + CharIndicesProducer { + chars: left, + ..self + }, + Some(CharIndicesProducer { + chars: right, + index: self.index + left.len(), + }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + let base = self.index; + folder.consume_iter(self.chars.char_indices().map(offset(base))) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over the bytes of a string +#[derive(Debug, Clone)] +pub struct Bytes<'ch> { + chars: &'ch str, +} + +struct BytesProducer<'ch> { + chars: &'ch str, +} + +impl<'ch> ParallelIterator for Bytes<'ch> { + type Item = u8; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(BytesProducer { chars: self.chars }, consumer) + } +} + +impl<'ch> UnindexedProducer for BytesProducer<'ch> { + type Item = u8; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + BytesProducer { chars: left }, + Some(BytesProducer { chars: right }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.chars.bytes()) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over a string encoded as UTF-16 +#[derive(Debug, Clone)] +pub struct EncodeUtf16<'ch> { + chars: &'ch str, +} + +struct EncodeUtf16Producer<'ch> { + chars: &'ch str, +} + +impl<'ch> ParallelIterator for EncodeUtf16<'ch> { + type Item = u16; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer) + } +} + +impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> { + type Item = u16; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + EncodeUtf16Producer { chars: left }, + Some(EncodeUtf16Producer { chars: right }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.chars.encode_utf16()) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings separated by a pattern +#[derive(Debug, Clone)] +pub struct Split<'ch, P: Pattern> { + chars: &'ch str, + separator: P, +} + +impl<'ch, P: Pattern> Split<'ch, P> { + fn new(chars: &'ch str, separator: P) -> Self { + Split { chars, separator } + } +} + +impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitProducer::new(self.chars, &self.separator); + bridge_unindexed(producer, consumer) + } +} + +/// Implement support for `SplitProducer`. +impl<'ch, P: Pattern> Fissile

for &'ch str { + fn length(&self) -> usize { + self.len() + } + + fn midpoint(&self, end: usize) -> usize { + // First find a suitable UTF-8 boundary. + find_char_midpoint(&self[..end]) + } + + fn find(&self, separator: &P, start: usize, end: usize) -> Option { + separator.find_in(&self[start..end]) + } + + fn rfind(&self, separator: &P, end: usize) -> Option { + separator.rfind_in(&self[..end]) + } + + fn split_once(self, index: usize) -> (Self, Self) { + let (left, right) = self.split_at(index); + let mut right_iter = right.chars(); + right_iter.next(); // skip the separator + (left, right_iter.as_str()) + } + + fn fold_splits(self, separator: &P, folder: F, skip_last: bool) -> F + where + F: Folder, + { + separator.fold_splits(self, folder, skip_last) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings separated by a terminator pattern +#[derive(Debug, Clone)] +pub struct SplitTerminator<'ch, P: Pattern> { + chars: &'ch str, + terminator: P, +} + +struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> { + splitter: SplitProducer<'sep, P, &'ch str>, + skip_last: bool, +} + +impl<'ch, P: Pattern> SplitTerminator<'ch, P> { + fn new(chars: &'ch str, terminator: P) -> Self { + SplitTerminator { chars, terminator } + } +} + +impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> { + fn new(chars: &'ch str, terminator: &'sep P) -> Self { + SplitTerminatorProducer { + splitter: SplitProducer::new(chars, terminator), + skip_last: chars.is_empty() || terminator.is_suffix_of(chars), + } + } +} + +impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitTerminatorProducer::new(self.chars, &self.terminator); + bridge_unindexed(producer, consumer) + } +} + +impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> { + type Item = &'ch str; + + fn split(mut self) -> (Self, Option) { + let (left, right) = self.splitter.split(); + self.splitter = left; + let right = right.map(|right| { + let skip_last = self.skip_last; + self.skip_last = false; + SplitTerminatorProducer { + splitter: right, + skip_last, + } + }); + (self, right) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.splitter.fold_with(folder, self.skip_last) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over lines in a string +#[derive(Debug, Clone)] +pub struct Lines<'ch>(&'ch str); + +#[inline] +fn no_carriage_return(line: &str) -> &str { + line.strip_suffix('\r').unwrap_or(line) +} + +impl<'ch> ParallelIterator for Lines<'ch> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.0 + .par_split_terminator('\n') + .map(no_carriage_return) + .drive_unindexed(consumer) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings separated by whitespace +#[derive(Debug, Clone)] +pub struct SplitWhitespace<'ch>(&'ch str); + +#[inline] +fn not_empty(s: &&str) -> bool { + !s.is_empty() +} + +impl<'ch> ParallelIterator for SplitWhitespace<'ch> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.0 + .par_split(char::is_whitespace) + .filter(not_empty) + .drive_unindexed(consumer) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings that match a pattern +#[derive(Debug, Clone)] +pub struct Matches<'ch, P: Pattern> { + chars: &'ch str, + pattern: P, +} + +struct MatchesProducer<'ch, 'pat, P: Pattern> { + chars: &'ch str, + pattern: &'pat P, +} + +impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = MatchesProducer { + chars: self.chars, + pattern: &self.pattern, + }; + bridge_unindexed(producer, consumer) + } +} + +impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> { + type Item = &'ch str; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + MatchesProducer { + chars: left, + ..self + }, + Some(MatchesProducer { + chars: right, + ..self + }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.pattern.fold_matches(self.chars, folder) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings that match a pattern, with their positions +#[derive(Debug, Clone)] +pub struct MatchIndices<'ch, P: Pattern> { + chars: &'ch str, + pattern: P, +} + +struct MatchIndicesProducer<'ch, 'pat, P: Pattern> { + index: usize, + chars: &'ch str, + pattern: &'pat P, +} + +impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> { + type Item = (usize, &'ch str); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = MatchIndicesProducer { + index: 0, + chars: self.chars, + pattern: &self.pattern, + }; + bridge_unindexed(producer, consumer) + } +} + +impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> { + type Item = (usize, &'ch str); + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + MatchIndicesProducer { + chars: left, + ..self + }, + Some(MatchIndicesProducer { + chars: right, + index: self.index + left.len(), + ..self + }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.pattern + .fold_match_indices(self.chars, folder, self.index) + } +} -- cgit v1.2.3