diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:11:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:11:28 +0000 |
commit | 94a0819fe3a0d679c3042a77bfe6a2afc505daea (patch) | |
tree | 2b827afe6a05f3538db3f7803a88c4587fe85648 /vendor/regex/src | |
parent | Adding upstream version 1.64.0+dfsg1. (diff) | |
download | rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.tar.xz rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.zip |
Adding upstream version 1.66.0+dfsg1.upstream/1.66.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex/src')
-rw-r--r-- | vendor/regex/src/backtrack.rs | 12 | ||||
-rw-r--r-- | vendor/regex/src/compile.rs | 57 | ||||
-rw-r--r-- | vendor/regex/src/dfa.rs | 38 | ||||
-rw-r--r-- | vendor/regex/src/exec.rs | 14 | ||||
-rw-r--r-- | vendor/regex/src/expand.rs | 8 | ||||
-rw-r--r-- | vendor/regex/src/input.rs | 4 | ||||
-rw-r--r-- | vendor/regex/src/lib.rs | 1 | ||||
-rw-r--r-- | vendor/regex/src/literal/imp.rs | 4 | ||||
-rw-r--r-- | vendor/regex/src/pattern.rs | 2 | ||||
-rw-r--r-- | vendor/regex/src/pikevm.rs | 2 | ||||
-rw-r--r-- | vendor/regex/src/prog.rs | 2 | ||||
-rw-r--r-- | vendor/regex/src/re_bytes.rs | 12 | ||||
-rw-r--r-- | vendor/regex/src/re_set.rs | 44 | ||||
-rw-r--r-- | vendor/regex/src/re_trait.rs | 13 | ||||
-rw-r--r-- | vendor/regex/src/re_unicode.rs | 24 | ||||
-rw-r--r-- | vendor/regex/src/utf8.rs | 2 |
16 files changed, 139 insertions, 100 deletions
diff --git a/vendor/regex/src/backtrack.rs b/vendor/regex/src/backtrack.rs index a3d25d662..4d83856ca 100644 --- a/vendor/regex/src/backtrack.rs +++ b/vendor/regex/src/backtrack.rs @@ -93,13 +93,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { let mut cache = cache.borrow_mut(); let cache = &mut cache.backtrack; let start = input.at(start); - let mut b = Bounded { - prog: prog, - input: input, - matches: matches, - slots: slots, - m: cache, - }; + let mut b = Bounded { prog, input, matches, slots, m: cache }; b.exec_(start, end) } @@ -220,14 +214,14 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { // job is popped and the old capture index is restored. self.m.jobs.push(Job::SaveRestore { slot: inst.slot, - old_pos: old_pos, + old_pos, }); self.slots[inst.slot] = Some(at.pos()); } ip = inst.goto; } Split(ref inst) => { - self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at }); + self.m.jobs.push(Job::Inst { ip: inst.goto2, at }); ip = inst.goto1; } EmptyLook(ref inst) => { diff --git a/vendor/regex/src/compile.rs b/vendor/regex/src/compile.rs index 069f445c8..90ca25015 100644 --- a/vendor/regex/src/compile.rs +++ b/vendor/regex/src/compile.rs @@ -149,7 +149,8 @@ impl Compiler { self.compiled.start = dotstar_patch.entry; } self.compiled.captures = vec![None]; - let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst()); + let patch = + self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst()); if self.compiled.needs_dotstar() { self.fill(dotstar_patch.hole, patch.entry); } else { @@ -185,7 +186,7 @@ impl Compiler { self.fill_to_next(prev_hole); let split = self.push_split_hole(); let Patch { hole, entry } = - self.c_capture(0, expr)?.unwrap_or(self.next_inst()); + self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst()); self.fill_to_next(hole); self.compiled.matches.push(self.insts.len()); self.push_compiled(Inst::Match(i)); @@ -193,7 +194,7 @@ impl Compiler { } let i = exprs.len() - 1; let Patch { hole, entry } = - self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst()); + self.c_capture(0, &exprs[i])?.unwrap_or_else(|| self.next_inst()); self.fill(prev_hole, entry); self.fill_to_next(hole); self.compiled.matches.push(self.insts.len()); @@ -410,11 +411,11 @@ impl Compiler { } else { let entry = self.insts.len(); let hole = self.push_hole(InstHole::Save { slot: first_slot }); - let patch = self.c(expr)?.unwrap_or(self.next_inst()); + let patch = self.c(expr)?.unwrap_or_else(|| self.next_inst()); self.fill(hole, patch.entry); self.fill_to_next(patch.hole); let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 }); - Ok(Some(Patch { hole: hole, entry: entry })) + Ok(Some(Patch { hole, entry })) } } @@ -448,7 +449,7 @@ impl Compiler { self.c_class(&[hir::ClassUnicodeRange::new(c, c)]) } } else { - let hole = self.push_hole(InstHole::Char { c: c }); + let hole = self.push_hole(InstHole::Char { c }); Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) } } @@ -458,7 +459,7 @@ impl Compiler { assert!(!ranges.is_empty()); if self.compiled.uses_bytes() { - Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?)) + Ok(Some(CompileClass { c: self, ranges }.compile()?)) } else { let ranges: Vec<(char, char)> = ranges.iter().map(|r| (r.start(), r.end())).collect(); @@ -467,9 +468,9 @@ impl Compiler { } else { self.extra_inst_bytes += ranges.len() * (size_of::<char>() * 2); - self.push_hole(InstHole::Ranges { ranges: ranges }) + self.push_hole(InstHole::Ranges { ranges }) }; - Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 })) + Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) } } @@ -508,8 +509,8 @@ impl Compiler { } fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty { - let hole = self.push_hole(InstHole::EmptyLook { look: look }); - Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 })) + let hole = self.push_hole(InstHole::EmptyLook { look }); + Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) } fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty @@ -533,7 +534,7 @@ impl Compiler { hole = p.hole; } } - Ok(Some(Patch { hole: hole, entry: entry })) + Ok(Some(Patch { hole, entry })) } fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty { @@ -676,7 +677,7 @@ impl Compiler { // None). let patch_concat = self .c_concat(iter::repeat(expr).take(min))? - .unwrap_or(self.next_inst()); + .unwrap_or_else(|| self.next_inst()); if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? { self.fill(patch_concat.hole, patch_rep.entry); Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry })) @@ -700,7 +701,7 @@ impl Compiler { } // Same reasoning as in c_repeat_range_min_or_more (we know that min < // max at this point). - let patch_concat = patch_concat.unwrap_or(self.next_inst()); + let patch_concat = patch_concat.unwrap_or_else(|| self.next_inst()); let initial_entry = patch_concat.entry; // It is much simpler to compile, e.g., `a{2,5}` as: // @@ -879,14 +880,14 @@ impl MaybeInst { } MaybeInst::Split1(goto1) => { MaybeInst::Compiled(Inst::Split(InstSplit { - goto1: goto1, + goto1, goto2: goto, })) } MaybeInst::Split2(goto2) => { MaybeInst::Compiled(Inst::Split(InstSplit { goto1: goto, - goto2: goto2, + goto2, })) } _ => unreachable!( @@ -900,9 +901,7 @@ impl MaybeInst { fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) { let filled = match *self { - MaybeInst::Split => { - Inst::Split(InstSplit { goto1: goto1, goto2: goto2 }) - } + MaybeInst::Split => Inst::Split(InstSplit { goto1, goto2 }), _ => unreachable!( "must be called on Split instruction, \ instead it was called on: {:?}", @@ -960,19 +959,17 @@ enum InstHole { impl InstHole { fn fill(&self, goto: InstPtr) -> Inst { match *self { - InstHole::Save { slot } => { - Inst::Save(InstSave { goto: goto, slot: slot }) - } + InstHole::Save { slot } => Inst::Save(InstSave { goto, slot }), InstHole::EmptyLook { look } => { - Inst::EmptyLook(InstEmptyLook { goto: goto, look: look }) + Inst::EmptyLook(InstEmptyLook { goto, look }) } - InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }), + InstHole::Char { c } => Inst::Char(InstChar { goto, c }), InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges { - goto: goto, + goto, ranges: ranges.clone().into_boxed_slice(), }), InstHole::Bytes { start, end } => { - Inst::Bytes(InstBytes { goto: goto, start: start, end: end }) + Inst::Bytes(InstBytes { goto, start, end }) } } } @@ -1042,7 +1039,7 @@ impl<'a, 'b> CompileClass<'a, 'b> { let mut last_hole = Hole::None; for byte_range in seq { let key = SuffixCacheKey { - from_inst: from_inst, + from_inst, start: byte_range.start, end: byte_range.end, }; @@ -1132,7 +1129,7 @@ impl SuffixCache { } } *pos = self.dense.len(); - self.dense.push(SuffixCacheEntry { key: key, pc: pc }); + self.dense.push(SuffixCacheEntry { key, pc }); None } @@ -1143,8 +1140,8 @@ impl SuffixCache { fn hash(&self, suffix: &SuffixCacheKey) -> usize { // Basic FNV-1a hash as described: // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function - const FNV_PRIME: u64 = 1099511628211; - let mut h = 14695981039346656037; + const FNV_PRIME: u64 = 1_099_511_628_211; + let mut h = 14_695_981_039_346_656_037; h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME); h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME); h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME); diff --git a/vendor/regex/src/dfa.rs b/vendor/regex/src/dfa.rs index 4aee8039c..dc9952120 100644 --- a/vendor/regex/src/dfa.rs +++ b/vendor/regex/src/dfa.rs @@ -31,7 +31,7 @@ considerably more complex than one might expect out of a DFA. A number of tricks are employed to make it fast. Tread carefully. N.B. While this implementation is heavily commented, Russ Cox's series of -articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/ +articles on regexes is strongly recommended: <https://swtch.com/~rsc/regexp/> (As is the DFA implementation in RE2, which heavily influenced this implementation.) */ @@ -454,10 +454,10 @@ impl<'a> Fsm<'a> { let mut cache = cache.borrow_mut(); let cache = &mut cache.dfa; let mut dfa = Fsm { - prog: prog, + prog, start: 0, // filled in below - at: at, - quit_after_match: quit_after_match, + at, + quit_after_match, last_match_si: STATE_UNKNOWN, last_cache_flush: at, cache: &mut cache.inner, @@ -484,10 +484,10 @@ impl<'a> Fsm<'a> { let mut cache = cache.borrow_mut(); let cache = &mut cache.dfa_reverse; let mut dfa = Fsm { - prog: prog, + prog, start: 0, // filled in below - at: at, - quit_after_match: quit_after_match, + at, + quit_after_match, last_match_si: STATE_UNKNOWN, last_cache_flush: at, cache: &mut cache.inner, @@ -515,9 +515,9 @@ impl<'a> Fsm<'a> { let mut cache = cache.borrow_mut(); let cache = &mut cache.dfa; let mut dfa = Fsm { - prog: prog, + prog, start: 0, // filled in below - at: at, + at, quit_after_match: false, last_match_si: STATE_UNKNOWN, last_cache_flush: at, @@ -1606,11 +1606,7 @@ struct StateMap { impl StateMap { fn new(num_byte_classes: usize) -> StateMap { - StateMap { - map: HashMap::new(), - states: vec![], - num_byte_classes: num_byte_classes, - } + StateMap { map: HashMap::new(), states: vec![], num_byte_classes } } fn len(&self) -> usize { @@ -1646,7 +1642,7 @@ impl Transitions { /// The number of byte classes corresponds to the stride. Every state will /// have `num_byte_classes` slots for transitions. fn new(num_byte_classes: usize) -> Transitions { - Transitions { table: vec![], num_byte_classes: num_byte_classes } + Transitions { table: vec![], num_byte_classes } } /// Returns the total number of states currently in this table. @@ -1696,27 +1692,27 @@ impl Transitions { impl StateFlags { fn is_match(&self) -> bool { - self.0 & 0b0000000_1 > 0 + self.0 & 0b0000_0001 > 0 } fn set_match(&mut self) { - self.0 |= 0b0000000_1; + self.0 |= 0b0000_0001; } fn is_word(&self) -> bool { - self.0 & 0b000000_1_0 > 0 + self.0 & 0b0000_0010 > 0 } fn set_word(&mut self) { - self.0 |= 0b000000_1_0; + self.0 |= 0b0000_0010; } fn has_empty(&self) -> bool { - self.0 & 0b00000_1_00 > 0 + self.0 & 0b0000_0100 > 0 } fn set_empty(&mut self) { - self.0 |= 0b00000_1_00; + self.0 |= 0b0000_0100; } } diff --git a/vendor/regex/src/exec.rs b/vendor/regex/src/exec.rs index d5fad1c0e..e75ca083a 100644 --- a/vendor/regex/src/exec.rs +++ b/vendor/regex/src/exec.rs @@ -288,10 +288,10 @@ impl ExecBuilder { exprs.push(expr); } Ok(Parsed { - exprs: exprs, + exprs, prefixes: prefixes.unwrap_or_else(Literals::empty), suffixes: suffixes.unwrap_or_else(Literals::empty), - bytes: bytes, + bytes, }) } @@ -311,7 +311,7 @@ impl ExecBuilder { match_type: MatchType::Nothing, }); let pool = ExecReadOnly::new_pool(&ro); - return Ok(Exec { ro: ro, pool }); + return Ok(Exec { ro, pool }); } let parsed = self.parse()?; let mut nfa = Compiler::new() @@ -340,12 +340,12 @@ impl ExecBuilder { let mut ro = ExecReadOnly { res: self.options.pats, - nfa: nfa, - dfa: dfa, - dfa_reverse: dfa_reverse, + nfa, + dfa, + dfa_reverse, suffixes: LiteralSearcher::suffixes(parsed.suffixes), #[cfg(feature = "perf-literal")] - ac: ac, + ac, match_type: MatchType::Nothing, }; ro.match_type = ro.choose_match_type(self.match_type); diff --git a/vendor/regex/src/expand.rs b/vendor/regex/src/expand.rs index fd9c2d05d..67b514926 100644 --- a/vendor/regex/src/expand.rs +++ b/vendor/regex/src/expand.rs @@ -127,7 +127,7 @@ impl From<usize> for Ref<'static> { /// If no such valid reference could be found, None is returned. fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> { let mut i = 0; - let rep: &[u8] = replacement.as_ref(); + let rep: &[u8] = replacement; if rep.len() <= 1 || rep[0] != b'$' { return None; } @@ -136,7 +136,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> { return find_cap_ref_braced(rep, i + 1); } let mut cap_end = i; - while rep.get(cap_end).map_or(false, is_valid_cap_letter) { + while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { cap_end += 1; } if cap_end == i { @@ -183,8 +183,8 @@ fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> { } /// Returns true if and only if the given byte is allowed in a capture name. -fn is_valid_cap_letter(b: &u8) -> bool { - match *b { +fn is_valid_cap_letter(b: u8) -> bool { + match b { b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, _ => false, } diff --git a/vendor/regex/src/input.rs b/vendor/regex/src/input.rs index 5d50ee340..df6c3e0c9 100644 --- a/vendor/regex/src/input.rs +++ b/vendor/regex/src/input.rs @@ -160,7 +160,7 @@ impl<'t> Input for CharInput<'t> { InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } } else { let c = decode_utf8(&self[i..]).map(|(c, _)| c).into(); - InputAt { pos: i, c: c, byte: None, len: c.len_utf8() } + InputAt { pos: i, c, byte: None, len: c.len_utf8() } } } @@ -231,7 +231,7 @@ pub struct ByteInput<'t> { impl<'t> ByteInput<'t> { /// Return a new byte-based input reader for the given string. pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> { - ByteInput { text: text, only_utf8: only_utf8 } + ByteInput { text, only_utf8 } } } diff --git a/vendor/regex/src/lib.rs b/vendor/regex/src/lib.rs index 7f2dec815..3e3b0a007 100644 --- a/vendor/regex/src/lib.rs +++ b/vendor/regex/src/lib.rs @@ -628,7 +628,6 @@ pub use crate::re_builder::unicode::*; #[cfg(feature = "std")] pub use crate::re_set::unicode::*; #[cfg(feature = "std")] -#[cfg(feature = "std")] pub use crate::re_unicode::{ escape, CaptureLocations, CaptureMatches, CaptureNames, Captures, Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split, diff --git a/vendor/regex/src/literal/imp.rs b/vendor/regex/src/literal/imp.rs index 82f050a0d..90b2f1160 100644 --- a/vendor/regex/src/literal/imp.rs +++ b/vendor/regex/src/literal/imp.rs @@ -57,10 +57,10 @@ impl LiteralSearcher { fn new(lits: Literals, matcher: Matcher) -> Self { let complete = lits.all_complete(); LiteralSearcher { - complete: complete, + complete, lcp: Memmem::new(lits.longest_common_prefix()), lcs: Memmem::new(lits.longest_common_suffix()), - matcher: matcher, + matcher, } } diff --git a/vendor/regex/src/pattern.rs b/vendor/regex/src/pattern.rs index b4ffd8e16..00549e510 100644 --- a/vendor/regex/src/pattern.rs +++ b/vendor/regex/src/pattern.rs @@ -15,7 +15,7 @@ impl<'r, 't> Pattern<'t> for &'r Regex { fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> { RegexSearcher { - haystack: haystack, + haystack, it: self.find_iter(haystack), last_step_end: 0, next_match: None, diff --git a/vendor/regex/src/pikevm.rs b/vendor/regex/src/pikevm.rs index 9a1424086..8c9eac2d3 100644 --- a/vendor/regex/src/pikevm.rs +++ b/vendor/regex/src/pikevm.rs @@ -100,7 +100,7 @@ impl<'r, I: Input> Fsm<'r, I> { cache.clist.resize(prog.len(), prog.captures.len()); cache.nlist.resize(prog.len(), prog.captures.len()); let at = input.at(start); - Fsm { prog: prog, stack: &mut cache.stack, input: input }.exec_( + Fsm { prog, stack: &mut cache.stack, input }.exec_( &mut cache.clist, &mut cache.nlist, matches, diff --git a/vendor/regex/src/prog.rs b/vendor/regex/src/prog.rs index 475a8112a..c211f71d8 100644 --- a/vendor/regex/src/prog.rs +++ b/vendor/regex/src/prog.rs @@ -233,7 +233,7 @@ impl fmt::Debug for Program { if pc == self.start { write!(f, " (start)")?; } - write!(f, "\n")?; + writeln!(f)?; } Ok(()) } diff --git a/vendor/regex/src/re_bytes.rs b/vendor/regex/src/re_bytes.rs index ae55d6d25..d71969257 100644 --- a/vendor/regex/src/re_bytes.rs +++ b/vendor/regex/src/re_bytes.rs @@ -53,7 +53,7 @@ impl<'t> Match<'t> { /// Creates a new match from the given haystack and byte offsets. #[inline] fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start: start, end: end } + Match { text: haystack, start, end } } } @@ -255,7 +255,7 @@ impl Regex { pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> { let mut locs = self.capture_locations(); self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text: text, + text, locs: locs.0, named_groups: self.0.capture_name_idx().clone(), }) @@ -578,7 +578,7 @@ impl Regex { /// context into consideration. For example, the `\A` anchor can only /// match when `start == 0`. pub fn is_match_at(&self, text: &[u8], start: usize) -> bool { - self.shortest_match_at(text, start).is_some() + self.0.searcher().is_match_at(text, start) } /// Returns the same as find, but starts the search at the given @@ -723,7 +723,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> { fn next(&mut self) -> Option<Captures<'t>> { self.0.next().map(|locs| Captures { text: self.0.text(), - locs: locs, + locs, named_groups: self.0.regex().capture_name_idx().clone(), }) } @@ -877,7 +877,7 @@ impl CaptureLocations { self.0.pos(i) } - /// Returns the total number of capturing groups. + /// Returns the total number of capture groups (even if they didn't match). /// /// This is always at least `1` since every regex has at least `1` /// capturing group that corresponds to the entire match. @@ -979,7 +979,7 @@ impl<'t> Captures<'t> { expand_bytes(self, replacement, dst) } - /// Returns the number of captured groups. + /// Returns the total number of capture groups (even if they didn't match). /// /// This is always at least `1`, since every regex has at least one capture /// group that corresponds to the full match. diff --git a/vendor/regex/src/re_set.rs b/vendor/regex/src/re_set.rs index 73d59532e..a6d886d76 100644 --- a/vendor/regex/src/re_set.rs +++ b/vendor/regex/src/re_set.rs @@ -59,13 +59,45 @@ $(#[$doc_regexset_example])* /// 1. Does any regex in the set match? /// 2. If so, which regexes in the set match? /// -/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2) -/// since the matching engines can stop after the first match is found. +/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1) +/// instead of (2) since the matching engines can stop after the first match +/// is found. /// -/// Other features like finding the location of successive matches or their -/// sub-captures aren't supported. If you need this functionality, the -/// recommended approach is to compile each regex in the set independently and -/// selectively match them based on which regexes in the set matched. +/// You cannot directly extract [`Match`][crate::Match] or +/// [`Captures`][crate::Captures] objects from a regex set. If you need these +/// operations, the recommended approach is to compile each pattern in the set +/// independently and scan the exact same input a second time with those +/// independently compiled patterns: +/// +/// ```rust +/// use regex::{Regex, RegexSet}; +/// +/// let patterns = ["foo", "bar"]; +/// // Both patterns will match different ranges of this string. +/// let text = "barfoo"; +/// +/// // Compile a set matching any of our patterns. +/// let set = RegexSet::new(&patterns).unwrap(); +/// // Compile each pattern independently. +/// let regexes: Vec<_> = set.patterns().iter() +/// .map(|pat| Regex::new(pat).unwrap()) +/// .collect(); +/// +/// // Match against the whole set first and identify the individual +/// // matching patterns. +/// let matches: Vec<&str> = set.matches(text).into_iter() +/// // Dereference the match index to get the corresponding +/// // compiled pattern. +/// .map(|match_idx| ®exes[match_idx]) +/// // To get match locations or any other info, we then have to search +/// // the exact same text again, using our separately-compiled pattern. +/// .map(|pat| pat.find(text).unwrap().as_str()) +/// .collect(); +/// +/// // Matches arrive in the order the constituent patterns were declared, +/// // not the order they appear in the input. +/// assert_eq!(vec!["foo", "bar"], matches); +/// ``` /// /// # Performance /// diff --git a/vendor/regex/src/re_trait.rs b/vendor/regex/src/re_trait.rs index 680aa5459..d0c717df5 100644 --- a/vendor/regex/src/re_trait.rs +++ b/vendor/regex/src/re_trait.rs @@ -74,8 +74,19 @@ impl<'c> Iterator for SubCapturesPosIter<'c> { self.idx += 1; x } + + fn size_hint(&self) -> (usize, Option<usize>) { + let len = self.locs.len() - self.idx; + (len, Some(len)) + } + + fn count(self) -> usize { + self.len() + } } +impl<'c> ExactSizeIterator for SubCapturesPosIter<'c> {} + impl<'c> FusedIterator for SubCapturesPosIter<'c> {} /// `RegularExpression` describes types that can implement regex searching. @@ -139,7 +150,7 @@ pub trait RegularExpression: Sized + fmt::Debug { /// Returns an iterator over all non-overlapping successive leftmost-first /// matches. fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> { - Matches { re: self, text: text, last_end: 0, last_match: None } + Matches { re: self, text, last_end: 0, last_match: None } } /// Returns an iterator over all non-overlapping successive leftmost-first diff --git a/vendor/regex/src/re_unicode.rs b/vendor/regex/src/re_unicode.rs index e4871a621..60d81a7d9 100644 --- a/vendor/regex/src/re_unicode.rs +++ b/vendor/regex/src/re_unicode.rs @@ -61,7 +61,7 @@ impl<'t> Match<'t> { /// Creates a new match from the given haystack and byte offsets. #[inline] fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start: start, end: end } + Match { text: haystack, start, end } } } @@ -129,7 +129,7 @@ impl<'t> From<Match<'t>> for Range<usize> { /// assert!(haystack.contains(&re)); /// assert_eq!(haystack.find(&re), Some(1)); /// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(), -/// vec![(1, 4), (5, 8)]); +/// vec![(1, "111"), (5, "222")]); /// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); /// ``` #[derive(Clone)] @@ -311,7 +311,7 @@ impl Regex { pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> { let mut locs = self.capture_locations(); self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text: text, + text, locs: locs.0, named_groups: self.0.capture_name_idx().clone(), }) @@ -636,7 +636,7 @@ impl Regex { /// context into consideration. For example, the `\A` anchor can only /// match when `start == 0`. pub fn is_match_at(&self, text: &str, start: usize) -> bool { - self.shortest_match_at(text, start).is_some() + self.0.searcher_str().is_match_at(text, start) } /// Returns the same as find, but starts the search at the given @@ -887,7 +887,7 @@ impl CaptureLocations { self.0.pos(i) } - /// Returns the total number of capturing groups. + /// Returns the total number of capture groups (even if they didn't match). /// /// This is always at least `1` since every regex has at least `1` /// capturing group that corresponds to the entire match. @@ -989,7 +989,7 @@ impl<'t> Captures<'t> { expand_str(self, replacement, dst) } - /// Returns the number of captured groups. + /// Returns the total number of capture groups (even if they didn't match). /// /// This is always at least `1`, since every regex has at least one capture /// group that corresponds to the full match. @@ -1092,8 +1092,18 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { .next() .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + fn count(self) -> usize { + self.it.count() + } } +impl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {} + impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} /// An iterator that yields all non-overlapping capture groups matching a @@ -1114,7 +1124,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> { fn next(&mut self) -> Option<Captures<'t>> { self.0.next().map(|locs| Captures { text: self.0.text(), - locs: locs, + locs, named_groups: self.0.regex().capture_name_idx().clone(), }) } diff --git a/vendor/regex/src/utf8.rs b/vendor/regex/src/utf8.rs index 6e0608fdb..2dfd2c0d1 100644 --- a/vendor/regex/src/utf8.rs +++ b/vendor/regex/src/utf8.rs @@ -108,7 +108,7 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> { | ((b2 & !TAG_CONT) as u32) << 6 | ((b3 & !TAG_CONT) as u32); match cp { - 0x10000..=0x10FFFF => char::from_u32(cp).map(|cp| (cp, 4)), + 0x10000..=0x0010_FFFF => char::from_u32(cp).map(|cp| (cp, 4)), _ => None, } } |