summaryrefslogtreecommitdiffstats
path: root/vendor/regex/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:11:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:11:28 +0000
commit94a0819fe3a0d679c3042a77bfe6a2afc505daea (patch)
tree2b827afe6a05f3538db3f7803a88c4587fe85648 /vendor/regex/src
parentAdding upstream version 1.64.0+dfsg1. (diff)
downloadrustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.tar.xz
rustc-94a0819fe3a0d679c3042a77bfe6a2afc505daea.zip
Adding upstream version 1.66.0+dfsg1.upstream/1.66.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex/src')
-rw-r--r--vendor/regex/src/backtrack.rs12
-rw-r--r--vendor/regex/src/compile.rs57
-rw-r--r--vendor/regex/src/dfa.rs38
-rw-r--r--vendor/regex/src/exec.rs14
-rw-r--r--vendor/regex/src/expand.rs8
-rw-r--r--vendor/regex/src/input.rs4
-rw-r--r--vendor/regex/src/lib.rs1
-rw-r--r--vendor/regex/src/literal/imp.rs4
-rw-r--r--vendor/regex/src/pattern.rs2
-rw-r--r--vendor/regex/src/pikevm.rs2
-rw-r--r--vendor/regex/src/prog.rs2
-rw-r--r--vendor/regex/src/re_bytes.rs12
-rw-r--r--vendor/regex/src/re_set.rs44
-rw-r--r--vendor/regex/src/re_trait.rs13
-rw-r--r--vendor/regex/src/re_unicode.rs24
-rw-r--r--vendor/regex/src/utf8.rs2
16 files changed, 139 insertions, 100 deletions
diff --git a/vendor/regex/src/backtrack.rs b/vendor/regex/src/backtrack.rs
index a3d25d662..4d83856ca 100644
--- a/vendor/regex/src/backtrack.rs
+++ b/vendor/regex/src/backtrack.rs
@@ -93,13 +93,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.backtrack;
let start = input.at(start);
- let mut b = Bounded {
- prog: prog,
- input: input,
- matches: matches,
- slots: slots,
- m: cache,
- };
+ let mut b = Bounded { prog, input, matches, slots, m: cache };
b.exec_(start, end)
}
@@ -220,14 +214,14 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
// job is popped and the old capture index is restored.
self.m.jobs.push(Job::SaveRestore {
slot: inst.slot,
- old_pos: old_pos,
+ old_pos,
});
self.slots[inst.slot] = Some(at.pos());
}
ip = inst.goto;
}
Split(ref inst) => {
- self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
+ self.m.jobs.push(Job::Inst { ip: inst.goto2, at });
ip = inst.goto1;
}
EmptyLook(ref inst) => {
diff --git a/vendor/regex/src/compile.rs b/vendor/regex/src/compile.rs
index 069f445c8..90ca25015 100644
--- a/vendor/regex/src/compile.rs
+++ b/vendor/regex/src/compile.rs
@@ -149,7 +149,8 @@ impl Compiler {
self.compiled.start = dotstar_patch.entry;
}
self.compiled.captures = vec![None];
- let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
+ let patch =
+ self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst());
if self.compiled.needs_dotstar() {
self.fill(dotstar_patch.hole, patch.entry);
} else {
@@ -185,7 +186,7 @@ impl Compiler {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let Patch { hole, entry } =
- self.c_capture(0, expr)?.unwrap_or(self.next_inst());
+ self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::Match(i));
@@ -193,7 +194,7 @@ impl Compiler {
}
let i = exprs.len() - 1;
let Patch { hole, entry } =
- self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
+ self.c_capture(0, &exprs[i])?.unwrap_or_else(|| self.next_inst());
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
@@ -410,11 +411,11 @@ impl Compiler {
} else {
let entry = self.insts.len();
let hole = self.push_hole(InstHole::Save { slot: first_slot });
- let patch = self.c(expr)?.unwrap_or(self.next_inst());
+ let patch = self.c(expr)?.unwrap_or_else(|| self.next_inst());
self.fill(hole, patch.entry);
self.fill_to_next(patch.hole);
let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
- Ok(Some(Patch { hole: hole, entry: entry }))
+ Ok(Some(Patch { hole, entry }))
}
}
@@ -448,7 +449,7 @@ impl Compiler {
self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
}
} else {
- let hole = self.push_hole(InstHole::Char { c: c });
+ let hole = self.push_hole(InstHole::Char { c });
Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
@@ -458,7 +459,7 @@ impl Compiler {
assert!(!ranges.is_empty());
if self.compiled.uses_bytes() {
- Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
+ Ok(Some(CompileClass { c: self, ranges }.compile()?))
} else {
let ranges: Vec<(char, char)> =
ranges.iter().map(|r| (r.start(), r.end())).collect();
@@ -467,9 +468,9 @@ impl Compiler {
} else {
self.extra_inst_bytes +=
ranges.len() * (size_of::<char>() * 2);
- self.push_hole(InstHole::Ranges { ranges: ranges })
+ self.push_hole(InstHole::Ranges { ranges })
};
- Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
@@ -508,8 +509,8 @@ impl Compiler {
}
fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
- let hole = self.push_hole(InstHole::EmptyLook { look: look });
- Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
+ let hole = self.push_hole(InstHole::EmptyLook { look });
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
@@ -533,7 +534,7 @@ impl Compiler {
hole = p.hole;
}
}
- Ok(Some(Patch { hole: hole, entry: entry }))
+ Ok(Some(Patch { hole, entry }))
}
fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
@@ -676,7 +677,7 @@ impl Compiler {
// None).
let patch_concat = self
.c_concat(iter::repeat(expr).take(min))?
- .unwrap_or(self.next_inst());
+ .unwrap_or_else(|| self.next_inst());
if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
self.fill(patch_concat.hole, patch_rep.entry);
Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
@@ -700,7 +701,7 @@ impl Compiler {
}
// Same reasoning as in c_repeat_range_min_or_more (we know that min <
// max at this point).
- let patch_concat = patch_concat.unwrap_or(self.next_inst());
+ let patch_concat = patch_concat.unwrap_or_else(|| self.next_inst());
let initial_entry = patch_concat.entry;
// It is much simpler to compile, e.g., `a{2,5}` as:
//
@@ -879,14 +880,14 @@ impl MaybeInst {
}
MaybeInst::Split1(goto1) => {
MaybeInst::Compiled(Inst::Split(InstSplit {
- goto1: goto1,
+ goto1,
goto2: goto,
}))
}
MaybeInst::Split2(goto2) => {
MaybeInst::Compiled(Inst::Split(InstSplit {
goto1: goto,
- goto2: goto2,
+ goto2,
}))
}
_ => unreachable!(
@@ -900,9 +901,7 @@ impl MaybeInst {
fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
let filled = match *self {
- MaybeInst::Split => {
- Inst::Split(InstSplit { goto1: goto1, goto2: goto2 })
- }
+ MaybeInst::Split => Inst::Split(InstSplit { goto1, goto2 }),
_ => unreachable!(
"must be called on Split instruction, \
instead it was called on: {:?}",
@@ -960,19 +959,17 @@ enum InstHole {
impl InstHole {
fn fill(&self, goto: InstPtr) -> Inst {
match *self {
- InstHole::Save { slot } => {
- Inst::Save(InstSave { goto: goto, slot: slot })
- }
+ InstHole::Save { slot } => Inst::Save(InstSave { goto, slot }),
InstHole::EmptyLook { look } => {
- Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
+ Inst::EmptyLook(InstEmptyLook { goto, look })
}
- InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
+ InstHole::Char { c } => Inst::Char(InstChar { goto, c }),
InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges {
- goto: goto,
+ goto,
ranges: ranges.clone().into_boxed_slice(),
}),
InstHole::Bytes { start, end } => {
- Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
+ Inst::Bytes(InstBytes { goto, start, end })
}
}
}
@@ -1042,7 +1039,7 @@ impl<'a, 'b> CompileClass<'a, 'b> {
let mut last_hole = Hole::None;
for byte_range in seq {
let key = SuffixCacheKey {
- from_inst: from_inst,
+ from_inst,
start: byte_range.start,
end: byte_range.end,
};
@@ -1132,7 +1129,7 @@ impl SuffixCache {
}
}
*pos = self.dense.len();
- self.dense.push(SuffixCacheEntry { key: key, pc: pc });
+ self.dense.push(SuffixCacheEntry { key, pc });
None
}
@@ -1143,8 +1140,8 @@ impl SuffixCache {
fn hash(&self, suffix: &SuffixCacheKey) -> usize {
// Basic FNV-1a hash as described:
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
- const FNV_PRIME: u64 = 1099511628211;
- let mut h = 14695981039346656037;
+ const FNV_PRIME: u64 = 1_099_511_628_211;
+ let mut h = 14_695_981_039_346_656_037;
h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME);
h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME);
h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME);
diff --git a/vendor/regex/src/dfa.rs b/vendor/regex/src/dfa.rs
index 4aee8039c..dc9952120 100644
--- a/vendor/regex/src/dfa.rs
+++ b/vendor/regex/src/dfa.rs
@@ -31,7 +31,7 @@ considerably more complex than one might expect out of a DFA. A number of
tricks are employed to make it fast. Tread carefully.
N.B. While this implementation is heavily commented, Russ Cox's series of
-articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/
+articles on regexes is strongly recommended: <https://swtch.com/~rsc/regexp/>
(As is the DFA implementation in RE2, which heavily influenced this
implementation.)
*/
@@ -454,10 +454,10 @@ impl<'a> Fsm<'a> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.dfa;
let mut dfa = Fsm {
- prog: prog,
+ prog,
start: 0, // filled in below
- at: at,
- quit_after_match: quit_after_match,
+ at,
+ quit_after_match,
last_match_si: STATE_UNKNOWN,
last_cache_flush: at,
cache: &mut cache.inner,
@@ -484,10 +484,10 @@ impl<'a> Fsm<'a> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.dfa_reverse;
let mut dfa = Fsm {
- prog: prog,
+ prog,
start: 0, // filled in below
- at: at,
- quit_after_match: quit_after_match,
+ at,
+ quit_after_match,
last_match_si: STATE_UNKNOWN,
last_cache_flush: at,
cache: &mut cache.inner,
@@ -515,9 +515,9 @@ impl<'a> Fsm<'a> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.dfa;
let mut dfa = Fsm {
- prog: prog,
+ prog,
start: 0, // filled in below
- at: at,
+ at,
quit_after_match: false,
last_match_si: STATE_UNKNOWN,
last_cache_flush: at,
@@ -1606,11 +1606,7 @@ struct StateMap {
impl StateMap {
fn new(num_byte_classes: usize) -> StateMap {
- StateMap {
- map: HashMap::new(),
- states: vec![],
- num_byte_classes: num_byte_classes,
- }
+ StateMap { map: HashMap::new(), states: vec![], num_byte_classes }
}
fn len(&self) -> usize {
@@ -1646,7 +1642,7 @@ impl Transitions {
/// The number of byte classes corresponds to the stride. Every state will
/// have `num_byte_classes` slots for transitions.
fn new(num_byte_classes: usize) -> Transitions {
- Transitions { table: vec![], num_byte_classes: num_byte_classes }
+ Transitions { table: vec![], num_byte_classes }
}
/// Returns the total number of states currently in this table.
@@ -1696,27 +1692,27 @@ impl Transitions {
impl StateFlags {
fn is_match(&self) -> bool {
- self.0 & 0b0000000_1 > 0
+ self.0 & 0b0000_0001 > 0
}
fn set_match(&mut self) {
- self.0 |= 0b0000000_1;
+ self.0 |= 0b0000_0001;
}
fn is_word(&self) -> bool {
- self.0 & 0b000000_1_0 > 0
+ self.0 & 0b0000_0010 > 0
}
fn set_word(&mut self) {
- self.0 |= 0b000000_1_0;
+ self.0 |= 0b0000_0010;
}
fn has_empty(&self) -> bool {
- self.0 & 0b00000_1_00 > 0
+ self.0 & 0b0000_0100 > 0
}
fn set_empty(&mut self) {
- self.0 |= 0b00000_1_00;
+ self.0 |= 0b0000_0100;
}
}
diff --git a/vendor/regex/src/exec.rs b/vendor/regex/src/exec.rs
index d5fad1c0e..e75ca083a 100644
--- a/vendor/regex/src/exec.rs
+++ b/vendor/regex/src/exec.rs
@@ -288,10 +288,10 @@ impl ExecBuilder {
exprs.push(expr);
}
Ok(Parsed {
- exprs: exprs,
+ exprs,
prefixes: prefixes.unwrap_or_else(Literals::empty),
suffixes: suffixes.unwrap_or_else(Literals::empty),
- bytes: bytes,
+ bytes,
})
}
@@ -311,7 +311,7 @@ impl ExecBuilder {
match_type: MatchType::Nothing,
});
let pool = ExecReadOnly::new_pool(&ro);
- return Ok(Exec { ro: ro, pool });
+ return Ok(Exec { ro, pool });
}
let parsed = self.parse()?;
let mut nfa = Compiler::new()
@@ -340,12 +340,12 @@ impl ExecBuilder {
let mut ro = ExecReadOnly {
res: self.options.pats,
- nfa: nfa,
- dfa: dfa,
- dfa_reverse: dfa_reverse,
+ nfa,
+ dfa,
+ dfa_reverse,
suffixes: LiteralSearcher::suffixes(parsed.suffixes),
#[cfg(feature = "perf-literal")]
- ac: ac,
+ ac,
match_type: MatchType::Nothing,
};
ro.match_type = ro.choose_match_type(self.match_type);
diff --git a/vendor/regex/src/expand.rs b/vendor/regex/src/expand.rs
index fd9c2d05d..67b514926 100644
--- a/vendor/regex/src/expand.rs
+++ b/vendor/regex/src/expand.rs
@@ -127,7 +127,7 @@ impl From<usize> for Ref<'static> {
/// If no such valid reference could be found, None is returned.
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
let mut i = 0;
- let rep: &[u8] = replacement.as_ref();
+ let rep: &[u8] = replacement;
if rep.len() <= 1 || rep[0] != b'$' {
return None;
}
@@ -136,7 +136,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
return find_cap_ref_braced(rep, i + 1);
}
let mut cap_end = i;
- while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
+ while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) {
cap_end += 1;
}
if cap_end == i {
@@ -183,8 +183,8 @@ fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
}
/// Returns true if and only if the given byte is allowed in a capture name.
-fn is_valid_cap_letter(b: &u8) -> bool {
- match *b {
+fn is_valid_cap_letter(b: u8) -> bool {
+ match b {
b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
_ => false,
}
diff --git a/vendor/regex/src/input.rs b/vendor/regex/src/input.rs
index 5d50ee340..df6c3e0c9 100644
--- a/vendor/regex/src/input.rs
+++ b/vendor/regex/src/input.rs
@@ -160,7 +160,7 @@ impl<'t> Input for CharInput<'t> {
InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
} else {
let c = decode_utf8(&self[i..]).map(|(c, _)| c).into();
- InputAt { pos: i, c: c, byte: None, len: c.len_utf8() }
+ InputAt { pos: i, c, byte: None, len: c.len_utf8() }
}
}
@@ -231,7 +231,7 @@ pub struct ByteInput<'t> {
impl<'t> ByteInput<'t> {
/// Return a new byte-based input reader for the given string.
pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> {
- ByteInput { text: text, only_utf8: only_utf8 }
+ ByteInput { text, only_utf8 }
}
}
diff --git a/vendor/regex/src/lib.rs b/vendor/regex/src/lib.rs
index 7f2dec815..3e3b0a007 100644
--- a/vendor/regex/src/lib.rs
+++ b/vendor/regex/src/lib.rs
@@ -628,7 +628,6 @@ pub use crate::re_builder::unicode::*;
#[cfg(feature = "std")]
pub use crate::re_set::unicode::*;
#[cfg(feature = "std")]
-#[cfg(feature = "std")]
pub use crate::re_unicode::{
escape, CaptureLocations, CaptureMatches, CaptureNames, Captures,
Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split,
diff --git a/vendor/regex/src/literal/imp.rs b/vendor/regex/src/literal/imp.rs
index 82f050a0d..90b2f1160 100644
--- a/vendor/regex/src/literal/imp.rs
+++ b/vendor/regex/src/literal/imp.rs
@@ -57,10 +57,10 @@ impl LiteralSearcher {
fn new(lits: Literals, matcher: Matcher) -> Self {
let complete = lits.all_complete();
LiteralSearcher {
- complete: complete,
+ complete,
lcp: Memmem::new(lits.longest_common_prefix()),
lcs: Memmem::new(lits.longest_common_suffix()),
- matcher: matcher,
+ matcher,
}
}
diff --git a/vendor/regex/src/pattern.rs b/vendor/regex/src/pattern.rs
index b4ffd8e16..00549e510 100644
--- a/vendor/regex/src/pattern.rs
+++ b/vendor/regex/src/pattern.rs
@@ -15,7 +15,7 @@ impl<'r, 't> Pattern<'t> for &'r Regex {
fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
RegexSearcher {
- haystack: haystack,
+ haystack,
it: self.find_iter(haystack),
last_step_end: 0,
next_match: None,
diff --git a/vendor/regex/src/pikevm.rs b/vendor/regex/src/pikevm.rs
index 9a1424086..8c9eac2d3 100644
--- a/vendor/regex/src/pikevm.rs
+++ b/vendor/regex/src/pikevm.rs
@@ -100,7 +100,7 @@ impl<'r, I: Input> Fsm<'r, I> {
cache.clist.resize(prog.len(), prog.captures.len());
cache.nlist.resize(prog.len(), prog.captures.len());
let at = input.at(start);
- Fsm { prog: prog, stack: &mut cache.stack, input: input }.exec_(
+ Fsm { prog, stack: &mut cache.stack, input }.exec_(
&mut cache.clist,
&mut cache.nlist,
matches,
diff --git a/vendor/regex/src/prog.rs b/vendor/regex/src/prog.rs
index 475a8112a..c211f71d8 100644
--- a/vendor/regex/src/prog.rs
+++ b/vendor/regex/src/prog.rs
@@ -233,7 +233,7 @@ impl fmt::Debug for Program {
if pc == self.start {
write!(f, " (start)")?;
}
- write!(f, "\n")?;
+ writeln!(f)?;
}
Ok(())
}
diff --git a/vendor/regex/src/re_bytes.rs b/vendor/regex/src/re_bytes.rs
index ae55d6d25..d71969257 100644
--- a/vendor/regex/src/re_bytes.rs
+++ b/vendor/regex/src/re_bytes.rs
@@ -53,7 +53,7 @@ impl<'t> Match<'t> {
/// Creates a new match from the given haystack and byte offsets.
#[inline]
fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> {
- Match { text: haystack, start: start, end: end }
+ Match { text: haystack, start, end }
}
}
@@ -255,7 +255,7 @@ impl Regex {
pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
let mut locs = self.capture_locations();
self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
- text: text,
+ text,
locs: locs.0,
named_groups: self.0.capture_name_idx().clone(),
})
@@ -578,7 +578,7 @@ impl Regex {
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
pub fn is_match_at(&self, text: &[u8], start: usize) -> bool {
- self.shortest_match_at(text, start).is_some()
+ self.0.searcher().is_match_at(text, start)
}
/// Returns the same as find, but starts the search at the given
@@ -723,7 +723,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
fn next(&mut self) -> Option<Captures<'t>> {
self.0.next().map(|locs| Captures {
text: self.0.text(),
- locs: locs,
+ locs,
named_groups: self.0.regex().capture_name_idx().clone(),
})
}
@@ -877,7 +877,7 @@ impl CaptureLocations {
self.0.pos(i)
}
- /// Returns the total number of capturing groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1` since every regex has at least `1`
/// capturing group that corresponds to the entire match.
@@ -979,7 +979,7 @@ impl<'t> Captures<'t> {
expand_bytes(self, replacement, dst)
}
- /// Returns the number of captured groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1`, since every regex has at least one capture
/// group that corresponds to the full match.
diff --git a/vendor/regex/src/re_set.rs b/vendor/regex/src/re_set.rs
index 73d59532e..a6d886d76 100644
--- a/vendor/regex/src/re_set.rs
+++ b/vendor/regex/src/re_set.rs
@@ -59,13 +59,45 @@ $(#[$doc_regexset_example])*
/// 1. Does any regex in the set match?
/// 2. If so, which regexes in the set match?
///
-/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2)
-/// since the matching engines can stop after the first match is found.
+/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1)
+/// instead of (2) since the matching engines can stop after the first match
+/// is found.
///
-/// Other features like finding the location of successive matches or their
-/// sub-captures aren't supported. If you need this functionality, the
-/// recommended approach is to compile each regex in the set independently and
-/// selectively match them based on which regexes in the set matched.
+/// You cannot directly extract [`Match`][crate::Match] or
+/// [`Captures`][crate::Captures] objects from a regex set. If you need these
+/// operations, the recommended approach is to compile each pattern in the set
+/// independently and scan the exact same input a second time with those
+/// independently compiled patterns:
+///
+/// ```rust
+/// use regex::{Regex, RegexSet};
+///
+/// let patterns = ["foo", "bar"];
+/// // Both patterns will match different ranges of this string.
+/// let text = "barfoo";
+///
+/// // Compile a set matching any of our patterns.
+/// let set = RegexSet::new(&patterns).unwrap();
+/// // Compile each pattern independently.
+/// let regexes: Vec<_> = set.patterns().iter()
+/// .map(|pat| Regex::new(pat).unwrap())
+/// .collect();
+///
+/// // Match against the whole set first and identify the individual
+/// // matching patterns.
+/// let matches: Vec<&str> = set.matches(text).into_iter()
+/// // Dereference the match index to get the corresponding
+/// // compiled pattern.
+/// .map(|match_idx| &regexes[match_idx])
+/// // To get match locations or any other info, we then have to search
+/// // the exact same text again, using our separately-compiled pattern.
+/// .map(|pat| pat.find(text).unwrap().as_str())
+/// .collect();
+///
+/// // Matches arrive in the order the constituent patterns were declared,
+/// // not the order they appear in the input.
+/// assert_eq!(vec!["foo", "bar"], matches);
+/// ```
///
/// # Performance
///
diff --git a/vendor/regex/src/re_trait.rs b/vendor/regex/src/re_trait.rs
index 680aa5459..d0c717df5 100644
--- a/vendor/regex/src/re_trait.rs
+++ b/vendor/regex/src/re_trait.rs
@@ -74,8 +74,19 @@ impl<'c> Iterator for SubCapturesPosIter<'c> {
self.idx += 1;
x
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = self.locs.len() - self.idx;
+ (len, Some(len))
+ }
+
+ fn count(self) -> usize {
+ self.len()
+ }
}
+impl<'c> ExactSizeIterator for SubCapturesPosIter<'c> {}
+
impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
/// `RegularExpression` describes types that can implement regex searching.
@@ -139,7 +150,7 @@ pub trait RegularExpression: Sized + fmt::Debug {
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches.
fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> {
- Matches { re: self, text: text, last_end: 0, last_match: None }
+ Matches { re: self, text, last_end: 0, last_match: None }
}
/// Returns an iterator over all non-overlapping successive leftmost-first
diff --git a/vendor/regex/src/re_unicode.rs b/vendor/regex/src/re_unicode.rs
index e4871a621..60d81a7d9 100644
--- a/vendor/regex/src/re_unicode.rs
+++ b/vendor/regex/src/re_unicode.rs
@@ -61,7 +61,7 @@ impl<'t> Match<'t> {
/// Creates a new match from the given haystack and byte offsets.
#[inline]
fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> {
- Match { text: haystack, start: start, end: end }
+ Match { text: haystack, start, end }
}
}
@@ -129,7 +129,7 @@ impl<'t> From<Match<'t>> for Range<usize> {
/// assert!(haystack.contains(&re));
/// assert_eq!(haystack.find(&re), Some(1));
/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
-/// vec![(1, 4), (5, 8)]);
+/// vec![(1, "111"), (5, "222")]);
/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
/// ```
#[derive(Clone)]
@@ -311,7 +311,7 @@ impl Regex {
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
let mut locs = self.capture_locations();
self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
- text: text,
+ text,
locs: locs.0,
named_groups: self.0.capture_name_idx().clone(),
})
@@ -636,7 +636,7 @@ impl Regex {
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
pub fn is_match_at(&self, text: &str, start: usize) -> bool {
- self.shortest_match_at(text, start).is_some()
+ self.0.searcher_str().is_match_at(text, start)
}
/// Returns the same as find, but starts the search at the given
@@ -887,7 +887,7 @@ impl CaptureLocations {
self.0.pos(i)
}
- /// Returns the total number of capturing groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1` since every regex has at least `1`
/// capturing group that corresponds to the entire match.
@@ -989,7 +989,7 @@ impl<'t> Captures<'t> {
expand_str(self, replacement, dst)
}
- /// Returns the number of captured groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1`, since every regex has at least one capture
/// group that corresponds to the full match.
@@ -1092,8 +1092,18 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
.next()
.map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e)))
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.it.size_hint()
+ }
+
+ fn count(self) -> usize {
+ self.it.count()
+ }
}
+impl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {}
+
impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
/// An iterator that yields all non-overlapping capture groups matching a
@@ -1114,7 +1124,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
fn next(&mut self) -> Option<Captures<'t>> {
self.0.next().map(|locs| Captures {
text: self.0.text(),
- locs: locs,
+ locs,
named_groups: self.0.regex().capture_name_idx().clone(),
})
}
diff --git a/vendor/regex/src/utf8.rs b/vendor/regex/src/utf8.rs
index 6e0608fdb..2dfd2c0d1 100644
--- a/vendor/regex/src/utf8.rs
+++ b/vendor/regex/src/utf8.rs
@@ -108,7 +108,7 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
| ((b2 & !TAG_CONT) as u32) << 6
| ((b3 & !TAG_CONT) as u32);
match cp {
- 0x10000..=0x10FFFF => char::from_u32(cp).map(|cp| (cp, 4)),
+ 0x10000..=0x0010_FFFF => char::from_u32(cp).map(|cp| (cp, 4)),
_ => None,
}
}