diff options
Diffstat (limited to 'vendor/regex-automata/src/util/prefilter.rs')
-rw-r--r-- | vendor/regex-automata/src/util/prefilter.rs | 281 |
1 files changed, 0 insertions, 281 deletions
diff --git a/vendor/regex-automata/src/util/prefilter.rs b/vendor/regex-automata/src/util/prefilter.rs deleted file mode 100644 index 5fe151524..000000000 --- a/vendor/regex-automata/src/util/prefilter.rs +++ /dev/null @@ -1,281 +0,0 @@ -use crate::Match; - -/// A candidate is the result of running a prefilter on a haystack at a -/// particular position. The result is one of no match, a confirmed match or -/// a possible match. -/// -/// When no match is returned, the prefilter is guaranteeing that no possible -/// match can be found in the haystack, and the caller may trust this. That is, -/// all correct prefilters must never report false negatives. -/// -/// In some cases, a prefilter can confirm a match very quickly, in which case, -/// the caller may use this to stop what it's doing and report the match. In -/// this case, prefilter implementations must never report a false positive. -/// In other cases, the prefilter can only report a potential match, in which -/// case the callers must attempt to confirm the match. In this case, prefilter -/// implementations are permitted to return false positives. -#[derive(Clone, Debug)] -pub enum Candidate { - /// The prefilter reports that no match is possible. Prefilter - /// implementations will never report false negatives. - None, - /// The prefilter reports that a match has been confirmed at the provided - /// byte offsets. When this variant is reported, the prefilter is - /// guaranteeing a match. No false positives are permitted. - Match(Match), - /// The prefilter reports that a match *may* start at the given position. - /// When this variant is reported, it may correspond to a false positive. - PossibleStartOfMatch(usize), -} - -impl Candidate { - /// Convert this candidate into an option. This is useful when callers do - /// not distinguish between true positives and false positives (i.e., the - /// caller must always confirm the match in order to update some other - /// state). - /// - /// The byte offset in the option returned corresponds to the starting - /// position of the possible match. - pub fn into_option(self) -> Option<usize> { - match self { - Candidate::None => None, - Candidate::Match(ref m) => Some(m.start()), - Candidate::PossibleStartOfMatch(start) => Some(start), - } - } -} - -/// A prefilter describes the behavior of fast literal scanners for quickly -/// skipping past bytes in the haystack that we know cannot possibly -/// participate in a match. -pub trait Prefilter: core::fmt::Debug { - /// Returns the next possible match candidate. This may yield false - /// positives, so callers must confirm a match starting at the position - /// returned. This, however, must never produce false negatives. That is, - /// this must, at minimum, return the starting position of the next match - /// in the given haystack after or at the given position. - fn next_candidate( - &self, - state: &mut State, - haystack: &[u8], - at: usize, - ) -> Candidate; - - /// Returns the approximate total amount of heap used by this prefilter, in - /// units of bytes. - fn heap_bytes(&self) -> usize; - - /// Returns true if and only if this prefilter may return false positives - /// via the `Candidate::PossibleStartOfMatch` variant. This is most useful - /// when false positives are not posssible (in which case, implementations - /// should return false), which may allow completely avoiding heavier regex - /// machinery when the prefilter can quickly confirm its own matches. - /// - /// By default, this returns true, which is conservative; it is always - /// correct to return `true`. Returning `false` here and reporting a false - /// positive will result in incorrect searches. - fn reports_false_positives(&self) -> bool { - true - } -} - -impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P { - #[inline] - fn next_candidate( - &self, - state: &mut State, - haystack: &[u8], - at: usize, - ) -> Candidate { - (**self).next_candidate(state, haystack, at) - } - - fn heap_bytes(&self) -> usize { - (**self).heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - (**self).reports_false_positives() - } -} - -#[derive(Clone)] -pub struct Scanner<'p> { - prefilter: &'p dyn Prefilter, - state: State, -} - -impl<'p> Scanner<'p> { - pub fn new(prefilter: &'p dyn Prefilter) -> Scanner<'p> { - Scanner { prefilter, state: State::new() } - } - - pub(crate) fn is_effective(&mut self, at: usize) -> bool { - self.state.is_effective(at) - } - - pub(crate) fn reports_false_positives(&self) -> bool { - self.prefilter.reports_false_positives() - } - - pub(crate) fn next_candidate( - &mut self, - bytes: &[u8], - at: usize, - ) -> Candidate { - let cand = self.prefilter.next_candidate(&mut self.state, bytes, at); - match cand { - Candidate::None => { - self.state.update_skipped_bytes(bytes.len() - at); - } - Candidate::Match(ref m) => { - self.state.update_skipped_bytes(m.start() - at); - } - Candidate::PossibleStartOfMatch(i) => { - self.state.update_skipped_bytes(i - at); - } - } - cand - } -} - -impl<'p> core::fmt::Debug for Scanner<'p> { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - f.debug_struct("Scanner").field("state", &self.state).finish() - } -} - -/// State tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. -/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator via, e.g., `find_iter`, is treated as a single search.) -#[derive(Clone, Debug)] -pub struct State { - /// The number of skips that has been executed. - skips: usize, - /// The total number of bytes that have been skipped. - skipped: usize, - /// Once this heuristic has been deemed permanently ineffective, it will be - /// inert throughout the rest of its lifetime. This serves as a cheap way - /// to check inertness. - inert: bool, - /// The last (absolute) position at which a prefilter scanned to. - /// Prefilters can use this position to determine whether to re-scan or - /// not. - /// - /// Unlike other things that impact effectiveness, this is a fleeting - /// condition. That is, a prefilter can be considered ineffective if it is - /// at a position before `last_scan_at`, but can become effective again - /// once the search moves past `last_scan_at`. - /// - /// The utility of this is to both avoid additional overhead from calling - /// the prefilter and to avoid quadratic behavior. This ensures that a - /// prefilter will scan any particular byte at most once. (Note that some - /// prefilters, like the start-byte prefilter, do not need to use this - /// field at all, since it only looks for starting bytes.) - last_scan_at: usize, -} - -impl State { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: usize = 40; - - /// The minimum amount of bytes that skipping must average. - /// - /// That is, after MIN_SKIPS have occurred, if the average number of bytes - /// skipped ever falls below MIN_AVG_SKIP, then the prefilter will be - /// rendered inert. - const MIN_AVG_SKIP: usize = 16; - - /// Create a fresh prefilter state. - pub fn new() -> State { - State { skips: 0, skipped: 0, inert: false, last_scan_at: 0 } - } - - /// Updates the position at which the last scan stopped. This may be - /// greater than the position of the last candidate reported. For example, - /// searching for the byte `z` in `abczdef` for the pattern `abcz` will - /// report a candidate at position `0`, but the end of its last scan will - /// be at position `3`. - /// - /// This position factors into the effectiveness of this prefilter. If the - /// current position is less than the last position at which a scan ended, - /// then the prefilter should not be re-run until the search moves past - /// that position. - /// - /// It is always correct to never update the last scan position. In fact, - /// it is also always correct to set the last scan position to an arbitrary - /// value. The key is setting it to a position in the future at which it - /// makes sense to restart the prefilter. - pub fn update_last_scan(&mut self, at: usize) { - if at > self.last_scan_at { - self.last_scan_at = at; - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. If the prefilter is not effective, then this state - /// is rendered "inert." At which point, all subsequent calls to - /// `is_effective` on this state will return `false`. - /// - /// `at` should correspond to the current starting position of the search. - /// - /// Callers typically do not need to use this, as it represents the - /// default implementation of - /// [`Prefilter::is_effective`](trait.Prefilter.html#tymethod.is_effective). - fn is_effective(&mut self, at: usize) -> bool { - if self.inert { - return false; - } - if at < self.last_scan_at { - return false; - } - if self.skips < State::MIN_SKIPS { - return true; - } - - if self.skipped >= State::MIN_AVG_SKIP * self.skips { - return true; - } - - // We're inert. - self.inert = true; - false - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - fn update_skipped_bytes(&mut self, skipped: usize) { - self.skips += 1; - self.skipped += skipped; - } -} - -/// A `Prefilter` implementation that reports a possible match at every -/// position. -/// -/// This should generally not be used as an actual prefilter. It is only -/// useful when one needs to represent the absence of a prefilter in a generic -/// context. For example, a [`dfa::regex::Regex`](crate::dfa::regex::Regex) -/// uses this prefilter by default to indicate that no prefilter should be -/// used. -/// -/// A `None` prefilter value cannot be constructed. -#[derive(Clone, Debug)] -pub struct None { - _priv: (), -} - -impl Prefilter for None { - fn next_candidate(&self, _: &mut State, _: &[u8], at: usize) -> Candidate { - Candidate::PossibleStartOfMatch(at) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} |