summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/src/util/prefilter.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/src/util/prefilter.rs')
-rw-r--r--vendor/regex-automata/src/util/prefilter.rs281
1 files changed, 0 insertions, 281 deletions
diff --git a/vendor/regex-automata/src/util/prefilter.rs b/vendor/regex-automata/src/util/prefilter.rs
deleted file mode 100644
index 5fe151524..000000000
--- a/vendor/regex-automata/src/util/prefilter.rs
+++ /dev/null
@@ -1,281 +0,0 @@
-use crate::Match;
-
-/// A candidate is the result of running a prefilter on a haystack at a
-/// particular position. The result is one of no match, a confirmed match or
-/// a possible match.
-///
-/// When no match is returned, the prefilter is guaranteeing that no possible
-/// match can be found in the haystack, and the caller may trust this. That is,
-/// all correct prefilters must never report false negatives.
-///
-/// In some cases, a prefilter can confirm a match very quickly, in which case,
-/// the caller may use this to stop what it's doing and report the match. In
-/// this case, prefilter implementations must never report a false positive.
-/// In other cases, the prefilter can only report a potential match, in which
-/// case the callers must attempt to confirm the match. In this case, prefilter
-/// implementations are permitted to return false positives.
-#[derive(Clone, Debug)]
-pub enum Candidate {
- /// The prefilter reports that no match is possible. Prefilter
- /// implementations will never report false negatives.
- None,
- /// The prefilter reports that a match has been confirmed at the provided
- /// byte offsets. When this variant is reported, the prefilter is
- /// guaranteeing a match. No false positives are permitted.
- Match(Match),
- /// The prefilter reports that a match *may* start at the given position.
- /// When this variant is reported, it may correspond to a false positive.
- PossibleStartOfMatch(usize),
-}
-
-impl Candidate {
- /// Convert this candidate into an option. This is useful when callers do
- /// not distinguish between true positives and false positives (i.e., the
- /// caller must always confirm the match in order to update some other
- /// state).
- ///
- /// The byte offset in the option returned corresponds to the starting
- /// position of the possible match.
- pub fn into_option(self) -> Option<usize> {
- match self {
- Candidate::None => None,
- Candidate::Match(ref m) => Some(m.start()),
- Candidate::PossibleStartOfMatch(start) => Some(start),
- }
- }
-}
-
-/// A prefilter describes the behavior of fast literal scanners for quickly
-/// skipping past bytes in the haystack that we know cannot possibly
-/// participate in a match.
-pub trait Prefilter: core::fmt::Debug {
- /// Returns the next possible match candidate. This may yield false
- /// positives, so callers must confirm a match starting at the position
- /// returned. This, however, must never produce false negatives. That is,
- /// this must, at minimum, return the starting position of the next match
- /// in the given haystack after or at the given position.
- fn next_candidate(
- &self,
- state: &mut State,
- haystack: &[u8],
- at: usize,
- ) -> Candidate;
-
- /// Returns the approximate total amount of heap used by this prefilter, in
- /// units of bytes.
- fn heap_bytes(&self) -> usize;
-
- /// Returns true if and only if this prefilter may return false positives
- /// via the `Candidate::PossibleStartOfMatch` variant. This is most useful
- /// when false positives are not posssible (in which case, implementations
- /// should return false), which may allow completely avoiding heavier regex
- /// machinery when the prefilter can quickly confirm its own matches.
- ///
- /// By default, this returns true, which is conservative; it is always
- /// correct to return `true`. Returning `false` here and reporting a false
- /// positive will result in incorrect searches.
- fn reports_false_positives(&self) -> bool {
- true
- }
-}
-
-impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
- #[inline]
- fn next_candidate(
- &self,
- state: &mut State,
- haystack: &[u8],
- at: usize,
- ) -> Candidate {
- (**self).next_candidate(state, haystack, at)
- }
-
- fn heap_bytes(&self) -> usize {
- (**self).heap_bytes()
- }
-
- fn reports_false_positives(&self) -> bool {
- (**self).reports_false_positives()
- }
-}
-
-#[derive(Clone)]
-pub struct Scanner<'p> {
- prefilter: &'p dyn Prefilter,
- state: State,
-}
-
-impl<'p> Scanner<'p> {
- pub fn new(prefilter: &'p dyn Prefilter) -> Scanner<'p> {
- Scanner { prefilter, state: State::new() }
- }
-
- pub(crate) fn is_effective(&mut self, at: usize) -> bool {
- self.state.is_effective(at)
- }
-
- pub(crate) fn reports_false_positives(&self) -> bool {
- self.prefilter.reports_false_positives()
- }
-
- pub(crate) fn next_candidate(
- &mut self,
- bytes: &[u8],
- at: usize,
- ) -> Candidate {
- let cand = self.prefilter.next_candidate(&mut self.state, bytes, at);
- match cand {
- Candidate::None => {
- self.state.update_skipped_bytes(bytes.len() - at);
- }
- Candidate::Match(ref m) => {
- self.state.update_skipped_bytes(m.start() - at);
- }
- Candidate::PossibleStartOfMatch(i) => {
- self.state.update_skipped_bytes(i - at);
- }
- }
- cand
- }
-}
-
-impl<'p> core::fmt::Debug for Scanner<'p> {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- f.debug_struct("Scanner").field("state", &self.state).finish()
- }
-}
-
-/// State tracks state associated with the effectiveness of a
-/// prefilter. It is used to track how many bytes, on average, are skipped by
-/// the prefilter. If this average dips below a certain threshold over time,
-/// then the state renders the prefilter inert and stops using it.
-///
-/// A prefilter state should be created for each search. (Where creating an
-/// iterator via, e.g., `find_iter`, is treated as a single search.)
-#[derive(Clone, Debug)]
-pub struct State {
- /// The number of skips that has been executed.
- skips: usize,
- /// The total number of bytes that have been skipped.
- skipped: usize,
- /// Once this heuristic has been deemed permanently ineffective, it will be
- /// inert throughout the rest of its lifetime. This serves as a cheap way
- /// to check inertness.
- inert: bool,
- /// The last (absolute) position at which a prefilter scanned to.
- /// Prefilters can use this position to determine whether to re-scan or
- /// not.
- ///
- /// Unlike other things that impact effectiveness, this is a fleeting
- /// condition. That is, a prefilter can be considered ineffective if it is
- /// at a position before `last_scan_at`, but can become effective again
- /// once the search moves past `last_scan_at`.
- ///
- /// The utility of this is to both avoid additional overhead from calling
- /// the prefilter and to avoid quadratic behavior. This ensures that a
- /// prefilter will scan any particular byte at most once. (Note that some
- /// prefilters, like the start-byte prefilter, do not need to use this
- /// field at all, since it only looks for starting bytes.)
- last_scan_at: usize,
-}
-
-impl State {
- /// The minimum number of skip attempts to try before considering whether
- /// a prefilter is effective or not.
- const MIN_SKIPS: usize = 40;
-
- /// The minimum amount of bytes that skipping must average.
- ///
- /// That is, after MIN_SKIPS have occurred, if the average number of bytes
- /// skipped ever falls below MIN_AVG_SKIP, then the prefilter will be
- /// rendered inert.
- const MIN_AVG_SKIP: usize = 16;
-
- /// Create a fresh prefilter state.
- pub fn new() -> State {
- State { skips: 0, skipped: 0, inert: false, last_scan_at: 0 }
- }
-
- /// Updates the position at which the last scan stopped. This may be
- /// greater than the position of the last candidate reported. For example,
- /// searching for the byte `z` in `abczdef` for the pattern `abcz` will
- /// report a candidate at position `0`, but the end of its last scan will
- /// be at position `3`.
- ///
- /// This position factors into the effectiveness of this prefilter. If the
- /// current position is less than the last position at which a scan ended,
- /// then the prefilter should not be re-run until the search moves past
- /// that position.
- ///
- /// It is always correct to never update the last scan position. In fact,
- /// it is also always correct to set the last scan position to an arbitrary
- /// value. The key is setting it to a position in the future at which it
- /// makes sense to restart the prefilter.
- pub fn update_last_scan(&mut self, at: usize) {
- if at > self.last_scan_at {
- self.last_scan_at = at;
- }
- }
-
- /// Return true if and only if this state indicates that a prefilter is
- /// still effective. If the prefilter is not effective, then this state
- /// is rendered "inert." At which point, all subsequent calls to
- /// `is_effective` on this state will return `false`.
- ///
- /// `at` should correspond to the current starting position of the search.
- ///
- /// Callers typically do not need to use this, as it represents the
- /// default implementation of
- /// [`Prefilter::is_effective`](trait.Prefilter.html#tymethod.is_effective).
- fn is_effective(&mut self, at: usize) -> bool {
- if self.inert {
- return false;
- }
- if at < self.last_scan_at {
- return false;
- }
- if self.skips < State::MIN_SKIPS {
- return true;
- }
-
- if self.skipped >= State::MIN_AVG_SKIP * self.skips {
- return true;
- }
-
- // We're inert.
- self.inert = true;
- false
- }
-
- /// Update this state with the number of bytes skipped on the last
- /// invocation of the prefilter.
- fn update_skipped_bytes(&mut self, skipped: usize) {
- self.skips += 1;
- self.skipped += skipped;
- }
-}
-
-/// A `Prefilter` implementation that reports a possible match at every
-/// position.
-///
-/// This should generally not be used as an actual prefilter. It is only
-/// useful when one needs to represent the absence of a prefilter in a generic
-/// context. For example, a [`dfa::regex::Regex`](crate::dfa::regex::Regex)
-/// uses this prefilter by default to indicate that no prefilter should be
-/// used.
-///
-/// A `None` prefilter value cannot be constructed.
-#[derive(Clone, Debug)]
-pub struct None {
- _priv: (),
-}
-
-impl Prefilter for None {
- fn next_candidate(&self, _: &mut State, _: &[u8], at: usize) -> Candidate {
- Candidate::PossibleStartOfMatch(at)
- }
-
- fn heap_bytes(&self) -> usize {
- 0
- }
-}