summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex-automata/src/meta/wrappers.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/regex-automata/src/meta/wrappers.rs
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/regex-automata/src/meta/wrappers.rs')
-rw-r--r--third_party/rust/regex-automata/src/meta/wrappers.rs1348
1 files changed, 1348 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/src/meta/wrappers.rs b/third_party/rust/regex-automata/src/meta/wrappers.rs
new file mode 100644
index 0000000000..08110d9bb8
--- /dev/null
+++ b/third_party/rust/regex-automata/src/meta/wrappers.rs
@@ -0,0 +1,1348 @@
+/*!
+This module contains a boat load of wrappers around each of our internal regex
+engines. They encapsulate a few things:
+
+1. The wrappers manage the conditional existence of the regex engine. Namely,
+the PikeVM is the only required regex engine. The rest are optional. These
+wrappers present a uniform API regardless of which engines are available. And
+availability might be determined by compile time features or by dynamic
+configuration via `meta::Config`. Encapsulating the conditional compilation
+features is in particular a huge simplification for the higher level code that
+composes these engines.
+2. The wrappers manage construction of each engine, including skipping it if
+the engine is unavailable or configured to not be used.
+3. The wrappers manage whether an engine *can* be used for a particular
+search configuration. For example, `BoundedBacktracker::get` only returns a
+backtracking engine when the haystack is bigger than the maximum supported
+length. The wrappers also sometimes take a position on when an engine *ought*
+to be used, but only in cases where the logic is extremely local to the engine
+itself. Otherwise, things like "choose between the backtracker and the one-pass
+DFA" are managed by the higher level meta strategy code.
+
+There are also corresponding wrappers for the various `Cache` types for each
+regex engine that needs them. If an engine is unavailable or not used, then a
+cache for it will *not* actually be allocated.
+*/
+
+use alloc::vec::Vec;
+
+use crate::{
+ meta::{
+ error::{BuildError, RetryError, RetryFailError},
+ regex::RegexInfo,
+ },
+ nfa::thompson::{pikevm, NFA},
+ util::{prefilter::Prefilter, primitives::NonMaxUsize},
+ HalfMatch, Input, Match, MatchKind, PatternID, PatternSet,
+};
+
+#[cfg(feature = "dfa-build")]
+use crate::dfa;
+#[cfg(feature = "dfa-onepass")]
+use crate::dfa::onepass;
+#[cfg(feature = "hybrid")]
+use crate::hybrid;
+#[cfg(feature = "nfa-backtrack")]
+use crate::nfa::thompson::backtrack;
+
+#[derive(Debug)]
+pub(crate) struct PikeVM(PikeVMEngine);
+
+impl PikeVM {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ ) -> Result<PikeVM, BuildError> {
+ PikeVMEngine::new(info, pre, nfa).map(PikeVM)
+ }
+
+ pub(crate) fn create_cache(&self) -> PikeVMCache {
+ PikeVMCache::new(self)
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(&self) -> &PikeVMEngine {
+ &self.0
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct PikeVMEngine(pikevm::PikeVM);
+
+impl PikeVMEngine {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ ) -> Result<PikeVMEngine, BuildError> {
+ let pikevm_config = pikevm::Config::new()
+ .match_kind(info.config().get_match_kind())
+ .prefilter(pre);
+ let engine = pikevm::Builder::new()
+ .configure(pikevm_config)
+ .build_from_nfa(nfa.clone())
+ .map_err(BuildError::nfa)?;
+ debug!("PikeVM built");
+ Ok(PikeVMEngine(engine))
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn is_match(
+ &self,
+ cache: &mut PikeVMCache,
+ input: &Input<'_>,
+ ) -> bool {
+ self.0.is_match(cache.0.as_mut().unwrap(), input.clone())
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn search_slots(
+ &self,
+ cache: &mut PikeVMCache,
+ input: &Input<'_>,
+ slots: &mut [Option<NonMaxUsize>],
+ ) -> Option<PatternID> {
+ self.0.search_slots(cache.0.as_mut().unwrap(), input, slots)
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn which_overlapping_matches(
+ &self,
+ cache: &mut PikeVMCache,
+ input: &Input<'_>,
+ patset: &mut PatternSet,
+ ) {
+ self.0.which_overlapping_matches(
+ cache.0.as_mut().unwrap(),
+ input,
+ patset,
+ )
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct PikeVMCache(Option<pikevm::Cache>);
+
+impl PikeVMCache {
+ pub(crate) fn none() -> PikeVMCache {
+ PikeVMCache(None)
+ }
+
+ pub(crate) fn new(builder: &PikeVM) -> PikeVMCache {
+ PikeVMCache(Some(builder.get().0.create_cache()))
+ }
+
+ pub(crate) fn reset(&mut self, builder: &PikeVM) {
+ self.0.as_mut().unwrap().reset(&builder.get().0);
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.0.as_ref().map_or(0, |c| c.memory_usage())
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>);
+
+impl BoundedBacktracker {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ ) -> Result<BoundedBacktracker, BuildError> {
+ BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker)
+ }
+
+ pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache {
+ BoundedBacktrackerCache::new(self)
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(
+ &self,
+ input: &Input<'_>,
+ ) -> Option<&BoundedBacktrackerEngine> {
+ let engine = self.0.as_ref()?;
+ // It is difficult to make the backtracker give up early if it is
+ // guaranteed to eventually wind up in a match state. This is because
+ // of the greedy nature of a backtracker: it just blindly mushes
+ // forward. Every other regex engine is able to give up more quickly,
+ // so even if the backtracker might be able to zip through faster than
+ // (say) the PikeVM, we prefer the theoretical benefit that some other
+ // engine might be able to scan much less of the haystack than the
+ // backtracker.
+ //
+ // Now, if the haystack is really short already, then we allow the
+ // backtracker to run. (This hasn't been litigated quantitatively with
+ // benchmarks. Just a hunch.)
+ if input.get_earliest() && input.haystack().len() > 128 {
+ return None;
+ }
+ // If the backtracker is just going to return an error because the
+ // haystack is too long, then obviously do not use it.
+ if input.get_span().len() > engine.max_haystack_len() {
+ return None;
+ }
+ Some(engine)
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct BoundedBacktrackerEngine(
+ #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker,
+ #[cfg(not(feature = "nfa-backtrack"))] (),
+);
+
+impl BoundedBacktrackerEngine {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ if !info.config().get_backtrack()
+ || info.config().get_match_kind() != MatchKind::LeftmostFirst
+ {
+ return Ok(None);
+ }
+ let backtrack_config = backtrack::Config::new().prefilter(pre);
+ let engine = backtrack::Builder::new()
+ .configure(backtrack_config)
+ .build_from_nfa(nfa.clone())
+ .map_err(BuildError::nfa)?;
+ debug!("BoundedBacktracker built");
+ Ok(Some(BoundedBacktrackerEngine(engine)))
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ Ok(None)
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn is_match(
+ &self,
+ cache: &mut BoundedBacktrackerCache,
+ input: &Input<'_>,
+ ) -> bool {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ // OK because we only permit access to this engine when we know
+ // the haystack is short enough for the backtracker to run without
+ // reporting an error.
+ self.0
+ .try_is_match(cache.0.as_mut().unwrap(), input.clone())
+ .unwrap()
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn search_slots(
+ &self,
+ cache: &mut BoundedBacktrackerCache,
+ input: &Input<'_>,
+ slots: &mut [Option<NonMaxUsize>],
+ ) -> Option<PatternID> {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ // OK because we only permit access to this engine when we know
+ // the haystack is short enough for the backtracker to run without
+ // reporting an error.
+ self.0
+ .try_search_slots(cache.0.as_mut().unwrap(), input, slots)
+ .unwrap()
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ fn max_haystack_len(&self) -> usize {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ self.0.max_haystack_len()
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct BoundedBacktrackerCache(
+ #[cfg(feature = "nfa-backtrack")] Option<backtrack::Cache>,
+ #[cfg(not(feature = "nfa-backtrack"))] (),
+);
+
+impl BoundedBacktrackerCache {
+ pub(crate) fn none() -> BoundedBacktrackerCache {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ BoundedBacktrackerCache(None)
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ BoundedBacktrackerCache(())
+ }
+ }
+
+ pub(crate) fn new(
+ builder: &BoundedBacktracker,
+ ) -> BoundedBacktrackerCache {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ BoundedBacktrackerCache(
+ builder.0.as_ref().map(|e| e.0.create_cache()),
+ )
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ BoundedBacktrackerCache(())
+ }
+ }
+
+ pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) {
+ #[cfg(feature = "nfa-backtrack")]
+ if let Some(ref e) = builder.0 {
+ self.0.as_mut().unwrap().reset(&e.0);
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "nfa-backtrack")]
+ {
+ self.0.as_ref().map_or(0, |c| c.memory_usage())
+ }
+ #[cfg(not(feature = "nfa-backtrack"))]
+ {
+ 0
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct OnePass(Option<OnePassEngine>);
+
+impl OnePass {
+ pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass {
+ OnePass(OnePassEngine::new(info, nfa))
+ }
+
+ pub(crate) fn create_cache(&self) -> OnePassCache {
+ OnePassCache::new(self)
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> {
+ let engine = self.0.as_ref()?;
+ if !input.get_anchored().is_anchored()
+ && !engine.get_nfa().is_always_start_anchored()
+ {
+ return None;
+ }
+ Some(engine)
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.0.as_ref().map_or(0, |e| e.memory_usage())
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct OnePassEngine(
+ #[cfg(feature = "dfa-onepass")] onepass::DFA,
+ #[cfg(not(feature = "dfa-onepass"))] (),
+);
+
+impl OnePassEngine {
+ pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ if !info.config().get_onepass() {
+ return None;
+ }
+ // In order to even attempt building a one-pass DFA, we require
+ // that we either have at least one explicit capturing group or
+ // there's a Unicode word boundary somewhere. If we don't have
+ // either of these things, then the lazy DFA will almost certainly
+ // be useable and be much faster. The only case where it might
+ // not is if the lazy DFA isn't utilizing its cache effectively,
+ // but in those cases, the underlying regex is almost certainly
+ // not one-pass or is too big to fit within the current one-pass
+ // implementation limits.
+ if info.props_union().explicit_captures_len() == 0
+ && !info.props_union().look_set().contains_word_unicode()
+ {
+ debug!("not building OnePass because it isn't worth it");
+ return None;
+ }
+ let onepass_config = onepass::Config::new()
+ .match_kind(info.config().get_match_kind())
+ // Like for the lazy DFA, we unconditionally enable this
+ // because it doesn't cost much and makes the API more
+ // flexible.
+ .starts_for_each_pattern(true)
+ .byte_classes(info.config().get_byte_classes())
+ .size_limit(info.config().get_onepass_size_limit());
+ let result = onepass::Builder::new()
+ .configure(onepass_config)
+ .build_from_nfa(nfa.clone());
+ let engine = match result {
+ Ok(engine) => engine,
+ Err(_err) => {
+ debug!("OnePass failed to build: {}", _err);
+ return None;
+ }
+ };
+ debug!("OnePass built, {} bytes", engine.memory_usage());
+ Some(OnePassEngine(engine))
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ None
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn search_slots(
+ &self,
+ cache: &mut OnePassCache,
+ input: &Input<'_>,
+ slots: &mut [Option<NonMaxUsize>],
+ ) -> Option<PatternID> {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ // OK because we only permit getting a OnePassEngine when we know
+ // the search is anchored and thus an error cannot occur.
+ self.0
+ .try_search_slots(cache.0.as_mut().unwrap(), input, slots)
+ .unwrap()
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ self.0.memory_usage()
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ fn get_nfa(&self) -> &NFA {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ self.0.get_nfa()
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct OnePassCache(
+ #[cfg(feature = "dfa-onepass")] Option<onepass::Cache>,
+ #[cfg(not(feature = "dfa-onepass"))] (),
+);
+
+impl OnePassCache {
+ pub(crate) fn none() -> OnePassCache {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ OnePassCache(None)
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ OnePassCache(())
+ }
+ }
+
+ pub(crate) fn new(builder: &OnePass) -> OnePassCache {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache()))
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ OnePassCache(())
+ }
+ }
+
+ pub(crate) fn reset(&mut self, builder: &OnePass) {
+ #[cfg(feature = "dfa-onepass")]
+ if let Some(ref e) = builder.0 {
+ self.0.as_mut().unwrap().reset(&e.0);
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "dfa-onepass")]
+ {
+ self.0.as_ref().map_or(0, |c| c.memory_usage())
+ }
+ #[cfg(not(feature = "dfa-onepass"))]
+ {
+ 0
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct Hybrid(Option<HybridEngine>);
+
+impl Hybrid {
+ pub(crate) fn none() -> Hybrid {
+ Hybrid(None)
+ }
+
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ nfarev: &NFA,
+ ) -> Hybrid {
+ Hybrid(HybridEngine::new(info, pre, nfa, nfarev))
+ }
+
+ pub(crate) fn create_cache(&self) -> HybridCache {
+ HybridCache::new(self)
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> {
+ let engine = self.0.as_ref()?;
+ Some(engine)
+ }
+
+ pub(crate) fn is_some(&self) -> bool {
+ self.0.is_some()
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct HybridEngine(
+ #[cfg(feature = "hybrid")] hybrid::regex::Regex,
+ #[cfg(not(feature = "hybrid"))] (),
+);
+
+impl HybridEngine {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ nfarev: &NFA,
+ ) -> Option<HybridEngine> {
+ #[cfg(feature = "hybrid")]
+ {
+ if !info.config().get_hybrid() {
+ return None;
+ }
+ let dfa_config = hybrid::dfa::Config::new()
+ .match_kind(info.config().get_match_kind())
+ .prefilter(pre.clone())
+ // Enabling this is necessary for ensuring we can service any
+ // kind of 'Input' search without error. For the lazy DFA,
+ // this is not particularly costly, since the start states are
+ // generated lazily.
+ .starts_for_each_pattern(true)
+ .byte_classes(info.config().get_byte_classes())
+ .unicode_word_boundary(true)
+ .specialize_start_states(pre.is_some())
+ .cache_capacity(info.config().get_hybrid_cache_capacity())
+ // This makes it possible for building a lazy DFA to
+ // fail even though the NFA has already been built. Namely,
+ // if the cache capacity is too small to fit some minimum
+ // number of states (which is small, like 4 or 5), then the
+ // DFA will refuse to build.
+ //
+ // We shouldn't enable this to make building always work, since
+ // this could cause the allocation of a cache bigger than the
+ // provided capacity amount.
+ //
+ // This is effectively the only reason why building a lazy DFA
+ // could fail. If it does, then we simply suppress the error
+ // and return None.
+ .skip_cache_capacity_check(false)
+ // This and enabling heuristic Unicode word boundary support
+ // above make it so the lazy DFA can quit at match time.
+ .minimum_cache_clear_count(Some(3))
+ .minimum_bytes_per_state(Some(10));
+ let result = hybrid::dfa::Builder::new()
+ .configure(dfa_config.clone())
+ .build_from_nfa(nfa.clone());
+ let fwd = match result {
+ Ok(fwd) => fwd,
+ Err(_err) => {
+ debug!("forward lazy DFA failed to build: {}", _err);
+ return None;
+ }
+ };
+ let result = hybrid::dfa::Builder::new()
+ .configure(
+ dfa_config
+ .clone()
+ .match_kind(MatchKind::All)
+ .prefilter(None)
+ .specialize_start_states(false),
+ )
+ .build_from_nfa(nfarev.clone());
+ let rev = match result {
+ Ok(rev) => rev,
+ Err(_err) => {
+ debug!("reverse lazy DFA failed to build: {}", _err);
+ return None;
+ }
+ };
+ let engine =
+ hybrid::regex::Builder::new().build_from_dfas(fwd, rev);
+ debug!("lazy DFA built");
+ Some(HybridEngine(engine))
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ None
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search(
+ &self,
+ cache: &mut HybridCache,
+ input: &Input<'_>,
+ ) -> Result<Option<Match>, RetryFailError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let cache = cache.0.as_mut().unwrap();
+ self.0.try_search(cache, input).map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_fwd(
+ &self,
+ cache: &mut HybridCache,
+ input: &Input<'_>,
+ ) -> Result<Option<HalfMatch>, RetryFailError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let fwd = self.0.forward();
+ let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0;
+ fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_fwd_stopat(
+ &self,
+ cache: &mut HybridCache,
+ input: &Input<'_>,
+ ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let dfa = self.0.forward();
+ let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0;
+ crate::meta::stopat::hybrid_try_search_half_fwd(
+ dfa, &mut cache, input,
+ )
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_rev(
+ &self,
+ cache: &mut HybridCache,
+ input: &Input<'_>,
+ ) -> Result<Option<HalfMatch>, RetryFailError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let rev = self.0.reverse();
+ let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1;
+ rev.try_search_rev(&mut revcache, input).map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_rev_limited(
+ &self,
+ cache: &mut HybridCache,
+ input: &Input<'_>,
+ min_start: usize,
+ ) -> Result<Option<HalfMatch>, RetryError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let dfa = self.0.reverse();
+ let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1;
+ crate::meta::limited::hybrid_try_search_half_rev(
+ dfa, &mut cache, input, min_start,
+ )
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[inline]
+ pub(crate) fn try_which_overlapping_matches(
+ &self,
+ cache: &mut HybridCache,
+ input: &Input<'_>,
+ patset: &mut PatternSet,
+ ) -> Result<(), RetryFailError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let fwd = self.0.forward();
+ let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0;
+ fwd.try_which_overlapping_matches(&mut fwdcache, input, patset)
+ .map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct HybridCache(
+ #[cfg(feature = "hybrid")] Option<hybrid::regex::Cache>,
+ #[cfg(not(feature = "hybrid"))] (),
+);
+
+impl HybridCache {
+ pub(crate) fn none() -> HybridCache {
+ #[cfg(feature = "hybrid")]
+ {
+ HybridCache(None)
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ HybridCache(())
+ }
+ }
+
+ pub(crate) fn new(builder: &Hybrid) -> HybridCache {
+ #[cfg(feature = "hybrid")]
+ {
+ HybridCache(builder.0.as_ref().map(|e| e.0.create_cache()))
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ HybridCache(())
+ }
+ }
+
+ pub(crate) fn reset(&mut self, builder: &Hybrid) {
+ #[cfg(feature = "hybrid")]
+ if let Some(ref e) = builder.0 {
+ self.0.as_mut().unwrap().reset(&e.0);
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "hybrid")]
+ {
+ self.0.as_ref().map_or(0, |c| c.memory_usage())
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ 0
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct DFA(Option<DFAEngine>);
+
+impl DFA {
+ pub(crate) fn none() -> DFA {
+ DFA(None)
+ }
+
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ nfarev: &NFA,
+ ) -> DFA {
+ DFA(DFAEngine::new(info, pre, nfa, nfarev))
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> {
+ let engine = self.0.as_ref()?;
+ Some(engine)
+ }
+
+ pub(crate) fn is_some(&self) -> bool {
+ self.0.is_some()
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.0.as_ref().map_or(0, |e| e.memory_usage())
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct DFAEngine(
+ #[cfg(feature = "dfa-build")] dfa::regex::Regex,
+ #[cfg(not(feature = "dfa-build"))] (),
+);
+
+impl DFAEngine {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ pre: Option<Prefilter>,
+ nfa: &NFA,
+ nfarev: &NFA,
+ ) -> Option<DFAEngine> {
+ #[cfg(feature = "dfa-build")]
+ {
+ if !info.config().get_dfa() {
+ return None;
+ }
+ // If our NFA is anything but small, don't even bother with a DFA.
+ if let Some(state_limit) = info.config().get_dfa_state_limit() {
+ if nfa.states().len() > state_limit {
+ debug!(
+ "skipping full DFA because NFA has {} states, \
+ which exceeds the heuristic limit of {}",
+ nfa.states().len(),
+ state_limit,
+ );
+ return None;
+ }
+ }
+ // We cut the size limit in four because the total heap used by
+ // DFA construction is determinization aux memory and the DFA
+ // itself, and those things are configured independently in the
+ // lower level DFA builder API. And then split that in two because
+ // of forward and reverse DFAs.
+ let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4);
+ let dfa_config = dfa::dense::Config::new()
+ .match_kind(info.config().get_match_kind())
+ .prefilter(pre.clone())
+ // Enabling this is necessary for ensuring we can service any
+ // kind of 'Input' search without error. For the full DFA, this
+ // can be quite costly. But since we have such a small bound
+ // on the size of the DFA, in practice, any multl-regexes are
+ // probably going to blow the limit anyway.
+ .starts_for_each_pattern(true)
+ .byte_classes(info.config().get_byte_classes())
+ .unicode_word_boundary(true)
+ .specialize_start_states(pre.is_some())
+ .determinize_size_limit(size_limit)
+ .dfa_size_limit(size_limit);
+ let result = dfa::dense::Builder::new()
+ .configure(dfa_config.clone())
+ .build_from_nfa(&nfa);
+ let fwd = match result {
+ Ok(fwd) => fwd,
+ Err(_err) => {
+ debug!("forward full DFA failed to build: {}", _err);
+ return None;
+ }
+ };
+ let result = dfa::dense::Builder::new()
+ .configure(
+ dfa_config
+ .clone()
+ // We never need unanchored reverse searches, so
+ // there's no point in building it into the DFA, which
+ // WILL take more space. (This isn't done for the lazy
+ // DFA because the DFA is, well, lazy. It doesn't pay
+ // the cost for supporting unanchored searches unless
+ // you actually do an unanchored search, which we
+ // don't.)
+ .start_kind(dfa::StartKind::Anchored)
+ .match_kind(MatchKind::All)
+ .prefilter(None)
+ .specialize_start_states(false),
+ )
+ .build_from_nfa(&nfarev);
+ let rev = match result {
+ Ok(rev) => rev,
+ Err(_err) => {
+ debug!("reverse full DFA failed to build: {}", _err);
+ return None;
+ }
+ };
+ let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev);
+ debug!(
+ "fully compiled forward and reverse DFAs built, {} bytes",
+ engine.forward().memory_usage()
+ + engine.reverse().memory_usage(),
+ );
+ Some(DFAEngine(engine))
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ None
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search(
+ &self,
+ input: &Input<'_>,
+ ) -> Result<Option<Match>, RetryFailError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ self.0.try_search(input).map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_fwd(
+ &self,
+ input: &Input<'_>,
+ ) -> Result<Option<HalfMatch>, RetryFailError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ use crate::dfa::Automaton;
+ self.0.forward().try_search_fwd(input).map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_fwd_stopat(
+ &self,
+ input: &Input<'_>,
+ ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ let dfa = self.0.forward();
+ crate::meta::stopat::dfa_try_search_half_fwd(dfa, input)
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_rev(
+ &self,
+ input: &Input<'_>,
+ ) -> Result<Option<HalfMatch>, RetryFailError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ use crate::dfa::Automaton;
+ self.0.reverse().try_search_rev(&input).map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_rev_limited(
+ &self,
+ input: &Input<'_>,
+ min_start: usize,
+ ) -> Result<Option<HalfMatch>, RetryError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ let dfa = self.0.reverse();
+ crate::meta::limited::dfa_try_search_half_rev(
+ dfa, input, min_start,
+ )
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ #[inline]
+ pub(crate) fn try_which_overlapping_matches(
+ &self,
+ input: &Input<'_>,
+ patset: &mut PatternSet,
+ ) -> Result<(), RetryFailError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ use crate::dfa::Automaton;
+ self.0
+ .forward()
+ .try_which_overlapping_matches(input, patset)
+ .map_err(|e| e.into())
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "dfa-build")]
+ {
+ self.0.forward().memory_usage() + self.0.reverse().memory_usage()
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>);
+
+impl ReverseHybrid {
+ pub(crate) fn none() -> ReverseHybrid {
+ ReverseHybrid(None)
+ }
+
+ pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid {
+ ReverseHybrid(ReverseHybridEngine::new(info, nfarev))
+ }
+
+ pub(crate) fn create_cache(&self) -> ReverseHybridCache {
+ ReverseHybridCache::new(self)
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(
+ &self,
+ _input: &Input<'_>,
+ ) -> Option<&ReverseHybridEngine> {
+ let engine = self.0.as_ref()?;
+ Some(engine)
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct ReverseHybridEngine(
+ #[cfg(feature = "hybrid")] hybrid::dfa::DFA,
+ #[cfg(not(feature = "hybrid"))] (),
+);
+
+impl ReverseHybridEngine {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ nfarev: &NFA,
+ ) -> Option<ReverseHybridEngine> {
+ #[cfg(feature = "hybrid")]
+ {
+ if !info.config().get_hybrid() {
+ return None;
+ }
+ // Since we only use this for reverse searches, we can hard-code
+ // a number of things like match semantics, prefilters, starts
+ // for each pattern and so on.
+ let dfa_config = hybrid::dfa::Config::new()
+ .match_kind(MatchKind::All)
+ .prefilter(None)
+ .starts_for_each_pattern(false)
+ .byte_classes(info.config().get_byte_classes())
+ .unicode_word_boundary(true)
+ .specialize_start_states(false)
+ .cache_capacity(info.config().get_hybrid_cache_capacity())
+ .skip_cache_capacity_check(false)
+ .minimum_cache_clear_count(Some(3))
+ .minimum_bytes_per_state(Some(10));
+ let result = hybrid::dfa::Builder::new()
+ .configure(dfa_config)
+ .build_from_nfa(nfarev.clone());
+ let rev = match result {
+ Ok(rev) => rev,
+ Err(_err) => {
+ debug!("lazy reverse DFA failed to build: {}", _err);
+ return None;
+ }
+ };
+ debug!("lazy reverse DFA built");
+ Some(ReverseHybridEngine(rev))
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ None
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_rev_limited(
+ &self,
+ cache: &mut ReverseHybridCache,
+ input: &Input<'_>,
+ min_start: usize,
+ ) -> Result<Option<HalfMatch>, RetryError> {
+ #[cfg(feature = "hybrid")]
+ {
+ let dfa = &self.0;
+ let mut cache = cache.0.as_mut().unwrap();
+ crate::meta::limited::hybrid_try_search_half_rev(
+ dfa, &mut cache, input, min_start,
+ )
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct ReverseHybridCache(
+ #[cfg(feature = "hybrid")] Option<hybrid::dfa::Cache>,
+ #[cfg(not(feature = "hybrid"))] (),
+);
+
+impl ReverseHybridCache {
+ pub(crate) fn none() -> ReverseHybridCache {
+ #[cfg(feature = "hybrid")]
+ {
+ ReverseHybridCache(None)
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ ReverseHybridCache(())
+ }
+ }
+
+ pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache {
+ #[cfg(feature = "hybrid")]
+ {
+ ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache()))
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ ReverseHybridCache(())
+ }
+ }
+
+ pub(crate) fn reset(&mut self, builder: &ReverseHybrid) {
+ #[cfg(feature = "hybrid")]
+ if let Some(ref e) = builder.0 {
+ self.0.as_mut().unwrap().reset(&e.0);
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "hybrid")]
+ {
+ self.0.as_ref().map_or(0, |c| c.memory_usage())
+ }
+ #[cfg(not(feature = "hybrid"))]
+ {
+ 0
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>);
+
+impl ReverseDFA {
+ pub(crate) fn none() -> ReverseDFA {
+ ReverseDFA(None)
+ }
+
+ pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA {
+ ReverseDFA(ReverseDFAEngine::new(info, nfarev))
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> {
+ let engine = self.0.as_ref()?;
+ Some(engine)
+ }
+
+ pub(crate) fn is_some(&self) -> bool {
+ self.0.is_some()
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.0.as_ref().map_or(0, |e| e.memory_usage())
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct ReverseDFAEngine(
+ #[cfg(feature = "dfa-build")] dfa::dense::DFA<Vec<u32>>,
+ #[cfg(not(feature = "dfa-build"))] (),
+);
+
+impl ReverseDFAEngine {
+ pub(crate) fn new(
+ info: &RegexInfo,
+ nfarev: &NFA,
+ ) -> Option<ReverseDFAEngine> {
+ #[cfg(feature = "dfa-build")]
+ {
+ if !info.config().get_dfa() {
+ return None;
+ }
+ // If our NFA is anything but small, don't even bother with a DFA.
+ if let Some(state_limit) = info.config().get_dfa_state_limit() {
+ if nfarev.states().len() > state_limit {
+ debug!(
+ "skipping full reverse DFA because NFA has {} states, \
+ which exceeds the heuristic limit of {}",
+ nfarev.states().len(),
+ state_limit,
+ );
+ return None;
+ }
+ }
+ // We cut the size limit in two because the total heap used by DFA
+ // construction is determinization aux memory and the DFA itself,
+ // and those things are configured independently in the lower level
+ // DFA builder API.
+ let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2);
+ // Since we only use this for reverse searches, we can hard-code
+ // a number of things like match semantics, prefilters, starts
+ // for each pattern and so on. We also disable acceleration since
+ // it's incompatible with limited searches (which is the only
+ // operation we support for this kind of engine at the moment).
+ let dfa_config = dfa::dense::Config::new()
+ .match_kind(MatchKind::All)
+ .prefilter(None)
+ .accelerate(false)
+ .start_kind(dfa::StartKind::Anchored)
+ .starts_for_each_pattern(false)
+ .byte_classes(info.config().get_byte_classes())
+ .unicode_word_boundary(true)
+ .specialize_start_states(false)
+ .determinize_size_limit(size_limit)
+ .dfa_size_limit(size_limit);
+ let result = dfa::dense::Builder::new()
+ .configure(dfa_config)
+ .build_from_nfa(&nfarev);
+ let rev = match result {
+ Ok(rev) => rev,
+ Err(_err) => {
+ debug!("full reverse DFA failed to build: {}", _err);
+ return None;
+ }
+ };
+ debug!(
+ "fully compiled reverse DFA built, {} bytes",
+ rev.memory_usage()
+ );
+ Some(ReverseDFAEngine(rev))
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ None
+ }
+ }
+
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn try_search_half_rev_limited(
+ &self,
+ input: &Input<'_>,
+ min_start: usize,
+ ) -> Result<Option<HalfMatch>, RetryError> {
+ #[cfg(feature = "dfa-build")]
+ {
+ let dfa = &self.0;
+ crate::meta::limited::dfa_try_search_half_rev(
+ dfa, input, min_start,
+ )
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ #[cfg(feature = "dfa-build")]
+ {
+ self.0.memory_usage()
+ }
+ #[cfg(not(feature = "dfa-build"))]
+ {
+ // Impossible to reach because this engine is never constructed
+ // if the requisite features aren't enabled.
+ unreachable!()
+ }
+ }
+}