/*!
This module contains a boat load of wrappers around each of our internal regex
engines. They encapsulate a few things:

1. The wrappers manage the conditional existence of the regex engine. Namely,
the PikeVM is the only required regex engine. The rest are optional. These
wrappers present a uniform API regardless of which engines are available. And
availability might be determined by compile time features or by dynamic
configuration via `meta::Config`. Encapsulating the conditional compilation
features is in particular a huge simplification for the higher level code that
composes these engines.
2. The wrappers manage construction of each engine, including skipping it if
the engine is unavailable or configured to not be used.
3. The wrappers manage whether an engine *can* be used for a particular
search configuration. For example, `BoundedBacktracker::get` only returns a
backtracking engine when the haystack is no bigger than the maximum supported
length. The wrappers also sometimes take a position on when an engine *ought*
to be used, but only in cases where the logic is extremely local to the engine
itself. Otherwise, things like "choose between the backtracker and the
one-pass DFA" are managed by the higher level meta strategy code.

There are also corresponding wrappers for the various `Cache` types for each
regex engine that needs them. If an engine is unavailable or not used, then a
cache for it will *not* actually be allocated.
*/

use alloc::vec::Vec;

use crate::{
    meta::{
        error::{BuildError, RetryError, RetryFailError},
        regex::RegexInfo,
    },
    nfa::thompson::{pikevm, NFA},
    util::{prefilter::Prefilter, primitives::NonMaxUsize},
    HalfMatch, Input, Match, MatchKind, PatternID, PatternSet,
};

#[cfg(feature = "dfa-build")]
use crate::dfa;
#[cfg(feature = "dfa-onepass")]
use crate::dfa::onepass;
#[cfg(feature = "hybrid")]
use crate::hybrid;
#[cfg(feature = "nfa-backtrack")]
use crate::nfa::thompson::backtrack;

#[derive(Debug)]
pub(crate) struct PikeVM(PikeVMEngine);

impl PikeVM {
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
    ) -> Result<PikeVM, BuildError> {
        PikeVMEngine::new(info, pre, nfa).map(PikeVM)
    }

    pub(crate) fn create_cache(&self) -> PikeVMCache {
        PikeVMCache::new(self)
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self) -> &PikeVMEngine {
        &self.0
    }
}

#[derive(Debug)]
pub(crate) struct PikeVMEngine(pikevm::PikeVM);

impl PikeVMEngine {
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
    ) -> Result<PikeVMEngine, BuildError> {
        let pikevm_config = pikevm::Config::new()
            .match_kind(info.config().get_match_kind())
            .prefilter(pre);
        let engine = pikevm::Builder::new()
            .configure(pikevm_config)
            .build_from_nfa(nfa.clone())
            .map_err(BuildError::nfa)?;
        debug!("PikeVM built");
        Ok(PikeVMEngine(engine))
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn is_match(
        &self,
        cache: &mut PikeVMCache,
        input: &Input<'_>,
    ) -> bool {
        self.0.is_match(cache.0.as_mut().unwrap(), input.clone())
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn search_slots(
        &self,
        cache: &mut PikeVMCache,
        input: &Input<'_>,
        slots: &mut [Option<NonMaxUsize>],
    ) -> Option<PatternID> {
        self.0.search_slots(cache.0.as_mut().unwrap(), input, slots)
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn which_overlapping_matches(
        &self,
        cache: &mut PikeVMCache,
        input: &Input<'_>,
        patset: &mut PatternSet,
    ) {
        self.0.which_overlapping_matches(
            cache.0.as_mut().unwrap(),
            input,
            patset,
        )
    }
}
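// A minimal, hypothetical sketch of how the higher level meta strategy code
// might consume these wrappers. The `strat` and `cache` field names below are
// illustrative only; the real composition lives in the meta strategy code:
//
//     // Prefer an optional engine when it exists *and* is willing to handle
//     // this particular search; otherwise fall back to the PikeVM, which is
//     // the only engine that is always available.
//     if let Some(engine) = strat.backtrack.get(&input) {
//         return engine.search_slots(&mut cache.backtrack, &input, slots);
//     }
//     strat.pikevm.get().search_slots(&mut cache.pikevm, &input, slots)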
#[derive(Clone, Debug)]
pub(crate) struct PikeVMCache(Option<pikevm::Cache>);

impl PikeVMCache {
    pub(crate) fn none() -> PikeVMCache {
        PikeVMCache(None)
    }

    pub(crate) fn new(builder: &PikeVM) -> PikeVMCache {
        PikeVMCache(Some(builder.get().0.create_cache()))
    }

    pub(crate) fn reset(&mut self, builder: &PikeVM) {
        self.0.as_mut().unwrap().reset(&builder.get().0);
    }

    pub(crate) fn memory_usage(&self) -> usize {
        self.0.as_ref().map_or(0, |c| c.memory_usage())
    }
}

#[derive(Debug)]
pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>);

impl BoundedBacktracker {
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
    ) -> Result<BoundedBacktracker, BuildError> {
        BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker)
    }

    pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache {
        BoundedBacktrackerCache::new(self)
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(
        &self,
        input: &Input<'_>,
    ) -> Option<&BoundedBacktrackerEngine> {
        let engine = self.0.as_ref()?;
        // It is difficult to make the backtracker give up early if it is
        // guaranteed to eventually wind up in a match state. This is because
        // of the greedy nature of a backtracker: it just blindly mushes
        // forward. Every other regex engine is able to give up more quickly,
        // so even if the backtracker might be able to zip through faster than
        // (say) the PikeVM, we prefer the theoretical benefit that some other
        // engine might be able to scan much less of the haystack than the
        // backtracker.
        //
        // Now, if the haystack is really short already, then we allow the
        // backtracker to run. (This hasn't been litigated quantitatively with
        // benchmarks. Just a hunch.)
        if input.get_earliest() && input.haystack().len() > 128 {
            return None;
        }
        // If the backtracker is just going to return an error because the
        // haystack is too long, then obviously do not use it.
        if input.get_span().len() > engine.max_haystack_len() {
            return None;
        }
        Some(engine)
    }
}
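// The gate in `BoundedBacktracker::get` above is also what makes the
// `unwrap()`s on `try_is_match`/`try_search_slots` in the engine below sound:
// callers only obtain the engine through `get`, so the haystack is known to
// fit within `max_haystack_len`. A hypothetical caller, for illustration:
//
//     if let Some(engine) = backtracker.get(&input) {
//         // The haystack is guaranteed to be within the backtracker's
//         // supported length here, so this cannot hit the length error.
//         let matched = engine.is_match(&mut cache, &input);
//     }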
#[derive(Debug)]
pub(crate) struct BoundedBacktrackerEngine(
    #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker,
    #[cfg(not(feature = "nfa-backtrack"))] (),
);

impl BoundedBacktrackerEngine {
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
    ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> {
        #[cfg(feature = "nfa-backtrack")]
        {
            if !info.config().get_backtrack()
                || info.config().get_match_kind() != MatchKind::LeftmostFirst
            {
                return Ok(None);
            }
            let backtrack_config = backtrack::Config::new().prefilter(pre);
            let engine = backtrack::Builder::new()
                .configure(backtrack_config)
                .build_from_nfa(nfa.clone())
                .map_err(BuildError::nfa)?;
            debug!(
                "BoundedBacktracker built (max haystack length: {:?})",
                engine.max_haystack_len()
            );
            Ok(Some(BoundedBacktrackerEngine(engine)))
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            Ok(None)
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn is_match(
        &self,
        cache: &mut BoundedBacktrackerCache,
        input: &Input<'_>,
    ) -> bool {
        #[cfg(feature = "nfa-backtrack")]
        {
            // OK because we only permit access to this engine when we know
            // the haystack is short enough for the backtracker to run without
            // reporting an error.
            self.0
                .try_is_match(cache.0.as_mut().unwrap(), input.clone())
                .unwrap()
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn search_slots(
        &self,
        cache: &mut BoundedBacktrackerCache,
        input: &Input<'_>,
        slots: &mut [Option<NonMaxUsize>],
    ) -> Option<PatternID> {
        #[cfg(feature = "nfa-backtrack")]
        {
            // OK because we only permit access to this engine when we know
            // the haystack is short enough for the backtracker to run without
            // reporting an error.
            self.0
                .try_search_slots(cache.0.as_mut().unwrap(), input, slots)
                .unwrap()
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    fn max_haystack_len(&self) -> usize {
        #[cfg(feature = "nfa-backtrack")]
        {
            self.0.max_haystack_len()
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }
}

#[derive(Clone, Debug)]
pub(crate) struct BoundedBacktrackerCache(
    #[cfg(feature = "nfa-backtrack")] Option<backtrack::Cache>,
    #[cfg(not(feature = "nfa-backtrack"))] (),
);

impl BoundedBacktrackerCache {
    pub(crate) fn none() -> BoundedBacktrackerCache {
        #[cfg(feature = "nfa-backtrack")]
        {
            BoundedBacktrackerCache(None)
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            BoundedBacktrackerCache(())
        }
    }

    pub(crate) fn new(
        builder: &BoundedBacktracker,
    ) -> BoundedBacktrackerCache {
        #[cfg(feature = "nfa-backtrack")]
        {
            BoundedBacktrackerCache(
                builder.0.as_ref().map(|e| e.0.create_cache()),
            )
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            BoundedBacktrackerCache(())
        }
    }

    pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) {
        #[cfg(feature = "nfa-backtrack")]
        if let Some(ref e) = builder.0 {
            self.0.as_mut().unwrap().reset(&e.0);
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "nfa-backtrack")]
        {
            self.0.as_ref().map_or(0, |c| c.memory_usage())
        }
        #[cfg(not(feature = "nfa-backtrack"))]
        {
            0
        }
    }
}

#[derive(Debug)]
pub(crate) struct OnePass(Option<OnePassEngine>);

impl OnePass {
    pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass {
        OnePass(OnePassEngine::new(info, nfa))
    }

    pub(crate) fn create_cache(&self) -> OnePassCache {
        OnePassCache::new(self)
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> {
        let engine = self.0.as_ref()?;
        if !input.get_anchored().is_anchored()
            && !engine.get_nfa().is_always_start_anchored()
        {
            return None;
        }
        Some(engine)
    }

    pub(crate) fn memory_usage(&self) -> usize {
        self.0.as_ref().map_or(0, |e| e.memory_usage())
    }
}
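// To make the gate in `OnePass::get` above concrete: the one-pass DFA only
// supports anchored searches, so an unanchored `Input` is rejected unless the
// NFA is anchored anyway (e.g., every pattern starts with `^`). A
// hypothetical illustration, assuming `onepass` was built for `(foo)(bar)`:
//
//     // `Input::new` is unanchored by default, so this yields `None`...
//     assert!(onepass.get(&Input::new("foobar")).is_none());
//     // ...while an explicitly anchored search may return the engine.
//     let input = Input::new("foobar").anchored(Anchored::Yes);
//     let maybe_engine = onepass.get(&input);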
#[derive(Debug)]
pub(crate) struct OnePassEngine(
    #[cfg(feature = "dfa-onepass")] onepass::DFA,
    #[cfg(not(feature = "dfa-onepass"))] (),
);

impl OnePassEngine {
    pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> {
        #[cfg(feature = "dfa-onepass")]
        {
            if !info.config().get_onepass() {
                return None;
            }
            // In order to even attempt building a one-pass DFA, we require
            // that we either have at least one explicit capturing group or
            // there's a Unicode word boundary somewhere. If we don't have
            // either of these things, then the lazy DFA will almost certainly
            // be useable and be much faster. The only case where it might
            // not is if the lazy DFA isn't utilizing its cache effectively,
            // but in those cases, the underlying regex is almost certainly
            // not one-pass or is too big to fit within the current one-pass
            // implementation limits.
            if info.props_union().explicit_captures_len() == 0
                && !info.props_union().look_set().contains_word_unicode()
            {
                debug!("not building OnePass because it isn't worth it");
                return None;
            }
            let onepass_config = onepass::Config::new()
                .match_kind(info.config().get_match_kind())
                // Like for the lazy DFA, we unconditionally enable this
                // because it doesn't cost much and makes the API more
                // flexible.
                .starts_for_each_pattern(true)
                .byte_classes(info.config().get_byte_classes())
                .size_limit(info.config().get_onepass_size_limit());
            let result = onepass::Builder::new()
                .configure(onepass_config)
                .build_from_nfa(nfa.clone());
            let engine = match result {
                Ok(engine) => engine,
                Err(_err) => {
                    debug!("OnePass failed to build: {}", _err);
                    return None;
                }
            };
            debug!("OnePass built, {} bytes", engine.memory_usage());
            Some(OnePassEngine(engine))
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            None
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn search_slots(
        &self,
        cache: &mut OnePassCache,
        input: &Input<'_>,
        slots: &mut [Option<NonMaxUsize>],
    ) -> Option<PatternID> {
        #[cfg(feature = "dfa-onepass")]
        {
            // OK because we only permit getting a OnePassEngine when we know
            // the search is anchored and thus an error cannot occur.
            self.0
                .try_search_slots(cache.0.as_mut().unwrap(), input, slots)
                .unwrap()
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "dfa-onepass")]
        {
            self.0.memory_usage()
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    fn get_nfa(&self) -> &NFA {
        #[cfg(feature = "dfa-onepass")]
        {
            self.0.get_nfa()
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }
}
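// Illustrating the heuristic in `OnePassEngine::new` above with hypothetical
// patterns: something like `([0-9]{4})-([0-9]{2})` has explicit capturing
// groups, so resolving capture spans via a one-pass DFA tends to beat the
// PikeVM and the backtracker. A pattern like `abc|def` has neither explicit
// captures nor a Unicode word boundary, so the (lazy) DFA already handles it
// well and building a one-pass DFA isn't worth it.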
#[derive(Clone, Debug)]
pub(crate) struct OnePassCache(
    #[cfg(feature = "dfa-onepass")] Option<onepass::Cache>,
    #[cfg(not(feature = "dfa-onepass"))] (),
);

impl OnePassCache {
    pub(crate) fn none() -> OnePassCache {
        #[cfg(feature = "dfa-onepass")]
        {
            OnePassCache(None)
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            OnePassCache(())
        }
    }

    pub(crate) fn new(builder: &OnePass) -> OnePassCache {
        #[cfg(feature = "dfa-onepass")]
        {
            OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache()))
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            OnePassCache(())
        }
    }

    pub(crate) fn reset(&mut self, builder: &OnePass) {
        #[cfg(feature = "dfa-onepass")]
        if let Some(ref e) = builder.0 {
            self.0.as_mut().unwrap().reset(&e.0);
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "dfa-onepass")]
        {
            self.0.as_ref().map_or(0, |c| c.memory_usage())
        }
        #[cfg(not(feature = "dfa-onepass"))]
        {
            0
        }
    }
}

#[derive(Debug)]
pub(crate) struct Hybrid(Option<HybridEngine>);

impl Hybrid {
    pub(crate) fn none() -> Hybrid {
        Hybrid(None)
    }

    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
        nfarev: &NFA,
    ) -> Hybrid {
        Hybrid(HybridEngine::new(info, pre, nfa, nfarev))
    }

    pub(crate) fn create_cache(&self) -> HybridCache {
        HybridCache::new(self)
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> {
        let engine = self.0.as_ref()?;
        Some(engine)
    }

    pub(crate) fn is_some(&self) -> bool {
        self.0.is_some()
    }
}
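// Unlike the backtracker and one-pass wrappers, `Hybrid::get` above does no
// per-search gating: if the lazy DFA was built, it is always offered. Any
// trouble at search time (e.g., excessive cache clearing or a heuristic
// Unicode word boundary quit) surfaces as a `RetryFailError`, which the meta
// strategy code is expected to catch in order to retry with a different
// engine. A hypothetical caller, for illustration:
//
//     if let Some(engine) = hybrid.get(&input) {
//         match engine.try_search(&mut cache, &input) {
//             Ok(m) => return m,
//             // Fall back to a slower but infallible engine (e.g., PikeVM).
//             Err(_retry) => {}
//         }
//     }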
#[derive(Debug)]
pub(crate) struct HybridEngine(
    #[cfg(feature = "hybrid")] hybrid::regex::Regex,
    #[cfg(not(feature = "hybrid"))] (),
);

impl HybridEngine {
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
        nfarev: &NFA,
    ) -> Option<HybridEngine> {
        #[cfg(feature = "hybrid")]
        {
            if !info.config().get_hybrid() {
                return None;
            }
            let dfa_config = hybrid::dfa::Config::new()
                .match_kind(info.config().get_match_kind())
                .prefilter(pre.clone())
                // Enabling this is necessary for ensuring we can service any
                // kind of 'Input' search without error. For the lazy DFA,
                // this is not particularly costly, since the start states are
                // generated lazily.
                .starts_for_each_pattern(true)
                .byte_classes(info.config().get_byte_classes())
                .unicode_word_boundary(true)
                .specialize_start_states(pre.is_some())
                .cache_capacity(info.config().get_hybrid_cache_capacity())
                // This makes it possible for building a lazy DFA to
                // fail even though the NFA has already been built. Namely,
                // if the cache capacity is too small to fit some minimum
                // number of states (which is small, like 4 or 5), then the
                // DFA will refuse to build.
                //
                // We shouldn't enable this to make building always work,
                // since this could cause the allocation of a cache bigger
                // than the provided capacity amount.
                //
                // This is effectively the only reason why building a lazy DFA
                // could fail. If it does, then we simply suppress the error
                // and return None.
                .skip_cache_capacity_check(false)
                // This and enabling heuristic Unicode word boundary support
                // above make it so the lazy DFA can quit at match time.
                .minimum_cache_clear_count(Some(3))
                .minimum_bytes_per_state(Some(10));
            let result = hybrid::dfa::Builder::new()
                .configure(dfa_config.clone())
                .build_from_nfa(nfa.clone());
            let fwd = match result {
                Ok(fwd) => fwd,
                Err(_err) => {
                    debug!("forward lazy DFA failed to build: {}", _err);
                    return None;
                }
            };
            let result = hybrid::dfa::Builder::new()
                .configure(
                    dfa_config
                        .clone()
                        .match_kind(MatchKind::All)
                        .prefilter(None)
                        .specialize_start_states(false),
                )
                .build_from_nfa(nfarev.clone());
            let rev = match result {
                Ok(rev) => rev,
                Err(_err) => {
                    debug!("reverse lazy DFA failed to build: {}", _err);
                    return None;
                }
            };
            let engine =
                hybrid::regex::Builder::new().build_from_dfas(fwd, rev);
            debug!("lazy DFA built");
            Some(HybridEngine(engine))
        }
        #[cfg(not(feature = "hybrid"))]
        {
            None
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search(
        &self,
        cache: &mut HybridCache,
        input: &Input<'_>,
    ) -> Result<Option<Match>, RetryFailError> {
        #[cfg(feature = "hybrid")]
        {
            let cache = cache.0.as_mut().unwrap();
            self.0.try_search(cache, input).map_err(|e| e.into())
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_fwd(
        &self,
        cache: &mut HybridCache,
        input: &Input<'_>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        #[cfg(feature = "hybrid")]
        {
            let fwd = self.0.forward();
            let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0;
            fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into())
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_fwd_stopat(
        &self,
        cache: &mut HybridCache,
        input: &Input<'_>,
    ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
        #[cfg(feature = "hybrid")]
        {
            let dfa = self.0.forward();
            let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0;
            crate::meta::stopat::hybrid_try_search_half_fwd(
                dfa, &mut cache, input,
            )
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev(
        &self,
        cache: &mut HybridCache,
        input: &Input<'_>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        #[cfg(feature = "hybrid")]
        {
            let rev = self.0.reverse();
            let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1;
            rev.try_search_rev(&mut revcache, input).map_err(|e| e.into())
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev_limited(
        &self,
        cache: &mut HybridCache,
        input: &Input<'_>,
        min_start: usize,
    ) -> Result<Option<HalfMatch>, RetryError> {
        #[cfg(feature = "hybrid")]
        {
            let dfa = self.0.reverse();
            let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1;
            crate::meta::limited::hybrid_try_search_half_rev(
                dfa, &mut cache, input, min_start,
            )
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[inline]
    pub(crate) fn try_which_overlapping_matches(
        &self,
        cache: &mut HybridCache,
        input: &Input<'_>,
        patset: &mut PatternSet,
    ) -> Result<(), RetryFailError> {
        #[cfg(feature = "hybrid")]
        {
            let fwd = self.0.forward();
            let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0;
            fwd.try_which_overlapping_matches(&mut fwdcache, input, patset)
                .map_err(|e| e.into())
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }
}
#[derive(Clone, Debug)]
pub(crate) struct HybridCache(
    #[cfg(feature = "hybrid")] Option<hybrid::regex::Cache>,
    #[cfg(not(feature = "hybrid"))] (),
);

impl HybridCache {
    pub(crate) fn none() -> HybridCache {
        #[cfg(feature = "hybrid")]
        {
            HybridCache(None)
        }
        #[cfg(not(feature = "hybrid"))]
        {
            HybridCache(())
        }
    }

    pub(crate) fn new(builder: &Hybrid) -> HybridCache {
        #[cfg(feature = "hybrid")]
        {
            HybridCache(builder.0.as_ref().map(|e| e.0.create_cache()))
        }
        #[cfg(not(feature = "hybrid"))]
        {
            HybridCache(())
        }
    }

    pub(crate) fn reset(&mut self, builder: &Hybrid) {
        #[cfg(feature = "hybrid")]
        if let Some(ref e) = builder.0 {
            self.0.as_mut().unwrap().reset(&e.0);
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "hybrid")]
        {
            self.0.as_ref().map_or(0, |c| c.memory_usage())
        }
        #[cfg(not(feature = "hybrid"))]
        {
            0
        }
    }
}

#[derive(Debug)]
pub(crate) struct DFA(Option<DFAEngine>);

impl DFA {
    pub(crate) fn none() -> DFA {
        DFA(None)
    }

    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
        nfarev: &NFA,
    ) -> DFA {
        DFA(DFAEngine::new(info, pre, nfa, nfarev))
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> {
        let engine = self.0.as_ref()?;
        Some(engine)
    }

    pub(crate) fn is_some(&self) -> bool {
        self.0.is_some()
    }

    pub(crate) fn memory_usage(&self) -> usize {
        self.0.as_ref().map_or(0, |e| e.memory_usage())
    }
}
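// A hypothetical worked example of the size accounting in `DFAEngine::new`
// below: with a configured DFA size limit of, say, 40_000 bytes, each of the
// four budgets (determinization scratch and the DFA proper, for both the
// forward and reverse DFAs) gets 40_000 / 4 = 10_000 bytes via
// `determinize_size_limit` and `dfa_size_limit` on the dense builder.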
#[derive(Debug)]
pub(crate) struct DFAEngine(
    #[cfg(feature = "dfa-build")] dfa::regex::Regex,
    #[cfg(not(feature = "dfa-build"))] (),
);

impl DFAEngine {
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
        nfarev: &NFA,
    ) -> Option<DFAEngine> {
        #[cfg(feature = "dfa-build")]
        {
            if !info.config().get_dfa() {
                return None;
            }
            // If our NFA is anything but small, don't even bother with a DFA.
            if let Some(state_limit) = info.config().get_dfa_state_limit() {
                if nfa.states().len() > state_limit {
                    debug!(
                        "skipping full DFA because NFA has {} states, \
                         which exceeds the heuristic limit of {}",
                        nfa.states().len(),
                        state_limit,
                    );
                    return None;
                }
            }
            // We cut the size limit in four because the total heap used by
            // DFA construction is determinization aux memory and the DFA
            // itself, and those things are configured independently in the
            // lower level DFA builder API. And then split that in two because
            // of forward and reverse DFAs.
            let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4);
            let dfa_config = dfa::dense::Config::new()
                .match_kind(info.config().get_match_kind())
                .prefilter(pre.clone())
                // Enabling this is necessary for ensuring we can service any
                // kind of 'Input' search without error. For the full DFA,
                // this can be quite costly. But since we have such a small
                // bound on the size of the DFA, in practice, any
                // multi-regexes are probably going to blow the limit anyway.
                .starts_for_each_pattern(true)
                .byte_classes(info.config().get_byte_classes())
                .unicode_word_boundary(true)
                .specialize_start_states(pre.is_some())
                .determinize_size_limit(size_limit)
                .dfa_size_limit(size_limit);
            let result = dfa::dense::Builder::new()
                .configure(dfa_config.clone())
                .build_from_nfa(&nfa);
            let fwd = match result {
                Ok(fwd) => fwd,
                Err(_err) => {
                    debug!("forward full DFA failed to build: {}", _err);
                    return None;
                }
            };
            let result = dfa::dense::Builder::new()
                .configure(
                    dfa_config
                        .clone()
                        // We never need unanchored reverse searches, so
                        // there's no point in building it into the DFA, which
                        // WILL take more space. (This isn't done for the lazy
                        // DFA because the DFA is, well, lazy. It doesn't pay
                        // the cost for supporting unanchored searches unless
                        // you actually do an unanchored search, which we
                        // don't.)
                        .start_kind(dfa::StartKind::Anchored)
                        .match_kind(MatchKind::All)
                        .prefilter(None)
                        .specialize_start_states(false),
                )
                .build_from_nfa(&nfarev);
            let rev = match result {
                Ok(rev) => rev,
                Err(_err) => {
                    debug!("reverse full DFA failed to build: {}", _err);
                    return None;
                }
            };
            let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev);
            debug!(
                "fully compiled forward and reverse DFAs built, {} bytes",
                engine.forward().memory_usage()
                    + engine.reverse().memory_usage(),
            );
            Some(DFAEngine(engine))
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            None
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search(
        &self,
        input: &Input<'_>,
    ) -> Result<Option<Match>, RetryFailError> {
        #[cfg(feature = "dfa-build")]
        {
            self.0.try_search(input).map_err(|e| e.into())
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_fwd(
        &self,
        input: &Input<'_>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        #[cfg(feature = "dfa-build")]
        {
            use crate::dfa::Automaton;
            self.0.forward().try_search_fwd(input).map_err(|e| e.into())
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_fwd_stopat(
        &self,
        input: &Input<'_>,
    ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
        #[cfg(feature = "dfa-build")]
        {
            let dfa = self.0.forward();
            crate::meta::stopat::dfa_try_search_half_fwd(dfa, input)
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev(
        &self,
        input: &Input<'_>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        #[cfg(feature = "dfa-build")]
        {
            use crate::dfa::Automaton;
            self.0.reverse().try_search_rev(&input).map_err(|e| e.into())
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev_limited(
        &self,
        input: &Input<'_>,
        min_start: usize,
    ) -> Result<Option<HalfMatch>, RetryError> {
        #[cfg(feature = "dfa-build")]
        {
            let dfa = self.0.reverse();
            crate::meta::limited::dfa_try_search_half_rev(
                dfa, input, min_start,
            )
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    #[inline]
    pub(crate) fn try_which_overlapping_matches(
        &self,
        input: &Input<'_>,
        patset: &mut PatternSet,
    ) -> Result<(), RetryFailError> {
        #[cfg(feature = "dfa-build")]
        {
            use crate::dfa::Automaton;
            self.0
                .forward()
                .try_which_overlapping_matches(input, patset)
                .map_err(|e| e.into())
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "dfa-build")]
        {
            self.0.forward().memory_usage() + self.0.reverse().memory_usage()
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }
}
#[derive(Debug)]
pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>);

impl ReverseHybrid {
    pub(crate) fn none() -> ReverseHybrid {
        ReverseHybrid(None)
    }

    pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid {
        ReverseHybrid(ReverseHybridEngine::new(info, nfarev))
    }

    pub(crate) fn create_cache(&self) -> ReverseHybridCache {
        ReverseHybridCache::new(self)
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(
        &self,
        _input: &Input<'_>,
    ) -> Option<&ReverseHybridEngine> {
        let engine = self.0.as_ref()?;
        Some(engine)
    }
}

#[derive(Debug)]
pub(crate) struct ReverseHybridEngine(
    #[cfg(feature = "hybrid")] hybrid::dfa::DFA,
    #[cfg(not(feature = "hybrid"))] (),
);

impl ReverseHybridEngine {
    pub(crate) fn new(
        info: &RegexInfo,
        nfarev: &NFA,
    ) -> Option<ReverseHybridEngine> {
        #[cfg(feature = "hybrid")]
        {
            if !info.config().get_hybrid() {
                return None;
            }
            // Since we only use this for reverse searches, we can hard-code
            // a number of things like match semantics, prefilters, starts
            // for each pattern and so on.
            let dfa_config = hybrid::dfa::Config::new()
                .match_kind(MatchKind::All)
                .prefilter(None)
                .starts_for_each_pattern(false)
                .byte_classes(info.config().get_byte_classes())
                .unicode_word_boundary(true)
                .specialize_start_states(false)
                .cache_capacity(info.config().get_hybrid_cache_capacity())
                .skip_cache_capacity_check(false)
                .minimum_cache_clear_count(Some(3))
                .minimum_bytes_per_state(Some(10));
            let result = hybrid::dfa::Builder::new()
                .configure(dfa_config)
                .build_from_nfa(nfarev.clone());
            let rev = match result {
                Ok(rev) => rev,
                Err(_err) => {
                    debug!("lazy reverse DFA failed to build: {}", _err);
                    return None;
                }
            };
            debug!("lazy reverse DFA built");
            Some(ReverseHybridEngine(rev))
        }
        #[cfg(not(feature = "hybrid"))]
        {
            None
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev_limited(
        &self,
        cache: &mut ReverseHybridCache,
        input: &Input<'_>,
        min_start: usize,
    ) -> Result<Option<HalfMatch>, RetryError> {
        #[cfg(feature = "hybrid")]
        {
            let dfa = &self.0;
            let mut cache = cache.0.as_mut().unwrap();
            crate::meta::limited::hybrid_try_search_half_rev(
                dfa, &mut cache, input, min_start,
            )
        }
        #[cfg(not(feature = "hybrid"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }
}
#[derive(Clone, Debug)]
pub(crate) struct ReverseHybridCache(
    #[cfg(feature = "hybrid")] Option<hybrid::dfa::Cache>,
    #[cfg(not(feature = "hybrid"))] (),
);

impl ReverseHybridCache {
    pub(crate) fn none() -> ReverseHybridCache {
        #[cfg(feature = "hybrid")]
        {
            ReverseHybridCache(None)
        }
        #[cfg(not(feature = "hybrid"))]
        {
            ReverseHybridCache(())
        }
    }

    pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache {
        #[cfg(feature = "hybrid")]
        {
            ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache()))
        }
        #[cfg(not(feature = "hybrid"))]
        {
            ReverseHybridCache(())
        }
    }

    pub(crate) fn reset(&mut self, builder: &ReverseHybrid) {
        #[cfg(feature = "hybrid")]
        if let Some(ref e) = builder.0 {
            self.0.as_mut().unwrap().reset(&e.0);
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "hybrid")]
        {
            self.0.as_ref().map_or(0, |c| c.memory_usage())
        }
        #[cfg(not(feature = "hybrid"))]
        {
            0
        }
    }
}

#[derive(Debug)]
pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>);

impl ReverseDFA {
    pub(crate) fn none() -> ReverseDFA {
        ReverseDFA(None)
    }

    pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA {
        ReverseDFA(ReverseDFAEngine::new(info, nfarev))
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> {
        let engine = self.0.as_ref()?;
        Some(engine)
    }

    pub(crate) fn is_some(&self) -> bool {
        self.0.is_some()
    }

    pub(crate) fn memory_usage(&self) -> usize {
        self.0.as_ref().map_or(0, |e| e.memory_usage())
    }
}
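// A sketch of how these reverse-only engines are meant to be used (names
// below are hypothetical): once a forward scan or prefilter has located the
// end of a candidate match at `end`, a *limited* reverse search walks
// backwards to find the start, and gives up (with a `RetryError`) rather
// than scan earlier than `min_start`:
//
//     let revinput = Input::new(haystack).range(..end);
//     match revdfa.try_search_half_rev_limited(&revinput, min_start)? {
//         Some(half) => { /* the match spans half.offset()..end */ }
//         None => { /* no match ending at `end` */ }
//     }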
#[derive(Debug)]
pub(crate) struct ReverseDFAEngine(
    #[cfg(feature = "dfa-build")] dfa::dense::DFA<Vec<u32>>,
    #[cfg(not(feature = "dfa-build"))] (),
);

impl ReverseDFAEngine {
    pub(crate) fn new(
        info: &RegexInfo,
        nfarev: &NFA,
    ) -> Option<ReverseDFAEngine> {
        #[cfg(feature = "dfa-build")]
        {
            if !info.config().get_dfa() {
                return None;
            }
            // If our NFA is anything but small, don't even bother with a DFA.
            if let Some(state_limit) = info.config().get_dfa_state_limit() {
                if nfarev.states().len() > state_limit {
                    debug!(
                        "skipping full reverse DFA because NFA has {} states, \
                         which exceeds the heuristic limit of {}",
                        nfarev.states().len(),
                        state_limit,
                    );
                    return None;
                }
            }
            // We cut the size limit in two because the total heap used by DFA
            // construction is determinization aux memory and the DFA itself,
            // and those things are configured independently in the lower
            // level DFA builder API.
            let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2);
            // Since we only use this for reverse searches, we can hard-code
            // a number of things like match semantics, prefilters, starts
            // for each pattern and so on. We also disable acceleration since
            // it's incompatible with limited searches (which is the only
            // operation we support for this kind of engine at the moment).
            let dfa_config = dfa::dense::Config::new()
                .match_kind(MatchKind::All)
                .prefilter(None)
                .accelerate(false)
                .start_kind(dfa::StartKind::Anchored)
                .starts_for_each_pattern(false)
                .byte_classes(info.config().get_byte_classes())
                .unicode_word_boundary(true)
                .specialize_start_states(false)
                .determinize_size_limit(size_limit)
                .dfa_size_limit(size_limit);
            let result = dfa::dense::Builder::new()
                .configure(dfa_config)
                .build_from_nfa(&nfarev);
            let rev = match result {
                Ok(rev) => rev,
                Err(_err) => {
                    debug!("full reverse DFA failed to build: {}", _err);
                    return None;
                }
            };
            debug!(
                "fully compiled reverse DFA built, {} bytes",
                rev.memory_usage()
            );
            Some(ReverseDFAEngine(rev))
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            None
        }
    }

    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev_limited(
        &self,
        input: &Input<'_>,
        min_start: usize,
    ) -> Result<Option<HalfMatch>, RetryError> {
        #[cfg(feature = "dfa-build")]
        {
            let dfa = &self.0;
            crate::meta::limited::dfa_try_search_half_rev(
                dfa, input, min_start,
            )
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }

    pub(crate) fn memory_usage(&self) -> usize {
        #[cfg(feature = "dfa-build")]
        {
            self.0.memory_usage()
        }
        #[cfg(not(feature = "dfa-build"))]
        {
            // Impossible to reach because this engine is never constructed
            // if the requisite features aren't enabled.
            unreachable!()
        }
    }
}