summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/src/determinize.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:18:25 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:18:25 +0000
commit5363f350887b1e5b5dd21a86f88c8af9d7fea6da (patch)
tree35ca005eb6e0e9a1ba3bb5dbc033209ad445dc17 /vendor/regex-automata/src/determinize.rs
parentAdding debian version 1.66.0+dfsg1-1. (diff)
downloadrustc-5363f350887b1e5b5dd21a86f88c8af9d7fea6da.tar.xz
rustc-5363f350887b1e5b5dd21a86f88c8af9d7fea6da.zip
Merging upstream version 1.67.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/src/determinize.rs')
-rw-r--r--vendor/regex-automata/src/determinize.rs286
1 files changed, 0 insertions, 286 deletions
diff --git a/vendor/regex-automata/src/determinize.rs b/vendor/regex-automata/src/determinize.rs
deleted file mode 100644
index cf0c28585..000000000
--- a/vendor/regex-automata/src/determinize.rs
+++ /dev/null
@@ -1,286 +0,0 @@
-use std::collections::HashMap;
-use std::mem;
-use std::rc::Rc;
-
-use dense;
-use error::Result;
-use nfa::{self, NFA};
-use sparse_set::SparseSet;
-use state_id::{dead_id, StateID};
-
-type DFARepr<S> = dense::Repr<Vec<S>, S>;
-
-/// A determinizer converts an NFA to a DFA.
-///
-/// This determinizer follows the typical powerset construction, where each
-/// DFA state is comprised of one or more NFA states. In the worst case, there
-/// is one DFA state for every possible combination of NFA states. In practice,
-/// this only happens in certain conditions, typically when there are bounded
-/// repetitions.
-///
-/// The type variable `S` refers to the chosen state identifier representation
-/// used for the DFA.
-///
-/// The lifetime variable `'a` refers to the lifetime of the NFA being
-/// converted to a DFA.
-#[derive(Debug)]
-pub(crate) struct Determinizer<'a, S: StateID> {
- /// The NFA we're converting into a DFA.
- nfa: &'a NFA,
- /// The DFA we're building.
- dfa: DFARepr<S>,
- /// Each DFA state being built is defined as an *ordered* set of NFA
- /// states, along with a flag indicating whether the state is a match
- /// state or not.
- ///
- /// This is never empty. The first state is always a dummy state such that
- /// a state id == 0 corresponds to a dead state.
- builder_states: Vec<Rc<State>>,
- /// A cache of DFA states that already exist and can be easily looked up
- /// via ordered sets of NFA states.
- cache: HashMap<Rc<State>, S>,
- /// Scratch space for a stack of NFA states to visit, for depth first
- /// visiting without recursion.
- stack: Vec<nfa::StateID>,
- /// Scratch space for storing an ordered sequence of NFA states, for
- /// amortizing allocation.
- scratch_nfa_states: Vec<nfa::StateID>,
- /// Whether to build a DFA that finds the longest possible match.
- longest_match: bool,
-}
-
-/// An intermediate representation for a DFA state during determinization.
-#[derive(Debug, Eq, Hash, PartialEq)]
-struct State {
- /// Whether this state is a match state or not.
- is_match: bool,
- /// An ordered sequence of NFA states that make up this DFA state.
- nfa_states: Vec<nfa::StateID>,
-}
-
-impl<'a, S: StateID> Determinizer<'a, S> {
- /// Create a new determinizer for converting the given NFA to a DFA.
- pub fn new(nfa: &'a NFA) -> Determinizer<'a, S> {
- let dead = Rc::new(State::dead());
- let mut cache = HashMap::default();
- cache.insert(dead.clone(), dead_id());
-
- Determinizer {
- nfa,
- dfa: DFARepr::empty().anchored(nfa.is_anchored()),
- builder_states: vec![dead],
- cache,
- stack: vec![],
- scratch_nfa_states: vec![],
- longest_match: false,
- }
- }
-
- /// Instruct the determinizer to use equivalence classes as the transition
- /// alphabet instead of all possible byte values.
- pub fn with_byte_classes(mut self) -> Determinizer<'a, S> {
- let byte_classes = self.nfa.byte_classes().clone();
- self.dfa = DFARepr::empty_with_byte_classes(byte_classes)
- .anchored(self.nfa.is_anchored());
- self
- }
-
- /// Instruct the determinizer to build a DFA that recognizes the longest
- /// possible match instead of the leftmost first match. This is useful when
- /// constructing reverse DFAs for finding the start of a match.
- pub fn longest_match(mut self, yes: bool) -> Determinizer<'a, S> {
- self.longest_match = yes;
- self
- }
-
- /// Build the DFA. If there was a problem constructing the DFA (e.g., if
- /// the chosen state identifier representation is too small), then an error
- /// is returned.
- pub fn build(mut self) -> Result<DFARepr<S>> {
- let representative_bytes: Vec<u8> =
- self.dfa.byte_classes().representatives().collect();
- let mut sparse = self.new_sparse_set();
- let mut uncompiled = vec![self.add_start(&mut sparse)?];
- while let Some(dfa_id) = uncompiled.pop() {
- for &b in &representative_bytes {
- let (next_dfa_id, is_new) =
- self.cached_state(dfa_id, b, &mut sparse)?;
- self.dfa.add_transition(dfa_id, b, next_dfa_id);
- if is_new {
- uncompiled.push(next_dfa_id);
- }
- }
- }
-
- // At this point, we shuffle the matching states in the final DFA to
- // the beginning. This permits a DFA's match loop to detect a match
- // condition by merely inspecting the current state's identifier, and
- // avoids the need for any additional auxiliary storage.
- let is_match: Vec<bool> =
- self.builder_states.iter().map(|s| s.is_match).collect();
- self.dfa.shuffle_match_states(&is_match);
- Ok(self.dfa)
- }
-
- /// Return the identifier for the next DFA state given an existing DFA
- /// state and an input byte. If the next DFA state already exists, then
- /// return its identifier from the cache. Otherwise, build the state, cache
- /// it and return its identifier.
- ///
- /// The given sparse set is used for scratch space. It must have a capacity
- /// equivalent to the total number of NFA states, but its contents are
- /// otherwise unspecified.
- ///
- /// This routine returns a boolean indicating whether a new state was
- /// built. If a new state is built, then the caller needs to add it to its
- /// frontier of uncompiled DFA states to compute transitions for.
- fn cached_state(
- &mut self,
- dfa_id: S,
- b: u8,
- sparse: &mut SparseSet,
- ) -> Result<(S, bool)> {
- sparse.clear();
- // Compute the set of all reachable NFA states, including epsilons.
- self.next(dfa_id, b, sparse);
- // Build a candidate state and check if it has already been built.
- let state = self.new_state(sparse);
- if let Some(&cached_id) = self.cache.get(&state) {
- // Since we have a cached state, put the constructed state's
- // memory back into our scratch space, so that it can be reused.
- let _ =
- mem::replace(&mut self.scratch_nfa_states, state.nfa_states);
- return Ok((cached_id, false));
- }
- // Nothing was in the cache, so add this state to the cache.
- self.add_state(state).map(|s| (s, true))
- }
-
- /// Compute the set of all eachable NFA states, including the full epsilon
- /// closure, from a DFA state for a single byte of input.
- fn next(&mut self, dfa_id: S, b: u8, next_nfa_states: &mut SparseSet) {
- next_nfa_states.clear();
- for i in 0..self.builder_states[dfa_id.to_usize()].nfa_states.len() {
- let nfa_id = self.builder_states[dfa_id.to_usize()].nfa_states[i];
- match *self.nfa.state(nfa_id) {
- nfa::State::Union { .. }
- | nfa::State::Fail
- | nfa::State::Match => {}
- nfa::State::Range { range: ref r } => {
- if r.start <= b && b <= r.end {
- self.epsilon_closure(r.next, next_nfa_states);
- }
- }
- nfa::State::Sparse { ref ranges } => {
- for r in ranges.iter() {
- if r.start > b {
- break;
- } else if r.start <= b && b <= r.end {
- self.epsilon_closure(r.next, next_nfa_states);
- break;
- }
- }
- }
- }
- }
- }
-
- /// Compute the epsilon closure for the given NFA state.
- fn epsilon_closure(&mut self, start: nfa::StateID, set: &mut SparseSet) {
- if !self.nfa.state(start).is_epsilon() {
- set.insert(start);
- return;
- }
-
- self.stack.push(start);
- while let Some(mut id) = self.stack.pop() {
- loop {
- if set.contains(id) {
- break;
- }
- set.insert(id);
- match *self.nfa.state(id) {
- nfa::State::Range { .. }
- | nfa::State::Sparse { .. }
- | nfa::State::Fail
- | nfa::State::Match => break,
- nfa::State::Union { ref alternates } => {
- id = match alternates.get(0) {
- None => break,
- Some(&id) => id,
- };
- self.stack.extend(alternates[1..].iter().rev());
- }
- }
- }
- }
- }
-
- /// Compute the initial DFA state and return its identifier.
- ///
- /// The sparse set given is used for scratch space, and must have capacity
- /// equal to the total number of NFA states. Its contents are unspecified.
- fn add_start(&mut self, sparse: &mut SparseSet) -> Result<S> {
- sparse.clear();
- self.epsilon_closure(self.nfa.start(), sparse);
- let state = self.new_state(&sparse);
- let id = self.add_state(state)?;
- self.dfa.set_start_state(id);
- Ok(id)
- }
-
- /// Add the given state to the DFA and make it available in the cache.
- ///
- /// The state initially has no transitions. That is, it transitions to the
- /// dead state for all possible inputs.
- fn add_state(&mut self, state: State) -> Result<S> {
- let id = self.dfa.add_empty_state()?;
- let rstate = Rc::new(state);
- self.builder_states.push(rstate.clone());
- self.cache.insert(rstate, id);
- Ok(id)
- }
-
- /// Convert the given set of ordered NFA states to a DFA state.
- fn new_state(&mut self, set: &SparseSet) -> State {
- let mut state = State {
- is_match: false,
- nfa_states: mem::replace(&mut self.scratch_nfa_states, vec![]),
- };
- state.nfa_states.clear();
-
- for &id in set {
- match *self.nfa.state(id) {
- nfa::State::Range { .. } => {
- state.nfa_states.push(id);
- }
- nfa::State::Sparse { .. } => {
- state.nfa_states.push(id);
- }
- nfa::State::Fail => {
- break;
- }
- nfa::State::Match => {
- state.is_match = true;
- if !self.longest_match {
- break;
- }
- }
- nfa::State::Union { .. } => {}
- }
- }
- state
- }
-
- /// Create a new sparse set with enough capacity to hold all NFA states.
- fn new_sparse_set(&self) -> SparseSet {
- SparseSet::new(self.nfa.len())
- }
-}
-
-impl State {
- /// Create a new empty dead state.
- fn dead() -> State {
- State { nfa_states: vec![], is_match: false }
- }
-}