diff options
Diffstat (limited to 'vendor/regex-automata/src/dfa/transducer.rs')
-rw-r--r-- | vendor/regex-automata/src/dfa/transducer.rs | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/vendor/regex-automata/src/dfa/transducer.rs b/vendor/regex-automata/src/dfa/transducer.rs new file mode 100644 index 000000000..58b34e00a --- /dev/null +++ b/vendor/regex-automata/src/dfa/transducer.rs @@ -0,0 +1,207 @@ +use crate::{ + dfa::{automaton::Automaton, dense, sparse}, + util::id::StateID, +}; + +impl<T: AsRef<[u32]>> fst::Automaton for dense::DFA<T> { + type State = StateID; + + #[inline] + fn start(&self) -> StateID { + self.start_state_forward(None, &[], 0, 0) + } + + #[inline] + fn is_match(&self, state: &StateID) -> bool { + self.is_match_state(*state) + } + + #[inline] + fn accept(&self, state: &StateID, byte: u8) -> StateID { + if fst::Automaton::is_match(self, state) { + return *state; + } + self.next_state(*state, byte) + } + + #[inline] + fn accept_eof(&self, state: &StateID) -> Option<StateID> { + if fst::Automaton::is_match(self, state) { + return Some(*state); + } + Some(self.next_eoi_state(*state)) + } + + #[inline] + fn can_match(&self, state: &StateID) -> bool { + !self.is_dead_state(*state) + } +} + +impl<T: AsRef<[u8]>> fst::Automaton for sparse::DFA<T> { + type State = StateID; + + #[inline] + fn start(&self) -> StateID { + self.start_state_forward(None, &[], 0, 0) + } + + #[inline] + fn is_match(&self, state: &StateID) -> bool { + self.is_match_state(*state) + } + + #[inline] + fn accept(&self, state: &StateID, byte: u8) -> StateID { + if fst::Automaton::is_match(self, state) { + return *state; + } + self.next_state(*state, byte) + } + + #[inline] + fn accept_eof(&self, state: &StateID) -> Option<StateID> { + if fst::Automaton::is_match(self, state) { + return Some(*state); + } + Some(self.next_eoi_state(*state)) + } + + #[inline] + fn can_match(&self, state: &StateID) -> bool { + !self.is_dead_state(*state) + } +} + +#[cfg(test)] +mod tests { + use bstr::BString; + use fst::{Automaton, IntoStreamer, Set, Streamer}; + + use crate::dfa::{dense, sparse}; + + fn search<A: Automaton, D: AsRef<[u8]>>( + set: &Set<D>, + aut: A, + ) -> Vec<BString> { + let mut stream = set.search(aut).into_stream(); + + let mut results = vec![]; + while let Some(key) = stream.next() { + results.push(BString::from(key)); + } + results + } + + #[test] + fn dense_anywhere() { + let set = + Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = dense::DFA::new("ba.*").unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bar", "baz", "xba", "xbax"]); + } + + #[test] + fn dense_anchored() { + let set = + Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = dense::Builder::new() + .configure(dense::Config::new().anchored(true)) + .build("ba.*") + .unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bar", "baz"]); + } + + #[test] + fn dense_assertions_start() { + let set = + Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = dense::Builder::new().build("^ba.*").unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bar", "baz"]); + } + + #[test] + fn dense_assertions_end() { + let set = + Set::from_iter(&["a", "bar", "bax", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = dense::Builder::new().build(".*x$").unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bax", "xbax"]); + } + + #[test] + fn dense_assertions_word() { + let set = + Set::from_iter(&["foo", "foox", "xfoo", "zzz foo zzz"]).unwrap(); + let dfa = dense::Builder::new().build(r"(?-u)\bfoo\b").unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["foo", "zzz foo zzz"]); + } + + #[test] + fn sparse_anywhere() { + let set = + Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = sparse::DFA::new("ba.*").unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bar", "baz", "xba", "xbax"]); + } + + #[test] + fn sparse_anchored() { + let set = + Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = dense::Builder::new() + .configure(dense::Config::new().anchored(true)) + .build("ba.*") + .unwrap() + .to_sparse() + .unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bar", "baz"]); + } + + #[test] + fn sparse_assertions_start() { + let set = + Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = + dense::Builder::new().build("^ba.*").unwrap().to_sparse().unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bar", "baz"]); + } + + #[test] + fn sparse_assertions_end() { + let set = + Set::from_iter(&["a", "bar", "bax", "wat", "xba", "xbax", "z"]) + .unwrap(); + let dfa = + dense::Builder::new().build(".*x$").unwrap().to_sparse().unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["bax", "xbax"]); + } + + #[test] + fn sparse_assertions_word() { + let set = + Set::from_iter(&["foo", "foox", "xfoo", "zzz foo zzz"]).unwrap(); + let dfa = dense::Builder::new() + .build(r"(?-u)\bfoo\b") + .unwrap() + .to_sparse() + .unwrap(); + let got = search(&set, &dfa); + assert_eq!(got, vec!["foo", "zzz foo zzz"]); + } +} |