summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/src/dfa/transducer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/src/dfa/transducer.rs')
-rw-r--r--vendor/regex-automata/src/dfa/transducer.rs207
1 files changed, 207 insertions, 0 deletions
diff --git a/vendor/regex-automata/src/dfa/transducer.rs b/vendor/regex-automata/src/dfa/transducer.rs
new file mode 100644
index 000000000..58b34e00a
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/transducer.rs
@@ -0,0 +1,207 @@
+use crate::{
+ dfa::{automaton::Automaton, dense, sparse},
+ util::id::StateID,
+};
+
+impl<T: AsRef<[u32]>> fst::Automaton for dense::DFA<T> {
+ type State = StateID;
+
+ #[inline]
+ fn start(&self) -> StateID {
+ self.start_state_forward(None, &[], 0, 0)
+ }
+
+ #[inline]
+ fn is_match(&self, state: &StateID) -> bool {
+ self.is_match_state(*state)
+ }
+
+ #[inline]
+ fn accept(&self, state: &StateID, byte: u8) -> StateID {
+ if fst::Automaton::is_match(self, state) {
+ return *state;
+ }
+ self.next_state(*state, byte)
+ }
+
+ #[inline]
+ fn accept_eof(&self, state: &StateID) -> Option<StateID> {
+ if fst::Automaton::is_match(self, state) {
+ return Some(*state);
+ }
+ Some(self.next_eoi_state(*state))
+ }
+
+ #[inline]
+ fn can_match(&self, state: &StateID) -> bool {
+ !self.is_dead_state(*state)
+ }
+}
+
+impl<T: AsRef<[u8]>> fst::Automaton for sparse::DFA<T> {
+ type State = StateID;
+
+ #[inline]
+ fn start(&self) -> StateID {
+ self.start_state_forward(None, &[], 0, 0)
+ }
+
+ #[inline]
+ fn is_match(&self, state: &StateID) -> bool {
+ self.is_match_state(*state)
+ }
+
+ #[inline]
+ fn accept(&self, state: &StateID, byte: u8) -> StateID {
+ if fst::Automaton::is_match(self, state) {
+ return *state;
+ }
+ self.next_state(*state, byte)
+ }
+
+ #[inline]
+ fn accept_eof(&self, state: &StateID) -> Option<StateID> {
+ if fst::Automaton::is_match(self, state) {
+ return Some(*state);
+ }
+ Some(self.next_eoi_state(*state))
+ }
+
+ #[inline]
+ fn can_match(&self, state: &StateID) -> bool {
+ !self.is_dead_state(*state)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use bstr::BString;
+ use fst::{Automaton, IntoStreamer, Set, Streamer};
+
+ use crate::dfa::{dense, sparse};
+
+ fn search<A: Automaton, D: AsRef<[u8]>>(
+ set: &Set<D>,
+ aut: A,
+ ) -> Vec<BString> {
+ let mut stream = set.search(aut).into_stream();
+
+ let mut results = vec![];
+ while let Some(key) = stream.next() {
+ results.push(BString::from(key));
+ }
+ results
+ }
+
+ #[test]
+ fn dense_anywhere() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa = dense::DFA::new("ba.*").unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bar", "baz", "xba", "xbax"]);
+ }
+
+ #[test]
+ fn dense_anchored() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa = dense::Builder::new()
+ .configure(dense::Config::new().anchored(true))
+ .build("ba.*")
+ .unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bar", "baz"]);
+ }
+
+ #[test]
+ fn dense_assertions_start() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa = dense::Builder::new().build("^ba.*").unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bar", "baz"]);
+ }
+
+ #[test]
+ fn dense_assertions_end() {
+ let set =
+ Set::from_iter(&["a", "bar", "bax", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa = dense::Builder::new().build(".*x$").unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bax", "xbax"]);
+ }
+
+ #[test]
+ fn dense_assertions_word() {
+ let set =
+ Set::from_iter(&["foo", "foox", "xfoo", "zzz foo zzz"]).unwrap();
+ let dfa = dense::Builder::new().build(r"(?-u)\bfoo\b").unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["foo", "zzz foo zzz"]);
+ }
+
+ #[test]
+ fn sparse_anywhere() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa = sparse::DFA::new("ba.*").unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bar", "baz", "xba", "xbax"]);
+ }
+
+ #[test]
+ fn sparse_anchored() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa = dense::Builder::new()
+ .configure(dense::Config::new().anchored(true))
+ .build("ba.*")
+ .unwrap()
+ .to_sparse()
+ .unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bar", "baz"]);
+ }
+
+ #[test]
+ fn sparse_assertions_start() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa =
+ dense::Builder::new().build("^ba.*").unwrap().to_sparse().unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bar", "baz"]);
+ }
+
+ #[test]
+ fn sparse_assertions_end() {
+ let set =
+ Set::from_iter(&["a", "bar", "bax", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let dfa =
+ dense::Builder::new().build(".*x$").unwrap().to_sparse().unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["bax", "xbax"]);
+ }
+
+ #[test]
+ fn sparse_assertions_word() {
+ let set =
+ Set::from_iter(&["foo", "foox", "xfoo", "zzz foo zzz"]).unwrap();
+ let dfa = dense::Builder::new()
+ .build(r"(?-u)\bfoo\b")
+ .unwrap()
+ .to_sparse()
+ .unwrap();
+ let got = search(&set, &dfa);
+ assert_eq!(got, vec!["foo", "zzz foo zzz"]);
+ }
+}