diff options
Diffstat (limited to 'third_party/rust/regex-automata/tests/gen')
14 files changed, 191 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/tests/gen/README.md b/third_party/rust/regex-automata/tests/gen/README.md new file mode 100644 index 0000000000..59439a11fd --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/README.md @@ -0,0 +1,65 @@ +This directory contains tests for serialized objects from the regex-automata +crate. Currently, there are only two supported such objects: dense and sparse +DFAs. + +The idea behind these tests is to commit some serialized objects and run some +basic tests by deserializing them and running searches and ensuring they are +correct. We also make sure these are run under Miri, since deserialization is +one of the biggest places where undefined behavior might occur in this crate +(at the time of writing). + +The main thing we're testing is that the *current* code can still deserialize +*old* objects correctly. Generally speaking, compatibility extends to semver +compatible releases of this crate. Beyond that, no promises are made, although +in practice callers can at least depend on errors occurring. (The serialized +format always includes a version number, and incompatible changes increment +that version number such that an error will occur if an unsupported version is +detected.) + +To generate the dense DFAs, I used this command: + +``` +$ regex-cli generate serialize dense regex \ + MULTI_PATTERN_V2 \ + tests/gen/dense/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +And to generate the sparse DFAs, I used this command, which is the same as +above, but with `s/dense/sparse/g`. + +``` +$ regex-cli generate serialize sparse regex \ + MULTI_PATTERN_V2 \ + tests/gen/sparse/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +The idea is to try to enable as many of the DFA's options as possible in order +to test that serialization works for all of them. + +Arguably we should increase test coverage here, but this is a start. Note +that in particular, this does not need to test that serialization and +deserialization correctly roundtrips on its own. Indeed, the normal regex test +suite has a test that does a serialization round trip for every test supported +by DFAs. So that has very good coverage. What we're interested in testing here +is our compatibility promise: do DFAs generated with an older revision of the +code still deserialize correctly? diff --git a/third_party/rust/regex-automata/tests/gen/dense/mod.rs b/third_party/rust/regex-automata/tests/gen/dense/mod.rs new file mode 100644 index 0000000000..b4365d4e19 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/dense/mod.rs @@ -0,0 +1,22 @@ +use regex_automata::{Input, Match}; + +mod multi_pattern_v2; + +#[test] +fn multi_pattern_v2() { + use multi_pattern_v2::MULTI_PATTERN_V2 as RE; + + assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd")); + assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @")); + assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n")); + assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n")); + assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n")); + assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@")); + assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n")); + + // Fails because we have heuristic support for Unicode word boundaries + // enabled. + assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err()); +} diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs new file mode 100644 index 0000000000..a95fd204b5 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs @@ -0,0 +1,43 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// regex-cli generate serialize dense regex MULTI_PATTERN_V2 tests/gen/dense/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$ +// +// regex-cli 0.0.1 is available on crates.io. + +use regex_automata::{ + dfa::{dense::DFA, regex::Regex}, + util::{lazy::Lazy, wire::AlignAs}, +}; + +pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u32]>>> = + Lazy::new(|| { + let dfafwd = { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!( + "multi_pattern_v2_fwd.littleendian.dfa" + ), + }; + DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("multi_pattern_v2_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!( + "multi_pattern_v2_rev.littleendian.dfa" + ), + }; + DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000000..6d6e040c36 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000000..a1f4b3da15 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa Binary files differnew file mode 100644 index 0000000000..74f74ec2a9 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa Binary files differnew file mode 100644 index 0000000000..663bdb9ead --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/mod.rs b/third_party/rust/regex-automata/tests/gen/mod.rs new file mode 100644 index 0000000000..960cb4251a --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/mod.rs @@ -0,0 +1,2 @@ +mod dense; +mod sparse; diff --git a/third_party/rust/regex-automata/tests/gen/sparse/mod.rs b/third_party/rust/regex-automata/tests/gen/sparse/mod.rs new file mode 100644 index 0000000000..b4365d4e19 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/sparse/mod.rs @@ -0,0 +1,22 @@ +use regex_automata::{Input, Match}; + +mod multi_pattern_v2; + +#[test] +fn multi_pattern_v2() { + use multi_pattern_v2::MULTI_PATTERN_V2 as RE; + + assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd")); + assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @")); + assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n")); + assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n")); + assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n")); + assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@")); + assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n")); + + // Fails because we have heuristic support for Unicode word boundaries + // enabled. + assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err()); +} diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs new file mode 100644 index 0000000000..911e3f5ddc --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs @@ -0,0 +1,37 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// regex-cli generate serialize sparse regex MULTI_PATTERN_V2 regex-automata/tests/gen/sparse/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$ +// +// regex-cli 0.0.1 is available on crates.io. + +use regex_automata::{ + dfa::{regex::Regex, sparse::DFA}, + util::lazy::Lazy, +}; + +pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u8]>>> = + Lazy::new(|| { + let dfafwd = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000000..aa04f63162 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000000..c27d92abe1 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa Binary files differnew file mode 100644 index 0000000000..89867d30f6 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa Binary files differnew file mode 100644 index 0000000000..c0ca807f89 --- /dev/null +++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa |