summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex-automata/tests/gen
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex-automata/tests/gen')
-rw-r--r--third_party/rust/regex-automata/tests/gen/README.md65
-rw-r--r--third_party/rust/regex-automata/tests/gen/dense/mod.rs22
-rw-r--r--third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs43
-rw-r--r--third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfabin0 -> 11100 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfabin0 -> 11100 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfabin0 -> 7584 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfabin0 -> 7584 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/mod.rs2
-rw-r--r--third_party/rust/regex-automata/tests/gen/sparse/mod.rs22
-rw-r--r--third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs37
-rw-r--r--third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfabin0 -> 3476 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfabin0 -> 3476 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfabin0 -> 1920 bytes
-rw-r--r--third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfabin0 -> 1920 bytes
14 files changed, 191 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/tests/gen/README.md b/third_party/rust/regex-automata/tests/gen/README.md
new file mode 100644
index 0000000000..59439a11fd
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/README.md
@@ -0,0 +1,65 @@
+This directory contains tests for serialized objects from the regex-automata
+crate. Currently, there are only two supported such objects: dense and sparse
+DFAs.
+
+The idea behind these tests is to commit some serialized objects and run some
+basic tests by deserializing them and running searches and ensuring they are
+correct. We also make sure these are run under Miri, since deserialization is
+one of the biggest places where undefined behavior might occur in this crate
+(at the time of writing).
+
+The main thing we're testing is that the *current* code can still deserialize
+*old* objects correctly. Generally speaking, compatibility extends to semver
+compatible releases of this crate. Beyond that, no promises are made, although
+in practice callers can at least depend on errors occurring. (The serialized
+format always includes a version number, and incompatible changes increment
+that version number such that an error will occur if an unsupported version is
+detected.)
+
+To generate the dense DFAs, I used this command:
+
+```
+$ regex-cli generate serialize dense regex \
+ MULTI_PATTERN_V2 \
+ tests/gen/dense/ \
+ --rustfmt \
+ --safe \
+ --starts-for-each-pattern \
+ --specialize-start-states \
+ --start-kind both \
+ --unicode-word-boundary \
+ --minimize \
+ '\b[a-zA-Z]+\b' \
+ '(?m)^\S+$' \
+ '(?Rm)^\S+$'
+```
+
+And to generate the sparse DFAs, I used this command, which is the same as
+above, but with `s/dense/sparse/g`.
+
+```
+$ regex-cli generate serialize sparse regex \
+ MULTI_PATTERN_V2 \
+ tests/gen/sparse/ \
+ --rustfmt \
+ --safe \
+ --starts-for-each-pattern \
+ --specialize-start-states \
+ --start-kind both \
+ --unicode-word-boundary \
+ --minimize \
+ '\b[a-zA-Z]+\b' \
+ '(?m)^\S+$' \
+ '(?Rm)^\S+$'
+```
+
+The idea is to try to enable as many of the DFA's options as possible in order
+to test that serialization works for all of them.
+
+Arguably we should increase test coverage here, but this is a start. Note
+that in particular, this does not need to test that serialization and
+deserialization correctly roundtrips on its own. Indeed, the normal regex test
+suite has a test that does a serialization round trip for every test supported
+by DFAs. So that has very good coverage. What we're interested in testing here
+is our compatibility promise: do DFAs generated with an older revision of the
+code still deserialize correctly?
diff --git a/third_party/rust/regex-automata/tests/gen/dense/mod.rs b/third_party/rust/regex-automata/tests/gen/dense/mod.rs
new file mode 100644
index 0000000000..b4365d4e19
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/dense/mod.rs
@@ -0,0 +1,22 @@
+use regex_automata::{Input, Match};
+
+mod multi_pattern_v2;
+
+#[test]
+fn multi_pattern_v2() {
+ use multi_pattern_v2::MULTI_PATTERN_V2 as RE;
+
+ assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd"));
+ assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @"));
+ assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@"));
+ assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n"));
+ assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n"));
+ assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n"));
+ assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n"));
+ assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@"));
+ assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n"));
+
+ // Fails because we have heuristic support for Unicode word boundaries
+ // enabled.
+ assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err());
+}
diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs
new file mode 100644
index 0000000000..a95fd204b5
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2.rs
@@ -0,0 +1,43 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// regex-cli generate serialize dense regex MULTI_PATTERN_V2 tests/gen/dense/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$
+//
+// regex-cli 0.0.1 is available on crates.io.
+
+use regex_automata::{
+ dfa::{dense::DFA, regex::Regex},
+ util::{lazy::Lazy, wire::AlignAs},
+};
+
+pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u32]>>> =
+ Lazy::new(|| {
+ let dfafwd = {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!(
+ "multi_pattern_v2_fwd.littleendian.dfa"
+ ),
+ };
+ DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized forward DFA should be valid")
+ .0
+ };
+ let dfarev = {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("multi_pattern_v2_rev.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!(
+ "multi_pattern_v2_rev.littleendian.dfa"
+ ),
+ };
+ DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized reverse DFA should be valid")
+ .0
+ };
+ Regex::builder().build_from_dfas(dfafwd, dfarev)
+ });
diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa
new file mode 100644
index 0000000000..6d6e040c36
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa
new file mode 100644
index 0000000000..a1f4b3da15
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa
new file mode 100644
index 0000000000..74f74ec2a9
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa
new file mode 100644
index 0000000000..663bdb9ead
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/mod.rs b/third_party/rust/regex-automata/tests/gen/mod.rs
new file mode 100644
index 0000000000..960cb4251a
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/mod.rs
@@ -0,0 +1,2 @@
+mod dense;
+mod sparse;
diff --git a/third_party/rust/regex-automata/tests/gen/sparse/mod.rs b/third_party/rust/regex-automata/tests/gen/sparse/mod.rs
new file mode 100644
index 0000000000..b4365d4e19
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/sparse/mod.rs
@@ -0,0 +1,22 @@
+use regex_automata::{Input, Match};
+
+mod multi_pattern_v2;
+
+#[test]
+fn multi_pattern_v2() {
+ use multi_pattern_v2::MULTI_PATTERN_V2 as RE;
+
+ assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd"));
+ assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @"));
+ assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@"));
+ assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n"));
+ assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n"));
+ assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n"));
+ assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n"));
+ assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@"));
+ assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n"));
+
+ // Fails because we have heuristic support for Unicode word boundaries
+ // enabled.
+ assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err());
+}
diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs
new file mode 100644
index 0000000000..911e3f5ddc
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2.rs
@@ -0,0 +1,37 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// regex-cli generate serialize sparse regex MULTI_PATTERN_V2 regex-automata/tests/gen/sparse/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$
+//
+// regex-cli 0.0.1 is available on crates.io.
+
+use regex_automata::{
+ dfa::{regex::Regex, sparse::DFA},
+ util::lazy::Lazy,
+};
+
+pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u8]>>> =
+ Lazy::new(|| {
+ let dfafwd = {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("multi_pattern_v2_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("multi_pattern_v2_fwd.littleendian.dfa");
+ DFA::from_bytes(BYTES)
+ .expect("serialized forward DFA should be valid")
+ .0
+ };
+ let dfarev = {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("multi_pattern_v2_rev.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("multi_pattern_v2_rev.littleendian.dfa");
+ DFA::from_bytes(BYTES)
+ .expect("serialized reverse DFA should be valid")
+ .0
+ };
+ Regex::builder().build_from_dfas(dfafwd, dfarev)
+ });
diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa
new file mode 100644
index 0000000000..aa04f63162
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa
new file mode 100644
index 0000000000..c27d92abe1
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa
new file mode 100644
index 0000000000..89867d30f6
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa
Binary files differ
diff --git a/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa
new file mode 100644
index 0000000000..c0ca807f89
--- /dev/null
+++ b/third_party/rust/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa
Binary files differ