diff options
Diffstat (limited to 'third_party/rust/regex-automata/tests/fuzz')
12 files changed, 186 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/tests/fuzz/dense.rs b/third_party/rust/regex-automata/tests/fuzz/dense.rs new file mode 100644 index 0000000000..213891b3e8 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/dense.rs @@ -0,0 +1,52 @@ +// This test was found by a fuzzer input that crafted a way to provide +// an invalid serialization of ByteClasses that passed our verification. +// Specifically, the verification step in the deserialization of ByteClasses +// used an iterator that depends on part of the serialized bytes being correct. +// (Specifically, the encoding of the number of classes.) +#[test] +fn invalid_byte_classes() { + let data = include_bytes!( + "testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9", + ); + let _ = fuzz_run(data); +} + +#[test] +fn invalid_byte_classes_min() { + let data = include_bytes!( + "testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9", + ); + let _ = fuzz_run(data); +} + +// This is the code from the fuzz target. Kind of sucks to duplicate it here, +// but this is fundamentally how we interpret the date. +fn fuzz_run(given_data: &[u8]) -> Option<()> { + use regex_automata::dfa::Automaton; + + if given_data.len() < 2 { + return None; + } + let haystack_len = usize::from(given_data[0]); + let haystack = given_data.get(1..1 + haystack_len)?; + let given_dfa_bytes = given_data.get(1 + haystack_len..)?; + + // We help the fuzzer along by adding a preamble to the bytes that should + // at least make these first parts valid. The preamble expects a very + // specific sequence of bytes, so it makes sense to just force this. + let label = "rust-regex-automata-dfa-dense\x00\x00\x00"; + assert_eq!(0, label.len() % 4); + let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec(); + let version_check = 2u32.to_ne_bytes().to_vec(); + let mut dfa_bytes: Vec<u8> = vec![]; + dfa_bytes.extend(label.as_bytes()); + dfa_bytes.extend(&endianness_check); + dfa_bytes.extend(&version_check); + dfa_bytes.extend(given_dfa_bytes); + // This is the real test: checking that any input we give to + // DFA::from_bytes will never result in a panic. + let (dfa, _) = + regex_automata::dfa::dense::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(®ex_automata::Input::new(haystack)); + Some(()) +} diff --git a/third_party/rust/regex-automata/tests/fuzz/mod.rs b/third_party/rust/regex-automata/tests/fuzz/mod.rs new file mode 100644 index 0000000000..960cb4251a --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/mod.rs @@ -0,0 +1,2 @@ +mod dense; +mod sparse; diff --git a/third_party/rust/regex-automata/tests/fuzz/sparse.rs b/third_party/rust/regex-automata/tests/fuzz/sparse.rs new file mode 100644 index 0000000000..837ad10147 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/sparse.rs @@ -0,0 +1,132 @@ +// This is a regression test for a bug in how special states are handled. The +// fuzzer found a case where a state returned true for 'is_special_state' but +// *didn't* return true for 'is_dead_state', 'is_quit_state', 'is_match_state', +// 'is_start_state' or 'is_accel_state'. This in turn tripped a debug assertion +// in the core matching loop that requires 'is_special_state' being true to +// imply that one of the other routines returns true. +// +// We fixed this by adding some validation to both dense and sparse DFAs that +// checks that this property is true for every state ID in the DFA. +#[test] +fn invalid_special_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838", + ); + let _ = fuzz_run(data); +} + +// This is an interesting case where a fuzzer generated a DFA with +// a transition to a state ID that decoded as a valid state, but +// where the ID itself did not point to one of the two existing +// states for this particular DFA. This combined with marking this +// transition's state ID as special but without actually making one of the +// 'is_{dead,quit,match,start,accel}_state' predicates return true ended up +// tripping the 'debug_assert(dfa.is_quit_state(sid))' code in the search +// routine. +// +// We fixed this in alloc mode by checking that every transition points to a +// valid state ID. Technically this bug still exists in core-only mode, but +// it's not clear how to fix it. And it's worth pointing out that the search +// routine won't panic in production. It will just provide invalid results. And +// that's acceptable within the contract of DFA::from_bytes. +#[test] +fn transition_to_invalid_but_valid_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9", + ); + let _ = fuzz_run(data); +} + +// Another one caught by the fuzzer where it generated a DFA that reported a +// start state as a match state. Since matches are always delayed by one byte, +// start states specifically cannot be match states. And indeed, the search +// code relies on this. +#[test] +fn start_state_is_not_match_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000", + ); + let _ = fuzz_run(data); +} + +// This is variation on 'transition_to_invalid_but_valid_state', but happens +// to a start state. Namely, the fuzz data here builds a DFA with a start +// state ID that is incorrect but points to a sequence of bytes that satisfies +// state decoding validation. This errant state in turn has a non-zero number +// of transitions, and its those transitions that point to a state that does +// *not* satisfy state decoding validation. But we never checked those. So the +// fix here was to add validation of the transitions off of the start state. +#[test] +fn start_state_has_valid_transitions() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose ID was in the match state +// ID range, but where the state itself was encoded with zero pattern IDs. We +// added validation code to check this case. +#[test] +fn match_state_inconsistency() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose ID was in the accelerator +// range, but who didn't have any accelerators. This violated an invariant that +// assumes that if 'dfa.is_accel_state(sid)' returns true, then the state must +// have some accelerators. +#[test] +fn invalid_accelerators() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose EOI transition led to +// a quit state, which is generally considered illegal. Why? Because the EOI +// transition is defined over a special sentinel alphabet element and one +// cannot configure a DFA to "quit" on that sentinel. +#[test] +fn eoi_transition_to_quit_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9", + ); + let _ = fuzz_run(data); +} + +// This is the code from the fuzz target. Kind of sucks to duplicate it here, +// but this is fundamentally how we interpret the date. +fn fuzz_run(given_data: &[u8]) -> Option<()> { + use regex_automata::dfa::Automaton; + + if given_data.len() < 2 { + return None; + } + let haystack_len = usize::from(given_data[0]); + let haystack = given_data.get(1..1 + haystack_len)?; + let given_dfa_bytes = given_data.get(1 + haystack_len..)?; + + // We help the fuzzer along by adding a preamble to the bytes that should + // at least make these first parts valid. The preamble expects a very + // specific sequence of bytes, so it makes sense to just force this. + let label = "rust-regex-automata-dfa-sparse\x00\x00"; + assert_eq!(0, label.len() % 4); + let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec(); + let version_check = 2u32.to_ne_bytes().to_vec(); + let mut dfa_bytes: Vec<u8> = vec![]; + dfa_bytes.extend(label.as_bytes()); + dfa_bytes.extend(&endianness_check); + dfa_bytes.extend(&version_check); + dfa_bytes.extend(given_dfa_bytes); + // This is the real test: checking that any input we give to + // DFA::from_bytes will never result in a panic. + let (dfa, _) = + regex_automata::dfa::sparse::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(®ex_automata::Input::new(haystack)); + Some(()) +} diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 Binary files differnew file mode 100644 index 0000000000..972bfb2cd4 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 Binary files differnew file mode 100644 index 0000000000..72dbdad825 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 Binary files differnew file mode 100644 index 0000000000..5ce508803e --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 Binary files differnew file mode 100644 index 0000000000..4fa13fbed4 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 Binary files differnew file mode 100644 index 0000000000..0f809f33f4 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 Binary files differnew file mode 100644 index 0000000000..8b435fd26e --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 Binary files differnew file mode 100644 index 0000000000..69b65160c2 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b Binary files differnew file mode 100644 index 0000000000..15b43e47f2 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b diff --git a/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 Binary files differnew file mode 100644 index 0000000000..aa72eb1dd6 --- /dev/null +++ b/third_party/rust/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 |