diff options
Diffstat (limited to 'third_party/rust/regex/tests/test_default.rs')
-rw-r--r-- | third_party/rust/regex/tests/test_default.rs | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/third_party/rust/regex/tests/test_default.rs b/third_party/rust/regex/tests/test_default.rs new file mode 100644 index 0000000000..be627f7a68 --- /dev/null +++ b/third_party/rust/regex/tests/test_default.rs @@ -0,0 +1,222 @@ +#![cfg_attr(feature = "pattern", feature(pattern))] + +use regex; + +// Due to macro scoping rules, this definition only applies for the modules +// defined below. Effectively, it allows us to use the same tests for both +// native and dynamic regexes. +// +// This is also used to test the various matching engines. This one exercises +// the normal code path which automatically chooses the engine based on the +// regex and the input. Other dynamic tests explicitly set the engine to use. +macro_rules! regex_new { + ($re:expr) => {{ + use regex::Regex; + Regex::new($re) + }}; +} + +macro_rules! regex { + ($re:expr) => { + regex_new!($re).unwrap() + }; +} + +macro_rules! regex_set_new { + ($re:expr) => {{ + use regex::RegexSet; + RegexSet::new($re) + }}; +} + +macro_rules! regex_set { + ($res:expr) => { + regex_set_new!($res).unwrap() + }; +} + +// Must come before other module definitions. +include!("macros_str.rs"); +include!("macros.rs"); + +mod api; +mod api_str; +mod crazy; +mod flags; +mod fowler; +mod misc; +mod multiline; +mod noparse; +mod regression; +mod regression_fuzz; +mod replace; +mod searcher; +mod set; +mod shortest_match; +mod suffix_reverse; +#[cfg(feature = "unicode")] +mod unicode; +#[cfg(feature = "unicode-perl")] +mod word_boundary; +#[cfg(feature = "unicode-perl")] +mod word_boundary_unicode; + +#[test] +fn disallow_non_utf8() { + assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); + assert!(regex::Regex::new(r"(?-u).").is_err()); + assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); + assert!(regex::Regex::new(r"(?-u)☃").is_err()); +} + +#[test] +fn disallow_octal() { + assert!(regex::Regex::new(r"\0").is_err()); +} + +#[test] +fn allow_octal() { + assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); +} + +#[test] +fn oibits() { + use regex::bytes; + use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; + use std::panic::{RefUnwindSafe, UnwindSafe}; + + fn assert_send<T: Send>() {} + fn assert_sync<T: Sync>() {} + fn assert_unwind_safe<T: UnwindSafe>() {} + fn assert_ref_unwind_safe<T: RefUnwindSafe>() {} + + assert_send::<Regex>(); + assert_sync::<Regex>(); + assert_unwind_safe::<Regex>(); + assert_ref_unwind_safe::<Regex>(); + assert_send::<RegexBuilder>(); + assert_sync::<RegexBuilder>(); + assert_unwind_safe::<RegexBuilder>(); + assert_ref_unwind_safe::<RegexBuilder>(); + + assert_send::<bytes::Regex>(); + assert_sync::<bytes::Regex>(); + assert_unwind_safe::<bytes::Regex>(); + assert_ref_unwind_safe::<bytes::Regex>(); + assert_send::<bytes::RegexBuilder>(); + assert_sync::<bytes::RegexBuilder>(); + assert_unwind_safe::<bytes::RegexBuilder>(); + assert_ref_unwind_safe::<bytes::RegexBuilder>(); + + assert_send::<RegexSet>(); + assert_sync::<RegexSet>(); + assert_unwind_safe::<RegexSet>(); + assert_ref_unwind_safe::<RegexSet>(); + assert_send::<RegexSetBuilder>(); + assert_sync::<RegexSetBuilder>(); + assert_unwind_safe::<RegexSetBuilder>(); + assert_ref_unwind_safe::<RegexSetBuilder>(); + + assert_send::<bytes::RegexSet>(); + assert_sync::<bytes::RegexSet>(); + assert_unwind_safe::<bytes::RegexSet>(); + assert_ref_unwind_safe::<bytes::RegexSet>(); + assert_send::<bytes::RegexSetBuilder>(); + assert_sync::<bytes::RegexSetBuilder>(); + assert_unwind_safe::<bytes::RegexSetBuilder>(); + assert_ref_unwind_safe::<bytes::RegexSetBuilder>(); +} + +// See: https://github.com/rust-lang/regex/issues/568 +#[test] +fn oibits_regression() { + use regex::Regex; + use std::panic; + + let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); +} + +// See: https://github.com/rust-lang/regex/issues/750 +#[test] +#[cfg(target_pointer_width = "64")] +fn regex_is_reasonably_small() { + use std::mem::size_of; + + use regex::bytes; + use regex::{Regex, RegexSet}; + + assert_eq!(16, size_of::<Regex>()); + assert_eq!(16, size_of::<RegexSet>()); + assert_eq!(16, size_of::<bytes::Regex>()); + assert_eq!(16, size_of::<bytes::RegexSet>()); +} + +// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 +// See: CVE-2022-24713 +// +// We test that our regex compiler will correctly return a "too big" error when +// we try to use a very large repetition on an *empty* sub-expression. +// +// At the time this test was written, the regex compiler does not represent +// empty sub-expressions with any bytecode instructions. In effect, it's an +// "optimization" to leave them out, since they would otherwise correspond +// to an unconditional JUMP in the regex bytecode (i.e., an unconditional +// epsilon transition in the NFA graph). Therefore, an empty sub-expression +// represents an interesting case for the compiler's size limits. Since it +// doesn't actually contribute any additional memory to the compiled regex +// instructions, the size limit machinery never detects it. Instead, it just +// dumbly tries to compile the empty sub-expression N times, where N is the +// repetition size. +// +// When N is very large, this will cause the compiler to essentially spin and +// do nothing for a decently large amount of time. It causes the regex to take +// quite a bit of time to compile, despite the concrete syntax of the regex +// being quite small. +// +// The degree to which this is actually a problem is somewhat of a judgment +// call. Some regexes simply take a long time to compile. But in general, you +// should be able to reasonably control this by setting lower or higher size +// limits on the compiled object size. But this mitigation doesn't work at all +// for this case. +// +// This particular test is somewhat narrow. It merely checks that regex +// compilation will, at some point, return a "too big" error. Before the +// fix landed, this test would eventually fail because the regex would be +// successfully compiled (after enough time elapsed). So while this test +// doesn't check that we exit in a reasonable amount of time, it does at least +// check that we are properly returning an error at some point. +#[test] +fn big_empty_regex_fails() { + use regex::Regex; + + let result = Regex::new("(?:){4294967295}"); + assert!(result.is_err()); +} + +// Below is a "billion laughs" variant of the previous test case. +#[test] +fn big_empty_reps_chain_regex_fails() { + use regex::Regex; + + let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}"); + assert!(result.is_err()); +} + +// Below is another situation where a zero-length sub-expression can be +// introduced. +#[test] +fn big_zero_reps_regex_fails() { + use regex::Regex; + + let result = Regex::new(r"x{0}{4294967295}"); + assert!(result.is_err()); +} + +// Testing another case for completeness. +#[test] +fn empty_alt_regex_fails() { + use regex::Regex; + + let result = Regex::new(r"(?:|){4294967295}"); + assert!(result.is_err()); +} |