#![cfg_attr(feature = "pattern", feature(pattern))] use regex; // Due to macro scoping rules, this definition only applies for the modules // defined below. Effectively, it allows us to use the same tests for both // native and dynamic regexes. // // This is also used to test the various matching engines. This one exercises // the normal code path which automatically chooses the engine based on the // regex and the input. Other dynamic tests explicitly set the engine to use. macro_rules! regex_new { ($re:expr) => {{ use regex::Regex; Regex::new($re) }}; } macro_rules! regex { ($re:expr) => { regex_new!($re).unwrap() }; } macro_rules! regex_set_new { ($re:expr) => {{ use regex::RegexSet; RegexSet::new($re) }}; } macro_rules! regex_set { ($res:expr) => { regex_set_new!($res).unwrap() }; } // Must come before other module definitions. include!("macros_str.rs"); include!("macros.rs"); mod api; mod api_str; mod crazy; mod flags; mod fowler; mod misc; mod multiline; mod noparse; mod regression; mod regression_fuzz; mod replace; mod searcher; mod set; mod shortest_match; mod suffix_reverse; #[cfg(feature = "unicode")] mod unicode; #[cfg(feature = "unicode-perl")] mod word_boundary; #[cfg(feature = "unicode-perl")] mod word_boundary_unicode; #[test] fn disallow_non_utf8() { assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); assert!(regex::Regex::new(r"(?-u).").is_err()); assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); assert!(regex::Regex::new(r"(?-u)☃").is_err()); } #[test] fn disallow_octal() { assert!(regex::Regex::new(r"\0").is_err()); } #[test] fn allow_octal() { assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); } #[test] fn oibits() { use regex::bytes; use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; use std::panic::{RefUnwindSafe, UnwindSafe}; fn assert_send() {} fn assert_sync() {} fn assert_unwind_safe() {} fn assert_ref_unwind_safe() {} assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); 
assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); assert_send::(); assert_sync::(); assert_unwind_safe::(); assert_ref_unwind_safe::(); } // See: https://github.com/rust-lang/regex/issues/568 #[test] fn oibits_regression() { use regex::Regex; use std::panic; let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); } // See: https://github.com/rust-lang/regex/issues/750 #[test] #[cfg(target_pointer_width = "64")] fn regex_is_reasonably_small() { use std::mem::size_of; use regex::bytes; use regex::{Regex, RegexSet}; assert_eq!(16, size_of::()); assert_eq!(16, size_of::()); assert_eq!(16, size_of::()); assert_eq!(16, size_of::()); } // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 // See: CVE-2022-24713 // // We test that our regex compiler will correctly return a "too big" error when // we try to use a very large repetition on an *empty* sub-expression. // // At the time this test was written, the regex compiler does not represent // empty sub-expressions with any bytecode instructions. In effect, it's an // "optimization" to leave them out, since they would otherwise correspond // to an unconditional JUMP in the regex bytecode (i.e., an unconditional // epsilon transition in the NFA graph). Therefore, an empty sub-expression // represents an interesting case for the compiler's size limits. Since it // doesn't actually contribute any additional memory to the compiled regex // instructions, the size limit machinery never detects it. 
// Instead, it just dumbly tries to compile the empty sub-expression N times,
// where N is the repetition size.
//
// When N is very large, this will cause the compiler to essentially spin and
// do nothing for a decently large amount of time. It causes the regex to take
// quite a bit of time to compile, despite the concrete syntax of the regex
// being quite small.
//
// The degree to which this is actually a problem is somewhat of a judgment
// call. Some regexes simply take a long time to compile. But in general, you
// should be able to reasonably control this by setting lower or higher size
// limits on the compiled object size. But this mitigation doesn't work at all
// for this case.
//
// This particular test is somewhat narrow. It merely checks that regex
// compilation will, at some point, return a "too big" error. Before the
// fix landed, this test would eventually fail because the regex would be
// successfully compiled (after enough time elapsed). So while this test
// doesn't check that we exit in a reasonable amount of time, it does at least
// check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    // An empty group repeated 4294967295 times must be refused.
    assert!(regex::Regex::new("(?:){4294967295}").is_err());
}

// Below is a "billion laughs" variant of the previous test case.
#[test]
fn big_empty_reps_chain_regex_fails() {
    // Six stacked `{64}` repetitions of an empty group must be refused.
    assert!(regex::Regex::new("(?:){64}{64}{64}{64}{64}{64}").is_err());
}

// Below is another situation where a zero-length sub-expression can be
// introduced.
#[test]
fn big_zero_reps_regex_fails() {
    // `x{0}` is the zero-length operand of the huge outer repetition.
    assert!(regex::Regex::new(r"x{0}{4294967295}").is_err());
}

// Testing another case for completeness.
#[test]
fn empty_alt_regex_fails() {
    // An alternation of two empty branches, repeated 4294967295 times, must
    // be refused by the compiler.
    let compiled = regex::Regex::new(r"(?:|){4294967295}");
    assert!(compiled.is_err());
}

// Regression test for: https://github.com/rust-lang/regex/issues/969
#[test]
fn regression_i969() {
    let pattern = regex::Regex::new(r"c.*d\z").unwrap();
    let haystack = "ababcd";
    let start = 4;

    // Both search entry points, started at offset 4, must report a match
    // ending at offset 6.
    assert_eq!(Some(6), pattern.shortest_match_at(haystack, start));
    let match_end = pattern.find_at(haystack, start).map(|found| found.end());
    assert_eq!(Some(6), match_end);
}