diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:47:55 +0000 |
commit | 2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4 (patch) | |
tree | 033cc839730fda84ff08db877037977be94e5e3a /vendor/regex/tests | |
parent | Initial commit. (diff) | |
download | cargo-2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4.tar.xz cargo-2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4.zip |
Adding upstream version 0.70.1+ds1.upstream/0.70.1+ds1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex/tests')
21 files changed, 1178 insertions, 0 deletions
diff --git a/vendor/regex/tests/fuzz/mod.rs b/vendor/regex/tests/fuzz/mod.rs new file mode 100644 index 0000000..88c196a --- /dev/null +++ b/vendor/regex/tests/fuzz/mod.rs @@ -0,0 +1,166 @@ +// This set of tests is different from regression_fuzz in that the tests start +// from the fuzzer data directly. The test essentially duplicates the fuzz +// target. I wonder if there's a better way to set this up... Hmmm. I bet +// `cargo fuzz` has something where it can run a target against crash files and +// verify that they pass. + +// This case found by the fuzzer causes the meta engine to use the "reverse +// inner" literal strategy. That in turn uses a specialized search routine +// for the lazy DFA in order to avoid worst case quadratic behavior. That +// specialized search routine had a bug where it assumed that start state +// specialization was disabled. But this is indeed not the case, since it +// reuses the "general" lazy DFA for the full regex created as part of the core +// strategy, which might very well have start states specialized due to the +// existence of a prefilter. +// +// This is a somewhat weird case because if the core engine has a prefilter, +// then it's usually the case that the "reverse inner" optimization won't be +// pursued in that case. But there are some heuristics that try to detect +// whether a prefilter is "fast" or not. If it's not, then the meta engine will +// attempt the reverse inner optimization. And indeed, that's what happens +// here. So the reverse inner optimization ends up with a lazy DFA that has +// start states specialized. Ideally this wouldn't happen because specializing +// start states without a prefilter inside the DFA can be disastrous for +// performance by causing the DFA to ping-pong in and out of the special state +// handling. In this case, it's probably not a huge deal because the lazy +// DFA is only used for part of the matching where as the work horse is the +// prefilter found by the reverse inner optimization. +// +// We could maybe fix this by refactoring the meta engine to be a little more +// careful. For example, by attempting the optimizations before building the +// core engine. But this is perhaps a little tricky. +#[test] +fn meta_stopat_specialize_start_states() { + let data = include_bytes!( + "testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9", + ); + let _ = run(data); +} + +// Same bug as meta_stopat_specialize_start_states, but minimized by the +// fuzzer. +#[test] +fn meta_stopat_specialize_start_states_min() { + let data = include_bytes!( + "testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9", + ); + let _ = run(data); +} + +// This input generated a pattern with a fail state (e.g., \P{any}, [^\s\S] +// or [a&&b]). But the fail state was in a branch, where a subsequent branch +// should have led to an overall match, but handling of the fail state +// prevented it from doing so. A hand-minimized version of this is '[^\s\S]A|B' +// on the haystack 'B'. That should yield a match of 'B'. +// +// The underlying cause was an issue in how DFA determinization handled fail +// states. The bug didn't impact the PikeVM or the bounded backtracker. +#[test] +fn fail_branch_prevents_match() { + let data = include_bytes!( + "testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04", + ); + let _ = run(data); +} + +// This input generated a pattern that contained a sub-expression like this: +// +// a{0}{50000} +// +// This turned out to provoke quadratic behavior in the NFA compiler. +// Basically, the NFA compiler works in two phases. The first phase builds +// a more complicated-but-simpler-to-construct sequence of NFA states that +// includes unconditional epsilon transitions. As part of converting this +// sequence to the "final" NFA, we remove those unconditional espilon +// transition. The code responsible for doing this follows every chain of +// these transitions and remaps the state IDs. The way we were doing this +// before resulted in re-following every subsequent part of the chain for each +// state in the chain, which ended up being quadratic behavior. We effectively +// memoized this, which fixed the performance bug. +#[test] +fn slow_big_empty_chain() { + let data = include_bytes!( + "testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain2() { + let data = include_bytes!( + "testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain3() { + let data = include_bytes!( + "testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain4() { + let data = include_bytes!( + "testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain5() { + let data = include_bytes!( + "testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain6() { + let data = include_bytes!( + "testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085", + ); + let _ = run(data); +} + +// This fuzz input generated a pattern with a large repetition that would fail +// NFA compilation, but its HIR was small. (HIR doesn't expand repetitions.) +// But, the bounds were high enough that the minimum length calculation +// overflowed. We fixed this by using saturating arithmetic (and also checked +// arithmetic for the maximum length calculation). +// +// Incidentally, this was the only unguarded arithmetic operation performed in +// the HIR smart constructors. And the fuzzer found it. Hah. Nice. +#[test] +fn minimum_len_overflow() { + let data = include_bytes!( + "testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff", + ); + let _ = run(data); +} + +// This is the fuzz target function. We duplicate it here since this is the +// thing we use to interpret the data. It is ultimately what we want to +// succeed. +fn run(data: &[u8]) -> Option<()> { + if data.len() < 2 { + return None; + } + let mut split_at = usize::from(data[0]); + let data = std::str::from_utf8(&data[1..]).ok()?; + // Split data into a regex and haystack to search. + let len = usize::try_from(data.chars().count()).ok()?; + split_at = std::cmp::max(split_at, 1) % len; + let char_index = data.char_indices().nth(split_at)?.0; + let (pattern, input) = data.split_at(char_index); + let re = regex::Regex::new(pattern).ok()?; + re.is_match(input); + Some(()) +} diff --git a/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff b/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff Binary files differnew file mode 100644 index 0000000..f7ffbc9 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff diff --git a/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 Binary files differnew file mode 100644 index 0000000..8674819 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 diff --git a/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 b/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 Binary files differnew file mode 100644 index 0000000..152769d --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 diff --git a/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 Binary files differnew file mode 100644 index 0000000..69663d5 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e b/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e Binary files differnew file mode 100644 index 0000000..6c22803 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c b/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c Binary files differnew file mode 100644 index 0000000..0570f32 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 b/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 Binary files differnew file mode 100644 index 0000000..182bc7f --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 b/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 Binary files differnew file mode 100644 index 0000000..f939c33 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a b/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a Binary files differnew file mode 100644 index 0000000..a87de23 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 b/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 Binary files differnew file mode 100644 index 0000000..dc33293 --- /dev/null +++ b/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 diff --git a/vendor/regex/tests/lib.rs b/vendor/regex/tests/lib.rs new file mode 100644 index 0000000..5f45d16 --- /dev/null +++ b/vendor/regex/tests/lib.rs @@ -0,0 +1,58 @@ +#![cfg_attr(feature = "pattern", feature(pattern))] + +mod fuzz; +mod misc; +mod regression; +mod regression_fuzz; +mod replace; +#[cfg(feature = "pattern")] +mod searcher; +mod suite_bytes; +mod suite_bytes_set; +mod suite_string; +mod suite_string_set; + +const BLACKLIST: &[&str] = &[ + // Nothing to blacklist yet! +]; + +/*fn suite() -> anyhow::Result<regex_test::RegexTests> { + let _ = env_logger::try_init(); + + let mut tests = regex_test::RegexTests::new(); + macro_rules! load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("../testdata/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("anchored"); + load!("bytes"); + load!("crazy"); + load!("crlf"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("leftmost-all"); + load!("line-terminator"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("substring"); + load!("unicode"); + load!("utf8"); + load!("word-boundary"); + load!("word-boundary-special"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + + Ok(tests) +}*/ diff --git a/vendor/regex/tests/misc.rs b/vendor/regex/tests/misc.rs new file mode 100644 index 0000000..91e7d28 --- /dev/null +++ b/vendor/regex/tests/misc.rs @@ -0,0 +1,143 @@ +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + +#[test] +fn unclosed_group_error() { + let err = Regex::new(r"(").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("unclosed group"), "error message: {:?}", msg); +} + +#[test] +fn regex_string() { + assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); + assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); + assert_eq!( + r#"Regex("[a-zA-Z0-9]+")"#, + &format!("{:?}", regex!(r"[a-zA-Z0-9]+")) + ); +} + +#[test] +fn capture_names() { + let re = regex!(r"(.)(?P<a>.)"); + assert_eq!(3, re.captures_len()); + assert_eq!((3, Some(3)), re.capture_names().size_hint()); + assert_eq!( + vec![None, None, Some("a")], + re.capture_names().collect::<Vec<_>>() + ); +} + +#[test] +fn capture_index() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + assert_eq!(&cap[0], "abc"); + assert_eq!(&cap[1], "abc"); + assert_eq!(&cap["name"], "abc"); +} + +#[test] +#[should_panic] +fn capture_index_panic_usize() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + let _ = cap[2]; +} + +#[test] +#[should_panic] +fn capture_index_panic_name() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + let _ = cap["bad name"]; +} + +#[test] +fn capture_index_lifetime() { + // This is a test of whether the types on `caps["..."]` are general + // enough. If not, this will fail to typecheck. + fn inner(s: &str) -> usize { + let re = regex!(r"(?P<number>[0-9]+)"); + let caps = re.captures(s).unwrap(); + caps["number"].len() + } + assert_eq!(3, inner("123")); +} + +#[test] +fn capture_misc() { + let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); + let cap = re.captures("abc").unwrap(); + + assert_eq!(5, cap.len()); + + assert_eq!((0, 3), { + let m = cap.get(0).unwrap(); + (m.start(), m.end()) + }); + assert_eq!(None, cap.get(2)); + assert_eq!((2, 3), { + let m = cap.get(4).unwrap(); + (m.start(), m.end()) + }); + + assert_eq!("abc", cap.get(0).unwrap().as_str()); + assert_eq!(None, cap.get(2)); + assert_eq!("c", cap.get(4).unwrap().as_str()); + + assert_eq!(None, cap.name("a")); + assert_eq!("c", cap.name("b").unwrap().as_str()); +} + +#[test] +fn sub_capture_matches() { + let re = regex!(r"([a-z])(([a-z])|([0-9]))"); + let cap = re.captures("a5").unwrap(); + let subs: Vec<_> = cap.iter().collect(); + + assert_eq!(5, subs.len()); + assert!(subs[0].is_some()); + assert!(subs[1].is_some()); + assert!(subs[2].is_some()); + assert!(subs[3].is_none()); + assert!(subs[4].is_some()); + + assert_eq!("a5", subs[0].unwrap().as_str()); + assert_eq!("a", subs[1].unwrap().as_str()); + assert_eq!("5", subs[2].unwrap().as_str()); + assert_eq!("5", subs[4].unwrap().as_str()); +} + +// Test that the DFA can handle pathological cases. (This should result in the +// DFA's cache being flushed too frequently, which should cause it to quit and +// fall back to the NFA algorithm.) +#[test] +fn dfa_handles_pathological_case() { + fn ones_and_zeroes(count: usize) -> String { + let mut s = String::new(); + for i in 0..count { + if i % 3 == 0 { + s.push('1'); + } else { + s.push('0'); + } + } + s + } + + let re = regex!(r"[01]*1[01]{20}$"); + let text = { + let mut pieces = ones_and_zeroes(100_000); + pieces.push('1'); + pieces.push_str(&ones_and_zeroes(20)); + pieces + }; + assert!(re.is_match(&text)); +} diff --git a/vendor/regex/tests/regression.rs b/vendor/regex/tests/regression.rs new file mode 100644 index 0000000..a586701 --- /dev/null +++ b/vendor/regex/tests/regression.rs @@ -0,0 +1,94 @@ +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + +// See: https://github.com/rust-lang/regex/issues/48 +#[test] +fn invalid_regexes_no_crash() { + assert!(Regex::new("(*)").is_err()); + assert!(Regex::new("(?:?)").is_err()); + assert!(Regex::new("(?)").is_err()); + assert!(Regex::new("*").is_err()); +} + +// See: https://github.com/rust-lang/regex/issues/98 +#[test] +fn regression_many_repeat_stack_overflow() { + let re = regex!("^.{1,2500}"); + assert_eq!( + vec![0..1], + re.find_iter("a").map(|m| m.range()).collect::<Vec<_>>() + ); +} + +// See: https://github.com/rust-lang/regex/issues/555 +#[test] +fn regression_invalid_repetition_expr() { + assert!(Regex::new("(?m){1,1}").is_err()); +} + +// See: https://github.com/rust-lang/regex/issues/527 +#[test] +fn regression_invalid_flags_expression() { + assert!(Regex::new("(((?x)))").is_ok()); +} + +// See: https://github.com/rust-lang/regex/issues/129 +#[test] +fn regression_captures_rep() { + let re = regex!(r"([a-f]){2}(?P<foo>[x-z])"); + let caps = re.captures("abx").unwrap(); + assert_eq!(&caps["foo"], "x"); +} + +// See: https://github.com/BurntSushi/ripgrep/issues/1247 +#[cfg(feature = "unicode-perl")] +#[test] +fn regression_nfa_stops1() { + let re = regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); + assert_eq!(0, re.find_iter(b"s\xE4").count()); +} + +// See: https://github.com/rust-lang/regex/issues/981 +#[cfg(feature = "unicode")] +#[test] +fn regression_bad_word_boundary() { + let re = regex!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#); + let hay = "ubi-Darwin-x86_64.tar.gz"; + assert!(!re.is_match(hay)); + let hay = "ubi-Windows-x86_64.zip"; + assert!(re.is_match(hay)); +} + +// See: https://github.com/rust-lang/regex/issues/982 +#[cfg(feature = "unicode-perl")] +#[test] +fn regression_unicode_perl_not_enabled() { + let pat = r"(\d+\s?(years|year|y))?\s?(\d+\s?(months|month|m))?\s?(\d+\s?(weeks|week|w))?\s?(\d+\s?(days|day|d))?\s?(\d+\s?(hours|hour|h))?"; + assert!(Regex::new(pat).is_ok()); +} + +// See: https://github.com/rust-lang/regex/issues/995 +#[test] +fn regression_big_regex_overflow() { + let pat = r" {2147483516}{2147483416}{5}"; + assert!(Regex::new(pat).is_err()); +} + +// See: https://github.com/rust-lang/regex/issues/999 +#[test] +fn regression_complete_literals_suffix_incorrect() { + let needles = vec![ + "aA", "bA", "cA", "dA", "eA", "fA", "gA", "hA", "iA", "jA", "kA", + "lA", "mA", "nA", "oA", "pA", "qA", "rA", "sA", "tA", "uA", "vA", + "wA", "xA", "yA", "zA", + ]; + let pattern = needles.join("|"); + let re = regex!(&pattern); + let hay = "FUBAR"; + assert_eq!(0, re.find_iter(hay).count()); +} diff --git a/vendor/regex/tests/regression_fuzz.rs b/vendor/regex/tests/regression_fuzz.rs new file mode 100644 index 0000000..f90ad4c --- /dev/null +++ b/vendor/regex/tests/regression_fuzz.rs @@ -0,0 +1,61 @@ +// These tests are only run for the "default" test target because some of them +// can take quite a long time. Some of them take long enough that it's not +// practical to run them in debug mode. :-/ + +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + +// See: https://oss-fuzz.com/testcase-detail/5673225499181056 +// +// Ignored by default since it takes too long in debug mode (almost a minute). +#[test] +#[ignore] +fn fuzz1() { + regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**"); +} + +// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505 +// See: https://github.com/rust-lang/regex/issues/722 +#[test] +#[cfg(feature = "unicode")] +fn empty_any_errors_no_panic() { + assert!(Regex::new(r"\P{any}").is_ok()); +} + +// This tests that a very large regex errors during compilation instead of +// using gratuitous amounts of memory. The specific problem is that the +// compiler wasn't accounting for the memory used by Unicode character classes +// correctly. +// +// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579 +#[test] +fn big_regex_fails_to_compile() { + let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}"; + assert!(Regex::new(pat).is_err()); +} + +// This was caught while on master but before a release went out(!). +// +// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58173 +#[test] +fn todo() { + let pat = "(?:z|xx)@|xx"; + assert!(Regex::new(pat).is_ok()); +} + +// This was caused by the fuzzer, and then minimized by hand. +// +// This was caused by a bug in DFA determinization that mishandled NFA fail +// states. +#[test] +fn fail_branch_prevents_match() { + let pat = r".*[a&&b]A|B"; + let hay = "B"; + let re = Regex::new(pat).unwrap(); + assert!(re.is_match(hay)); +} diff --git a/vendor/regex/tests/replace.rs b/vendor/regex/tests/replace.rs new file mode 100644 index 0000000..f26ae46 --- /dev/null +++ b/vendor/regex/tests/replace.rs @@ -0,0 +1,183 @@ +macro_rules! replace( + ($name:ident, $which:ident, $re:expr, + $search:expr, $replace:expr, $result:expr) => ( + #[test] + fn $name() { + let re = regex::Regex::new($re).unwrap(); + assert_eq!(re.$which($search, $replace), $result); + } + ); +); + +replace!(first, replace, r"[0-9]", "age: 26", "Z", "age: Z6"); +replace!(plus, replace, r"[0-9]+", "age: 26", "Z", "age: Z"); +replace!(all, replace_all, r"[0-9]", "age: 26", "Z", "age: ZZ"); +replace!(groups, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", "$2 $1", "w2 w1"); +replace!( + double_dollar, + replace, + r"([^ ]+)[ ]+([^ ]+)", + "w1 w2", + "$2 $$1", + "w2 $1" +); +// replace!(adjacent_index, replace, +// r"([^aeiouy])ies$", "skies", "$1y", "sky"); +replace!( + named, + replace_all, + r"(?P<first>[^ ]+)[ ]+(?P<last>[^ ]+)(?P<space>[ ]*)", + "w1 w2 w3 w4", + "$last $first$space", + "w2 w1 w4 w3" +); +replace!( + trim, + replace_all, + "^[ \t]+|[ \t]+$", + " \t trim me\t \t", + "", + "trim me" +); +replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b"); +// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b"); +replace!( + simple_expand, + replace_all, + r"([a-z]) ([a-z])", + "a b", + "$2 $1", + "b a" +); +replace!( + literal_dollar1, + replace_all, + r"([a-z]+) ([a-z]+)", + "a b", + "$$1", + "$1" +); +replace!( + literal_dollar2, + replace_all, + r"([a-z]+) ([a-z]+)", + "a b", + "$2 $$c $1", + "b $c a" +); +replace!( + no_expand1, + replace, + r"([^ ]+)[ ]+([^ ]+)", + "w1 w2", + regex::NoExpand("$2 $1"), + "$2 $1" +); +replace!( + no_expand2, + replace, + r"([^ ]+)[ ]+([^ ]+)", + "w1 w2", + regex::NoExpand("$$1"), + "$$1" +); +replace!( + closure_returning_reference, + replace, + r"([0-9]+)", + "age: 26", + |captures: ®ex::Captures<'_>| { captures[1][0..1].to_owned() }, + "age: 2" +); +replace!( + closure_returning_value, + replace, + r"[0-9]+", + "age: 26", + |_captures: ®ex::Captures<'_>| "Z".to_owned(), + "age: Z" +); + +// See https://github.com/rust-lang/regex/issues/314 +replace!( + match_at_start_replace_with_empty, + replace_all, + r"foo", + "foobar", + "", + "bar" +); + +// See https://github.com/rust-lang/regex/issues/393 +replace!(single_empty_match, replace, r"^", "bar", "foo", "foobar"); + +// See https://github.com/rust-lang/regex/issues/399 +replace!( + capture_longest_possible_name, + replace_all, + r"(.)", + "b", + "${1}a $1a", + "ba " +); + +replace!( + impl_string, + replace, + r"[0-9]", + "age: 26", + "Z".to_string(), + "age: Z6" +); +replace!( + impl_string_ref, + replace, + r"[0-9]", + "age: 26", + &"Z".to_string(), + "age: Z6" +); +replace!( + impl_cow_str_borrowed, + replace, + r"[0-9]", + "age: 26", + std::borrow::Cow::<'_, str>::Borrowed("Z"), + "age: Z6" +); +replace!( + impl_cow_str_borrowed_ref, + replace, + r"[0-9]", + "age: 26", + &std::borrow::Cow::<'_, str>::Borrowed("Z"), + "age: Z6" +); +replace!( + impl_cow_str_owned, + replace, + r"[0-9]", + "age: 26", + std::borrow::Cow::<'_, str>::Owned("Z".to_string()), + "age: Z6" +); +replace!( + impl_cow_str_owned_ref, + replace, + r"[0-9]", + "age: 26", + &std::borrow::Cow::<'_, str>::Owned("Z".to_string()), + "age: Z6" +); + +#[test] +fn replacen_no_captures() { + let re = regex::Regex::new(r"[0-9]").unwrap(); + assert_eq!(re.replacen("age: 1234", 2, "Z"), "age: ZZ34"); +} + +#[test] +fn replacen_with_captures() { + let re = regex::Regex::new(r"([0-9])").unwrap(); + assert_eq!(re.replacen("age: 1234", 2, "${1}Z"), "age: 1Z2Z34"); +} diff --git a/vendor/regex/tests/searcher.rs b/vendor/regex/tests/searcher.rs new file mode 100644 index 0000000..0a33104 --- /dev/null +++ b/vendor/regex/tests/searcher.rs @@ -0,0 +1,101 @@ +macro_rules! searcher { + ($name:ident, $re:expr, $haystack:expr) => ( + searcher!($name, $re, $haystack, vec vec![]); + ); + ($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => ( + searcher!($name, $re, $haystack, vec vec![$($steps),*]); + ); + ($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => ( + searcher!($name, $re, $haystack, vec vec![$($steps),*]); + ); + ($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => ( + #[test] + #[allow(unused_imports)] + fn $name() { + use std::str::pattern::{Pattern, Searcher}; + use std::str::pattern::SearchStep::{Match, Reject, Done}; + let re = regex::Regex::new($re).unwrap(); + let mut se = re.into_searcher($haystack); + let mut got_steps = vec![]; + loop { + match se.next() { + Done => break, + step => { got_steps.push(step); } + } + } + assert_eq!(got_steps, $expect_steps); + } + ); +} + +searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0)); +searcher!( + searcher_empty_regex, + r"", + "ab", + Match(0, 0), + Reject(0, 1), + Match(1, 1), + Reject(1, 2), + Match(2, 2) +); +#[cfg(feature = "unicode-perl")] +searcher!(searcher_empty_haystack, r"\d", ""); +#[cfg(feature = "unicode-perl")] +searcher!(searcher_one_match, r"\d", "5", Match(0, 1)); +#[cfg(feature = "unicode-perl")] +searcher!(searcher_no_match, r"\d", "a", Reject(0, 1)); +#[cfg(feature = "unicode-perl")] +searcher!( + searcher_two_adjacent_matches, + r"\d", + "56", + Match(0, 1), + Match(1, 2) +); +#[cfg(feature = "unicode-perl")] +searcher!( + searcher_two_non_adjacent_matches, + r"\d", + "5a6", + Match(0, 1), + Reject(1, 2), + Match(2, 3) +); +#[cfg(feature = "unicode-perl")] +searcher!(searcher_reject_first, r"\d", "a6", Reject(0, 1), Match(1, 2)); +#[cfg(feature = "unicode-perl")] +searcher!( + searcher_one_zero_length_matches, + r"\d*", + "a1b2", + Match(0, 0), // ^ + Reject(0, 1), // a + Match(1, 2), // a1 + Reject(2, 3), // a1b + Match(3, 4), // a1b2 +); +#[cfg(feature = "unicode-perl")] +searcher!( + searcher_many_zero_length_matches, + r"\d*", + "a1bbb2", + Match(0, 0), // ^ + Reject(0, 1), // a + Match(1, 2), // a1 + Reject(2, 3), // a1b + Match(3, 3), // a1bb + Reject(3, 4), // a1bb + Match(4, 4), // a1bbb + Reject(4, 5), // a1bbb + Match(5, 6), // a1bbba +); +searcher!( + searcher_unicode, + r".+?", + "Ⅰ1Ⅱ2", + Match(0, 3), + Match(3, 4), + Match(4, 7), + Match(7, 8) +); diff --git a/vendor/regex/tests/suite_bytes.rs b/vendor/regex/tests/suite_bytes.rs new file mode 100644 index 0000000..9a71e45 --- /dev/null +++ b/vendor/regex/tests/suite_bytes.rs @@ -0,0 +1,108 @@ +use { + anyhow::Result, + regex::bytes::{Regex, RegexBuilder}, + /*regex_test::{ + CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, + },*/ +}; + +/* /// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(test.haystack())), + "find" => TestResult::matches( + re.find_iter(test.haystack()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: 0, + span: Span { start: m.start(), end: m.end() }, + }), + ), + "captures" => { + let it = re + .captures_iter(test.haystack()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // We're only testing bytes::Regex here, which supports one pattern only. + let pattern = match test.regexes().len() { + 1 => &test.regexes()[0], + _ => return skip, + }; + // We only test is_match, find_iter and captures_iter. All of those are + // leftmost searches. + if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { + return skip; + } + // The top-level single-pattern regex API always uses leftmost-first. + if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { + return skip; + } + // The top-level regex API always runs unanchored searches. ... But we can + // handle tests that are anchored but have only one match. + if test.anchored() && test.match_limit() != Some(1) { + return skip; + } + // We don't support tests with explicit search bounds. We could probably + // support this by using the 'find_at' (and such) APIs. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The bytes::Regex API specifically does not support enabling UTF-8 mode. + // It could I suppose, but currently it does not. That is, it permits + // matches to have offsets that split codepoints. + if test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexBuilder::new(pattern) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} + +/// Convert `Captures` into the test suite's capture values. +fn testify_captures( + caps: ®ex::bytes::Captures<'_>, +) -> regex_test::Captures { + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) + }); + // This unwrap is OK because we assume our 'caps' represents a match, and + // a match always gives a non-zero number of groups with the first group + // being non-None. + regex_test::Captures::new(0, spans).unwrap() +} */ diff --git a/vendor/regex/tests/suite_bytes_set.rs b/vendor/regex/tests/suite_bytes_set.rs new file mode 100644 index 0000000..02ff1ad --- /dev/null +++ b/vendor/regex/tests/suite_bytes_set.rs @@ -0,0 +1,71 @@ +use { + anyhow::Result, + regex::bytes::{RegexSet, RegexSetBuilder}, + //regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner}, +}; + +/* /// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "which"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult { + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(test.haystack())), + "which" => TestResult::which(re.matches(test.haystack()).iter()), + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // The top-level RegexSet API only supports "overlapping" semantics. + if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) { + return skip; + } + // The top-level RegexSet API only supports "all" semantics. + if !matches!(test.match_kind(), regex_test::MatchKind::All) { + return skip; + } + // The top-level RegexSet API always runs unanchored searches. + if test.anchored() { + return skip; + } + // We don't support tests with explicit search bounds. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The bytes::Regex API specifically does not support enabling UTF-8 mode. + // It could I suppose, but currently it does not. That is, it permits + // matches to have offsets that split codepoints. + if test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexSetBuilder::new(test.regexes()) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} */ diff --git a/vendor/regex/tests/suite_string.rs b/vendor/regex/tests/suite_string.rs new file mode 100644 index 0000000..7a24ae0 --- /dev/null +++ b/vendor/regex/tests/suite_string.rs @@ -0,0 +1,114 @@ +use { + anyhow::Result, + regex::{Regex, RegexBuilder}, + /*regex_test::{ + CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, + },*/ +}; + +/* /// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + let hay = match std::str::from_utf8(test.haystack()) { + Ok(hay) => hay, + Err(err) => { + return TestResult::fail(&format!( + "haystack is not valid UTF-8: {}", + err + )); + } + }; + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(hay)), + "find" => TestResult::matches( + re.find_iter(hay) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: 0, + span: Span { start: m.start(), end: m.end() }, + }), + ), + "captures" => { + let it = re + .captures_iter(hay) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} */ + +/* /// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // We're only testing bytes::Regex here, which supports one pattern only. + let pattern = match test.regexes().len() { + 1 => &test.regexes()[0], + _ => return skip, + }; + // We only test is_match, find_iter and captures_iter. All of those are + // leftmost searches. + if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { + return skip; + } + // The top-level single-pattern regex API always uses leftmost-first. + if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { + return skip; + } + // The top-level regex API always runs unanchored searches. ... But we can + // handle tests that are anchored but have only one match. + if test.anchored() && test.match_limit() != Some(1) { + return skip; + } + // We don't support tests with explicit search bounds. We could probably + // support this by using the 'find_at' (and such) APIs. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The Regex API specifically does not support disabling UTF-8 mode because + // it can only search &str which is always valid UTF-8. + if !test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexBuilder::new(pattern) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} + +/// Convert `Captures` into the test suite's capture values. +fn testify_captures(caps: ®ex::Captures<'_>) -> regex_test::Captures { + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) + }); + // This unwrap is OK because we assume our 'caps' represents a match, and + // a match always gives a non-zero number of groups with the first group + // being non-None. + regex_test::Captures::new(0, spans).unwrap() +} */ diff --git a/vendor/regex/tests/suite_string_set.rs b/vendor/regex/tests/suite_string_set.rs new file mode 100644 index 0000000..811149e --- /dev/null +++ b/vendor/regex/tests/suite_string_set.rs @@ -0,0 +1,79 @@ +use { + anyhow::Result, + regex::{RegexSet, RegexSetBuilder}, + //regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner}, +}; + +/* /// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "which"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult { + let hay = match std::str::from_utf8(test.haystack()) { + Ok(hay) => hay, + Err(err) => { + return TestResult::fail(&format!( + "haystack is not valid UTF-8: {}", + err + )); + } + }; + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(hay)), + "which" => TestResult::which(re.matches(hay).iter()), + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // The top-level RegexSet API only supports "overlapping" semantics. + if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) { + return skip; + } + // The top-level RegexSet API only supports "all" semantics. + if !matches!(test.match_kind(), regex_test::MatchKind::All) { + return skip; + } + // The top-level RegexSet API always runs unanchored searches. + if test.anchored() { + return skip; + } + // We don't support tests with explicit search bounds. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The Regex API specifically does not support disabling UTF-8 mode because + // it can only search &str which is always valid UTF-8. + if !test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexSetBuilder::new(test.regexes()) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} */ |