diff options
Diffstat (limited to 'third_party/rust/regex/tests/crazy.rs')
-rw-r--r-- | third_party/rust/regex/tests/crazy.rs | 446 |
1 files changed, 446 insertions, 0 deletions
diff --git a/third_party/rust/regex/tests/crazy.rs b/third_party/rust/regex/tests/crazy.rs new file mode 100644 index 0000000000..8c72273d93 --- /dev/null +++ b/third_party/rust/regex/tests/crazy.rs @@ -0,0 +1,446 @@ +mat!(ascii_literal, r"a", "a", Some((0, 1))); + +// Some crazy expressions from regular-expressions.info. +mat!( + match_ranges, + r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", + "num: 255", + Some((5, 8)) +); +mat!( + match_ranges_not, + r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", + "num: 256", + None +); +mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); +mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); +mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); +mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); +mat!( + match_email, + r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", + "mine is jam.slam@gmail.com ", + Some((8, 26)) +); +mat!( + match_email_not, + r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", + "mine is jam.slam@gmail ", + None +); +mat!( + match_email_big, + r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", + "mine is jam.slam@gmail.com ", + Some((8, 26)) +); +mat!( + match_date1, + r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", + "1900-01-01", + Some((0, 10)) +); +mat!( + match_date2, + r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", + "1900-00-01", + None +); +mat!( + match_date3, + r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", + "1900-13-01", + None +); + +// Do some crazy dancing with the start/end assertions. +matiter!(match_start_end_empty, r"^$", "", (0, 0)); +matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); +matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); +matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); +matiter!( + match_start_end_empty_rep, + r"(?:^$)*", + "a\nb\nc", + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5) +); +matiter!( + match_start_end_empty_rep_rev, + r"(?:$^)*", + "a\nb\nc", + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5) +); + +// Test negated character classes. +mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); +mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); +mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); +mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); +mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); +mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); +mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); +mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); + +// Test that repeated empty expressions don't loop forever. +mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); +mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); +mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); +mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); +mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); +mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); +mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); +mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); +mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); +mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); +mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); +mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); + +// Test that we handle various flavors of empty expressions. +matiter!(match_empty1, r"", "", (0, 0)); +matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); +matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); + +// Test that the DFA can handle pathological cases. +// (This should result in the DFA's cache being flushed too frequently, which +// should cause it to quit and fall back to the NFA algorithm.) +#[test] +fn dfa_handles_pathological_case() { + fn ones_and_zeroes(count: usize) -> String { + use rand::{thread_rng, Rng}; + + let mut rng = thread_rng(); + let mut s = String::new(); + for _ in 0..count { + if rng.gen() { + s.push('1'); + } else { + s.push('0'); + } + } + s + } + + let re = regex!(r"[01]*1[01]{20}$"); + let text = { + let mut pieces = ones_and_zeroes(100_000); + pieces.push('1'); + pieces.push_str(&ones_and_zeroes(20)); + pieces + }; + assert!(re.is_match(text!(&*text))); +} + +#[test] +fn nest_limit_makes_it_parse() { + use regex::RegexBuilder; + + RegexBuilder::new( + r#"(?-u) + 2(?: + [45]\d{3}| + 7(?: + 1[0-267]| + 2[0-289]| + 3[0-29]| + 4[01]| + 5[1-3]| + 6[013]| + 7[0178]| + 91 + )| + 8(?: + 0[125]| + [139][1-6]| + 2[0157-9]| + 41| + 6[1-35]| + 7[1-5]| + 8[1-8]| + 90 + )| + 9(?: + 0[0-2]| + 1[0-4]| + 2[568]| + 3[3-6]| + 5[5-7]| + 6[0167]| + 7[15]| + 8[0146-9] + ) + )\d{4}| + 3(?: + 12?[5-7]\d{2}| + 0(?: + 2(?: + [025-79]\d| + [348]\d{1,2} + )| + 3(?: + [2-4]\d| + [56]\d? + ) + )| + 2(?: + 1\d{2}| + 2(?: + [12]\d| + [35]\d{1,2}| + 4\d? + ) + )| + 3(?: + 1\d{2}| + 2(?: + [2356]\d| + 4\d{1,2} + ) + )| + 4(?: + 1\d{2}| + 2(?: + 2\d{1,2}| + [47]| + 5\d{2} + ) + )| + 5(?: + 1\d{2}| + 29 + )| + [67]1\d{2}| + 8(?: + 1\d{2}| + 2(?: + 2\d{2}| + 3| + 4\d + ) + ) + )\d{3}| + 4(?: + 0(?: + 2(?: + [09]\d| + 7 + )| + 33\d{2} + )| + 1\d{3}| + 2(?: + 1\d{2}| + 2(?: + [25]\d?| + [348]\d| + [67]\d{1,2} + ) + )| + 3(?: + 1\d{2}(?: + \d{2} + )?| + 2(?: + [045]\d| + [236-9]\d{1,2} + )| + 32\d{2} + )| + 4(?: + [18]\d{2}| + 2(?: + [2-46]\d{2}| + 3 + )| + 5[25]\d{2} + )| + 5(?: + 1\d{2}| + 2(?: + 3\d| + 5 + ) + )| + 6(?: + [18]\d{2}| + 2(?: + 3(?: + \d{2} + )?| + [46]\d{1,2}| + 5\d{2}| + 7\d + )| + 5(?: + 3\d?| + 4\d| + [57]\d{1,2}| + 6\d{2}| + 8 + ) + )| + 71\d{2}| + 8(?: + [18]\d{2}| + 23\d{2}| + 54\d{2} + )| + 9(?: + [18]\d{2}| + 2[2-5]\d{2}| + 53\d{1,2} + ) + )\d{3}| + 5(?: + 02[03489]\d{2}| + 1\d{2}| + 2(?: + 1\d{2}| + 2(?: + 2(?: + \d{2} + )?| + [457]\d{2} + ) + )| + 3(?: + 1\d{2}| + 2(?: + [37](?: + \d{2} + )?| + [569]\d{2} + ) + )| + 4(?: + 1\d{2}| + 2[46]\d{2} + )| + 5(?: + 1\d{2}| + 26\d{1,2} + )| + 6(?: + [18]\d{2}| + 2| + 53\d{2} + )| + 7(?: + 1| + 24 + )\d{2}| + 8(?: + 1| + 26 + )\d{2}| + 91\d{2} + )\d{3}| + 6(?: + 0(?: + 1\d{2}| + 2(?: + 3\d{2}| + 4\d{1,2} + ) + )| + 2(?: + 2[2-5]\d{2}| + 5(?: + [3-5]\d{2}| + 7 + )| + 8\d{2} + )| + 3(?: + 1| + 2[3478] + )\d{2}| + 4(?: + 1| + 2[34] + )\d{2}| + 5(?: + 1| + 2[47] + )\d{2}| + 6(?: + [18]\d{2}| + 6(?: + 2(?: + 2\d| + [34]\d{2} + )| + 5(?: + [24]\d{2}| + 3\d| + 5\d{1,2} + ) + ) + )| + 72[2-5]\d{2}| + 8(?: + 1\d{2}| + 2[2-5]\d{2} + )| + 9(?: + 1\d{2}| + 2[2-6]\d{2} + ) + )\d{3}| + 7(?: + (?: + 02| + [3-589]1| + 6[12]| + 72[24] + )\d{2}| + 21\d{3}| + 32 + )\d{3}| + 8(?: + (?: + 4[12]| + [5-7]2| + 1\d? + )| + (?: + 0| + 3[12]| + [5-7]1| + 217 + )\d + )\d{4}| + 9(?: + [35]1| + (?: + [024]2| + 81 + )\d| + (?: + 1| + [24]1 + )\d{2} + )\d{3} + "#, + ) + .build() + .unwrap(); +} |