summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex/tests/crazy.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex/tests/crazy.rs')
-rw-r--r--third_party/rust/regex/tests/crazy.rs446
1 files changed, 446 insertions, 0 deletions
diff --git a/third_party/rust/regex/tests/crazy.rs b/third_party/rust/regex/tests/crazy.rs
new file mode 100644
index 0000000000..8c72273d93
--- /dev/null
+++ b/third_party/rust/regex/tests/crazy.rs
@@ -0,0 +1,446 @@
+mat!(ascii_literal, r"a", "a", Some((0, 1)));
+
+// Some crazy expressions from regular-expressions.info.
+mat!(
+ match_ranges,
+ r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
+ "num: 255",
+ Some((5, 8))
+);
+mat!(
+ match_ranges_not,
+ r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
+ "num: 256",
+ None
+);
+mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
+mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
+mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
+mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
+mat!(
+ match_email,
+ r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
+ "mine is jam.slam@gmail.com ",
+ Some((8, 26))
+);
+mat!(
+ match_email_not,
+ r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
+ "mine is jam.slam@gmail ",
+ None
+);
+mat!(
+ match_email_big,
+ r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
+ "mine is jam.slam@gmail.com ",
+ Some((8, 26))
+);
+mat!(
+ match_date1,
+ r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+ "1900-01-01",
+ Some((0, 10))
+);
+mat!(
+ match_date2,
+ r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+ "1900-00-01",
+ None
+);
+mat!(
+ match_date3,
+ r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+ "1900-13-01",
+ None
+);
+
+// Do some crazy dancing with the start/end assertions.
+matiter!(match_start_end_empty, r"^$", "", (0, 0));
+matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
+matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
+matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
+matiter!(
+ match_start_end_empty_rep,
+ r"(?:^$)*",
+ "a\nb\nc",
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5)
+);
+matiter!(
+ match_start_end_empty_rep_rev,
+ r"(?:$^)*",
+ "a\nb\nc",
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5)
+);
+
+// Test negated character classes.
+mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
+mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
+mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
+mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
+mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
+mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
+mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
+mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
+
+// Test that repeated empty expressions don't loop forever.
+mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
+mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
+mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
+mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
+mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
+mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
+mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
+mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
+mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
+mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
+mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
+mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
+
+// Test that we handle various flavors of empty expressions.
+matiter!(match_empty1, r"", "", (0, 0));
+matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
+
+// Test that the DFA can handle pathological cases.
+// (This should result in the DFA's cache being flushed too frequently, which
+// should cause it to quit and fall back to the NFA algorithm.)
+#[test]
+fn dfa_handles_pathological_case() {
+ fn ones_and_zeroes(count: usize) -> String {
+ use rand::{thread_rng, Rng};
+
+ let mut rng = thread_rng();
+ let mut s = String::new();
+ for _ in 0..count {
+ if rng.gen() {
+ s.push('1');
+ } else {
+ s.push('0');
+ }
+ }
+ s
+ }
+
+ let re = regex!(r"[01]*1[01]{20}$");
+ let text = {
+ let mut pieces = ones_and_zeroes(100_000);
+ pieces.push('1');
+ pieces.push_str(&ones_and_zeroes(20));
+ pieces
+ };
+ assert!(re.is_match(text!(&*text)));
+}
+
+#[test]
+fn nest_limit_makes_it_parse() {
+ use regex::RegexBuilder;
+
+ RegexBuilder::new(
+ r#"(?-u)
+ 2(?:
+ [45]\d{3}|
+ 7(?:
+ 1[0-267]|
+ 2[0-289]|
+ 3[0-29]|
+ 4[01]|
+ 5[1-3]|
+ 6[013]|
+ 7[0178]|
+ 91
+ )|
+ 8(?:
+ 0[125]|
+ [139][1-6]|
+ 2[0157-9]|
+ 41|
+ 6[1-35]|
+ 7[1-5]|
+ 8[1-8]|
+ 90
+ )|
+ 9(?:
+ 0[0-2]|
+ 1[0-4]|
+ 2[568]|
+ 3[3-6]|
+ 5[5-7]|
+ 6[0167]|
+ 7[15]|
+ 8[0146-9]
+ )
+ )\d{4}|
+ 3(?:
+ 12?[5-7]\d{2}|
+ 0(?:
+ 2(?:
+ [025-79]\d|
+ [348]\d{1,2}
+ )|
+ 3(?:
+ [2-4]\d|
+ [56]\d?
+ )
+ )|
+ 2(?:
+ 1\d{2}|
+ 2(?:
+ [12]\d|
+ [35]\d{1,2}|
+ 4\d?
+ )
+ )|
+ 3(?:
+ 1\d{2}|
+ 2(?:
+ [2356]\d|
+ 4\d{1,2}
+ )
+ )|
+ 4(?:
+ 1\d{2}|
+ 2(?:
+ 2\d{1,2}|
+ [47]|
+ 5\d{2}
+ )
+ )|
+ 5(?:
+ 1\d{2}|
+ 29
+ )|
+ [67]1\d{2}|
+ 8(?:
+ 1\d{2}|
+ 2(?:
+ 2\d{2}|
+ 3|
+ 4\d
+ )
+ )
+ )\d{3}|
+ 4(?:
+ 0(?:
+ 2(?:
+ [09]\d|
+ 7
+ )|
+ 33\d{2}
+ )|
+ 1\d{3}|
+ 2(?:
+ 1\d{2}|
+ 2(?:
+ [25]\d?|
+ [348]\d|
+ [67]\d{1,2}
+ )
+ )|
+ 3(?:
+ 1\d{2}(?:
+ \d{2}
+ )?|
+ 2(?:
+ [045]\d|
+ [236-9]\d{1,2}
+ )|
+ 32\d{2}
+ )|
+ 4(?:
+ [18]\d{2}|
+ 2(?:
+ [2-46]\d{2}|
+ 3
+ )|
+ 5[25]\d{2}
+ )|
+ 5(?:
+ 1\d{2}|
+ 2(?:
+ 3\d|
+ 5
+ )
+ )|
+ 6(?:
+ [18]\d{2}|
+ 2(?:
+ 3(?:
+ \d{2}
+ )?|
+ [46]\d{1,2}|
+ 5\d{2}|
+ 7\d
+ )|
+ 5(?:
+ 3\d?|
+ 4\d|
+ [57]\d{1,2}|
+ 6\d{2}|
+ 8
+ )
+ )|
+ 71\d{2}|
+ 8(?:
+ [18]\d{2}|
+ 23\d{2}|
+ 54\d{2}
+ )|
+ 9(?:
+ [18]\d{2}|
+ 2[2-5]\d{2}|
+ 53\d{1,2}
+ )
+ )\d{3}|
+ 5(?:
+ 02[03489]\d{2}|
+ 1\d{2}|
+ 2(?:
+ 1\d{2}|
+ 2(?:
+ 2(?:
+ \d{2}
+ )?|
+ [457]\d{2}
+ )
+ )|
+ 3(?:
+ 1\d{2}|
+ 2(?:
+ [37](?:
+ \d{2}
+ )?|
+ [569]\d{2}
+ )
+ )|
+ 4(?:
+ 1\d{2}|
+ 2[46]\d{2}
+ )|
+ 5(?:
+ 1\d{2}|
+ 26\d{1,2}
+ )|
+ 6(?:
+ [18]\d{2}|
+ 2|
+ 53\d{2}
+ )|
+ 7(?:
+ 1|
+ 24
+ )\d{2}|
+ 8(?:
+ 1|
+ 26
+ )\d{2}|
+ 91\d{2}
+ )\d{3}|
+ 6(?:
+ 0(?:
+ 1\d{2}|
+ 2(?:
+ 3\d{2}|
+ 4\d{1,2}
+ )
+ )|
+ 2(?:
+ 2[2-5]\d{2}|
+ 5(?:
+ [3-5]\d{2}|
+ 7
+ )|
+ 8\d{2}
+ )|
+ 3(?:
+ 1|
+ 2[3478]
+ )\d{2}|
+ 4(?:
+ 1|
+ 2[34]
+ )\d{2}|
+ 5(?:
+ 1|
+ 2[47]
+ )\d{2}|
+ 6(?:
+ [18]\d{2}|
+ 6(?:
+ 2(?:
+ 2\d|
+ [34]\d{2}
+ )|
+ 5(?:
+ [24]\d{2}|
+ 3\d|
+ 5\d{1,2}
+ )
+ )
+ )|
+ 72[2-5]\d{2}|
+ 8(?:
+ 1\d{2}|
+ 2[2-5]\d{2}
+ )|
+ 9(?:
+ 1\d{2}|
+ 2[2-6]\d{2}
+ )
+ )\d{3}|
+ 7(?:
+ (?:
+ 02|
+ [3-589]1|
+ 6[12]|
+ 72[24]
+ )\d{2}|
+ 21\d{3}|
+ 32
+ )\d{3}|
+ 8(?:
+ (?:
+ 4[12]|
+ [5-7]2|
+ 1\d?
+ )|
+ (?:
+ 0|
+ 3[12]|
+ [5-7]1|
+ 217
+ )\d
+ )\d{4}|
+ 9(?:
+ [35]1|
+ (?:
+ [024]2|
+ 81
+ )\d|
+ (?:
+ 1|
+ [24]1
+ )\d{2}
+ )\d{3}
+ "#,
+ )
+ .build()
+ .unwrap();
+}