1 files changed, 222 insertions, 0 deletions
diff --git a/third_party/rust/regex/tests/test_default.rs b/third_party/rust/regex/tests/test_default.rs
new file mode 100644
index 0000000000..be627f7a68
--- /dev/null
+++ b/third_party/rust/regex/tests/test_default.rs
@@ -0,0 +1,222 @@
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+use regex;
+
+// Due to macro scoping rules, this definition only applies for the modules
+// defined below. Effectively, it allows us to use the same tests for both
+// native and dynamic regexes.
+//
+// This is also used to test the various matching engines. This one exercises
+// the normal code path which automatically chooses the engine based on the
+// regex and the input. Other dynamic tests explicitly set the engine to use.
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::Regex; // scoped to this block; default engine selection
+        Regex::new($re) // yields the `Result`; callers decide how to handle it
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap() // panics if the pattern fails to compile
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::RegexSet; // scoped to this block
+        RegexSet::new($re) // `$re` is the collection of patterns for the set
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap() // panics if any pattern fails to compile
+    };
+}
+
+// Must come before the `mod` items below: `macro_rules!` scoping is textual.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod misc;
+mod multiline;
+mod noparse;
+mod regression;
+mod regression_fuzz;
+mod replace;
+mod searcher;
+mod set;
+mod shortest_match;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
+
+#[test]
+fn disallow_non_utf8() {
+    // Every pattern below turns Unicode mode off; `regex::Regex` must refuse it.
+    for pat in &[r"(?-u)\xFF", r"(?-u).", r"(?-u)[\xFF]", r"(?-u)☃"] {
+        assert!(regex::Regex::new(pat).is_err(), "accepted {:?}", pat);
+    }
+}
+
+#[test]
+fn disallow_octal() {
+    assert!(regex::Regex::new(r"\0").is_err()); // octal escapes are opt-in
+}
+
+#[test]
+fn allow_octal() {
+    assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); // opt-in
+}
+
+#[test]
+fn oibits() {
+    use regex::bytes;
+    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
+    use std::panic::{RefUnwindSafe, UnwindSafe};
+
+    fn is_send<T: Send>() {} // each probe type-checks only if the bound holds
+    fn is_sync<T: Sync>() {}
+    fn is_unwind_safe<T: UnwindSafe>() {}
+    fn is_ref_unwind_safe<T: RefUnwindSafe>() {}
+
+    is_send::<Regex>();
+    is_send::<RegexBuilder>();
+    is_send::<bytes::Regex>();
+    is_send::<bytes::RegexBuilder>();
+    is_send::<RegexSet>();
+    is_send::<RegexSetBuilder>();
+    is_send::<bytes::RegexSet>();
+    is_send::<bytes::RegexSetBuilder>();
+
+    is_sync::<Regex>();
+    is_sync::<RegexBuilder>();
+    is_sync::<bytes::Regex>();
+    is_sync::<bytes::RegexBuilder>();
+    is_sync::<RegexSet>();
+    is_sync::<RegexSetBuilder>();
+    is_sync::<bytes::RegexSet>();
+    is_sync::<bytes::RegexSetBuilder>();
+
+    is_unwind_safe::<Regex>();
+    is_unwind_safe::<RegexBuilder>();
+    is_unwind_safe::<bytes::Regex>();
+    is_unwind_safe::<bytes::RegexBuilder>();
+    is_unwind_safe::<RegexSet>();
+    is_unwind_safe::<RegexSetBuilder>();
+    is_unwind_safe::<bytes::RegexSet>();
+    is_unwind_safe::<bytes::RegexSetBuilder>();
+
+    is_ref_unwind_safe::<Regex>();
+    is_ref_unwind_safe::<RegexBuilder>();
+    is_ref_unwind_safe::<bytes::Regex>();
+    is_ref_unwind_safe::<bytes::RegexBuilder>();
+    is_ref_unwind_safe::<RegexSet>();
+    is_ref_unwind_safe::<RegexSetBuilder>();
+    is_ref_unwind_safe::<bytes::RegexSet>();
+    is_ref_unwind_safe::<bytes::RegexSetBuilder>();
+}
+
+// See: https://github.com/rust-lang/regex/issues/568
+#[test]
+fn oibits_regression() {
+    // Build a trivial regex inside `catch_unwind`. The returned value is
+    // deliberately discarded, so only compiling and not aborting matters.
+    let build = || regex::Regex::new("a").unwrap();
+    let _ = std::panic::catch_unwind(build);
+}
+
+// See: https://github.com/rust-lang/regex/issues/750
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn regex_is_reasonably_small() {
+    use std::mem::size_of;
+
+    // With 64-bit pointers each handle is expected to occupy exactly two
+    // machine words, for both the `str` and the `bytes` flavours.
+    const EXPECTED: usize = 16;
+    assert_eq!(EXPECTED, size_of::<regex::Regex>());
+    assert_eq!(EXPECTED, size_of::<regex::RegexSet>());
+    assert_eq!(EXPECTED, size_of::<regex::bytes::Regex>());
+    assert_eq!(EXPECTED, size_of::<regex::bytes::RegexSet>());
+}
+
+// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+// See: CVE-2022-24713
+//
+// We test that our regex compiler will correctly return a "too big" error when
+// we try to use a very large repetition on an *empty* sub-expression.
+//
+// At the time this test was written, the regex compiler does not represent
+// empty sub-expressions with any bytecode instructions. In effect, it's an
+// "optimization" to leave them out, since they would otherwise correspond
+// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
+// epsilon transition in the NFA graph). Therefore, an empty sub-expression
+// represents an interesting case for the compiler's size limits. Since it
+// doesn't actually contribute any additional memory to the compiled regex
+// instructions, the size limit machinery never detects it. Instead, it just
+// dumbly tries to compile the empty sub-expression N times, where N is the
+// repetition size.
+//
+// When N is very large, this will cause the compiler to essentially spin and
+// do nothing for a decently large amount of time. It causes the regex to take
+// quite a bit of time to compile, despite the concrete syntax of the regex
+// being quite small.
+//
+// The degree to which this is actually a problem is somewhat of a judgment
+// call. Some regexes simply take a long time to compile. But in general, you
+// should be able to reasonably control this by setting lower or higher size
+// limits on the compiled object size. But this mitigation doesn't work at all
+// for this case.
+//
+// This particular test is somewhat narrow. It merely checks that regex
+// compilation will, at some point, return a "too big" error. Before the
+// fix landed, this test would eventually fail because the regex would be
+// successfully compiled (after enough time elapsed). So while this test
+// doesn't check that we exit in a reasonable amount of time, it does at least
+// check that we are properly returning an error at some point.
+#[test]
+fn big_empty_regex_fails() {
+    // u32::MAX repetitions of an empty group: compilation must report an
+    // error rather than eventually succeeding.
+    let pattern = "(?:){4294967295}";
+    assert!(regex::Regex::new(pattern).is_err());
+}
+
+// Below is a "billion laughs" variant of the previous test case.
+#[test]
+fn big_empty_reps_chain_regex_fails() {
+    // Six stacked `{64}` repetitions multiply out to 64^6 copies of an empty
+    // group; the compiler must give up with an error.
+    let pattern = "(?:){64}{64}{64}{64}{64}{64}";
+    assert!(regex::Regex::new(pattern).is_err());
+}
+
+// Below is another situation where a zero-length sub-expression can be
+// introduced.
+#[test]
+fn big_zero_reps_regex_fails() {
+    // `x{0}` is a zero-length sub-expression; repeating it u32::MAX times
+    // must be rejected as well.
+    let pattern = r"x{0}{4294967295}";
+    assert!(regex::Regex::new(pattern).is_err());
+}
+
+// Testing another case for completeness.
+#[test]
+fn empty_alt_regex_fails() {
+    // An alternation whose two branches are both empty, repeated u32::MAX
+    // times, must also fail to compile.
+    let pattern = r"(?:|){4294967295}";
+    assert!(regex::Regex::new(pattern).is_err());
+}