summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex/tests/test_default.rs
blob: be627f7a68f160cf8c373bf2bdd184f7143df628 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#![cfg_attr(feature = "pattern", feature(pattern))]

use regex;

// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
// native and dynamic regexes.
//
// This is also used to test the various matching engines. This one exercises
// the normal code path which automatically chooses the engine based on the
// regex and the input. Other dynamic tests explicitly set the engine to use.
macro_rules! regex_new {
    // Hands the pattern expression to `regex::Regex::new` and yields that
    // call's result without unwrapping it.
    ($pattern:expr) => {
        regex::Regex::new($pattern)
    };
}

macro_rules! regex {
    // Builds the pattern through `regex_new!` and unwraps the result, so an
    // error from `regex_new!` panics at the call site.
    ($pattern:expr) => {{
        let built = regex_new!($pattern);
        built.unwrap()
    }};
}

macro_rules! regex_set_new {
    // Hands the expression to `regex::RegexSet::new` and yields that call's
    // result without unwrapping it.
    ($patterns:expr) => {
        regex::RegexSet::new($patterns)
    };
}

macro_rules! regex_set {
    // Builds the set through `regex_set_new!` and unwraps the result, so an
    // error from `regex_set_new!` panics at the call site.
    ($patterns:expr) => {{
        let built = regex_set_new!($patterns);
        built.unwrap()
    }};
}

// Must come before other module definitions.
include!("macros_str.rs");
include!("macros.rs");

mod api;
mod api_str;
mod crazy;
mod flags;
mod fowler;
mod misc;
mod multiline;
mod noparse;
mod regression;
mod regression_fuzz;
mod replace;
mod searcher;
mod set;
mod shortest_match;
mod suffix_reverse;
#[cfg(feature = "unicode")]
mod unicode;
#[cfg(feature = "unicode-perl")]
mod word_boundary;
#[cfg(feature = "unicode-perl")]
mod word_boundary_unicode;

#[test]
fn disallow_non_utf8() {
    // Every pattern below switches Unicode mode off with `(?-u)`; the
    // string-oriented `Regex` is expected to refuse to compile each of them.
    let rejected = |pattern: &str| regex::Regex::new(pattern).is_err();

    assert!(rejected(r"(?-u)\xFF"));
    assert!(rejected(r"(?-u)."));
    assert!(rejected(r"(?-u)[\xFF]"));
    assert!(rejected(r"(?-u)☃"));
}

#[test]
fn disallow_octal() {
    // With the default configuration, the `\0` escape must not compile.
    let outcome = regex::Regex::new(r"\0");
    assert!(outcome.is_err());
}

#[test]
fn allow_octal() {
    // Once the builder's `octal` option is switched on, the same `\0` escape
    // must compile successfully.
    let mut builder = regex::RegexBuilder::new(r"\0");
    builder.octal(true);
    assert!(builder.build().is_ok());
}

#[test]
fn oibits() {
    use std::panic::{RefUnwindSafe, UnwindSafe};

    // Compile-time check only: instantiating this function for `T` requires
    // `T` to implement all four traits at once. Nothing happens at runtime.
    fn assert_auto_traits<T>()
    where
        T: Send + Sync + UnwindSafe + RefUnwindSafe,
    {
    }

    // String-oriented regex and its builder.
    assert_auto_traits::<regex::Regex>();
    assert_auto_traits::<regex::RegexBuilder>();

    // Byte-oriented regex and its builder.
    assert_auto_traits::<regex::bytes::Regex>();
    assert_auto_traits::<regex::bytes::RegexBuilder>();

    // String-oriented regex set and its builder.
    assert_auto_traits::<regex::RegexSet>();
    assert_auto_traits::<regex::RegexSetBuilder>();

    // Byte-oriented regex set and its builder.
    assert_auto_traits::<regex::bytes::RegexSet>();
    assert_auto_traits::<regex::bytes::RegexSetBuilder>();
}

// See: https://github.com/rust-lang/regex/issues/568
//
// Compiles and unwraps a trivial regex inside `catch_unwind`; the outcome
// itself is irrelevant and is discarded right away.
#[test]
fn oibits_regression() {
    use std::panic::catch_unwind;

    let outcome = catch_unwind(|| regex::Regex::new("a").unwrap());
    drop(outcome);
}

// See: https://github.com/rust-lang/regex/issues/750
//
// Pins the size of each public regex type to 16 bytes. The test is only
// compiled for targets with 64-bit pointers.
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
    use std::mem;

    const EXPECTED_BYTES: usize = 16;

    assert_eq!(EXPECTED_BYTES, mem::size_of::<regex::Regex>());
    assert_eq!(EXPECTED_BYTES, mem::size_of::<regex::RegexSet>());
    assert_eq!(EXPECTED_BYTES, mem::size_of::<regex::bytes::Regex>());
    assert_eq!(EXPECTED_BYTES, mem::size_of::<regex::bytes::RegexSet>());
}

// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
// See: CVE-2022-24713
//
// We test that our regex compiler will correctly return a "too big" error when
// we try to use a very large repetition on an *empty* sub-expression.
//
// At the time this test was written, the regex compiler does not represent
// empty sub-expressions with any bytecode instructions. In effect, it's an
// "optimization" to leave them out, since they would otherwise correspond
// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
// epsilon transition in the NFA graph). Therefore, an empty sub-expression
// represents an interesting case for the compiler's size limits. Since it
// doesn't actually contribute any additional memory to the compiled regex
// instructions, the size limit machinery never detects it. Instead, it just
// dumbly tries to compile the empty sub-expression N times, where N is the
// repetition size.
//
// When N is very large, this will cause the compiler to essentially spin and
// do nothing for a decently large amount of time. It causes the regex to take
// quite a bit of time to compile, despite the concrete syntax of the regex
// being quite small.
//
// The degree to which this is actually a problem is somewhat of a judgment
// call. Some regexes simply take a long time to compile. But in general, you
// should be able to reasonably control this by setting lower or higher size
// limits on the compiled object size. But this mitigation doesn't work at all
// for this case.
//
// This particular test is somewhat narrow. It merely checks that regex
// compilation will, at some point, return a "too big" error. Before the
// fix landed, this test would eventually fail because the regex would be
// successfully compiled (after enough time elapsed). So while this test
// doesn't check that we exit in a reasonable amount of time, it does at least
// check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    // An empty non-capturing group repeated 4294967295 times must be
    // rejected by the compiler rather than accepted.
    let pattern = "(?:){4294967295}";
    assert!(regex::Regex::new(pattern).is_err());
}

// A "billion laughs" variant of the large empty repetition case: six stacked
// `{64}` repetitions applied to an empty group.
#[test]
fn big_empty_reps_chain_regex_fails() {
    let pattern = "(?:){64}{64}{64}{64}{64}{64}";
    assert!(regex::Regex::new(pattern).is_err());
}

// Another way to introduce a zero-length sub-expression: `x{0}` matches
// nothing, and it is then repeated 4294967295 times.
#[test]
fn big_zero_reps_regex_fails() {
    let pattern = r"x{0}{4294967295}";
    assert!(regex::Regex::new(pattern).is_err());
}

// One more case for completeness: an alternation of two empty branches under
// a huge repetition count.
#[test]
fn empty_alt_regex_fails() {
    let pattern = r"(?:|){4294967295}";
    assert!(regex::Regex::new(pattern).is_err());
}