summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex-automata/tests/dfa/onepass/suite.rs
blob: 20bd6965c80d7618ef8e3aefca4831ca1322b197 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
use {
    anyhow::Result,
    regex_automata::{
        dfa::onepass::{self, DFA},
        nfa::thompson,
        util::{iter, syntax},
    },
    regex_test::{
        CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult,
        TestRunner,
    },
};

use crate::{create_input, suite, testify_captures, untestify_kind};

const EXPANSIONS: &[&str] = &["is_match", "find", "captures"];

/// Tests the default configuration of the hybrid NFA/DFA.
#[test]
fn default() -> Result<()> {
    let builder = DFA::builder();
    TestRunner::new()?
        .expand(EXPANSIONS, |t| t.compiles())
        .test_iter(suite()?.iter(), compiler(builder))
        .assert();
    Ok(())
}

/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled for all
/// tests.
#[test]
fn starts_for_each_pattern() -> Result<()> {
    let mut builder = DFA::builder();
    builder.configure(DFA::config().starts_for_each_pattern(true));
    TestRunner::new()?
        .expand(EXPANSIONS, |t| t.compiles())
        .test_iter(suite()?.iter(), compiler(builder))
        .assert();
    Ok(())
}

/// Tests the hybrid NFA/DFA when byte classes are disabled.
///
/// N.B. Disabling byte classes doesn't avoid any indirection at search time.
/// All it does is cause every byte value to be its own distinct equivalence
/// class.
#[test]
fn no_byte_classes() -> Result<()> {
    let mut builder = DFA::builder();
    builder.configure(DFA::config().byte_classes(false));
    TestRunner::new()?
        .expand(EXPANSIONS, |t| t.compiles())
        .test_iter(suite()?.iter(), compiler(builder))
        .assert();
    Ok(())
}

fn compiler(
    mut builder: onepass::Builder,
) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
    move |test, regexes| {
        // Check if our regex contains things that aren't supported by DFAs.
        // That is, Unicode word boundaries when searching non-ASCII text.
        if !configure_onepass_builder(test, &mut builder) {
            return Ok(CompiledRegex::skip());
        }
        let re = match builder.build_many(&regexes) {
            Ok(re) => re,
            Err(err) => {
                let msg = err.to_string();
                // This is pretty gross, but when a regex fails to compile as
                // a one-pass regex, then we want to be OK with that and just
                // skip the test. But we have to be careful to only skip it
                // when the expected result is that the regex compiles. If
                // the test is specifically checking that the regex does not
                // compile, then we should bubble up that error and allow the
                // test to pass.
                //
                // Since our error types are all generally opaque, we just
                // look for an error string. Not great, but not the end of the
                // world.
                if test.compiles() && msg.contains("not one-pass") {
                    return Ok(CompiledRegex::skip());
                }
                return Err(err.into());
            }
        };
        let mut cache = re.create_cache();
        Ok(CompiledRegex::compiled(move |test| -> TestResult {
            run_test(&re, &mut cache, test)
        }))
    }
}

fn run_test(
    re: &DFA,
    cache: &mut onepass::Cache,
    test: &RegexTest,
) -> TestResult {
    let input = create_input(test);
    match test.additional_name() {
        "is_match" => {
            TestResult::matched(re.is_match(cache, input.earliest(true)))
        }
        "find" => match test.search_kind() {
            SearchKind::Earliest | SearchKind::Leftmost => {
                let input =
                    input.earliest(test.search_kind() == SearchKind::Earliest);
                let mut caps = re.create_captures();
                let it = iter::Searcher::new(input)
                    .into_matches_iter(|input| {
                        re.try_search(cache, input, &mut caps)?;
                        Ok(caps.get_match())
                    })
                    .infallible()
                    .take(test.match_limit().unwrap_or(std::usize::MAX))
                    .map(|m| Match {
                        id: m.pattern().as_usize(),
                        span: Span { start: m.start(), end: m.end() },
                    });
                TestResult::matches(it)
            }
            SearchKind::Overlapping => {
                // The one-pass DFA does not support any kind of overlapping
                // search. This is not just a matter of not having the API.
                // It's fundamentally incompatible with the one-pass concept.
                // If overlapping matches were possible, then the one-pass DFA
                // would fail to build.
                TestResult::skip()
            }
        },
        "captures" => match test.search_kind() {
            SearchKind::Earliest | SearchKind::Leftmost => {
                let input =
                    input.earliest(test.search_kind() == SearchKind::Earliest);
                let it = iter::Searcher::new(input)
                    .into_captures_iter(re.create_captures(), |input, caps| {
                        re.try_search(cache, input, caps)
                    })
                    .infallible()
                    .take(test.match_limit().unwrap_or(std::usize::MAX))
                    .map(|caps| testify_captures(&caps));
                TestResult::captures(it)
            }
            SearchKind::Overlapping => {
                // The one-pass DFA does not support any kind of overlapping
                // search. This is not just a matter of not having the API.
                // It's fundamentally incompatible with the one-pass concept.
                // If overlapping matches were possible, then the one-pass DFA
                // would fail to build.
                TestResult::skip()
            }
        },
        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
    }
}

/// Configures the given regex builder with all relevant settings on the given
/// regex test.
///
/// If the regex test has a setting that is unsupported, then this returns
/// false (implying the test should be skipped).
fn configure_onepass_builder(
    test: &RegexTest,
    builder: &mut onepass::Builder,
) -> bool {
    if !test.anchored() {
        return false;
    }
    let match_kind = match untestify_kind(test.match_kind()) {
        None => return false,
        Some(k) => k,
    };

    let config = DFA::config().match_kind(match_kind);
    builder
        .configure(config)
        .syntax(config_syntax(test))
        .thompson(config_thompson(test));
    true
}

/// Configuration of a Thompson NFA compiler from a regex test.
fn config_thompson(test: &RegexTest) -> thompson::Config {
    let mut lookm = regex_automata::util::look::LookMatcher::new();
    lookm.set_line_terminator(test.line_terminator());
    thompson::Config::new().utf8(test.utf8()).look_matcher(lookm)
}

/// Configuration of the regex parser from a regex test.
fn config_syntax(test: &RegexTest) -> syntax::Config {
    syntax::Config::new()
        .case_insensitive(test.case_insensitive())
        .unicode(test.unicode())
        .utf8(test.utf8())
        .line_terminator(test.line_terminator())
}