third_party/rust/regex-automata/tests/lib.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114

// We have a similar config in the regex-automata crate root. Basically, it is
// just too annoying to deal with dead code when a subset of features is
// enabled.
//
// Concretely: unless *every* feature listed below is enabled, the dead code,
// unused import and unused variable lints are allowed for this whole crate.
#![cfg_attr(
    not(all(
        feature = "std",
        feature = "nfa",
        feature = "dfa",
        feature = "hybrid",
        feature = "perf-literal-substring",
        feature = "perf-literal-multisubstring",
    )),
    allow(dead_code, unused_imports, unused_variables)
)]
// Similar deal with Miri. Just let dead code warnings be. The same three
// lints are allowed whenever the `miri` cfg is set, regardless of features.
#![cfg_attr(miri, allow(dead_code, unused_imports, unused_variables))]

// Each test module below is only compiled when the Cargo feature(s) named in
// its `cfg` gate are enabled. Modules gated with `any(...)` are compiled when
// at least one of the listed features is enabled.
#[cfg(any(feature = "dfa-search", feature = "dfa-onepass"))]
mod dfa;
#[cfg(feature = "dfa-search")]
mod fuzz;
#[cfg(feature = "dfa-search")]
mod gen;
#[cfg(feature = "hybrid")]
mod hybrid;
#[cfg(feature = "meta")]
mod meta;
#[cfg(any(feature = "nfa-backtrack", feature = "nfa-pikevm"))]
mod nfa;

fn suite() -> anyhow::Result<regex_test::RegexTests> {
    let _ = env_logger::try_init();

    let mut tests = regex_test::RegexTests::new();
    macro_rules! load {
        ($name:expr) => {{
            const DATA: &[u8] =
                include_bytes!(concat!("../../testdata/", $name, ".toml"));
            tests.load_slice($name, DATA)?;
        }};
    }

    load!("anchored");
    load!("bytes");
    load!("crazy");
    load!("crlf");
    load!("earliest");
    load!("empty");
    load!("expensive");
    load!("flags");
    load!("iter");
    load!("leftmost-all");
    load!("line-terminator");
    load!("misc");
    load!("multiline");
    load!("no-unicode");
    load!("overlapping");
    load!("regression");
    load!("set");
    load!("substring");
    load!("unicode");
    load!("utf8");
    load!("word-boundary");
    load!("fowler/basic");
    load!("fowler/nullsubexpr");
    load!("fowler/repetition");

    Ok(tests)
}

/// Build the `regex_automata::Input` for the search described by `test`.
///
/// The haystack, the search range and the anchored mode are all taken from
/// the given test: the range is the test's bounds, and the search is anchored
/// exactly when the test says it is.
fn create_input<'h>(
    test: &'h regex_test::RegexTest,
) -> regex_automata::Input<'h> {
    use regex_automata::{Anchored, Input};

    let span = test.bounds();
    let is_anchored = test.anchored();
    let input = Input::new(test.haystack()).range(span.start..span.end);
    if is_anchored {
        input.anchored(Anchored::Yes)
    } else {
        input.anchored(Anchored::No)
    }
}

/// Translate regex-automata captures into the capture representation used by
/// the test suite.
///
/// # Panics
///
/// Panics if `caps` does not represent a match, or if the test suite's
/// `Captures` constructor rejects the converted groups.
fn testify_captures(
    caps: &regex_automata::util::captures::Captures,
) -> regex_test::Captures {
    assert!(caps.is_match(), "expected captures to represent a match");
    // Unwrapping is fine here: we assume 'caps' represents a match, so it
    // carries a pattern.
    let pattern_index = caps.pattern().unwrap().as_usize();
    // Each group is either absent (None) or a span that is copied over
    // field by field.
    let groups = caps.iter().map(|maybe_span| {
        maybe_span.map(|sp| regex_test::Span {
            start: sp.start,
            end: sp.end,
        })
    });
    // Likewise OK to unwrap: a match always gives a non-zero number of groups
    // with the first group being non-None.
    regex_test::Captures::new(pattern_index, groups).unwrap()
}

/// Convert a test harness match kind to a regex-automata match kind. If
/// regex-automata doesn't support the harness kind, then `None` is returned.
fn untestify_kind(
    kind: regex_test::MatchKind,
) -> Option<regex_automata::MatchKind> {
    match kind {
        regex_test::MatchKind::All => Some(regex_automata::MatchKind::All),
        regex_test::MatchKind::LeftmostFirst => {
            Some(regex_automata::MatchKind::LeftmostFirst)
        }
        regex_test::MatchKind::LeftmostLongest => None,
    }
}