vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

use regex_automata::{
    nfa::thompson::{
        self,
        pikevm::{self, PikeVM},
    },
    MatchKind, SyntaxConfig,
};
use regex_syntax as syntax;

use regex_test::{
    bstr::{BString, ByteSlice},
    CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
    SearchKind as TestSearchKind, TestResult, TestRunner,
};

use crate::{suite, Result};

/// Tests the default configuration of the hybrid NFA/DFA.
#[test]
fn default() -> Result<()> {
    let builder = PikeVM::builder();
    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
    Ok(())
}

fn compiler(
    mut builder: pikevm::Builder,
) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
    move |test, regexes| {
        let regexes = regexes
            .iter()
            .map(|r| r.to_str().map(|s| s.to_string()))
            .collect::<std::result::Result<Vec<String>, _>>()?;
        if !configure_pikevm_builder(test, &mut builder) {
            return Ok(CompiledRegex::skip());
        }
        let re = builder.build_many(&regexes)?;
        let mut cache = re.create_cache();
        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
            run_test(&re, &mut cache, test)
        }))
    }
}

fn run_test(
    re: &PikeVM,
    cache: &mut pikevm::Cache,
    test: &RegexTest,
) -> Vec<TestResult> {
    // let is_match = if re.is_match(cache, test.input()) {
    // TestResult::matched()
    // } else {
    // TestResult::no_match()
    // };
    // let is_match = is_match.name("is_match");

    let find_matches = match test.search_kind() {
        TestSearchKind::Earliest => {
            TestResult::skip().name("find_earliest_iter")
        }
        TestSearchKind::Leftmost => {
            let it = re
                .find_leftmost_iter(cache, test.input())
                .take(test.match_limit().unwrap_or(std::usize::MAX))
                .map(|m| Match {
                    id: m.pattern().as_usize(),
                    start: m.start(),
                    end: m.end(),
                });
            TestResult::matches(it).name("find_leftmost_iter")
        }
        TestSearchKind::Overlapping => {
            TestResult::skip().name("find_overlapping_iter")
        }
    };
    // vec![is_match, find_matches]
    vec![find_matches]
}

/// Configures the given regex builder with all relevant settings on the given
/// regex test.
///
/// If the regex test has a setting that is unsupported, then this returns
/// false (implying the test should be skipped).
fn configure_pikevm_builder(
    test: &RegexTest,
    builder: &mut pikevm::Builder,
) -> bool {
    let pikevm_config =
        PikeVM::config().anchored(test.anchored()).utf8(test.utf8());
    builder
        .configure(pikevm_config)
        .syntax(config_syntax(test))
        .thompson(config_thompson(test));
    true
}

/// Configuration of a Thompson NFA compiler from a regex test.
fn config_thompson(test: &RegexTest) -> thompson::Config {
    thompson::Config::new().utf8(test.utf8())
}

/// Configuration of the regex parser from a regex test.
fn config_syntax(test: &RegexTest) -> SyntaxConfig {
    SyntaxConfig::new()
        .case_insensitive(test.case_insensitive())
        .unicode(test.unicode())
        .utf8(test.utf8())
}