diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:18:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:18:32 +0000 |
commit | 4547b622d8d29df964fa2914213088b148c498fc (patch) | |
tree | 9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/regex-automata/tests | |
parent | Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz rustc-4547b622d8d29df964fa2914213088b148c498fc.zip |
Merging upstream version 1.67.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/tests')
42 files changed, 8632 insertions, 822 deletions
diff --git a/vendor/regex-automata/tests/collection.rs b/vendor/regex-automata/tests/collection.rs deleted file mode 100644 index 68b03229e..000000000 --- a/vendor/regex-automata/tests/collection.rs +++ /dev/null @@ -1,461 +0,0 @@ -use std::collections::BTreeMap; -use std::env; -use std::fmt::{self, Write}; -use std::thread; - -use regex; -use regex_automata::{DenseDFA, ErrorKind, Regex, RegexBuilder, StateID, DFA}; -use serde_bytes; -use toml; - -macro_rules! load { - ($col:ident, $path:expr) => { - $col.extend(RegexTests::load( - concat!("../data/tests/", $path), - include_bytes!(concat!("../data/tests/", $path)), - )); - }; -} - -lazy_static! { - pub static ref SUITE: RegexTestCollection = { - let mut col = RegexTestCollection::new(); - load!(col, "fowler/basic.toml"); - load!(col, "fowler/nullsubexpr.toml"); - load!(col, "fowler/repetition.toml"); - load!(col, "fowler/repetition-long.toml"); - load!(col, "crazy.toml"); - load!(col, "flags.toml"); - load!(col, "iter.toml"); - load!(col, "no-unicode.toml"); - load!(col, "unicode.toml"); - col - }; -} - -#[derive(Clone, Debug)] -pub struct RegexTestCollection { - pub by_name: BTreeMap<String, RegexTest>, -} - -#[derive(Clone, Debug, Deserialize)] -pub struct RegexTests { - pub tests: Vec<RegexTest>, -} - -#[derive(Clone, Debug, Deserialize)] -pub struct RegexTest { - pub name: String, - #[serde(default)] - pub options: Vec<RegexTestOption>, - pub pattern: String, - #[serde(with = "serde_bytes")] - pub input: Vec<u8>, - #[serde(rename = "matches")] - pub matches: Vec<Match>, - #[serde(default)] - pub captures: Vec<Option<Match>>, - #[serde(default)] - pub fowler_line_number: Option<u64>, -} - -#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)] -#[serde(rename_all = "kebab-case")] -pub enum RegexTestOption { - Anchored, - CaseInsensitive, - NoUnicode, - Escaped, - #[serde(rename = "invalid-utf8")] - InvalidUTF8, -} - -#[derive(Clone, Copy, Deserialize, Eq, PartialEq)] -pub struct Match { - pub start: usize, - 
pub end: usize, -} - -impl RegexTestCollection { - fn new() -> RegexTestCollection { - RegexTestCollection { by_name: BTreeMap::new() } - } - - fn extend(&mut self, tests: RegexTests) { - for test in tests.tests { - let name = test.name.clone(); - if self.by_name.contains_key(&name) { - panic!("found duplicate test {}", name); - } - self.by_name.insert(name, test); - } - } - - pub fn tests(&self) -> Vec<&RegexTest> { - self.by_name.values().collect() - } -} - -impl RegexTests { - fn load(path: &str, slice: &[u8]) -> RegexTests { - let mut data: RegexTests = toml::from_slice(slice) - .expect(&format!("failed to load {}", path)); - for test in &mut data.tests { - if test.options.contains(&RegexTestOption::Escaped) { - test.input = unescape_bytes(&test.input); - } - } - data - } -} - -#[derive(Debug)] -pub struct RegexTester { - asserted: bool, - results: RegexTestResults, - skip_expensive: bool, - whitelist: Vec<regex::Regex>, - blacklist: Vec<regex::Regex>, -} - -impl Drop for RegexTester { - fn drop(&mut self) { - // If we haven't asserted yet, then the test is probably buggy, so - // fail it. But if we're already panicking (e.g., a bug in the regex - // engine), then don't double-panic, which causes an immediate abort. 
- if !thread::panicking() && !self.asserted { - panic!("must call RegexTester::assert at end of test"); - } - } -} - -impl RegexTester { - pub fn new() -> RegexTester { - let mut tester = RegexTester { - asserted: false, - results: RegexTestResults::default(), - skip_expensive: false, - whitelist: vec![], - blacklist: vec![], - }; - for x in env::var("REGEX_TEST").unwrap_or("".to_string()).split(",") { - let x = x.trim(); - if x.is_empty() { - continue; - } - if x.starts_with("-") { - tester = tester.blacklist(&x[1..]); - } else { - tester = tester.whitelist(x); - } - } - tester - } - - pub fn skip_expensive(mut self) -> RegexTester { - self.skip_expensive = true; - self - } - - pub fn whitelist(mut self, name: &str) -> RegexTester { - self.whitelist.push(regex::Regex::new(name).unwrap()); - self - } - - pub fn blacklist(mut self, name: &str) -> RegexTester { - self.blacklist.push(regex::Regex::new(name).unwrap()); - self - } - - pub fn assert(&mut self) { - self.asserted = true; - self.results.assert(); - } - - pub fn build_regex<S: StateID>( - &self, - mut builder: RegexBuilder, - test: &RegexTest, - ) -> Option<Regex<DenseDFA<Vec<S>, S>>> { - if self.skip(test) { - return None; - } - self.apply_options(test, &mut builder); - - match builder.build_with_size::<S>(&test.pattern) { - Ok(re) => Some(re), - Err(err) => { - if let ErrorKind::Unsupported(_) = *err.kind() { - None - } else { - panic!( - "failed to build {:?} with pattern '{:?}': {}", - test.name, test.pattern, err - ); - } - } - } - } - - pub fn test_all<'a, I, T>(&mut self, builder: RegexBuilder, tests: I) - where - I: IntoIterator<IntoIter = T, Item = &'a RegexTest>, - T: Iterator<Item = &'a RegexTest>, - { - for test in tests { - let builder = builder.clone(); - let re: Regex = match self.build_regex(builder, test) { - None => continue, - Some(re) => re, - }; - self.test(test, &re); - } - } - - pub fn test<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) { - self.test_is_match(test, re); - 
self.test_find(test, re); - // Some tests (namely, fowler) are designed only to detect the - // first match even if there are more subsequent matches. To that - // end, we only test match iteration when the number of matches - // expected is not 1, or if the test name has 'iter' in it. - if test.name.contains("iter") || test.matches.len() != 1 { - self.test_find_iter(test, re); - } - } - - pub fn test_is_match<'a, D: DFA>( - &mut self, - test: &RegexTest, - re: &Regex<D>, - ) { - self.asserted = false; - - let got = re.is_match(&test.input); - let expected = test.matches.len() >= 1; - if got == expected { - self.results.succeeded.push(test.clone()); - return; - } - self.results.failed.push(RegexTestFailure { - test: test.clone(), - kind: RegexTestFailureKind::IsMatch, - }); - } - - pub fn test_find<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) { - self.asserted = false; - - let got = - re.find(&test.input).map(|(start, end)| Match { start, end }); - if got == test.matches.get(0).map(|&m| m) { - self.results.succeeded.push(test.clone()); - return; - } - self.results.failed.push(RegexTestFailure { - test: test.clone(), - kind: RegexTestFailureKind::Find { got }, - }); - } - - pub fn test_find_iter<'a, D: DFA>( - &mut self, - test: &RegexTest, - re: &Regex<D>, - ) { - self.asserted = false; - - let got: Vec<Match> = re - .find_iter(&test.input) - .map(|(start, end)| Match { start, end }) - .collect(); - if got == test.matches { - self.results.succeeded.push(test.clone()); - return; - } - self.results.failed.push(RegexTestFailure { - test: test.clone(), - kind: RegexTestFailureKind::FindIter { got }, - }); - } - - fn skip(&self, test: &RegexTest) -> bool { - if self.skip_expensive { - if test.name.starts_with("repetition-long") { - return true; - } - } - if !self.blacklist.is_empty() { - if self.blacklist.iter().any(|re| re.is_match(&test.name)) { - return true; - } - } - if !self.whitelist.is_empty() { - if !self.whitelist.iter().any(|re| 
re.is_match(&test.name)) { - return true; - } - } - false - } - - fn apply_options(&self, test: &RegexTest, builder: &mut RegexBuilder) { - for opt in &test.options { - match *opt { - RegexTestOption::Anchored => { - builder.anchored(true); - } - RegexTestOption::CaseInsensitive => { - builder.case_insensitive(true); - } - RegexTestOption::NoUnicode => { - builder.unicode(false); - } - RegexTestOption::Escaped => {} - RegexTestOption::InvalidUTF8 => { - builder.allow_invalid_utf8(true); - } - } - } - } -} - -#[derive(Clone, Debug, Default)] -pub struct RegexTestResults { - /// Tests that succeeded. - pub succeeded: Vec<RegexTest>, - /// Failed tests, indexed by group name. - pub failed: Vec<RegexTestFailure>, -} - -#[derive(Clone, Debug)] -pub struct RegexTestFailure { - test: RegexTest, - kind: RegexTestFailureKind, -} - -#[derive(Clone, Debug)] -pub enum RegexTestFailureKind { - IsMatch, - Find { got: Option<Match> }, - FindIter { got: Vec<Match> }, -} - -impl RegexTestResults { - pub fn assert(&self) { - if self.failed.is_empty() { - return; - } - let failures = self - .failed - .iter() - .map(|f| f.to_string()) - .collect::<Vec<String>>() - .join("\n\n"); - panic!( - "found {} failures:\n{}\n{}\n{}\n\n\ - Set the REGEX_TEST environment variable to filter tests, \n\ - e.g., REGEX_TEST=crazy-misc,-crazy-misc2 runs every test \n\ - whose name contains crazy-misc but not crazy-misc2\n\n", - self.failed.len(), - "~".repeat(79), - failures.trim(), - "~".repeat(79) - ) - } -} - -impl fmt::Display for RegexTestFailure { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}: {}\n \ - options: {:?}\n \ - pattern: {}\n \ - pattern (escape): {}\n \ - input: {}\n \ - input (escape): {}\n \ - input (hex): {}", - self.test.name, - self.kind.fmt(&self.test)?, - self.test.options, - self.test.pattern, - escape_default(&self.test.pattern), - nice_raw_bytes(&self.test.input), - escape_bytes(&self.test.input), - hex_bytes(&self.test.input) - ) - } -} - 
-impl RegexTestFailureKind { - fn fmt(&self, test: &RegexTest) -> Result<String, fmt::Error> { - let mut buf = String::new(); - match *self { - RegexTestFailureKind::IsMatch => { - if let Some(&m) = test.matches.get(0) { - write!(buf, "expected match (at {}), but none found", m)? - } else { - write!(buf, "expected no match, but found a match")? - } - } - RegexTestFailureKind::Find { got } => write!( - buf, - "expected {:?}, but found {:?}", - test.matches.get(0), - got - )?, - RegexTestFailureKind::FindIter { ref got } => write!( - buf, - "expected {:?}, but found {:?}", - test.matches, got - )?, - } - Ok(buf) - } -} - -impl fmt::Display for Match { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "({}, {})", self.start, self.end) - } -} - -impl fmt::Debug for Match { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "({}, {})", self.start, self.end) - } -} - -fn nice_raw_bytes(bytes: &[u8]) -> String { - use std::str; - - match str::from_utf8(bytes) { - Ok(s) => s.to_string(), - Err(_) => escape_bytes(bytes), - } -} - -fn escape_bytes(bytes: &[u8]) -> String { - use std::ascii; - - let escaped = bytes - .iter() - .flat_map(|&b| ascii::escape_default(b)) - .collect::<Vec<u8>>(); - String::from_utf8(escaped).unwrap() -} - -fn hex_bytes(bytes: &[u8]) -> String { - bytes.iter().map(|&b| format!(r"\x{:02X}", b)).collect() -} - -fn escape_default(s: &str) -> String { - s.chars().flat_map(|c| c.escape_default()).collect() -} - -fn unescape_bytes(bytes: &[u8]) -> Vec<u8> { - use std::str; - use unescape::unescape; - - unescape(&str::from_utf8(bytes).expect("all input must be valid UTF-8")) -} diff --git a/vendor/regex-automata/tests/data/bytes.toml b/vendor/regex-automata/tests/data/bytes.toml new file mode 100644 index 000000000..eb3a0942e --- /dev/null +++ b/vendor/regex-automata/tests/data/bytes.toml @@ -0,0 +1,235 @@ +# These are tests specifically crafted for regexes that can match arbitrary +# bytes. 
In some cases, we also test the Unicode variant as well, just because +# it's good sense to do so. But also, these tests aren't really about Unicode, +# but whether matches are only reported at valid UTF-8 boundaries. For most +# tests in this entire collection, utf8 = true. But for these tests, we use +# utf8 = false. + +[[tests]] +name = "word-boundary-ascii" +regex = ' \b' +input = " δ" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "word-boundary-unicode" +regex = ' \b' +input = " δ" +matches = [[0, 1]] +unicode = true +utf8 = false + +[[tests]] +name = "word-boundary-ascii-not" +regex = ' \B' +input = " δ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[tests]] +name = "word-boundary-unicode-not" +regex = ' \B' +input = " δ" +matches = [] +unicode = true +utf8 = false + +[[tests]] +name = "perl-word-ascii" +regex = '\w+' +input = "aδ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[tests]] +name = "perl-word-unicode" +regex = '\w+' +input = "aδ" +matches = [[0, 3]] +unicode = true +utf8 = false + +[[tests]] +name = "perl-decimal-ascii" +regex = '\d+' +input = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false +utf8 = false + +[[tests]] +name = "perl-decimal-unicode" +regex = '\d+' +input = "1२३9" +matches = [[0, 8]] +unicode = true +utf8 = false + +[[tests]] +name = "perl-whitespace-ascii" +regex = '\s+' +input = " \u1680" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[tests]] +name = "perl-whitespace-unicode" +regex = '\s+' +input = " \u1680" +matches = [[0, 4]] +unicode = true +utf8 = false + +# The first `(.+)` matches two Unicode codepoints, but can't match the 5th +# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and +# matches. 
+[[tests]] +name = "mixed-dot" +regex = '(.+)(?-u)(.+)' +input = '\xCE\x93\xCE\x94\xFF' +captures = [ + [[0, 5], [0, 4], [4, 5]], +] +unescape = true +unicode = true +utf8 = false + +[[tests]] +name = "case-one-ascii" +regex = 'a' +input = "A" +matches = [[0, 1]] +case_insensitive = true +unicode = false +utf8 = false + +[[tests]] +name = "case-one-unicode" +regex = 'a' +input = "A" +matches = [[0, 1]] +case_insensitive = true +unicode = true +utf8 = false + +[[tests]] +name = "case-class-simple-ascii" +regex = '[a-z]+' +input = "AaAaA" +matches = [[0, 5]] +case_insensitive = true +unicode = false +utf8 = false + +[[tests]] +name = "case-class-ascii" +regex = '[a-z]+' +input = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case_insensitive = true +unicode = false +utf8 = false + +[[tests]] +name = "case-class-unicode" +regex = '[a-z]+' +input = "aA\u212AaA" +matches = [[0, 7]] +case_insensitive = true +unicode = true +utf8 = false + +[[tests]] +name = "negate-ascii" +regex = '[^a]' +input = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + +[[tests]] +name = "negate-unicode" +regex = '[^a]' +input = "δ" +matches = [[0, 2]] +unicode = true +utf8 = false + +# When utf8=true, this won't match, because the implicit '.*?' prefix is +# Unicode aware and will refuse to match through invalid UTF-8 bytes. 
+[[tests]] +name = "dotstar-prefix-ascii" +regex = 'a' +input = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = false +utf8 = false + +[[tests]] +name = "dotstar-prefix-unicode" +regex = 'a' +input = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = true +utf8 = false + +[[tests]] +name = "null-bytes" +regex = '(?P<cstr>[^\x00]+)\x00' +input = 'foo\x00' +captures = [ + [[0, 4], [0, 3]], +] +unescape = true +unicode = false +utf8 = false + +[[tests]] +name = "invalid-utf8-anchor-100" +regex = '\xCC?^' +input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[tests]] +name = "invalid-utf8-anchor-200" +regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' +input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[22, 22]] +unescape = true +unicode = false +utf8 = false + +[[tests]] +name = "invalid-utf8-anchor-300" +regex = '^|ddp\xff\xffdddddlQd@\x80' +input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[tests]] +name = "word-boundary-ascii-100" +regex = '\Bx\B' +input = "áxβ" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "word-boundary-ascii-200" +regex = '\B' +input = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false diff --git a/vendor/regex-automata/tests/data/crazy.toml b/vendor/regex-automata/tests/data/crazy.toml new file mode 100644 index 000000000..549b86cca --- /dev/null +++ b/vendor/regex-automata/tests/data/crazy.toml @@ -0,0 +1,302 @@ +# TODO: There are still a couple of manually written tests in crazy.rs. 
+ +[[tests]] +name = "ranges" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +input = "num: 255" +matches = [[5, 8]] + +[[tests]] +name = "ranges-not" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +input = "num: 256" +matches = [] + +[[tests]] +name = "float1" +regex = '[-+]?[0-9]*\.?[0-9]+' +input = "0.1" +matches = [[0, 3]] + +[[tests]] +name = "float2" +regex = '[-+]?[0-9]*\.?[0-9]+' +input = "0.1.2" +matches = [[0, 3]] +match_limit = 1 + +[[tests]] +name = "float3" +regex = '[-+]?[0-9]*\.?[0-9]+' +input = "a1.2" +matches = [[1, 4]] + +[[tests]] +name = "float4" +regex = '[-+]?[0-9]*\.?[0-9]+' +input = "1.a" +matches = [[0, 1]] + +[[tests]] +name = "float5" +regex = '^[-+]?[0-9]*\.?[0-9]+$' +input = "1.a" +matches = [] + +[[tests]] +name = "email" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +input = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[tests]] +name = "email-not" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +input = "mine is jam.slam@gmail " +matches = [] + +[[tests]] +name = "email-big" +regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' +input = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[tests]] +name = "date1" +regex = '(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$' +input = "1900-01-01" +matches = [[0, 10]] + +[[tests]] +name = "date2" +regex = '(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$' +input = "1900-00-01" +matches = [] + +[[tests]] +name = "date3" +regex = '(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$' +input = "1900-13-01" +matches = [] + +[[tests]] +name = "start-end-empty" +regex = '^$' +input = "" +matches = [[0, 0]] + +[[tests]] +name = "start-end-empty-rev" +regex = '$^' +input = "" +matches = [[0, 0]] + +[[tests]] +name = "start-end-empty-many-1" +regex = 
'^$^$^$' +input = "" +matches = [[0, 0]] + +[[tests]] +name = "start-end-empty-many-2" +regex = '^^^$$$' +input = "" +matches = [[0, 0]] + +[[tests]] +name = "start-end-empty-rep" +regex = '(?:^$)*' +input = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[tests]] +name = "start-end-empty-rep-rev" +regex = '(?:$^)*' +input = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[tests]] +name = "neg-class-letter" +regex = '[^ac]' +input = "acx" +matches = [[2, 3]] + +[[tests]] +name = "neg-class-letter-comma" +regex = '[^a,]' +input = "a,x" +matches = [[2, 3]] + +[[tests]] +name = "neg-class-letter-space" +regex = '[^a[:space:]]' +input = "a x" +matches = [[2, 3]] + +[[tests]] +name = "neg-class-comma" +regex = '[^,]' +input = ",,x" +matches = [[2, 3]] + +[[tests]] +name = "neg-class-space" +regex = '[^[:space:]]' +input = " a" +matches = [[1, 2]] + +[[tests]] +name = "neg-class-space-comma" +regex = '[^,[:space:]]' +input = ", a" +matches = [[2, 3]] + +[[tests]] +name = "neg-class-comma-space" +regex = '[^[:space:],]' +input = " ,a" +matches = [[2, 3]] + +[[tests]] +name = "neg-class-ascii" +regex = '[^[:alpha:]Z]' +input = "A1" +matches = [[1, 2]] + +[[tests]] +name = "lazy-many-many" +regex = '((?:.*)*?)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "lazy-many-optional" +regex = '((?:.?)*?)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "lazy-one-many-many" +regex = '((?:.*)+?)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "lazy-one-many-optional" +regex = '((?:.?)+?)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "lazy-range-min-many" +regex = '((?:.*){1,}?)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "lazy-range-many" +regex = '((?:.*){1,2}?)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "greedy-many-many" +regex = '((?:.*)*)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "greedy-many-optional" +regex = '((?:.?)*)=' +input = "a=b" 
+matches = [[0, 2]] + +[[tests]] +name = "greedy-one-many-many" +regex = '((?:.*)+)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "greedy-one-many-optional" +regex = '((?:.?)+)=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "greedy-range-min-many" +regex = '((?:.*){1,})=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "greedy-range-many" +regex = '((?:.*){1,2})=' +input = "a=b" +matches = [[0, 2]] + +[[tests]] +name = "empty1" +regex = '' +input = "" +matches = [[0, 0]] + +[[tests]] +name = "empty2" +regex = '' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty3" +regex = '()' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty4" +regex = '()*' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty5" +regex = '()+' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty6" +regex = '()?' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty7" +regex = '()()' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty8" +regex = '()+|z' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty9" +regex = 'z|()+' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty10" +regex = '()+|b' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty11" +regex = 'b|()+' +input = "abc" +matches = [[0, 0], [1, 2], [3, 3]] diff --git a/vendor/regex-automata/tests/data/earliest.toml b/vendor/regex-automata/tests/data/earliest.toml new file mode 100644 index 000000000..6714a850b --- /dev/null +++ b/vendor/regex-automata/tests/data/earliest.toml @@ -0,0 +1,48 @@ +[[tests]] +name = "no-greedy-100" +regex = 'a+' +input = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search_kind = "earliest" + +[[tests]] +name = "no-greedy-200" +regex = 'abc+' +input = "zzzabccc" +matches = 
[[3, 6]] +search_kind = "earliest" + +[[tests]] +name = "is-ungreedy" +regex = 'a+?' +input = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search_kind = "earliest" + +[[tests]] +name = "look-start-test" +regex = '^(abc|a)' +input = "abc" +matches = [[0, 1]] +search_kind = "earliest" + +[[tests]] +name = "look-end-test" +regex = '(abc|a)$' +input = "abc" +matches = [[0, 3]] +search_kind = "earliest" + +[[tests]] +name = "no-leftmost-first-100" +regex = 'abc|a' +input = "abc" +matches = [[0, 1]] +search_kind = "earliest" + +[[tests]] +name = "no-leftmost-first-200" +regex = 'aba|a' +input = "aba" +matches = [[0, 1], [2, 3]] +search_kind = "earliest" diff --git a/vendor/regex-automata/tests/data/empty.toml b/vendor/regex-automata/tests/data/empty.toml new file mode 100644 index 000000000..ad703e601 --- /dev/null +++ b/vendor/regex-automata/tests/data/empty.toml @@ -0,0 +1,113 @@ +[[tests]] +name = "100" +regex = "|b" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "110" +regex = "b|" +input = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[tests]] +name = "120" +regex = "|z" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "130" +regex = "z|" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "200" +regex = "|" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "210" +regex = "||" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "220" +regex = "||b" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "230" +regex = "b||" +input = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[tests]] +name = "240" +regex = "||z" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "300" +regex = "(?:)|b" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "310" +regex = "b|(?:)" +input = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[tests]] +name = "320" 
+regex = "(?:|)" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "330" +regex = "(?:|)|z" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "400" +regex = "a(?:)|b" +input = "abc" +matches = [[0, 1], [1, 2]] + +[[tests]] +name = "500" +regex = "" +input = "" +matches = [[0, 0]] + +[[tests]] +name = "510" +regex = "" +input = "a" +matches = [[0, 0], [1, 1]] + +[[tests]] +name = "520" +regex = "" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "600" +regex = '(|a)*' +input = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "610" +regex = '(|a)+' +input = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] diff --git a/vendor/regex-automata/tests/data/expensive.toml b/vendor/regex-automata/tests/data/expensive.toml new file mode 100644 index 000000000..e062e3902 --- /dev/null +++ b/vendor/regex-automata/tests/data/expensive.toml @@ -0,0 +1,12 @@ +# These represent tests that may be expensive to run on some regex engines. For +# example, tests that build a full DFA ahead of time and minimize it can take a +# horrendously long time on regexes that are large (or result in an explosion +# in the number of states). We group these tests together so that such engines +# can simply skip these tests. 
+ +# See: https://github.com/rust-lang/regex/issues/98 +[[tests]] +name = "regression-many-repeat-no-stack-overflow" +regex = '^.{1,2500}' +input = "a" +matches = [[0, 1]] diff --git a/vendor/regex-automata/tests/data/flags.toml b/vendor/regex-automata/tests/data/flags.toml new file mode 100644 index 000000000..2b631ef23 --- /dev/null +++ b/vendor/regex-automata/tests/data/flags.toml @@ -0,0 +1,67 @@ +[[tests]] +name = "1" +regex = "(?i)abc" +input = "ABC" +matches = [[0, 3]] + +[[tests]] +name = "2" +regex = "(?i)a(?-i)bc" +input = "Abc" +matches = [[0, 3]] + +[[tests]] +name = "3" +regex = "(?i)a(?-i)bc" +input = "ABC" +matches = [] + +[[tests]] +name = "4" +regex = "(?is)a." +input = "A\n" +matches = [[0, 2]] + +[[tests]] +name = "5" +regex = "(?is)a.(?-is)a." +input = "A\nab" +matches = [[0, 4]] + +[[tests]] +name = "6" +regex = "(?is)a.(?-is)a." +input = "A\na\n" +matches = [] + +[[tests]] +name = "7" +regex = "(?is)a.(?-is:a.)?" +input = "A\na\n" +matches = [[0, 2]] +match_limit = 1 + +[[tests]] +name = "8" +regex = "(?U)a+" +input = "aa" +matches = [[0, 1]] +match_limit = 1 + +[[tests]] +name = "9" +regex = "(?U)a+?" +input = "aa" +matches = [[0, 2]] + +[[tests]] +name = "10" +regex = "(?U)(?-U)a+" +input = "aa" +matches = [[0, 2]] + +[[tests]] +name = "11" +regex = '(?m)(?:^\d+$\n?)+' +input = "123\n456\n789" +matches = [[0, 11]] diff --git a/vendor/regex-automata/tests/data/fowler/basic.toml b/vendor/regex-automata/tests/data/fowler/basic.toml new file mode 100644 index 000000000..c965f26ff --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/basic.toml @@ -0,0 +1,1638 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by scripts/fowler-to-toml. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[tests]] +name = "basic3" +regex = '''abracadabra$''' +input = '''abracadabracadabra''' +captures = [[[7, 18]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic4" +regex = '''a...b''' +input = '''abababbb''' +captures = [[[2, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic5" +regex = '''XXXXXX''' +input = '''..XXXXXX''' +captures = [[[2, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic6" +regex = '''\)''' +input = '''()''' +captures = [[[1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic7" +regex = '''a]''' +input = '''a]a''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic9" +regex = '''\}''' +input = '''}''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic10" +regex = '''\]''' +input = ''']''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic12" +regex = ''']''' +input = ''']''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic15" +regex = '''^a''' +input = '''ax''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic16" +regex = '''\^a''' +input = '''a^a''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic17" +regex = '''a\^''' +input = '''a^''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic18" +regex = '''a$''' +input = '''aa''' +captures = [[[1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic19" +regex = '''a\$''' +input = '''a$''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic20" +regex = '''^$''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic21" +regex = '''$^''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic22" +regex = '''a($)''' +input = '''aa''' +captures = [[[1, 2], [2, 2]]] 
+match_limit = 1 +unescape = true + +[[tests]] +name = "basic23" +regex = '''a*(^a)''' +input = '''aa''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic24" +regex = '''(..)*(...)*''' +input = '''a''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic25" +regex = '''(..)*(...)*''' +input = '''abcd''' +captures = [[[0, 4], [2, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic26" +regex = '''(ab|a)(bc|c)''' +input = '''abc''' +captures = [[[0, 3], [0, 2], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic27" +regex = '''(ab)c|abc''' +input = '''abc''' +captures = [[[0, 3], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic28" +regex = '''a{0}b''' +input = '''ab''' +captures = [[[1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic29" +regex = '''(a*)(b?)(b+)b{3}''' +input = '''aaabbbbbbb''' +captures = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic30" +regex = '''(a*)(b{0,1})(b{1,})b{3}''' +input = '''aaabbbbbbb''' +captures = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic32" +regex = '''((a|a)|a)''' +input = '''a''' +captures = [[[0, 1], [0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic33" +regex = '''(a*)(a|aa)''' +input = '''aaaa''' +captures = [[[0, 4], [0, 3], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic34" +regex = '''a*(a.|aa)''' +input = '''aaaa''' +captures = [[[0, 4], [2, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic35" +regex = '''a(b)|c(d)|a(e)f''' +input = '''aef''' +captures = [[[0, 3], [], [], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic36" +regex = '''(a|b)?.*''' +input = '''b''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic37" +regex = '''(a|b)c|a(b|c)''' 
+input = '''ac''' +captures = [[[0, 2], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic38" +regex = '''(a|b)c|a(b|c)''' +input = '''ab''' +captures = [[[0, 2], [], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic39" +regex = '''(a|b)*c|(a|ab)*c''' +input = '''abc''' +captures = [[[0, 3], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic40" +regex = '''(a|b)*c|(a|ab)*c''' +input = '''xc''' +captures = [[[1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic41" +regex = '''(.a|.b).*|.*(.a|.b)''' +input = '''xa''' +captures = [[[0, 2], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic42" +regex = '''a?(ab|ba)ab''' +input = '''abab''' +captures = [[[0, 4], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic43" +regex = '''a?(ac{0}b|ba)ab''' +input = '''abab''' +captures = [[[0, 4], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic44" +regex = '''ab|abab''' +input = '''abbabab''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic45" +regex = '''aba|bab|bba''' +input = '''baaabbbaba''' +captures = [[[5, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic46" +regex = '''aba|bab''' +input = '''baaabbbaba''' +captures = [[[6, 9]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic47" +regex = '''(aa|aaa)*|(a|aaaaa)''' +input = '''aa''' +captures = [[[0, 2], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic48" +regex = '''(a.|.a.)*|(a|.a...)''' +input = '''aa''' +captures = [[[0, 2], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic49" +regex = '''ab|a''' +input = '''xabc''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic50" +regex = '''ab|a''' +input = '''xxabc''' +captures = [[[2, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic51" +regex = '''(Ab|cD)*''' +input = '''aBcD''' 
+captures = [[[0, 4], [2, 4]]] +match_limit = 1 +unescape = true +case_insensitive = true + +[[tests]] +name = "basic52" +regex = '''[^-]''' +input = '''--a''' +captures = [[[2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic53" +regex = '''[a-]*''' +input = '''--a''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic54" +regex = '''[a-m-]*''' +input = '''--amoma--''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic55" +regex = ''':::1:::0:|:::1:1:0:''' +input = ''':::0:::1:::1:::0:''' +captures = [[[8, 17]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic56" +regex = ''':::1:::0:|:::1:1:1:''' +input = ''':::0:::1:::1:::0:''' +captures = [[[8, 17]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic57" +regex = '''[[:upper:]]''' +input = '''A''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic58" +regex = '''[[:lower:]]+''' +input = '''`az{''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic59" +regex = '''[[:upper:]]+''' +input = '''@AZ[''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic65" +regex = '''\n''' +input = '''\n''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic66" +regex = '''\n''' +input = '''\n''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic67" +regex = '''[^a]''' +input = '''\n''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic68" +regex = '''\na''' +input = '''\na''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic69" +regex = '''(a)(b)(c)''' +input = '''abc''' +captures = [[[0, 3], [0, 1], [1, 2], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic70" +regex = '''xxx''' +input = '''xxx''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name 
= "basic71" +regex = '''(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)''' +input = '''feb 6,''' +captures = [[[0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic72" +regex = '''(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)''' +input = '''2/7''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic73" +regex = '''(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)''' +input = '''feb 1,Feb 6''' +captures = [[[5, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic74" +regex = '''((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))''' +input = '''x''' +captures = [[[0, 1], [0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic75" +regex = '''((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*''' +input = '''xx''' +captures = [[[0, 2], [1, 2], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic76" +regex = '''a?(ab|ba)*''' +input = '''ababababababababababababababababababababababababababababababababababababababababa''' +captures = [[[0, 81], [79, 81]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic77" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +input = '''ababbabbbabbbabbbbabbbbaa''' +captures = [[[18, 25]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic78" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +input = '''ababbabbbabbbabbbbabaa''' +captures = [[[18, 22]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic79" +regex = '''aaac|aabc|abac|abbc|baac|babc|bbac|bbbc''' +input = '''baaabbbabac''' +captures = [[[7, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic80" +regex = '''.*''' +input = '''\x01\x7f''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic81" +regex = '''aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll''' +input = '''XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa''' +captures = 
[[[53, 57]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic83" +regex = '''a*a*a*a*a*b''' +input = '''aaaaaaaaab''' +captures = [[[0, 10]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic84" +regex = '''^''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic85" +regex = '''$''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic86" +regex = '''^$''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic87" +regex = '''^a$''' +input = '''a''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic88" +regex = '''abc''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic89" +regex = '''abc''' +input = '''xabcy''' +captures = [[[1, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic90" +regex = '''abc''' +input = '''ababc''' +captures = [[[2, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic91" +regex = '''ab*c''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic92" +regex = '''ab*bc''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic93" +regex = '''ab*bc''' +input = '''abbc''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic94" +regex = '''ab*bc''' +input = '''abbbbc''' +captures = [[[0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic95" +regex = '''ab+bc''' +input = '''abbc''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic96" +regex = '''ab+bc''' +input = '''abbbbc''' +captures = [[[0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic97" +regex = '''ab?bc''' +input = '''abbc''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic98" 
+regex = '''ab?bc''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic99" +regex = '''ab?c''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic100" +regex = '''^abc$''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic101" +regex = '''^abc''' +input = '''abcc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic102" +regex = '''abc$''' +input = '''aabc''' +captures = [[[1, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic103" +regex = '''^''' +input = '''abc''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic104" +regex = '''$''' +input = '''abc''' +captures = [[[3, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic105" +regex = '''a.c''' +input = '''abc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic106" +regex = '''a.c''' +input = '''axc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic107" +regex = '''a.*c''' +input = '''axyzc''' +captures = [[[0, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic108" +regex = '''a[bc]d''' +input = '''abd''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic109" +regex = '''a[b-d]e''' +input = '''ace''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic110" +regex = '''a[b-d]''' +input = '''aac''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic111" +regex = '''a[-b]''' +input = '''a-''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic112" +regex = '''a[b-]''' +input = '''a-''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic113" +regex = '''a]''' +input = '''a]''' +captures = [[[0, 2]]] +match_limit = 1 
+unescape = true + +[[tests]] +name = "basic114" +regex = '''a[]]b''' +input = '''a]b''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic115" +regex = '''a[^bc]d''' +input = '''aed''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic116" +regex = '''a[^-b]c''' +input = '''adc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic117" +regex = '''a[^]b]c''' +input = '''adc''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic118" +regex = '''ab|cd''' +input = '''abc''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic119" +regex = '''ab|cd''' +input = '''abcd''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic120" +regex = '''a\(b''' +input = '''a(b''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic121" +regex = '''a\(*b''' +input = '''ab''' +captures = [[[0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic122" +regex = '''a\(*b''' +input = '''a((b''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic123" +regex = '''((a))''' +input = '''abc''' +captures = [[[0, 1], [0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic124" +regex = '''(a)b(c)''' +input = '''abc''' +captures = [[[0, 3], [0, 1], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic125" +regex = '''a+b+c''' +input = '''aabbabc''' +captures = [[[4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic126" +regex = '''a*''' +input = '''aaa''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic128" +regex = '''(a*)*''' +input = '''-''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic129" +regex = '''(a*)+''' +input = '''-''' +captures = [[[0, 0], [0, 0]]] +match_limit = 1 +unescape = 
true + +[[tests]] +name = "basic131" +regex = '''(a*|b)*''' +input = '''-''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic132" +regex = '''(a+|b)*''' +input = '''ab''' +captures = [[[0, 2], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic133" +regex = '''(a+|b)+''' +input = '''ab''' +captures = [[[0, 2], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic134" +regex = '''(a+|b)?''' +input = '''ab''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic135" +regex = '''[^ab]*''' +input = '''cde''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic137" +regex = '''(^)*''' +input = '''-''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic138" +regex = '''a*''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic139" +regex = '''([abc])*d''' +input = '''abbbcd''' +captures = [[[0, 6], [4, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic140" +regex = '''([abc])*bcd''' +input = '''abcd''' +captures = [[[0, 4], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic141" +regex = '''a|b|c|d|e''' +input = '''e''' +captures = [[[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic142" +regex = '''(a|b|c|d|e)f''' +input = '''ef''' +captures = [[[0, 2], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic144" +regex = '''((a*|b))*''' +input = '''-''' +captures = [[[0, 0], [], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic145" +regex = '''abcd*efg''' +input = '''abcdefg''' +captures = [[[0, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic146" +regex = '''ab*''' +input = '''xabyabbbz''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic147" +regex = '''ab*''' +input = '''xayabbbz''' +captures = [[[1, 
2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic148" +regex = '''(ab|cd)e''' +input = '''abcde''' +captures = [[[2, 5], [2, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic149" +regex = '''[abhgefdc]ij''' +input = '''hij''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic150" +regex = '''(a|b)c*d''' +input = '''abcd''' +captures = [[[1, 4], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic151" +regex = '''(ab|ab*)bc''' +input = '''abc''' +captures = [[[0, 3], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic152" +regex = '''a([bc]*)c*''' +input = '''abc''' +captures = [[[0, 3], [1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic153" +regex = '''a([bc]*)(c*d)''' +input = '''abcd''' +captures = [[[0, 4], [1, 3], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic154" +regex = '''a([bc]+)(c*d)''' +input = '''abcd''' +captures = [[[0, 4], [1, 3], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic155" +regex = '''a([bc]*)(c+d)''' +input = '''abcd''' +captures = [[[0, 4], [1, 2], [2, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic156" +regex = '''a[bcd]*dcdcde''' +input = '''adcdcde''' +captures = [[[0, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic157" +regex = '''(ab|a)b*c''' +input = '''abc''' +captures = [[[0, 3], [0, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic158" +regex = '''((a)(b)c)(d)''' +input = '''abcd''' +captures = [[[0, 4], [0, 3], [0, 1], [1, 2], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic159" +regex = '''[A-Za-z_][A-Za-z0-9_]*''' +input = '''alpha''' +captures = [[[0, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic160" +regex = '''^a(bc+|b[eh])g|.h$''' +input = '''abh''' +captures = [[[1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic161" +regex = 
'''(bc+d$|ef*g.|h?i(j|k))''' +input = '''effgz''' +captures = [[[0, 5], [0, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic162" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +input = '''ij''' +captures = [[[0, 2], [0, 2], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic163" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +input = '''reffgz''' +captures = [[[1, 6], [1, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic164" +regex = '''(((((((((a)))))))))''' +input = '''a''' +captures = [[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic165" +regex = '''multiple words''' +input = '''multiple words yeah''' +captures = [[[0, 14]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic166" +regex = '''(.*)c(.*)''' +input = '''abcde''' +captures = [[[0, 5], [0, 2], [3, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic167" +regex = '''abcd''' +input = '''abcd''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic168" +regex = '''a(bc)d''' +input = '''abcd''' +captures = [[[0, 4], [1, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic169" +regex = '''a[\x01-\x03]?c''' +input = '''a\x02c''' +captures = [[[0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic170" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Qaddafi''' +captures = [[[0, 15], [], [10, 12]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic171" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Mo'ammar Gadhafi''' +captures = [[[0, 16], [], [11, 13]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic172" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Kaddafi''' +captures = [[[0, 15], [], [10, 12]]] +match_limit = 1 
+unescape = true + +[[tests]] +name = "basic173" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Qadhafi''' +captures = [[[0, 15], [], [10, 12]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic174" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Gadafi''' +captures = [[[0, 14], [], [10, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic175" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Mu'ammar Qadafi''' +captures = [[[0, 15], [], [11, 12]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic176" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Moamar Gaddafi''' +captures = [[[0, 14], [], [9, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic177" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Mu'ammar Qadhdhafi''' +captures = [[[0, 18], [], [13, 15]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic178" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Khaddafi''' +captures = [[[0, 16], [], [11, 13]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic179" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Ghaddafy''' +captures = [[[0, 16], [], [11, 13]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic180" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Ghadafi''' +captures = [[[0, 15], [], [11, 12]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic181" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Ghaddafi''' +captures = [[[0, 16], [], [11, 13]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic182" 
+regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muamar Kaddafi''' +captures = [[[0, 14], [], [9, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic183" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Quathafi''' +captures = [[[0, 16], [], [11, 13]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic184" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Muammar Gheddafi''' +captures = [[[0, 16], [], [11, 13]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic185" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Moammar Khadafy''' +captures = [[[0, 15], [], [11, 12]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic186" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +input = '''Moammar Qudhafi''' +captures = [[[0, 15], [], [10, 12]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic187" +regex = '''a+(b|c)*d+''' +input = '''aabcdd''' +captures = [[[0, 6], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic188" +regex = '''^.+$''' +input = '''vivi''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic189" +regex = '''^(.+)$''' +input = '''vivi''' +captures = [[[0, 4], [0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic190" +regex = '''^([^!.]+).att.com!(.+)$''' +input = '''gryphon.att.com!eby''' +captures = [[[0, 19], [0, 7], [16, 19]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic191" +regex = '''^([^!]+!)?([^!]+)$''' +input = '''bas''' +captures = [[[0, 3], [], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic192" +regex = '''^([^!]+!)?([^!]+)$''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic193" +regex = 
'''^([^!]+!)?([^!]+)$''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic194" +regex = '''^.+!([^!]+!)([^!]+)$''' +input = '''foo!bar!bas''' +captures = [[[0, 11], [4, 8], [8, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic195" +regex = '''((foo)|(bar))!bas''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 3], [], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic196" +regex = '''((foo)|(bar))!bas''' +input = '''foo!bar!bas''' +captures = [[[4, 11], [4, 7], [], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic197" +regex = '''((foo)|(bar))!bas''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 3], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic198" +regex = '''((foo)|bar)!bas''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic199" +regex = '''((foo)|bar)!bas''' +input = '''foo!bar!bas''' +captures = [[[4, 11], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic200" +regex = '''((foo)|bar)!bas''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 3], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic201" +regex = '''(foo|(bar))!bas''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 3], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic202" +regex = '''(foo|(bar))!bas''' +input = '''foo!bar!bas''' +captures = [[[4, 11], [4, 7], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic203" +regex = '''(foo|(bar))!bas''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic204" +regex = '''(foo|bar)!bas''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic205" +regex = '''(foo|bar)!bas''' +input = '''foo!bar!bas''' +captures = [[[4, 11], [4, 7]]] 
+match_limit = 1 +unescape = true + +[[tests]] +name = "basic206" +regex = '''(foo|bar)!bas''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic207" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +input = '''foo!bar!bas''' +captures = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic208" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +input = '''bas''' +captures = [[[0, 3], [], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic209" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic210" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +input = '''foo!bar!bas''' +captures = [[[0, 11], [], [], [4, 8], [8, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic211" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic212" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +input = '''bas''' +captures = [[[0, 3], [0, 3], [], [0, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic213" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +input = '''bar!bas''' +captures = [[[0, 7], [0, 7], [0, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic214" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +input = '''foo!bar!bas''' +captures = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic215" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +input = '''foo!bas''' +captures = [[[0, 7], [0, 7], [0, 4], [4, 7]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic216" +regex = '''.*(/XXX).*''' +input = '''/XXX''' +captures = [[[0, 4], 
[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic217" +regex = '''.*(\\XXX).*''' +input = '''\\XXX''' +captures = [[[0, 4], [0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic218" +regex = '''\\XXX''' +input = '''\\XXX''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic219" +regex = '''.*(/000).*''' +input = '''/000''' +captures = [[[0, 4], [0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic220" +regex = '''.*(\\000).*''' +input = '''\\000''' +captures = [[[0, 4], [0, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "basic221" +regex = '''\\000''' +input = '''\\000''' +captures = [[[0, 4]]] +match_limit = 1 +unescape = true + diff --git a/vendor/regex-automata/tests/data/fowler/dat/README b/vendor/regex-automata/tests/data/fowler/dat/README new file mode 100644 index 000000000..e70072500 --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/dat/README @@ -0,0 +1,24 @@ +Test data was taken from the Go distribution, which was in turn taken from the +testregex test suite: + + http://www2.research.att.com/~astopen/testregex/testregex.html + +Unfortunately, the above link is now dead, but the test data lives on. + +The LICENSE in this directory corresponds to the LICENSE that the data was +originally released under. + +The tests themselves were modified for RE2/Go. A couple were modified further +by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them. +(Yes, it seems like RE2/Go includes failing test cases.) This may or may not +have been a bad idea, but I think being consistent with an established Regex +library is worth something. + +After some number of years, these tests were transformed into a TOML format +using the fowler-to-toml script in the 'scripts' directory. 
To re-generate the +TOML files, then run the following from the root of this repository: + + ./scripts/fowler-to-toml tests/data/fowler tests/data/fowler/dat/*.dat + +which brings them into a sensible structured format in which other tests can +be written. diff --git a/vendor/regex-automata/tests/data/fowler/dat/basic.dat b/vendor/regex-automata/tests/data/fowler/dat/basic.dat new file mode 100644 index 000000000..e55efaeec --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/dat/basic.dat @@ -0,0 +1,221 @@ +NOTE all standard compliant implementations should pass these : 2002-05-31 + +BE abracadabra$ abracadabracadabra (7,18) +BE a...b abababbb (2,7) +BE XXXXXX ..XXXXXX (2,8) +E \) () (1,2) +BE a] a]a (0,2) +B } } (0,1) +E \} } (0,1) +BE \] ] (0,1) +B ] ] (0,1) +E ] ] (0,1) +B { { (0,1) +B } } (0,1) +BE ^a ax (0,1) +BE \^a a^a (1,3) +BE a\^ a^ (0,2) +BE a$ aa (1,2) +BE a\$ a$ (0,2) +BE ^$ NULL (0,0) +E $^ NULL (0,0) +E a($) aa (1,2)(2,2) +E a*(^a) aa (0,1)(0,1) +E (..)*(...)* a (0,0) +E (..)*(...)* abcd (0,4)(2,4) +E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) +E (ab)c|abc abc (0,3)(0,2) +E a{0}b ab (1,2) +E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) +E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) +E a{9876543210} NULL BADBR +E ((a|a)|a) a (0,1)(0,1)(0,1) +E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) +E a*(a.|aa) aaaa (0,4)(2,4) +E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) +E (a|b)?.* b (0,1)(0,1) +E (a|b)c|a(b|c) ac (0,2)(0,1) +E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) +E (a|b)*c|(a|ab)*c abc (0,3)(1,2) +E (a|b)*c|(a|ab)*c xc (1,2) +E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) +E a?(ab|ba)ab abab (0,4)(0,2) +E a?(ac{0}b|ba)ab abab (0,4)(0,2) +E ab|abab abbabab (0,2) +E aba|bab|bba baaabbbaba (5,8) +E aba|bab baaabbbaba (6,9) +E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) +E (a.|.a.)*|(a|.a...) 
aa (0,2)(0,2) +E ab|a xabc (1,3) +E ab|a xxabc (2,4) +Ei (Ab|cD)* aBcD (0,4)(2,4) +BE [^-] --a (2,3) +BE [a-]* --a (0,3) +BE [a-m-]* --amoma-- (0,4) +E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) +E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) +{E [[:upper:]] A (0,1) [[<element>]] not supported +E [[:lower:]]+ `az{ (1,3) +E [[:upper:]]+ @AZ[ (1,3) +# No collation in Go +#BE [[-]] [[-]] (2,4) +#BE [[.NIL.]] NULL ECOLLATE +#BE [[=aleph=]] NULL ECOLLATE +} +BE$ \n \n (0,1) +BEn$ \n \n (0,1) +BE$ [^a] \n (0,1) +BE$ \na \na (0,2) +E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) +BE xxx xxx (0,3) +E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) +E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) +E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) +E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) +E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) +E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) +E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) +E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) +BE$ .* \x01\x7f (0,2) +E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) +L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH +E a*a*a*a*a*b aaaaaaaaab (0,10) +BE ^ NULL (0,0) +BE $ NULL (0,0) +BE ^$ NULL (0,0) +BE ^a$ a (0,1) +BE abc abc (0,3) +BE abc xabcy (1,4) +BE abc ababc (2,5) +BE ab*c abc (0,3) +BE ab*bc abc (0,3) +BE ab*bc abbc (0,4) +BE ab*bc abbbbc (0,6) +E ab+bc abbc (0,4) +E ab+bc abbbbc (0,6) +E ab?bc abbc (0,4) +E ab?bc abc (0,3) +E ab?c abc (0,3) +BE ^abc$ abc (0,3) +BE ^abc abcc (0,3) +BE abc$ aabc (1,4) +BE 
^ abc (0,0) +BE $ abc (3,3) +BE a.c abc (0,3) +BE a.c axc (0,3) +BE a.*c axyzc (0,5) +BE a[bc]d abd (0,3) +BE a[b-d]e ace (0,3) +BE a[b-d] aac (1,3) +BE a[-b] a- (0,2) +BE a[b-] a- (0,2) +BE a] a] (0,2) +BE a[]]b a]b (0,3) +BE a[^bc]d aed (0,3) +BE a[^-b]c adc (0,3) +BE a[^]b]c adc (0,3) +E ab|cd abc (0,2) +E ab|cd abcd (0,2) +E a\(b a(b (0,3) +E a\(*b ab (0,2) +E a\(*b a((b (0,4) +E ((a)) abc (0,1)(0,1)(0,1) +E (a)b(c) abc (0,3)(0,1)(2,3) +E a+b+c aabbabc (4,7) +E a* aaa (0,3) +#E (a*)* - (0,0)(0,0) +E (a*)* - (0,0)(?,?) RE2/Go +E (a*)+ - (0,0)(0,0) +#E (a*|b)* - (0,0)(0,0) +E (a*|b)* - (0,0)(?,?) RE2/Go +E (a+|b)* ab (0,2)(1,2) +E (a+|b)+ ab (0,2)(1,2) +E (a+|b)? ab (0,1)(0,1) +BE [^ab]* cde (0,3) +#E (^)* - (0,0)(0,0) +E (^)* - (0,0)(?,?) RE2/Go +BE a* NULL (0,0) +E ([abc])*d abbbcd (0,6)(4,5) +E ([abc])*bcd abcd (0,4)(0,1) +E a|b|c|d|e e (0,1) +E (a|b|c|d|e)f ef (0,2)(0,1) +#E ((a*|b))* - (0,0)(0,0)(0,0) +E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go +BE abcd*efg abcdefg (0,7) +BE ab* xabyabbbz (1,3) +BE ab* xayabbbz (1,2) +E (ab|cd)e abcde (2,5)(2,4) +BE [abhgefdc]ij hij (0,3) +E (a|b)c*d abcd (1,4)(1,2) +E (ab|ab*)bc abc (0,3)(0,1) +E a([bc]*)c* abc (0,3)(1,3) +E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) +E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) +E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) +E a[bcd]*dcdcde adcdcde (0,7) +E (ab|a)b*c abc (0,3)(0,2) +E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) +BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) +E ^a(bc+|b[eh])g|.h$ abh (1,3) +E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) +E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) +E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) +E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) +BE multiple words multiple words yeah (0,14) +E (.*)c(.*) abcde (0,5)(0,2)(3,5) +BE abcd abcd (0,4) +E a(bc)d abcd (0,4)(1,3) +E a[-]?c ac (0,3) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] 
Mo'ammar Gadhafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) +E a+(b|c)*d+ aabcdd (0,6)(3,4) +E ^.+$ vivi (0,4) +E ^(.+)$ vivi (0,4)(0,4) +E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) +E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) +E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) +E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) +E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) +E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) +E 
((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) +E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) +E ((foo)|bar)!bas bar!bas (0,7)(0,3) +E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) +E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) +E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) +E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) +E (foo|(bar))!bas foo!bas (0,7)(0,3) +E (foo|bar)!bas bar!bas (0,7)(0,3) +E (foo|bar)!bas foo!bar!bas (4,11)(4,7) +E (foo|bar)!bas foo!bas (0,7)(0,3) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) +E .*(/XXX).* /XXX (0,4)(0,4) +E .*(\\XXX).* \XXX (0,4)(0,4) +E \\XXX \XXX (0,4) +E .*(/000).* /000 (0,4)(0,4) +E .*(\\000).* \000 (0,4)(0,4) +E \\000 \000 (0,4) diff --git a/vendor/regex-automata/tests/data/fowler/dat/nullsubexpr.dat b/vendor/regex-automata/tests/data/fowler/dat/nullsubexpr.dat new file mode 100644 index 000000000..2e18fbb91 --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/dat/nullsubexpr.dat @@ -0,0 +1,79 @@ +NOTE null subexpression matches : 2002-06-06 + +E (a*)* a (0,1)(0,1) +#E SAME x (0,0)(0,0) +E SAME x (0,0)(?,?) 
RE2/Go +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a*)+ a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a+)* a (0,1)(0,1) +E SAME x (0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a+)+ a (0,1)(0,1) +E SAME x NOMATCH +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) + +E ([a]*)* a (0,1)(0,1) +#E SAME x (0,0)(0,0) +E SAME x (0,0)(?,?) RE2/Go +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E ([a]*)+ a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E ([^b]*)* a (0,1)(0,1) +#E SAME b (0,0)(0,0) +E SAME b (0,0)(?,?) RE2/Go +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaab (0,6)(0,6) +E ([ab]*)* a (0,1)(0,1) +E SAME aaaaaa (0,6)(0,6) +E SAME ababab (0,6)(0,6) +E SAME bababa (0,6)(0,6) +E SAME b (0,1)(0,1) +E SAME bbbbbb (0,6)(0,6) +E SAME aaaabcde (0,5)(0,5) +E ([^a]*)* b (0,1)(0,1) +E SAME bbbbbb (0,6)(0,6) +#E SAME aaaaaa (0,0)(0,0) +E SAME aaaaaa (0,0)(?,?) RE2/Go +E ([^ab]*)* ccccxx (0,6)(0,6) +#E SAME ababab (0,0)(0,0) +E SAME ababab (0,0)(?,?) RE2/Go + +E ((z)+|a)* zabcde (0,2)(1,2) + +#{E a+? aaaaaa (0,1) no *? +? mimimal match ops +#E (a) aaa (0,1)(0,1) +#E (a*?) aaa (0,0)(0,0) +#E (a)*? aaa (0,0) +#E (a*?)*? 
aaa (0,0) +#} + +B \(a*\)*\(x\) x (0,1)(0,0)(0,1) +B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) +B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) +B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) +B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) +B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) +B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) +B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) + +#E (a*)*(x) x (0,1)(0,0)(0,1) +E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go +E (a*)*(x) ax (0,2)(0,1)(1,2) +E (a*)*(x) axa (0,2)(0,1)(1,2) + +E (a*)+(x) x (0,1)(0,0)(0,1) +E (a*)+(x) ax (0,2)(0,1)(1,2) +E (a*)+(x) axa (0,2)(0,1)(1,2) + +E (a*){2}(x) x (0,1)(0,0)(0,1) +E (a*){2}(x) ax (0,2)(1,1)(1,2) +E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/vendor/regex-automata/tests/data/fowler/dat/repetition-expensive.dat b/vendor/regex-automata/tests/data/fowler/dat/repetition-expensive.dat new file mode 100644 index 000000000..c91580236 --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/dat/repetition-expensive.dat @@ -0,0 +1,85 @@ +NOTE implicit vs. explicit repetitions : 2009-02-02 + +# Glenn Fowler <gsf@research.att.com> +# conforming matches (column 4) must match one of the following BREs +# NOMATCH +# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* +# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* +# i.e., each 3-tuple has two identical elements and one (?,?) 
+ +NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 + +:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) +:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) +:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) +:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) +:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) +:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) +:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) +:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) +:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) +#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) +:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) +:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) +:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) +:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) +:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) +:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) +:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) +:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go +:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) + +# These test a fixed bug in my regex-tdfa that did not keep the expanded +# form properly grouped, so right association did the wrong thing with +# these ambiguous patterns (crafted just to test my code when I became +# suspicious of my implementation). The first subexpression should use +# "ab" then "a" then "bcd". + +# OS X / FreeBSD / NetBSD badly fail many of these, with impossible +# results like (0,6)(4,5)(6,6). 
+ +:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) +:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) +:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH +:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) +:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) +:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH +:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) +:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) + +# The above worked on Linux/GLIBC but the following often fail. +# They also trip up OS X / FreeBSD / NetBSD: + +#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH +#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH +#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#291:E 
(ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/vendor/regex-automata/tests/data/fowler/dat/repetition.dat b/vendor/regex-automata/tests/data/fowler/dat/repetition.dat new file mode 100644 index 000000000..2dac0823f --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/dat/repetition.dat @@ -0,0 +1,83 @@ +NOTE implicit vs. explicit repetitions : 2009-02-02 + +# Glenn Fowler <gsf@research.att.com> +# conforming matches (column 4) must match one of the following BREs +# NOMATCH +# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* +# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* +# i.e., each 3-tuple has two identical elements and one (?,?) + +E ((..)|(.)) NULL NOMATCH +E ((..)|(.))((..)|(.)) NULL NOMATCH +E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH + +E ((..)|(.)){1} NULL NOMATCH +E ((..)|(.)){2} NULL NOMATCH +E ((..)|(.)){3} NULL NOMATCH + +E ((..)|(.))* NULL (0,0) + +E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) +E ((..)|(.))((..)|(.)) a NOMATCH +E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH + +E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) +E ((..)|(.)){2} a NOMATCH +E ((..)|(.)){3} a NOMATCH + +E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) + +E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) +E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH + +E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) +E ((..)|(.)){3} aa NOMATCH + +E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) + +E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) +E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) + +E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) +#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) +E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go +E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) + +#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) +E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go + +E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) 
+E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) + +E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) +#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) +E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go + +E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) + +E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) + +E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) +#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) +E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go + +#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) +E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go + +E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) + +E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) +E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) + +E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) diff --git a/vendor/regex-automata/tests/data/fowler/nullsubexpr.toml b/vendor/regex-automata/tests/data/fowler/nullsubexpr.toml new file mode 100644 index 000000000..55d1d5b43 --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/nullsubexpr.toml @@ -0,0 +1,405 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by scripts/fowler-to-toml. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[tests]] +name = "nullsubexpr3" +regex = '''(a*)*''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr5" +regex = '''(a*)*''' +input = '''x''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr6" +regex = '''(a*)*''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr7" +regex = '''(a*)*''' +input = '''aaaaaax''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr8" +regex = '''(a*)+''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr9" +regex = '''(a*)+''' +input = '''x''' +captures = [[[0, 0], [0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr10" +regex = '''(a*)+''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr11" +regex = '''(a*)+''' +input = '''aaaaaax''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr12" +regex = '''(a+)*''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr13" +regex = '''(a+)*''' +input = '''x''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr14" +regex = '''(a+)*''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr15" +regex = '''(a+)*''' +input = '''aaaaaax''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr16" +regex = '''(a+)+''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr17" +regex = '''(a+)+''' +input = '''x''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr18" 
+regex = '''(a+)+''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr19" +regex = '''(a+)+''' +input = '''aaaaaax''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr21" +regex = '''([a]*)*''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr23" +regex = '''([a]*)*''' +input = '''x''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr24" +regex = '''([a]*)*''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr25" +regex = '''([a]*)*''' +input = '''aaaaaax''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr26" +regex = '''([a]*)+''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr27" +regex = '''([a]*)+''' +input = '''x''' +captures = [[[0, 0], [0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr28" +regex = '''([a]*)+''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr29" +regex = '''([a]*)+''' +input = '''aaaaaax''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr30" +regex = '''([^b]*)*''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr32" +regex = '''([^b]*)*''' +input = '''b''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr33" +regex = '''([^b]*)*''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr34" +regex = '''([^b]*)*''' +input = '''aaaaaab''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] 
+name = "nullsubexpr35" +regex = '''([ab]*)*''' +input = '''a''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr36" +regex = '''([ab]*)*''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr37" +regex = '''([ab]*)*''' +input = '''ababab''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr38" +regex = '''([ab]*)*''' +input = '''bababa''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr39" +regex = '''([ab]*)*''' +input = '''b''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr40" +regex = '''([ab]*)*''' +input = '''bbbbbb''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr41" +regex = '''([ab]*)*''' +input = '''aaaabcde''' +captures = [[[0, 5], [0, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr42" +regex = '''([^a]*)*''' +input = '''b''' +captures = [[[0, 1], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr43" +regex = '''([^a]*)*''' +input = '''bbbbbb''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr45" +regex = '''([^a]*)*''' +input = '''aaaaaa''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr46" +regex = '''([^ab]*)*''' +input = '''ccccxx''' +captures = [[[0, 6], [0, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr48" +regex = '''([^ab]*)*''' +input = '''ababab''' +captures = [[[0, 0], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr50" +regex = '''((z)+|a)*''' +input = '''zabcde''' +captures = [[[0, 2], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr69" +regex = '''(a*)*(x)''' +input = '''x''' +captures = [[[0, 1], [], 
[0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr70" +regex = '''(a*)*(x)''' +input = '''ax''' +captures = [[[0, 2], [0, 1], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr71" +regex = '''(a*)*(x)''' +input = '''axa''' +captures = [[[0, 2], [0, 1], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr73" +regex = '''(a*)+(x)''' +input = '''x''' +captures = [[[0, 1], [0, 0], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr74" +regex = '''(a*)+(x)''' +input = '''ax''' +captures = [[[0, 2], [0, 1], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr75" +regex = '''(a*)+(x)''' +input = '''axa''' +captures = [[[0, 2], [0, 1], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr77" +regex = '''(a*){2}(x)''' +input = '''x''' +captures = [[[0, 1], [0, 0], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr78" +regex = '''(a*){2}(x)''' +input = '''ax''' +captures = [[[0, 2], [1, 1], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "nullsubexpr79" +regex = '''(a*){2}(x)''' +input = '''axa''' +captures = [[[0, 2], [1, 1], [1, 2]]] +match_limit = 1 +unescape = true + diff --git a/vendor/regex-automata/tests/data/fowler/repetition-expensive.toml b/vendor/regex-automata/tests/data/fowler/repetition-expensive.toml new file mode 100644 index 000000000..81a896452 --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/repetition-expensive.toml @@ -0,0 +1,341 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by scripts/fowler-to-toml. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[tests]] +name = "repetition-expensive12" +regex = '''X(.?){0,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive13" +regex = '''X(.?){1,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive14" +regex = '''X(.?){2,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive15" +regex = '''X(.?){3,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive16" +regex = '''X(.?){4,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive17" +regex = '''X(.?){5,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive18" +regex = '''X(.?){6,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive19" +regex = '''X(.?){7,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive20" +regex = '''X(.?){8,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive22" +regex = '''X(.?){0,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive24" +regex = '''X(.?){1,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive26" +regex = '''X(.?){2,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive28" +regex 
= '''X(.?){3,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive30" +regex = '''X(.?){4,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive32" +regex = '''X(.?){5,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive34" +regex = '''X(.?){6,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive36" +regex = '''X(.?){7,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive37" +regex = '''X(.?){8,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive48" +regex = '''(a|ab|c|bcd){0,}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive49" +regex = '''(a|ab|c|bcd){1,}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive50" +regex = '''(a|ab|c|bcd){2,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive51" +regex = '''(a|ab|c|bcd){3,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive52" +regex = '''(a|ab|c|bcd){4,}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive53" +regex = '''(a|ab|c|bcd){0,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = 
"repetition-expensive54" +regex = '''(a|ab|c|bcd){1,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive55" +regex = '''(a|ab|c|bcd){2,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive56" +regex = '''(a|ab|c|bcd){3,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive57" +regex = '''(a|ab|c|bcd){4,10}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive58" +regex = '''(a|ab|c|bcd)*(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive59" +regex = '''(a|ab|c|bcd)+(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive65" +regex = '''(ab|a|c|bcd){0,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive67" +regex = '''(ab|a|c|bcd){1,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive69" +regex = '''(ab|a|c|bcd){2,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive71" +regex = '''(ab|a|c|bcd){3,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive72" +regex = '''(ab|a|c|bcd){4,}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive74" +regex = '''(ab|a|c|bcd){0,10}(d*)''' +input = '''ababcd''' +captures = 
[[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive76" +regex = '''(ab|a|c|bcd){1,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive78" +regex = '''(ab|a|c|bcd){2,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive80" +regex = '''(ab|a|c|bcd){3,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive81" +regex = '''(ab|a|c|bcd){4,10}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive83" +regex = '''(ab|a|c|bcd)*(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-expensive85" +regex = '''(ab|a|c|bcd)+(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + diff --git a/vendor/regex-automata/tests/data/fowler/repetition-long.toml b/vendor/regex-automata/tests/data/fowler/repetition-long.toml new file mode 100644 index 000000000..fa24c834a --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/repetition-long.toml @@ -0,0 +1,341 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by scripts/fowler-to-toml. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[tests]] +name = "repetition-long12" +regex = '''X(.?){0,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long13" +regex = '''X(.?){1,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long14" +regex = '''X(.?){2,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long15" +regex = '''X(.?){3,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long16" +regex = '''X(.?){4,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long17" +regex = '''X(.?){5,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long18" +regex = '''X(.?){6,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long19" +regex = '''X(.?){7,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [7, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long20" +regex = '''X(.?){8,}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long22" +regex = '''X(.?){0,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long24" +regex = '''X(.?){1,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long26" +regex = '''X(.?){2,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long28" +regex = '''X(.?){3,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 
9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long30" +regex = '''X(.?){4,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long32" +regex = '''X(.?){5,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long34" +regex = '''X(.?){6,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long36" +regex = '''X(.?){7,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long37" +regex = '''X(.?){8,8}Y''' +input = '''X1234567Y''' +captures = [[[0, 9], [8, 8]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long48" +regex = '''(a|ab|c|bcd){0,}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long49" +regex = '''(a|ab|c|bcd){1,}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long50" +regex = '''(a|ab|c|bcd){2,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long51" +regex = '''(a|ab|c|bcd){3,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long52" +regex = '''(a|ab|c|bcd){4,}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long53" +regex = '''(a|ab|c|bcd){0,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long54" +regex = '''(a|ab|c|bcd){1,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit 
= 1 +unescape = true + +[[tests]] +name = "repetition-long55" +regex = '''(a|ab|c|bcd){2,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long56" +regex = '''(a|ab|c|bcd){3,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [3, 6], [6, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long57" +regex = '''(a|ab|c|bcd){4,10}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long58" +regex = '''(a|ab|c|bcd)*(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long59" +regex = '''(a|ab|c|bcd)+(d*)''' +input = '''ababcd''' +captures = [[[0, 1], [0, 1], [1, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long65" +regex = '''(ab|a|c|bcd){0,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long67" +regex = '''(ab|a|c|bcd){1,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long69" +regex = '''(ab|a|c|bcd){2,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long71" +regex = '''(ab|a|c|bcd){3,}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long72" +regex = '''(ab|a|c|bcd){4,}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long74" +regex = '''(ab|a|c|bcd){0,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long76" +regex = '''(ab|a|c|bcd){1,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], 
[5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long78" +regex = '''(ab|a|c|bcd){2,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long80" +regex = '''(ab|a|c|bcd){3,10}(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long81" +regex = '''(ab|a|c|bcd){4,10}(d*)''' +input = '''ababcd''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long83" +regex = '''(ab|a|c|bcd)*(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition-long85" +regex = '''(ab|a|c|bcd)+(d*)''' +input = '''ababcd''' +captures = [[[0, 6], [4, 5], [5, 6]]] +match_limit = 1 +unescape = true + diff --git a/vendor/regex-automata/tests/data/fowler/repetition.toml b/vendor/regex-automata/tests/data/fowler/repetition.toml new file mode 100644 index 000000000..fc8da8df4 --- /dev/null +++ b/vendor/regex-automata/tests/data/fowler/repetition.toml @@ -0,0 +1,397 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by scripts/fowler-to-toml. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[tests]] +name = "repetition10" +regex = '''((..)|(.))''' +input = '''''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition11" +regex = '''((..)|(.))((..)|(.))''' +input = '''''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition12" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition14" +regex = '''((..)|(.)){1}''' +input = '''''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition15" +regex = '''((..)|(.)){2}''' +input = '''''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition16" +regex = '''((..)|(.)){3}''' +input = '''''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition18" +regex = '''((..)|(.))*''' +input = '''''' +captures = [[[0, 0]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition20" +regex = '''((..)|(.))''' +input = '''a''' +captures = [[[0, 1], [0, 1], [], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition21" +regex = '''((..)|(.))((..)|(.))''' +input = '''a''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition22" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''a''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition24" +regex = '''((..)|(.)){1}''' +input = '''a''' +captures = [[[0, 1], [0, 1], [], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition25" +regex = '''((..)|(.)){2}''' +input = '''a''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition26" +regex = '''((..)|(.)){3}''' +input = '''a''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition28" +regex = '''((..)|(.))*''' +input = '''a''' +captures = [[[0, 1], [0, 1], [], [0, 1]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition30" +regex 
= '''((..)|(.))''' +input = '''aa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition31" +regex = '''((..)|(.))((..)|(.))''' +input = '''aa''' +captures = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition32" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''aa''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition34" +regex = '''((..)|(.)){1}''' +input = '''aa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition35" +regex = '''((..)|(.)){2}''' +input = '''aa''' +captures = [[[0, 2], [1, 2], [], [1, 2]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition36" +regex = '''((..)|(.)){3}''' +input = '''aa''' +captures = [] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition38" +regex = '''((..)|(.))*''' +input = '''aa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition40" +regex = '''((..)|(.))''' +input = '''aaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition41" +regex = '''((..)|(.))((..)|(.))''' +input = '''aaa''' +captures = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition42" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''aaa''' +captures = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition44" +regex = '''((..)|(.)){1}''' +input = '''aaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition46" +regex = '''((..)|(.)){2}''' +input = '''aaa''' +captures = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition47" +regex = 
'''((..)|(.)){3}''' +input = '''aaa''' +captures = [[[0, 3], [2, 3], [], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition50" +regex = '''((..)|(.))*''' +input = '''aaa''' +captures = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition52" +regex = '''((..)|(.))''' +input = '''aaaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition53" +regex = '''((..)|(.))((..)|(.))''' +input = '''aaaa''' +captures = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition54" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''aaaa''' +captures = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition56" +regex = '''((..)|(.)){1}''' +input = '''aaaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition57" +regex = '''((..)|(.)){2}''' +input = '''aaaa''' +captures = [[[0, 4], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition59" +regex = '''((..)|(.)){3}''' +input = '''aaaa''' +captures = [[[0, 4], [3, 4], [0, 2], [3, 4]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition61" +regex = '''((..)|(.))*''' +input = '''aaaa''' +captures = [[[0, 4], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition63" +regex = '''((..)|(.))''' +input = '''aaaaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition64" +regex = '''((..)|(.))((..)|(.))''' +input = '''aaaaa''' +captures = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition65" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''aaaaa''' +captures = [[[0, 5], [0, 2], [0, 2], [], [2, 4], 
[2, 4], [], [4, 5], [], [4, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition67" +regex = '''((..)|(.)){1}''' +input = '''aaaaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition68" +regex = '''((..)|(.)){2}''' +input = '''aaaaa''' +captures = [[[0, 4], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition70" +regex = '''((..)|(.)){3}''' +input = '''aaaaa''' +captures = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition73" +regex = '''((..)|(.))*''' +input = '''aaaaa''' +captures = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition75" +regex = '''((..)|(.))''' +input = '''aaaaaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition76" +regex = '''((..)|(.))((..)|(.))''' +input = '''aaaaaa''' +captures = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition77" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +input = '''aaaaaa''' +captures = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition79" +regex = '''((..)|(.)){1}''' +input = '''aaaaaa''' +captures = [[[0, 2], [0, 2], [0, 2], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition80" +regex = '''((..)|(.)){2}''' +input = '''aaaaaa''' +captures = [[[0, 4], [2, 4], [2, 4], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition81" +regex = '''((..)|(.)){3}''' +input = '''aaaaaa''' +captures = [[[0, 6], [4, 6], [4, 6], []]] +match_limit = 1 +unescape = true + +[[tests]] +name = "repetition83" +regex = '''((..)|(.))*''' +input = '''aaaaaa''' +captures = [[[0, 6], [4, 6], [4, 6], []]] +match_limit = 1 +unescape = true + diff --git 
a/vendor/regex-automata/tests/data/iter.toml b/vendor/regex-automata/tests/data/iter.toml new file mode 100644 index 000000000..6c0539fd4 --- /dev/null +++ b/vendor/regex-automata/tests/data/iter.toml @@ -0,0 +1,119 @@ +[[tests]] +name = "1" +regex = "a" +input = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] + +[[tests]] +name = "2" +regex = "a" +input = "aba" +matches = [[0, 1], [2, 3]] + +[[tests]] +name = "empty1" +regex = '' +input = '' +matches = [[0, 0]] + +[[tests]] +name = "empty2" +regex = '' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty3" +regex = '()' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty4" +regex = '()*' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty5" +regex = '()+' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty6" +regex = '()?' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty7" +regex = '()()' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty8" +regex = '()+|z' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty9" +regex = 'z|()+' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty10" +regex = '()+|b' +input = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[tests]] +name = "empty11" +regex = 'b|()+' +input = 'abc' +matches = [[0, 0], [1, 2], [3, 3]] + +[[tests]] +name = "start1" +regex = "^a" +input = "a" +matches = [[0, 1]] + +[[tests]] +name = "start2" +regex = "^a" +input = "aa" +matches = [[0, 1]] + +[[tests]] +name = "anchored1" +regex = "a" +input = "a" +matches = [[0, 1]] +anchored = true + +# This test is pretty subtle. It demonstrates the crucial difference between +# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively +# matches at the start of a haystack and nowhere else. 
The latter regex has +# no such restriction, but its automaton is constructed such that it lacks a +# `.*?` prefix. So it can actually produce matches at multiple locations. +# The anchored3 test drives this point home. +[[tests]] +name = "anchored2" +regex = "a" +input = "aa" +matches = [[0, 1], [1, 2]] +anchored = true + +# Unlike anchored2, this test stops matching anything after it sees `b` +# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it +# determines that there are no remaining matches. +[[tests]] +name = "anchored3" +regex = "a" +input = "aaba" +matches = [[0, 1], [1, 2]] +anchored = true diff --git a/vendor/regex-automata/tests/data/misc.toml b/vendor/regex-automata/tests/data/misc.toml new file mode 100644 index 000000000..c05418dd6 --- /dev/null +++ b/vendor/regex-automata/tests/data/misc.toml @@ -0,0 +1,99 @@ +[[tests]] +name = "ascii-literal" +regex = "a" +input = "a" +matches = [[0, 1]] + +[[tests]] +name = "ascii-literal-not" +regex = "a" +input = "z" +matches = [] + +[[tests]] +name = "ascii-literal-anchored" +regex = "a" +input = "a" +matches = [[0, 1]] +anchored = true + +[[tests]] +name = "ascii-literal-anchored-not" +regex = "a" +input = "z" +matches = [] +anchored = true + +[[tests]] +name = "anchor-start-end-line" +regex = '(?m)^bar$' +input = "foo\nbar\nbaz" +matches = [[4, 7]] + +[[tests]] +name = "prefix-literal-match" +regex = '^abc' +input = "abc" +matches = [[0, 3]] + +[[tests]] +name = "prefix-literal-match-ascii" +regex = '^abc' +input = "abc" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[tests]] +name = "prefix-literal-no-match" +regex = '^abc' +input = "zabc" +matches = [] + +[[tests]] +name = "one-literal-edge" +regex = 'abc' +input = "xxxxxab" +matches = [] + +[[tests]] +name = "terminates" +regex = 'a$' +input = "a" +matches = [[0, 1]] + +[[tests]] +name = "suffix-100" +regex = '.*abcd' +input = "abcd" +matches = [[0, 4]] + +[[tests]] +name = "suffix-200" +regex = '.*(?:abcd)+' +input = 
"abcd" +matches = [[0, 4]] + +[[tests]] +name = "suffix-300" +regex = '.*(?:abcd)+' +input = "abcdabcd" +matches = [[0, 8]] + +[[tests]] +name = "suffix-400" +regex = '.*(?:abcd)+' +input = "abcdxabcd" +matches = [[0, 9]] + +[[tests]] +name = "suffix-500" +regex = '.*x(?:abcd)+' +input = "abcdxabcd" +matches = [[0, 9]] + +[[tests]] +name = "suffix-600" +regex = '[^abcd]*x(?:abcd)+' +input = "abcdxabcd" +matches = [[4, 9]] diff --git a/vendor/regex-automata/tests/data/multiline.toml b/vendor/regex-automata/tests/data/multiline.toml new file mode 100644 index 000000000..cefdb2629 --- /dev/null +++ b/vendor/regex-automata/tests/data/multiline.toml @@ -0,0 +1,275 @@ +[[tests]] +name = "basic1" +regex = '(?m)^[a-z]+$' +input = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[tests]] +name = "basic2" +regex = '(?m)^$' +input = "abc\ndef\nxyz" +matches = [] + +[[tests]] +name = "basic3" +regex = '(?m)^' +input = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[tests]] +name = "basic4" +regex = '(?m)$' +input = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[tests]] +name = "basic5" +regex = '(?m)^[a-z]' +input = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[tests]] +name = "basic6" +regex = '(?m)[a-z]^' +input = "abc\ndef\nxyz" +matches = [] + +[[tests]] +name = "basic7" +regex = '(?m)[a-z]$' +input = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[tests]] +name = "basic8" +regex = '(?m)$[a-z]' +input = "abc\ndef\nxyz" +matches = [] + +[[tests]] +name = "basic9" +regex = '(?m)^$' +input = "" +matches = [[0, 0]] + +[[tests]] +name = "repeat1" +regex = '(?m)(?:^$)*' +input = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[tests]] +name = "repeat1-no-multi" +regex = '(?:^$)*' +input = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[tests]] +name = "repeat2" +regex = '(?m)(?:^|a)+' +input = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[tests]] +name = 
"repeat100" +regex = '(?m)(?:^|a)+' +input = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[tests]] +name = "repeat2-no-multi" +regex = '(?:^|a)+' +input = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[tests]] +name = "repeat3" +regex = '(?m)(?:^|a)*' +input = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[tests]] +name = "repeat3-no-multi" +regex = '(?:^|a)*' +input = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[tests]] +name = "repeat4" +regex = '(?m)(?:^|a+)' +input = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[tests]] +name = "repeat4-no-multi" +regex = '(?:^|a+)' +input = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[tests]] +name = "repeat5" +regex = '(?m)(?:^|a*)' +input = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[tests]] +name = "repeat5-no-multi" +regex = '(?:^|a*)' +input = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[tests]] +name = "repeat6" +regex = '(?m)(?:^[a-z])+' +input = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[tests]] +name = "repeat6-no-multi" +regex = '(?:^[a-z])+' +input = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[tests]] +name = "repeat7" +regex = '(?m)(?:^[a-z]{3}\n?)+' +input = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[tests]] +name = "repeat7-no-multi" +regex = '(?:^[a-z]{3}\n?)+' +input = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[tests]] +name = "repeat8" +regex = '(?m)(?:^[a-z]{3}\n?)*' +input = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[tests]] +name = "repeat8-no-multi" +regex = '(?:^[a-z]{3}\n?)*' +input = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[tests]] +name = "repeat9" +regex = '(?m)(?:\n?[a-z]{3}$)+' +input = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[tests]] +name = "repeat9-no-multi" +regex = '(?:\n?[a-z]{3}$)+' +input = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[tests]] +name = "repeat10" +regex = '(?m)(?:\n?[a-z]{3}$)*' +input = "abc\ndef\nxyz" +matches = 
[[0, 11]] + +[[tests]] +name = "repeat10-no-multi" +regex = '(?:\n?[a-z]{3}$)*' +input = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[tests]] +name = "repeat11" +regex = '(?m)^*' +input = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[tests]] +name = "repeat11-no-multi" +regex = '^*' +input = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[tests]] +name = "repeat12" +regex = '(?m)^+' +input = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[tests]] +name = "repeat12-no-multi" +regex = '^+' +input = "\naa\n" +matches = [[0, 0]] + +[[tests]] +name = "repeat13" +regex = '(?m)$*' +input = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[tests]] +name = "repeat13-no-multi" +regex = '$*' +input = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[tests]] +name = "repeat14" +regex = '(?m)$+' +input = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[tests]] +name = "repeat14-no-multi" +regex = '$+' +input = "\naa\n" +matches = [[4, 4]] + +[[tests]] +name = "repeat15" +regex = '(?m)(?:$\n)+' +input = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[tests]] +name = "repeat15-no-multi" +regex = '(?:$\n)+' +input = "\n\naaa\n\n" +matches = [] + +[[tests]] +name = "repeat16" +regex = '(?m)(?:$\n)*' +input = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[tests]] +name = "repeat16-no-multi" +regex = '(?:$\n)*' +input = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[tests]] +name = "repeat17" +regex = '(?m)(?:$\n^)+' +input = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[tests]] +name = "repeat17-no-multi" +regex = '(?:$\n^)+' +input = "\n\naaa\n\n" +matches = [] + +[[tests]] +name = "repeat18" +regex = '(?m)(?:^|$)+' +input = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[tests]] +name = "repeat18-no-multi" +regex = '(?:^|$)+' +input = "\n\naaa\n\n" +matches = [[0, 
0], [7, 7]] diff --git a/vendor/regex-automata/tests/data/no-unicode.toml b/vendor/regex-automata/tests/data/no-unicode.toml new file mode 100644 index 000000000..c7fc9664f --- /dev/null +++ b/vendor/regex-automata/tests/data/no-unicode.toml @@ -0,0 +1,158 @@ +[[tests]] +name = "invalid-utf8-literal1" +regex = '\xFF' +input = '\xFF' +matches = [[0, 1]] +unicode = false +utf8 = false +unescape = true + + +[[tests]] +name = "mixed" +regex = '(.+)(?-u)(.+)' +input = '\xCE\x93\xCE\x94\xFF' +matches = [[0, 5]] +utf8 = false +unescape = true + + +[[tests]] +name = "case1" +regex = "a" +input = "A" +matches = [[0, 1]] +case_insensitive = true +unicode = false + +[[tests]] +name = "case2" +regex = "[a-z]+" +input = "AaAaA" +matches = [[0, 5]] +case_insensitive = true +unicode = false + +[[tests]] +name = "case3" +regex = "[a-z]+" +input = "aA\u212AaA" +matches = [[0, 7]] +case_insensitive = true + +[[tests]] +name = "case4" +regex = "[a-z]+" +input = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case_insensitive = true +unicode = false + + +[[tests]] +name = "negate1" +regex = "[^a]" +input = "δ" +matches = [[0, 2]] + +[[tests]] +name = "negate2" +regex = "[^a]" +input = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + + +[[tests]] +name = "dotstar-prefix1" +regex = "a" +input = '\xFFa' +matches = [[1, 2]] +unicode = false +utf8 = false +unescape = true + +[[tests]] +name = "dotstar-prefix2" +regex = "a" +input = '\xFFa' +matches = [[1, 2]] +utf8 = false +unescape = true + + +[[tests]] +name = "null-bytes1" +regex = '[^\x00]+\x00' +input = 'foo\x00' +matches = [[0, 4]] +unicode = false +utf8 = false +unescape = true + + +[[tests]] +name = "word-ascii" +regex = '\w+' +input = "aδ" +matches = [[0, 1]] +unicode = false + +[[tests]] +name = "word-unicode" +regex = '\w+' +input = "aδ" +matches = [[0, 3]] + +[[tests]] +name = "decimal-ascii" +regex = '\d+' +input = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false + +[[tests]] +name = "decimal-unicode" +regex = 
'\d+' +input = "1२३9" +matches = [[0, 8]] + +[[tests]] +name = "space-ascii" +regex = '\s+' +input = " \u1680" +matches = [[0, 1]] +unicode = false + +[[tests]] +name = "space-unicode" +regex = '\s+' +input = " \u1680" +matches = [[0, 4]] + + +[[tests]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +input = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false + +[[tests]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +input = "☃" +matches = [[0, 0], [3, 3]] + +[[tests]] +# See: https://github.com/rust-lang/regex/issues/484 +# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8. +name = "iter2-bytes" +regex = '' +input = 'b\xFFr' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unescape = true +utf8 = false diff --git a/vendor/regex-automata/tests/data/overlapping.toml b/vendor/regex-automata/tests/data/overlapping.toml new file mode 100644 index 000000000..6662876b4 --- /dev/null +++ b/vendor/regex-automata/tests/data/overlapping.toml @@ -0,0 +1,126 @@ +[[tests]] +name = "repetition-plus-leftmost-first-100" +regex = 'a+' +input = "aaa" +matches = [[0, 1], [0, 2], [0, 3]] +match_kind = "leftmost-first" +search_kind = "overlapping" + +[[tests]] +name = "repetition-plus-all-100" +regex = 'a+' +input = "aaa" +matches = [[0, 1], [0, 2], [0, 3]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "repetition-plus-leftmost-first-200" +regex = '(abc)+' +input = "zzabcabczzabc" +matches = [[2, 5], [2, 8]] +match_kind = "leftmost-first" +search_kind = "overlapping" + +[[tests]] +name = "repetition-plus-all-200" +regex = '(abc)+' +input = "zzabcabczzabc" +matches = [[2, 5], [2, 8], [10, 13]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "repetition-star-leftmost-first-100" +regex = 'a*' +input = "aaa" +matches = [[0, 0], [0, 1], [0, 2], [0, 3]] +match_kind = "leftmost-first" +search_kind = "overlapping" + +[[tests]] +name = 
"repetition-star-all-100" +regex = 'a*' +input = "aaa" +matches = [[0, 0], [0, 1], [0, 2], [0, 3]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "repetition-star-leftmost-first-200" +regex = '(abc)*' +input = "zzabcabczzabc" +matches = [[0, 0]] +match_kind = "leftmost-first" +search_kind = "overlapping" + +[[tests]] +name = "repetition-star-all-200" +regex = '(abc)*' +input = "zzabcabczzabc" +matches = [ + [0, 0], [1, 1], [2, 2], [3, 3], [4, 4], + [2, 5], + [6, 6], [7, 7], + [2, 8], + [9, 9], [10, 10], [11, 11], [12, 12], + [10, 13], +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "start-end-rep-leftmost-first" +regex = '(^$)*' +input = "abc" +matches = [[0, 0]] +match_kind = "leftmost-first" +search_kind = "overlapping" + +[[tests]] +name = "start-end-rep-all" +regex = '(^$)*' +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "alt-leftmost-first-100" +regex = 'abc|a' +input = "zzabcazzaabc" +matches = [[2, 3], [2, 5]] +match_kind = "leftmost-first" +search_kind = "overlapping" + +[[tests]] +name = "alt-all-100" +regex = 'abc|a' +input = "zzabcazzaabc" +matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty-000" +regex = "" +input = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty-alt-000" +regex = "|b" +input = "abc" +matches = [[0, 0], [1, 1], [1, 2], [3, 3]] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty-alt-010" +regex = "b|" +input = "abc" +matches = [[0, 0], [1, 1], [1, 2], [3, 3]] +match_kind = "all" +search_kind = "overlapping" diff --git a/vendor/regex-automata/tests/data/regression.toml b/vendor/regex-automata/tests/data/regression.toml new file mode 100644 index 000000000..6a4dbb151 --- /dev/null +++ 
b/vendor/regex-automata/tests/data/regression.toml @@ -0,0 +1,423 @@ +# See: https://github.com/rust-lang/regex/issues/48 +[[tests]] +name = "invalid-regex-no-crash-100" +regex = '(*)' +input = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[tests]] +name = "invalid-regex-no-crash-200" +regex = '(?:?)' +input = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[tests]] +name = "invalid-regex-no-crash-300" +regex = '(?)' +input = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[tests]] +name = "invalid-regex-no-crash-400" +regex = '*' +input = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/75 +[[tests]] +name = "unsorted-binary-search-100" +regex = '(?i-u)[a_]+' +input = "A_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/75 +[[tests]] +name = "unsorted-binary-search-200" +regex = '(?i-u)[A_]+' +input = "a_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/76 +[[tests]] +name = "unicode-case-lower-nocase-flag" +regex = '(?i)\p{Ll}+' +input = "ΛΘΓΔα" +matches = [[0, 10]] + +# See: https://github.com/rust-lang/regex/issues/99 +[[tests]] +name = "negated-char-class-100" +regex = '(?i)[^x]' +input = "x" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/99 +[[tests]] +name = "negated-char-class-200" +regex = '(?i)[^x]' +input = "X" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/101 +[[tests]] +name = "ascii-word-underscore" +regex = '[[:word:]]' +input = "_" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/129 +[[tests]] +name = "captures-repeat" +regex = '([a-f]){2}(?P<foo>[x-z])' +input = "abx" +captures = [ + [[0, 3], [0, 2], [2, 3]], +] + +# See: https://github.com/rust-lang/regex/issues/153 +[[tests]] +name = "alt-in-alt-100" +regex = 'ab?|$' +input = "az" +matches = [[0, 1], [2, 2]] + +# See: 
https://github.com/rust-lang/regex/issues/153 +[[tests]] +name = "alt-in-alt-200" +regex = '^(.*?)(\n|\r\n?|$)' +input = "ab\rcd" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/169 +[[tests]] +name = "leftmost-first-prefix" +regex = 'z*azb' +input = "azb" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/191 +[[tests]] +name = "many-alternates" +regex = '1|2|3|4|5|6|7|8|9|10|int' +input = "int" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[tests]] +name = "word-boundary-alone-100" +regex = '\b' +input = "Should this (work?)" +matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[tests]] +name = "word-boundary-alone-200" +regex = '\b' +input = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +# See: https://github.com/rust-lang/regex/issues/264 +[[tests]] +name = "word-boundary-ascii-no-capture" +regex = '\B' +input = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/264 +[[tests]] +name = "word-boundary-ascii-capture" +regex = '(\B)' +input = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/268 +[[tests]] +name = "partial-anchor" +regex = '^a|b' +input = "ba" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[tests]] +name = "endl-or-word-boundary" +regex = '(?m:$)|(?-u:\b)' +input = "\U0006084E" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[tests]] +name = "zero-or-end" +regex = '(?i-u:\x00)|$' +input = "\U000E682F" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[tests]] +name = "y-or-endl" +regex = '(?i-u:y)|(?m:$)' +input = "\U000B4331" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 
+[[tests]] +name = "word-boundary-start-x" +regex = '(?u:\b)^(?-u:X)' +input = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[tests]] +name = "word-boundary-ascii-start-x" +regex = '(?-u:\b)^(?-u:X)' +input = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[tests]] +name = "end-not-word-boundary" +regex = '$\B' +input = "\U0005C124\U000B576C" +matches = [[8, 8]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/280 +[[tests]] +name = "partial-anchor-alternate-begin" +regex = '^a|z' +input = "yyyyya" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/280 +[[tests]] +name = "partial-anchor-alternate-end" +regex = 'a$|z' +input = "ayyyyy" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/289 +[[tests]] +name = "lits-unambiguous-100" +regex = '(ABC|CDA|BC)X' +input = "CDAX" +matches = [[0, 4]] + +# See: https://github.com/rust-lang/regex/issues/291 +[[tests]] +name = "lits-unambiguous-200" +regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$' +input = "CIMG2341" +captures = [ + [[0, 8], [0, 4], [], [0, 4], [4, 8]], +] + +# See: https://github.com/rust-lang/regex/issues/303 +[[tests]] +name = "negated-full-byte-range" +regex = '[^\x00-\xFF]' +input = "" +matches = [] +compiles = false +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/321 +[[tests]] +name = "strange-anchor-non-complete-prefix" +regex = 'a^{2}' +input = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/321 +[[tests]] +name = "strange-anchor-non-complete-suffix" +regex = '${2}a' +input = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[tests]] +name = "captures-after-dfa-premature-end-100" +regex = 'a(b*(X|$))?' 
+input = "abcbX" +captures = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[tests]] +name = "captures-after-dfa-premature-end-200" +regex = 'a(bc*(X|$))?' +input = "abcbX" +captures = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[tests]] +name = "captures-after-dfa-premature-end-300" +regex = '(aa$)?' +input = "aaz" +captures = [ + [[0, 0]], + [[1, 1]], + [[2, 2]], + [[3, 3]], +] + +# See: https://github.com/rust-lang/regex/issues/437 +[[tests]] +name = "literal-panic" +regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+' +input = "test" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/527 +[[tests]] +name = "empty-flag-expr" +regex = '(((?x)))' +input = "" +matches = [[0, 0]] + +# See: https://github.com/rust-lang/regex/issues/533 +[[tests]] +name = "blank-matches-nothing-between-space-and-tab" +regex = '[[:blank:]]' +input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +match = false +unescape = true + +# See: https://github.com/rust-lang/regex/issues/533 +[[tests]] +name = "blank-matches-nothing-between-space-and-tab-inverted" +regex = '^[[:^blank:]]+$' +input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +match = true +unescape = true + +# See: https://github.com/rust-lang/regex/issues/555 +[[tests]] +name = "invalid-repetition" +regex = '(?m){1,1}' +input = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/640 +[[tests]] +name = "flags-are-unset" +regex = '((?i)foo)|Bar' +input = "foo Foo bar Bar" +matches = [[0, 3], [4, 7], [12, 15]] + +# Note that 'Ј' is not 'j', but cyrillic Je +# https://en.wikipedia.org/wiki/Je_(Cyrillic) +# +# See: https://github.com/rust-lang/regex/issues/659 +[[tests]] +name = "empty-group-with-unicode" +regex 
= '()Ј01' +input = 'zЈ01' +matches = [[1, 5]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[tests]] +name = "word-boundary-weird" +regex = '\b..\b' +input = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[tests]] +name = "word-boundary-weird-ascii" +regex = '\b..\b' +input = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/579 +[[tests]] +name = "word-boundary-weird-minimal-ascii" +regex = '\b..\b' +input = "az,,b" +matches = [[0, 2], [2, 4]] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[tests]] +name = "reverse-suffix-100" +regex = '[0-4][0-4][0-4]000' +input = "153.230000" +matches = [[4, 10]] + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[tests]] +name = "reverse-suffix-200" +regex = '[0-9][0-9][0-9]000' +input = "153.230000\n" +matches = [[4, 10]] + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[tests]] +name = "stops" +regex = '\bs(?:[ab])' +input = 's\xE4' +matches = [] +unescape = true + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[tests]] +name = "stops-ascii" +regex = '(?-u:\b)s(?:[ab])' +input = 's\xE4' +matches = [] +unescape = true + +# There is no issue for this bug. +[[tests]] +name = "anchored-prefix-100" +regex = '^a[[:^space:]]' +input = "a " +matches = [] + +# There is no issue for this bug. +[[tests]] +name = "anchored-prefix-200" +regex = '^a[[:^space:]]' +input = "foo boo a" +matches = [] + +# There is no issue for this bug. +[[tests]] +name = "anchored-prefix-300" +regex = '^-[a-z]' +input = "r-f" +matches = [] + +# Tests that a possible Aho-Corasick optimization works correctly. It only +# kicks in when we have a lot of literals. By "works correctly," we mean that +# leftmost-first match semantics are properly respected. 
That is, samwise +# should match, not sam. +# +# There is no issue for this bug. +[[tests]] +name = "aho-corasick-100" +regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z' +input = "samwise" +matches = [[0, 7]] diff --git a/vendor/regex-automata/tests/data/set.toml b/vendor/regex-automata/tests/data/set.toml new file mode 100644 index 000000000..e0eb0583e --- /dev/null +++ b/vendor/regex-automata/tests/data/set.toml @@ -0,0 +1,523 @@ +[[tests]] +name = "basic10" +regexes = ["a", "a"] +input = "a" +matches = [ + { id = 0, offsets = [0, 1] }, + { id = 1, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic10-leftmost-first" +regexes = ["a", "a"] +input = "a" +matches = [ + { id = 0, offsets = [0, 1] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "basic20" +regexes = ["a", "a"] +input = "ba" +matches = [ + { id = 0, offsets = [1, 2] }, + { id = 1, offsets = [1, 2] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic30" +regexes = ["a", "b"] +input = "a" +matches = [ + { id = 0, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic40" +regexes = ["a", "b"] +input = "b" +matches = [ + { id = 1, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic50" +regexes = ["a|b", "b|a"] +input = "b" +matches = [ + { id = 0, offsets = [0, 1] }, + { id = 1, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic60" +regexes = ["foo", "oo"] +input = "foo" +matches = [ + { id = 0, offsets = [0, 3] }, + { id = 1, offsets = [1, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic60-leftmost-first" +regexes = ["foo", "oo"] +input = "foo" +matches = [ + { id = 0, offsets = [0, 3] }, +] +match_kind = "leftmost-first" +search_kind = 
"leftmost" + +[[tests]] +name = "basic61" +regexes = ["oo", "foo"] +input = "foo" +matches = [ + { id = 1, offsets = [0, 3] }, + { id = 0, offsets = [1, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic61-leftmost-first" +regexes = ["oo", "foo"] +input = "foo" +matches = [ + { id = 1, offsets = [0, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "basic70" +regexes = ["abcd", "bcd", "cd", "d"] +input = "abcd" +matches = [ + { id = 0, offsets = [0, 4] }, + { id = 1, offsets = [1, 4] }, + { id = 2, offsets = [2, 4] }, + { id = 3, offsets = [3, 4] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic71" +regexes = ["bcd", "cd", "d", "abcd"] +input = "abcd" +matches = [ + { id = 3, offsets = [0, 4] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "basic80" +regexes = ["^foo", "bar$"] +input = "foo" +matches = [ + { id = 0, offsets = [0, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic81" +regexes = ["^foo", "bar$"] +input = "foo bar" +matches = [ + { id = 0, offsets = [0, 3] }, + { id = 1, offsets = [4, 7] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic82" +regexes = ["^foo", "bar$"] +input = "bar" +matches = [ + { id = 1, offsets = [0, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic90" +regexes = ["[a-z]+$", "foo"] +input = "01234 foo" +matches = [ + { id = 0, offsets = [6, 9] }, + { id = 1, offsets = [6, 9] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic91" +regexes = ["[a-z]+$", "foo"] +input = "foo 01234" +matches = [ + { id = 1, offsets = [0, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic100" +regexes = [".*?", "a"] +input = "zzza" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [0, 1] }, + { id = 0, offsets = [0, 2] }, + { id = 
0, offsets = [0, 3] }, + { id = 0, offsets = [0, 4] }, + { id = 1, offsets = [3, 4] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic101" +regexes = [".*", "a"] +input = "zzza" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [0, 1] }, + { id = 0, offsets = [0, 2] }, + { id = 0, offsets = [0, 3] }, + { id = 0, offsets = [0, 4] }, + { id = 1, offsets = [3, 4] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic102" +regexes = [".*", "a"] +input = "zzz" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [0, 1] }, + { id = 0, offsets = [0, 2] }, + { id = 0, offsets = [0, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic110" +regexes = ['\ba\b'] +input = "hello a bye" +matches = [ + { id = 0, offsets = [6, 7] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic111" +regexes = ['\ba\b', '\be\b'] +input = "hello a bye e" +matches = [ + { id = 0, offsets = [6, 7] }, + { id = 1, offsets = [12, 13] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic120" +regexes = ["a"] +input = "a" +matches = [ + { id = 0, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic121" +regexes = [".*a"] +input = "a" +matches = [ + { id = 0, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic122" +regexes = [".*a", "β"] +input = "β" +matches = [ + { id = 1, offsets = [0, 2] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "basic130" +regexes = ["ab", "b"] +input = "ba" +matches = [ + { id = 1, offsets = [0, 1] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty10" +regexes = ["", "a"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 1, offsets = [0, 1] }, + { id = 0, offsets = [1, 1] }, + { id = 0, offsets = [2, 2] }, + { id = 0, 
offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty10-leftmost-first" +regexes = ["", "a"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [1, 1] }, + { id = 0, offsets = [2, 2] }, + { id = 0, offsets = [3, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "empty11" +regexes = ["a", ""] +input = "abc" +matches = [ + { id = 1, offsets = [0, 0] }, + { id = 0, offsets = [0, 1] }, + { id = 1, offsets = [1, 1] }, + { id = 1, offsets = [2, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty11-leftmost-first" +regexes = ["a", ""] +input = "abc" +matches = [ + { id = 0, offsets = [0, 1] }, + { id = 1, offsets = [2, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "empty20" +regexes = ["", "b"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [1, 1] }, + { id = 1, offsets = [1, 2] }, + { id = 0, offsets = [2, 2] }, + { id = 0, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty20-leftmost-first" +regexes = ["", "b"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [1, 1] }, + { id = 0, offsets = [2, 2] }, + { id = 0, offsets = [3, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "empty21" +regexes = ["b", ""] +input = "abc" +matches = [ + { id = 1, offsets = [0, 0] }, + { id = 1, offsets = [1, 1] }, + { id = 0, offsets = [1, 2] }, + { id = 1, offsets = [2, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty21-leftmost-first" +regexes = ["b", ""] +input = "abc" +matches = [ + { id = 1, offsets = [0, 0] }, + { id = 0, offsets = [1, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "leftmost-first" 
+search_kind = "leftmost" + +[[tests]] +name = "empty22" +regexes = ["(?:)", "b"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [1, 1] }, + { id = 1, offsets = [1, 2] }, + { id = 0, offsets = [2, 2] }, + { id = 0, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty23" +regexes = ["b", "(?:)"] +input = "abc" +matches = [ + { id = 1, offsets = [0, 0] }, + { id = 1, offsets = [1, 1] }, + { id = 0, offsets = [1, 2] }, + { id = 1, offsets = [2, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty30" +regexes = ["", "z"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [1, 1] }, + { id = 0, offsets = [2, 2] }, + { id = 0, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty30-leftmost-first" +regexes = ["", "z"] +input = "abc" +matches = [ + { id = 0, offsets = [0, 0] }, + { id = 0, offsets = [1, 1] }, + { id = 0, offsets = [2, 2] }, + { id = 0, offsets = [3, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "empty31" +regexes = ["z", ""] +input = "abc" +matches = [ + { id = 1, offsets = [0, 0] }, + { id = 1, offsets = [1, 1] }, + { id = 1, offsets = [2, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty31-leftmost-first" +regexes = ["z", ""] +input = "abc" +matches = [ + { id = 1, offsets = [0, 0] }, + { id = 1, offsets = [1, 1] }, + { id = 1, offsets = [2, 2] }, + { id = 1, offsets = [3, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "empty40" +regexes = ["c(?:)", "b"] +input = "abc" +matches = [ + { id = 1, offsets = [1, 2] }, + { id = 0, offsets = [2, 3] }, +] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "empty40-leftmost-first" +regexes = ["c(?:)", "b"] +input = "abc" 
+matches = [ + { id = 1, offsets = [1, 2] }, + { id = 0, offsets = [2, 3] }, +] +match_kind = "leftmost-first" +search_kind = "leftmost" + +[[tests]] +name = "nomatch10" +regexes = ["a", "a"] +input = "b" +matches = [] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "nomatch20" +regexes = ["^foo", "bar$"] +input = "bar foo" +matches = [] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "nomatch30" +regexes = [] +input = "a" +matches = [] +match_kind = "all" +search_kind = "overlapping" + +[[tests]] +name = "nomatch40" +regexes = ["^rooted$", '\.log$'] +input = "notrooted" +matches = [] +match_kind = "all" +search_kind = "overlapping" diff --git a/vendor/regex-automata/tests/data/unicode.toml b/vendor/regex-automata/tests/data/unicode.toml new file mode 100644 index 000000000..016bbfd9b --- /dev/null +++ b/vendor/regex-automata/tests/data/unicode.toml @@ -0,0 +1,514 @@ +# Basic Unicode literal support. +[[tests]] +name = "literal1" +regex = '☃' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "literal2" +regex = '☃+' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "literal3" +regex = '(?i)☃+' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "literal4" +regex = '(?i)Δ' +input = "δ" +matches = [[0, 2]] + +# Unicode word boundaries. +[[tests]] +name = "wb-100" +regex = '\d\b' +input = "6δ" +matches = [] + +[[tests]] +name = "wb-200" +regex = '\d\b' +input = "6 " +matches = [[0, 1]] + +[[tests]] +name = "wb-300" +regex = '\d\B' +input = "6δ" +matches = [[0, 1]] + +[[tests]] +name = "wb-400" +regex = '\d\B' +input = "6 " +matches = [] + +# Unicode character class support. 
+[[tests]] +name = "class1" +regex = '[☃Ⅰ]+' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "class2" +regex = '\pN' +input = "Ⅰ" +matches = [[0, 3]] + +[[tests]] +name = "class3" +regex = '\pN+' +input = "Ⅰ1Ⅱ2" +matches = [[0, 8]] + +[[tests]] +name = "class4" +regex = '\PN+' +input = "abⅠ" +matches = [[0, 2]] + +[[tests]] +name = "class5" +regex = '[\PN]+' +input = "abⅠ" +matches = [[0, 2]] + +[[tests]] +name = "class6" +regex = '[^\PN]+' +input = "abⅠ" +matches = [[2, 5]] + +[[tests]] +name = "class7" +regex = '\p{Lu}+' +input = "ΛΘΓΔα" +matches = [[0, 8]] + +[[tests]] +name = "class8" +regex = '(?i)\p{Lu}+' +input = "ΛΘΓΔα" +matches = [[0, 10]] + +[[tests]] +name = "class9" +regex = '\pL+' +input = "ΛΘΓΔα" +matches = [[0, 10]] + +[[tests]] +name = "class10" +regex = '\p{Ll}+' +input = "ΛΘΓΔα" +matches = [[8, 10]] + +# Unicode aware "Perl" character classes. +[[tests]] +name = "perl1" +regex = '\w+' +input = "dδd" +matches = [[0, 4]] + +[[tests]] +name = "perl2" +regex = '\w+' +input = "⥡" +matches = [] + +[[tests]] +name = "perl3" +regex = '\W+' +input = "⥡" +matches = [[0, 3]] + +[[tests]] +name = "perl4" +regex = '\d+' +input = "1२३9" +matches = [[0, 8]] + +[[tests]] +name = "perl5" +regex = '\d+' +input = "Ⅱ" +matches = [] + +[[tests]] +name = "perl6" +regex = '\D+' +input = "Ⅱ" +matches = [[0, 3]] + +[[tests]] +name = "perl7" +regex = '\s+' +input = " " +matches = [[0, 3]] + +[[tests]] +name = "perl8" +regex = '\s+' +input = "☃" +matches = [] + +[[tests]] +name = "perl9" +regex = '\S+' +input = "☃" +matches = [[0, 3]] + +# Specific tests for Unicode general category classes. 
+[[tests]] +name = "class-gencat1" +regex = '\p{Cased_Letter}' +input = "A" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat2" +regex = '\p{Close_Punctuation}' +input = "❯" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat3" +regex = '\p{Connector_Punctuation}' +input = "⁀" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat4" +regex = '\p{Control}' +input = "\u009F" +matches = [[0, 2]] + +[[tests]] +name = "class-gencat5" +regex = '\p{Currency_Symbol}' +input = "£" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat6" +regex = '\p{Dash_Punctuation}' +input = "〰" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat7" +regex = '\p{Decimal_Number}' +input = "𑓙" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat8" +regex = '\p{Enclosing_Mark}' +input = "\uA672" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat9" +regex = '\p{Final_Punctuation}' +input = "⸡" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat10" +regex = '\p{Format}' +input = "\U000E007F" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat11" +regex = '\p{Initial_Punctuation}' +input = "⸜" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat12" +regex = '\p{Letter}' +input = "Έ" +matches = [[0, 2]] + +[[tests]] +name = "class-gencat13" +regex = '\p{Letter_Number}' +input = "ↂ" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat14" +regex = '\p{Line_Separator}' +input = "\u2028" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat15" +regex = '\p{Lowercase_Letter}' +input = "ϛ" +matches = [[0, 2]] + +[[tests]] +name = "class-gencat16" +regex = '\p{Mark}' +input = "\U000E01EF" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat17" +regex = '\p{Math}' +input = "⋿" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat18" +regex = '\p{Modifier_Letter}' +input = "𖭃" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat19" +regex = '\p{Modifier_Symbol}' +input = "🏿" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat20" +regex = '\p{Nonspacing_Mark}' +input = 
"\U0001E94A" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat21" +regex = '\p{Number}' +input = "⓿" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat22" +regex = '\p{Open_Punctuation}' +input = "⦅" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat23" +regex = '\p{Other}' +input = "\u0BC9" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat24" +regex = '\p{Other_Letter}' +input = "ꓷ" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat25" +regex = '\p{Other_Number}' +input = "㉏" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat26" +regex = '\p{Other_Punctuation}' +input = "𞥞" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat27" +regex = '\p{Other_Symbol}' +input = "⅌" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat28" +regex = '\p{Paragraph_Separator}' +input = "\u2029" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat29" +regex = '\p{Private_Use}' +input = "\U0010FFFD" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat30" +regex = '\p{Punctuation}' +input = "𑁍" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat31" +regex = '\p{Separator}' +input = "\u3000" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat32" +regex = '\p{Space_Separator}' +input = "\u205F" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat33" +regex = '\p{Spacing_Mark}' +input = "\U00016F7E" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat34" +regex = '\p{Symbol}' +input = "⯈" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat35" +regex = '\p{Titlecase_Letter}' +input = "ῼ" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat36" +regex = '\p{Unassigned}' +input = "\U0010FFFF" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat37" +regex = '\p{Uppercase_Letter}' +input = "Ꝋ" +matches = [[0, 3]] + + +# Tests for Unicode emoji properties. 
+[[tests]] +name = "class-emoji1" +regex = '\p{Emoji}' +input = "\u23E9" +matches = [[0, 3]] + +[[tests]] +name = "class-emoji2" +regex = '\p{emoji}' +input = "\U0001F21A" +matches = [[0, 4]] + +[[tests]] +name = "class-emoji3" +regex = '\p{extendedpictographic}' +input = "\U0001FA6E" +matches = [[0, 4]] + +[[tests]] +name = "class-emoji4" +regex = '\p{extendedpictographic}' +input = "\U0001FFFD" +matches = [[0, 4]] + + +# Tests for Unicode grapheme cluster properties. +[[tests]] +name = "class-gcb1" +regex = '\p{grapheme_cluster_break=prepend}' +input = "\U00011D46" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb2" +regex = '\p{gcb=regional_indicator}' +input = "\U0001F1E6" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb3" +regex = '\p{gcb=ri}' +input = "\U0001F1E7" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb4" +regex = '\p{regionalindicator}' +input = "\U0001F1FF" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb5" +regex = '\p{gcb=lvt}' +input = "\uC989" +matches = [[0, 3]] + +[[tests]] +name = "class-gcb6" +regex = '\p{gcb=zwj}' +input = "\u200D" +matches = [[0, 3]] + +# Tests for Unicode word boundary properties. +[[tests]] +name = "class-word-break1" +regex = '\p{word_break=Hebrew_Letter}' +input = "\uFB46" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break2" +regex = '\p{wb=hebrewletter}' +input = "\uFB46" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break3" +regex = '\p{wb=ExtendNumLet}' +input = "\uFF3F" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break4" +regex = '\p{wb=WSegSpace}' +input = "\u3000" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break5" +regex = '\p{wb=numeric}' +input = "\U0001E950" +matches = [[0, 4]] + +# Tests for Unicode sentence boundary properties. 
+[[tests]] +name = "class-sentence-break1" +regex = '\p{sentence_break=Lower}' +input = "\u0469" +matches = [[0, 2]] + +[[tests]] +name = "class-sentence-break2" +regex = '\p{sb=lower}' +input = "\u0469" +matches = [[0, 2]] + +[[tests]] +name = "class-sentence-break3" +regex = '\p{sb=Close}' +input = "\uFF60" +matches = [[0, 3]] + +[[tests]] +name = "class-sentence-break4" +regex = '\p{sb=Close}' +input = "\U0001F677" +matches = [[0, 4]] + +[[tests]] +name = "class-sentence-break5" +regex = '\p{sb=SContinue}' +input = "\uFF64" +matches = [[0, 3]] diff --git a/vendor/regex-automata/tests/data/word-boundary.toml b/vendor/regex-automata/tests/data/word-boundary.toml new file mode 100644 index 000000000..e84b25c2a --- /dev/null +++ b/vendor/regex-automata/tests/data/word-boundary.toml @@ -0,0 +1,771 @@ +# Some of these are cribbed from RE2's test suite. + +# These test \b. Below are tests for \B. +[[tests]] +name = "wb1" +regex = '\b' +input = "" +matches = [] +unicode = false + +[[tests]] +name = "wb2" +regex = '\b' +input = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[tests]] +name = "wb3" +regex = '\b' +input = "ab" +matches = [[0, 0], [2, 2]] +unicode = false + +[[tests]] +name = "wb4" +regex = '^\b' +input = "ab" +matches = [[0, 0]] +unicode = false + +[[tests]] +name = "wb5" +regex = '\b$' +input = "ab" +matches = [[2, 2]] +unicode = false + +[[tests]] +name = "wb6" +regex = '^\b$' +input = "ab" +matches = [] +unicode = false + +[[tests]] +name = "wb7" +regex = '\bbar\b' +input = "nobar bar foo bar" +matches = [[6, 9], [14, 17]] +unicode = false + +[[tests]] +name = "wb8" +regex = 'a\b' +input = "faoa x" +matches = [[3, 4]] +unicode = false + +[[tests]] +name = "wb9" +regex = '\bbar' +input = "bar x" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb10" +regex = '\bbar' +input = "foo\nbar x" +matches = [[4, 7]] +unicode = false + +[[tests]] +name = "wb11" +regex = 'bar\b' +input = "foobar" +matches = [[3, 6]] +unicode = false + +[[tests]] 
+name = "wb12" +regex = 'bar\b' +input = "foobar\nxxx" +matches = [[3, 6]] +unicode = false + +[[tests]] +name = "wb13" +regex = '(foo|bar|[A-Z])\b' +input = "foo" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb14" +regex = '(foo|bar|[A-Z])\b' +input = "foo\n" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb15" +regex = '\b(foo|bar|[A-Z])' +input = "foo" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb16" +regex = '\b(foo|bar|[A-Z])\b' +input = "X" +matches = [[0, 1]] +unicode = false + +[[tests]] +name = "wb17" +regex = '\b(foo|bar|[A-Z])\b' +input = "XY" +matches = [] +unicode = false + +[[tests]] +name = "wb18" +regex = '\b(foo|bar|[A-Z])\b' +input = "bar" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb19" +regex = '\b(foo|bar|[A-Z])\b' +input = "foo" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb20" +regex = '\b(foo|bar|[A-Z])\b' +input = "foo\n" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb21" +regex = '\b(foo|bar|[A-Z])\b' +input = "ffoo bbar N x" +matches = [[10, 11]] +unicode = false + +[[tests]] +name = "wb22" +regex = '\b(fo|foo)\b' +input = "fo" +matches = [[0, 2]] +unicode = false + +[[tests]] +name = "wb23" +regex = '\b(fo|foo)\b' +input = "foo" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb24" +regex = '\b\b' +input = "" +matches = [] +unicode = false + +[[tests]] +name = "wb25" +regex = '\b\b' +input = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[tests]] +name = "wb26" +regex = '\b$' +input = "" +matches = [] +unicode = false + +[[tests]] +name = "wb27" +regex = '\b$' +input = "x" +matches = [[1, 1]] +unicode = false + +[[tests]] +name = "wb28" +regex = '\b$' +input = "y x" +matches = [[3, 3]] +unicode = false + +[[tests]] +name = "wb29" +regex = '(?-u:\b).$' +input = "x" +matches = [[0, 1]] + +[[tests]] +name = "wb30" +regex = '^\b(fo|foo)\b' +input = "fo" +matches = [[0, 2]] +unicode = false + +[[tests]] +name = "wb31" +regex = 
'^\b(fo|foo)\b' +input = "foo" +matches = [[0, 3]] +unicode = false + +[[tests]] +name = "wb32" +regex = '^\b$' +input = "" +matches = [] +unicode = false + +[[tests]] +name = "wb33" +regex = '^\b$' +input = "x" +matches = [] +unicode = false + +[[tests]] +name = "wb34" +regex = '^(?-u:\b).$' +input = "x" +matches = [[0, 1]] + +[[tests]] +name = "wb35" +regex = '^(?-u:\b).(?-u:\b)$' +input = "x" +matches = [[0, 1]] + +[[tests]] +name = "wb36" +regex = '^^^^^\b$$$$$' +input = "" +matches = [] +unicode = false + +[[tests]] +name = "wb37" +regex = '^^^^^(?-u:\b).$$$$$' +input = "x" +matches = [[0, 1]] + +[[tests]] +name = "wb38" +regex = '^^^^^\b$$$$$' +input = "x" +matches = [] +unicode = false + +[[tests]] +name = "wb39" +regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' +input = "x" +matches = [[0, 1]] + +[[tests]] +name = "wb40" +regex = '(?-u:\b).+(?-u:\b)' +input = "$$abc$$" +matches = [[2, 5]] + +[[tests]] +name = "wb41" +regex = '\b' +input = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false + +[[tests]] +name = "wb42" +regex = '\bfoo\b' +input = "zzz foo zzz" +matches = [[4, 7]] +unicode = false + +[[tests]] +name = "wb43" +regex = '\b^' +input = "ab" +matches = [[0, 0]] +unicode = false + +[[tests]] +name = "wb44" +regex = '$\b' +input = "ab" +matches = [[2, 2]] +unicode = false + + +# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we +# have to disable it for most of these tests. This is because \B can match at +# non-UTF-8 boundaries. 
+[[tests]] +name = "nb1" +regex = '\Bfoo\B' +input = "n foo xfoox that" +matches = [[7, 10]] +unicode = false +utf8 = false + +[[tests]] +name = "nb2" +regex = 'a\B' +input = "faoa x" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[tests]] +name = "nb3" +regex = '\Bbar' +input = "bar x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb4" +regex = '\Bbar' +input = "foo\nbar x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb5" +regex = 'bar\B' +input = "foobar" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb6" +regex = 'bar\B' +input = "foobar\nxxx" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb7" +regex = '(foo|bar|[A-Z])\B' +input = "foox" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[tests]] +name = "nb8" +regex = '(foo|bar|[A-Z])\B' +input = "foo\n" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb9" +regex = '\B' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb10" +regex = '\B' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb11" +regex = '\B(foo|bar|[A-Z])' +input = "foo" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb12" +regex = '\B(foo|bar|[A-Z])\B' +input = "xXy" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[tests]] +name = "nb13" +regex = '\B(foo|bar|[A-Z])\B' +input = "XY" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb14" +regex = '\B(foo|bar|[A-Z])\B' +input = "XYZ" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[tests]] +name = "nb15" +regex = '\B(foo|bar|[A-Z])\B' +input = "abara" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[tests]] +name = "nb16" +regex = '\B(foo|bar|[A-Z])\B' +input = "xfoo_" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[tests]] +name = "nb17" +regex = '\B(foo|bar|[A-Z])\B' +input = "xfoo\n" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = 
"nb18" +regex = '\B(foo|bar|[A-Z])\B' +input = "foo bar vNX" +matches = [[9, 10]] +unicode = false +utf8 = false + +[[tests]] +name = "nb19" +regex = '\B(fo|foo)\B' +input = "xfoo" +matches = [[1, 3]] +unicode = false +utf8 = false + +[[tests]] +name = "nb20" +regex = '\B(foo|fo)\B' +input = "xfooo" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[tests]] +name = "nb21" +regex = '\B\B' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb22" +regex = '\B\B' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb23" +regex = '\B$' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb24" +regex = '\B$' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb25" +regex = '\B$' +input = "y x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb26" +regex = '\B.$' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb27" +regex = '^\B(fo|foo)\B' +input = "fo" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb28" +regex = '^\B(fo|foo)\B' +input = "fo" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb29" +regex = '^\B' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb30" +regex = '^\B' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb31" +regex = '^\B\B' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb32" +regex = '^\B\B' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb33" +regex = '^\B$' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb34" +regex = '^\B$' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb35" +regex = '^\B.$' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb36" +regex = '^\B.\B$' +input = "x" +matches = 
[] +unicode = false +utf8 = false + +[[tests]] +name = "nb37" +regex = '^^^^^\B$$$$$' +input = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[tests]] +name = "nb38" +regex = '^^^^^\B.$$$$$' +input = "x" +matches = [] +unicode = false +utf8 = false + +[[tests]] +name = "nb39" +regex = '^^^^^\B$$$$$' +input = "x" +matches = [] +unicode = false +utf8 = false + + +# unicode1* and unicode2* work for both Unicode and ASCII because all matches +# are reported as byte offsets, and « and » do not correspond to word +# boundaries at either the character or byte level. +[[tests]] +name = "unicode1" +regex = '\bx\b' +input = "«x" +matches = [[2, 3]] + +[[tests]] +name = "unicode1-only-ascii" +regex = '\bx\b' +input = "«x" +matches = [[2, 3]] +unicode = false + +[[tests]] +name = "unicode2" +regex = '\bx\b' +input = "x»" +matches = [[0, 1]] + +[[tests]] +name = "unicode2-only-ascii" +regex = '\bx\b' +input = "x»" +matches = [[0, 1]] +unicode = false + +# ASCII word boundaries are completely oblivious to Unicode characters, so +# even though β is a character, an ASCII \b treats it as a word boundary +# when it is adjacent to another ASCII character. (The ASCII \b only looks +# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. +[[tests]] +name = "unicode3" +regex = '\bx\b' +input = 'áxβ' +matches = [] + +[[tests]] +name = "unicode3-only-ascii" +regex = '\bx\b' +input = 'áxβ' +matches = [[2, 3]] +unicode = false + +[[tests]] +name = "unicode4" +regex = '\Bx\B' +input = 'áxβ' +matches = [[2, 3]] + +[[tests]] +name = "unicode4-only-ascii" +regex = '\Bx\B' +input = 'áxβ' +matches = [] +unicode = false +utf8 = false + +# The same as above, but with \b instead of \B as a sanity check. 
+[[tests]] +name = "unicode5" +regex = '\b' +input = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] + +[[tests]] +name = "unicode5-only-ascii" +regex = '\b' +input = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] +unicode = false +utf8 = false + +[[tests]] +name = "unicode5-noutf8" +regex = '\b' +input = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +utf8 = false + +[[tests]] +name = "unicode5-noutf8-only-ascii" +regex = '\b' +input = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +unicode = false +utf8 = false + +# Weird special case to ensure that ASCII \B treats each individual code unit +# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary +# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the +# \w character class.) +[[tests]] +name = "unicode5-not" +regex = '\B' +input = "0\U0007EF5E" +matches = [[5, 5]] + +[[tests]] +name = "unicode5-not-only-ascii" +regex = '\B' +input = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false + +# This gets no matches since \B only matches in the presence of valid UTF-8 +# when Unicode is enabled, even when UTF-8 mode is disabled. +[[tests]] +name = "unicode5-not-noutf8" +regex = '\B' +input = '0\xFF\xFF\xFF\xFF' +matches = [] +unescape = true +utf8 = false + +# But this DOES get matches since \B in ASCII mode only looks at individual +# bytes. +[[tests]] +name = "unicode5-not-noutf8-only-ascii" +regex = '\B' +input = '0\xFF\xFF\xFF\xFF' +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unescape = true +unicode = false +utf8 = false + +# Some tests of no particular significance. 
+[[tests]] +name = "unicode6" +regex = '\b[0-9]+\b' +input = "foo 123 bar 456 quux 789" +matches = [[4, 7], [12, 15], [21, 24]] + +[[tests]] +name = "unicode7" +regex = '\b[0-9]+\b' +input = "foo 123 bar a456 quux 789" +matches = [[4, 7], [22, 25]] + +[[tests]] +name = "unicode8" +regex = '\b[0-9]+\b' +input = "foo 123 bar 456a quux 789" +matches = [[4, 7], [22, 25]] diff --git a/vendor/regex-automata/tests/dfa/api.rs b/vendor/regex-automata/tests/dfa/api.rs new file mode 100644 index 000000000..80d7d704c --- /dev/null +++ b/vendor/regex-automata/tests/dfa/api.rs @@ -0,0 +1,133 @@ +use std::error::Error; + +use regex_automata::{ + dfa::{dense, regex::Regex, Automaton, OverlappingState}, + nfa::thompson, + HalfMatch, MatchError, MatchKind, MultiMatch, +}; + +use crate::util::{BunkPrefilter, SubstringPrefilter}; + +// Tests that quit bytes in the forward direction work correctly. +#[test] +fn quit_fwd() -> Result<(), Box<dyn Error>> { + let dfa = dense::Builder::new() + .configure(dense::Config::new().quit(b'x', true)) + .build("[[:word:]]+$")?; + + assert_eq!( + dfa.find_earliest_fwd(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_fwd(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_overlapping_fwd(b"abcxyz", &mut OverlappingState::start()), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. 
+#[test] +fn quit_rev() -> Result<(), Box<dyn Error>> { + let dfa = dense::Builder::new() + .configure(dense::Config::new().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + + assert_eq!( + dfa.find_earliest_rev(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_rev(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. +#[test] +#[should_panic] +fn quit_panics() { + dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false); +} + +// Tests that if we attempt an overlapping search using a regex without a +// reverse DFA compiled with 'starts_for_each_pattern', then we get a panic. +#[test] +#[should_panic] +fn incorrect_config_overlapping_search_panics() { + let forward = dense::DFA::new(r"abca").unwrap(); + let reverse = dense::Builder::new() + .configure( + dense::Config::new() + .anchored(true) + .match_kind(MatchKind::All) + .starts_for_each_pattern(false), + ) + .thompson(thompson::Config::new().reverse(true)) + .build(r"abca") + .unwrap(); + + let re = Regex::builder().build_from_dfas(forward, reverse); + let haystack = "bar abcabcabca abca foo".as_bytes(); + re.find_overlapping(haystack, &mut OverlappingState::start()); +} + +// This tests an interesting case where even if the Unicode word boundary option +// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode +// word boundaries to be enabled. 
+#[test] +fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { + let mut config = dense::Config::new(); + for b in 0x80..=0xFF { + config = config.quit(b, true); + } + let dfa = dense::Builder::new().configure(config).build(r"\b")?; + let expected = HalfMatch::must(0, 1); + assert_eq!(dfa.find_leftmost_fwd(b" a"), Ok(Some(expected))); + Ok(()) +} + +// Tests that we can provide a prefilter to a Regex, and the search reports +// correct results. +#[test] +fn prefilter_works() -> Result<(), Box<dyn Error>> { + let re = Regex::new(r"a[0-9]+") + .unwrap() + .with_prefilter(SubstringPrefilter::new("a")); + let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; + let matches: Vec<(usize, usize)> = + re.find_leftmost_iter(text).map(|m| (m.start(), m.end())).collect(); + assert_eq!( + matches, + vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] + ); + Ok(()) +} + +// This test confirms that a prefilter is active by using a prefilter that +// reports false negatives. +#[test] +fn prefilter_is_active() -> Result<(), Box<dyn Error>> { + let text = b"za123"; + let re = Regex::new(r"a[0-9]+") + .unwrap() + .with_prefilter(SubstringPrefilter::new("a")); + assert_eq!(re.find_leftmost(b"za123"), Some(MultiMatch::must(0, 1, 5))); + assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4))); + let re = re.with_prefilter(BunkPrefilter::new()); + assert_eq!(re.find_leftmost(b"za123"), None); + // This checks that the prefilter is used when first starting the search, + // instead of waiting until at least one transition has occurred. 
+ assert_eq!(re.find_leftmost(b"a123"), None); + Ok(()) +} diff --git a/vendor/regex-automata/tests/dfa/mod.rs b/vendor/regex-automata/tests/dfa/mod.rs new file mode 100644 index 000000000..f4299510c --- /dev/null +++ b/vendor/regex-automata/tests/dfa/mod.rs @@ -0,0 +1,2 @@ +mod api; +mod suite; diff --git a/vendor/regex-automata/tests/dfa/suite.rs b/vendor/regex-automata/tests/dfa/suite.rs new file mode 100644 index 000000000..426ae346d --- /dev/null +++ b/vendor/regex-automata/tests/dfa/suite.rs @@ -0,0 +1,280 @@ +use regex_automata::{ + dfa::{self, dense, regex::Regex, sparse, Automaton}, + nfa::thompson, + MatchKind, SyntaxConfig, +}; +use regex_syntax as syntax; + +use regex_test::{ + bstr::{BString, ByteSlice}, + CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, + SearchKind as TestSearchKind, TestResult, TestRunner, +}; + +use crate::{suite, Result}; + +/// Runs the test suite with the default configuration. +#[test] +fn unminimized_default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with byte classes disabled. +#[test] +fn unminimized_no_byte_class() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().byte_classes(false)); + + TestRunner::new()? + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with NFA shrinking disabled. +#[test] +fn unminimized_no_nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(false)); + + TestRunner::new()? + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a minimized DFA with an otherwise default +/// configuration. 
+#[test] +fn minimized_default() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().minimize(true)); + TestRunner::new()? + // These regexes tend to be too big. Minimization takes... forever. + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a minimized DFA with byte classes disabled. +#[test] +fn minimized_no_byte_class() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().minimize(true).byte_classes(false)); + + TestRunner::new()? + // These regexes tend to be too big. Minimization takes... forever. + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a sparse unminimized DFA. +#[test] +fn sparse_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? + .test_iter(suite()?.iter(), sparse_compiler(builder)) + .assert(); + Ok(()) +} + +/// Another basic sanity test that checks we can serialize and then deserialize +/// a regex, and that the resulting regex can be used for searching correctly. +#[test] +fn serialization_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + let my_compiler = |builder| { + compiler(builder, |builder, re| { + let builder = builder.clone(); + let (fwd_bytes, _) = re.forward().to_bytes_native_endian(); + let (rev_bytes, _) = re.reverse().to_bytes_native_endian(); + Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + let fwd: dense::DFA<&[u32]> = + dense::DFA::from_bytes(&fwd_bytes).unwrap().0; + let rev: dense::DFA<&[u32]> = + dense::DFA::from_bytes(&rev_bytes).unwrap().0; + let re = builder.build_from_dfas(fwd, rev); + + run_test(&re, test) + })) + }) + }; + TestRunner::new()? 
+ .test_iter(suite()?.iter(), my_compiler(builder)) + .assert(); + Ok(()) +} + +/// A basic sanity test that checks we can serialize and then deserialize a +/// regex using sparse DFAs, and that the resulting regex can be used for +/// searching correctly. +#[test] +fn sparse_serialization_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + let my_compiler = |builder| { + compiler(builder, |builder, re| { + let builder = builder.clone(); + let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian(); + let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian(); + Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + let fwd: sparse::DFA<&[u8]> = + sparse::DFA::from_bytes(&fwd_bytes).unwrap().0; + let rev: sparse::DFA<&[u8]> = + sparse::DFA::from_bytes(&rev_bytes).unwrap().0; + let re = builder.build_from_dfas(fwd, rev); + run_test(&re, test) + })) + }) + }; + TestRunner::new()? + .test_iter(suite()?.iter(), my_compiler(builder)) + .assert(); + Ok(()) +} + +fn dense_compiler( + builder: dfa::regex::Builder, +) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { + compiler(builder, |_, re| { + Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + run_test(&re, test) + })) + }) +} + +fn sparse_compiler( + builder: dfa::regex::Builder, +) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { + compiler(builder, |builder, re| { + let fwd = re.forward().to_sparse()?; + let rev = re.reverse().to_sparse()?; + let re = builder.build_from_dfas(fwd, rev); + Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + run_test(&re, test) + })) + }) +} + +fn compiler( + mut builder: dfa::regex::Builder, + mut create_matcher: impl FnMut( + &dfa::regex::Builder, + Regex, + ) -> Result<CompiledRegex>, +) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { + move |test, regexes| { + let regexes = regexes + .iter() + .map(|r| r.to_str().map(|s| s.to_string())) + 
.collect::<std::result::Result<Vec<String>, _>>()?; + + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. + let mut thompson = thompson::Builder::new(); + thompson.configure(config_thompson(test)); + // TODO: Modify Hir to report facts like this, instead of needing to + // build an NFA to do it. + if let Ok(nfa) = thompson.build_many(&regexes) { + let non_ascii = test.input().iter().any(|&b| !b.is_ascii()); + if nfa.has_word_boundary_unicode() && non_ascii { + return Ok(CompiledRegex::skip()); + } + } + if !configure_regex_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + create_matcher(&builder, builder.build_many(&regexes)?) + } +} + +fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> { + let is_match = if re.is_match(test.input()) { + TestResult::matched() + } else { + TestResult::no_match() + }; + let is_match = is_match.name("is_match"); + + let find_matches = match test.search_kind() { + TestSearchKind::Earliest => { + let it = re + .find_earliest_iter(test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_earliest_iter") + } + TestSearchKind::Leftmost => { + let it = re + .find_leftmost_iter(test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_leftmost_iter") + } + TestSearchKind::Overlapping => { + let it = re + .find_overlapping_iter(test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_overlapping_iter") + } + }; + + vec![is_match, find_matches] +} + +/// Configures the given regex builder with all 
relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_regex_builder( + test: &RegexTest, + builder: &mut dfa::regex::Builder, +) -> bool { + let match_kind = match test.match_kind() { + TestMatchKind::All => MatchKind::All, + TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst, + TestMatchKind::LeftmostLongest => return false, + }; + + let syntax_config = SyntaxConfig::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()); + let dense_config = dense::Config::new() + .anchored(test.anchored()) + .match_kind(match_kind) + .unicode_word_boundary(true); + let regex_config = Regex::config().utf8(test.utf8()); + + builder + .configure(regex_config) + .syntax(syntax_config) + .thompson(config_thompson(test)) + .dense(dense_config); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + thompson::Config::new().utf8(test.utf8()) +} diff --git a/vendor/regex-automata/tests/hybrid/api.rs b/vendor/regex-automata/tests/hybrid/api.rs new file mode 100644 index 000000000..9a834dbb8 --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/api.rs @@ -0,0 +1,195 @@ +use std::error::Error; + +use regex_automata::{ + hybrid::{ + dfa::{self, DFA}, + regex::Regex, + OverlappingState, + }, + nfa::thompson, + HalfMatch, MatchError, MatchKind, MultiMatch, +}; + +use crate::util::{BunkPrefilter, SubstringPrefilter}; + +// Tests that too many cache resets cause the lazy DFA to quit. +// +// We only test this on 64-bit because the test is gingerly crafted based on +// implementation details of cache sizes. It's not a great test because of +// that, but it does check some interesting properties around how positions are +// reported when a search "gives up." 
+#[test] +#[cfg(target_pointer_width = "64")] +fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { + // This is a carefully chosen regex. The idea is to pick one that requires + // some decent number of states (hence the bounded repetition). But we + // specifically choose to create a class with an ASCII letter and a + // non-ASCII letter so that we can check that no new states are created + // once the cache is full. Namely, if we fill up the cache on a haystack + // of 'a's, then in order to match one 'β', a new state will need to be + // created since a 'β' is encoded with multiple bytes. Since there's no + // room for this state, the search should quit at the very first position. + let pattern = r"[aβ]{100}"; + let dfa = DFA::builder() + .configure( + // Configure it so that we have the minimum cache capacity + // possible. And that if any resets occur, the search quits. + DFA::config() + .skip_cache_capacity_check(true) + .cache_capacity(0) + .minimum_cache_clear_count(Some(0)), + ) + .build(pattern)?; + let mut cache = dfa.create_cache(); + + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::GaveUp { offset: 25 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone())); + assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone())); + assert_eq!( + dfa.find_overlapping_fwd( + &mut cache, + &haystack, + &mut OverlappingState::start() + ), + Err(err.clone()) + ); + + let haystack = "β".repeat(101).into_bytes(); + let err = MatchError::GaveUp { offset: 0 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + // no need to test that other find routines quit, since we did that above + + // OK, if we reset the cache, then we should be able to create more states + // and make more progress with searching for betas. + cache.reset(&dfa); + let err = MatchError::GaveUp { offset: 26 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + + // ... 
switching back to ASCII still makes progress since it just needs to + // set transitions on existing states! + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::GaveUp { offset: 13 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + + Ok(()) +} + +// Tests that quit bytes in the forward direction work correctly. +#[test] +fn quit_fwd() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .build("[[:word:]]+$")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.find_earliest_fwd(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_fwd(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_overlapping_fwd( + &mut cache, + b"abcxyz", + &mut OverlappingState::start() + ), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. +#[test] +fn quit_rev() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.find_earliest_rev(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_rev(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. 
+#[test] +#[should_panic] +fn quit_panics() { + DFA::config().unicode_word_boundary(true).quit(b'\xFF', false); +} + +// This tests an interesting case where even if the Unicode word boundary option +// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode +// word boundaries to be enabled. 
+#[test] +fn prefilter_is_active() -> Result<(), Box<dyn Error>> { + let text = b"za123"; + let mut re = Regex::new(r"a[0-9]+").unwrap(); + let mut cache = re.create_cache(); + + re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); + assert_eq!( + re.find_leftmost(&mut cache, b"za123"), + Some(MultiMatch::must(0, 1, 5)) + ); + assert_eq!( + re.find_leftmost(&mut cache, b"a123"), + Some(MultiMatch::must(0, 0, 4)) + ); + re.set_prefilter(Some(Box::new(BunkPrefilter::new()))); + assert_eq!(re.find_leftmost(&mut cache, b"za123"), None); + // This checks that the prefilter is used when first starting the search, + // instead of waiting until at least one transition has occurred. + assert_eq!(re.find_leftmost(&mut cache, b"a123"), None); + Ok(()) +} diff --git a/vendor/regex-automata/tests/hybrid/mod.rs b/vendor/regex-automata/tests/hybrid/mod.rs new file mode 100644 index 000000000..f4299510c --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/mod.rs @@ -0,0 +1,2 @@ +mod api; +mod suite; diff --git a/vendor/regex-automata/tests/hybrid/suite.rs b/vendor/regex-automata/tests/hybrid/suite.rs new file mode 100644 index 000000000..d60570d84 --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/suite.rs @@ -0,0 +1,212 @@ +use regex_automata::{ + hybrid::{ + dfa::DFA, + regex::{self, Regex}, + }, + nfa::thompson, + MatchKind, SyntaxConfig, +}; +use regex_syntax as syntax; + +use regex_test::{ + bstr::{BString, ByteSlice}, + CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, + SearchKind as TestSearchKind, TestResult, TestRunner, +}; + +use crate::{suite, Result}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA with NFA shrinking disabled. +/// +/// This is actually the typical configuration one wants for a lazy DFA. 
NFA +/// shrinking is mostly only advantageous when building a full DFA since it +/// can sharply decrease the amount of time determinization takes. But NFA +/// shrinking is itself otherwise fairly expensive. Since a lazy DFA has +/// no compilation time (other than for building the NFA of course) before +/// executing a search, it's usually worth it to forgo NFA shrinking. +#[test] +fn no_nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(false)); + TestRunner::new()? + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled. +#[test] +fn starts_for_each_pattern() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().starts_for_each_pattern(true)); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when byte classes are disabled. +/// +/// N.B. Disabling byte classes doesn't avoid any indirection at search time. +/// All it does is cause every byte value to be its own distinct equivalence +/// class. +#[test] +fn no_byte_classes() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().byte_classes(false)); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests that hybrid NFA/DFA never clears its cache for any test with the +/// default capacity. +/// +/// N.B. If a regex suite test is added that causes the cache to be cleared, +/// then this should just skip that test. (Which can be done by calling the +/// 'blacklist' method on 'TestRunner'.) 
+#[test] +fn no_cache_clearing() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().minimum_cache_clear_count(Some(0))); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when the minimum cache capacity is set. +#[test] +fn min_cache_capacity() -> Result<()> { + let mut builder = Regex::builder(); + builder + .dfa(DFA::config().cache_capacity(0).skip_cache_capacity_check(true)); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +fn compiler( + mut builder: regex::Builder, +) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { + move |test, regexes| { + let regexes = regexes + .iter() + .map(|r| r.to_str().map(|s| s.to_string())) + .collect::<std::result::Result<Vec<String>, _>>()?; + + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. + let mut thompson = thompson::Builder::new(); + thompson.syntax(config_syntax(test)).configure(config_thompson(test)); + if let Ok(nfa) = thompson.build_many(&regexes) { + let non_ascii = test.input().iter().any(|&b| !b.is_ascii()); + if nfa.has_word_boundary_unicode() && non_ascii { + return Ok(CompiledRegex::skip()); + } + } + if !configure_regex_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(&regexes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &Regex, + cache: &mut regex::Cache, + test: &RegexTest, +) -> Vec<TestResult> { + let is_match = if re.is_match(cache, test.input()) { + TestResult::matched() + } else { + TestResult::no_match() + }; + let is_match = is_match.name("is_match"); + + let find_matches = match test.search_kind() { + TestSearchKind::Earliest => { + let it = re + .find_earliest_iter(cache, test.input()) + 
.take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_earliest_iter") + } + TestSearchKind::Leftmost => { + let it = re + .find_leftmost_iter(cache, test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_leftmost_iter") + } + TestSearchKind::Overlapping => { + let it = re + .find_overlapping_iter(cache, test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_overlapping_iter") + } + }; + vec![is_match, find_matches] +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_regex_builder( + test: &RegexTest, + builder: &mut regex::Builder, +) -> bool { + let match_kind = match test.match_kind() { + TestMatchKind::All => MatchKind::All, + TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst, + TestMatchKind::LeftmostLongest => return false, + }; + + let dense_config = DFA::config() + .anchored(test.anchored()) + .match_kind(match_kind) + .unicode_word_boundary(true); + let regex_config = Regex::config().utf8(test.utf8()); + builder + .configure(regex_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)) + .dfa(dense_config); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + thompson::Config::new().utf8(test.utf8()) +} + +/// Configuration of the regex parser from a regex test. 
+fn config_syntax(test: &RegexTest) -> SyntaxConfig { + SyntaxConfig::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) +} diff --git a/vendor/regex-automata/tests/nfa/mod.rs b/vendor/regex-automata/tests/nfa/mod.rs new file mode 100644 index 000000000..326862147 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/mod.rs @@ -0,0 +1 @@ +mod thompson; diff --git a/vendor/regex-automata/tests/nfa/thompson/mod.rs b/vendor/regex-automata/tests/nfa/thompson/mod.rs new file mode 100644 index 000000000..3a03f52ce --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/mod.rs @@ -0,0 +1 @@ +mod pikevm; diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs new file mode 100644 index 000000000..c8199f709 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs @@ -0,0 +1,191 @@ +/* +use std::error::Error; + +use regex_automata::{ + hybrid::{ + dfa::{self, DFA}, + regex::Regex, + OverlappingState, + }, + nfa::thompson, + HalfMatch, MatchError, MatchKind, MultiMatch, +}; + +use crate::util::{BunkPrefilter, SubstringPrefilter}; + +// Tests that too many cache resets cause the lazy DFA to quit. +#[test] +fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { + // This is a carefully chosen regex. The idea is to pick one that requires + // some decent number of states (hence the bounded repetition). But we + // specifically choose to create a class with an ASCII letter and a + // non-ASCII letter so that we can check that no new states are created + // once the cache is full. Namely, if we fill up the cache on a haystack + // of 'a's, then in order to match one 'β', a new state will need to be + // created since a 'β' is encoded with multiple bytes. Since there's no + // room for this state, the search should quit at the very first position. 
+ let pattern = r"[aβ]{100}"; + let dfa = DFA::builder() + .configure( + // Configure it so that we have the minimum cache capacity + // possible. And that if any resets occur, the search quits. + DFA::config() + .skip_cache_capacity_check(true) + .cache_capacity(0) + .minimum_cache_clear_count(Some(0)), + ) + .build(pattern)?; + let mut cache = dfa.create_cache(); + + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::GaveUp { offset: 25 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone())); + assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone())); + assert_eq!( + dfa.find_overlapping_fwd( + &mut cache, + &haystack, + &mut OverlappingState::start() + ), + Err(err.clone()) + ); + + let haystack = "β".repeat(101).into_bytes(); + let err = MatchError::GaveUp { offset: 0 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + // no need to test that other find routines quit, since we did that above + + // OK, if we reset the cache, then we should be able to create more states + // and make more progress with searching for betas. + cache.reset(&dfa); + let err = MatchError::GaveUp { offset: 26 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + + // ... switching back to ASCII still makes progress since it just needs to + // set transitions on existing states! + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::GaveUp { offset: 13 }; + assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + + Ok(()) +} + +// Tests that quit bytes in the forward direction work correctly. 
+#[test] +fn quit_fwd() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .build("[[:word:]]+$")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.find_earliest_fwd(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_fwd(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_overlapping_fwd( + &mut cache, + b"abcxyz", + &mut OverlappingState::start() + ), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. +#[test] +fn quit_rev() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.find_earliest_rev(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_rev(&mut cache, b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. +#[test] +#[should_panic] +fn quit_panics() { + DFA::config().unicode_word_boundary(true).quit(b'\xFF', false); +} + +// This tests an interesting case where even if the Unicode word boundary option +// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode +// word boundaries to be enabled. 
+#[test] +fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { + let mut config = DFA::config(); + for b in 0x80..=0xFF { + config = config.quit(b, true); + } + let dfa = DFA::builder().configure(config).build(r"\b")?; + let mut cache = dfa.create_cache(); + let expected = HalfMatch::must(0, 1); + assert_eq!(dfa.find_leftmost_fwd(&mut cache, b" a"), Ok(Some(expected))); + Ok(()) +} + +// Tests that we can provide a prefilter to a Regex, and the search reports +// correct results. +#[test] +fn prefilter_works() -> Result<(), Box<dyn Error>> { + let mut re = Regex::new(r"a[0-9]+").unwrap(); + re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); + let mut cache = re.create_cache(); + + let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; + let matches: Vec<(usize, usize)> = re + .find_leftmost_iter(&mut cache, text) + .map(|m| (m.start(), m.end())) + .collect(); + assert_eq!( + matches, + vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] + ); + Ok(()) +} + +// This test confirms that a prefilter is active by using a prefilter that +// reports false negatives. +#[test] +fn prefilter_is_active() -> Result<(), Box<dyn Error>> { + let text = b"za123"; + let mut re = Regex::new(r"a[0-9]+").unwrap(); + let mut cache = re.create_cache(); + + re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); + assert_eq!( + re.find_leftmost(&mut cache, b"za123"), + Some(MultiMatch::must(0, 1, 5)) + ); + assert_eq!( + re.find_leftmost(&mut cache, b"a123"), + Some(MultiMatch::must(0, 0, 4)) + ); + re.set_prefilter(Some(Box::new(BunkPrefilter::new()))); + assert_eq!(re.find_leftmost(&mut cache, b"za123"), None); + // This checks that the prefilter is used when first starting the search, + // instead of waiting until at least one transition has occurred. 
+ assert_eq!(re.find_leftmost(&mut cache, b"a123"), None); + Ok(()) +} +*/ diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs new file mode 100644 index 000000000..f4299510c --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs @@ -0,0 +1,2 @@ +mod api; +mod suite; diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs new file mode 100644 index 000000000..e5505d59a --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs @@ -0,0 +1,109 @@ +use regex_automata::{ + nfa::thompson::{ + self, + pikevm::{self, PikeVM}, + }, + MatchKind, SyntaxConfig, +}; +use regex_syntax as syntax; + +use regex_test::{ + bstr::{BString, ByteSlice}, + CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, + SearchKind as TestSearchKind, TestResult, TestRunner, +}; + +use crate::{suite, Result}; + +/// Tests the default configuration of the PikeVM. 
+#[test] +fn default() -> Result<()> { + let builder = PikeVM::builder(); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +fn compiler( + mut builder: pikevm::Builder, +) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { + move |test, regexes| { + let regexes = regexes + .iter() + .map(|r| r.to_str().map(|s| s.to_string())) + .collect::<std::result::Result<Vec<String>, _>>()?; + if !configure_pikevm_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(&regexes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &PikeVM, + cache: &mut pikevm::Cache, + test: &RegexTest, +) -> Vec<TestResult> { + // let is_match = if re.is_match(cache, test.input()) { + // TestResult::matched() + // } else { + // TestResult::no_match() + // }; + // let is_match = is_match.name("is_match"); + + let find_matches = match test.search_kind() { + TestSearchKind::Earliest => { + TestResult::skip().name("find_earliest_iter") + } + TestSearchKind::Leftmost => { + let it = re + .find_leftmost_iter(cache, test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_leftmost_iter") + } + TestSearchKind::Overlapping => { + TestResult::skip().name("find_overlapping_iter") + } + }; + // vec![is_match, find_matches] + vec![find_matches] +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). 
+fn configure_pikevm_builder( + test: &RegexTest, + builder: &mut pikevm::Builder, +) -> bool { + let pikevm_config = + PikeVM::config().anchored(test.anchored()).utf8(test.utf8()); + builder + .configure(pikevm_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + thompson::Config::new().utf8(test.utf8()) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> SyntaxConfig { + SyntaxConfig::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) +} diff --git a/vendor/regex-automata/tests/regression.rs b/vendor/regex-automata/tests/regression.rs index c2d2c1226..e5355fed7 100644 --- a/vendor/regex-automata/tests/regression.rs +++ b/vendor/regex-automata/tests/regression.rs @@ -1,4 +1,7 @@ -use regex_automata::{dense, DFA}; +use regex_automata::{ + dfa::{dense, Automaton}, + MatchError, +}; // A regression test for checking that minimization correctly translates // whether a state is a match state or not. 
Previously, it was possible for @@ -34,9 +37,8 @@ fn minimize_sets_correct_match_states() { "; let dfa = dense::Builder::new() - .minimize(true) - .anchored(true) + .configure(dense::Config::new().anchored(true).minimize(true)) .build(pattern) .unwrap(); - assert_eq!(None, dfa.find(b"\xE2")); + assert_eq!(Ok(None), dfa.find_leftmost_fwd(b"\xE2")); } diff --git a/vendor/regex-automata/tests/suite.rs b/vendor/regex-automata/tests/suite.rs deleted file mode 100644 index 839719403..000000000 --- a/vendor/regex-automata/tests/suite.rs +++ /dev/null @@ -1,250 +0,0 @@ -use regex_automata::{DenseDFA, Regex, RegexBuilder, SparseDFA}; - -use collection::{RegexTester, SUITE}; - -#[test] -fn unminimized_standard() { - let mut builder = RegexBuilder::new(); - builder.minimize(false).premultiply(false).byte_classes(false); - - let mut tester = RegexTester::new().skip_expensive(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn unminimized_premultiply() { - let mut builder = RegexBuilder::new(); - builder.minimize(false).premultiply(true).byte_classes(false); - - let mut tester = RegexTester::new().skip_expensive(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn unminimized_byte_class() { - let mut builder = RegexBuilder::new(); - builder.minimize(false).premultiply(false).byte_classes(true); - - let mut tester = RegexTester::new(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn unminimized_premultiply_byte_class() { - let mut builder = RegexBuilder::new(); - builder.minimize(false).premultiply(true).byte_classes(true); - - let mut tester = RegexTester::new(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn unminimized_standard_no_nfa_shrink() { - let mut builder = RegexBuilder::new(); - builder - .minimize(false) - .premultiply(false) - .byte_classes(false) - .shrink(false); - - let mut tester = RegexTester::new().skip_expensive(); - 
tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn minimized_standard() { - let mut builder = RegexBuilder::new(); - builder.minimize(true).premultiply(false).byte_classes(false); - - let mut tester = RegexTester::new().skip_expensive(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn minimized_premultiply() { - let mut builder = RegexBuilder::new(); - builder.minimize(true).premultiply(true).byte_classes(false); - - let mut tester = RegexTester::new().skip_expensive(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn minimized_byte_class() { - let mut builder = RegexBuilder::new(); - builder.minimize(true).premultiply(false).byte_classes(true); - - let mut tester = RegexTester::new(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn minimized_premultiply_byte_class() { - let mut builder = RegexBuilder::new(); - builder.minimize(true).premultiply(true).byte_classes(true); - - let mut tester = RegexTester::new(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -#[test] -fn minimized_standard_no_nfa_shrink() { - let mut builder = RegexBuilder::new(); - builder - .minimize(true) - .premultiply(false) - .byte_classes(false) - .shrink(false); - - let mut tester = RegexTester::new().skip_expensive(); - tester.test_all(builder, SUITE.tests()); - tester.assert(); -} - -// A basic sanity test that checks we can convert a regex to a smaller -// representation and that the resulting regex still passes our tests. -// -// If tests grow minimal regexes that cannot be represented in 16 bits, then -// we'll either want to skip those or increase the size to test to u32. 
-#[test] -fn u16() { - let mut builder = RegexBuilder::new(); - builder.minimize(true).premultiply(false).byte_classes(true); - - let mut tester = RegexTester::new().skip_expensive(); - for test in SUITE.tests() { - let builder = builder.clone(); - let re: Regex = match tester.build_regex(builder, test) { - None => continue, - Some(re) => re, - }; - let small_re = Regex::from_dfas( - re.forward().to_u16().unwrap(), - re.reverse().to_u16().unwrap(), - ); - - tester.test(test, &small_re); - } - tester.assert(); -} - -// Test that sparse DFAs work using the standard configuration. -#[test] -fn sparse_unminimized_standard() { - let mut builder = RegexBuilder::new(); - builder.minimize(false).premultiply(false).byte_classes(false); - - let mut tester = RegexTester::new().skip_expensive(); - for test in SUITE.tests() { - let builder = builder.clone(); - let re: Regex = match tester.build_regex(builder, test) { - None => continue, - Some(re) => re, - }; - let fwd = re.forward().to_sparse().unwrap(); - let rev = re.reverse().to_sparse().unwrap(); - let sparse_re = Regex::from_dfas(fwd, rev); - - tester.test(test, &sparse_re); - } - tester.assert(); -} - -// Test that sparse DFAs work after converting them to a different state ID -// representation. 
-#[test] -fn sparse_u16() { - let mut builder = RegexBuilder::new(); - builder.minimize(true).premultiply(false).byte_classes(false); - - let mut tester = RegexTester::new().skip_expensive(); - for test in SUITE.tests() { - let builder = builder.clone(); - let re: Regex = match tester.build_regex(builder, test) { - None => continue, - Some(re) => re, - }; - let fwd = re.forward().to_sparse().unwrap().to_u16().unwrap(); - let rev = re.reverse().to_sparse().unwrap().to_u16().unwrap(); - let sparse_re = Regex::from_dfas(fwd, rev); - - tester.test(test, &sparse_re); - } - tester.assert(); -} - -// Another basic sanity test that checks we can serialize and then deserialize -// a regex, and that the resulting regex can be used for searching correctly. -#[test] -fn serialization_roundtrip() { - let mut builder = RegexBuilder::new(); - builder.premultiply(false).byte_classes(true); - - let mut tester = RegexTester::new().skip_expensive(); - for test in SUITE.tests() { - let builder = builder.clone(); - let re: Regex = match tester.build_regex(builder, test) { - None => continue, - Some(re) => re, - }; - - let fwd_bytes = re.forward().to_bytes_native_endian().unwrap(); - let rev_bytes = re.reverse().to_bytes_native_endian().unwrap(); - let fwd: DenseDFA<&[usize], usize> = - unsafe { DenseDFA::from_bytes(&fwd_bytes) }; - let rev: DenseDFA<&[usize], usize> = - unsafe { DenseDFA::from_bytes(&rev_bytes) }; - let re = Regex::from_dfas(fwd, rev); - - tester.test(test, &re); - } - tester.assert(); -} - -// A basic sanity test that checks we can serialize and then deserialize a -// regex using sparse DFAs, and that the resulting regex can be used for -// searching correctly. 
-#[test] -fn sparse_serialization_roundtrip() { - let mut builder = RegexBuilder::new(); - builder.byte_classes(true); - - let mut tester = RegexTester::new().skip_expensive(); - for test in SUITE.tests() { - let builder = builder.clone(); - let re: Regex = match tester.build_regex(builder, test) { - None => continue, - Some(re) => re, - }; - - let fwd_bytes = re - .forward() - .to_sparse() - .unwrap() - .to_bytes_native_endian() - .unwrap(); - let rev_bytes = re - .reverse() - .to_sparse() - .unwrap() - .to_bytes_native_endian() - .unwrap(); - let fwd: SparseDFA<&[u8], usize> = - unsafe { SparseDFA::from_bytes(&fwd_bytes) }; - let rev: SparseDFA<&[u8], usize> = - unsafe { SparseDFA::from_bytes(&rev_bytes) }; - let re = Regex::from_dfas(fwd, rev); - - tester.test(test, &re); - } - tester.assert(); -} diff --git a/vendor/regex-automata/tests/tests.rs b/vendor/regex-automata/tests/tests.rs index fb4cd7717..e4728470c 100644 --- a/vendor/regex-automata/tests/tests.rs +++ b/vendor/regex-automata/tests/tests.rs @@ -1,25 +1,44 @@ -#[cfg(feature = "std")] -#[macro_use] -extern crate lazy_static; -#[cfg(feature = "std")] -extern crate regex; -#[cfg(feature = "std")] -extern crate regex_automata; -#[cfg(feature = "std")] -extern crate serde; -#[cfg(feature = "std")] -extern crate serde_bytes; -#[cfg(feature = "std")] -#[macro_use] -extern crate serde_derive; -#[cfg(feature = "std")] -extern crate toml; +#![allow(warnings)] -#[cfg(feature = "std")] -mod collection; -#[cfg(feature = "std")] +use regex_test::RegexTests; + +mod dfa; +mod hybrid; +mod nfa; mod regression; -#[cfg(feature = "std")] -mod suite; -#[cfg(feature = "std")] -mod unescape; +mod util; + +type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; + +fn suite() -> Result<RegexTests> { + let mut tests = RegexTests::new(); + macro_rules! 
load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("data/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("bytes"); + load!("crazy"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("unicode"); + load!("word-boundary"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + load!("fowler/repetition-expensive"); + + Ok(tests) +} diff --git a/vendor/regex-automata/tests/unescape.rs b/vendor/regex-automata/tests/unescape.rs deleted file mode 100644 index 43fe04e71..000000000 --- a/vendor/regex-automata/tests/unescape.rs +++ /dev/null @@ -1,84 +0,0 @@ -#[derive(Clone, Copy, Eq, PartialEq)] -enum State { - /// The state after seeing a `\`. - Escape, - /// The state after seeing a `\x`. - HexFirst, - /// The state after seeing a `\x[0-9A-Fa-f]`. - HexSecond(char), - /// Default state. 
- Literal, -} - -pub fn unescape(s: &str) -> Vec<u8> { - use self::State::*; - - let mut bytes = vec![]; - let mut state = Literal; - for c in s.chars() { - match state { - Escape => match c { - '\\' => { - bytes.push(b'\\'); - state = Literal; - } - 'n' => { - bytes.push(b'\n'); - state = Literal; - } - 'r' => { - bytes.push(b'\r'); - state = Literal; - } - 't' => { - bytes.push(b'\t'); - state = Literal; - } - 'x' => { - state = HexFirst; - } - c => { - bytes.extend(format!(r"\{}", c).into_bytes()); - state = Literal; - } - }, - HexFirst => match c { - '0'..='9' | 'A'..='F' | 'a'..='f' => { - state = HexSecond(c); - } - c => { - bytes.extend(format!(r"\x{}", c).into_bytes()); - state = Literal; - } - }, - HexSecond(first) => match c { - '0'..='9' | 'A'..='F' | 'a'..='f' => { - let ordinal = format!("{}{}", first, c); - let byte = u8::from_str_radix(&ordinal, 16).unwrap(); - bytes.push(byte); - state = Literal; - } - c => { - let original = format!(r"\x{}{}", first, c); - bytes.extend(original.into_bytes()); - state = Literal; - } - }, - Literal => match c { - '\\' => { - state = Escape; - } - c => { - bytes.extend(c.to_string().as_bytes()); - } - }, - } - } - match state { - Escape => bytes.push(b'\\'), - HexFirst => bytes.extend(b"\\x"), - HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()), - Literal => {} - } - bytes -} diff --git a/vendor/regex-automata/tests/util.rs b/vendor/regex-automata/tests/util.rs new file mode 100644 index 000000000..499aa8c6d --- /dev/null +++ b/vendor/regex-automata/tests/util.rs @@ -0,0 +1,57 @@ +use regex_automata::util::prefilter::{self, Candidate, Prefilter}; + +#[derive(Clone, Debug)] +pub struct SubstringPrefilter(bstr::Finder<'static>); + +impl SubstringPrefilter { + pub fn new<B: AsRef<[u8]>>(needle: B) -> SubstringPrefilter { + SubstringPrefilter(bstr::Finder::new(needle.as_ref()).into_owned()) + } +} + +impl Prefilter for SubstringPrefilter { + #[inline] + fn next_candidate( + &self, + state: &mut 
prefilter::State, + haystack: &[u8], + at: usize, + ) -> Candidate { + self.0 + .find(&haystack[at..]) + .map(|i| Candidate::PossibleStartOfMatch(at + i)) + .unwrap_or(Candidate::None) + } + + fn heap_bytes(&self) -> usize { + self.0.needle().len() + } +} + +/// A prefilter that always returns `Candidate::None`, even if it's a false +/// negative. This is useful for confirming that a prefilter is actually +/// active by asserting an incorrect result. +#[derive(Clone, Debug)] +pub struct BunkPrefilter(()); + +impl BunkPrefilter { + pub fn new() -> BunkPrefilter { + BunkPrefilter(()) + } +} + +impl Prefilter for BunkPrefilter { + #[inline] + fn next_candidate( + &self, + _state: &mut prefilter::State, + _haystack: &[u8], + _at: usize, + ) -> Candidate { + Candidate::None + } + + fn heap_bytes(&self) -> usize { + 0 + } +} |