diff options
Diffstat (limited to 'vendor/regex-automata/tests')
71 files changed, 1804 insertions, 8107 deletions
diff --git a/vendor/regex-automata/tests/data/bytes.toml b/vendor/regex-automata/tests/data/bytes.toml deleted file mode 100644 index eb3a0942e..000000000 --- a/vendor/regex-automata/tests/data/bytes.toml +++ /dev/null @@ -1,235 +0,0 @@ -# These are tests specifically crafted for regexes that can match arbitrary -# bytes. In some cases, we also test the Unicode variant as well, just because -# it's good sense to do so. But also, these tests aren't really about Unicode, -# but whether matches are only reported at valid UTF-8 boundaries. For most -# tests in this entire collection, utf8 = true. But for these tests, we use -# utf8 = false. - -[[tests]] -name = "word-boundary-ascii" -regex = ' \b' -input = " δ" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-unicode" -regex = ' \b' -input = " δ" -matches = [[0, 1]] -unicode = true -utf8 = false - -[[tests]] -name = "word-boundary-ascii-not" -regex = ' \B' -input = " δ" -matches = [[0, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-unicode-not" -regex = ' \B' -input = " δ" -matches = [] -unicode = true -utf8 = false - -[[tests]] -name = "perl-word-ascii" -regex = '\w+' -input = "aδ" -matches = [[0, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "perl-word-unicode" -regex = '\w+' -input = "aδ" -matches = [[0, 3]] -unicode = true -utf8 = false - -[[tests]] -name = "perl-decimal-ascii" -regex = '\d+' -input = "1२३9" -matches = [[0, 1], [7, 8]] -unicode = false -utf8 = false - -[[tests]] -name = "perl-decimal-unicode" -regex = '\d+' -input = "1२३9" -matches = [[0, 8]] -unicode = true -utf8 = false - -[[tests]] -name = "perl-whitespace-ascii" -regex = '\s+' -input = " \u1680" -matches = [[0, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "perl-whitespace-unicode" -regex = '\s+' -input = " \u1680" -matches = [[0, 4]] -unicode = true -utf8 = false - -# The first `(.+)` matches two Unicode codepoints, but can't match the 5th -# byte, which isn't valid 
UTF-8. The second (byte based) `(.+)` takes over and -# matches. -[[tests]] -name = "mixed-dot" -regex = '(.+)(?-u)(.+)' -input = '\xCE\x93\xCE\x94\xFF' -captures = [ - [[0, 5], [0, 4], [4, 5]], -] -unescape = true -unicode = true -utf8 = false - -[[tests]] -name = "case-one-ascii" -regex = 'a' -input = "A" -matches = [[0, 1]] -case_insensitive = true -unicode = false -utf8 = false - -[[tests]] -name = "case-one-unicode" -regex = 'a' -input = "A" -matches = [[0, 1]] -case_insensitive = true -unicode = true -utf8 = false - -[[tests]] -name = "case-class-simple-ascii" -regex = '[a-z]+' -input = "AaAaA" -matches = [[0, 5]] -case_insensitive = true -unicode = false -utf8 = false - -[[tests]] -name = "case-class-ascii" -regex = '[a-z]+' -input = "aA\u212AaA" -matches = [[0, 2], [5, 7]] -case_insensitive = true -unicode = false -utf8 = false - -[[tests]] -name = "case-class-unicode" -regex = '[a-z]+' -input = "aA\u212AaA" -matches = [[0, 7]] -case_insensitive = true -unicode = true -utf8 = false - -[[tests]] -name = "negate-ascii" -regex = '[^a]' -input = "δ" -matches = [[0, 1], [1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "negate-unicode" -regex = '[^a]' -input = "δ" -matches = [[0, 2]] -unicode = true -utf8 = false - -# When utf8=true, this won't match, because the implicit '.*?' prefix is -# Unicode aware and will refuse to match through invalid UTF-8 bytes. 
-[[tests]] -name = "dotstar-prefix-ascii" -regex = 'a' -input = '\xFFa' -matches = [[1, 2]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "dotstar-prefix-unicode" -regex = 'a' -input = '\xFFa' -matches = [[1, 2]] -unescape = true -unicode = true -utf8 = false - -[[tests]] -name = "null-bytes" -regex = '(?P<cstr>[^\x00]+)\x00' -input = 'foo\x00' -captures = [ - [[0, 4], [0, 3]], -] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "invalid-utf8-anchor-100" -regex = '\xCC?^' -input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' -matches = [[0, 0]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "invalid-utf8-anchor-200" -regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' -input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' -matches = [[22, 22]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "invalid-utf8-anchor-300" -regex = '^|ddp\xff\xffdddddlQd@\x80' -input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' -matches = [[0, 0]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-ascii-100" -regex = '\Bx\B' -input = "áxβ" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-ascii-200" -regex = '\B' -input = "0\U0007EF5E" -matches = [[2, 2], [3, 3], [4, 4], [5, 5]] -unicode = false -utf8 = false diff --git a/vendor/regex-automata/tests/data/crazy.toml b/vendor/regex-automata/tests/data/crazy.toml deleted file mode 100644 index 549b86cca..000000000 --- a/vendor/regex-automata/tests/data/crazy.toml +++ /dev/null @@ -1,302 +0,0 @@ -# TODO: There are still a couple of manually written tests in crazy.rs. 
- -[[tests]] -name = "ranges" -regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' -input = "num: 255" -matches = [[5, 8]] - -[[tests]] -name = "ranges-not" -regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' -input = "num: 256" -matches = [] - -[[tests]] -name = "float1" -regex = '[-+]?[0-9]*\.?[0-9]+' -input = "0.1" -matches = [[0, 3]] - -[[tests]] -name = "float2" -regex = '[-+]?[0-9]*\.?[0-9]+' -input = "0.1.2" -matches = [[0, 3]] -match_limit = 1 - -[[tests]] -name = "float3" -regex = '[-+]?[0-9]*\.?[0-9]+' -input = "a1.2" -matches = [[1, 4]] - -[[tests]] -name = "float4" -regex = '[-+]?[0-9]*\.?[0-9]+' -input = "1.a" -matches = [[0, 1]] - -[[tests]] -name = "float5" -regex = '^[-+]?[0-9]*\.?[0-9]+$' -input = "1.a" -matches = [] - -[[tests]] -name = "email" -regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' -input = "mine is jam.slam@gmail.com " -matches = [[8, 26]] - -[[tests]] -name = "email-not" -regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' -input = "mine is jam.slam@gmail " -matches = [] - -[[tests]] -name = "email-big" -regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' -input = "mine is jam.slam@gmail.com " -matches = [[8, 26]] - -[[tests]] -name = "date1" -regex = '(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$' -input = "1900-01-01" -matches = [[0, 10]] - -[[tests]] -name = "date2" -regex = '(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$' -input = "1900-00-01" -matches = [] - -[[tests]] -name = "date3" -regex = '(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$' -input = "1900-13-01" -matches = [] - -[[tests]] -name = "start-end-empty" -regex = '^$' -input = "" -matches = [[0, 0]] - -[[tests]] -name = "start-end-empty-rev" -regex = '$^' -input = "" -matches = [[0, 0]] - -[[tests]] -name = "start-end-empty-many-1" -regex = 
'^$^$^$' -input = "" -matches = [[0, 0]] - -[[tests]] -name = "start-end-empty-many-2" -regex = '^^^$$$' -input = "" -matches = [[0, 0]] - -[[tests]] -name = "start-end-empty-rep" -regex = '(?:^$)*' -input = "a\nb\nc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - -[[tests]] -name = "start-end-empty-rep-rev" -regex = '(?:$^)*' -input = "a\nb\nc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - -[[tests]] -name = "neg-class-letter" -regex = '[^ac]' -input = "acx" -matches = [[2, 3]] - -[[tests]] -name = "neg-class-letter-comma" -regex = '[^a,]' -input = "a,x" -matches = [[2, 3]] - -[[tests]] -name = "neg-class-letter-space" -regex = '[^a[:space:]]' -input = "a x" -matches = [[2, 3]] - -[[tests]] -name = "neg-class-comma" -regex = '[^,]' -input = ",,x" -matches = [[2, 3]] - -[[tests]] -name = "neg-class-space" -regex = '[^[:space:]]' -input = " a" -matches = [[1, 2]] - -[[tests]] -name = "neg-class-space-comma" -regex = '[^,[:space:]]' -input = ", a" -matches = [[2, 3]] - -[[tests]] -name = "neg-class-comma-space" -regex = '[^[:space:],]' -input = " ,a" -matches = [[2, 3]] - -[[tests]] -name = "neg-class-ascii" -regex = '[^[:alpha:]Z]' -input = "A1" -matches = [[1, 2]] - -[[tests]] -name = "lazy-many-many" -regex = '((?:.*)*?)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "lazy-many-optional" -regex = '((?:.?)*?)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "lazy-one-many-many" -regex = '((?:.*)+?)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "lazy-one-many-optional" -regex = '((?:.?)+?)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "lazy-range-min-many" -regex = '((?:.*){1,}?)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "lazy-range-many" -regex = '((?:.*){1,2}?)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "greedy-many-many" -regex = '((?:.*)*)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "greedy-many-optional" -regex = '((?:.?)*)=' -input = "a=b" 
-matches = [[0, 2]] - -[[tests]] -name = "greedy-one-many-many" -regex = '((?:.*)+)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "greedy-one-many-optional" -regex = '((?:.?)+)=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "greedy-range-min-many" -regex = '((?:.*){1,})=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "greedy-range-many" -regex = '((?:.*){1,2})=' -input = "a=b" -matches = [[0, 2]] - -[[tests]] -name = "empty1" -regex = '' -input = "" -matches = [[0, 0]] - -[[tests]] -name = "empty2" -regex = '' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty3" -regex = '()' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty4" -regex = '()*' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty5" -regex = '()+' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty6" -regex = '()?' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty7" -regex = '()()' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty8" -regex = '()+|z' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty9" -regex = 'z|()+' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty10" -regex = '()+|b' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty11" -regex = 'b|()+' -input = "abc" -matches = [[0, 0], [1, 2], [3, 3]] diff --git a/vendor/regex-automata/tests/data/earliest.toml b/vendor/regex-automata/tests/data/earliest.toml deleted file mode 100644 index 6714a850b..000000000 --- a/vendor/regex-automata/tests/data/earliest.toml +++ /dev/null @@ -1,48 +0,0 @@ -[[tests]] -name = "no-greedy-100" -regex = 'a+' -input = "aaa" -matches = [[0, 1], [1, 2], [2, 3]] -search_kind = "earliest" - -[[tests]] -name = "no-greedy-200" -regex = 'abc+' -input = "zzzabccc" -matches 
= [[3, 6]] -search_kind = "earliest" - -[[tests]] -name = "is-ungreedy" -regex = 'a+?' -input = "aaa" -matches = [[0, 1], [1, 2], [2, 3]] -search_kind = "earliest" - -[[tests]] -name = "look-start-test" -regex = '^(abc|a)' -input = "abc" -matches = [[0, 1]] -search_kind = "earliest" - -[[tests]] -name = "look-end-test" -regex = '(abc|a)$' -input = "abc" -matches = [[0, 3]] -search_kind = "earliest" - -[[tests]] -name = "no-leftmost-first-100" -regex = 'abc|a' -input = "abc" -matches = [[0, 1]] -search_kind = "earliest" - -[[tests]] -name = "no-leftmost-first-200" -regex = 'aba|a' -input = "aba" -matches = [[0, 1], [2, 3]] -search_kind = "earliest" diff --git a/vendor/regex-automata/tests/data/empty.toml b/vendor/regex-automata/tests/data/empty.toml deleted file mode 100644 index ad703e601..000000000 --- a/vendor/regex-automata/tests/data/empty.toml +++ /dev/null @@ -1,113 +0,0 @@ -[[tests]] -name = "100" -regex = "|b" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "110" -regex = "b|" -input = "abc" -matches = [[0, 0], [1, 2], [3, 3]] - -[[tests]] -name = "120" -regex = "|z" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "130" -regex = "z|" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "200" -regex = "|" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "210" -regex = "||" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "220" -regex = "||b" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "230" -regex = "b||" -input = "abc" -matches = [[0, 0], [1, 2], [3, 3]] - -[[tests]] -name = "240" -regex = "||z" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "300" -regex = "(?:)|b" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "310" -regex = "b|(?:)" -input = "abc" -matches = [[0, 0], [1, 2], [3, 3]] - -[[tests]] -name = 
"320" -regex = "(?:|)" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "330" -regex = "(?:|)|z" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "400" -regex = "a(?:)|b" -input = "abc" -matches = [[0, 1], [1, 2]] - -[[tests]] -name = "500" -regex = "" -input = "" -matches = [[0, 0]] - -[[tests]] -name = "510" -regex = "" -input = "a" -matches = [[0, 0], [1, 1]] - -[[tests]] -name = "520" -regex = "" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "600" -regex = '(|a)*' -input = "aaa" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "610" -regex = '(|a)+' -input = "aaa" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] diff --git a/vendor/regex-automata/tests/data/expensive.toml b/vendor/regex-automata/tests/data/expensive.toml deleted file mode 100644 index e062e3902..000000000 --- a/vendor/regex-automata/tests/data/expensive.toml +++ /dev/null @@ -1,12 +0,0 @@ -# These represent tests that may be expensive to run on some regex engines. For -# example, tests that build a full DFA ahead of time and minimize it can take a -# horrendously long time on regexes that are large (or result in an explosion -# in the number of states). We group these tests together so that such engines -# can simply skip these tests. 
- -# See: https://github.com/rust-lang/regex/issues/98 -[[tests]] -name = "regression-many-repeat-no-stack-overflow" -regex = '^.{1,2500}' -input = "a" -matches = [[0, 1]] diff --git a/vendor/regex-automata/tests/data/flags.toml b/vendor/regex-automata/tests/data/flags.toml deleted file mode 100644 index 2b631ef23..000000000 --- a/vendor/regex-automata/tests/data/flags.toml +++ /dev/null @@ -1,67 +0,0 @@ -[[tests]] -name = "1" -regex = "(?i)abc" -input = "ABC" -matches = [[0, 3]] - -[[tests]] -name = "2" -regex = "(?i)a(?-i)bc" -input = "Abc" -matches = [[0, 3]] - -[[tests]] -name = "3" -regex = "(?i)a(?-i)bc" -input = "ABC" -matches = [] - -[[tests]] -name = "4" -regex = "(?is)a." -input = "A\n" -matches = [[0, 2]] - -[[tests]] -name = "5" -regex = "(?is)a.(?-is)a." -input = "A\nab" -matches = [[0, 4]] - -[[tests]] -name = "6" -regex = "(?is)a.(?-is)a." -input = "A\na\n" -matches = [] - -[[tests]] -name = "7" -regex = "(?is)a.(?-is:a.)?" -input = "A\na\n" -matches = [[0, 2]] -match_limit = 1 - -[[tests]] -name = "8" -regex = "(?U)a+" -input = "aa" -matches = [[0, 1]] -match_limit = 1 - -[[tests]] -name = "9" -regex = "(?U)a+?" -input = "aa" -matches = [[0, 2]] - -[[tests]] -name = "10" -regex = "(?U)(?-U)a+" -input = "aa" -matches = [[0, 2]] - -[[tests]] -name = "11" -regex = '(?m)(?:^\d+$\n?)+' -input = "123\n456\n789" -matches = [[0, 11]] diff --git a/vendor/regex-automata/tests/data/fowler/basic.toml b/vendor/regex-automata/tests/data/fowler/basic.toml deleted file mode 100644 index c965f26ff..000000000 --- a/vendor/regex-automata/tests/data/fowler/basic.toml +++ /dev/null @@ -1,1638 +0,0 @@ -# !!! DO NOT EDIT !!! -# Automatically generated by scripts/fowler-to-toml. -# Numbers in the test names correspond to the line number of the test from -# the original dat file. 
- -[[tests]] -name = "basic3" -regex = '''abracadabra$''' -input = '''abracadabracadabra''' -captures = [[[7, 18]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic4" -regex = '''a...b''' -input = '''abababbb''' -captures = [[[2, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic5" -regex = '''XXXXXX''' -input = '''..XXXXXX''' -captures = [[[2, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic6" -regex = '''\)''' -input = '''()''' -captures = [[[1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic7" -regex = '''a]''' -input = '''a]a''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic9" -regex = '''\}''' -input = '''}''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic10" -regex = '''\]''' -input = ''']''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic12" -regex = ''']''' -input = ''']''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic15" -regex = '''^a''' -input = '''ax''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic16" -regex = '''\^a''' -input = '''a^a''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic17" -regex = '''a\^''' -input = '''a^''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic18" -regex = '''a$''' -input = '''aa''' -captures = [[[1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic19" -regex = '''a\$''' -input = '''a$''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic20" -regex = '''^$''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic21" -regex = '''$^''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic22" -regex = '''a($)''' -input = '''aa''' -captures = [[[1, 2], [2, 2]]] 
-match_limit = 1 -unescape = true - -[[tests]] -name = "basic23" -regex = '''a*(^a)''' -input = '''aa''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic24" -regex = '''(..)*(...)*''' -input = '''a''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic25" -regex = '''(..)*(...)*''' -input = '''abcd''' -captures = [[[0, 4], [2, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic26" -regex = '''(ab|a)(bc|c)''' -input = '''abc''' -captures = [[[0, 3], [0, 2], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic27" -regex = '''(ab)c|abc''' -input = '''abc''' -captures = [[[0, 3], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic28" -regex = '''a{0}b''' -input = '''ab''' -captures = [[[1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic29" -regex = '''(a*)(b?)(b+)b{3}''' -input = '''aaabbbbbbb''' -captures = [[[0, 10], [0, 3], [3, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic30" -regex = '''(a*)(b{0,1})(b{1,})b{3}''' -input = '''aaabbbbbbb''' -captures = [[[0, 10], [0, 3], [3, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic32" -regex = '''((a|a)|a)''' -input = '''a''' -captures = [[[0, 1], [0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic33" -regex = '''(a*)(a|aa)''' -input = '''aaaa''' -captures = [[[0, 4], [0, 3], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic34" -regex = '''a*(a.|aa)''' -input = '''aaaa''' -captures = [[[0, 4], [2, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic35" -regex = '''a(b)|c(d)|a(e)f''' -input = '''aef''' -captures = [[[0, 3], [], [], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic36" -regex = '''(a|b)?.*''' -input = '''b''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic37" -regex = '''(a|b)c|a(b|c)''' 
-input = '''ac''' -captures = [[[0, 2], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic38" -regex = '''(a|b)c|a(b|c)''' -input = '''ab''' -captures = [[[0, 2], [], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic39" -regex = '''(a|b)*c|(a|ab)*c''' -input = '''abc''' -captures = [[[0, 3], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic40" -regex = '''(a|b)*c|(a|ab)*c''' -input = '''xc''' -captures = [[[1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic41" -regex = '''(.a|.b).*|.*(.a|.b)''' -input = '''xa''' -captures = [[[0, 2], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic42" -regex = '''a?(ab|ba)ab''' -input = '''abab''' -captures = [[[0, 4], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic43" -regex = '''a?(ac{0}b|ba)ab''' -input = '''abab''' -captures = [[[0, 4], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic44" -regex = '''ab|abab''' -input = '''abbabab''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic45" -regex = '''aba|bab|bba''' -input = '''baaabbbaba''' -captures = [[[5, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic46" -regex = '''aba|bab''' -input = '''baaabbbaba''' -captures = [[[6, 9]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic47" -regex = '''(aa|aaa)*|(a|aaaaa)''' -input = '''aa''' -captures = [[[0, 2], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic48" -regex = '''(a.|.a.)*|(a|.a...)''' -input = '''aa''' -captures = [[[0, 2], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic49" -regex = '''ab|a''' -input = '''xabc''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic50" -regex = '''ab|a''' -input = '''xxabc''' -captures = [[[2, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic51" -regex = '''(Ab|cD)*''' -input = '''aBcD''' 
-captures = [[[0, 4], [2, 4]]] -match_limit = 1 -unescape = true -case_insensitive = true - -[[tests]] -name = "basic52" -regex = '''[^-]''' -input = '''--a''' -captures = [[[2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic53" -regex = '''[a-]*''' -input = '''--a''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic54" -regex = '''[a-m-]*''' -input = '''--amoma--''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic55" -regex = ''':::1:::0:|:::1:1:0:''' -input = ''':::0:::1:::1:::0:''' -captures = [[[8, 17]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic56" -regex = ''':::1:::0:|:::1:1:1:''' -input = ''':::0:::1:::1:::0:''' -captures = [[[8, 17]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic57" -regex = '''[[:upper:]]''' -input = '''A''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic58" -regex = '''[[:lower:]]+''' -input = '''`az{''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic59" -regex = '''[[:upper:]]+''' -input = '''@AZ[''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic65" -regex = '''\n''' -input = '''\n''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic66" -regex = '''\n''' -input = '''\n''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic67" -regex = '''[^a]''' -input = '''\n''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic68" -regex = '''\na''' -input = '''\na''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic69" -regex = '''(a)(b)(c)''' -input = '''abc''' -captures = [[[0, 3], [0, 1], [1, 2], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic70" -regex = '''xxx''' -input = '''xxx''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name 
= "basic71" -regex = '''(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)''' -input = '''feb 6,''' -captures = [[[0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic72" -regex = '''(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)''' -input = '''2/7''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic73" -regex = '''(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)''' -input = '''feb 1,Feb 6''' -captures = [[[5, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic74" -regex = '''((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))''' -input = '''x''' -captures = [[[0, 1], [0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic75" -regex = '''((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*''' -input = '''xx''' -captures = [[[0, 2], [1, 2], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic76" -regex = '''a?(ab|ba)*''' -input = '''ababababababababababababababababababababababababababababababababababababababababa''' -captures = [[[0, 81], [79, 81]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic77" -regex = '''abaa|abbaa|abbbaa|abbbbaa''' -input = '''ababbabbbabbbabbbbabbbbaa''' -captures = [[[18, 25]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic78" -regex = '''abaa|abbaa|abbbaa|abbbbaa''' -input = '''ababbabbbabbbabbbbabaa''' -captures = [[[18, 22]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic79" -regex = '''aaac|aabc|abac|abbc|baac|babc|bbac|bbbc''' -input = '''baaabbbabac''' -captures = [[[7, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic80" -regex = '''.*''' -input = '''\x01\x7f''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic81" -regex = '''aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll''' -input = '''XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa''' -captures = 
[[[53, 57]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic83" -regex = '''a*a*a*a*a*b''' -input = '''aaaaaaaaab''' -captures = [[[0, 10]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic84" -regex = '''^''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic85" -regex = '''$''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic86" -regex = '''^$''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic87" -regex = '''^a$''' -input = '''a''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic88" -regex = '''abc''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic89" -regex = '''abc''' -input = '''xabcy''' -captures = [[[1, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic90" -regex = '''abc''' -input = '''ababc''' -captures = [[[2, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic91" -regex = '''ab*c''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic92" -regex = '''ab*bc''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic93" -regex = '''ab*bc''' -input = '''abbc''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic94" -regex = '''ab*bc''' -input = '''abbbbc''' -captures = [[[0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic95" -regex = '''ab+bc''' -input = '''abbc''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic96" -regex = '''ab+bc''' -input = '''abbbbc''' -captures = [[[0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic97" -regex = '''ab?bc''' -input = '''abbc''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic98" 
-regex = '''ab?bc''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic99" -regex = '''ab?c''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic100" -regex = '''^abc$''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic101" -regex = '''^abc''' -input = '''abcc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic102" -regex = '''abc$''' -input = '''aabc''' -captures = [[[1, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic103" -regex = '''^''' -input = '''abc''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic104" -regex = '''$''' -input = '''abc''' -captures = [[[3, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic105" -regex = '''a.c''' -input = '''abc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic106" -regex = '''a.c''' -input = '''axc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic107" -regex = '''a.*c''' -input = '''axyzc''' -captures = [[[0, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic108" -regex = '''a[bc]d''' -input = '''abd''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic109" -regex = '''a[b-d]e''' -input = '''ace''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic110" -regex = '''a[b-d]''' -input = '''aac''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic111" -regex = '''a[-b]''' -input = '''a-''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic112" -regex = '''a[b-]''' -input = '''a-''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic113" -regex = '''a]''' -input = '''a]''' -captures = [[[0, 2]]] -match_limit = 1 
-unescape = true - -[[tests]] -name = "basic114" -regex = '''a[]]b''' -input = '''a]b''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic115" -regex = '''a[^bc]d''' -input = '''aed''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic116" -regex = '''a[^-b]c''' -input = '''adc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic117" -regex = '''a[^]b]c''' -input = '''adc''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic118" -regex = '''ab|cd''' -input = '''abc''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic119" -regex = '''ab|cd''' -input = '''abcd''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic120" -regex = '''a\(b''' -input = '''a(b''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic121" -regex = '''a\(*b''' -input = '''ab''' -captures = [[[0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic122" -regex = '''a\(*b''' -input = '''a((b''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic123" -regex = '''((a))''' -input = '''abc''' -captures = [[[0, 1], [0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic124" -regex = '''(a)b(c)''' -input = '''abc''' -captures = [[[0, 3], [0, 1], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic125" -regex = '''a+b+c''' -input = '''aabbabc''' -captures = [[[4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic126" -regex = '''a*''' -input = '''aaa''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic128" -regex = '''(a*)*''' -input = '''-''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic129" -regex = '''(a*)+''' -input = '''-''' -captures = [[[0, 0], [0, 0]]] -match_limit = 1 -unescape = 
true - -[[tests]] -name = "basic131" -regex = '''(a*|b)*''' -input = '''-''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic132" -regex = '''(a+|b)*''' -input = '''ab''' -captures = [[[0, 2], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic133" -regex = '''(a+|b)+''' -input = '''ab''' -captures = [[[0, 2], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic134" -regex = '''(a+|b)?''' -input = '''ab''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic135" -regex = '''[^ab]*''' -input = '''cde''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic137" -regex = '''(^)*''' -input = '''-''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic138" -regex = '''a*''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic139" -regex = '''([abc])*d''' -input = '''abbbcd''' -captures = [[[0, 6], [4, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic140" -regex = '''([abc])*bcd''' -input = '''abcd''' -captures = [[[0, 4], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic141" -regex = '''a|b|c|d|e''' -input = '''e''' -captures = [[[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic142" -regex = '''(a|b|c|d|e)f''' -input = '''ef''' -captures = [[[0, 2], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic144" -regex = '''((a*|b))*''' -input = '''-''' -captures = [[[0, 0], [], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic145" -regex = '''abcd*efg''' -input = '''abcdefg''' -captures = [[[0, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic146" -regex = '''ab*''' -input = '''xabyabbbz''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic147" -regex = '''ab*''' -input = '''xayabbbz''' -captures = [[[1, 
2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic148" -regex = '''(ab|cd)e''' -input = '''abcde''' -captures = [[[2, 5], [2, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic149" -regex = '''[abhgefdc]ij''' -input = '''hij''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic150" -regex = '''(a|b)c*d''' -input = '''abcd''' -captures = [[[1, 4], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic151" -regex = '''(ab|ab*)bc''' -input = '''abc''' -captures = [[[0, 3], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic152" -regex = '''a([bc]*)c*''' -input = '''abc''' -captures = [[[0, 3], [1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic153" -regex = '''a([bc]*)(c*d)''' -input = '''abcd''' -captures = [[[0, 4], [1, 3], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic154" -regex = '''a([bc]+)(c*d)''' -input = '''abcd''' -captures = [[[0, 4], [1, 3], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic155" -regex = '''a([bc]*)(c+d)''' -input = '''abcd''' -captures = [[[0, 4], [1, 2], [2, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic156" -regex = '''a[bcd]*dcdcde''' -input = '''adcdcde''' -captures = [[[0, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic157" -regex = '''(ab|a)b*c''' -input = '''abc''' -captures = [[[0, 3], [0, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic158" -regex = '''((a)(b)c)(d)''' -input = '''abcd''' -captures = [[[0, 4], [0, 3], [0, 1], [1, 2], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic159" -regex = '''[A-Za-z_][A-Za-z0-9_]*''' -input = '''alpha''' -captures = [[[0, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic160" -regex = '''^a(bc+|b[eh])g|.h$''' -input = '''abh''' -captures = [[[1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic161" -regex = 
'''(bc+d$|ef*g.|h?i(j|k))''' -input = '''effgz''' -captures = [[[0, 5], [0, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic162" -regex = '''(bc+d$|ef*g.|h?i(j|k))''' -input = '''ij''' -captures = [[[0, 2], [0, 2], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic163" -regex = '''(bc+d$|ef*g.|h?i(j|k))''' -input = '''reffgz''' -captures = [[[1, 6], [1, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic164" -regex = '''(((((((((a)))))))))''' -input = '''a''' -captures = [[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic165" -regex = '''multiple words''' -input = '''multiple words yeah''' -captures = [[[0, 14]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic166" -regex = '''(.*)c(.*)''' -input = '''abcde''' -captures = [[[0, 5], [0, 2], [3, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic167" -regex = '''abcd''' -input = '''abcd''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic168" -regex = '''a(bc)d''' -input = '''abcd''' -captures = [[[0, 4], [1, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic169" -regex = '''a[\x01-\x03]?c''' -input = '''a\x02c''' -captures = [[[0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic170" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Qaddafi''' -captures = [[[0, 15], [], [10, 12]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic171" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Mo'ammar Gadhafi''' -captures = [[[0, 16], [], [11, 13]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic172" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Kaddafi''' -captures = [[[0, 15], [], [10, 12]]] -match_limit = 1 
-unescape = true - -[[tests]] -name = "basic173" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Qadhafi''' -captures = [[[0, 15], [], [10, 12]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic174" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Gadafi''' -captures = [[[0, 14], [], [10, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic175" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Mu'ammar Qadafi''' -captures = [[[0, 15], [], [11, 12]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic176" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Moamar Gaddafi''' -captures = [[[0, 14], [], [9, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic177" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Mu'ammar Qadhdhafi''' -captures = [[[0, 18], [], [13, 15]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic178" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Khaddafi''' -captures = [[[0, 16], [], [11, 13]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic179" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Ghaddafy''' -captures = [[[0, 16], [], [11, 13]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic180" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Ghadafi''' -captures = [[[0, 15], [], [11, 12]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic181" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Ghaddafi''' -captures = [[[0, 16], [], [11, 13]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic182" 
-regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muamar Kaddafi''' -captures = [[[0, 14], [], [9, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic183" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Quathafi''' -captures = [[[0, 16], [], [11, 13]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic184" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Muammar Gheddafi''' -captures = [[[0, 16], [], [11, 13]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic185" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Moammar Khadafy''' -captures = [[[0, 15], [], [11, 12]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic186" -regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' -input = '''Moammar Qudhafi''' -captures = [[[0, 15], [], [10, 12]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic187" -regex = '''a+(b|c)*d+''' -input = '''aabcdd''' -captures = [[[0, 6], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic188" -regex = '''^.+$''' -input = '''vivi''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic189" -regex = '''^(.+)$''' -input = '''vivi''' -captures = [[[0, 4], [0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic190" -regex = '''^([^!.]+).att.com!(.+)$''' -input = '''gryphon.att.com!eby''' -captures = [[[0, 19], [0, 7], [16, 19]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic191" -regex = '''^([^!]+!)?([^!]+)$''' -input = '''bas''' -captures = [[[0, 3], [], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic192" -regex = '''^([^!]+!)?([^!]+)$''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic193" -regex = 
'''^([^!]+!)?([^!]+)$''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic194" -regex = '''^.+!([^!]+!)([^!]+)$''' -input = '''foo!bar!bas''' -captures = [[[0, 11], [4, 8], [8, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic195" -regex = '''((foo)|(bar))!bas''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 3], [], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic196" -regex = '''((foo)|(bar))!bas''' -input = '''foo!bar!bas''' -captures = [[[4, 11], [4, 7], [], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic197" -regex = '''((foo)|(bar))!bas''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 3], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic198" -regex = '''((foo)|bar)!bas''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic199" -regex = '''((foo)|bar)!bas''' -input = '''foo!bar!bas''' -captures = [[[4, 11], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic200" -regex = '''((foo)|bar)!bas''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 3], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic201" -regex = '''(foo|(bar))!bas''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 3], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic202" -regex = '''(foo|(bar))!bas''' -input = '''foo!bar!bas''' -captures = [[[4, 11], [4, 7], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic203" -regex = '''(foo|(bar))!bas''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic204" -regex = '''(foo|bar)!bas''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic205" -regex = '''(foo|bar)!bas''' -input = '''foo!bar!bas''' -captures = [[[4, 11], [4, 7]]] 
-match_limit = 1 -unescape = true - -[[tests]] -name = "basic206" -regex = '''(foo|bar)!bas''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic207" -regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' -input = '''foo!bar!bas''' -captures = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic208" -regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' -input = '''bas''' -captures = [[[0, 3], [], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic209" -regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic210" -regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' -input = '''foo!bar!bas''' -captures = [[[0, 11], [], [], [4, 8], [8, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic211" -regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic212" -regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' -input = '''bas''' -captures = [[[0, 3], [0, 3], [], [0, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic213" -regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' -input = '''bar!bas''' -captures = [[[0, 7], [0, 7], [0, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic214" -regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' -input = '''foo!bar!bas''' -captures = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic215" -regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' -input = '''foo!bas''' -captures = [[[0, 7], [0, 7], [0, 4], [4, 7]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic216" -regex = '''.*(/XXX).*''' -input = '''/XXX''' -captures = [[[0, 4], 
[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic217" -regex = '''.*(\\XXX).*''' -input = '''\\XXX''' -captures = [[[0, 4], [0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic218" -regex = '''\\XXX''' -input = '''\\XXX''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic219" -regex = '''.*(/000).*''' -input = '''/000''' -captures = [[[0, 4], [0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic220" -regex = '''.*(\\000).*''' -input = '''\\000''' -captures = [[[0, 4], [0, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "basic221" -regex = '''\\000''' -input = '''\\000''' -captures = [[[0, 4]]] -match_limit = 1 -unescape = true - diff --git a/vendor/regex-automata/tests/data/fowler/dat/README b/vendor/regex-automata/tests/data/fowler/dat/README deleted file mode 100644 index e70072500..000000000 --- a/vendor/regex-automata/tests/data/fowler/dat/README +++ /dev/null @@ -1,24 +0,0 @@ -Test data was taken from the Go distribution, which was in turn taken from the -testregex test suite: - - http://www2.research.att.com/~astopen/testregex/testregex.html - -Unfortunately, the above link is now dead, but the test data lives on. - -The LICENSE in this directory corresponds to the LICENSE that the data was -originally released under. - -The tests themselves were modified for RE2/Go. A couple were modified further -by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them. -(Yes, it seems like RE2/Go includes failing test cases.) This may or may not -have been a bad idea, but I think being consistent with an established Regex -library is worth something. - -After some number of years, these tests were transformed into a TOML format -using the fowler-to-toml script in the 'scripts' directory. 
To re-generate the -TOML files, then run the following from the root of this repository: - - ./scripts/fowler-to-toml tests/data/fowler tests/data/fowler/dat/*.dat - -which brings them into a sensible structured format in which other tests can -be written. diff --git a/vendor/regex-automata/tests/data/fowler/dat/basic.dat b/vendor/regex-automata/tests/data/fowler/dat/basic.dat deleted file mode 100644 index e55efaeec..000000000 --- a/vendor/regex-automata/tests/data/fowler/dat/basic.dat +++ /dev/null @@ -1,221 +0,0 @@ -NOTE all standard compliant implementations should pass these : 2002-05-31 - -BE abracadabra$ abracadabracadabra (7,18) -BE a...b abababbb (2,7) -BE XXXXXX ..XXXXXX (2,8) -E \) () (1,2) -BE a] a]a (0,2) -B } } (0,1) -E \} } (0,1) -BE \] ] (0,1) -B ] ] (0,1) -E ] ] (0,1) -B { { (0,1) -B } } (0,1) -BE ^a ax (0,1) -BE \^a a^a (1,3) -BE a\^ a^ (0,2) -BE a$ aa (1,2) -BE a\$ a$ (0,2) -BE ^$ NULL (0,0) -E $^ NULL (0,0) -E a($) aa (1,2)(2,2) -E a*(^a) aa (0,1)(0,1) -E (..)*(...)* a (0,0) -E (..)*(...)* abcd (0,4)(2,4) -E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) -E (ab)c|abc abc (0,3)(0,2) -E a{0}b ab (1,2) -E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E a{9876543210} NULL BADBR -E ((a|a)|a) a (0,1)(0,1)(0,1) -E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) -E a*(a.|aa) aaaa (0,4)(2,4) -E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) -E (a|b)?.* b (0,1)(0,1) -E (a|b)c|a(b|c) ac (0,2)(0,1) -E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) -E (a|b)*c|(a|ab)*c abc (0,3)(1,2) -E (a|b)*c|(a|ab)*c xc (1,2) -E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) -E a?(ab|ba)ab abab (0,4)(0,2) -E a?(ac{0}b|ba)ab abab (0,4)(0,2) -E ab|abab abbabab (0,2) -E aba|bab|bba baaabbbaba (5,8) -E aba|bab baaabbbaba (6,9) -E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) -E (a.|.a.)*|(a|.a...) 
aa (0,2)(0,2) -E ab|a xabc (1,3) -E ab|a xxabc (2,4) -Ei (Ab|cD)* aBcD (0,4)(2,4) -BE [^-] --a (2,3) -BE [a-]* --a (0,3) -BE [a-m-]* --amoma-- (0,4) -E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) -E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) -{E [[:upper:]] A (0,1) [[<element>]] not supported -E [[:lower:]]+ `az{ (1,3) -E [[:upper:]]+ @AZ[ (1,3) -# No collation in Go -#BE [[-]] [[-]] (2,4) -#BE [[.NIL.]] NULL ECOLLATE -#BE [[=aleph=]] NULL ECOLLATE -} -BE$ \n \n (0,1) -BEn$ \n \n (0,1) -BE$ [^a] \n (0,1) -BE$ \na \na (0,2) -E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) -BE xxx xxx (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) -E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) -E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) -E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) -E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) -BE$ .* \x01\x7f (0,2) -E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) -L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH -E a*a*a*a*a*b aaaaaaaaab (0,10) -BE ^ NULL (0,0) -BE $ NULL (0,0) -BE ^$ NULL (0,0) -BE ^a$ a (0,1) -BE abc abc (0,3) -BE abc xabcy (1,4) -BE abc ababc (2,5) -BE ab*c abc (0,3) -BE ab*bc abc (0,3) -BE ab*bc abbc (0,4) -BE ab*bc abbbbc (0,6) -E ab+bc abbc (0,4) -E ab+bc abbbbc (0,6) -E ab?bc abbc (0,4) -E ab?bc abc (0,3) -E ab?c abc (0,3) -BE ^abc$ abc (0,3) -BE ^abc abcc (0,3) -BE abc$ aabc (1,4) -BE 
^ abc (0,0) -BE $ abc (3,3) -BE a.c abc (0,3) -BE a.c axc (0,3) -BE a.*c axyzc (0,5) -BE a[bc]d abd (0,3) -BE a[b-d]e ace (0,3) -BE a[b-d] aac (1,3) -BE a[-b] a- (0,2) -BE a[b-] a- (0,2) -BE a] a] (0,2) -BE a[]]b a]b (0,3) -BE a[^bc]d aed (0,3) -BE a[^-b]c adc (0,3) -BE a[^]b]c adc (0,3) -E ab|cd abc (0,2) -E ab|cd abcd (0,2) -E a\(b a(b (0,3) -E a\(*b ab (0,2) -E a\(*b a((b (0,4) -E ((a)) abc (0,1)(0,1)(0,1) -E (a)b(c) abc (0,3)(0,1)(2,3) -E a+b+c aabbabc (4,7) -E a* aaa (0,3) -#E (a*)* - (0,0)(0,0) -E (a*)* - (0,0)(?,?) RE2/Go -E (a*)+ - (0,0)(0,0) -#E (a*|b)* - (0,0)(0,0) -E (a*|b)* - (0,0)(?,?) RE2/Go -E (a+|b)* ab (0,2)(1,2) -E (a+|b)+ ab (0,2)(1,2) -E (a+|b)? ab (0,1)(0,1) -BE [^ab]* cde (0,3) -#E (^)* - (0,0)(0,0) -E (^)* - (0,0)(?,?) RE2/Go -BE a* NULL (0,0) -E ([abc])*d abbbcd (0,6)(4,5) -E ([abc])*bcd abcd (0,4)(0,1) -E a|b|c|d|e e (0,1) -E (a|b|c|d|e)f ef (0,2)(0,1) -#E ((a*|b))* - (0,0)(0,0)(0,0) -E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go -BE abcd*efg abcdefg (0,7) -BE ab* xabyabbbz (1,3) -BE ab* xayabbbz (1,2) -E (ab|cd)e abcde (2,5)(2,4) -BE [abhgefdc]ij hij (0,3) -E (a|b)c*d abcd (1,4)(1,2) -E (ab|ab*)bc abc (0,3)(0,1) -E a([bc]*)c* abc (0,3)(1,3) -E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) -E a[bcd]*dcdcde adcdcde (0,7) -E (ab|a)b*c abc (0,3)(0,2) -E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) -BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) -E ^a(bc+|b[eh])g|.h$ abh (1,3) -E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) -E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) -E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) -E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) -BE multiple words multiple words yeah (0,14) -E (.*)c(.*) abcde (0,5)(0,2)(3,5) -BE abcd abcd (0,4) -E a(bc)d abcd (0,4)(1,3) -E a[-]?c ac (0,3) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] 
Mo'ammar Gadhafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) -E a+(b|c)*d+ aabcdd (0,6)(3,4) -E ^.+$ vivi (0,4) -E ^(.+)$ vivi (0,4)(0,4) -E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) -E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) -E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) -E 
((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) -E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) -E ((foo)|bar)!bas bar!bas (0,7)(0,3) -E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) -E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) -E (foo|(bar))!bas foo!bas (0,7)(0,3) -E (foo|bar)!bas bar!bas (0,7)(0,3) -E (foo|bar)!bas foo!bar!bas (4,11)(4,7) -E (foo|bar)!bas foo!bas (0,7)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) -E .*(/XXX).* /XXX (0,4)(0,4) -E .*(\\XXX).* \XXX (0,4)(0,4) -E \\XXX \XXX (0,4) -E .*(/000).* /000 (0,4)(0,4) -E .*(\\000).* \000 (0,4)(0,4) -E \\000 \000 (0,4) diff --git a/vendor/regex-automata/tests/data/fowler/dat/nullsubexpr.dat b/vendor/regex-automata/tests/data/fowler/dat/nullsubexpr.dat deleted file mode 100644 index 2e18fbb91..000000000 --- a/vendor/regex-automata/tests/data/fowler/dat/nullsubexpr.dat +++ /dev/null @@ -1,79 +0,0 @@ -NOTE null subexpression matches : 2002-06-06 - -E (a*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) 
RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)* a (0,1)(0,1) -E SAME x (0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)+ a (0,1)(0,1) -E SAME x NOMATCH -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) - -E ([a]*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([a]*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([^b]*)* a (0,1)(0,1) -#E SAME b (0,0)(0,0) -E SAME b (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaab (0,6)(0,6) -E ([ab]*)* a (0,1)(0,1) -E SAME aaaaaa (0,6)(0,6) -E SAME ababab (0,6)(0,6) -E SAME bababa (0,6)(0,6) -E SAME b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -E SAME aaaabcde (0,5)(0,5) -E ([^a]*)* b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -#E SAME aaaaaa (0,0)(0,0) -E SAME aaaaaa (0,0)(?,?) RE2/Go -E ([^ab]*)* ccccxx (0,6)(0,6) -#E SAME ababab (0,0)(0,0) -E SAME ababab (0,0)(?,?) RE2/Go - -E ((z)+|a)* zabcde (0,2)(1,2) - -#{E a+? aaaaaa (0,1) no *? +? mimimal match ops -#E (a) aaa (0,1)(0,1) -#E (a*?) aaa (0,0)(0,0) -#E (a)*? aaa (0,0) -#E (a*?)*? 
aaa (0,0) -#} - -B \(a*\)*\(x\) x (0,1)(0,0)(0,1) -B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) -B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) -B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) -B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) -B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) -B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) -B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) - -#E (a*)*(x) x (0,1)(0,0)(0,1) -E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go -E (a*)*(x) ax (0,2)(0,1)(1,2) -E (a*)*(x) axa (0,2)(0,1)(1,2) - -E (a*)+(x) x (0,1)(0,0)(0,1) -E (a*)+(x) ax (0,2)(0,1)(1,2) -E (a*)+(x) axa (0,2)(0,1)(1,2) - -E (a*){2}(x) x (0,1)(0,0)(0,1) -E (a*){2}(x) ax (0,2)(1,1)(1,2) -E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/vendor/regex-automata/tests/data/fowler/dat/repetition-expensive.dat b/vendor/regex-automata/tests/data/fowler/dat/repetition-expensive.dat deleted file mode 100644 index c91580236..000000000 --- a/vendor/regex-automata/tests/data/fowler/dat/repetition-expensive.dat +++ /dev/null @@ -1,85 +0,0 @@ -NOTE implicit vs. explicit repetitions : 2009-02-02 - -# Glenn Fowler <gsf@research.att.com> -# conforming matches (column 4) must match one of the following BREs -# NOMATCH -# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* -# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* -# i.e., each 3-tuple has two identical elements and one (?,?) 
- -NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 - -:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) -:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) -:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) -:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) -:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) -:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) -:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) -:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) -:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) -#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) -:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) -:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) -:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) -:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) -:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) -:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) -:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) -:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go -:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) - -# These test a fixed bug in my regex-tdfa that did not keep the expanded -# form properly grouped, so right association did the wrong thing with -# these ambiguous patterns (crafted just to test my code when I became -# suspicious of my implementation). The first subexpression should use -# "ab" then "a" then "bcd". - -# OS X / FreeBSD / NetBSD badly fail many of these, with impossible -# results like (0,6)(4,5)(6,6). 
- -:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH -:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH -:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) -:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) - -# The above worked on Linux/GLIBC but the following often fail. -# They also trip up OS X / FreeBSD / NetBSD: - -#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH -#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH -#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) -:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) -:HA#291:E 
(ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/vendor/regex-automata/tests/data/fowler/dat/repetition.dat b/vendor/regex-automata/tests/data/fowler/dat/repetition.dat deleted file mode 100644 index 2dac0823f..000000000 --- a/vendor/regex-automata/tests/data/fowler/dat/repetition.dat +++ /dev/null @@ -1,83 +0,0 @@ -NOTE implicit vs. explicit repetitions : 2009-02-02 - -# Glenn Fowler <gsf@research.att.com> -# conforming matches (column 4) must match one of the following BREs -# NOMATCH -# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* -# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* -# i.e., each 3-tuple has two identical elements and one (?,?) - -E ((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH - -E ((..)|(.)){1} NULL NOMATCH -E ((..)|(.)){2} NULL NOMATCH -E ((..)|(.)){3} NULL NOMATCH - -E ((..)|(.))* NULL (0,0) - -E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.))((..)|(.)) a NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH - -E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.)){2} a NOMATCH -E ((..)|(.)){3} a NOMATCH - -E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) - -E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) -E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH - -E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) -E ((..)|(.)){3} aa NOMATCH - -E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) - -E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) -E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) - -E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) -#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go -E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) - -#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go - -E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) 
-E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) - -E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) -E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go - -E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) - -E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) - -E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) - -E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) -E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) - -E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) diff --git a/vendor/regex-automata/tests/data/fowler/nullsubexpr.toml b/vendor/regex-automata/tests/data/fowler/nullsubexpr.toml deleted file mode 100644 index 55d1d5b43..000000000 --- a/vendor/regex-automata/tests/data/fowler/nullsubexpr.toml +++ /dev/null @@ -1,405 +0,0 @@ -# !!! DO NOT EDIT !!! -# Automatically generated by scripts/fowler-to-toml. -# Numbers in the test names correspond to the line number of the test from -# the original dat file. 
- -[[tests]] -name = "nullsubexpr3" -regex = '''(a*)*''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr5" -regex = '''(a*)*''' -input = '''x''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr6" -regex = '''(a*)*''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr7" -regex = '''(a*)*''' -input = '''aaaaaax''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr8" -regex = '''(a*)+''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr9" -regex = '''(a*)+''' -input = '''x''' -captures = [[[0, 0], [0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr10" -regex = '''(a*)+''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr11" -regex = '''(a*)+''' -input = '''aaaaaax''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr12" -regex = '''(a+)*''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr13" -regex = '''(a+)*''' -input = '''x''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr14" -regex = '''(a+)*''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr15" -regex = '''(a+)*''' -input = '''aaaaaax''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr16" -regex = '''(a+)+''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr17" -regex = '''(a+)+''' -input = '''x''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr18" 
-regex = '''(a+)+''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr19" -regex = '''(a+)+''' -input = '''aaaaaax''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr21" -regex = '''([a]*)*''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr23" -regex = '''([a]*)*''' -input = '''x''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr24" -regex = '''([a]*)*''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr25" -regex = '''([a]*)*''' -input = '''aaaaaax''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr26" -regex = '''([a]*)+''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr27" -regex = '''([a]*)+''' -input = '''x''' -captures = [[[0, 0], [0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr28" -regex = '''([a]*)+''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr29" -regex = '''([a]*)+''' -input = '''aaaaaax''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr30" -regex = '''([^b]*)*''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr32" -regex = '''([^b]*)*''' -input = '''b''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr33" -regex = '''([^b]*)*''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr34" -regex = '''([^b]*)*''' -input = '''aaaaaab''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] 
-name = "nullsubexpr35" -regex = '''([ab]*)*''' -input = '''a''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr36" -regex = '''([ab]*)*''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr37" -regex = '''([ab]*)*''' -input = '''ababab''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr38" -regex = '''([ab]*)*''' -input = '''bababa''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr39" -regex = '''([ab]*)*''' -input = '''b''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr40" -regex = '''([ab]*)*''' -input = '''bbbbbb''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr41" -regex = '''([ab]*)*''' -input = '''aaaabcde''' -captures = [[[0, 5], [0, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr42" -regex = '''([^a]*)*''' -input = '''b''' -captures = [[[0, 1], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr43" -regex = '''([^a]*)*''' -input = '''bbbbbb''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr45" -regex = '''([^a]*)*''' -input = '''aaaaaa''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr46" -regex = '''([^ab]*)*''' -input = '''ccccxx''' -captures = [[[0, 6], [0, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr48" -regex = '''([^ab]*)*''' -input = '''ababab''' -captures = [[[0, 0], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr50" -regex = '''((z)+|a)*''' -input = '''zabcde''' -captures = [[[0, 2], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr69" -regex = '''(a*)*(x)''' -input = '''x''' -captures = [[[0, 1], [], 
[0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr70" -regex = '''(a*)*(x)''' -input = '''ax''' -captures = [[[0, 2], [0, 1], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr71" -regex = '''(a*)*(x)''' -input = '''axa''' -captures = [[[0, 2], [0, 1], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr73" -regex = '''(a*)+(x)''' -input = '''x''' -captures = [[[0, 1], [0, 0], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr74" -regex = '''(a*)+(x)''' -input = '''ax''' -captures = [[[0, 2], [0, 1], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr75" -regex = '''(a*)+(x)''' -input = '''axa''' -captures = [[[0, 2], [0, 1], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr77" -regex = '''(a*){2}(x)''' -input = '''x''' -captures = [[[0, 1], [0, 0], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr78" -regex = '''(a*){2}(x)''' -input = '''ax''' -captures = [[[0, 2], [1, 1], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "nullsubexpr79" -regex = '''(a*){2}(x)''' -input = '''axa''' -captures = [[[0, 2], [1, 1], [1, 2]]] -match_limit = 1 -unescape = true - diff --git a/vendor/regex-automata/tests/data/fowler/repetition-expensive.toml b/vendor/regex-automata/tests/data/fowler/repetition-expensive.toml deleted file mode 100644 index 81a896452..000000000 --- a/vendor/regex-automata/tests/data/fowler/repetition-expensive.toml +++ /dev/null @@ -1,341 +0,0 @@ -# !!! DO NOT EDIT !!! -# Automatically generated by scripts/fowler-to-toml. -# Numbers in the test names correspond to the line number of the test from -# the original dat file. 
- -[[tests]] -name = "repetition-expensive12" -regex = '''X(.?){0,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive13" -regex = '''X(.?){1,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive14" -regex = '''X(.?){2,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive15" -regex = '''X(.?){3,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive16" -regex = '''X(.?){4,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive17" -regex = '''X(.?){5,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive18" -regex = '''X(.?){6,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive19" -regex = '''X(.?){7,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive20" -regex = '''X(.?){8,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive22" -regex = '''X(.?){0,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive24" -regex = '''X(.?){1,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive26" -regex = '''X(.?){2,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive28" -regex 
= '''X(.?){3,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive30" -regex = '''X(.?){4,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive32" -regex = '''X(.?){5,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive34" -regex = '''X(.?){6,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive36" -regex = '''X(.?){7,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive37" -regex = '''X(.?){8,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive48" -regex = '''(a|ab|c|bcd){0,}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive49" -regex = '''(a|ab|c|bcd){1,}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive50" -regex = '''(a|ab|c|bcd){2,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive51" -regex = '''(a|ab|c|bcd){3,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive52" -regex = '''(a|ab|c|bcd){4,}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive53" -regex = '''(a|ab|c|bcd){0,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = 
"repetition-expensive54" -regex = '''(a|ab|c|bcd){1,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive55" -regex = '''(a|ab|c|bcd){2,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive56" -regex = '''(a|ab|c|bcd){3,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive57" -regex = '''(a|ab|c|bcd){4,10}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive58" -regex = '''(a|ab|c|bcd)*(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive59" -regex = '''(a|ab|c|bcd)+(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive65" -regex = '''(ab|a|c|bcd){0,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive67" -regex = '''(ab|a|c|bcd){1,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive69" -regex = '''(ab|a|c|bcd){2,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive71" -regex = '''(ab|a|c|bcd){3,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive72" -regex = '''(ab|a|c|bcd){4,}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive74" -regex = '''(ab|a|c|bcd){0,10}(d*)''' -input = '''ababcd''' -captures = 
[[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive76" -regex = '''(ab|a|c|bcd){1,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive78" -regex = '''(ab|a|c|bcd){2,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive80" -regex = '''(ab|a|c|bcd){3,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive81" -regex = '''(ab|a|c|bcd){4,10}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive83" -regex = '''(ab|a|c|bcd)*(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-expensive85" -regex = '''(ab|a|c|bcd)+(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - diff --git a/vendor/regex-automata/tests/data/fowler/repetition-long.toml b/vendor/regex-automata/tests/data/fowler/repetition-long.toml deleted file mode 100644 index fa24c834a..000000000 --- a/vendor/regex-automata/tests/data/fowler/repetition-long.toml +++ /dev/null @@ -1,341 +0,0 @@ -# !!! DO NOT EDIT !!! -# Automatically generated by scripts/fowler-to-toml. -# Numbers in the test names correspond to the line number of the test from -# the original dat file. 
- -[[tests]] -name = "repetition-long12" -regex = '''X(.?){0,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long13" -regex = '''X(.?){1,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long14" -regex = '''X(.?){2,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long15" -regex = '''X(.?){3,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long16" -regex = '''X(.?){4,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long17" -regex = '''X(.?){5,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long18" -regex = '''X(.?){6,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long19" -regex = '''X(.?){7,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [7, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long20" -regex = '''X(.?){8,}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long22" -regex = '''X(.?){0,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long24" -regex = '''X(.?){1,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long26" -regex = '''X(.?){2,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long28" -regex = '''X(.?){3,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 
9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long30" -regex = '''X(.?){4,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long32" -regex = '''X(.?){5,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long34" -regex = '''X(.?){6,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long36" -regex = '''X(.?){7,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long37" -regex = '''X(.?){8,8}Y''' -input = '''X1234567Y''' -captures = [[[0, 9], [8, 8]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long48" -regex = '''(a|ab|c|bcd){0,}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long49" -regex = '''(a|ab|c|bcd){1,}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long50" -regex = '''(a|ab|c|bcd){2,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long51" -regex = '''(a|ab|c|bcd){3,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long52" -regex = '''(a|ab|c|bcd){4,}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long53" -regex = '''(a|ab|c|bcd){0,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long54" -regex = '''(a|ab|c|bcd){1,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit 
= 1 -unescape = true - -[[tests]] -name = "repetition-long55" -regex = '''(a|ab|c|bcd){2,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long56" -regex = '''(a|ab|c|bcd){3,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [3, 6], [6, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long57" -regex = '''(a|ab|c|bcd){4,10}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long58" -regex = '''(a|ab|c|bcd)*(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long59" -regex = '''(a|ab|c|bcd)+(d*)''' -input = '''ababcd''' -captures = [[[0, 1], [0, 1], [1, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long65" -regex = '''(ab|a|c|bcd){0,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long67" -regex = '''(ab|a|c|bcd){1,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long69" -regex = '''(ab|a|c|bcd){2,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long71" -regex = '''(ab|a|c|bcd){3,}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long72" -regex = '''(ab|a|c|bcd){4,}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long74" -regex = '''(ab|a|c|bcd){0,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long76" -regex = '''(ab|a|c|bcd){1,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], 
[5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long78" -regex = '''(ab|a|c|bcd){2,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long80" -regex = '''(ab|a|c|bcd){3,10}(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long81" -regex = '''(ab|a|c|bcd){4,10}(d*)''' -input = '''ababcd''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long83" -regex = '''(ab|a|c|bcd)*(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition-long85" -regex = '''(ab|a|c|bcd)+(d*)''' -input = '''ababcd''' -captures = [[[0, 6], [4, 5], [5, 6]]] -match_limit = 1 -unescape = true - diff --git a/vendor/regex-automata/tests/data/fowler/repetition.toml b/vendor/regex-automata/tests/data/fowler/repetition.toml deleted file mode 100644 index fc8da8df4..000000000 --- a/vendor/regex-automata/tests/data/fowler/repetition.toml +++ /dev/null @@ -1,397 +0,0 @@ -# !!! DO NOT EDIT !!! -# Automatically generated by scripts/fowler-to-toml. -# Numbers in the test names correspond to the line number of the test from -# the original dat file. 
- -[[tests]] -name = "repetition10" -regex = '''((..)|(.))''' -input = '''''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition11" -regex = '''((..)|(.))((..)|(.))''' -input = '''''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition12" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition14" -regex = '''((..)|(.)){1}''' -input = '''''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition15" -regex = '''((..)|(.)){2}''' -input = '''''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition16" -regex = '''((..)|(.)){3}''' -input = '''''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition18" -regex = '''((..)|(.))*''' -input = '''''' -captures = [[[0, 0]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition20" -regex = '''((..)|(.))''' -input = '''a''' -captures = [[[0, 1], [0, 1], [], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition21" -regex = '''((..)|(.))((..)|(.))''' -input = '''a''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition22" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''a''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition24" -regex = '''((..)|(.)){1}''' -input = '''a''' -captures = [[[0, 1], [0, 1], [], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition25" -regex = '''((..)|(.)){2}''' -input = '''a''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition26" -regex = '''((..)|(.)){3}''' -input = '''a''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition28" -regex = '''((..)|(.))*''' -input = '''a''' -captures = [[[0, 1], [0, 1], [], [0, 1]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition30" -regex 
= '''((..)|(.))''' -input = '''aa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition31" -regex = '''((..)|(.))((..)|(.))''' -input = '''aa''' -captures = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition32" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''aa''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition34" -regex = '''((..)|(.)){1}''' -input = '''aa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition35" -regex = '''((..)|(.)){2}''' -input = '''aa''' -captures = [[[0, 2], [1, 2], [], [1, 2]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition36" -regex = '''((..)|(.)){3}''' -input = '''aa''' -captures = [] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition38" -regex = '''((..)|(.))*''' -input = '''aa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition40" -regex = '''((..)|(.))''' -input = '''aaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition41" -regex = '''((..)|(.))((..)|(.))''' -input = '''aaa''' -captures = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition42" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''aaa''' -captures = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition44" -regex = '''((..)|(.)){1}''' -input = '''aaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition46" -regex = '''((..)|(.)){2}''' -input = '''aaa''' -captures = [[[0, 3], [2, 3], [0, 2], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition47" -regex = 
'''((..)|(.)){3}''' -input = '''aaa''' -captures = [[[0, 3], [2, 3], [], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition50" -regex = '''((..)|(.))*''' -input = '''aaa''' -captures = [[[0, 3], [2, 3], [0, 2], [2, 3]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition52" -regex = '''((..)|(.))''' -input = '''aaaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition53" -regex = '''((..)|(.))((..)|(.))''' -input = '''aaaa''' -captures = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition54" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''aaaa''' -captures = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition56" -regex = '''((..)|(.)){1}''' -input = '''aaaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition57" -regex = '''((..)|(.)){2}''' -input = '''aaaa''' -captures = [[[0, 4], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition59" -regex = '''((..)|(.)){3}''' -input = '''aaaa''' -captures = [[[0, 4], [3, 4], [0, 2], [3, 4]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition61" -regex = '''((..)|(.))*''' -input = '''aaaa''' -captures = [[[0, 4], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition63" -regex = '''((..)|(.))''' -input = '''aaaaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition64" -regex = '''((..)|(.))((..)|(.))''' -input = '''aaaaa''' -captures = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition65" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''aaaaa''' -captures = [[[0, 5], [0, 2], [0, 2], [], [2, 4], 
[2, 4], [], [4, 5], [], [4, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition67" -regex = '''((..)|(.)){1}''' -input = '''aaaaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition68" -regex = '''((..)|(.)){2}''' -input = '''aaaaa''' -captures = [[[0, 4], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition70" -regex = '''((..)|(.)){3}''' -input = '''aaaaa''' -captures = [[[0, 5], [4, 5], [2, 4], [4, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition73" -regex = '''((..)|(.))*''' -input = '''aaaaa''' -captures = [[[0, 5], [4, 5], [2, 4], [4, 5]]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition75" -regex = '''((..)|(.))''' -input = '''aaaaaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition76" -regex = '''((..)|(.))((..)|(.))''' -input = '''aaaaaa''' -captures = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition77" -regex = '''((..)|(.))((..)|(.))((..)|(.))''' -input = '''aaaaaa''' -captures = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition79" -regex = '''((..)|(.)){1}''' -input = '''aaaaaa''' -captures = [[[0, 2], [0, 2], [0, 2], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition80" -regex = '''((..)|(.)){2}''' -input = '''aaaaaa''' -captures = [[[0, 4], [2, 4], [2, 4], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition81" -regex = '''((..)|(.)){3}''' -input = '''aaaaaa''' -captures = [[[0, 6], [4, 6], [4, 6], []]] -match_limit = 1 -unescape = true - -[[tests]] -name = "repetition83" -regex = '''((..)|(.))*''' -input = '''aaaaaa''' -captures = [[[0, 6], [4, 6], [4, 6], []]] -match_limit = 1 -unescape = true - diff --git 
a/vendor/regex-automata/tests/data/iter.toml b/vendor/regex-automata/tests/data/iter.toml deleted file mode 100644 index 6c0539fd4..000000000 --- a/vendor/regex-automata/tests/data/iter.toml +++ /dev/null @@ -1,119 +0,0 @@ -[[tests]] -name = "1" -regex = "a" -input = "aaa" -matches = [[0, 1], [1, 2], [2, 3]] - -[[tests]] -name = "2" -regex = "a" -input = "aba" -matches = [[0, 1], [2, 3]] - -[[tests]] -name = "empty1" -regex = '' -input = '' -matches = [[0, 0]] - -[[tests]] -name = "empty2" -regex = '' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty3" -regex = '()' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty4" -regex = '()*' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty5" -regex = '()+' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty6" -regex = '()?' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty7" -regex = '()()' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty8" -regex = '()+|z' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty9" -regex = 'z|()+' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty10" -regex = '()+|b' -input = 'abc' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] - -[[tests]] -name = "empty11" -regex = 'b|()+' -input = 'abc' -matches = [[0, 0], [1, 2], [3, 3]] - -[[tests]] -name = "start1" -regex = "^a" -input = "a" -matches = [[0, 1]] - -[[tests]] -name = "start2" -regex = "^a" -input = "aa" -matches = [[0, 1]] - -[[tests]] -name = "anchored1" -regex = "a" -input = "a" -matches = [[0, 1]] -anchored = true - -# This test is pretty subtle. It demonstrates the crucial difference between -# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively -# matches at the start of a haystack and nowhere else. 
The latter regex has -# no such restriction, but its automaton is constructed such that it lacks a -# `.*?` prefix. So it can actually produce matches at multiple locations. -# The anchored3 test drives this point home. -[[tests]] -name = "anchored2" -regex = "a" -input = "aa" -matches = [[0, 1], [1, 2]] -anchored = true - -# Unlikely anchored2, this test stops matching anything after it sees `b` -# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it -# determines that there are no remaining matches. -[[tests]] -name = "anchored3" -regex = "a" -input = "aaba" -matches = [[0, 1], [1, 2]] -anchored = true diff --git a/vendor/regex-automata/tests/data/misc.toml b/vendor/regex-automata/tests/data/misc.toml deleted file mode 100644 index c05418dd6..000000000 --- a/vendor/regex-automata/tests/data/misc.toml +++ /dev/null @@ -1,99 +0,0 @@ -[[tests]] -name = "ascii-literal" -regex = "a" -input = "a" -matches = [[0, 1]] - -[[tests]] -name = "ascii-literal-not" -regex = "a" -input = "z" -matches = [] - -[[tests]] -name = "ascii-literal-anchored" -regex = "a" -input = "a" -matches = [[0, 1]] -anchored = true - -[[tests]] -name = "ascii-literal-anchored-not" -regex = "a" -input = "z" -matches = [] -anchored = true - -[[tests]] -name = "anchor-start-end-line" -regex = '(?m)^bar$' -input = "foo\nbar\nbaz" -matches = [[4, 7]] - -[[tests]] -name = "prefix-literal-match" -regex = '^abc' -input = "abc" -matches = [[0, 3]] - -[[tests]] -name = "prefix-literal-match-ascii" -regex = '^abc' -input = "abc" -matches = [[0, 3]] -unicode = false -utf8 = false - -[[tests]] -name = "prefix-literal-no-match" -regex = '^abc' -input = "zabc" -matches = [] - -[[tests]] -name = "one-literal-edge" -regex = 'abc' -input = "xxxxxab" -matches = [] - -[[tests]] -name = "terminates" -regex = 'a$' -input = "a" -matches = [[0, 1]] - -[[tests]] -name = "suffix-100" -regex = '.*abcd' -input = "abcd" -matches = [[0, 4]] - -[[tests]] -name = "suffix-200" -regex = '.*(?:abcd)+' -input = 
"abcd" -matches = [[0, 4]] - -[[tests]] -name = "suffix-300" -regex = '.*(?:abcd)+' -input = "abcdabcd" -matches = [[0, 8]] - -[[tests]] -name = "suffix-400" -regex = '.*(?:abcd)+' -input = "abcdxabcd" -matches = [[0, 9]] - -[[tests]] -name = "suffix-500" -regex = '.*x(?:abcd)+' -input = "abcdxabcd" -matches = [[0, 9]] - -[[tests]] -name = "suffix-600" -regex = '[^abcd]*x(?:abcd)+' -input = "abcdxabcd" -matches = [[4, 9]] diff --git a/vendor/regex-automata/tests/data/multiline.toml b/vendor/regex-automata/tests/data/multiline.toml deleted file mode 100644 index cefdb2629..000000000 --- a/vendor/regex-automata/tests/data/multiline.toml +++ /dev/null @@ -1,275 +0,0 @@ -[[tests]] -name = "basic1" -regex = '(?m)^[a-z]+$' -input = "abc\ndef\nxyz" -matches = [[0, 3], [4, 7], [8, 11]] - -[[tests]] -name = "basic2" -regex = '(?m)^$' -input = "abc\ndef\nxyz" -matches = [] - -[[tests]] -name = "basic3" -regex = '(?m)^' -input = "abc\ndef\nxyz" -matches = [[0, 0], [4, 4], [8, 8]] - -[[tests]] -name = "basic4" -regex = '(?m)$' -input = "abc\ndef\nxyz" -matches = [[3, 3], [7, 7], [11, 11]] - -[[tests]] -name = "basic5" -regex = '(?m)^[a-z]' -input = "abc\ndef\nxyz" -matches = [[0, 1], [4, 5], [8, 9]] - -[[tests]] -name = "basic6" -regex = '(?m)[a-z]^' -input = "abc\ndef\nxyz" -matches = [] - -[[tests]] -name = "basic7" -regex = '(?m)[a-z]$' -input = "abc\ndef\nxyz" -matches = [[2, 3], [6, 7], [10, 11]] - -[[tests]] -name = "basic8" -regex = '(?m)$[a-z]' -input = "abc\ndef\nxyz" -matches = [] - -[[tests]] -name = "basic9" -regex = '(?m)^$' -input = "" -matches = [[0, 0]] - -[[tests]] -name = "repeat1" -regex = '(?m)(?:^$)*' -input = "a\nb\nc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - -[[tests]] -name = "repeat1-no-multi" -regex = '(?:^$)*' -input = "a\nb\nc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - -[[tests]] -name = "repeat2" -regex = '(?m)(?:^|a)+' -input = "a\naaa\n" -matches = [[0, 0], [2, 2], [3, 5], [6, 6]] - -[[tests]] -name = 
"repeat100" -regex = '(?m)(?:^|a)+' -input = "a\naaa\n" -matches = [[0, 0], [2, 2], [3, 5], [6, 6]] - -[[tests]] -name = "repeat2-no-multi" -regex = '(?:^|a)+' -input = "a\naaa\n" -matches = [[0, 0], [2, 5]] - -[[tests]] -name = "repeat3" -regex = '(?m)(?:^|a)*' -input = "a\naaa\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] - -[[tests]] -name = "repeat3-no-multi" -regex = '(?:^|a)*' -input = "a\naaa\n" -matches = [[0, 0], [1, 1], [2, 5], [6, 6]] - -[[tests]] -name = "repeat4" -regex = '(?m)(?:^|a+)' -input = "a\naaa\n" -matches = [[0, 0], [2, 2], [3, 5], [6, 6]] - -[[tests]] -name = "repeat4-no-multi" -regex = '(?:^|a+)' -input = "a\naaa\n" -matches = [[0, 0], [2, 5]] - -[[tests]] -name = "repeat5" -regex = '(?m)(?:^|a*)' -input = "a\naaa\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] - -[[tests]] -name = "repeat5-no-multi" -regex = '(?:^|a*)' -input = "a\naaa\n" -matches = [[0, 0], [1, 1], [2, 5], [6, 6]] - -[[tests]] -name = "repeat6" -regex = '(?m)(?:^[a-z])+' -input = "abc\ndef\nxyz" -matches = [[0, 1], [4, 5], [8, 9]] - -[[tests]] -name = "repeat6-no-multi" -regex = '(?:^[a-z])+' -input = "abc\ndef\nxyz" -matches = [[0, 1]] - -[[tests]] -name = "repeat7" -regex = '(?m)(?:^[a-z]{3}\n?)+' -input = "abc\ndef\nxyz" -matches = [[0, 11]] - -[[tests]] -name = "repeat7-no-multi" -regex = '(?:^[a-z]{3}\n?)+' -input = "abc\ndef\nxyz" -matches = [[0, 4]] - -[[tests]] -name = "repeat8" -regex = '(?m)(?:^[a-z]{3}\n?)*' -input = "abc\ndef\nxyz" -matches = [[0, 11]] - -[[tests]] -name = "repeat8-no-multi" -regex = '(?:^[a-z]{3}\n?)*' -input = "abc\ndef\nxyz" -matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] - -[[tests]] -name = "repeat9" -regex = '(?m)(?:\n?[a-z]{3}$)+' -input = "abc\ndef\nxyz" -matches = [[0, 11]] - -[[tests]] -name = "repeat9-no-multi" -regex = '(?:\n?[a-z]{3}$)+' -input = "abc\ndef\nxyz" -matches = [[7, 11]] - -[[tests]] -name = "repeat10" -regex = '(?m)(?:\n?[a-z]{3}$)*' -input = "abc\ndef\nxyz" -matches = 
[[0, 11]] - -[[tests]] -name = "repeat10-no-multi" -regex = '(?:\n?[a-z]{3}$)*' -input = "abc\ndef\nxyz" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] - -[[tests]] -name = "repeat11" -regex = '(?m)^*' -input = "\naa\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] - -[[tests]] -name = "repeat11-no-multi" -regex = '^*' -input = "\naa\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] - -[[tests]] -name = "repeat12" -regex = '(?m)^+' -input = "\naa\n" -matches = [[0, 0], [1, 1], [4, 4]] - -[[tests]] -name = "repeat12-no-multi" -regex = '^+' -input = "\naa\n" -matches = [[0, 0]] - -[[tests]] -name = "repeat13" -regex = '(?m)$*' -input = "\naa\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] - -[[tests]] -name = "repeat13-no-multi" -regex = '$*' -input = "\naa\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] - -[[tests]] -name = "repeat14" -regex = '(?m)$+' -input = "\naa\n" -matches = [[0, 0], [3, 3], [4, 4]] - -[[tests]] -name = "repeat14-no-multi" -regex = '$+' -input = "\naa\n" -matches = [[4, 4]] - -[[tests]] -name = "repeat15" -regex = '(?m)(?:$\n)+' -input = "\n\naaa\n\n" -matches = [[0, 2], [5, 7]] - -[[tests]] -name = "repeat15-no-multi" -regex = '(?:$\n)+' -input = "\n\naaa\n\n" -matches = [] - -[[tests]] -name = "repeat16" -regex = '(?m)(?:$\n)*' -input = "\n\naaa\n\n" -matches = [[0, 2], [3, 3], [4, 4], [5, 7]] - -[[tests]] -name = "repeat16-no-multi" -regex = '(?:$\n)*' -input = "\n\naaa\n\n" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] - -[[tests]] -name = "repeat17" -regex = '(?m)(?:$\n^)+' -input = "\n\naaa\n\n" -matches = [[0, 2], [5, 7]] - -[[tests]] -name = "repeat17-no-multi" -regex = '(?:$\n^)+' -input = "\n\naaa\n\n" -matches = [] - -[[tests]] -name = "repeat18" -regex = '(?m)(?:^|$)+' -input = "\n\naaa\n\n" -matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] - -[[tests]] -name = "repeat18-no-multi" -regex = '(?:^|$)+' -input = "\n\naaa\n\n" -matches = [[0, 
0], [7, 7]] diff --git a/vendor/regex-automata/tests/data/no-unicode.toml b/vendor/regex-automata/tests/data/no-unicode.toml deleted file mode 100644 index c7fc9664f..000000000 --- a/vendor/regex-automata/tests/data/no-unicode.toml +++ /dev/null @@ -1,158 +0,0 @@ -[[tests]] -name = "invalid-utf8-literal1" -regex = '\xFF' -input = '\xFF' -matches = [[0, 1]] -unicode = false -utf8 = false -unescape = true - - -[[tests]] -name = "mixed" -regex = '(.+)(?-u)(.+)' -input = '\xCE\x93\xCE\x94\xFF' -matches = [[0, 5]] -utf8 = false -unescape = true - - -[[tests]] -name = "case1" -regex = "a" -input = "A" -matches = [[0, 1]] -case_insensitive = true -unicode = false - -[[tests]] -name = "case2" -regex = "[a-z]+" -input = "AaAaA" -matches = [[0, 5]] -case_insensitive = true -unicode = false - -[[tests]] -name = "case3" -regex = "[a-z]+" -input = "aA\u212AaA" -matches = [[0, 7]] -case_insensitive = true - -[[tests]] -name = "case4" -regex = "[a-z]+" -input = "aA\u212AaA" -matches = [[0, 2], [5, 7]] -case_insensitive = true -unicode = false - - -[[tests]] -name = "negate1" -regex = "[^a]" -input = "δ" -matches = [[0, 2]] - -[[tests]] -name = "negate2" -regex = "[^a]" -input = "δ" -matches = [[0, 1], [1, 2]] -unicode = false -utf8 = false - - -[[tests]] -name = "dotstar-prefix1" -regex = "a" -input = '\xFFa' -matches = [[1, 2]] -unicode = false -utf8 = false -unescape = true - -[[tests]] -name = "dotstar-prefix2" -regex = "a" -input = '\xFFa' -matches = [[1, 2]] -utf8 = false -unescape = true - - -[[tests]] -name = "null-bytes1" -regex = '[^\x00]+\x00' -input = 'foo\x00' -matches = [[0, 4]] -unicode = false -utf8 = false -unescape = true - - -[[tests]] -name = "word-ascii" -regex = '\w+' -input = "aδ" -matches = [[0, 1]] -unicode = false - -[[tests]] -name = "word-unicode" -regex = '\w+' -input = "aδ" -matches = [[0, 3]] - -[[tests]] -name = "decimal-ascii" -regex = '\d+' -input = "1२३9" -matches = [[0, 1], [7, 8]] -unicode = false - -[[tests]] -name = "decimal-unicode" -regex = 
'\d+' -input = "1२३9" -matches = [[0, 8]] - -[[tests]] -name = "space-ascii" -regex = '\s+' -input = " \u1680" -matches = [[0, 1]] -unicode = false - -[[tests]] -name = "space-unicode" -regex = '\s+' -input = " \u1680" -matches = [[0, 4]] - - -[[tests]] -# See: https://github.com/rust-lang/regex/issues/484 -name = "iter1-bytes" -regex = '' -input = "☃" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] -utf8 = false - -[[tests]] -# See: https://github.com/rust-lang/regex/issues/484 -name = "iter1-utf8" -regex = '' -input = "☃" -matches = [[0, 0], [3, 3]] - -[[tests]] -# See: https://github.com/rust-lang/regex/issues/484 -# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8. -name = "iter2-bytes" -regex = '' -input = 'b\xFFr' -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] -unescape = true -utf8 = false diff --git a/vendor/regex-automata/tests/data/overlapping.toml b/vendor/regex-automata/tests/data/overlapping.toml deleted file mode 100644 index 6662876b4..000000000 --- a/vendor/regex-automata/tests/data/overlapping.toml +++ /dev/null @@ -1,126 +0,0 @@ -[[tests]] -name = "repetition-plus-leftmost-first-100" -regex = 'a+' -input = "aaa" -matches = [[0, 1], [0, 2], [0, 3]] -match_kind = "leftmost-first" -search_kind = "overlapping" - -[[tests]] -name = "repetition-plus-all-100" -regex = 'a+' -input = "aaa" -matches = [[0, 1], [0, 2], [0, 3]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "repetition-plus-leftmost-first-200" -regex = '(abc)+' -input = "zzabcabczzabc" -matches = [[2, 5], [2, 8]] -match_kind = "leftmost-first" -search_kind = "overlapping" - -[[tests]] -name = "repetition-plus-all-200" -regex = '(abc)+' -input = "zzabcabczzabc" -matches = [[2, 5], [2, 8], [10, 13]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "repetition-star-leftmost-first-100" -regex = 'a*' -input = "aaa" -matches = [[0, 0], [0, 1], [0, 2], [0, 3]] -match_kind = "leftmost-first" -search_kind = "overlapping" - -[[tests]] -name 
= "repetition-star-all-100" -regex = 'a*' -input = "aaa" -matches = [[0, 0], [0, 1], [0, 2], [0, 3]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "repetition-star-leftmost-first-200" -regex = '(abc)*' -input = "zzabcabczzabc" -matches = [[0, 0]] -match_kind = "leftmost-first" -search_kind = "overlapping" - -[[tests]] -name = "repetition-star-all-200" -regex = '(abc)*' -input = "zzabcabczzabc" -matches = [ - [0, 0], [1, 1], [2, 2], [3, 3], [4, 4], - [2, 5], - [6, 6], [7, 7], - [2, 8], - [9, 9], [10, 10], [11, 11], [12, 12], - [10, 13], -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "start-end-rep-leftmost-first" -regex = '(^$)*' -input = "abc" -matches = [[0, 0]] -match_kind = "leftmost-first" -search_kind = "overlapping" - -[[tests]] -name = "start-end-rep-all" -regex = '(^$)*' -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "alt-leftmost-first-100" -regex = 'abc|a' -input = "zzabcazzaabc" -matches = [[2, 3], [2, 5]] -match_kind = "leftmost-first" -search_kind = "overlapping" - -[[tests]] -name = "alt-all-100" -regex = 'abc|a' -input = "zzabcazzaabc" -matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty-000" -regex = "" -input = "abc" -matches = [[0, 0], [1, 1], [2, 2], [3, 3]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty-alt-000" -regex = "|b" -input = "abc" -matches = [[0, 0], [1, 1], [1, 2], [3, 3]] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty-alt-010" -regex = "b|" -input = "abc" -matches = [[0, 0], [1, 1], [1, 2], [3, 3]] -match_kind = "all" -search_kind = "overlapping" diff --git a/vendor/regex-automata/tests/data/regression.toml b/vendor/regex-automata/tests/data/regression.toml deleted file mode 100644 index 6a4dbb151..000000000 --- 
a/vendor/regex-automata/tests/data/regression.toml +++ /dev/null @@ -1,423 +0,0 @@ -# See: https://github.com/rust-lang/regex/issues/48 -[[tests]] -name = "invalid-regex-no-crash-100" -regex = '(*)' -input = "" -matches = [] -compiles = false - -# See: https://github.com/rust-lang/regex/issues/48 -[[tests]] -name = "invalid-regex-no-crash-200" -regex = '(?:?)' -input = "" -matches = [] -compiles = false - -# See: https://github.com/rust-lang/regex/issues/48 -[[tests]] -name = "invalid-regex-no-crash-300" -regex = '(?)' -input = "" -matches = [] -compiles = false - -# See: https://github.com/rust-lang/regex/issues/48 -[[tests]] -name = "invalid-regex-no-crash-400" -regex = '*' -input = "" -matches = [] -compiles = false - -# See: https://github.com/rust-lang/regex/issues/75 -[[tests]] -name = "unsorted-binary-search-100" -regex = '(?i-u)[a_]+' -input = "A_" -matches = [[0, 2]] - -# See: https://github.com/rust-lang/regex/issues/75 -[[tests]] -name = "unsorted-binary-search-200" -regex = '(?i-u)[A_]+' -input = "a_" -matches = [[0, 2]] - -# See: https://github.com/rust-lang/regex/issues/76 -[[tests]] -name = "unicode-case-lower-nocase-flag" -regex = '(?i)\p{Ll}+' -input = "ΛΘΓΔα" -matches = [[0, 10]] - -# See: https://github.com/rust-lang/regex/issues/99 -[[tests]] -name = "negated-char-class-100" -regex = '(?i)[^x]' -input = "x" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/99 -[[tests]] -name = "negated-char-class-200" -regex = '(?i)[^x]' -input = "X" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/101 -[[tests]] -name = "ascii-word-underscore" -regex = '[[:word:]]' -input = "_" -matches = [[0, 1]] - -# See: https://github.com/rust-lang/regex/issues/129 -[[tests]] -name = "captures-repeat" -regex = '([a-f]){2}(?P<foo>[x-z])' -input = "abx" -captures = [ - [[0, 3], [0, 2], [2, 3]], -] - -# See: https://github.com/rust-lang/regex/issues/153 -[[tests]] -name = "alt-in-alt-100" -regex = 'ab?|$' -input = "az" -matches = [[0, 1], [2, 
2]] - -# See: https://github.com/rust-lang/regex/issues/153 -[[tests]] -name = "alt-in-alt-200" -regex = '^(.*?)(\n|\r\n?|$)' -input = "ab\rcd" -matches = [[0, 3]] - -# See: https://github.com/rust-lang/regex/issues/169 -[[tests]] -name = "leftmost-first-prefix" -regex = 'z*azb' -input = "azb" -matches = [[0, 3]] - -# See: https://github.com/rust-lang/regex/issues/191 -[[tests]] -name = "many-alternates" -regex = '1|2|3|4|5|6|7|8|9|10|int' -input = "int" -matches = [[0, 3]] - -# See: https://github.com/rust-lang/regex/issues/204 -[[tests]] -name = "word-boundary-alone-100" -regex = '\b' -input = "Should this (work?)" -matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]] - -# See: https://github.com/rust-lang/regex/issues/204 -[[tests]] -name = "word-boundary-alone-200" -regex = '\b' -input = "a b c" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - -# See: https://github.com/rust-lang/regex/issues/264 -[[tests]] -name = "word-boundary-ascii-no-capture" -regex = '\B' -input = "\U00028F3E" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] -unicode = false -utf8 = false - -# See: https://github.com/rust-lang/regex/issues/264 -[[tests]] -name = "word-boundary-ascii-capture" -regex = '(\B)' -input = "\U00028F3E" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] -unicode = false -utf8 = false - -# See: https://github.com/rust-lang/regex/issues/268 -[[tests]] -name = "partial-anchor" -regex = '^a|b' -input = "ba" -matches = [[0, 1]] - -# See: https://github.com/rust-lang/regex/issues/271 -[[tests]] -name = "endl-or-word-boundary" -regex = '(?m:$)|(?-u:\b)' -input = "\U0006084E" -matches = [[4, 4]] - -# See: https://github.com/rust-lang/regex/issues/271 -[[tests]] -name = "zero-or-end" -regex = '(?i-u:\x00)|$' -input = "\U000E682F" -matches = [[4, 4]] - -# See: https://github.com/rust-lang/regex/issues/271 -[[tests]] -name = "y-or-endl" -regex = '(?i-u:y)|(?m:$)' -input = "\U000B4331" -matches = [[4, 4]] - -# See: 
https://github.com/rust-lang/regex/issues/271 -[[tests]] -name = "word-boundary-start-x" -regex = '(?u:\b)^(?-u:X)' -input = "X" -matches = [[0, 1]] - -# See: https://github.com/rust-lang/regex/issues/271 -[[tests]] -name = "word-boundary-ascii-start-x" -regex = '(?-u:\b)^(?-u:X)' -input = "X" -matches = [[0, 1]] - -# See: https://github.com/rust-lang/regex/issues/271 -[[tests]] -name = "end-not-word-boundary" -regex = '$\B' -input = "\U0005C124\U000B576C" -matches = [[8, 8]] -unicode = false -utf8 = false - -# See: https://github.com/rust-lang/regex/issues/280 -[[tests]] -name = "partial-anchor-alternate-begin" -regex = '^a|z' -input = "yyyyya" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/280 -[[tests]] -name = "partial-anchor-alternate-end" -regex = 'a$|z' -input = "ayyyyy" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/289 -[[tests]] -name = "lits-unambiguous-100" -regex = '(ABC|CDA|BC)X' -input = "CDAX" -matches = [[0, 4]] - -# See: https://github.com/rust-lang/regex/issues/291 -[[tests]] -name = "lits-unambiguous-200" -regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$' -input = "CIMG2341" -captures = [ - [[0, 8], [0, 4], [], [0, 4], [4, 8]], -] - -# See: https://github.com/rust-lang/regex/issues/303 -[[tests]] -name = "negated-full-byte-range" -regex = '[^\x00-\xFF]' -input = "" -matches = [] -compiles = false -unicode = false -utf8 = false - -# See: https://github.com/rust-lang/regex/issues/321 -[[tests]] -name = "strange-anchor-non-complete-prefix" -regex = 'a^{2}' -input = "" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/321 -[[tests]] -name = "strange-anchor-non-complete-suffix" -regex = '${2}a' -input = "" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/334 -# See: https://github.com/rust-lang/regex/issues/557 -[[tests]] -name = "captures-after-dfa-premature-end-100" -regex = 'a(b*(X|$))?' 
-input = "abcbX" -captures = [ - [[0, 1], [], []], -] - -# See: https://github.com/rust-lang/regex/issues/334 -# See: https://github.com/rust-lang/regex/issues/557 -[[tests]] -name = "captures-after-dfa-premature-end-200" -regex = 'a(bc*(X|$))?' -input = "abcbX" -captures = [ - [[0, 1], [], []], -] - -# See: https://github.com/rust-lang/regex/issues/334 -# See: https://github.com/rust-lang/regex/issues/557 -[[tests]] -name = "captures-after-dfa-premature-end-300" -regex = '(aa$)?' -input = "aaz" -captures = [ - [[0, 0]], - [[1, 1]], - [[2, 2]], - [[3, 3]], -] - -# See: https://github.com/rust-lang/regex/issues/437 -[[tests]] -name = "literal-panic" -regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+' -input = "test" -matches = [] - -# See: https://github.com/rust-lang/regex/issues/527 -[[tests]] -name = "empty-flag-expr" -regex = '(((?x)))' -input = "" -matches = [[0, 0]] - -# See: https://github.com/rust-lang/regex/issues/533 -[[tests]] -name = "blank-matches-nothing-between-space-and-tab" -regex = '[[:blank:]]' -input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' -match = false -unescape = true - -# See: https://github.com/rust-lang/regex/issues/533 -[[tests]] -name = "blank-matches-nothing-between-space-and-tab-inverted" -regex = '^[[:^blank:]]+$' -input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' -match = true -unescape = true - -# See: https://github.com/rust-lang/regex/issues/555 -[[tests]] -name = "invalid-repetition" -regex = '(?m){1,1}' -input = "" -matches = [] -compiles = false - -# See: https://github.com/rust-lang/regex/issues/640 -[[tests]] -name = "flags-are-unset" -regex = '((?i)foo)|Bar' -input = "foo Foo bar Bar" -matches = [[0, 3], [4, 7], [12, 15]] - -# Note that 'Ј' is not 'j', but cyrillic Je -# https://en.wikipedia.org/wiki/Je_(Cyrillic) -# -# See: https://github.com/rust-lang/regex/issues/659 -[[tests]] -name = "empty-group-with-unicode" -regex 
= '()Ј01' -input = 'zЈ01' -matches = [[1, 5]] - -# See: https://github.com/rust-lang/regex/issues/579 -[[tests]] -name = "word-boundary-weird" -regex = '\b..\b' -input = "I have 12, he has 2!" -matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] - -# See: https://github.com/rust-lang/regex/issues/579 -[[tests]] -name = "word-boundary-weird-ascii" -regex = '\b..\b' -input = "I have 12, he has 2!" -matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] -unicode = false -utf8 = false - -# See: https://github.com/rust-lang/regex/issues/579 -[[tests]] -name = "word-boundary-weird-minimal-ascii" -regex = '\b..\b' -input = "az,,b" -matches = [[0, 2], [2, 4]] -unicode = false -utf8 = false - -# See: https://github.com/BurntSushi/ripgrep/issues/1203 -[[tests]] -name = "reverse-suffix-100" -regex = '[0-4][0-4][0-4]000' -input = "153.230000" -matches = [[4, 10]] - -# See: https://github.com/BurntSushi/ripgrep/issues/1203 -[[tests]] -name = "reverse-suffix-200" -regex = '[0-9][0-9][0-9]000' -input = "153.230000\n" -matches = [[4, 10]] - -# See: https://github.com/BurntSushi/ripgrep/issues/1247 -[[tests]] -name = "stops" -regex = '\bs(?:[ab])' -input = 's\xE4' -matches = [] -unescape = true - -# See: https://github.com/BurntSushi/ripgrep/issues/1247 -[[tests]] -name = "stops-ascii" -regex = '(?-u:\b)s(?:[ab])' -input = 's\xE4' -matches = [] -unescape = true - -# There is no issue for this bug. -[[tests]] -name = "anchored-prefix-100" -regex = '^a[[:^space:]]' -input = "a " -matches = [] - -# There is no issue for this bug. -[[tests]] -name = "anchored-prefix-200" -regex = '^a[[:^space:]]' -input = "foo boo a" -matches = [] - -# There is no issue for this bug. -[[tests]] -name = "anchored-prefix-300" -regex = '^-[a-z]' -input = "r-f" -matches = [] - -# Tests that a possible Aho-Corasick optimization works correctly. It only -# kicks in when we have a lot of literals. By "works correctly," we mean that -# leftmost-first match semantics are properly respected. 
That is, samwise -# should match, not sam. -# -# There is no issue for this bug. -[[tests]] -name = "aho-corasick-100" -regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z' -input = "samwise" -matches = [[0, 7]] diff --git a/vendor/regex-automata/tests/data/set.toml b/vendor/regex-automata/tests/data/set.toml deleted file mode 100644 index e0eb0583e..000000000 --- a/vendor/regex-automata/tests/data/set.toml +++ /dev/null @@ -1,523 +0,0 @@ -[[tests]] -name = "basic10" -regexes = ["a", "a"] -input = "a" -matches = [ - { id = 0, offsets = [0, 1] }, - { id = 1, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic10-leftmost-first" -regexes = ["a", "a"] -input = "a" -matches = [ - { id = 0, offsets = [0, 1] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "basic20" -regexes = ["a", "a"] -input = "ba" -matches = [ - { id = 0, offsets = [1, 2] }, - { id = 1, offsets = [1, 2] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic30" -regexes = ["a", "b"] -input = "a" -matches = [ - { id = 0, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic40" -regexes = ["a", "b"] -input = "b" -matches = [ - { id = 1, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic50" -regexes = ["a|b", "b|a"] -input = "b" -matches = [ - { id = 0, offsets = [0, 1] }, - { id = 1, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic60" -regexes = ["foo", "oo"] -input = "foo" -matches = [ - { id = 0, offsets = [0, 3] }, - { id = 1, offsets = [1, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic60-leftmost-first" -regexes = ["foo", "oo"] -input = "foo" -matches = [ - { id = 0, offsets = [0, 3] }, -] -match_kind = "leftmost-first" -search_kind = 
"leftmost" - -[[tests]] -name = "basic61" -regexes = ["oo", "foo"] -input = "foo" -matches = [ - { id = 1, offsets = [0, 3] }, - { id = 0, offsets = [1, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic61-leftmost-first" -regexes = ["oo", "foo"] -input = "foo" -matches = [ - { id = 1, offsets = [0, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "basic70" -regexes = ["abcd", "bcd", "cd", "d"] -input = "abcd" -matches = [ - { id = 0, offsets = [0, 4] }, - { id = 1, offsets = [1, 4] }, - { id = 2, offsets = [2, 4] }, - { id = 3, offsets = [3, 4] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic71" -regexes = ["bcd", "cd", "d", "abcd"] -input = "abcd" -matches = [ - { id = 3, offsets = [0, 4] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "basic80" -regexes = ["^foo", "bar$"] -input = "foo" -matches = [ - { id = 0, offsets = [0, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic81" -regexes = ["^foo", "bar$"] -input = "foo bar" -matches = [ - { id = 0, offsets = [0, 3] }, - { id = 1, offsets = [4, 7] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic82" -regexes = ["^foo", "bar$"] -input = "bar" -matches = [ - { id = 1, offsets = [0, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic90" -regexes = ["[a-z]+$", "foo"] -input = "01234 foo" -matches = [ - { id = 0, offsets = [6, 9] }, - { id = 1, offsets = [6, 9] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic91" -regexes = ["[a-z]+$", "foo"] -input = "foo 01234" -matches = [ - { id = 1, offsets = [0, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic100" -regexes = [".*?", "a"] -input = "zzza" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [0, 1] }, - { id = 0, offsets = [0, 2] }, - { id = 
0, offsets = [0, 3] }, - { id = 0, offsets = [0, 4] }, - { id = 1, offsets = [3, 4] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic101" -regexes = [".*", "a"] -input = "zzza" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [0, 1] }, - { id = 0, offsets = [0, 2] }, - { id = 0, offsets = [0, 3] }, - { id = 0, offsets = [0, 4] }, - { id = 1, offsets = [3, 4] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic102" -regexes = [".*", "a"] -input = "zzz" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [0, 1] }, - { id = 0, offsets = [0, 2] }, - { id = 0, offsets = [0, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic110" -regexes = ['\ba\b'] -input = "hello a bye" -matches = [ - { id = 0, offsets = [6, 7] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic111" -regexes = ['\ba\b', '\be\b'] -input = "hello a bye e" -matches = [ - { id = 0, offsets = [6, 7] }, - { id = 1, offsets = [12, 13] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic120" -regexes = ["a"] -input = "a" -matches = [ - { id = 0, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic121" -regexes = [".*a"] -input = "a" -matches = [ - { id = 0, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic122" -regexes = [".*a", "β"] -input = "β" -matches = [ - { id = 1, offsets = [0, 2] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "basic130" -regexes = ["ab", "b"] -input = "ba" -matches = [ - { id = 1, offsets = [0, 1] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty10" -regexes = ["", "a"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 1, offsets = [0, 1] }, - { id = 0, offsets = [1, 1] }, - { id = 0, offsets = [2, 2] }, - { id = 0, 
offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty10-leftmost-first" -regexes = ["", "a"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [1, 1] }, - { id = 0, offsets = [2, 2] }, - { id = 0, offsets = [3, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "empty11" -regexes = ["a", ""] -input = "abc" -matches = [ - { id = 1, offsets = [0, 0] }, - { id = 0, offsets = [0, 1] }, - { id = 1, offsets = [1, 1] }, - { id = 1, offsets = [2, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty11-leftmost-first" -regexes = ["a", ""] -input = "abc" -matches = [ - { id = 0, offsets = [0, 1] }, - { id = 1, offsets = [2, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "empty20" -regexes = ["", "b"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [1, 1] }, - { id = 1, offsets = [1, 2] }, - { id = 0, offsets = [2, 2] }, - { id = 0, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty20-leftmost-first" -regexes = ["", "b"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [1, 1] }, - { id = 0, offsets = [2, 2] }, - { id = 0, offsets = [3, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "empty21" -regexes = ["b", ""] -input = "abc" -matches = [ - { id = 1, offsets = [0, 0] }, - { id = 1, offsets = [1, 1] }, - { id = 0, offsets = [1, 2] }, - { id = 1, offsets = [2, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty21-leftmost-first" -regexes = ["b", ""] -input = "abc" -matches = [ - { id = 1, offsets = [0, 0] }, - { id = 0, offsets = [1, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "leftmost-first" 
-search_kind = "leftmost" - -[[tests]] -name = "empty22" -regexes = ["(?:)", "b"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [1, 1] }, - { id = 1, offsets = [1, 2] }, - { id = 0, offsets = [2, 2] }, - { id = 0, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty23" -regexes = ["b", "(?:)"] -input = "abc" -matches = [ - { id = 1, offsets = [0, 0] }, - { id = 1, offsets = [1, 1] }, - { id = 0, offsets = [1, 2] }, - { id = 1, offsets = [2, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty30" -regexes = ["", "z"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [1, 1] }, - { id = 0, offsets = [2, 2] }, - { id = 0, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty30-leftmost-first" -regexes = ["", "z"] -input = "abc" -matches = [ - { id = 0, offsets = [0, 0] }, - { id = 0, offsets = [1, 1] }, - { id = 0, offsets = [2, 2] }, - { id = 0, offsets = [3, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "empty31" -regexes = ["z", ""] -input = "abc" -matches = [ - { id = 1, offsets = [0, 0] }, - { id = 1, offsets = [1, 1] }, - { id = 1, offsets = [2, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty31-leftmost-first" -regexes = ["z", ""] -input = "abc" -matches = [ - { id = 1, offsets = [0, 0] }, - { id = 1, offsets = [1, 1] }, - { id = 1, offsets = [2, 2] }, - { id = 1, offsets = [3, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "empty40" -regexes = ["c(?:)", "b"] -input = "abc" -matches = [ - { id = 1, offsets = [1, 2] }, - { id = 0, offsets = [2, 3] }, -] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "empty40-leftmost-first" -regexes = ["c(?:)", "b"] -input = "abc" 
-matches = [ - { id = 1, offsets = [1, 2] }, - { id = 0, offsets = [2, 3] }, -] -match_kind = "leftmost-first" -search_kind = "leftmost" - -[[tests]] -name = "nomatch10" -regexes = ["a", "a"] -input = "b" -matches = [] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "nomatch20" -regexes = ["^foo", "bar$"] -input = "bar foo" -matches = [] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "nomatch30" -regexes = [] -input = "a" -matches = [] -match_kind = "all" -search_kind = "overlapping" - -[[tests]] -name = "nomatch40" -regexes = ["^rooted$", '\.log$'] -input = "notrooted" -matches = [] -match_kind = "all" -search_kind = "overlapping" diff --git a/vendor/regex-automata/tests/data/unicode.toml b/vendor/regex-automata/tests/data/unicode.toml deleted file mode 100644 index 016bbfd9b..000000000 --- a/vendor/regex-automata/tests/data/unicode.toml +++ /dev/null @@ -1,514 +0,0 @@ -# Basic Unicode literal support. -[[tests]] -name = "literal1" -regex = '☃' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "literal2" -regex = '☃+' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "literal3" -regex = '(?i)☃+' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "literal4" -regex = '(?i)Δ' -input = "δ" -matches = [[0, 2]] - -# Unicode word boundaries. -[[tests]] -name = "wb-100" -regex = '\d\b' -input = "6δ" -matches = [] - -[[tests]] -name = "wb-200" -regex = '\d\b' -input = "6 " -matches = [[0, 1]] - -[[tests]] -name = "wb-300" -regex = '\d\B' -input = "6δ" -matches = [[0, 1]] - -[[tests]] -name = "wb-400" -regex = '\d\B' -input = "6 " -matches = [] - -# Unicode character class support. 
-[[tests]] -name = "class1" -regex = '[☃Ⅰ]+' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "class2" -regex = '\pN' -input = "Ⅰ" -matches = [[0, 3]] - -[[tests]] -name = "class3" -regex = '\pN+' -input = "Ⅰ1Ⅱ2" -matches = [[0, 8]] - -[[tests]] -name = "class4" -regex = '\PN+' -input = "abⅠ" -matches = [[0, 2]] - -[[tests]] -name = "class5" -regex = '[\PN]+' -input = "abⅠ" -matches = [[0, 2]] - -[[tests]] -name = "class6" -regex = '[^\PN]+' -input = "abⅠ" -matches = [[2, 5]] - -[[tests]] -name = "class7" -regex = '\p{Lu}+' -input = "ΛΘΓΔα" -matches = [[0, 8]] - -[[tests]] -name = "class8" -regex = '(?i)\p{Lu}+' -input = "ΛΘΓΔα" -matches = [[0, 10]] - -[[tests]] -name = "class9" -regex = '\pL+' -input = "ΛΘΓΔα" -matches = [[0, 10]] - -[[tests]] -name = "class10" -regex = '\p{Ll}+' -input = "ΛΘΓΔα" -matches = [[8, 10]] - -# Unicode aware "Perl" character classes. -[[tests]] -name = "perl1" -regex = '\w+' -input = "dδd" -matches = [[0, 4]] - -[[tests]] -name = "perl2" -regex = '\w+' -input = "⥡" -matches = [] - -[[tests]] -name = "perl3" -regex = '\W+' -input = "⥡" -matches = [[0, 3]] - -[[tests]] -name = "perl4" -regex = '\d+' -input = "1२३9" -matches = [[0, 8]] - -[[tests]] -name = "perl5" -regex = '\d+' -input = "Ⅱ" -matches = [] - -[[tests]] -name = "perl6" -regex = '\D+' -input = "Ⅱ" -matches = [[0, 3]] - -[[tests]] -name = "perl7" -regex = '\s+' -input = " " -matches = [[0, 3]] - -[[tests]] -name = "perl8" -regex = '\s+' -input = "☃" -matches = [] - -[[tests]] -name = "perl9" -regex = '\S+' -input = "☃" -matches = [[0, 3]] - -# Specific tests for Unicode general category classes. 
-[[tests]] -name = "class-gencat1" -regex = '\p{Cased_Letter}' -input = "A" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat2" -regex = '\p{Close_Punctuation}' -input = "❯" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat3" -regex = '\p{Connector_Punctuation}' -input = "⁀" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat4" -regex = '\p{Control}' -input = "\u009F" -matches = [[0, 2]] - -[[tests]] -name = "class-gencat5" -regex = '\p{Currency_Symbol}' -input = "£" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat6" -regex = '\p{Dash_Punctuation}' -input = "〰" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat7" -regex = '\p{Decimal_Number}' -input = "𑓙" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat8" -regex = '\p{Enclosing_Mark}' -input = "\uA672" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat9" -regex = '\p{Final_Punctuation}' -input = "⸡" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat10" -regex = '\p{Format}' -input = "\U000E007F" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat11" -regex = '\p{Initial_Punctuation}' -input = "⸜" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat12" -regex = '\p{Letter}' -input = "Έ" -matches = [[0, 2]] - -[[tests]] -name = "class-gencat13" -regex = '\p{Letter_Number}' -input = "ↂ" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat14" -regex = '\p{Line_Separator}' -input = "\u2028" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat15" -regex = '\p{Lowercase_Letter}' -input = "ϛ" -matches = [[0, 2]] - -[[tests]] -name = "class-gencat16" -regex = '\p{Mark}' -input = "\U000E01EF" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat17" -regex = '\p{Math}' -input = "⋿" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat18" -regex = '\p{Modifier_Letter}' -input = "𖭃" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat19" -regex = '\p{Modifier_Symbol}' -input = "🏿" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat20" -regex = '\p{Nonspacing_Mark}' -input = 
"\U0001E94A" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat21" -regex = '\p{Number}' -input = "⓿" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat22" -regex = '\p{Open_Punctuation}' -input = "⦅" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat23" -regex = '\p{Other}' -input = "\u0BC9" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat24" -regex = '\p{Other_Letter}' -input = "ꓷ" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat25" -regex = '\p{Other_Number}' -input = "㉏" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat26" -regex = '\p{Other_Punctuation}' -input = "𞥞" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat27" -regex = '\p{Other_Symbol}' -input = "⅌" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat28" -regex = '\p{Paragraph_Separator}' -input = "\u2029" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat29" -regex = '\p{Private_Use}' -input = "\U0010FFFD" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat30" -regex = '\p{Punctuation}' -input = "𑁍" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat31" -regex = '\p{Separator}' -input = "\u3000" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat32" -regex = '\p{Space_Separator}' -input = "\u205F" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat33" -regex = '\p{Spacing_Mark}' -input = "\U00016F7E" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat34" -regex = '\p{Symbol}' -input = "⯈" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat35" -regex = '\p{Titlecase_Letter}' -input = "ῼ" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat36" -regex = '\p{Unassigned}' -input = "\U0010FFFF" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat37" -regex = '\p{Uppercase_Letter}' -input = "Ꝋ" -matches = [[0, 3]] - - -# Tests for Unicode emoji properties. 
-[[tests]] -name = "class-emoji1" -regex = '\p{Emoji}' -input = "\u23E9" -matches = [[0, 3]] - -[[tests]] -name = "class-emoji2" -regex = '\p{emoji}' -input = "\U0001F21A" -matches = [[0, 4]] - -[[tests]] -name = "class-emoji3" -regex = '\p{extendedpictographic}' -input = "\U0001FA6E" -matches = [[0, 4]] - -[[tests]] -name = "class-emoji4" -regex = '\p{extendedpictographic}' -input = "\U0001FFFD" -matches = [[0, 4]] - - -# Tests for Unicode grapheme cluster properties. -[[tests]] -name = "class-gcb1" -regex = '\p{grapheme_cluster_break=prepend}' -input = "\U00011D46" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb2" -regex = '\p{gcb=regional_indicator}' -input = "\U0001F1E6" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb3" -regex = '\p{gcb=ri}' -input = "\U0001F1E7" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb4" -regex = '\p{regionalindicator}' -input = "\U0001F1FF" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb5" -regex = '\p{gcb=lvt}' -input = "\uC989" -matches = [[0, 3]] - -[[tests]] -name = "class-gcb6" -regex = '\p{gcb=zwj}' -input = "\u200D" -matches = [[0, 3]] - -# Tests for Unicode word boundary properties. -[[tests]] -name = "class-word-break1" -regex = '\p{word_break=Hebrew_Letter}' -input = "\uFB46" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break2" -regex = '\p{wb=hebrewletter}' -input = "\uFB46" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break3" -regex = '\p{wb=ExtendNumLet}' -input = "\uFF3F" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break4" -regex = '\p{wb=WSegSpace}' -input = "\u3000" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break5" -regex = '\p{wb=numeric}' -input = "\U0001E950" -matches = [[0, 4]] - -# Tests for Unicode sentence boundary properties. 
-[[tests]] -name = "class-sentence-break1" -regex = '\p{sentence_break=Lower}' -input = "\u0469" -matches = [[0, 2]] - -[[tests]] -name = "class-sentence-break2" -regex = '\p{sb=lower}' -input = "\u0469" -matches = [[0, 2]] - -[[tests]] -name = "class-sentence-break3" -regex = '\p{sb=Close}' -input = "\uFF60" -matches = [[0, 3]] - -[[tests]] -name = "class-sentence-break4" -regex = '\p{sb=Close}' -input = "\U0001F677" -matches = [[0, 4]] - -[[tests]] -name = "class-sentence-break5" -regex = '\p{sb=SContinue}' -input = "\uFF64" -matches = [[0, 3]] diff --git a/vendor/regex-automata/tests/data/word-boundary.toml b/vendor/regex-automata/tests/data/word-boundary.toml deleted file mode 100644 index e84b25c2a..000000000 --- a/vendor/regex-automata/tests/data/word-boundary.toml +++ /dev/null @@ -1,771 +0,0 @@ -# Some of these are cribbed from RE2's test suite. - -# These test \b. Below are tests for \B. -[[tests]] -name = "wb1" -regex = '\b' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb2" -regex = '\b' -input = "a" -matches = [[0, 0], [1, 1]] -unicode = false - -[[tests]] -name = "wb3" -regex = '\b' -input = "ab" -matches = [[0, 0], [2, 2]] -unicode = false - -[[tests]] -name = "wb4" -regex = '^\b' -input = "ab" -matches = [[0, 0]] -unicode = false - -[[tests]] -name = "wb5" -regex = '\b$' -input = "ab" -matches = [[2, 2]] -unicode = false - -[[tests]] -name = "wb6" -regex = '^\b$' -input = "ab" -matches = [] -unicode = false - -[[tests]] -name = "wb7" -regex = '\bbar\b' -input = "nobar bar foo bar" -matches = [[6, 9], [14, 17]] -unicode = false - -[[tests]] -name = "wb8" -regex = 'a\b' -input = "faoa x" -matches = [[3, 4]] -unicode = false - -[[tests]] -name = "wb9" -regex = '\bbar' -input = "bar x" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb10" -regex = '\bbar' -input = "foo\nbar x" -matches = [[4, 7]] -unicode = false - -[[tests]] -name = "wb11" -regex = 'bar\b' -input = "foobar" -matches = [[3, 6]] -unicode = false - -[[tests]] 
-name = "wb12" -regex = 'bar\b' -input = "foobar\nxxx" -matches = [[3, 6]] -unicode = false - -[[tests]] -name = "wb13" -regex = '(foo|bar|[A-Z])\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb14" -regex = '(foo|bar|[A-Z])\b' -input = "foo\n" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb15" -regex = '\b(foo|bar|[A-Z])' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb16" -regex = '\b(foo|bar|[A-Z])\b' -input = "X" -matches = [[0, 1]] -unicode = false - -[[tests]] -name = "wb17" -regex = '\b(foo|bar|[A-Z])\b' -input = "XY" -matches = [] -unicode = false - -[[tests]] -name = "wb18" -regex = '\b(foo|bar|[A-Z])\b' -input = "bar" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb19" -regex = '\b(foo|bar|[A-Z])\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb20" -regex = '\b(foo|bar|[A-Z])\b' -input = "foo\n" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb21" -regex = '\b(foo|bar|[A-Z])\b' -input = "ffoo bbar N x" -matches = [[10, 11]] -unicode = false - -[[tests]] -name = "wb22" -regex = '\b(fo|foo)\b' -input = "fo" -matches = [[0, 2]] -unicode = false - -[[tests]] -name = "wb23" -regex = '\b(fo|foo)\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb24" -regex = '\b\b' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb25" -regex = '\b\b' -input = "a" -matches = [[0, 0], [1, 1]] -unicode = false - -[[tests]] -name = "wb26" -regex = '\b$' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb27" -regex = '\b$' -input = "x" -matches = [[1, 1]] -unicode = false - -[[tests]] -name = "wb28" -regex = '\b$' -input = "y x" -matches = [[3, 3]] -unicode = false - -[[tests]] -name = "wb29" -regex = '(?-u:\b).$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb30" -regex = '^\b(fo|foo)\b' -input = "fo" -matches = [[0, 2]] -unicode = false - -[[tests]] -name = "wb31" -regex = 
'^\b(fo|foo)\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb32" -regex = '^\b$' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb33" -regex = '^\b$' -input = "x" -matches = [] -unicode = false - -[[tests]] -name = "wb34" -regex = '^(?-u:\b).$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb35" -regex = '^(?-u:\b).(?-u:\b)$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb36" -regex = '^^^^^\b$$$$$' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb37" -regex = '^^^^^(?-u:\b).$$$$$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb38" -regex = '^^^^^\b$$$$$' -input = "x" -matches = [] -unicode = false - -[[tests]] -name = "wb39" -regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb40" -regex = '(?-u:\b).+(?-u:\b)' -input = "$$abc$$" -matches = [[2, 5]] - -[[tests]] -name = "wb41" -regex = '\b' -input = "a b c" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] -unicode = false - -[[tests]] -name = "wb42" -regex = '\bfoo\b' -input = "zzz foo zzz" -matches = [[4, 7]] -unicode = false - -[[tests]] -name = "wb43" -regex = '\b^' -input = "ab" -matches = [[0, 0]] -unicode = false - -[[tests]] -name = "wb44" -regex = '$\b' -input = "ab" -matches = [[2, 2]] -unicode = false - - -# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we -# have to disable it for most of these tests. This is because \B can match at -# non-UTF-8 boundaries. 
-[[tests]] -name = "nb1" -regex = '\Bfoo\B' -input = "n foo xfoox that" -matches = [[7, 10]] -unicode = false -utf8 = false - -[[tests]] -name = "nb2" -regex = 'a\B' -input = "faoa x" -matches = [[1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "nb3" -regex = '\Bbar' -input = "bar x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb4" -regex = '\Bbar' -input = "foo\nbar x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb5" -regex = 'bar\B' -input = "foobar" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb6" -regex = 'bar\B' -input = "foobar\nxxx" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb7" -regex = '(foo|bar|[A-Z])\B' -input = "foox" -matches = [[0, 3]] -unicode = false -utf8 = false - -[[tests]] -name = "nb8" -regex = '(foo|bar|[A-Z])\B' -input = "foo\n" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb9" -regex = '\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb10" -regex = '\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb11" -regex = '\B(foo|bar|[A-Z])' -input = "foo" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb12" -regex = '\B(foo|bar|[A-Z])\B' -input = "xXy" -matches = [[1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "nb13" -regex = '\B(foo|bar|[A-Z])\B' -input = "XY" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb14" -regex = '\B(foo|bar|[A-Z])\B' -input = "XYZ" -matches = [[1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "nb15" -regex = '\B(foo|bar|[A-Z])\B' -input = "abara" -matches = [[1, 4]] -unicode = false -utf8 = false - -[[tests]] -name = "nb16" -regex = '\B(foo|bar|[A-Z])\B' -input = "xfoo_" -matches = [[1, 4]] -unicode = false -utf8 = false - -[[tests]] -name = "nb17" -regex = '\B(foo|bar|[A-Z])\B' -input = "xfoo\n" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = 
"nb18" -regex = '\B(foo|bar|[A-Z])\B' -input = "foo bar vNX" -matches = [[9, 10]] -unicode = false -utf8 = false - -[[tests]] -name = "nb19" -regex = '\B(fo|foo)\B' -input = "xfoo" -matches = [[1, 3]] -unicode = false -utf8 = false - -[[tests]] -name = "nb20" -regex = '\B(foo|fo)\B' -input = "xfooo" -matches = [[1, 4]] -unicode = false -utf8 = false - -[[tests]] -name = "nb21" -regex = '\B\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb22" -regex = '\B\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb23" -regex = '\B$' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb24" -regex = '\B$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb25" -regex = '\B$' -input = "y x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb26" -regex = '\B.$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb27" -regex = '^\B(fo|foo)\B' -input = "fo" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb28" -regex = '^\B(fo|foo)\B' -input = "fo" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb29" -regex = '^\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb30" -regex = '^\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb31" -regex = '^\B\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb32" -regex = '^\B\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb33" -regex = '^\B$' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb34" -regex = '^\B$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb35" -regex = '^\B.$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb36" -regex = '^\B.\B$' -input = "x" -matches = 
[] -unicode = false -utf8 = false - -[[tests]] -name = "nb37" -regex = '^^^^^\B$$$$$' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb38" -regex = '^^^^^\B.$$$$$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb39" -regex = '^^^^^\B$$$$$' -input = "x" -matches = [] -unicode = false -utf8 = false - - -# unicode1* and unicode2* work for both Unicode and ASCII because all matches -# are reported as byte offsets, and « and » do not correspond to word -# boundaries at either the character or byte level. -[[tests]] -name = "unicode1" -regex = '\bx\b' -input = "«x" -matches = [[2, 3]] - -[[tests]] -name = "unicode1-only-ascii" -regex = '\bx\b' -input = "«x" -matches = [[2, 3]] -unicode = false - -[[tests]] -name = "unicode2" -regex = '\bx\b' -input = "x»" -matches = [[0, 1]] - -[[tests]] -name = "unicode2-only-ascii" -regex = '\bx\b' -input = "x»" -matches = [[0, 1]] -unicode = false - -# ASCII word boundaries are completely oblivious to Unicode characters, so -# even though β is a character, an ASCII \b treats it as a word boundary -# when it is adjacent to another ASCII character. (The ASCII \b only looks -# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. -[[tests]] -name = "unicode3" -regex = '\bx\b' -input = 'áxβ' -matches = [] - -[[tests]] -name = "unicode3-only-ascii" -regex = '\bx\b' -input = 'áxβ' -matches = [[2, 3]] -unicode = false - -[[tests]] -name = "unicode4" -regex = '\Bx\B' -input = 'áxβ' -matches = [[2, 3]] - -[[tests]] -name = "unicode4-only-ascii" -regex = '\Bx\B' -input = 'áxβ' -matches = [] -unicode = false -utf8 = false - -# The same as above, but with \b instead of \B as a sanity check. 
-[[tests]] -name = "unicode5" -regex = '\b' -input = "0\U0007EF5E" -matches = [[0, 0], [1, 1]] - -[[tests]] -name = "unicode5-only-ascii" -regex = '\b' -input = "0\U0007EF5E" -matches = [[0, 0], [1, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "unicode5-noutf8" -regex = '\b' -input = '0\xFF\xFF\xFF\xFF' -matches = [[0, 0], [1, 1]] -unescape = true -utf8 = false - -[[tests]] -name = "unicode5-noutf8-only-ascii" -regex = '\b' -input = '0\xFF\xFF\xFF\xFF' -matches = [[0, 0], [1, 1]] -unescape = true -unicode = false -utf8 = false - -# Weird special case to ensure that ASCII \B treats each individual code unit -# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary -# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the -# \w character class.) -[[tests]] -name = "unicode5-not" -regex = '\B' -input = "0\U0007EF5E" -matches = [[5, 5]] - -[[tests]] -name = "unicode5-not-only-ascii" -regex = '\B' -input = "0\U0007EF5E" -matches = [[2, 2], [3, 3], [4, 4], [5, 5]] -unicode = false -utf8 = false - -# This gets no matches since \B only matches in the presence of valid UTF-8 -# when Unicode is enabled, even when UTF-8 mode is disabled. -[[tests]] -name = "unicode5-not-noutf8" -regex = '\B' -input = '0\xFF\xFF\xFF\xFF' -matches = [] -unescape = true -utf8 = false - -# But this DOES get matches since \B in ASCII mode only looks at individual -# bytes. -[[tests]] -name = "unicode5-not-noutf8-only-ascii" -regex = '\B' -input = '0\xFF\xFF\xFF\xFF' -matches = [[2, 2], [3, 3], [4, 4], [5, 5]] -unescape = true -unicode = false -utf8 = false - -# Some tests of no particular significance. 
-[[tests]] -name = "unicode6" -regex = '\b[0-9]+\b' -input = "foo 123 bar 456 quux 789" -matches = [[4, 7], [12, 15], [21, 24]] - -[[tests]] -name = "unicode7" -regex = '\b[0-9]+\b' -input = "foo 123 bar a456 quux 789" -matches = [[4, 7], [22, 25]] - -[[tests]] -name = "unicode8" -regex = '\b[0-9]+\b' -input = "foo 123 bar 456a quux 789" -matches = [[4, 7], [22, 25]] diff --git a/vendor/regex-automata/tests/dfa/api.rs b/vendor/regex-automata/tests/dfa/api.rs index 80d7d704c..96e73af6c 100644 --- a/vendor/regex-automata/tests/dfa/api.rs +++ b/vendor/regex-automata/tests/dfa/api.rs @@ -1,13 +1,11 @@ use std::error::Error; use regex_automata::{ - dfa::{dense, regex::Regex, Automaton, OverlappingState}, + dfa::{dense, Automaton, OverlappingState}, nfa::thompson, - HalfMatch, MatchError, MatchKind, MultiMatch, + HalfMatch, Input, MatchError, }; -use crate::util::{BunkPrefilter, SubstringPrefilter}; - // Tests that quit bytes in the forward direction work correctly. #[test] fn quit_fwd() -> Result<(), Box<dyn Error>> { @@ -16,16 +14,15 @@ fn quit_fwd() -> Result<(), Box<dyn Error>> { .build("[[:word:]]+$")?; assert_eq!( - dfa.find_earliest_fwd(b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_leftmost_fwd(b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + Err(MatchError::quit(b'x', 3)), + dfa.try_search_fwd(&Input::new(b"abcxyz")) ); assert_eq!( - dfa.find_overlapping_fwd(b"abcxyz", &mut OverlappingState::start()), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + dfa.try_search_overlapping_fwd( + &Input::new(b"abcxyz"), + &mut OverlappingState::start() + ), + Err(MatchError::quit(b'x', 3)), ); Ok(()) @@ -40,12 +37,8 @@ fn quit_rev() -> Result<(), Box<dyn Error>> { .build("^[[:word:]]+")?; assert_eq!( - dfa.find_earliest_rev(b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_leftmost_rev(b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + 
Err(MatchError::quit(b'x', 3)), + dfa.try_search_rev(&Input::new(b"abcxyz")) ); Ok(()) @@ -60,28 +53,6 @@ fn quit_panics() { dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false); } -// Tests that if we attempt an overlapping search using a regex without a -// reverse DFA compiled with 'starts_for_each_pattern', then we get a panic. -#[test] -#[should_panic] -fn incorrect_config_overlapping_search_panics() { - let forward = dense::DFA::new(r"abca").unwrap(); - let reverse = dense::Builder::new() - .configure( - dense::Config::new() - .anchored(true) - .match_kind(MatchKind::All) - .starts_for_each_pattern(false), - ) - .thompson(thompson::Config::new().reverse(true)) - .build(r"abca") - .unwrap(); - - let re = Regex::builder().build_from_dfas(forward, reverse); - let haystack = "bar abcabcabca abca foo".as_bytes(); - re.find_overlapping(haystack, &mut OverlappingState::start()); -} - // This tests an interesting case where even if the Unicode word boundary option // is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode // word boundaries to be enabled. @@ -93,41 +64,6 @@ fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { } let dfa = dense::Builder::new().configure(config).build(r"\b")?; let expected = HalfMatch::must(0, 1); - assert_eq!(dfa.find_leftmost_fwd(b" a"), Ok(Some(expected))); - Ok(()) -} - -// Tests that we can provide a prefilter to a Regex, and the search reports -// correct results. 
-#[test] -fn prefilter_works() -> Result<(), Box<dyn Error>> { - let re = Regex::new(r"a[0-9]+") - .unwrap() - .with_prefilter(SubstringPrefilter::new("a")); - let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; - let matches: Vec<(usize, usize)> = - re.find_leftmost_iter(text).map(|m| (m.start(), m.end())).collect(); - assert_eq!( - matches, - vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] - ); - Ok(()) -} - -// This test confirms that a prefilter is active by using a prefilter that -// reports false negatives. -#[test] -fn prefilter_is_active() -> Result<(), Box<dyn Error>> { - let text = b"za123"; - let re = Regex::new(r"a[0-9]+") - .unwrap() - .with_prefilter(SubstringPrefilter::new("a")); - assert_eq!(re.find_leftmost(b"za123"), Some(MultiMatch::must(0, 1, 5))); - assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4))); - let re = re.with_prefilter(BunkPrefilter::new()); - assert_eq!(re.find_leftmost(b"za123"), None); - // This checks that the prefilter is used when first starting the search, - // instead of waiting until at least one transition has occurred. 
- assert_eq!(re.find_leftmost(b"a123"), None); + assert_eq!(Ok(Some(expected)), dfa.try_search_fwd(&Input::new(b" a"))); Ok(()) } diff --git a/vendor/regex-automata/tests/dfa/mod.rs b/vendor/regex-automata/tests/dfa/mod.rs index f4299510c..0d8f539db 100644 --- a/vendor/regex-automata/tests/dfa/mod.rs +++ b/vendor/regex-automata/tests/dfa/mod.rs @@ -1,2 +1,8 @@ +#[cfg(all(feature = "dfa-build", feature = "dfa-search"))] mod api; +#[cfg(feature = "dfa-onepass")] +mod onepass; +#[cfg(all(feature = "dfa-build", feature = "dfa-search"))] +mod regression; +#[cfg(all(not(miri), feature = "dfa-build", feature = "dfa-search"))] mod suite; diff --git a/vendor/regex-automata/tests/dfa/onepass/mod.rs b/vendor/regex-automata/tests/dfa/onepass/mod.rs new file mode 100644 index 000000000..9d6ab475e --- /dev/null +++ b/vendor/regex-automata/tests/dfa/onepass/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/dfa/onepass/suite.rs b/vendor/regex-automata/tests/dfa/onepass/suite.rs new file mode 100644 index 000000000..20bd6965c --- /dev/null +++ b/vendor/regex-automata/tests/dfa/onepass/suite.rs @@ -0,0 +1,197 @@ +use { + anyhow::Result, + regex_automata::{ + dfa::onepass::{self, DFA}, + nfa::thompson, + util::{iter, syntax}, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures, untestify_kind}; + +const EXPANSIONS: &[&str] = &["is_match", "find", "captures"]; + +/// Tests the default configuration of the one-pass DFA. +#[test] +fn default() -> Result<()> { + let builder = DFA::builder(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the one-pass DFA when 'starts_for_each_pattern' is enabled for all +/// tests. 
+#[test] +fn starts_for_each_pattern() -> Result<()> { + let mut builder = DFA::builder(); + builder.configure(DFA::config().starts_for_each_pattern(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the one-pass DFA when byte classes are disabled. +/// +/// N.B. Disabling byte classes doesn't avoid any indirection at search time. +/// All it does is cause every byte value to be its own distinct equivalence +/// class. +#[test] +fn no_byte_classes() -> Result<()> { + let mut builder = DFA::builder(); + builder.configure(DFA::config().byte_classes(false)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: onepass::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + move |test, regexes| { + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. + if !configure_onepass_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = match builder.build_many(&regexes) { + Ok(re) => re, + Err(err) => { + let msg = err.to_string(); + // This is pretty gross, but when a regex fails to compile as + // a one-pass regex, then we want to be OK with that and just + // skip the test. But we have to be careful to only skip it + // when the expected result is that the regex compiles. If + // the test is specifically checking that the regex does not + // compile, then we should bubble up that error and allow the + // test to pass. + // + // Since our error types are all generally opaque, we just + // look for an error string. Not great, but not the end of the + // world. 
+ if test.compiles() && msg.contains("not one-pass") { + return Ok(CompiledRegex::skip()); + } + return Err(err.into()); + } + }; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &DFA, + cache: &mut onepass::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => { + TestResult::matched(re.is_match(cache, input.earliest(true))) + } + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + let mut caps = re.create_captures(); + let it = iter::Searcher::new(input) + .into_matches_iter(|input| { + re.try_search(cache, input, &mut caps)?; + Ok(caps.get_match()) + }) + .infallible() + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Overlapping => { + // The one-pass DFA does not support any kind of overlapping + // search. This is not just a matter of not having the API. + // It's fundamentally incompatible with the one-pass concept. + // If overlapping matches were possible, then the one-pass DFA + // would fail to build. + TestResult::skip() + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + let it = iter::Searcher::new(input) + .into_captures_iter(re.create_captures(), |input, caps| { + re.try_search(cache, input, caps) + }) + .infallible() + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // The one-pass DFA does not support any kind of overlapping + // search. 
This is not just a matter of not having the API. + // It's fundamentally incompatible with the one-pass concept. + // If overlapping matches were possible, then the one-pass DFA + // would fail to build. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_onepass_builder( + test: &RegexTest, + builder: &mut onepass::Builder, +) -> bool { + if !test.anchored() { + return false; + } + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + + let config = DFA::config().match_kind(match_kind); + builder + .configure(config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/regression.rs b/vendor/regex-automata/tests/dfa/regression.rs index e5355fed7..09caffabc 100644 --- a/vendor/regex-automata/tests/regression.rs +++ b/vendor/regex-automata/tests/dfa/regression.rs @@ -1,13 +1,14 @@ -use regex_automata::{ - dfa::{dense, Automaton}, - MatchError, -}; - // A regression test for checking that minimization correctly translates // whether a state is a match state or not. 
Previously, it was possible for // minimization to mark a non-matching state as matching. #[test] +#[cfg(not(miri))] fn minimize_sets_correct_match_states() { + use regex_automata::{ + dfa::{dense::DFA, Automaton, StartKind}, + Anchored, Input, + }; + let pattern = // This is a subset of the grapheme matching regex. I couldn't seem // to get a repro any smaller than this unfortunately. @@ -36,9 +37,12 @@ fn minimize_sets_correct_match_states() { ) "; - let dfa = dense::Builder::new() - .configure(dense::Config::new().anchored(true).minimize(true)) + let dfa = DFA::builder() + .configure( + DFA::config().start_kind(StartKind::Anchored).minimize(true), + ) .build(pattern) .unwrap(); - assert_eq!(Ok(None), dfa.find_leftmost_fwd(b"\xE2")); + let input = Input::new(b"\xE2").anchored(Anchored::Yes); + assert_eq!(Ok(None), dfa.try_search_fwd(&input)); } diff --git a/vendor/regex-automata/tests/dfa/suite.rs b/vendor/regex-automata/tests/dfa/suite.rs index 426ae346d..f3445e02a 100644 --- a/vendor/regex-automata/tests/dfa/suite.rs +++ b/vendor/regex-automata/tests/dfa/suite.rs @@ -1,23 +1,77 @@ -use regex_automata::{ - dfa::{self, dense, regex::Regex, sparse, Automaton}, - nfa::thompson, - MatchKind, SyntaxConfig, +use { + anyhow::Result, + regex_automata::{ + dfa::{ + self, dense, regex::Regex, sparse, Automaton, OverlappingState, + StartKind, + }, + nfa::thompson, + util::{prefilter::Prefilter, syntax}, + Anchored, Input, PatternSet, + }, + regex_syntax::hir, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, }; -use regex_syntax as syntax; -use regex_test::{ - bstr::{BString, ByteSlice}, - CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, - SearchKind as TestSearchKind, TestResult, TestRunner, -}; +use crate::{create_input, suite, untestify_kind}; -use crate::{suite, Result}; +const EXPANSIONS: &[&str] = &["is_match", "find", "which"]; /// Runs the test suite with the default configuration. 
#[test] fn unminimized_default() -> Result<()> { let builder = Regex::builder(); TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with the default configuration and a prefilter enabled, +/// if one can be built. +#[test] +fn unminimized_prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dense(dense::DFA::config().prefilter(pre)); + compiler(builder, |_, _, re| { + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + })(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Runs the test suite with start states specialized. +#[test] +fn unminimized_specialized_start_states() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().specialize_start_states(true)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") .test_iter(suite()?.iter(), dense_compiler(builder)) .assert(); Ok(()) @@ -30,18 +84,22 @@ fn unminimized_no_byte_class() -> Result<()> { builder.dense(dense::Config::new().byte_classes(false)); TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") .test_iter(suite()?.iter(), dense_compiler(builder)) .assert(); Ok(()) } -/// Runs the test suite with NFA shrinking disabled. 
+/// Runs the test suite with NFA shrinking enabled. #[test] -fn unminimized_no_nfa_shrink() -> Result<()> { +fn unminimized_nfa_shrink() -> Result<()> { let mut builder = Regex::builder(); - builder.thompson(thompson::Config::new().shrink(false)); + builder.thompson(thompson::Config::new().shrink(true)); TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") .test_iter(suite()?.iter(), dense_compiler(builder)) .assert(); Ok(()) @@ -54,7 +112,7 @@ fn minimized_default() -> Result<()> { let mut builder = Regex::builder(); builder.dense(dense::Config::new().minimize(true)); TestRunner::new()? - // These regexes tend to be too big. Minimization takes... forever. + .expand(EXPANSIONS, |t| t.compiles()) .blacklist("expensive") .test_iter(suite()?.iter(), dense_compiler(builder)) .assert(); @@ -68,7 +126,7 @@ fn minimized_no_byte_class() -> Result<()> { builder.dense(dense::Config::new().minimize(true).byte_classes(false)); TestRunner::new()? - // These regexes tend to be too big. Minimization takes... forever. + .expand(EXPANSIONS, |t| t.compiles()) .blacklist("expensive") .test_iter(suite()?.iter(), dense_compiler(builder)) .assert(); @@ -80,22 +138,57 @@ fn minimized_no_byte_class() -> Result<()> { fn sparse_unminimized_default() -> Result<()> { let builder = Regex::builder(); TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") .test_iter(suite()?.iter(), sparse_compiler(builder)) .assert(); Ok(()) } +/// Runs the test suite on a sparse unminimized DFA with prefilters enabled. +#[test] +fn sparse_unminimized_prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. 
+ let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dense(dense::DFA::config().prefilter(pre)); + compiler(builder, |builder, _, re| { + let fwd = re.forward().to_sparse()?; + let rev = re.reverse().to_sparse()?; + let re = builder.build_from_dfas(fwd, rev); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + })(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + /// Another basic sanity test that checks we can serialize and then deserialize /// a regex, and that the resulting regex can be used for searching correctly. #[test] fn serialization_unminimized_default() -> Result<()> { let builder = Regex::builder(); let my_compiler = |builder| { - compiler(builder, |builder, re| { + compiler(builder, |builder, _, re| { let builder = builder.clone(); let (fwd_bytes, _) = re.forward().to_bytes_native_endian(); let (rev_bytes, _) = re.reverse().to_bytes_native_endian(); - Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + Ok(CompiledRegex::compiled(move |test| -> TestResult { let fwd: dense::DFA<&[u32]> = dense::DFA::from_bytes(&fwd_bytes).unwrap().0; let rev: dense::DFA<&[u32]> = @@ -107,6 +200,8 @@ fn serialization_unminimized_default() -> Result<()> { }) }; TestRunner::new()? 
+ .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") .test_iter(suite()?.iter(), my_compiler(builder)) .assert(); Ok(()) @@ -119,11 +214,11 @@ fn serialization_unminimized_default() -> Result<()> { fn sparse_serialization_unminimized_default() -> Result<()> { let builder = Regex::builder(); let my_compiler = |builder| { - compiler(builder, |builder, re| { + compiler(builder, |builder, _, re| { let builder = builder.clone(); let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian(); let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian(); - Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + Ok(CompiledRegex::compiled(move |test| -> TestResult { let fwd: sparse::DFA<&[u8]> = sparse::DFA::from_bytes(&fwd_bytes).unwrap().0; let rev: sparse::DFA<&[u8]> = @@ -134,6 +229,8 @@ fn sparse_serialization_unminimized_default() -> Result<()> { }) }; TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") .test_iter(suite()?.iter(), my_compiler(builder)) .assert(); Ok(()) @@ -141,9 +238,9 @@ fn sparse_serialization_unminimized_default() -> Result<()> { fn dense_compiler( builder: dfa::regex::Builder, -) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { - compiler(builder, |_, re| { - Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + compiler(builder, |_, _, re| { + Ok(CompiledRegex::compiled(move |test| -> TestResult { run_test(&re, test) })) }) @@ -151,12 +248,12 @@ fn dense_compiler( fn sparse_compiler( builder: dfa::regex::Builder, -) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { - compiler(builder, |builder, re| { +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + compiler(builder, |builder, _, re| { let fwd = re.forward().to_sparse()?; let rev = re.reverse().to_sparse()?; let re = builder.build_from_dfas(fwd, rev); - Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> 
{ + Ok(CompiledRegex::compiled(move |test| -> TestResult { run_test(&re, test) })) }) @@ -166,79 +263,79 @@ fn compiler( mut builder: dfa::regex::Builder, mut create_matcher: impl FnMut( &dfa::regex::Builder, + Option<Prefilter>, Regex, ) -> Result<CompiledRegex>, -) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { move |test, regexes| { - let regexes = regexes - .iter() - .map(|r| r.to_str().map(|s| s.to_string())) - .collect::<std::result::Result<Vec<String>, _>>()?; + // Parse regexes as HIRs for some analysis below. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + + // Get a prefilter in case the test wants it. + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); // Check if our regex contains things that aren't supported by DFAs. // That is, Unicode word boundaries when searching non-ASCII text. - let mut thompson = thompson::Builder::new(); - thompson.configure(config_thompson(test)); - // TODO: Modify Hir to report facts like this, instead of needing to - // build an NFA to do it. - if let Ok(nfa) = thompson.build_many(&regexes) { - let non_ascii = test.input().iter().any(|&b| !b.is_ascii()); - if nfa.has_word_boundary_unicode() && non_ascii { - return Ok(CompiledRegex::skip()); + if !test.haystack().is_ascii() { + for hir in hirs.iter() { + let looks = hir.properties().look_set(); + if looks.contains(hir::Look::WordUnicode) + || looks.contains(hir::Look::WordUnicodeNegate) + { + return Ok(CompiledRegex::skip()); + } } } if !configure_regex_builder(test, &mut builder) { return Ok(CompiledRegex::skip()); } - create_matcher(&builder, builder.build_many(&regexes)?) + create_matcher(&builder, pre, builder.build_many(&regexes)?) 
} } -fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> { - let is_match = if re.is_match(test.input()) { - TestResult::matched() - } else { - TestResult::no_match() - }; - let is_match = is_match.name("is_match"); - - let find_matches = match test.search_kind() { - TestSearchKind::Earliest => { - let it = re - .find_earliest_iter(test.input()) - .take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_earliest_iter") - } - TestSearchKind::Leftmost => { - let it = re - .find_leftmost_iter(test.input()) - .take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_leftmost_iter") - } - TestSearchKind::Overlapping => { - let it = re - .find_overlapping_iter(test.input()) - .take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_overlapping_iter") - } - }; - - vec![is_match, find_matches] +fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(input.earliest(true))), + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + TestResult::matches( + re.find_iter(input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ) + } + SearchKind::Overlapping => { + try_search_overlapping(re, &input).unwrap() + } + }, + "which" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + // There are no "which" APIs for 
standard searches. + TestResult::skip() + } + SearchKind::Overlapping => { + let dfa = re.forward(); + let mut patset = PatternSet::new(dfa.pattern_len()); + dfa.try_which_overlapping_matches(&input, &mut patset) + .unwrap(); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } } /// Configures the given regex builder with all relevant settings on the given @@ -250,25 +347,32 @@ fn configure_regex_builder( test: &RegexTest, builder: &mut dfa::regex::Builder, ) -> bool { - let match_kind = match test.match_kind() { - TestMatchKind::All => MatchKind::All, - TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst, - TestMatchKind::LeftmostLongest => return false, + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, }; - let syntax_config = SyntaxConfig::new() - .case_insensitive(test.case_insensitive()) - .unicode(test.unicode()) - .utf8(test.utf8()); - let dense_config = dense::Config::new() - .anchored(test.anchored()) + let starts = if test.anchored() { + StartKind::Anchored + } else { + StartKind::Unanchored + }; + let mut dense_config = dense::Config::new() + .start_kind(starts) .match_kind(match_kind) .unicode_word_boundary(true); - let regex_config = Regex::config().utf8(test.utf8()); + // When doing an overlapping search, we might try to find the start of each + // match with a custom search routine. In that case, we need to tell the + // reverse search (for the start offset) which pattern to look for. The + // only way that API works is when anchored starting states are compiled + // for each pattern. This does technically also enable it for the forward + // DFA, but we're okay with that. 
+ if test.search_kind() == SearchKind::Overlapping { + dense_config = dense_config.starts_for_each_pattern(true); + } builder - .configure(regex_config) - .syntax(syntax_config) + .syntax(config_syntax(test)) .thompson(config_thompson(test)) .dense(dense_config); true @@ -276,5 +380,68 @@ fn configure_regex_builder( /// Configuration of a Thompson NFA compiler from a regex test. fn config_thompson(test: &RegexTest) -> thompson::Config { - thompson::Config::new().utf8(test.utf8()) + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex syntax from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} + +/// Execute an overlapping search, and for each match found, also find its +/// overlapping starting positions. +/// +/// N.B. This routine used to be part of the crate API, but 1) it wasn't clear +/// to me how useful it was and 2) it wasn't clear to me what its semantics +/// should be. In particular, a potentially surprising footgun of this routine +/// is that it is worst case *quadratic* in the size of the haystack. Namely, it's +/// possible to report a match at every position, and for every such position, +/// scan all the way to the beginning of the haystack to find the starting +/// position. Typical leftmost non-overlapping searches don't suffer from this +/// because, well, matches can't overlap. So subsequent searches after a match +/// is found don't revisit previously scanned parts of the haystack. +/// +/// Its semantics can be strange for other reasons too. For example, given +/// the regex '.*' and the haystack 'zz', the full set of overlapping matches +/// is: [0, 0], [1, 1], [0, 1], [2, 2], [1, 2], [0, 2]. 
The ordering of +/// those matches is quite strange, but makes sense when you think about the +/// implementation: an end offset is found left-to-right, and then one or more +/// starting offsets are found right-to-left. +/// +/// Nevertheless, we provide this routine in our test suite because it's +/// useful to test the low level DFA overlapping search and our test suite +/// is written in a way that requires starting offsets. +fn try_search_overlapping<A: Automaton>( + re: &Regex<A>, + input: &Input<'_>, +) -> Result<TestResult> { + let mut matches = vec![]; + let mut fwd_state = OverlappingState::start(); + let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse()); + while let Some(end) = { + fwd_dfa.try_search_overlapping_fwd(input, &mut fwd_state)?; + fwd_state.get_match() + } { + let revsearch = input + .clone() + .range(input.start()..end.offset()) + .anchored(Anchored::Pattern(end.pattern())) + .earliest(false); + let mut rev_state = OverlappingState::start(); + while let Some(start) = { + rev_dfa.try_search_overlapping_rev(&revsearch, &mut rev_state)?; + rev_state.get_match() + } { + let span = Span { start: start.offset(), end: end.offset() }; + let mat = Match { id: end.pattern().as_usize(), span }; + matches.push(mat); + } + } + Ok(TestResult::matches(matches)) } diff --git a/vendor/regex-automata/tests/fuzz/dense.rs b/vendor/regex-automata/tests/fuzz/dense.rs new file mode 100644 index 000000000..213891b3e --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/dense.rs @@ -0,0 +1,52 @@ +// This test was found by a fuzzer input that crafted a way to provide +// an invalid serialization of ByteClasses that passed our verification. +// Specifically, the verification step in the deserialization of ByteClasses +// used an iterator that depends on part of the serialized bytes being correct. +// (Specifically, the encoding of the number of classes.) 
+#[test] +fn invalid_byte_classes() { + let data = include_bytes!( + "testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9", + ); + let _ = fuzz_run(data); +} + +#[test] +fn invalid_byte_classes_min() { + let data = include_bytes!( + "testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9", + ); + let _ = fuzz_run(data); +} + +// This is the code from the fuzz target. Kind of sucks to duplicate it here, +// but this is fundamentally how we interpret the data. +fn fuzz_run(given_data: &[u8]) -> Option<()> { + use regex_automata::dfa::Automaton; + + if given_data.len() < 2 { + return None; + } + let haystack_len = usize::from(given_data[0]); + let haystack = given_data.get(1..1 + haystack_len)?; + let given_dfa_bytes = given_data.get(1 + haystack_len..)?; + + // We help the fuzzer along by adding a preamble to the bytes that should + // at least make these first parts valid. The preamble expects a very + // specific sequence of bytes, so it makes sense to just force this. + let label = "rust-regex-automata-dfa-dense\x00\x00\x00"; + assert_eq!(0, label.len() % 4); + let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec(); + let version_check = 2u32.to_ne_bytes().to_vec(); + let mut dfa_bytes: Vec<u8> = vec![]; + dfa_bytes.extend(label.as_bytes()); + dfa_bytes.extend(&endianness_check); + dfa_bytes.extend(&version_check); + dfa_bytes.extend(given_dfa_bytes); + // This is the real test: checking that any input we give to + // DFA::from_bytes will never result in a panic. 
+ let (dfa, _) = + regex_automata::dfa::dense::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack)); + Some(()) +} diff --git a/vendor/regex-automata/tests/fuzz/mod.rs b/vendor/regex-automata/tests/fuzz/mod.rs new file mode 100644 index 000000000..960cb4251 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/mod.rs @@ -0,0 +1,2 @@ +mod dense; +mod sparse; diff --git a/vendor/regex-automata/tests/fuzz/sparse.rs b/vendor/regex-automata/tests/fuzz/sparse.rs new file mode 100644 index 000000000..837ad1014 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/sparse.rs @@ -0,0 +1,132 @@ +// This is a regression test for a bug in how special states are handled. The +// fuzzer found a case where a state returned true for 'is_special_state' but +// *didn't* return true for 'is_dead_state', 'is_quit_state', 'is_match_state', +// 'is_start_state' or 'is_accel_state'. This in turn tripped a debug assertion +// in the core matching loop that requires 'is_special_state' being true to +// imply that one of the other routines returns true. +// +// We fixed this by adding some validation to both dense and sparse DFAs that +// checks that this property is true for every state ID in the DFA. +#[test] +fn invalid_special_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838", + ); + let _ = fuzz_run(data); +} + +// This is an interesting case where a fuzzer generated a DFA with +// a transition to a state ID that decoded as a valid state, but +// where the ID itself did not point to one of the two existing +// states for this particular DFA. This combined with marking this +// transition's state ID as special but without actually making one of the +// 'is_{dead,quit,match,start,accel}_state' predicates return true ended up +// tripping the 'debug_assert(dfa.is_quit_state(sid))' code in the search +// routine. 
+// +// We fixed this in alloc mode by checking that every transition points to a +// valid state ID. Technically this bug still exists in core-only mode, but +// it's not clear how to fix it. And it's worth pointing out that the search +// routine won't panic in production. It will just provide invalid results. And +// that's acceptable within the contract of DFA::from_bytes. +#[test] +fn transition_to_invalid_but_valid_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9", + ); + let _ = fuzz_run(data); +} + +// Another one caught by the fuzzer where it generated a DFA that reported a +// start state as a match state. Since matches are always delayed by one byte, +// start states specifically cannot be match states. And indeed, the search +// code relies on this. +#[test] +fn start_state_is_not_match_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000", + ); + let _ = fuzz_run(data); +} + +// This is a variation on 'transition_to_invalid_but_valid_state', but happens +// to a start state. Namely, the fuzz data here builds a DFA with a start +// state ID that is incorrect but points to a sequence of bytes that satisfies +// state decoding validation. This errant state in turn has a non-zero number +// of transitions, and it's those transitions that point to a state that does +// *not* satisfy state decoding validation. But we never checked those. So the +// fix here was to add validation of the transitions off of the start state. +#[test] +fn start_state_has_valid_transitions() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose ID was in the match state +// ID range, but where the state itself was encoded with zero pattern IDs. We +// added validation code to check this case. 
+#[test] +fn match_state_inconsistency() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose ID was in the accelerator +// range, but who didn't have any accelerators. This violated an invariant that +// assumes that if 'dfa.is_accel_state(sid)' returns true, then the state must +// have some accelerators. +#[test] +fn invalid_accelerators() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose EOI transition led to +// a quit state, which is generally considered illegal. Why? Because the EOI +// transition is defined over a special sentinel alphabet element and one +// cannot configure a DFA to "quit" on that sentinel. +#[test] +fn eoi_transition_to_quit_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9", + ); + let _ = fuzz_run(data); +} + +// This is the code from the fuzz target. Kind of sucks to duplicate it here, +// but this is fundamentally how we interpret the data. +fn fuzz_run(given_data: &[u8]) -> Option<()> { + use regex_automata::dfa::Automaton; + + if given_data.len() < 2 { + return None; + } + let haystack_len = usize::from(given_data[0]); + let haystack = given_data.get(1..1 + haystack_len)?; + let given_dfa_bytes = given_data.get(1 + haystack_len..)?; + + // We help the fuzzer along by adding a preamble to the bytes that should + // at least make these first parts valid. The preamble expects a very + // specific sequence of bytes, so it makes sense to just force this. 
+ let label = "rust-regex-automata-dfa-sparse\x00\x00"; + assert_eq!(0, label.len() % 4); + let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec(); + let version_check = 2u32.to_ne_bytes().to_vec(); + let mut dfa_bytes: Vec<u8> = vec![]; + dfa_bytes.extend(label.as_bytes()); + dfa_bytes.extend(&endianness_check); + dfa_bytes.extend(&version_check); + dfa_bytes.extend(given_dfa_bytes); + // This is the real test: checking that any input we give to + // DFA::from_bytes will never result in a panic. + let (dfa, _) = + regex_automata::dfa::sparse::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack)); + Some(()) +} diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 Binary files differnew file mode 100644 index 000000000..972bfb2cd --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 Binary files differnew file mode 100644 index 000000000..72dbdad82 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 Binary files differnew file mode 100644 index 000000000..5ce508803 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 diff --git 
a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 Binary files differnew file mode 100644 index 000000000..4fa13fbed --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 Binary files differnew file mode 100644 index 000000000..0f809f33f --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 Binary files differnew file mode 100644 index 000000000..8b435fd26 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 Binary files differnew file mode 100644 index 000000000..69b65160c --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b Binary files differnew file mode 100644 index 000000000..15b43e47f --- 
/dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 Binary files differnew file mode 100644 index 000000000..aa72eb1dd --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 diff --git a/vendor/regex-automata/tests/gen/README.md b/vendor/regex-automata/tests/gen/README.md new file mode 100644 index 000000000..59439a11f --- /dev/null +++ b/vendor/regex-automata/tests/gen/README.md @@ -0,0 +1,65 @@ +This directory contains tests for serialized objects from the regex-automata +crate. Currently, there are only two supported such objects: dense and sparse +DFAs. + +The idea behind these tests is to commit some serialized objects and run some +basic tests by deserializing them and running searches and ensuring they are +correct. We also make sure these are run under Miri, since deserialization is +one of the biggest places where undefined behavior might occur in this crate +(at the time of writing). + +The main thing we're testing is that the *current* code can still deserialize +*old* objects correctly. Generally speaking, compatibility extends to semver +compatible releases of this crate. Beyond that, no promises are made, although +in practice callers can at least depend on errors occurring. (The serialized +format always includes a version number, and incompatible changes increment +that version number such that an error will occur if an unsupported version is +detected.) 
+ +To generate the dense DFAs, I used this command: + +``` +$ regex-cli generate serialize dense regex \ + MULTI_PATTERN_V2 \ + tests/gen/dense/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +And to generate the sparse DFAs, I used this command, which is the same as +above, but with `s/dense/sparse/g`. + +``` +$ regex-cli generate serialize sparse regex \ + MULTI_PATTERN_V2 \ + tests/gen/sparse/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +The idea is to try to enable as many of the DFA's options as possible in order +to test that serialization works for all of them. + +Arguably we should increase test coverage here, but this is a start. Note +that in particular, this does not need to test that serialization and +deserialization correctly roundtrips on its own. Indeed, the normal regex test +suite has a test that does a serialization round trip for every test supported +by DFAs. So that has very good coverage. What we're interested in testing here +is our compatibility promise: do DFAs generated with an older revision of the +code still deserialize correctly? 
diff --git a/vendor/regex-automata/tests/gen/dense/mod.rs b/vendor/regex-automata/tests/gen/dense/mod.rs new file mode 100644 index 000000000..b4365d4e1 --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/mod.rs @@ -0,0 +1,22 @@ +use regex_automata::{Input, Match}; + +mod multi_pattern_v2; + +#[test] +fn multi_pattern_v2() { + use multi_pattern_v2::MULTI_PATTERN_V2 as RE; + + assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd")); + assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @")); + assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n")); + assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n")); + assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n")); + assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@")); + assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n")); + + // Fails because we have heuristic support for Unicode word boundaries + // enabled. + assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err()); +} diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs new file mode 100644 index 000000000..a95fd204b --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs @@ -0,0 +1,43 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// regex-cli generate serialize dense regex MULTI_PATTERN_V2 tests/gen/dense/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$ +// +// regex-cli 0.0.1 is available on crates.io. 
+ +use regex_automata::{ + dfa::{dense::DFA, regex::Regex}, + util::{lazy::Lazy, wire::AlignAs}, +}; + +pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u32]>>> = + Lazy::new(|| { + let dfafwd = { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!( + "multi_pattern_v2_fwd.littleendian.dfa" + ), + }; + DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("multi_pattern_v2_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!( + "multi_pattern_v2_rev.littleendian.dfa" + ), + }; + DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa Binary files differnew file mode 100644 index 000000000..6d6e040c3 --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa Binary files differnew file mode 100644 index 000000000..a1f4b3da1 --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa Binary files differnew file mode 100644 index 000000000..74f74ec2a --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa diff --git 
a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa Binary files differnew file mode 100644 index 000000000..663bdb9ea --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa diff --git a/vendor/regex-automata/tests/gen/mod.rs b/vendor/regex-automata/tests/gen/mod.rs new file mode 100644 index 000000000..960cb4251 --- /dev/null +++ b/vendor/regex-automata/tests/gen/mod.rs @@ -0,0 +1,2 @@ +mod dense; +mod sparse; diff --git a/vendor/regex-automata/tests/gen/sparse/mod.rs b/vendor/regex-automata/tests/gen/sparse/mod.rs new file mode 100644 index 000000000..b4365d4e1 --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/mod.rs @@ -0,0 +1,22 @@ +use regex_automata::{Input, Match}; + +mod multi_pattern_v2; + +#[test] +fn multi_pattern_v2() { + use multi_pattern_v2::MULTI_PATTERN_V2 as RE; + + assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd")); + assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @")); + assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n")); + assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n")); + assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n")); + assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@")); + assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n")); + + // Fails because we have heuristic support for Unicode word boundaries + // enabled. + assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err()); +} diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs new file mode 100644 index 000000000..911e3f5dd --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs @@ -0,0 +1,37 @@ +// DO NOT EDIT THIS FILE. 
IT WAS AUTOMATICALLY GENERATED BY: +// +// regex-cli generate serialize sparse regex MULTI_PATTERN_V2 regex-automata/tests/gen/sparse/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$ +// +// regex-cli 0.0.1 is available on crates.io. + +use regex_automata::{ + dfa::{regex::Regex, sparse::DFA}, + util::lazy::Lazy, +}; + +pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u8]>>> = + Lazy::new(|| { + let dfafwd = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa Binary files differnew file mode 100644 index 000000000..aa04f6316 --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa Binary files differnew file mode 100644 index 000000000..c27d92abe --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa diff --git 
a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa Binary files differnew file mode 100644 index 000000000..89867d30f --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa Binary files differnew file mode 100644 index 000000000..c0ca807f8 --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa diff --git a/vendor/regex-automata/tests/hybrid/api.rs b/vendor/regex-automata/tests/hybrid/api.rs index 9a834dbb8..e82d808e3 100644 --- a/vendor/regex-automata/tests/hybrid/api.rs +++ b/vendor/regex-automata/tests/hybrid/api.rs @@ -1,25 +1,29 @@ use std::error::Error; use regex_automata::{ - hybrid::{ - dfa::{self, DFA}, - regex::Regex, - OverlappingState, - }, + hybrid::dfa::{OverlappingState, DFA}, nfa::thompson, - HalfMatch, MatchError, MatchKind, MultiMatch, + HalfMatch, Input, MatchError, }; -use crate::util::{BunkPrefilter, SubstringPrefilter}; - // Tests that too many cache resets cause the lazy DFA to quit. // // We only test this on 64-bit because the test is gingerly crafted based on // implementation details of cache sizes. It's not a great test because of // that, but it does check some interesting properties around how positions are // reported when a search "gives up." +// +// NOTE: If you change something in lazy DFA implementation that causes this +// test to fail by reporting different "gave up" positions, then it's generally +// okay to update the positions in the test below as long as you're sure your +// changes are correct. 
Namely, it is expected that if there are changes in the +// cache size (or changes in how big things are inside the cache), then its +// utilization may change slightly and thus impact where a search gives up. +// Precisely where a search gives up is not an API guarantee, so changing the +// offsets here is OK. #[test] #[cfg(target_pointer_width = "64")] +#[cfg(not(miri))] fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { // This is a carefully chosen regex. The idea is to pick one that requires // some decent number of states (hence the bounded repetition). But we @@ -27,9 +31,16 @@ fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { // non-ASCII letter so that we can check that no new states are created // once the cache is full. Namely, if we fill up the cache on a haystack // of 'a's, then in order to match one 'β', a new state will need to be - // created since a 'β' is encoded with multiple bytes. Since there's no - // room for this state, the search should quit at the very first position. - let pattern = r"[aβ]{100}"; + // created since a 'β' is encoded with multiple bytes. + // + // So we proceed by "filling" up the cache by searching a haystack of just + // 'a's. The cache won't have enough room to add enough states to find the + // match (because of the bounded repetition), which should result in it + // giving up before it finds a match. + // + // Since there's now no more room to create states, we search a haystack + // of 'β' and confirm that it gives up immediately. 
+ let pattern = r"[aβ]{99}"; let dfa = DFA::builder() .configure( // Configure it so that we have the minimum cache capacity @@ -39,38 +50,53 @@ fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { .cache_capacity(0) .minimum_cache_clear_count(Some(0)), ) + .thompson(thompson::NFA::config()) .build(pattern)?; let mut cache = dfa.create_cache(); let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 25 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone())); - assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone())); + let err = MatchError::gave_up(25); + // Notice that we make the same amount of progress in each search! That's + // because the cache is reused and already has states to handle the first + // N bytes. + assert_eq!( + Err(err.clone()), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); assert_eq!( - dfa.find_overlapping_fwd( + Err(err.clone()), + dfa.try_search_overlapping_fwd( &mut cache, - &haystack, + &Input::new(&haystack), &mut OverlappingState::start() ), - Err(err.clone()) ); let haystack = "β".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 0 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + let err = MatchError::gave_up(2); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); // no need to test that other find routines quit, since we did that above // OK, if we reset the cache, then we should be able to create more states // and make more progress with searching for betas. cache.reset(&dfa); - let err = MatchError::GaveUp { offset: 26 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + let err = MatchError::gave_up(27); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); // ... switching back to ASCII still makes progress since it just needs to // set transitions on existing states! 
let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 13 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + let err = MatchError::gave_up(13); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); Ok(()) } @@ -84,20 +110,16 @@ fn quit_fwd() -> Result<(), Box<dyn Error>> { let mut cache = dfa.create_cache(); assert_eq!( - dfa.find_earliest_fwd(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + dfa.try_search_fwd(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), ); assert_eq!( - dfa.find_leftmost_fwd(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_overlapping_fwd( + dfa.try_search_overlapping_fwd( &mut cache, - b"abcxyz", + &Input::new(b"abcxyz"), &mut OverlappingState::start() ), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + Err(MatchError::quit(b'x', 3)), ); Ok(()) @@ -113,12 +135,8 @@ fn quit_rev() -> Result<(), Box<dyn Error>> { let mut cache = dfa.create_cache(); assert_eq!( - dfa.find_earliest_rev(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_leftmost_rev(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + dfa.try_search_rev(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), ); Ok(()) @@ -145,51 +163,9 @@ fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { let dfa = DFA::builder().configure(config).build(r"\b")?; let mut cache = dfa.create_cache(); let expected = HalfMatch::must(0, 1); - assert_eq!(dfa.find_leftmost_fwd(&mut cache, b" a"), Ok(Some(expected))); - Ok(()) -} - -// Tests that we can provide a prefilter to a Regex, and the search reports -// correct results. 
-#[test] -fn prefilter_works() -> Result<(), Box<dyn Error>> { - let mut re = Regex::new(r"a[0-9]+").unwrap(); - re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); - let mut cache = re.create_cache(); - - let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; - let matches: Vec<(usize, usize)> = re - .find_leftmost_iter(&mut cache, text) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!( - matches, - vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] - ); - Ok(()) -} - -// This test confirms that a prefilter is active by using a prefilter that -// reports false negatives. -#[test] -fn prefilter_is_active() -> Result<(), Box<dyn Error>> { - let text = b"za123"; - let mut re = Regex::new(r"a[0-9]+").unwrap(); - let mut cache = re.create_cache(); - - re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); - assert_eq!( - re.find_leftmost(&mut cache, b"za123"), - Some(MultiMatch::must(0, 1, 5)) - ); assert_eq!( - re.find_leftmost(&mut cache, b"a123"), - Some(MultiMatch::must(0, 0, 4)) + Ok(Some(expected)), + dfa.try_search_fwd(&mut cache, &Input::new(" a")), ); - re.set_prefilter(Some(Box::new(BunkPrefilter::new()))); - assert_eq!(re.find_leftmost(&mut cache, b"za123"), None); - // This checks that the prefilter is used when first starting the search, - // instead of waiting until at least one transition has occurred. 
- assert_eq!(re.find_leftmost(&mut cache, b"a123"), None); Ok(()) } diff --git a/vendor/regex-automata/tests/hybrid/mod.rs b/vendor/regex-automata/tests/hybrid/mod.rs index f4299510c..36667d09c 100644 --- a/vendor/regex-automata/tests/hybrid/mod.rs +++ b/vendor/regex-automata/tests/hybrid/mod.rs @@ -1,2 +1,3 @@ mod api; +#[cfg(not(miri))] mod suite; diff --git a/vendor/regex-automata/tests/hybrid/suite.rs b/vendor/regex-automata/tests/hybrid/suite.rs index d60570d84..4aaca6698 100644 --- a/vendor/regex-automata/tests/hybrid/suite.rs +++ b/vendor/regex-automata/tests/hybrid/suite.rs @@ -1,55 +1,113 @@ -use regex_automata::{ - hybrid::{ - dfa::DFA, - regex::{self, Regex}, +use { + anyhow::Result, + regex_automata::{ + hybrid::{ + dfa::{OverlappingState, DFA}, + regex::{self, Regex}, + }, + nfa::thompson, + util::{prefilter::Prefilter, syntax}, + Anchored, Input, PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, }, - nfa::thompson, - MatchKind, SyntaxConfig, }; -use regex_syntax as syntax; -use regex_test::{ - bstr::{BString, ByteSlice}, - CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, - SearchKind as TestSearchKind, TestResult, TestRunner, -}; +use crate::{create_input, suite, untestify_kind}; -use crate::{suite, Result}; +const EXPANSIONS: &[&str] = &["is_match", "find", "which"]; /// Tests the default configuration of the hybrid NFA/DFA. #[test] fn default() -> Result<()> { let builder = Regex::builder(); - TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA with prefilters enabled. 
+#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dfa(DFA::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), my_compiler) + .assert(); Ok(()) } -/// Tests the hybrid NFA/DFA with NFA shrinking disabled. +/// Tests the hybrid NFA/DFA with NFA shrinking enabled. /// -/// This is actually the typical configuration one wants for a lazy DFA. NFA +/// This is *usually* not the configuration one wants for a lazy DFA. NFA /// shrinking is mostly only advantageous when building a full DFA since it /// can sharply decrease the amount of time determinization takes. But NFA -/// shrinking is itself otherwise fairly expensive. Since a lazy DFA has -/// no compilation time (other than for building the NFA of course) before +/// shrinking is itself otherwise fairly expensive currently. Since a lazy DFA +/// has no compilation time (other than for building the NFA of course) before /// executing a search, it's usually worth it to forgo NFA shrinking. +/// +/// Nevertheless, we test to make sure everything is OK with NFA shrinking. As +/// a bonus, there are some tests we don't need to skip because they now fit in +/// the default cache capacity. 
#[test] -fn no_nfa_shrink() -> Result<()> { +fn nfa_shrink() -> Result<()> { let mut builder = Regex::builder(); - builder.thompson(thompson::Config::new().shrink(false)); + builder.thompson(thompson::Config::new().shrink(true)); TestRunner::new()? - // Without NFA shrinking, this test blows the default cache capacity. - .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .expand(EXPANSIONS, |t| t.compiles()) .test_iter(suite()?.iter(), compiler(builder)) .assert(); Ok(()) } -/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled. +/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled for all +/// tests. #[test] fn starts_for_each_pattern() -> Result<()> { let mut builder = Regex::builder(); builder.dfa(DFA::config().starts_for_each_pattern(true)); - TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'specialize_start_states' is enabled. +#[test] +fn specialize_start_states() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().specialize_start_states(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); Ok(()) } @@ -62,7 +120,12 @@ fn starts_for_each_pattern() -> Result<()> { fn no_byte_classes() -> Result<()> { let mut builder = Regex::builder(); builder.dfa(DFA::config().byte_classes(false)); - TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + TestRunner::new()? 
+ .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); Ok(()) } @@ -76,7 +139,12 @@ fn no_byte_classes() -> Result<()> { fn no_cache_clearing() -> Result<()> { let mut builder = Regex::builder(); builder.dfa(DFA::config().minimum_cache_clear_count(Some(0))); - TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); Ok(()) } @@ -86,27 +154,30 @@ fn min_cache_capacity() -> Result<()> { let mut builder = Regex::builder(); builder .dfa(DFA::config().cache_capacity(0).skip_cache_capacity_check(true)); - TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); Ok(()) } fn compiler( mut builder: regex::Builder, -) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { move |test, regexes| { - let regexes = regexes - .iter() - .map(|r| r.to_str().map(|s| s.to_string())) - .collect::<std::result::Result<Vec<String>, _>>()?; + // Parse regexes as HIRs for some analysis below. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } // Check if our regex contains things that aren't supported by DFAs. // That is, Unicode word boundaries when searching non-ASCII text. 
- let mut thompson = thompson::Builder::new(); - thompson.syntax(config_syntax(test)).configure(config_thompson(test)); - if let Ok(nfa) = thompson.build_many(&regexes) { - let non_ascii = test.input().iter().any(|&b| !b.is_ascii()); - if nfa.has_word_boundary_unicode() && non_ascii { - return Ok(CompiledRegex::skip()); + if !test.haystack().is_ascii() { + for hir in hirs.iter() { + if hir.properties().look_set().contains_word_unicode() { + return Ok(CompiledRegex::skip()); + } } } if !configure_regex_builder(test, &mut builder) { @@ -114,7 +185,7 @@ fn compiler( } let re = builder.build_many(&regexes)?; let mut cache = re.create_cache(); - Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + Ok(CompiledRegex::compiled(move |test| -> TestResult { run_test(&re, &mut cache, test) })) } @@ -124,50 +195,45 @@ fn run_test( re: &Regex, cache: &mut regex::Cache, test: &RegexTest, -) -> Vec<TestResult> { - let is_match = if re.is_match(cache, test.input()) { - TestResult::matched() - } else { - TestResult::no_match() - }; - let is_match = is_match.name("is_match"); - - let find_matches = match test.search_kind() { - TestSearchKind::Earliest => { - let it = re - .find_earliest_iter(cache, test.input()) - .take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_earliest_iter") - } - TestSearchKind::Leftmost => { - let it = re - .find_leftmost_iter(cache, test.input()) - .take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_leftmost_iter") } - TestSearchKind::Overlapping => { - let it = re - .find_overlapping_iter(cache, test.input()) -
.take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_overlapping_iter") - } - }; - vec![is_match, find_matches] + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + TestResult::matches( + re.find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ) + } + SearchKind::Overlapping => { + try_search_overlapping(re, cache, &input).unwrap() + } + }, + "which" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + // There are no "which" APIs for standard searches. + TestResult::skip() + } + SearchKind::Overlapping => { + let dfa = re.forward(); + let cache = cache.as_parts_mut().0; + let mut patset = PatternSet::new(dfa.pattern_len()); + dfa.try_which_overlapping_matches(cache, &input, &mut patset) + .unwrap(); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } } /// Configures the given regex builder with all relevant settings on the given @@ -179,34 +245,103 @@ fn configure_regex_builder( test: &RegexTest, builder: &mut regex::Builder, ) -> bool { - let match_kind = match test.match_kind() { - TestMatchKind::All => MatchKind::All, - TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst, - TestMatchKind::LeftmostLongest => return false, + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, }; - let dense_config = DFA::config() - .anchored(test.anchored()) - .match_kind(match_kind) - .unicode_word_boundary(true); - let regex_config = Regex::config().utf8(test.utf8()); + let mut dfa_config = + 
DFA::config().match_kind(match_kind).unicode_word_boundary(true); + // When doing an overlapping search, we might try to find the start of each + // match with a custom search routine. In that case, we need to tell the + // reverse search (for the start offset) which pattern to look for. The + // only way that API works is when anchored starting states are compiled + // for each pattern. This does technically also enable it for the forward + // DFA, but we're okay with that. + if test.search_kind() == SearchKind::Overlapping { + dfa_config = dfa_config.starts_for_each_pattern(true); + } builder - .configure(regex_config) .syntax(config_syntax(test)) .thompson(config_thompson(test)) - .dfa(dense_config); + .dfa(dfa_config); true } /// Configuration of a Thompson NFA compiler from a regex test. fn config_thompson(test: &RegexTest) -> thompson::Config { - thompson::Config::new().utf8(test.utf8()) + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) } /// Configuration of the regex parser from a regex test. -fn config_syntax(test: &RegexTest) -> SyntaxConfig { - SyntaxConfig::new() +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() .case_insensitive(test.case_insensitive()) .unicode(test.unicode()) .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} + +/// Execute an overlapping search, and for each match found, also find its +/// overlapping starting positions. +/// +/// N.B. This routine used to be part of the crate API, but 1) it wasn't clear +/// to me how useful it was and 2) it wasn't clear to me what its semantics +/// should be. In particular, a potentially surprising footgun of this routine +/// that it is worst case *quadratic* in the size of the haystack. 
Namely, it's +/// possible to report a match at every position, and for every such position, +/// scan all the way to the beginning of the haystack to find the starting +/// position. Typical leftmost non-overlapping searches don't suffer from this +/// because, well, matches can't overlap. So subsequent searches after a match +/// is found don't revisit previously scanned parts of the haystack. +/// +/// Its semantics can be strange for other reasons too. For example, given +/// the regex '.*' and the haystack 'zz', the full set of overlapping matches +/// is: [0, 0], [1, 1], [0, 1], [2, 2], [1, 2], [0, 2]. The ordering of +/// those matches is quite strange, but makes sense when you think about the +/// implementation: an end offset is found left-to-right, and then one or more +/// starting offsets are found right-to-left. +/// +/// Nevertheless, we provide this routine in our test suite because it's +/// useful to test the low level DFA overlapping search and our test suite +/// is written in a way that requires starting offsets. 
+fn try_search_overlapping( + re: &Regex, + cache: &mut regex::Cache, + input: &Input<'_>, +) -> Result<TestResult> { + let mut matches = vec![]; + let mut fwd_state = OverlappingState::start(); + let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse()); + let (fwd_cache, rev_cache) = cache.as_parts_mut(); + while let Some(end) = { + fwd_dfa.try_search_overlapping_fwd( + fwd_cache, + input, + &mut fwd_state, + )?; + fwd_state.get_match() + } { + let revsearch = input + .clone() + .range(input.start()..end.offset()) + .anchored(Anchored::Pattern(end.pattern())) + .earliest(false); + let mut rev_state = OverlappingState::start(); + while let Some(start) = { + rev_dfa.try_search_overlapping_rev( + rev_cache, + &revsearch, + &mut rev_state, + )?; + rev_state.get_match() + } { + let span = Span { start: start.offset(), end: end.offset() }; + let mat = Match { id: end.pattern().as_usize(), span }; + matches.push(mat); + } + } + Ok(TestResult::matches(matches)) } diff --git a/vendor/regex-automata/tests/lib.rs b/vendor/regex-automata/tests/lib.rs new file mode 100644 index 000000000..1465e51eb --- /dev/null +++ b/vendor/regex-automata/tests/lib.rs @@ -0,0 +1,114 @@ +// We have a similar config in the regex-automata crate root. Basically, it is +// just too annoying to deal with dead code when a subset of features is +// enabled. +#![cfg_attr( + not(all( + feature = "std", + feature = "nfa", + feature = "dfa", + feature = "hybrid", + feature = "perf-literal-substring", + feature = "perf-literal-multisubstring", + )), + allow(dead_code, unused_imports, unused_variables) +)] +// Similar deal with Miri. Just let dead code warnings be. 
+#![cfg_attr(miri, allow(dead_code, unused_imports, unused_variables))] + +#[cfg(any(feature = "dfa-search", feature = "dfa-onepass"))] +mod dfa; +#[cfg(feature = "dfa-search")] +mod fuzz; +#[cfg(feature = "dfa-search")] +mod gen; +#[cfg(feature = "hybrid")] +mod hybrid; +#[cfg(feature = "meta")] +mod meta; +#[cfg(any(feature = "nfa-backtrack", feature = "nfa-pikevm"))] +mod nfa; + +fn suite() -> anyhow::Result<regex_test::RegexTests> { + let _ = env_logger::try_init(); + + let mut tests = regex_test::RegexTests::new(); + macro_rules! load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("../../testdata/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("anchored"); + load!("bytes"); + load!("crazy"); + load!("crlf"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("leftmost-all"); + load!("line-terminator"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("substring"); + load!("unicode"); + load!("utf8"); + load!("word-boundary"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + + Ok(tests) +} + +/// Configure a regex_automata::Input with the given test configuration. +fn create_input<'h>( + test: &'h regex_test::RegexTest, +) -> regex_automata::Input<'h> { + use regex_automata::Anchored; + + let bounds = test.bounds(); + let anchored = if test.anchored() { Anchored::Yes } else { Anchored::No }; + regex_automata::Input::new(test.haystack()) + .range(bounds.start..bounds.end) + .anchored(anchored) +} + +/// Convert capture matches into the test suite's capture values. +/// +/// The given captures must represent a valid match, where the first capturing +/// group has a non-None span. Otherwise this panics. 
+fn testify_captures( + caps: &regex_automata::util::captures::Captures, +) -> regex_test::Captures { + assert!(caps.is_match(), "expected captures to represent a match"); + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start, end: m.end }) + }); + // These unwraps are OK because we assume our 'caps' represents a match, + // and a match always gives a non-zero number of groups with the first + // group being non-None. + regex_test::Captures::new(caps.pattern().unwrap().as_usize(), spans) + .unwrap() +} + +/// Convert a test harness match kind to a regex-automata match kind. If +/// regex-automata doesn't support the harness kind, then `None` is returned. +fn untestify_kind( + kind: regex_test::MatchKind, +) -> Option<regex_automata::MatchKind> { + match kind { + regex_test::MatchKind::All => Some(regex_automata::MatchKind::All), + regex_test::MatchKind::LeftmostFirst => { + Some(regex_automata::MatchKind::LeftmostFirst) + } + regex_test::MatchKind::LeftmostLongest => None, + } +} diff --git a/vendor/regex-automata/tests/meta/mod.rs b/vendor/regex-automata/tests/meta/mod.rs new file mode 100644 index 000000000..9d6ab475e --- /dev/null +++ b/vendor/regex-automata/tests/meta/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/meta/suite.rs b/vendor/regex-automata/tests/meta/suite.rs new file mode 100644 index 000000000..20f97b4bb --- /dev/null +++ b/vendor/regex-automata/tests/meta/suite.rs @@ -0,0 +1,200 @@ +use { + anyhow::Result, + regex_automata::{ + meta::{self, Regex}, + util::syntax, + MatchKind, PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures}; + +const BLACKLIST: &[&str] = &[ + // These 'earliest' tests are blacklisted because the meta searcher doesn't + // give the same offsets that the test expects.
This is legal because the + // 'earliest' routines don't guarantee a particular match offset other + // than "the earliest the regex engine can report a match." Some regex + // engines will quit earlier than others. The backtracker, for example, + // can't really quit before finding the full leftmost-first match. Many of + // the literal searchers also don't have the ability to quit fully or it's + // otherwise not worth doing. (A literal searcher not quitting as early as + // possible usually means looking at a few more bytes. That's no biggie.) + "earliest/", +]; + +/// Tests the default configuration of the meta regex engine. +#[test] +fn default() -> Result<()> { + let builder = Regex::builder(); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA. +#[test] +fn no_dfa() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure(Regex::config().dfa(false)); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA and lazy DFA. +#[test] +fn no_dfa_hybrid() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure(Regex::config().dfa(false).hybrid(false)); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA, lazy DFA and one-pass +/// DFA. 
+#[test] +fn no_dfa_hybrid_onepass() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure(Regex::config().dfa(false).hybrid(false).onepass(false)); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA, lazy DFA, one-pass +/// DFA and backtracker. +#[test] +fn no_dfa_hybrid_onepass_backtrack() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure( + Regex::config() + .dfa(false) + .hybrid(false) + .onepass(false) + .backtrack(false), + ); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: meta::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + move |test, regexes| { + if !configure_meta_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(&regexes)?; + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + } +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(input)), + "find" => match test.search_kind() { + SearchKind::Earliest => TestResult::matches( + re.find_iter(input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), + SearchKind::Leftmost => TestResult::matches( + re.find_iter(input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), +
SearchKind::Overlapping => { + let mut patset = PatternSet::new(re.pattern_len()); + re.which_overlapping_matches(&input, &mut patset); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .captures_iter(input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Leftmost => { + let it = re + .captures_iter(input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // There is no overlapping regex API that supports captures. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_meta_builder( + test: &RegexTest, + builder: &mut meta::Builder, +) -> bool { + let match_kind = match test.match_kind() { + regex_test::MatchKind::All => MatchKind::All, + regex_test::MatchKind::LeftmostFirst => MatchKind::LeftmostFirst, + regex_test::MatchKind::LeftmostLongest => return false, + }; + let meta_config = Regex::config() + .match_kind(match_kind) + .utf8_empty(test.utf8()) + .line_terminator(test.line_terminator()); + builder.configure(meta_config).syntax(config_syntax(test)); + true +} + +/// Configuration of the regex parser from a regex test. 
+fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs b/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs new file mode 100644 index 000000000..9d6ab475e --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs b/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs new file mode 100644 index 000000000..bce0eef40 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs @@ -0,0 +1,213 @@ +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + backtrack::{self, BoundedBacktracker}, + NFA, + }, + util::{prefilter::Prefilter, syntax}, + Input, + }, + regex_test::{ + CompiledRegex, Match, MatchKind, RegexTest, SearchKind, Span, + TestResult, TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures}; + +/// Tests the default configuration of the bounded backtracker. +#[test] +fn default() -> Result<()> { + let builder = BoundedBacktracker::builder(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + // At the time of writing, every regex search in the test suite fits + // into the backtracker's default visited capacity (except for the + // blacklisted tests below). If regexes are added that blow that capacity, + // then they should be blacklisted here. A tempting alternative is to + // automatically skip them by checking the haystack length against + // BoundedBacktracker::max_haystack_len, but that could wind up hiding + // interesting failure modes. e.g., If the visited capacity is somehow + // wrong or smaller than it should be. 
+ runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the backtracker with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + // We can always select leftmost-first here because the backtracker + // only supports leftmost-first matching. + let pre = Prefilter::from_hirs_prefix( + regex_automata::MatchKind::LeftmostFirst, + &hirs, + ); + let mut builder = BoundedBacktracker::builder(); + builder.configure(BoundedBacktracker::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), my_compiler).assert(); + Ok(()) +} + +/// Tests the bounded backtracker when its visited capacity is set to its +/// minimum amount. +#[test] +fn min_visited_capacity() -> Result<()> { + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner + .test_iter(suite()?.iter(), move |test, regexes| { + let nfa = NFA::compiler() + .configure(config_thompson(test)) + .syntax(config_syntax(test)) + .build_many(&regexes)?; + let mut builder = BoundedBacktracker::builder(); + if !configure_backtrack_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + // Setup the bounded backtracker so that its visited capacity is + // the absolute minimum required for the test's haystack.
+ builder.configure(BoundedBacktracker::config().visited_capacity( + backtrack::min_visited_capacity( + &nfa, + &Input::new(test.haystack()), + ), + )); + + let re = builder.build_from_nfa(nfa)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + }) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: backtrack::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + move |test, regexes| { + if !configure_backtrack_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(&regexes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &BoundedBacktracker, + cache: &mut backtrack::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => { + let input = input.earliest(true); + TestResult::matched(re.try_is_match(cache, input).unwrap()) + } + }, + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => TestResult::matches( + re.try_find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|result| result.unwrap()) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), + }, + "captures" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => TestResult::captures( + re.try_captures_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|result| result.unwrap()) + .map(|caps| testify_captures(&caps)), + ), + }, + name =>
TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_backtrack_builder( + test: &RegexTest, + builder: &mut backtrack::Builder, +) -> bool { + match (test.search_kind(), test.match_kind()) { + // For testing the standard search APIs. This is the only supported + // configuration for the backtracker. + (SearchKind::Leftmost, MatchKind::LeftmostFirst) => {} + // Overlapping APIs not supported at all for backtracker. + (SearchKind::Overlapping, _) => return false, + // Backtracking doesn't really support the notion of 'earliest'. + // Namely, backtracking already works by returning as soon as it knows + // it has found a match. It just so happens that this corresponds to + // the standard 'leftmost' formulation. + // + // The 'earliest' definition in this crate does indeed permit this + // behavior, so this is "fine," but our test suite specifically looks + // for the earliest position at which a match is known, which our + // finite automata based regex engines have no problem providing. So + // for backtracking, we just skip these tests. + (SearchKind::Earliest, _) => return false, + // For backtracking, 'all' semantics don't really make sense. + (_, MatchKind::All) => return false, + // Not supported at all in regex-automata. + (_, MatchKind::LeftmostLongest) => return false, + }; + let backtrack_config = BoundedBacktracker::config(); + builder + .configure(backtrack_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. 
+fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/nfa/thompson/mod.rs b/vendor/regex-automata/tests/nfa/thompson/mod.rs index 3a03f52ce..b2558f704 100644 --- a/vendor/regex-automata/tests/nfa/thompson/mod.rs +++ b/vendor/regex-automata/tests/nfa/thompson/mod.rs @@ -1 +1,4 @@ +#[cfg(feature = "nfa-backtrack")] +mod backtrack; +#[cfg(feature = "nfa-pikevm")] mod pikevm; diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs deleted file mode 100644 index c8199f709..000000000 --- a/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs +++ /dev/null @@ -1,191 +0,0 @@ -/* -use std::error::Error; - -use regex_automata::{ - hybrid::{ - dfa::{self, DFA}, - regex::Regex, - OverlappingState, - }, - nfa::thompson, - HalfMatch, MatchError, MatchKind, MultiMatch, -}; - -use crate::util::{BunkPrefilter, SubstringPrefilter}; - -// Tests that too many cache resets cause the lazy DFA to quit. -#[test] -fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { - // This is a carefully chosen regex. The idea is to pick one that requires - // some decent number of states (hence the bounded repetition). But we - // specifically choose to create a class with an ASCII letter and a - // non-ASCII letter so that we can check that no new states are created - // once the cache is full. 
Namely, if we fill up the cache on a haystack - // of 'a's, then in order to match one 'β', a new state will need to be - // created since a 'β' is encoded with multiple bytes. Since there's no - // room for this state, the search should quit at the very first position. - let pattern = r"[aβ]{100}"; - let dfa = DFA::builder() - .configure( - // Configure it so that we have the minimum cache capacity - // possible. And that if any resets occur, the search quits. - DFA::config() - .skip_cache_capacity_check(true) - .cache_capacity(0) - .minimum_cache_clear_count(Some(0)), - ) - .build(pattern)?; - let mut cache = dfa.create_cache(); - - let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 25 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone())); - assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone())); - assert_eq!( - dfa.find_overlapping_fwd( - &mut cache, - &haystack, - &mut OverlappingState::start() - ), - Err(err.clone()) - ); - - let haystack = "β".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 0 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); - // no need to test that other find routines quit, since we did that above - - // OK, if we reset the cache, then we should be able to create more states - // and make more progress with searching for betas. - cache.reset(&dfa); - let err = MatchError::GaveUp { offset: 26 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); - - // ... switching back to ASCII still makes progress since it just needs to - // set transitions on existing states! - let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 13 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); - - Ok(()) -} - -// Tests that quit bytes in the forward direction work correctly. 
-#[test] -fn quit_fwd() -> Result<(), Box<dyn Error>> { - let dfa = DFA::builder() - .configure(DFA::config().quit(b'x', true)) - .build("[[:word:]]+$")?; - let mut cache = dfa.create_cache(); - - assert_eq!( - dfa.find_earliest_fwd(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_leftmost_fwd(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_overlapping_fwd( - &mut cache, - b"abcxyz", - &mut OverlappingState::start() - ), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - - Ok(()) -} - -// Tests that quit bytes in the reverse direction work correctly. -#[test] -fn quit_rev() -> Result<(), Box<dyn Error>> { - let dfa = DFA::builder() - .configure(DFA::config().quit(b'x', true)) - .thompson(thompson::Config::new().reverse(true)) - .build("^[[:word:]]+")?; - let mut cache = dfa.create_cache(); - - assert_eq!( - dfa.find_earliest_rev(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_leftmost_rev(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - - Ok(()) -} - -// Tests that if we heuristically enable Unicode word boundaries but then -// instruct that a non-ASCII byte should NOT be a quit byte, then the builder -// will panic. -#[test] -#[should_panic] -fn quit_panics() { - DFA::config().unicode_word_boundary(true).quit(b'\xFF', false); -} - -// This tests an intesting case where even if the Unicode word boundary option -// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode -// word boundaries to be enabled. 
-#[test] -fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { - let mut config = DFA::config(); - for b in 0x80..=0xFF { - config = config.quit(b, true); - } - let dfa = DFA::builder().configure(config).build(r"\b")?; - let mut cache = dfa.create_cache(); - let expected = HalfMatch::must(0, 1); - assert_eq!(dfa.find_leftmost_fwd(&mut cache, b" a"), Ok(Some(expected))); - Ok(()) -} - -// Tests that we can provide a prefilter to a Regex, and the search reports -// correct results. -#[test] -fn prefilter_works() -> Result<(), Box<dyn Error>> { - let mut re = Regex::new(r"a[0-9]+").unwrap(); - re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); - let mut cache = re.create_cache(); - - let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; - let matches: Vec<(usize, usize)> = re - .find_leftmost_iter(&mut cache, text) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!( - matches, - vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] - ); - Ok(()) -} - -// This test confirms that a prefilter is active by using a prefilter that -// reports false negatives. -#[test] -fn prefilter_is_active() -> Result<(), Box<dyn Error>> { - let text = b"za123"; - let mut re = Regex::new(r"a[0-9]+").unwrap(); - let mut cache = re.create_cache(); - - re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); - assert_eq!( - re.find_leftmost(&mut cache, b"za123"), - Some(MultiMatch::must(0, 1, 5)) - ); - assert_eq!( - re.find_leftmost(&mut cache, b"a123"), - Some(MultiMatch::must(0, 0, 4)) - ); - re.set_prefilter(Some(Box::new(BunkPrefilter::new()))); - assert_eq!(re.find_leftmost(&mut cache, b"za123"), None); - // This checks that the prefilter is used when first starting the search, - // instead of waiting until at least one transition has occurred. 
- assert_eq!(re.find_leftmost(&mut cache, b"a123"), None); - Ok(()) -} -*/ diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs index f4299510c..9d6ab475e 100644 --- a/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs @@ -1,2 +1,2 @@ -mod api; +#[cfg(not(miri))] mod suite; diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs index e5505d59a..d32842a15 100644 --- a/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs @@ -1,42 +1,65 @@ -use regex_automata::{ - nfa::thompson::{ - self, - pikevm::{self, PikeVM}, +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + pikevm::{self, PikeVM}, + }, + util::{prefilter::Prefilter, syntax}, + PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, }, - MatchKind, SyntaxConfig, -}; -use regex_syntax as syntax; - -use regex_test::{ - bstr::{BString, ByteSlice}, - CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, - SearchKind as TestSearchKind, TestResult, TestRunner, }; -use crate::{suite, Result}; +use crate::{create_input, suite, testify_captures, untestify_kind}; /// Tests the default configuration of the hybrid NFA/DFA. #[test] fn default() -> Result<()> { let builder = PikeVM::builder(); - TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the PikeVM with prefilters enabled. 
+#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = PikeVM::builder(); + builder.configure(PikeVM::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.test_iter(suite()?.iter(), my_compiler).assert(); Ok(()) } fn compiler( mut builder: pikevm::Builder, -) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { move |test, regexes| { - let regexes = regexes - .iter() - .map(|r| r.to_str().map(|s| s.to_string())) - .collect::<std::result::Result<Vec<String>, _>>()?; if !configure_pikevm_builder(test, &mut builder) { return Ok(CompiledRegex::skip()); } let re = builder.build_many(&regexes)?; let mut cache = re.create_cache(); - Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { + Ok(CompiledRegex::compiled(move |test| -> TestResult { run_test(&re, &mut cache, test) })) } @@ -46,35 +69,59 @@ fn run_test( re: &PikeVM, cache: &mut pikevm::Cache, test: &RegexTest, -) -> Vec<TestResult> { - // let is_match = if re.is_match(cache, test.input()) { - // TestResult::matched() - // } else { - // TestResult::no_match() - // }; - // let is_match = is_match.name("is_match"); - - let find_matches = match test.search_kind() { - TestSearchKind::Earliest => { - TestResult::skip().name("find_earliest_iter") - } - TestSearchKind::Leftmost => { - let it = re - .find_leftmost_iter(cache, test.input()) -
.take(test.match_limit().unwrap_or(std::usize::MAX)) - .map(|m| Match { - id: m.pattern().as_usize(), - start: m.start(), - end: m.end(), - }); - TestResult::matches(it).name("find_leftmost_iter") - } - TestSearchKind::Overlapping => { - TestResult::skip().name("find_overlapping_iter") - } - }; - // vec![is_match, find_matches] - vec![find_matches] +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(cache, input)), + "find" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .find_iter(cache, input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Leftmost => { + let it = re + .find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Overlapping => { + let mut patset = PatternSet::new(re.get_nfa().pattern_len()); + re.which_overlapping_matches(cache, &input, &mut patset); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .captures_iter(cache, input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Leftmost => { + let it = re + .captures_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // There is no overlapping PikeVM API that supports captures. 
+ TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } } /// Configures the given regex builder with all relevant settings on the given @@ -86,8 +133,11 @@ fn configure_pikevm_builder( test: &RegexTest, builder: &mut pikevm::Builder, ) -> bool { - let pikevm_config = - PikeVM::config().anchored(test.anchored()).utf8(test.utf8()); + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + let pikevm_config = PikeVM::config().match_kind(match_kind); builder .configure(pikevm_config) .syntax(config_syntax(test)) @@ -97,13 +147,16 @@ fn configure_pikevm_builder( /// Configuration of a Thompson NFA compiler from a regex test. fn config_thompson(test: &RegexTest) -> thompson::Config { - thompson::Config::new().utf8(test.utf8()) + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) } /// Configuration of the regex parser from a regex test. -fn config_syntax(test: &RegexTest) -> SyntaxConfig { - SyntaxConfig::new() +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() .case_insensitive(test.case_insensitive()) .unicode(test.unicode()) .utf8(test.utf8()) + .line_terminator(test.line_terminator()) } diff --git a/vendor/regex-automata/tests/tests.rs b/vendor/regex-automata/tests/tests.rs deleted file mode 100644 index e4728470c..000000000 --- a/vendor/regex-automata/tests/tests.rs +++ /dev/null @@ -1,44 +0,0 @@ -#![allow(warnings)] - -use regex_test::RegexTests; - -mod dfa; -mod hybrid; -mod nfa; -mod regression; -mod util; - -type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; - -fn suite() -> Result<RegexTests> { - let mut tests = RegexTests::new(); - macro_rules! 
load { - ($name:expr) => {{ - const DATA: &[u8] = - include_bytes!(concat!("data/", $name, ".toml")); - tests.load_slice($name, DATA)?; - }}; - } - - load!("bytes"); - load!("crazy"); - load!("earliest"); - load!("empty"); - load!("expensive"); - load!("flags"); - load!("iter"); - load!("misc"); - load!("multiline"); - load!("no-unicode"); - load!("overlapping"); - load!("regression"); - load!("set"); - load!("unicode"); - load!("word-boundary"); - load!("fowler/basic"); - load!("fowler/nullsubexpr"); - load!("fowler/repetition"); - load!("fowler/repetition-expensive"); - - Ok(tests) -} diff --git a/vendor/regex-automata/tests/util.rs b/vendor/regex-automata/tests/util.rs deleted file mode 100644 index 499aa8c6d..000000000 --- a/vendor/regex-automata/tests/util.rs +++ /dev/null @@ -1,57 +0,0 @@ -use regex_automata::util::prefilter::{self, Candidate, Prefilter}; - -#[derive(Clone, Debug)] -pub struct SubstringPrefilter(bstr::Finder<'static>); - -impl SubstringPrefilter { - pub fn new<B: AsRef<[u8]>>(needle: B) -> SubstringPrefilter { - SubstringPrefilter(bstr::Finder::new(needle.as_ref()).into_owned()) - } -} - -impl Prefilter for SubstringPrefilter { - #[inline] - fn next_candidate( - &self, - state: &mut prefilter::State, - haystack: &[u8], - at: usize, - ) -> Candidate { - self.0 - .find(&haystack[at..]) - .map(|i| Candidate::PossibleStartOfMatch(at + i)) - .unwrap_or(Candidate::None) - } - - fn heap_bytes(&self) -> usize { - self.0.needle().len() - } -} - -/// A prefilter that always returns `Candidate::None`, even if it's a false -/// negative. This is useful for confirming that a prefilter is actually -/// active by asserting an incorrect result. 
-#[derive(Clone, Debug)] -pub struct BunkPrefilter(()); - -impl BunkPrefilter { - pub fn new() -> BunkPrefilter { - BunkPrefilter(()) - } -} - -impl Prefilter for BunkPrefilter { - #[inline] - fn next_candidate( - &self, - _state: &mut prefilter::State, - _haystack: &[u8], - _at: usize, - ) -> Candidate { - Candidate::None - } - - fn heap_bytes(&self) -> usize { - 0 - } -} |