diff options
Diffstat (limited to 'vendor/regex/testdata')
31 files changed, 10037 insertions, 0 deletions
diff --git a/vendor/regex/testdata/README.md b/vendor/regex/testdata/README.md new file mode 100644 index 000000000..c3bc1acb5 --- /dev/null +++ b/vendor/regex/testdata/README.md @@ -0,0 +1,22 @@ +This directory contains a large suite of regex tests defined in a TOML format. +They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs` +and `regex-lite/tests/lib.rs`. + +See the [`regex-test`][regex-test] crate documentation for an explanation of +the format and how it generates tests. + +The basic idea here is that we have many different regex engines but generally +one set of tests. We want to be able to run those tests (or most of them) on +every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup +of macros and a different test executable for each engine. It overall took a +longer time to compile, was harder to maintain and it made the test definitions +themselves less clear. + +In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot +worse because of an increase in the number of engines. So I devised an engine +independent format for testing regex patterns and their semantics. + +Note: the naming scheme used in these tests isn't terribly consistent. It would +be great to fix that. + +[regex-test]: https://docs.rs/regex-test diff --git a/vendor/regex/testdata/anchored.toml b/vendor/regex/testdata/anchored.toml new file mode 100644 index 000000000..0f2248d09 --- /dev/null +++ b/vendor/regex/testdata/anchored.toml @@ -0,0 +1,127 @@ +# These tests are specifically geared toward searches with 'anchored = true'. +# While they are interesting in their own right, they are particularly +# important for testing the one-pass DFA since the one-pass DFA can't work in +# unanchored contexts. +# +# Note that "anchored" in this context does not mean "^". Anchored searches are +# searches whose matches must begin at the start of the search, which may not +# be at the start of the haystack. That's why anchored searches---and there are +# some examples below---can still report multiple matches. This occurs when the +# matches are adjacent to one another. + +[[test]] +name = "greedy" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true + +# When a "earliest" search is used, greediness doesn't really exist because +# matches are reported as soon as they are known. +[[test]] +name = "greedy-earliest" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true +search-kind = "earliest" + +[[test]] +name = "nongreedy" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true + +# When "all" semantics are used, non-greediness doesn't exist since the longest +# possible match is always taken. +[[test]] +name = "nongreedy-all" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true +match-kind = "all" + +[[test]] +name = "word-boundary-unicode-01" +regex = '\b\w+\b' +haystack = 'βββ☃' +matches = [[0, 6]] +anchored = true + +[[test]] +name = "word-boundary-nounicode-01" +regex = '\b\w+\b' +haystack = 'abcβ' +matches = [[0, 3]] +anchored = true +unicode = false + +# Tests that '.c' doesn't match 'abc' when performing an anchored search from +# the beginning of the haystack. This test found two different bugs in the +# PikeVM and the meta engine. +[[test]] +name = "no-match-at-start" +regex = '.c' +haystack = 'abc' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-bounds" +regex = '.c' +haystack = 'aabc' +bounds = [1, 4] +matches = [] +anchored = true + +# This is like no-match-at-start, but hits the "reverse inner" optimization +# inside the meta engine. (no-match-at-start hits the "reverse suffix" +# optimization.) +[[test]] +name = "no-match-at-start-reverse-inner" +regex = '.c[a-z]' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-inner-bounds" +regex = '.c[a-z]' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true + +# Same as no-match-at-start, but applies to the meta engine's "reverse +# anchored" optimization. +[[test]] +name = "no-match-at-start-reverse-anchored" +regex = '.c[a-z]$' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-anchored-bounds" +regex = '.c[a-z]$' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true diff --git a/vendor/regex/testdata/bytes.toml b/vendor/regex/testdata/bytes.toml new file mode 100644 index 000000000..346e36971 --- /dev/null +++ b/vendor/regex/testdata/bytes.toml @@ -0,0 +1,235 @@ +# These are tests specifically crafted for regexes that can match arbitrary +# bytes. In some cases, we also test the Unicode variant as well, just because +# it's good sense to do so. But also, these tests aren't really about Unicode, +# but whether matches are only reported at valid UTF-8 boundaries. For most +# tests in this entire collection, utf8 = true. But for these tests, we use +# utf8 = false. + +[[test]] +name = "word-boundary-ascii" +regex = ' \b' +haystack = " δ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-unicode" +regex = ' \b' +haystack = " δ" +matches = [[0, 1]] +unicode = true +utf8 = false + +[[test]] +name = "word-boundary-ascii-not" +regex = ' \B' +haystack = " δ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-unicode-not" +regex = ' \B' +haystack = " δ" +matches = [] +unicode = true +utf8 = false + +[[test]] +name = "perl-word-ascii" +regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "perl-word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] +unicode = true +utf8 = false + +[[test]] +name = "perl-decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false +utf8 = false + +[[test]] +name = "perl-decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] +unicode = true +utf8 = false + +[[test]] +name = "perl-whitespace-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "perl-whitespace-unicode" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 4]] +unicode = true +utf8 = false + +# The first `(.+)` matches two Unicode codepoints, but can't match the 5th +# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and +# matches. +[[test]] +name = "mixed-dot" +regex = '(.+)(?-u)(.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [ + [[0, 5], [0, 4], [4, 5]], +] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "case-one-ascii" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-one-unicode" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "case-class-simple-ascii" +regex = '[a-z]+' +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-ascii" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-unicode" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "negate-ascii" +regex = '[^a]' +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "negate-unicode" +regex = '[^a]' +haystack = "δ" +matches = [[0, 2]] +unicode = true +utf8 = false + +# When utf8=true, this won't match, because the implicit '.*?' prefix is +# Unicode aware and will refuse to match through invalid UTF-8 bytes. +[[test]] +name = "dotstar-prefix-ascii" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "dotstar-prefix-unicode" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "null-bytes" +regex = '(?P<cstr>[^\x00]+)\x00' +haystack = 'foo\x00' +matches = [ + [[0, 4], [0, 3]], +] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-100" +regex = '\xCC?^' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-200" +regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[22, 22]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-300" +regex = '^|ddp\xff\xffdddddlQd@\x80' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-100" +regex = '\Bx\B' +haystack = "áxβ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-200" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/crazy.toml b/vendor/regex/testdata/crazy.toml new file mode 100644 index 000000000..aed46ea15 --- /dev/null +++ b/vendor/regex/testdata/crazy.toml @@ -0,0 +1,315 @@ +[[test]] +name = "nothing-empty" +regex = [] +haystack = "" +matches = [] + +[[test]] +name = "nothing-something" +regex = [] +haystack = "wat" +matches = [] + +[[test]] +name = "ranges" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 255" +matches = [[5, 8]] + +[[test]] +name = "ranges-not" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 256" +matches = [] + +[[test]] +name = "float1" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1" +matches = [[0, 3]] + +[[test]] +name = "float2" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1.2" +matches = [[0, 3]] +match-limit = 1 + +[[test]] +name = "float3" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "a1.2" +matches = [[1, 4]] + +[[test]] +name = "float4" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "1.a" +matches = [[0, 1]] + +[[test]] +name = "float5" +regex = '^[-+]?[0-9]*\.?[0-9]+$' +haystack = "1.a" +matches = [] + +[[test]] +name = "email" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "email-not" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail " +matches = [] + +[[test]] +name = "email-big" +regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "date1" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-01-01" +matches = [[0, 10]] +unicode = false + +[[test]] +name = "date2" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-00-01" +matches = [] +unicode = false + +[[test]] +name = "date3" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-13-01" +matches = [] +unicode = false + +[[test]] +name = "start-end-empty" +regex = '^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rev" +regex = '$^' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-1" +regex = '^$^$^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-2" +regex = '^^^$$$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rep" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "start-end-empty-rep-rev" +regex = '(?:$^)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "neg-class-letter" +regex = '[^ac]' +haystack = "acx" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-comma" +regex = '[^a,]' +haystack = "a,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-space" +regex = '[^a[:space:]]' +haystack = "a x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma" +regex = '[^,]' +haystack = ",,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-space" +regex = '[^[:space:]]' +haystack = " a" +matches = [[1, 2]] + +[[test]] +name = "neg-class-space-comma" +regex = '[^,[:space:]]' +haystack = ", a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma-space" +regex = '[^[:space:],]' +haystack = " ,a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-ascii" +regex = '[^[:alpha:]Z]' +haystack = "A1" +matches = [[1, 2]] + +[[test]] +name = "lazy-many-many" +regex = '(?:(?:.*)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-many-optional" +regex = '(?:(?:.?)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-many" +regex = '(?:(?:.*)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-optional" +regex = '(?:(?:.?)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-min-many" +regex = '(?:(?:.*){1,}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-many" +regex = '(?:(?:.*){1,2}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-many" +regex = '(?:(?:.*)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-optional" +regex = '(?:(?:.?)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-many" +regex = '(?:(?:.*)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-optional" +regex = '(?:(?:.?)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-min-many" +regex = '(?:(?:.*){1,})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-many" +regex = '(?:(?:.*){1,2})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "empty1" +regex = '' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] diff --git a/vendor/regex/testdata/crlf.toml b/vendor/regex/testdata/crlf.toml new file mode 100644 index 000000000..9e2d3761a --- /dev/null +++ b/vendor/regex/testdata/crlf.toml @@ -0,0 +1,117 @@ +# This is a basic test that checks ^ and $ treat \r\n as a single line +# terminator. If ^ and $ only treated \n as a line terminator, then this would +# only match 'xyz' at the end of the haystack. +[[test]] +name = "basic" +regex = '(?mR)^[a-z]+$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 3], [5, 8], [10, 13]] + +# Tests that a CRLF-aware '^$' assertion does not match between CR and LF. +[[test]] +name = "start-end-non-empty" +regex = '(?mR)^$' +haystack = "abc\r\ndef\r\nxyz" +matches = [] + +# Tests that a CRLF-aware '^$' assertion matches the empty string, just like +# a non-CRLF-aware '^$' assertion. +[[test]] +name = "start-end-empty" +regex = '(?mR)^$' +haystack = "" +matches = [[0, 0]] + +# Tests that a CRLF-aware '^$' assertion matches the empty string preceding +# and following a line terminator. +[[test]] +name = "start-end-before-after" +regex = '(?mR)^$' +haystack = "\r\n" +matches = [[0, 0], [2, 2]] + +# Tests that a CRLF-aware '^' assertion does not split a line terminator. +[[test]] +name = "start-no-split" +regex = '(?mR)^' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 0], [5, 5], [10, 10]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "start-no-split-adjacent" +regex = '(?mR)^' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. +[[test]] +name = "start-no-split-adjacent-cr" +regex = '(?mR)^' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "start-no-split-adjacent-lf" +regex = '(?mR)^' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that a CRLF-aware '$' assertion does not split a line terminator. +[[test]] +name = "end-no-split" +regex = '(?mR)$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[3, 3], [8, 8], [13, 13]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "end-no-split-adjacent" +regex = '(?mR)$' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. +[[test]] +name = "end-no-split-adjacent-cr" +regex = '(?mR)$' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "end-no-split-adjacent-lf" +regex = '(?mR)$' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note +# that this doesn't require multi-line mode to be enabled. +[[test]] +name = "dot-no-crlf" +regex = '(?R).' +haystack = "\r\n\r\n\r\n" +matches = [] + +# This is a test that caught a bug in the one-pass DFA where it (amazingly) was +# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy & +# paste bug. We insert an empty capture group here because it provokes the meta +# regex engine to first find a match and then trip over a panic because the +# one-pass DFA erroneously says there is no match. +[[test]] +name = "onepass-wrong-crlf-with-capture" +regex = '(?Rm:().$)' +haystack = "ZZ\r" +matches = [[[1, 2], [1, 1]]] + +# This is like onepass-wrong-crlf-with-capture above, except it sets up the +# test so that it can be run by the one-pass DFA directly. (i.e., Make it +# anchored and start the search at the right place.) +[[test]] +name = "onepass-wrong-crlf-anchored" +regex = '(?Rm:.$)' +haystack = "ZZ\r" +matches = [[1, 2]] +anchored = true +bounds = [1, 3] diff --git a/vendor/regex/testdata/earliest.toml b/vendor/regex/testdata/earliest.toml new file mode 100644 index 000000000..951689358 --- /dev/null +++ b/vendor/regex/testdata/earliest.toml @@ -0,0 +1,52 @@ +[[test]] +name = "no-greedy-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "no-greedy-200" +regex = 'abc+' +haystack = "zzzabccc" +matches = [[3, 6]] +search-kind = "earliest" + +[[test]] +name = "is-ungreedy" +regex = 'a+?' +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "look-start-test" +regex = '^(abc|a)' +haystack = "abc" +matches = [ + [[0, 1], [0, 1]], +] +search-kind = "earliest" + +[[test]] +name = "look-end-test" +regex = '(abc|a)$' +haystack = "abc" +matches = [ + [[0, 3], [0, 3]], +] +search-kind = "earliest" + +[[test]] +name = "no-leftmost-first-100" +regex = 'abc|a' +haystack = "abc" +matches = [[0, 1]] +search-kind = "earliest" + +[[test]] +name = "no-leftmost-first-200" +regex = 'aba|a' +haystack = "aba" +matches = [[0, 1], [2, 3]] +search-kind = "earliest" diff --git a/vendor/regex/testdata/empty.toml b/vendor/regex/testdata/empty.toml new file mode 100644 index 000000000..7dfd8027a --- /dev/null +++ b/vendor/regex/testdata/empty.toml @@ -0,0 +1,113 @@ +[[test]] +name = "100" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "110" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "120" +regex = "|z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "130" +regex = "z|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "200" +regex = "|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "210" +regex = "||" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "220" +regex = "||b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "230" +regex = "b||" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "240" +regex = "||z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "300" +regex = "(?:)|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "310" +regex = "b|(?:)" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "320" +regex = "(?:|)" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "330" +regex = "(?:|)|z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "400" +regex = "a(?:)|b" +haystack = "abc" +matches = [[0, 1], [1, 2]] + +[[test]] +name = "500" +regex = "" +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "510" +regex = "" +haystack = "a" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "520" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "600" +regex = '(?:|a)*' +haystack = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "610" +regex = '(?:|a)+' +haystack = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] diff --git a/vendor/regex/testdata/expensive.toml b/vendor/regex/testdata/expensive.toml new file mode 100644 index 000000000..b70e42f9b --- /dev/null +++ b/vendor/regex/testdata/expensive.toml @@ -0,0 +1,23 @@ +# This file represent tests that may be expensive to run on some regex engines. +# For example, tests that build a full DFA ahead of time and minimize it can +# take a horrendously long time on regexes that are large (or result in an +# explosion in the number of states). We group these tests together so that +# such engines can simply skip these tests. + +# See: https://github.com/rust-lang/regex/issues/98 +[[test]] +name = "regression-many-repeat-no-stack-overflow" +regex = '^.{1,2500}' +haystack = "a" +matches = [[0, 1]] + +# This test is meant to blow the bounded backtracker's visited capacity. In +# order to do that, we need a somewhat sizeable regex. The purpose of this +# is to make sure there's at least one test that exercises this path in the +# backtracker. All other tests (at time of writing) are small enough that the +# backtracker can handle them fine. +[[test]] +name = "backtrack-blow-visited-capacity" +regex = '\pL{50}' +haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ" +matches = [[0, 50], [50, 100], [100, 150]] diff --git a/vendor/regex/testdata/flags.toml b/vendor/regex/testdata/flags.toml new file mode 100644 index 000000000..30b412ca6 --- /dev/null +++ b/vendor/regex/testdata/flags.toml @@ -0,0 +1,68 @@ +[[test]] +name = "1" +regex = "(?i)abc" +haystack = "ABC" +matches = [[0, 3]] + +[[test]] +name = "2" +regex = "(?i)a(?-i)bc" +haystack = "Abc" +matches = [[0, 3]] + +[[test]] +name = "3" +regex = "(?i)a(?-i)bc" +haystack = "ABC" +matches = [] + +[[test]] +name = "4" +regex = "(?is)a." +haystack = "A\n" +matches = [[0, 2]] + +[[test]] +name = "5" +regex = "(?is)a.(?-is)a." +haystack = "A\nab" +matches = [[0, 4]] + +[[test]] +name = "6" +regex = "(?is)a.(?-is)a." +haystack = "A\na\n" +matches = [] + +[[test]] +name = "7" +regex = "(?is)a.(?-is:a.)?" +haystack = "A\na\n" +matches = [[0, 2]] +match-limit = 1 + +[[test]] +name = "8" +regex = "(?U)a+" +haystack = "aa" +matches = [[0, 1]] +match-limit = 1 + +[[test]] +name = "9" +regex = "(?U)a+?" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "10" +regex = "(?U)(?-U)a+" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "11" +regex = '(?m)(?:^\d+$\n?)+' +haystack = "123\n456\n789" +matches = [[0, 11]] +unicode = false diff --git a/vendor/regex/testdata/fowler/basic.toml b/vendor/regex/testdata/fowler/basic.toml new file mode 100644 index 000000000..92b4e4cf7 --- /dev/null +++ b/vendor/regex/testdata/fowler/basic.toml @@ -0,0 +1,1611 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. + +[[test]] +name = "basic3" +regex = '''abracadabra$''' +haystack = '''abracadabracadabra''' +matches = [[[7, 18]]] +match-limit = 1 + +[[test]] +name = "basic4" +regex = '''a...b''' +haystack = '''abababbb''' +matches = [[[2, 7]]] +match-limit = 1 + +[[test]] +name = "basic5" +regex = '''XXXXXX''' +haystack = '''..XXXXXX''' +matches = [[[2, 8]]] +match-limit = 1 + +[[test]] +name = "basic6" +regex = '''\)''' +haystack = '''()''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic7" +regex = '''a]''' +haystack = '''a]a''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic9" +regex = '''\}''' +haystack = '''}''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic10" +regex = '''\]''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic12" +regex = ''']''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic15" +regex = '''^a''' +haystack = '''ax''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic16" +regex = '''\^a''' +haystack = '''a^a''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic17" +regex = '''a\^''' +haystack = '''a^''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic18" +regex = '''a$''' +haystack = '''aa''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic19" +regex = '''a\$''' +haystack = '''a$''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic20" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic21" +regex = '''$^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic22" +regex = '''a($)''' +haystack = '''aa''' +matches = [[[1, 2], [2, 2]]] +match-limit = 1 + +[[test]] +name = "basic23" +regex = '''a*(^a)''' +haystack = '''aa''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic24" +regex = '''(..)*(...)*''' +haystack = '''a''' +matches = [[[0, 0], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic25" +regex = '''(..)*(...)*''' +haystack = '''abcd''' +matches = [[[0, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic26" +regex = '''(ab|a)(bc|c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic27" +regex = '''(ab)c|abc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic28" +regex = '''a{0}b''' +haystack = '''ab''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic29" +regex = '''(a*)(b?)(b+)b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic30" +regex = '''(a*)(b{0,1})(b{1,})b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic32" +regex = '''((a|a)|a)''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic33" +regex = '''(a*)(a|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic34" +regex = '''a*(a.|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic35" +regex = '''a(b)|c(d)|a(e)f''' +haystack = '''aef''' +matches = [[[0, 3], [], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic36" +regex = '''(a|b)?.*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic37" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ac''' +matches = [[[0, 2], [0, 1], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic38" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ab''' +matches = [[[0, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic39" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''abc''' +matches = [[[0, 3], [1, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic40" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''xc''' +matches = [[[1, 2], [], []]] +match-limit = 1 + +[[test]] +name = "basic41" +regex = '''(.a|.b).*|.*(.a|.b)''' +haystack = '''xa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic42" +regex = '''a?(ab|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic43" +regex = '''a?(ac{0}b|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic44" +regex = '''ab|abab''' +haystack = '''abbabab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic45" +regex = '''aba|bab|bba''' +haystack = '''baaabbbaba''' +matches = [[[5, 8]]] +match-limit = 1 + +[[test]] +name = "basic46" +regex = '''aba|bab''' +haystack = '''baaabbbaba''' +matches = [[[6, 9]]] +match-limit = 1 + +[[test]] +name = "basic47" +regex = '''(aa|aaa)*|(a|aaaaa)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic48" +regex = '''(a.|.a.)*|(a|.a...)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic49" +regex = '''ab|a''' +haystack = '''xabc''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic50" +regex = '''ab|a''' +haystack = '''xxabc''' +matches = [[[2, 4]]] +match-limit = 1 + +[[test]] +name = "basic51" +regex = '''(Ab|cD)*''' +haystack = '''aBcD''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true +case-insensitive = true + +[[test]] +name = "basic52" +regex = '''[^-]''' +haystack = '''--a''' +matches = [[[2, 3]]] +match-limit = 1 + +[[test]] +name = "basic53" +regex = '''[a-]*''' +haystack = '''--a''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic54" +regex = '''[a-m-]*''' +haystack = '''--amoma--''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic55" +regex = ''':::1:::0:|:::1:1:0:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic56" +regex = ''':::1:::0:|:::1:1:1:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic57" +regex = '''[[:upper:]]''' +haystack = '''A''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic58" +regex = '''[[:lower:]]+''' +haystack = '''`az{''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic59" +regex = '''[[:upper:]]+''' +haystack = '''@AZ[''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic65" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic66" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic67" +regex = '''[^a]''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic68" +regex = '''\na''' +haystack = '''\na''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic69" +regex = '''(a)(b)(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [1, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic70" +regex = '''xxx''' +haystack = '''xxx''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic72" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 6,''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic74" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''2/7''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic76" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 1,Feb 6''' +matches = [[[5, 11]]] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "basic78" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))''' +haystack = '''x''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic80" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))*''' +haystack = '''xx''' +matches = [[[0, 2], [1, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic81" +regex = '''a?(ab|ba)*''' +haystack = '''ababababababababababababababababababababababababababababababababababababababababa''' +matches = [[[0, 81], [79, 81]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic82" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabbbbaa''' +matches = [[[18, 25]]] +match-limit = 1 + +[[test]] +name = "basic83" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabaa''' +matches = [[[18, 22]]] +match-limit = 1 + +[[test]] +name = "basic84" +regex = '''aaac|aabc|abac|abbc|baac|babc|bbac|bbbc''' +haystack = '''baaabbbabac''' +matches = [[[7, 11]]] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "basic86" +regex = '''.*''' +haystack = '''\x01\x7f''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic87" +regex = '''aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll''' +haystack = '''XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa''' +matches = [[[53, 57]]] +match-limit = 1 + +[[test]] +name = "basic89" +regex = '''a*a*a*a*a*b''' +haystack = '''aaaaaaaaab''' +matches = [[[0, 10]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic90" +regex = '''^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic91" +regex = '''$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic92" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic93" +regex = '''^a$''' +haystack = '''a''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic94" +regex = '''abc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic95" +regex = '''abc''' +haystack = '''xabcy''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic96" +regex = '''abc''' +haystack = '''ababc''' +matches = [[[2, 5]]] +match-limit = 1 + +[[test]] +name = "basic97" +regex = '''ab*c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic98" +regex = '''ab*bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic99" +regex = '''ab*bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic100" +regex = '''ab*bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic101" +regex = '''ab+bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic102" +regex = '''ab+bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic103" +regex = '''ab?bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic104" +regex = '''ab?bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic105" +regex = '''ab?c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic106" +regex = '''^abc$''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic107" +regex = '''^abc''' +haystack = '''abcc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic108" +regex = '''abc$''' +haystack = '''aabc''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic109" +regex = '''^''' +haystack = '''abc''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic110" +regex = '''$''' +haystack = '''abc''' +matches = [[[3, 3]]] +match-limit = 1 + +[[test]] +name = "basic111" +regex = '''a.c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic112" +regex = '''a.c''' +haystack = '''axc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic113" +regex = '''a.*c''' +haystack = '''axyzc''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic114" +regex = '''a[bc]d''' +haystack = '''abd''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic115" +regex = '''a[b-d]e''' +haystack = '''ace''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic116" +regex = '''a[b-d]''' +haystack = '''aac''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic117" +regex = '''a[-b]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic118" +regex = '''a[b-]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic119" +regex = '''a]''' +haystack = '''a]''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic120" +regex = '''a[]]b''' +haystack = '''a]b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic121" +regex = '''a[^bc]d''' +haystack = '''aed''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic122" +regex = '''a[^-b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic123" +regex = '''a[^]b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic124" +regex = '''ab|cd''' +haystack = '''abc''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic125" +regex = '''ab|cd''' +haystack = '''abcd''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic126" +regex = '''a\(b''' +haystack = '''a(b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic127" +regex = '''a\(*b''' +haystack = '''ab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic128" +regex = '''a\(*b''' +haystack = '''a((b''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic129" +regex = '''((a))''' +haystack = '''abc''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic130" +regex = '''(a)b(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic131" +regex = '''a+b+c''' +haystack = '''aabbabc''' +matches = [[[4, 7]]] +match-limit = 1 + +[[test]] +name = "basic132" +regex = '''a*''' +haystack = '''aaa''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic133" +regex = '''(a*)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic134" +regex = '''(a*)+''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic135" +regex = '''(a*|b)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic136" +regex = '''(a+|b)*''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic137" +regex = '''(a+|b)+''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic138" +regex = '''(a+|b)?''' +haystack = '''ab''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic139" +regex = '''[^ab]*''' +haystack = '''cde''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic140" +regex = '''(^)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic141" +regex = '''a*''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic142" +regex = '''([abc])*d''' +haystack = '''abbbcd''' +matches = [[[0, 6], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic143" +regex = '''([abc])*bcd''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic144" +regex = '''a|b|c|d|e''' +haystack = '''e''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic145" +regex = '''(a|b|c|d|e)f''' +haystack = '''ef''' +matches = [[[0, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic146" +regex = '''((a*|b))*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic147" +regex = '''abcd*efg''' +haystack = '''abcdefg''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic148" +regex = '''ab*''' +haystack = '''xabyabbbz''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic149" +regex = '''ab*''' +haystack = '''xayabbbz''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic150" +regex = '''(ab|cd)e''' +haystack = '''abcde''' +matches = [[[2, 5], [2, 4]]] +match-limit = 1 + +[[test]] +name = "basic151" +regex = '''[abhgefdc]ij''' +haystack = '''hij''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic152" +regex = '''(a|b)c*d''' +haystack = '''abcd''' +matches = [[[1, 4], [1, 2]]] +match-limit = 1 + +[[test]] +name = "basic153" +regex = '''(ab|ab*)bc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic154" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic155" +regex = '''a([bc]*)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic156" +regex = '''a([bc]+)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic157" +regex = '''a([bc]*)(c+d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 2], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic158" +regex = '''a[bcd]*dcdcde''' +haystack = '''adcdcde''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic159" +regex = '''(ab|a)b*c''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic160" +regex = '''((a)(b)c)(d)''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 3], [0, 1], [1, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic161" +regex = '''[A-Za-z_][A-Za-z0-9_]*''' +haystack = '''alpha''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic162" +regex = '''^a(bc+|b[eh])g|.h$''' +haystack = '''abh''' +matches = [[[1, 3], []]] +match-limit = 1 + +[[test]] +name = "basic163" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''effgz''' +matches = [[[0, 5], [0, 5], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic164" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''ij''' +matches = [[[0, 2], [0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic165" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''reffgz''' +matches = [[[1, 6], [1, 6], []]] +match-limit = 1 + +[[test]] +name = "basic166" +regex = '''(((((((((a)))))))))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic167" +regex = '''multiple words''' +haystack = '''multiple words yeah''' +matches = [[[0, 14]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic168" +regex = '''(.*)c(.*)''' +haystack = '''abcde''' +matches = [[[0, 5], [0, 2], [3, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic169" +regex = '''abcd''' +haystack = '''abcd''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic170" +regex = '''a(bc)d''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic171" +regex = '''a[\x01-\x03]?c''' +haystack = '''a\x02c''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic172" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic173" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mo'ammar Gadhafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic174" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Kaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic175" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qadhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic176" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gadafi''' +matches = [[[0, 14], [], [10, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic177" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic178" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moamar Gaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic179" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadhdhafi''' +matches = [[[0, 18], [], [13, 15]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic180" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Khaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic181" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafy''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic182" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic183" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic184" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muamar Kaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic185" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Quathafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic186" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gheddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic187" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Khadafy''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic188" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Qudhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic189" +regex = '''a+(b|c)*d+''' +haystack = '''aabcdd''' +matches = [[[0, 6], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic190" +regex = '''^.+$''' +haystack = '''vivi''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic191" +regex = '''^(.+)$''' +haystack = '''vivi''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic192" +regex = '''^([^!.]+).att.com!(.+)$''' +haystack = '''gryphon.att.com!eby''' +matches = [[[0, 19], [0, 7], [16, 19]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic193" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic194" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic195" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic196" +regex = '''^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic197" +regex = '''((foo)|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic198" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic199" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic200" +regex = '''((foo)|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic201" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], []]] +match-limit = 1 + +[[test]] +name = "basic202" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic203" +regex = '''(foo|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic204" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic205" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic206" +regex = '''(foo|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic207" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic208" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic209" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic210" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic211" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic212" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic213" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic214" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bas''' +matches = [[[0, 3], [0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic215" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic216" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic217" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic218" +regex = '''.*(/XXX).*''' +haystack = '''/XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic219" +regex = '''.*(\\XXX).*''' +haystack = '''\XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic220" +regex = '''\\XXX''' +haystack = '''\XXX''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic221" +regex = '''.*(/000).*''' +haystack = '''/000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic222" +regex = '''.*(\\000).*''' +haystack = '''\000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic223" +regex = '''\\000''' +haystack = '''\000''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/fowler/dat/README b/vendor/regex/testdata/fowler/dat/README new file mode 100644 index 000000000..242a0e6c3 --- /dev/null +++ b/vendor/regex/testdata/fowler/dat/README @@ -0,0 +1,25 @@ +Test data was taken from the Go distribution, which was in turn taken from the +testregex test suite: + + http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html + +Unfortunately, the original web site now appears dead, but the test data lives +on. + +The LICENSE in this directory corresponds to the LICENSE that the data was +originally released under. + +The tests themselves were modified for RE2/Go (and marked as such). A +couple were modified further by me (Andrew Gallant) and marked with 'Rust'. + +After some number of years, these tests were transformed into a TOML format +using the 'regex-cli generate fowler' command. To re-generate the +TOML files, run the following from the root of this repository: + + regex-cli generate fowler tests/data/fowler tests/data/fowler/dat/*.dat + +This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md' +from the root of the repository for more information. + +This brings the Fowler tests into a more "sensible" structured format in which +other tests can be written such that they aren't write-only. diff --git a/vendor/regex/testdata/fowler/dat/basic.dat b/vendor/regex/testdata/fowler/dat/basic.dat new file mode 100644 index 000000000..654a72b39 --- /dev/null +++ b/vendor/regex/testdata/fowler/dat/basic.dat @@ -0,0 +1,223 @@ +NOTE all standard compliant implementations should pass these : 2002-05-31 + +BE abracadabra$ abracadabracadabra (7,18) +BE a...b abababbb (2,7) +BE XXXXXX ..XXXXXX (2,8) +E \) () (1,2) +BE a] a]a (0,2) +B } } (0,1) +E \} } (0,1) +BE \] ] (0,1) +B ] ] (0,1) +E ] ] (0,1) +B { { (0,1) +B } } (0,1) +BE ^a ax (0,1) +BE \^a a^a (1,3) +BE a\^ a^ (0,2) +BE a$ aa (1,2) +BE a\$ a$ (0,2) +BE ^$ NULL (0,0) +E $^ NULL (0,0) +E a($) aa (1,2)(2,2) +E a*(^a) aa (0,1)(0,1) +E (..)*(...)* a (0,0) +E (..)*(...)* abcd (0,4)(2,4) +E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) +E (ab)c|abc abc (0,3)(0,2) +E a{0}b ab (1,2) +E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) +E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) +E a{9876543210} NULL BADBR +E ((a|a)|a) a (0,1)(0,1)(0,1) +E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) +E a*(a.|aa) aaaa (0,4)(2,4) +E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) +E (a|b)?.* b (0,1)(0,1) +E (a|b)c|a(b|c) ac (0,2)(0,1) +E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) +E (a|b)*c|(a|ab)*c abc (0,3)(1,2) +E (a|b)*c|(a|ab)*c xc (1,2) +E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) +E a?(ab|ba)ab abab (0,4)(0,2) +E a?(ac{0}b|ba)ab abab (0,4)(0,2) +E ab|abab abbabab (0,2) +E aba|bab|bba baaabbbaba (5,8) +E aba|bab baaabbbaba (6,9) +E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) +E (a.|.a.)*|(a|.a...) aa (0,2)(0,2) +E ab|a xabc (1,3) +E ab|a xxabc (2,4) +Ei (Ab|cD)* aBcD (0,4)(2,4) +BE [^-] --a (2,3) +BE [a-]* --a (0,3) +BE [a-m-]* --amoma-- (0,4) +E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) +E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) +{E [[:upper:]] A (0,1) [[<element>]] not supported +E [[:lower:]]+ `az{ (1,3) +E [[:upper:]]+ @AZ[ (1,3) +# No collation in Go +#BE [[-]] [[-]] (2,4) +#BE [[.NIL.]] NULL ECOLLATE +#BE [[=aleph=]] NULL ECOLLATE +} +BE$ \n \n (0,1) +BEn$ \n \n (0,1) +BE$ [^a] \n (0,1) +BE$ \na \na (0,2) +E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) +BE xxx xxx (0,3) +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust +#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust +#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust +E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) +E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) +E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) +E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) +#BE$ .* \x01\xff (0,2) +BE$ .* \x01\x7f (0,2) Rust +E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) +L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH +E a*a*a*a*a*b aaaaaaaaab (0,10) +BE ^ NULL (0,0) +BE $ NULL (0,0) +BE ^$ NULL (0,0) +BE ^a$ a (0,1) +BE abc abc (0,3) +BE abc xabcy (1,4) +BE abc ababc (2,5) +BE ab*c abc (0,3) +BE ab*bc abc (0,3) +BE ab*bc abbc (0,4) +BE ab*bc abbbbc (0,6) +E ab+bc abbc (0,4) +E ab+bc abbbbc (0,6) +E ab?bc abbc (0,4) +E ab?bc abc (0,3) +E ab?c abc (0,3) +BE ^abc$ abc (0,3) +BE ^abc abcc (0,3) +BE abc$ aabc (1,4) +BE ^ abc (0,0) +BE $ abc (3,3) +BE a.c abc (0,3) +BE a.c axc (0,3) +BE a.*c axyzc (0,5) +BE a[bc]d abd (0,3) +BE a[b-d]e ace (0,3) +BE a[b-d] aac (1,3) +BE a[-b] a- (0,2) +BE a[b-] a- (0,2) +BE a] a] (0,2) +BE a[]]b a]b (0,3) +BE a[^bc]d aed (0,3) +BE a[^-b]c adc (0,3) +BE a[^]b]c adc (0,3) +E ab|cd abc (0,2) +E ab|cd abcd (0,2) +E a\(b a(b (0,3) +E a\(*b ab (0,2) +E a\(*b a((b (0,4) +E ((a)) abc (0,1)(0,1)(0,1) +E (a)b(c) abc (0,3)(0,1)(2,3) +E a+b+c aabbabc (4,7) +E a* aaa (0,3) +E (a*)* - (0,0)(0,0) +E (a*)+ - (0,0)(0,0) +E (a*|b)* - (0,0)(0,0) +E (a+|b)* ab (0,2)(1,2) +E (a+|b)+ ab (0,2)(1,2) +E (a+|b)? ab (0,1)(0,1) +BE [^ab]* cde (0,3) +E (^)* - (0,0)(0,0) +BE a* NULL (0,0) +E ([abc])*d abbbcd (0,6)(4,5) +E ([abc])*bcd abcd (0,4)(0,1) +E a|b|c|d|e e (0,1) +E (a|b|c|d|e)f ef (0,2)(0,1) +E ((a*|b))* - (0,0)(0,0)(0,0) +BE abcd*efg abcdefg (0,7) +BE ab* xabyabbbz (1,3) +BE ab* xayabbbz (1,2) +E (ab|cd)e abcde (2,5)(2,4) +BE [abhgefdc]ij hij (0,3) +E (a|b)c*d abcd (1,4)(1,2) +E (ab|ab*)bc abc (0,3)(0,1) +E a([bc]*)c* abc (0,3)(1,3) +E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) +E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) +E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) +E a[bcd]*dcdcde adcdcde (0,7) +E (ab|a)b*c abc (0,3)(0,2) +E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) +BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) +E ^a(bc+|b[eh])g|.h$ abh (1,3) +E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) +E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) +E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) +E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) +BE multiple words multiple words yeah (0,14) +E (.*)c(.*) abcde (0,5)(0,2)(3,5) +BE abcd abcd (0,4) +E a(bc)d abcd (0,4)(1,3) +E a[-]?c ac (0,3) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) +E a+(b|c)*d+ aabcdd (0,6)(3,4) +E ^.+$ vivi (0,4) +E ^(.+)$ vivi (0,4)(0,4) +E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) +E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) +E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) +E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) +E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) +E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) +E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) +E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) +E ((foo)|bar)!bas bar!bas (0,7)(0,3) +E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) +E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) +E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) +E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) +E (foo|(bar))!bas foo!bas (0,7)(0,3) +E (foo|bar)!bas bar!bas (0,7)(0,3) +E (foo|bar)!bas foo!bar!bas (4,11)(4,7) +E (foo|bar)!bas foo!bas (0,7)(0,3) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) +E .*(/XXX).* /XXX (0,4)(0,4) +E .*(\\XXX).* \XXX (0,4)(0,4) +E \\XXX \XXX (0,4) +E .*(/000).* /000 (0,4)(0,4) +E .*(\\000).* \000 (0,4)(0,4) +E \\000 \000 (0,4) diff --git a/vendor/regex/testdata/fowler/dat/nullsubexpr.dat b/vendor/regex/testdata/fowler/dat/nullsubexpr.dat new file mode 100644 index 000000000..a94430649 --- /dev/null +++ b/vendor/regex/testdata/fowler/dat/nullsubexpr.dat @@ -0,0 +1,74 @@ +NOTE null subexpression matches : 2002-06-06 + +E (a*)* a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a*)+ a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a+)* a (0,1)(0,1) +E SAME x (0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a+)+ a (0,1)(0,1) +E SAME x NOMATCH +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) + +E ([a]*)* a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E ([a]*)+ a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E ([^b]*)* a (0,1)(0,1) +E SAME b (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaab (0,6)(0,6) +E ([ab]*)* a (0,1)(0,1) +E SAME aaaaaa (0,6)(0,6) +E SAME ababab (0,6)(0,6) +E SAME bababa (0,6)(0,6) +E SAME b (0,1)(0,1) +E SAME bbbbbb (0,6)(0,6) +E SAME aaaabcde (0,5)(0,5) +E ([^a]*)* b (0,1)(0,1) +E SAME bbbbbb (0,6)(0,6) +E SAME aaaaaa (0,0)(0,0) +E ([^ab]*)* ccccxx (0,6)(0,6) +E SAME ababab (0,0)(0,0) + +#E ((z)+|a)* zabcde (0,2)(1,2) +E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust + +#{E a+? aaaaaa (0,1) no *? +? mimimal match ops +#E (a) aaa (0,1)(0,1) +#E (a*?) aaa (0,0)(0,0) +#E (a)*? aaa (0,0) +#E (a*?)*? aaa (0,0) +#} + +B \(a*\)*\(x\) x (0,1)(0,0)(0,1) +B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) +B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) +B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) +B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) +B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) +B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) +B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) + +E (a*)*(x) x (0,1)(0,0)(0,1) +E (a*)*(x) ax (0,2)(0,1)(1,2) +E (a*)*(x) axa (0,2)(0,1)(1,2) + +E (a*)+(x) x (0,1)(0,0)(0,1) +E (a*)+(x) ax (0,2)(0,1)(1,2) +E (a*)+(x) axa (0,2)(0,1)(1,2) + +E (a*){2}(x) x (0,1)(0,0)(0,1) +E (a*){2}(x) ax (0,2)(1,1)(1,2) +E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/vendor/regex/testdata/fowler/dat/repetition.dat b/vendor/regex/testdata/fowler/dat/repetition.dat new file mode 100644 index 000000000..cf0d8382f --- /dev/null +++ b/vendor/regex/testdata/fowler/dat/repetition.dat @@ -0,0 +1,169 @@ +NOTE implicit vs. explicit repetitions : 2009-02-02 + +# Glenn Fowler <gsf@research.att.com> +# conforming matches (column 4) must match one of the following BREs +# NOMATCH +# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* +# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* +# i.e., each 3-tuple has two identical elements and one (?,?) + +E ((..)|(.)) NULL NOMATCH +E ((..)|(.))((..)|(.)) NULL NOMATCH +E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH + +E ((..)|(.)){1} NULL NOMATCH +E ((..)|(.)){2} NULL NOMATCH +E ((..)|(.)){3} NULL NOMATCH + +E ((..)|(.))* NULL (0,0) + +E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) +E ((..)|(.))((..)|(.)) a NOMATCH +E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH + +E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) +E ((..)|(.)){2} a NOMATCH +E ((..)|(.)){3} a NOMATCH + +E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) + +E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) +E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH + +E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) +E ((..)|(.)){3} aa NOMATCH + +E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) + +E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) +E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) + +E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) +#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) +E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go +E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) + +#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) +E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go + +E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) + +E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) +#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) +E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go + +E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) + +E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) + +E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) +#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) +E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go + +#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) +E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go + +E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) + +E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) +E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) + +E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) + +NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 + +# These test a bug in OS X / FreeBSD / NetBSD, and libtree. +# Linux/GLIBC gets the {8,} and {8,8} wrong. + +:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) +:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) +:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) +:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) +:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) +:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) +:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) +:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) +:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) +#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) +:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) +:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) +:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) +:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) +:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) +:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) +:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) +:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go +:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) + +# These test a fixed bug in my regex-tdfa that did not keep the expanded +# form properly grouped, so right association did the wrong thing with +# these ambiguous patterns (crafted just to test my code when I became +# suspicious of my implementation). The first subexpression should use +# "ab" then "a" then "bcd". + +# OS X / FreeBSD / NetBSD badly fail many of these, with impossible +# results like (0,6)(4,5)(6,6). + +#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust +:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH +#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust +:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH +#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust + +# The above worked on Linux/GLIBC but the following often fail. +# They also trip up OS X / FreeBSD / NetBSD: + +#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH +#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH +#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/vendor/regex/testdata/fowler/nullsubexpr.toml b/vendor/regex/testdata/fowler/nullsubexpr.toml new file mode 100644 index 000000000..2f1f0183e --- /dev/null +++ b/vendor/regex/testdata/fowler/nullsubexpr.toml @@ -0,0 +1,405 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. + +[[test]] +name = "nullsubexpr3" +regex = '''(a*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr4" +regex = '''(a*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr5" +regex = '''(a*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr6" +regex = '''(a*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr7" +regex = '''(a*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr8" +regex = '''(a*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr9" +regex = '''(a*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr10" +regex = '''(a*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr11" +regex = '''(a+)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr12" +regex = '''(a+)*''' +haystack = '''x''' +matches = [[[0, 0], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr13" +regex = '''(a+)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr14" +regex = '''(a+)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr15" +regex = '''(a+)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr16" +regex = '''(a+)+''' +haystack = '''x''' +matches = [] +match-limit = 1 + +[[test]] +name = "nullsubexpr17" +regex = '''(a+)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr18" +regex = '''(a+)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr20" +regex = '''([a]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr21" +regex = '''([a]*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr22" +regex = '''([a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr23" +regex = '''([a]*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr24" +regex = '''([a]*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr25" +regex = '''([a]*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr26" +regex = '''([a]*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr27" +regex = '''([a]*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr28" +regex = '''([^b]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr29" +regex = '''([^b]*)*''' +haystack = '''b''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr30" +regex = '''([^b]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr31" +regex = '''([^b]*)*''' +haystack = '''aaaaaab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr32" +regex = '''([ab]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr33" +regex = '''([ab]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr34" +regex = '''([ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr35" +regex = '''([ab]*)*''' +haystack = '''bababa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr36" +regex = '''([ab]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr37" +regex = '''([ab]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr38" +regex = '''([ab]*)*''' +haystack = '''aaaabcde''' +matches = [[[0, 5], [0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr39" +regex = '''([^a]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr40" +regex = '''([^a]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr41" +regex = '''([^a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr42" +regex = '''([^ab]*)*''' +haystack = '''ccccxx''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr43" +regex = '''([^ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "nullsubexpr46" +regex = '''((z)+|a)*''' +haystack = '''zabcde''' +matches = [[[0, 2], [1, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr64" +regex = '''(a*)*(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr65" +regex = '''(a*)*(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr66" +regex = '''(a*)*(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr68" +regex = '''(a*)+(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr69" +regex = '''(a*)+(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr70" +regex = '''(a*)+(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr72" +regex = '''(a*){2}(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr73" +regex = '''(a*){2}(x)''' +haystack = '''ax''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr74" +regex = '''(a*){2}(x)''' +haystack = '''axa''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/fowler/repetition.toml b/vendor/regex/testdata/fowler/repetition.toml new file mode 100644 index 000000000..d6a711202 --- /dev/null +++ b/vendor/regex/testdata/fowler/repetition.toml @@ -0,0 +1,746 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. + +[[test]] +name = "repetition10" +regex = '''((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition11" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition12" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition14" +regex = '''((..)|(.)){1}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition15" +regex = '''((..)|(.)){2}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition16" +regex = '''((..)|(.)){3}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition18" +regex = '''((..)|(.))*''' +haystack = '''''' +matches = [[[0, 0], [], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition20" +regex = '''((..)|(.))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition21" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition22" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition24" +regex = '''((..)|(.)){1}''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition25" +regex = '''((..)|(.)){2}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition26" +regex = '''((..)|(.)){3}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition28" +regex = '''((..)|(.))*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition30" +regex = '''((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition31" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition32" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition34" +regex = '''((..)|(.)){1}''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition35" +regex = '''((..)|(.)){2}''' +haystack = '''aa''' +matches = [[[0, 2], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition36" +regex = '''((..)|(.)){3}''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition38" +regex = '''((..)|(.))*''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition40" +regex = '''((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition41" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition42" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition44" +regex = '''((..)|(.)){1}''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition46" +regex = '''((..)|(.)){2}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition47" +regex = '''((..)|(.)){3}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition50" +regex = '''((..)|(.))*''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition52" +regex = '''((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition53" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition54" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition56" +regex = '''((..)|(.)){1}''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition57" +regex = '''((..)|(.)){2}''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition59" +regex = '''((..)|(.)){3}''' +haystack = '''aaaa''' +matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition61" +regex = '''((..)|(.))*''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition63" +regex = '''((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition64" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition65" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition67" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition68" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition70" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition73" +regex = '''((..)|(.))*''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition75" +regex = '''((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition76" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition77" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition79" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition80" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition81" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition83" +regex = '''((..)|(.))*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive90" +regex = '''X(.?){0,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive91" +regex = '''X(.?){1,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive92" +regex = '''X(.?){2,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive93" +regex = '''X(.?){3,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive94" +regex = '''X(.?){4,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive95" +regex = '''X(.?){5,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive96" +regex = '''X(.?){6,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive97" +regex = '''X(.?){7,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive98" +regex = '''X(.?){8,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive100" +regex = '''X(.?){0,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive102" +regex = '''X(.?){1,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive104" +regex = '''X(.?){2,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive106" +regex = '''X(.?){3,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive108" +regex = '''X(.?){4,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive110" +regex = '''X(.?){5,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive112" +regex = '''X(.?){6,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive114" +regex = '''X(.?){7,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive115" +regex = '''X(.?){8,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive127" +regex = '''(a|ab|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive129" +regex = '''(a|ab|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive130" +regex = '''(a|ab|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive131" +regex = '''(a|ab|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive132" +regex = '''(a|ab|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive134" +regex = '''(a|ab|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive136" +regex = '''(a|ab|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive137" +regex = '''(a|ab|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive138" +regex = '''(a|ab|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive139" +regex = '''(a|ab|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive141" +regex = '''(a|ab|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive143" +regex = '''(a|ab|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive149" +regex = '''(ab|a|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive151" +regex = '''(ab|a|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive153" +regex = '''(ab|a|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive155" +regex = '''(ab|a|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive156" +regex = '''(ab|a|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive158" +regex = '''(ab|a|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive160" +regex = '''(ab|a|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive162" +regex = '''(ab|a|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive164" +regex = '''(ab|a|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive165" +regex = '''(ab|a|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive167" +regex = '''(ab|a|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive169" +regex = '''(ab|a|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/iter.toml b/vendor/regex/testdata/iter.toml new file mode 100644 index 000000000..329b9f031 --- /dev/null +++ b/vendor/regex/testdata/iter.toml @@ -0,0 +1,143 @@ +[[test]] +name = "1" +regex = "a" +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] + +[[test]] +name = "2" +regex = "a" +haystack = "aba" +matches = [[0, 1], [2, 3]] + +[[test]] +name = "empty1" +regex = '' +haystack = '' +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "start1" +regex = "^a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "start2" +regex = "^a" +haystack = "aa" +matches = [[0, 1]] + +[[test]] +name = "anchored1" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +# This test is pretty subtle. It demonstrates the crucial difference between +# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively +# matches at the start of a haystack and nowhere else. The latter regex has +# no such restriction, but its automaton is constructed such that it lacks a +# `.*?` prefix. So it can actually produce matches at multiple locations. +# The anchored3 test drives this point home. +[[test]] +name = "anchored2" +regex = "a" +haystack = "aa" +matches = [[0, 1], [1, 2]] +anchored = true + +# Unlikely anchored2, this test stops matching anything after it sees `b` +# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it +# determines that there are no remaining matches. +[[test]] +name = "anchored3" +regex = "a" +haystack = "aaba" +matches = [[0, 1], [1, 2]] +anchored = true + +[[test]] +name = "nonempty-followedby-empty" +regex = 'abc|.*?' +haystack = "abczzz" +matches = [[0, 3], [4, 4], [5, 5], [6, 6]] + +[[test]] +name = "nonempty-followedby-oneempty" +regex = 'abc|.*?' +haystack = "abcz" +matches = [[0, 3], [4, 4]] + +[[test]] +name = "nonempty-followedby-onemixed" +regex = 'abc|.*?' +haystack = "abczabc" +matches = [[0, 3], [4, 7]] + +[[test]] +name = "nonempty-followedby-twomixed" +regex = 'abc|.*?' +haystack = "abczzabc" +matches = [[0, 3], [4, 4], [5, 8]] diff --git a/vendor/regex/testdata/leftmost-all.toml b/vendor/regex/testdata/leftmost-all.toml new file mode 100644 index 000000000..e3fd950b6 --- /dev/null +++ b/vendor/regex/testdata/leftmost-all.toml @@ -0,0 +1,25 @@ +[[test]] +name = "alt" +regex = 'foo|foobar' +haystack = "foobar" +matches = [[0, 6]] +match-kind = "all" +search-kind = "leftmost" + +[[test]] +name = "multi" +regex = ['foo', 'foobar'] +haystack = "foobar" +matches = [ + { id = 1, span = [0, 6] }, +] +match-kind = "all" +search-kind = "leftmost" + +[[test]] +name = "dotall" +regex = '(?s:.)' +haystack = "foobar" +matches = [[5, 6]] +match-kind = "all" +search-kind = "leftmost" diff --git a/vendor/regex/testdata/line-terminator.toml b/vendor/regex/testdata/line-terminator.toml new file mode 100644 index 000000000..a398dafa2 --- /dev/null +++ b/vendor/regex/testdata/line-terminator.toml @@ -0,0 +1,109 @@ +# This tests that we can switch the line terminator to the NUL byte. +[[test]] +name = "nul" +regex = '(?m)^[a-z]+$' +haystack = '\x00abc\x00' +matches = [[1, 4]] +unescape = true +line-terminator = '\x00' + +# This tests that '.' will not match the configured line terminator, but will +# match \n. +[[test]] +name = "dot-changes-with-line-terminator" +regex = '.' +haystack = '\x00\n' +matches = [[1, 2]] +unescape = true +line-terminator = '\x00' + +# This tests that when we switch the line terminator, \n is no longer +# recognized as the terminator. +[[test]] +name = "not-line-feed" +regex = '(?m)^[a-z]+$' +haystack = '\nabc\n' +matches = [] +unescape = true +line-terminator = '\x00' + +# This tests that we can set the line terminator to a non-ASCII byte and have +# it behave as expected. +[[test]] +name = "non-ascii" +regex = '(?m)^[a-z]+$' +haystack = '\xFFabc\xFF' +matches = [[1, 4]] +unescape = true +line-terminator = '\xFF' +utf8 = false + +# This tests a tricky case where the line terminator is set to \r. This ensures +# that the StartLF look-behind assertion is tracked when computing the start +# state. +[[test]] +name = "carriage" +regex = '(?m)^[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '\r' + +# This tests that we can set the line terminator to a byte corresponding to a +# word character, and things work as expected. +[[test]] +name = "word-byte" +regex = '(?m)^[a-z]+$' +haystack = 'ZabcZ' +matches = [[1, 4]] +unescape = true +line-terminator = 'Z' + +# This tests that we can set the line terminator to a byte corresponding to a +# non-word character, and things work as expected. +[[test]] +name = "non-word-byte" +regex = '(?m)^[a-z]+$' +haystack = '%abc%' +matches = [[1, 4]] +unescape = true +line-terminator = '%' + +# This combines "set line terminator to a word byte" with a word boundary +# assertion, which should result in no match even though ^/$ matches. +[[test]] +name = "word-boundary" +regex = '(?m)^\b[a-z]+\b$' +haystack = 'ZabcZ' +matches = [] +unescape = true +line-terminator = 'Z' + +# Like 'word-boundary', but does an anchored search at the point where ^ +# matches, but where \b should not. +[[test]] +name = "word-boundary-at" +regex = '(?m)^\b[a-z]+\b$' +haystack = 'ZabcZ' +matches = [] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' + +# Like 'word-boundary-at', but flips the word boundary to a negation. This +# in particular tests a tricky case in DFA engines, where they must consider +# explicitly that a starting configuration from a custom line terminator may +# also required setting the "is from word byte" flag on a state. Otherwise, +# it's treated as "not from a word byte," which would result in \B not matching +# here when it should. +[[test]] +name = "not-word-boundary-at" +regex = '(?m)^\B[a-z]+\B$' +haystack = 'ZabcZ' +matches = [[1, 4]] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' diff --git a/vendor/regex/testdata/misc.toml b/vendor/regex/testdata/misc.toml new file mode 100644 index 000000000..c65531f5d --- /dev/null +++ b/vendor/regex/testdata/misc.toml @@ -0,0 +1,99 @@ +[[test]] +name = "ascii-literal" +regex = "a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "ascii-literal-not" +regex = "a" +haystack = "z" +matches = [] + +[[test]] +name = "ascii-literal-anchored" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +[[test]] +name = "ascii-literal-anchored-not" +regex = "a" +haystack = "z" +matches = [] +anchored = true + +[[test]] +name = "anchor-start-end-line" +regex = '(?m)^bar$' +haystack = "foo\nbar\nbaz" +matches = [[4, 7]] + +[[test]] +name = "prefix-literal-match" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] + +[[test]] +name = "prefix-literal-match-ascii" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "prefix-literal-no-match" +regex = '^abc' +haystack = "zabc" +matches = [] + +[[test]] +name = "one-literal-edge" +regex = 'abc' +haystack = "xxxxxab" +matches = [] + +[[test]] +name = "terminates" +regex = 'a$' +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "suffix-100" +regex = '.*abcd' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-200" +regex = '.*(?:abcd)+' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-300" +regex = '.*(?:abcd)+' +haystack = "abcdabcd" +matches = [[0, 8]] + +[[test]] +name = "suffix-400" +regex = '.*(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-500" +regex = '.*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-600" +regex = '[^abcd]*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[4, 9]] diff --git a/vendor/regex/testdata/multiline.toml b/vendor/regex/testdata/multiline.toml new file mode 100644 index 000000000..3acc901d5 --- /dev/null +++ b/vendor/regex/testdata/multiline.toml @@ -0,0 +1,845 @@ +[[test]] +name = "basic1" +regex = '(?m)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf-cr" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\rdef\rxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic2" +regex = '(?m)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf" +regex = '(?Rm)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf-cr" +regex = '(?Rm)^$' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic3" +regex = '(?m)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf" +regex = '(?Rm)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf-cr" +regex = '(?Rm)^' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic4" +regex = '(?m)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf" +regex = '(?Rm)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf-cr" +regex = '(?Rm)$' +haystack = "abc\rdef\rxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic5" +regex = '(?m)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf" +regex = '(?Rm)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf-cr" +regex = '(?Rm)^[a-z]' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic6" +regex = '(?m)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf" +regex = '(?Rm)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf-cr" +regex = '(?Rm)[a-z]^' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic7" +regex = '(?m)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf" +regex = '(?Rm)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf-cr" +regex = '(?Rm)[a-z]$' +haystack = "abc\rdef\rxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic8" +regex = '(?m)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf" +regex = '(?Rm)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf-cr" +regex = '(?Rm)$[a-z]' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic9" +regex = '(?m)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "basic9-crlf" +regex = '(?Rm)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "repeat1" +regex = '(?m)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf" +regex = '(?Rm)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf-cr" +regex = '(?Rm)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf" +regex = '(?R)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf-cr" +regex = '(?R)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat2" +regex = '(?m)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf" +regex = '(?Rm)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf-cr" +regex = '(?Rm)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-no-multi" +regex = '(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf" +regex = '(?R)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf-cr" +regex = '(?R)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat3" +regex = '(?m)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf" +regex = '(?Rm)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf-cr" +regex = '(?Rm)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi" +regex = '(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf" +regex = '(?R)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf-cr" +regex = '(?R)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat4" +regex = '(?m)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf" +regex = '(?Rm)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf-cr" +regex = '(?Rm)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-no-multi" +regex = '(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf" +regex = '(?R)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf-cr" +regex = '(?R)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat5" +regex = '(?m)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf" +regex = '(?Rm)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf-cr" +regex = '(?Rm)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi" +regex = '(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf" +regex = '(?R)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf-cr" +regex = '(?R)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat6" +regex = '(?m)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf-cr" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-no-multi" +regex = '(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat7" +regex = '(?m)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-no-multi" +regex = '(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat8" +regex = '(?m)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-no-multi" +regex = '(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat9" +regex = '(?m)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-no-multi" +regex = '(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat10" +regex = '(?m)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-no-multi" +regex = '(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat11" +regex = '(?m)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf" +regex = '(?Rm)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf-cr" +regex = '(?Rm)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi" +regex = '^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf" +regex = '(?R)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf-cr" +regex = '(?R)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat12" +regex = '(?m)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf" +regex = '(?Rm)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf-cr" +regex = '(?Rm)^+' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-no-multi" +regex = '^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf" +regex = '(?R)^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf-cr" +regex = '(?R)^+' +haystack = "\raa\r" +matches = [[0, 0]] + +[[test]] +name = "repeat13" +regex = '(?m)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf" +regex = '(?Rm)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf-cr" +regex = '(?Rm)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi" +regex = '$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf" +regex = '(?R)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf-cr" +regex = '(?R)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat14" +regex = '(?m)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf" +regex = '(?Rm)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf-cr" +regex = '(?Rm)$+' +haystack = "\raa\r" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-no-multi" +regex = '$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf" +regex = '(?R)$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf-cr" +regex = '(?R)$+' +haystack = "\raa\r" +matches = [[4, 4]] + +[[test]] +name = "repeat15" +regex = '(?m)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf" +regex = '(?Rm)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf-cr" +regex = '(?Rm)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-no-multi" +regex = '(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf" +regex = '(?R)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf-cr" +regex = '(?R)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat16" +regex = '(?m)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf" +regex = '(?Rm)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf-cr" +regex = '(?Rm)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-no-multi" +regex = '(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf" +regex = '(?R)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf-cr" +regex = '(?R)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat17" +regex = '(?m)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf" +regex = '(?Rm)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf-cr" +regex = '(?Rm)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-no-multi" +regex = '(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat17-no-multi-crlf" +regex = '(?R)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat17-no-multi-crlf-cr" +regex = '(?R)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat18" +regex = '(?m)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf" +regex = '(?Rm)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf-cr" +regex = '(?Rm)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-no-multi" +regex = '(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf" +regex = '(?R)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf-cr" +regex = '(?R)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "match-line-100" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-200" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/no-unicode.toml b/vendor/regex/testdata/no-unicode.toml new file mode 100644 index 000000000..0ddac4c96 --- /dev/null +++ b/vendor/regex/testdata/no-unicode.toml @@ -0,0 +1,222 @@ +[[test]] +name = "invalid-utf8-literal1" +regex = '\xFF' +haystack = '\xFF' +matches = [[0, 1]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "mixed" +regex = '(?:.+)(?-u)(?:.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [[0, 5]] +utf8 = false +unescape = true + + +[[test]] +name = "case1" +regex = "a" +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false + +[[test]] +name = "case2" +regex = "[a-z]+" +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false + +[[test]] +name = "case3" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true + +[[test]] +name = "case4" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false + + +[[test]] +name = "negate1" +regex = "[^a]" +haystack = "δ" +matches = [[0, 2]] + +[[test]] +name = "negate2" +regex = "[^a]" +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + + +[[test]] +name = "dotstar-prefix1" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +unicode = false +utf8 = false +unescape = true + +[[test]] +name = "dotstar-prefix2" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +utf8 = false +unescape = true + + +[[test]] +name = "null-bytes1" +regex = '[^\x00]+\x00' +haystack = 'foo\x00' +matches = [[0, 4]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "word-ascii" +regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] + +[[test]] +name = "decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false + +[[test]] +name = "decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "space-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "space-unicode" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 4]] + + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8. +name = "iter2-bytes" +regex = '' +haystack = 'b\xFFr' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unescape = true +utf8 = false + + +# These test that unanchored prefixes can munch through invalid UTF-8 even when +# utf8 is enabled. +# +# This test actually reflects an interesting simplification in how the Thompson +# NFA is constructed. It used to be that the NFA could be built with an +# unanchored prefix that either matched any byte or _only_ matched valid UTF-8. +# But the latter turns out to be pretty precarious when it comes to prefilters, +# because if you search a haystack that contains invalid UTF-8 but have an +# unanchored prefix that requires UTF-8, then prefilters are no longer a valid +# optimization because you actually have to check that everything is valid +# UTF-8. +# +# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in +# order to guarantee that we only match at valid UTF-8 boundaries. But this +# isn't actually true! There are really only two things to consider here: +# +# 1) Will a regex match split an encoded codepoint? No. Because by construction, +# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming +# all of the UTF-8 modes are enabled). +# +# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no, +# assuming all of the UTF-8 modes are enabled. +[[test]] +name = "unanchored-invalid-utf8-match-100" +regex = '[a-z]' +haystack = '\xFFa\xFF' +matches = [[1, 2]] +unescape = true +utf8 = false + +# This test shows that we can still prevent a match from occurring by requiring +# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the +# behavior of not munching through invalid UTF-8 anywhere is needed, then it +# can be achieved thusly. +[[test]] +name = "unanchored-invalid-utf8-nomatch" +regex = '^(?s:.)*?[a-z]' +haystack = '\xFFa\xFF' +matches = [] +unescape = true +utf8 = false + +# This is a tricky test that makes sure we don't accidentally do a kind of +# unanchored search when we've requested that a regex engine not report +# empty matches that split a codepoint. This test caught a regression during +# development where the code for skipping over bad empty matches would do so +# even if the search should have been anchored. This is ultimately what led to +# making 'anchored' an 'Input' option, so that it was always clear what kind +# of search was being performed. (Before that, whether a search was anchored +# or not was a config knob on the regex engine.) This did wind up making DFAs +# a little more complex to configure (with their 'StartKind' knob), but it +# generally smoothed out everything else. +# +# Great example of a test whose failure motivated a sweeping API refactoring. +[[test]] +name = "anchored-iter-empty-utf8" +regex = '' +haystack = 'a☃z' +matches = [[0, 0], [1, 1]] +unescape = false +utf8 = true +anchored = true diff --git a/vendor/regex/testdata/overlapping.toml b/vendor/regex/testdata/overlapping.toml new file mode 100644 index 000000000..7bcd45a2f --- /dev/null +++ b/vendor/regex/testdata/overlapping.toml @@ -0,0 +1,280 @@ +# NOTE: We define a number of tests where the *match* kind is 'leftmost-first' +# but the *search* kind is 'overlapping'. This is a somewhat nonsensical +# combination and can produce odd results. Nevertheless, those results should +# be consistent so we test them here. (At the time of writing this note, I +# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result +# in unspecified behavior.) + +# This demonstrates how a full overlapping search is obvious quadratic. This +# regex reports a match for every substring in the haystack. +[[test]] +name = "ungreedy-dotstar-matches-everything-100" +regex = [".*?"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "greedy-dotstar-matches-everything-100" +regex = [".*"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-110" +regex = '☃+' +haystack = "☃☃☃" +matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-110" +regex = '☃+' +haystack = "☃☃☃" +matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-200" +regex = '(abc)+' +haystack = "zzabcabczzabc" +matches = [ + [[2, 5], [2, 5]], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-200" +regex = '(abc)+' +haystack = "zzabcabczzabc" +matches = [ + [[2, 5], [2, 5]], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], + [[4, 4], []], + [[5, 5], []], + [[2, 5], [2, 5]], + [[6, 6], []], + [[7, 7], []], + [[8, 8], []], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[9, 9], []], + [[10, 10], []], + [[11, 11], []], + [[12, 12], []], + [[13, 13], []], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-leftmost-first" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-all" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "alt-leftmost-first-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "alt-all-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-000" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-000" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-010" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "iter1-incomplete-utf8" +regex = '' +haystack = '\xE2\x98' # incomplete snowman +matches = [[0, 0], [1, 1], [2, 2]] +match-kind = "all" +search-kind = "overlapping" +unescape = true +utf8 = false + +[[test]] +name = "scratch" +regex = ['sam', 'samwise'] +haystack = "samwise" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "overlapping" diff --git a/vendor/regex/testdata/regex-lite.toml b/vendor/regex/testdata/regex-lite.toml new file mode 100644 index 000000000..1769d803d --- /dev/null +++ b/vendor/regex/testdata/regex-lite.toml @@ -0,0 +1,98 @@ +# These tests are specifically written to test the regex-lite crate. While it +# largely has the same semantics as the regex crate, there are some differences +# around Unicode support and UTF-8. +# +# To be clear, regex-lite supports far fewer patterns because of its lack of +# Unicode support, nested character classes and character class set operations. +# What we're talking about here are the patterns that both crates support but +# where the semantics might differ. + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-decimal" +regex = '\d' +haystack = '᠕' +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-space" +regex = '\s' +haystack = "\u2000" +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-word" +regex = '\w' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for word boundary assertions. +[[test]] +name = "word-boundary" +regex = '\b' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for negated word boundary +# assertions. But note that it should still not split codepoints! +[[test]] +name = "word-boundary-negated" +regex = '\B' +haystack = 'δ' +matches = [[0, 0], [2, 2]] +unicode = true + +# While we're here, the empty regex---which matches at every +# position---shouldn't split a codepoint either. +[[test]] +name = "empty-no-split-codepoint" +regex = '' +haystack = '💩' +matches = [[0, 0], [4, 4]] +unicode = true + +# A dot always matches a full codepoint. +[[test]] +name = "dot-always-matches-codepoint" +regex = '.' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# A negated character class also always matches a full codepoint. +[[test]] +name = "negated-class-always-matches-codepoint" +regex = '[^a]' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# regex-lite only supports ASCII-aware case insensitive matching. +[[test]] +name = "case-insensitive-is-ascii-only" +regex = 's' +haystack = 'ſ' +matches = [] +unicode = true +case-insensitive = true + +# Negated word boundaries shouldn't split a codepoint, but they will match +# between invalid UTF-8. +# +# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in +# regex-lite. This can't happen in the main API because &str can't contain +# invalid UTF-8. +# [[test]] +# name = "word-boundary-invalid-utf8" +# regex = '\B' +# haystack = '\xFF\xFF\xFF\xFF' +# unescape = true +# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +# unicode = true +# utf8 = false diff --git a/vendor/regex/testdata/regression.toml b/vendor/regex/testdata/regression.toml new file mode 100644 index 000000000..53b0701a3 --- /dev/null +++ b/vendor/regex/testdata/regression.toml @@ -0,0 +1,830 @@ +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-100" +regex = '(*)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-200" +regex = '(?:?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-300" +regex = '(?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-400" +regex = '*' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-100" +regex = '(?i-u)[a_]+' +haystack = "A_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-200" +regex = '(?i-u)[A_]+' +haystack = "a_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/76 +[[test]] +name = "unicode-case-lower-nocase-flag" +regex = '(?i)\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-100" +regex = '(?i)[^x]' +haystack = "x" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-200" +regex = '(?i)[^x]' +haystack = "X" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/101 +[[test]] +name = "ascii-word-underscore" +regex = '[[:word:]]' +haystack = "_" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/129 +[[test]] +name = "captures-repeat" +regex = '([a-f]){2}(?P<foo>[x-z])' +haystack = "abx" +matches = [ + [[0, 3], [1, 2], [2, 3]], +] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-100" +regex = 'ab?|$' +haystack = "az" +matches = [[0, 1], [2, 2]] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-200" +regex = '^(?:.*?)(?:\n|\r\n?|$)' +haystack = "ab\rcd" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/169 +[[test]] +name = "leftmost-first-prefix" +regex = 'z*azb' +haystack = "azb" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/191 +[[test]] +name = "many-alternates" +regex = '1|2|3|4|5|6|7|8|9|10|int' +haystack = "int" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-100" +regex = '\b' +haystack = "Should this (work?)" +matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-200" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = "word-boundary-ascii-no-capture" +regex = '\B' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = "word-boundary-ascii-capture" +regex = '(?:\B)' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/268 +[[test]] +name = "partial-anchor" +regex = '^a|b' +haystack = "ba" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "endl-or-word-boundary" +regex = '(?m:$)|(?-u:\b)' +haystack = "\U0006084E" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "zero-or-end" +regex = '(?i-u:\x00)|$' +haystack = "\U000E682F" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "y-or-endl" +regex = '(?i-u:y)|(?m:$)' +haystack = "\U000B4331" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-start-x" +regex = '(?u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-ascii-start-x" +regex = '(?-u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "end-not-word-boundary" +regex = '$\B' +haystack = "\U0005C124\U000B576C" +matches = [[8, 8]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-begin" +regex = '^a|z' +haystack = "yyyyya" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-end" +regex = 'a$|z' +haystack = "ayyyyy" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/289 +[[test]] +name = "lits-unambiguous-100" +regex = '(?:ABC|CDA|BC)X' +haystack = "CDAX" +matches = [[0, 4]] + +# See: https://github.com/rust-lang/regex/issues/291 +[[test]] +name = "lits-unambiguous-200" +regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$' +haystack = "CIMG2341" +matches = [ + [[0, 8], [0, 4], [], [0, 4], [4, 8]], +] + +# See: https://github.com/rust-lang/regex/issues/303 +# +# 2022-09-19: This has now been "properly" fixed in that empty character +# classes are fully supported as something that can never match. This test +# used to be marked as 'compiles = false', but now it works. +[[test]] +name = "negated-full-byte-range" +regex = '[^\x00-\xFF]' +haystack = "" +matches = [] +compiles = true +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-prefix" +regex = 'a^{2}' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-suffix" +regex = '${2}a' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-100" +regex = 'a(b*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-200" +regex = 'a(bc*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-300" +regex = '(aa$)?' +haystack = "aaz" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] + +# Plucked from "Why aren’t regular expressions a lingua franca? an empirical +# study on the re-use and portability of regular expressions", The ACM Joint +# European Software Engineering Conference and Symposium on the Foundations of +# Software Engineering (ESEC/FSE), 2019. +# +# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909 +[[test]] +name = "captures-after-dfa-premature-end-400" +regex = '(a)\d*\.?\d+\b' +haystack = "a0.0c" +matches = [ + [[0, 2], [0, 1]], +] + +# See: https://github.com/rust-lang/regex/issues/437 +[[test]] +name = "literal-panic" +regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+' +haystack = "test" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/527 +[[test]] +name = "empty-flag-expr" +regex = '(?:(?:(?x)))' +haystack = "" +matches = [[0, 0]] + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab" +#regex = '[[:blank:]]' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = false +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab-inverted" +#regex = '^[[:^blank:]]+$' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = true +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/555 +[[test]] +name = "invalid-repetition" +regex = '(?m){1,1}' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/640 +[[test]] +name = "flags-are-unset" +regex = '(?:(?i)foo)|Bar' +haystack = "foo Foo bar Bar" +matches = [[0, 3], [4, 7], [12, 15]] + +# Note that 'Ј' is not 'j', but cyrillic Je +# https://en.wikipedia.org/wiki/Je_(Cyrillic) +# +# See: https://github.com/rust-lang/regex/issues/659 +[[test]] +name = "empty-group-with-unicode" +regex = '(?:)Ј01' +haystack = 'zЈ01' +matches = [[1, 5]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-ascii" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-minimal-ascii" +regex = '\b..\b' +haystack = "az,,b" +matches = [[0, 2], [2, 4]] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-100" +regex = '[0-4][0-4][0-4]000' +haystack = "153.230000" +matches = [[4, 10]] + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-200" +regex = '[0-9][0-9][0-9]000' +haystack = "153.230000\n" +matches = [[4, 10]] + +# This is a tricky case for the reverse suffix optimization, because it +# finds the 'foobar' match but the reverse scan must fail to find a match by +# correctly dealing with the word boundary following the 'foobar' literal when +# computing the start state. +# +# This test exists because I tried to break the following assumption that +# is currently in the code: that if a suffix is found and the reverse scan +# succeeds, then it's guaranteed that there is an overall match. Namely, the +# 'is_match' routine does *not* do another forward scan in this case because of +# this assumption. +[[test]] +name = "reverse-suffix-300" +regex = '\w+foobar\b' +haystack = "xyzfoobarZ" +matches = [] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops" +regex = '\bs(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops-ascii" +regex = '(?-u:\b)s(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/850 +[[test]] +name = "adjacent-line-boundary-100" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "line1\nline2" +matches = [[0, 5], [6, 11]] + +# Continued. +[[test]] +name = "adjacent-line-boundary-200" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "A\nB" +matches = [[0, 1], [2, 3]] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-100" +regex = '^a[[:^space:]]' +haystack = "a " +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-200" +regex = '^a[[:^space:]]' +haystack = "foo boo a" +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-300" +regex = '^-[a-z]' +haystack = "r-f" +matches = [] + +# Tests that a possible Aho-Corasick optimization works correctly. It only +# kicks in when we have a lot of literals. By "works correctly," we mean that +# leftmost-first match semantics are properly respected. That is, samwise +# should match, not sam. +# +# There is no issue for this bug. +[[test]] +name = "aho-corasick-100" +regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z' +haystack = "samwise" +matches = [[0, 7]] + +# See: https://github.com/rust-lang/regex/issues/921 +[[test]] +name = "interior-anchor-capture" +regex = '(a$)b$' +haystack = 'ab' +matches = [] + +# I found this bug in the course of adding some of the regexes that Ruff uses +# to rebar. It turns out that the lazy DFA was finding a match that was being +# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack. +# +# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52 +[[test]] +name = "ruff-whitespace-around-keywords" +regex = '^(a|ab)$' +haystack = "ab" +anchored = true +unicode = false +utf8 = true +matches = [[[0, 2], [0, 2]]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-0" +regex = '(?:(?-u:\b)|(?u:h))+' +haystack = "h" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-1" +regex = '(?u:\B)' +haystack = "鋸" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-2" +regex = '(?:(?u:\b)|(?s-u:.))+' +haystack = "oB" +unicode = true +utf8 = false +matches = [[0, 0], [1, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3-utf8" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = true +matches = [[0, 0], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-4" +regex = '(?m:$)(?m:^)(?su:.)' +haystack = "\n‣" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-5" +regex = '(?m:$)^(?m:^)' +haystack = "\n" +unicode = true +utf8 = false +matches = [[0, 0]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-6" +regex = '(?P<kp>(?iu:do)(?m:$))*' +haystack = "dodo" +unicode = true +utf8 = false +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 4], [2, 4]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-7" +regex = '(?u:\B)' +haystack = "䡁" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-8" +regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-9" +regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)' +haystack = "\n\n" +unicode = true +utf8 = false +matches = [ + [[1, 2], [1, 2]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-10" +regex = '(?m:$)(?m:$)^(?su:.)' +haystack = "\n\u0081¨\u200a" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-11" +regex = '(?-u:\B)(?m:^)' +haystack = "0\n" +unicode = true +utf8 = false +matches = [[2, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-12" +regex = '(?:(?u:\b)|(?-u:.))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/969 +[[test]] +name = "i969" +regex = 'c.*d\z' +haystack = "ababcd" +bounds = [4, 6] +search-kind = "earliest" +matches = [[4, 6]] + +# I found this during the regex-automata migration. This is the fowler basic +# 154 test, but without anchored = true and without a match limit. +# +# This test caught a subtle bug in the hybrid reverse DFA search, where it +# would skip over the termination condition if it entered a start state. This +# was a double bug. Firstly, the reverse DFA shouldn't have had start states +# specialized in the first place, and thus it shouldn't have possible to detect +# that the DFA had entered a start state. The second bug was that the start +# state handling was incorrect by jumping over the termination condition. +[[test]] +name = "fowler-basic154-unanchored" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] + +# From: https://github.com/rust-lang/regex/issues/981 +# +# This was never really a problem in the new architecture because the +# regex-automata engines are far more principled about how they deal with +# look-around. (This was one of the many reasons I wanted to re-work the +# original regex crate engines.) +[[test]] +name = "word-boundary-interact-poorly-with-literal-optimizations" +regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))' +haystack = 'ubi-Darwin-x86_64.tar.gz' +matches = [] + +# This was found during fuzz testing of regex. It provoked a panic in the meta +# engine as a result of the reverse suffix optimization. Namely, it hit a case +# where a suffix match was found, a corresponding reverse match was found, but +# the forward search turned up no match. The forward search should always match +# if the suffix and reverse search match. +# +# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy +# and fully compiled) engines. It was caused by a mishandling of the collection +# of NFA state IDs in the generic determinization code (which is why both types +# of DFA were impacted). Namely, when a fail state was encountered (that's the +# `[^\s\S]` in the pattern below), then it would just stop collecting states. +# But that's not correct since a later state could lead to a match. +[[test]] +name = "impossible-branch" +regex = '.*[^\s\S]A|B' +haystack = "B" +matches = [[0, 1]] + +# This was found during fuzz testing in regex-lite. The regex crate never +# suffered from this bug, but it causes regex-lite to incorrectly compile +# captures. +[[test]] +name = "captures-wrong-order" +regex = '(a){0}(a)' +haystack = 'a' +matches = [[[0, 1], [], [0, 1]]] + +# This tests a bug in how quit states are handled in the DFA. At some point +# during development, the DFAs were tweaked slightly such that if they hit +# a quit state (which means, they hit a byte that the caller configured should +# stop the search), then it might not return an error necessarily. Namely, if a +# match had already been found, then it would be returned instead of an error. +# +# But this is actually wrong! Why? Because even though a match had been found, +# it wouldn't be fully correct to return it once a quit state has been seen +# because you can't determine whether the match offset returned is the correct +# greedy/leftmost-first match. Since you can't complete the search as requested +# by the caller, the DFA should just stop and return an error. +# +# Interestingly, this does seem to produce an unavoidable difference between +# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs. +# The former will stop immediately once a match is known to occur and return +# 'Ok(true)', where as the latter could find the match but quit with an +# 'Err(..)' first. +# +# Thankfully, I believe this inconsistency between 'is_match()' and 'find()' +# cannot be observed in the higher level meta regex API because it specifically +# will try another engine that won't fail in the case of a DFA failing. +# +# This regression happened in the regex crate rewrite, but before anything got +# released. +[[test]] +name = "negated-unicode-word-boundary-dfa-fail" +regex = '\B.*' +haystack = "!\u02D7" +matches = [[0, 3]] + +# This failure was found in the *old* regex crate (prior to regex 1.9), but +# I didn't investigate why. My best guess is that it's a literal optimization +# bug. It didn't occur in the rewrite. +[[test]] +name = "missed-match" +regex = 'e..+e.ee>' +haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>' +matches = [[1, 26]] + +# This test came from the 'ignore' crate and tripped a bug in how accelerated +# DFA states were handled in an overlapping search. +[[test]] +name = "regex-to-glob" +regex = ['(?-u)^path1/[^/]*$'] +haystack = "path1/foo" +matches = [[0, 9]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-plus-shorter-than-expected" +regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex +# to demonstrate the extent of the rot. Sigh. +# +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-short" +regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# This regression test was found via the RegexSet APIs. It triggered a +# particular code path where a regex was compiled with 'All' match semantics +# (to support overlapping search), but got funneled down into a standard +# leftmost search when calling 'is_match'. This is fine on its own, but the +# leftmost search will use a prefilter and that's where this went awry. +# +# Namely, since 'All' semantics were used, the aho-corasick prefilter was +# incorrectly compiled with 'Standard' semantics. This was wrong because +# 'Standard' immediately attempts to report a match at every position, even if +# that would mean reporting a match past the leftmost match before reporting +# the leftmost match. This breaks the prefilter contract of never having false +# negatives and leads overall to the engine not finding a match. +# +# See: https://github.com/rust-lang/regex/issues/1070 +[[test]] +name = "prefilter-with-aho-corasick-standard-semantics" +regex = '(?m)^ *v [0-9]' +haystack = 'v 0' +matches = [ + { id = 0, spans = [[0, 3]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = true +utf8 = true + +# This tests that the PikeVM and the meta regex agree on a particular regex. +# This test previously failed when the ad hoc engines inside the meta engine +# did not handle quit states correctly. Namely, the Unicode word boundary here +# combined with a non-ASCII codepoint provokes the quit state. The ad hoc +# engines were previously returning a match even after entering the quit state +# if a match had been previously detected, but this is incorrect. The reason +# is that if a quit state is found, then the search must give up *immediately* +# because it prevents the search from finding the "proper" leftmost-first +# match. If it instead returns a match that has been found, it risks reporting +# an improper match, as it did in this case. +# +# See: https://github.com/rust-lang/regex/issues/1046 +[[test]] +name = "non-prefix-literal-quit-state" +regex = '.+\b\n' +haystack = "β77\n" +matches = [[0, 5]] + +# This is a regression test for some errant HIR interval set operations that +# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The +# issue here is that the HIR produced from the regex had out-of-order ranges. +# +# See: https://github.com/rust-lang/regex/issues/1103 +# Ref: https://github.com/rust-lang/regex/pull/1051 +# Ref: https://github.com/rust-lang/regex/pull/1102 +[[test]] +name = "hir-optimization-out-of-order-class" +regex = '^[[:alnum:]./-]+$' +haystack = "a-b" +matches = [[0, 3]] + +# This is a regression test for an improper reverse suffix optimization. This +# occurred when I "broadened" the applicability of the optimization to include +# multiple possible literal suffixes instead of only sticking to a non-empty +# longest common suffix. It turns out that, at least given how the reverse +# suffix optimization works, we need to stick to the longest common suffix for +# now. +# +# See: https://github.com/rust-lang/regex/issues/1110 +# See also: https://github.com/astral-sh/ruff/pull/7980 +[[test]] +name = 'improper-reverse-suffix-optimization' +regex = '(\\N\{[^}]+})|([{}])' +haystack = 'hiya \N{snowman} bye' +matches = [[[5, 16], [5, 16], []]] diff --git a/vendor/regex/testdata/set.toml b/vendor/regex/testdata/set.toml new file mode 100644 index 000000000..049e8a89d --- /dev/null +++ b/vendor/regex/testdata/set.toml @@ -0,0 +1,641 @@ +# Basic multi-regex tests. + +[[test]] +name = "basic10" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic10-leftmost-first" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic20" +regex = ["a", "a"] +haystack = "ba" +matches = [ + { id = 0, span = [1, 2] }, + { id = 1, span = [1, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic30" +regex = ["a", "b"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic40" +regex = ["a", "b"] +haystack = "b" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic50" +regex = ["a|b", "b|a"] +haystack = "b" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60-leftmost-first" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic61" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, + { id = 0, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic61-leftmost-first" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic70" +regex = ["abcd", "bcd", "cd", "d"] +haystack = "abcd" +matches = [ + { id = 0, span = [0, 4] }, + { id = 1, span = [1, 4] }, + { id = 2, span = [2, 4] }, + { id = 3, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic71" +regex = ["bcd", "cd", "d", "abcd"] +haystack = "abcd" +matches = [ + { id = 3, span = [0, 4] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic80" +regex = ["^foo", "bar$"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic81" +regex = ["^foo", "bar$"] +haystack = "foo bar" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [4, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic82" +regex = ["^foo", "bar$"] +haystack = "bar" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic90" +regex = ["[a-z]+$", "foo"] +haystack = "01234 foo" +matches = [ + { id = 0, span = [8, 9] }, + { id = 0, span = [7, 9] }, + { id = 0, span = [6, 9] }, + { id = 1, span = [6, 9] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic91" +regex = ["[a-z]+$", "foo"] +haystack = "foo 01234" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic100" +regex = [".*?", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic101" +regex = [".*", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic102" +regex = [".*", "a"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic110" +regex = ['\ba\b'] +haystack = "hello a bye" +matches = [ + { id = 0, span = [6, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic111" +regex = ['\ba\b', '\be\b'] +haystack = "hello a bye e" +matches = [ + { id = 0, span = [6, 7] }, + { id = 1, span = [12, 13] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic120" +regex = ["a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic121" +regex = [".*a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic122" +regex = [".*a", "β"] +haystack = "β" +matches = [ + { id = 1, span = [0, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic130" +regex = ["ab", "b"] +haystack = "ba" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +# These test cases where one of the regexes matches the empty string. + +[[test]] +name = "empty10" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 1, span = [0, 1] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty10-leftmost-first" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty11" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [0, 1] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty11-leftmost-first" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty20" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty20-leftmost-first" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty21" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty21-leftmost-first" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty22" +regex = ["(?:)", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty23" +regex = ["b", "(?:)"] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30-leftmost-first" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty31" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty31-leftmost-first" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty40" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty40-leftmost-first" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +# These test cases where there are no matches. + +[[test]] +name = "nomatch10" +regex = ["a", "a"] +haystack = "b" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch20" +regex = ["^foo", "bar$"] +haystack = "bar foo" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch30" +regex = [] +haystack = "a" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch40" +regex = ["^rooted$", '\.log$'] +haystack = "notrooted" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +# These test multi-regex searches with capture groups. +# +# NOTE: I wrote these tests in the course of developing a first class API for +# overlapping capturing group matches, but ultimately removed that API because +# the semantics for overlapping matches aren't totally clear. However, I've +# left the tests because I believe the semantics for these patterns are clear +# and because we can still test our "which patterns matched" APIs with them. + +[[test]] +name = "caps-010" +regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-020" +regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [1, 5], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-030" +regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-110" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-120" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-121" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen Foo Bar" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 0, spans = [[18, 25], [18, 21], [22, 25]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/substring.toml b/vendor/regex/testdata/substring.toml new file mode 100644 index 000000000..69595ce85 --- /dev/null +++ b/vendor/regex/testdata/substring.toml @@ -0,0 +1,36 @@ +# These tests check that regex engines perform as expected when the search is +# instructed to only search a substring of a haystack instead of the entire +# haystack. This tends to exercise interesting edge cases that are otherwise +# difficult to provoke. (But not necessarily impossible. Regex search iterators +# for example, make use of the "search just a substring" APIs by changing the +# starting position of a search to the end position of the previous match.) + +[[test]] +name = "unicode-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [] + +[[test]] +name = "unicode-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [] + +[[test]] +name = "ascii-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [[2, 5]] +unicode = false + +[[test]] +name = "ascii-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [[0, 3]] +unicode = false diff --git a/vendor/regex/testdata/unicode.toml b/vendor/regex/testdata/unicode.toml new file mode 100644 index 000000000..f4ac76bae --- /dev/null +++ b/vendor/regex/testdata/unicode.toml @@ -0,0 +1,517 @@ +# Basic Unicode literal support. +[[test]] +name = "literal1" +regex = '☃' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal2" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal3" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] +case-insensitive = true + +[[test]] +name = "literal4" +regex = 'Δ' +haystack = "δ" +matches = [[0, 2]] +case-insensitive = true + +# Unicode word boundaries. +[[test]] +name = "wb-100" +regex = '\d\b' +haystack = "6δ" +matches = [] + +[[test]] +name = "wb-200" +regex = '\d\b' +haystack = "6 " +matches = [[0, 1]] + +[[test]] +name = "wb-300" +regex = '\d\B' +haystack = "6δ" +matches = [[0, 1]] + +[[test]] +name = "wb-400" +regex = '\d\B' +haystack = "6 " +matches = [] + +# Unicode character class support. +[[test]] +name = "class1" +regex = '[☃Ⅰ]+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "class2" +regex = '\pN' +haystack = "Ⅰ" +matches = [[0, 3]] + +[[test]] +name = "class3" +regex = '\pN+' +haystack = "Ⅰ1Ⅱ2" +matches = [[0, 8]] + +[[test]] +name = "class4" +regex = '\PN+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class5" +regex = '[\PN]+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class6" +regex = '[^\PN]+' +haystack = "abⅠ" +matches = [[2, 5]] + +[[test]] +name = "class7" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 8]] + +[[test]] +name = "class8" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] +case-insensitive = true + +[[test]] +name = "class9" +regex = '\pL+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +[[test]] +name = "class10" +regex = '\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[8, 10]] + +# Unicode aware "Perl" character classes. +[[test]] +name = "perl1" +regex = '\w+' +haystack = "dδd" +matches = [[0, 4]] + +[[test]] +name = "perl2" +regex = '\w+' +haystack = "⥡" +matches = [] + +[[test]] +name = "perl3" +regex = '\W+' +haystack = "⥡" +matches = [[0, 3]] + +[[test]] +name = "perl4" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "perl5" +regex = '\d+' +haystack = "Ⅱ" +matches = [] + +[[test]] +name = "perl6" +regex = '\D+' +haystack = "Ⅱ" +matches = [[0, 3]] + +[[test]] +name = "perl7" +regex = '\s+' +haystack = " " +matches = [[0, 3]] + +[[test]] +name = "perl8" +regex = '\s+' +haystack = "☃" +matches = [] + +[[test]] +name = "perl9" +regex = '\S+' +haystack = "☃" +matches = [[0, 3]] + +# Specific tests for Unicode general category classes. +[[test]] +name = "class-gencat1" +regex = '\p{Cased_Letter}' +haystack = "A" +matches = [[0, 3]] + +[[test]] +name = "class-gencat2" +regex = '\p{Close_Punctuation}' +haystack = "❯" +matches = [[0, 3]] + +[[test]] +name = "class-gencat3" +regex = '\p{Connector_Punctuation}' +haystack = "⁀" +matches = [[0, 3]] + +[[test]] +name = "class-gencat4" +regex = '\p{Control}' +haystack = "\u009F" +matches = [[0, 2]] + +[[test]] +name = "class-gencat5" +regex = '\p{Currency_Symbol}' +haystack = "£" +matches = [[0, 3]] + +[[test]] +name = "class-gencat6" +regex = '\p{Dash_Punctuation}' +haystack = "〰" +matches = [[0, 3]] + +[[test]] +name = "class-gencat7" +regex = '\p{Decimal_Number}' +haystack = "𑓙" +matches = [[0, 4]] + +[[test]] +name = "class-gencat8" +regex = '\p{Enclosing_Mark}' +haystack = "\uA672" +matches = [[0, 3]] + +[[test]] +name = "class-gencat9" +regex = '\p{Final_Punctuation}' +haystack = "⸡" +matches = [[0, 3]] + +[[test]] +name = "class-gencat10" +regex = '\p{Format}' +haystack = "\U000E007F" +matches = [[0, 4]] + +[[test]] +name = "class-gencat11" +regex = '\p{Initial_Punctuation}' +haystack = "⸜" +matches = [[0, 3]] + +[[test]] +name = "class-gencat12" +regex = '\p{Letter}' +haystack = "Έ" +matches = [[0, 2]] + +[[test]] +name = "class-gencat13" +regex = '\p{Letter_Number}' +haystack = "ↂ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat14" +regex = '\p{Line_Separator}' +haystack = "\u2028" +matches = [[0, 3]] + +[[test]] +name = "class-gencat15" +regex = '\p{Lowercase_Letter}' +haystack = "ϛ" +matches = [[0, 2]] + +[[test]] +name = "class-gencat16" +regex = '\p{Mark}' +haystack = "\U000E01EF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat17" +regex = '\p{Math}' +haystack = "⋿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat18" +regex = '\p{Modifier_Letter}' +haystack = "𖭃" +matches = [[0, 4]] + +[[test]] +name = "class-gencat19" +regex = '\p{Modifier_Symbol}' +haystack = "🏿" +matches = [[0, 4]] + +[[test]] +name = "class-gencat20" +regex = '\p{Nonspacing_Mark}' +haystack = "\U0001E94A" +matches = [[0, 4]] + +[[test]] +name = "class-gencat21" +regex = '\p{Number}' +haystack = "⓿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat22" +regex = '\p{Open_Punctuation}' +haystack = "⦅" +matches = [[0, 3]] + +[[test]] +name = "class-gencat23" +regex = '\p{Other}' +haystack = "\u0BC9" +matches = [[0, 3]] + +[[test]] +name = "class-gencat24" +regex = '\p{Other_Letter}' +haystack = "ꓷ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat25" +regex = '\p{Other_Number}' +haystack = "㉏" +matches = [[0, 3]] + +[[test]] +name = "class-gencat26" +regex = '\p{Other_Punctuation}' +haystack = "𞥞" +matches = [[0, 4]] + +[[test]] +name = "class-gencat27" +regex = '\p{Other_Symbol}' +haystack = "⅌" +matches = [[0, 3]] + +[[test]] +name = "class-gencat28" +regex = '\p{Paragraph_Separator}' +haystack = "\u2029" +matches = [[0, 3]] + +[[test]] +name = "class-gencat29" +regex = '\p{Private_Use}' +haystack = "\U0010FFFD" +matches = [[0, 4]] + +[[test]] +name = "class-gencat30" +regex = '\p{Punctuation}' +haystack = "𑁍" +matches = [[0, 4]] + +[[test]] +name = "class-gencat31" +regex = '\p{Separator}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-gencat32" +regex = '\p{Space_Separator}' +haystack = "\u205F" +matches = [[0, 3]] + +[[test]] +name = "class-gencat33" +regex = '\p{Spacing_Mark}' +haystack = "\U00016F7E" +matches = [[0, 4]] + +[[test]] +name = "class-gencat34" +regex = '\p{Symbol}' +haystack = "⯈" +matches = [[0, 3]] + +[[test]] +name = "class-gencat35" +regex = '\p{Titlecase_Letter}' +haystack = "ῼ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat36" +regex = '\p{Unassigned}' +haystack = "\U0010FFFF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat37" +regex = '\p{Uppercase_Letter}' +haystack = "Ꝋ" +matches = [[0, 3]] + + +# Tests for Unicode emoji properties. +[[test]] +name = "class-emoji1" +regex = '\p{Emoji}' +haystack = "\u23E9" +matches = [[0, 3]] + +[[test]] +name = "class-emoji2" +regex = '\p{emoji}' +haystack = "\U0001F21A" +matches = [[0, 4]] + +[[test]] +name = "class-emoji3" +regex = '\p{extendedpictographic}' +haystack = "\U0001FA6E" +matches = [[0, 4]] + +[[test]] +name = "class-emoji4" +regex = '\p{extendedpictographic}' +haystack = "\U0001FFFD" +matches = [[0, 4]] + + +# Tests for Unicode grapheme cluster properties. +[[test]] +name = "class-gcb1" +regex = '\p{grapheme_cluster_break=prepend}' +haystack = "\U00011D46" +matches = [[0, 4]] + +[[test]] +name = "class-gcb2" +regex = '\p{gcb=regional_indicator}' +haystack = "\U0001F1E6" +matches = [[0, 4]] + +[[test]] +name = "class-gcb3" +regex = '\p{gcb=ri}' +haystack = "\U0001F1E7" +matches = [[0, 4]] + +[[test]] +name = "class-gcb4" +regex = '\p{regionalindicator}' +haystack = "\U0001F1FF" +matches = [[0, 4]] + +[[test]] +name = "class-gcb5" +regex = '\p{gcb=lvt}' +haystack = "\uC989" +matches = [[0, 3]] + +[[test]] +name = "class-gcb6" +regex = '\p{gcb=zwj}' +haystack = "\u200D" +matches = [[0, 3]] + +# Tests for Unicode word boundary properties. +[[test]] +name = "class-word-break1" +regex = '\p{word_break=Hebrew_Letter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break2" +regex = '\p{wb=hebrewletter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break3" +regex = '\p{wb=ExtendNumLet}' +haystack = "\uFF3F" +matches = [[0, 3]] + +[[test]] +name = "class-word-break4" +regex = '\p{wb=WSegSpace}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-word-break5" +regex = '\p{wb=numeric}' +haystack = "\U0001E950" +matches = [[0, 4]] + +# Tests for Unicode sentence boundary properties. +[[test]] +name = "class-sentence-break1" +regex = '\p{sentence_break=Lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break2" +regex = '\p{sb=lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break3" +regex = '\p{sb=Close}' +haystack = "\uFF60" +matches = [[0, 3]] + +[[test]] +name = "class-sentence-break4" +regex = '\p{sb=Close}' +haystack = "\U0001F677" +matches = [[0, 4]] + +[[test]] +name = "class-sentence-break5" +regex = '\p{sb=SContinue}' +haystack = "\uFF64" +matches = [[0, 3]] diff --git a/vendor/regex/testdata/utf8.toml b/vendor/regex/testdata/utf8.toml new file mode 100644 index 000000000..39e284b38 --- /dev/null +++ b/vendor/regex/testdata/utf8.toml @@ -0,0 +1,399 @@ +# These test the UTF-8 modes expose by regex-automata. Namely, when utf8 is +# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior +# is unspecified.) This also corresponds to building the regex engine with the +# following two guarantees: +# +# 1) For any non-empty match reported, its span is guaranteed to correspond to +# valid UTF-8. +# 2) All empty or zero-width matches reported must never split a UTF-8 +# encoded codepoint. If the haystack has invalid UTF-8, then this results in +# unspecified behavior. +# +# The (2) is in particular what we focus our testing on since (1) is generally +# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there. +# The thing with (2) is that it can't be described in the HIR, so the regex +# engines have to handle that case. Thus, we test it here. +# +# Note that it is possible to build a regex that has property (1) but not +# (2), and vice versa. This is done by building the HIR with 'utf8=true' but +# building the Thompson NFA with 'utf8=false'. We don't test that here because +# the harness doesn't expose a way to enable or disable UTF-8 mode with that +# granularity. Instead, those combinations are lightly tested via doc examples. +# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it +# because it cannot guarantee that its haystack is valid UTF-8. + +# This tests that an empty regex doesn't split a codepoint. +[[test]] +name = "empty-utf8yes" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex DOES split a codepoint when utf=false. +[[test]] +name = "empty-utf8no" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex doesn't split a codepoint, even if we give +# it bounds entirely within the codepoint. +# +# This is one of the trickier cases and is what motivated the current UTF-8 +# mode design. In particular, at one point, this test failed the 'is_match' +# variant of the test but not 'find'. This is because the 'is_match' code path +# is specifically optimized for "was a match found" rather than "where is the +# match." In the former case, you don't really care about the empty-vs-non-empty +# matches, and thus, the codepoint splitting filtering logic wasn't getting +# applied. (In multiple ways across multiple regex engines.) In this way, you +# can wind up with a situation where 'is_match' says "yes," but 'find' says, +# "I didn't find anything." Which is... not great. +# +# I could have decided to say that providing boundaries that themselves split +# a codepoint would have unspecified behavior. But I couldn't quite convince +# myself that such boundaries were the only way to get an inconsistency between +# 'is_match' and 'find'. +# +# Note that I also tried to come up with a test like this that fails without +# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree. +# But I couldn't do it, and I'm tempted to conclude it is impossible. The +# fundamental problem is that you need to simultaneously produce an empty match +# that splits a codepoint while *not* matching before or after the codepoint. +[[test]] +name = "empty-utf8yes-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex splits a codepoint when the bounds are +# entirely within the codepoint. +[[test]] +name = "empty-utf8no-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search. Since the start position is also a UTF-8 +# boundary, we get a match. +[[test]] +name = "empty-utf8yes-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. It almost doesn't change the +# result, except for the fact that since this is an anchored search and we +# always find all matches, the test harness will keep reporting matches until +# none are found. Because it's anchored, matches will be reported so long as +# they are directly adjacent. Since with UTF-8 mode the next anchored search +# after the match at [0, 0] fails, iteration stops (and doesn't find the last +# match at [4, 4]). +[[test]] +name = "empty-utf8no-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search, but also set bounds. The bounds start the +# search in the middle of a codepoint, so there should never be a match. +[[test]] +name = "empty-utf8yes-anchored-bounds" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled, +# matching within a codepoint is allowed. And remember, as in the anchored test +# above with UTF-8 mode disabled, iteration will report all adjacent matches. +# The matches at [0, 0] and [4, 4] are not included because of the bounds of +# the search. +[[test]] +name = "empty-utf8no-anchored-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the string when the bounds +# exclude the first match. +[[test]] +name = "empty-utf8yes-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# inbetween that split the codepoint. +[[test]] +name = "empty-utf8no-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we don't find any matches in an anchored search, even when +# the bounds include a match (at the end). +[[test]] +name = "empty-utf8yes-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# inbetween that split the codepoint. Even though this is an anchored search, +# since the matches are adjacent, we find all of them. +[[test]] +name = "empty-utf8no-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the haystack in UTF-8 mode +# when our bounds only include the empty string at the end of the haystack. +[[test]] +name = "empty-utf8yes-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, but with UTF-8 mode disabled. Results remain the same since +# the only possible match does not split a codepoint. +[[test]] +name = "empty-utf8no-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" diff --git a/vendor/regex/testdata/word-boundary-special.toml b/vendor/regex/testdata/word-boundary-special.toml new file mode 100644 index 000000000..2b5a2a0ac --- /dev/null +++ b/vendor/regex/testdata/word-boundary-special.toml @@ -0,0 +1,687 @@ +# These tests are for the "special" word boundary assertions. That is, +# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty +# assertions for more niche use cases, but hitting those cases without these +# assertions is difficult. For example, \b{start-half} and \b{end-half} are +# used to implement the -w/--word-regexp flag in a grep program. + +# Tests for (?-u:\b{start}) + +[[test]] +name = "word-start-ascii-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-start-ascii-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[4, 4]] +unicode = false + +[[test]] +name = "word-start-ascii-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = false + +# Tests for (?-u:\b{end}) + +[[test]] +name = "word-end-ascii-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = false + +[[test]] +name = "word-end-ascii-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[1, 1]] +unicode = false + +# Tests for \b{start} + +[[test]] +name = "word-start-unicode-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end} + +[[test]] +name = "word-end-unicode-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Tests for (?-u:\b{start-half}) + +[[test]] +name = "word-start-half-ascii-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "word-start-half-ascii-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = false + +[[test]] +name = "word-start-half-ascii-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060-noutf8" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +[[test]] +name = "word-start-half-ascii-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-half-ascii-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-start-half-ascii-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-start-half-ascii-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0], [5, 5]] +unicode = false + +# Tests for (?-u:\b{end-half}) + +[[test]] +name = "word-end-half-ascii-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-half-ascii-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = false + +[[test]] +name = "word-end-half-ascii-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-end-half-ascii-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-half-ascii-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-end-half-ascii-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-end-half-ascii-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[0, 0], [5, 5]] +unicode = false + +[[test]] +name = "word-end-half-ascii-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[1, 1], [5, 5]] +unicode = false + +# Tests for \b{start-half} + +[[test]] +name = "word-start-half-unicode-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = true + +[[test]] +name = "word-start-half-unicode-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = true + +[[test]] +name = "word-start-half-unicode-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-half-unicode-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [6, 6]] +unicode = true + +[[test]] +name = "word-start-half-unicode-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [8, 8]] +unicode = true + +[[test]] +name = "word-start-half-unicode-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end-half} + +[[test]] +name = "word-end-half-unicode-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-half-unicode-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = true + +[[test]] +name = "word-end-half-unicode-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-end-half-unicode-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-half-unicode-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [5, 5], [6, 6]] +unicode = true + +[[test]] +name = "word-end-half-unicode-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[4, 4], [8, 8]] +unicode = true + +[[test]] +name = "word-end-half-unicode-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-half-unicode-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Specialty tests. + +# Since \r is special cased in the start state computation (to deal with CRLF +# mode), this test ensures that the correct start state is computed when the +# pattern starts with a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-carriage" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Since \n is also special cased in the start state computation, this test +# ensures that the correct start state is computed when the pattern starts with +# a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-linefeed" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\nabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Like the carriage return test above, but with a custom line terminator. +[[test]] +name = "word-start-half-ascii-customlineterm" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC!abc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '!' diff --git a/vendor/regex/testdata/word-boundary.toml b/vendor/regex/testdata/word-boundary.toml new file mode 100644 index 000000000..1d86fc9bb --- /dev/null +++ b/vendor/regex/testdata/word-boundary.toml @@ -0,0 +1,781 @@ +# Some of these are cribbed from RE2's test suite. + +# These test \b. Below are tests for \B. +[[test]] +name = "wb1" +regex = '\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb2" +regex = '\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb3" +regex = '\b' +haystack = "ab" +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "wb4" +regex = '^\b' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb5" +regex = '\b$' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "wb6" +regex = '^\b$' +haystack = "ab" +matches = [] +unicode = false + +[[test]] +name = "wb7" +regex = '\bbar\b' +haystack = "nobar bar foo bar" +matches = [[6, 9], [14, 17]] +unicode = false + +[[test]] +name = "wb8" +regex = 'a\b' +haystack = "faoa x" +matches = [[3, 4]] +unicode = false + +[[test]] +name = "wb9" +regex = '\bbar' +haystack = "bar x" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb10" +regex = '\bbar' +haystack = "foo\nbar x" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb11" +regex = 'bar\b' +haystack = "foobar" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb12" +regex = 'bar\b' +haystack = "foobar\nxxx" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb13" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb14" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb15" +regex = '\b(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb16" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "X" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "wb17" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "XY" +matches = [] +unicode = false + +[[test]] +name = "wb18" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "bar" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb19" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb20" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb21" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "ffoo bbar N x" +matches = [[10, 11]] +unicode = false + +[[test]] +name = "wb22" +regex = '\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb23" +regex = '\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb24" +regex = '\b\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb25" +regex = '\b\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb26" +regex = '\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb27" +regex = '\b$' +haystack = "x" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "wb28" +regex = '\b$' +haystack = "y x" +matches = [[3, 3]] +unicode = false + +[[test]] +name = "wb29" +regex = '(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb30" +regex = '^\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb31" +regex = '^\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb32" +regex = '^\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb33" +regex = '^\b$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb34" +regex = '^(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb35" +regex = '^(?-u:\b).(?-u:\b)$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb36" +regex = '^^^^^\b$$$$$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb37" +regex = '^^^^^(?-u:\b).$$$$$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb38" +regex = '^^^^^\b$$$$$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb39" +regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb40" +regex = '(?-u:\b).+(?-u:\b)' +haystack = "$$abc$$" +matches = [[2, 5]] + +[[test]] +name = "wb41" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false + +[[test]] +name = "wb42" +regex = '\bfoo\b' +haystack = "zzz foo zzz" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb43" +regex = '\b^' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb44" +regex = '$\b' +haystack = "ab" +matches = [[2, 2]] +unicode = false + + +# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we +# have to disable it for most of these tests. This is because \B can match at +# non-UTF-8 boundaries. +[[test]] +name = "nb1" +regex = '\Bfoo\B' +haystack = "n foo xfoox that" +matches = [[7, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb2" +regex = 'a\B' +haystack = "faoa x" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb3" +regex = '\Bbar' +haystack = "bar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb4" +regex = '\Bbar' +haystack = "foo\nbar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb5" +regex = 'bar\B' +haystack = "foobar" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb6" +regex = 'bar\B' +haystack = "foobar\nxxx" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb7" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foox" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb8" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb9" +regex = '\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb10" +regex = '\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb11" +regex = '\B(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb12" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xXy" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb13" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XY" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb14" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XYZ" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb15" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "abara" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb16" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo_" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb17" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb18" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "foo bar vNX" +matches = [[9, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb19" +regex = '\B(?:fo|foo)\B' +haystack = "xfoo" +matches = [[1, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb20" +regex = '\B(?:foo|fo)\B' +haystack = "xfooo" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb21" +regex = '\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb22" +regex = '\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb23" +regex = '\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb24" +regex = '\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb25" +regex = '\B$' +haystack = "y x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb26" +regex = '\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb27" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb28" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb29" +regex = '^\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb30" +regex = '^\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb31" +regex = '^\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb32" +regex = '^\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb33" +regex = '^\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb34" +regex = '^\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb35" +regex = '^\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb36" +regex = '^\B.\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb37" +regex = '^^^^^\B$$$$$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb38" +regex = '^^^^^\B.$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb39" +regex = '^^^^^\B$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + + +# unicode1* and unicode2* work for both Unicode and ASCII because all matches +# are reported as byte offsets, and « and » do not correspond to word +# boundaries at either the character or byte level. +[[test]] +name = "unicode1" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] + +[[test]] +name = "unicode1-only-ascii" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode2" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] + +[[test]] +name = "unicode2-only-ascii" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] +unicode = false + +# ASCII word boundaries are completely oblivious to Unicode characters, so +# even though β is a character, an ASCII \b treats it as a word boundary +# when it is adjacent to another ASCII character. (The ASCII \b only looks +# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. +[[test]] +name = "unicode3" +regex = '\bx\b' +haystack = 'áxβ' +matches = [] + +[[test]] +name = "unicode3-only-ascii" +regex = '\bx\b' +haystack = 'áxβ' +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode4" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [[2, 3]] + +[[test]] +name = "unicode4-only-ascii" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [] +unicode = false +utf8 = false + +# The same as above, but with \b instead of \B as a sanity check. +[[test]] +name = "unicode5" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "unicode5-only-ascii" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] +unicode = false +utf8 = false + +[[test]] +name = "unicode5-noutf8" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +utf8 = false + +[[test]] +name = "unicode5-noutf8-only-ascii" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +unicode = false +utf8 = false + +# Weird special case to ensure that ASCII \B treats each individual code unit +# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary +# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the +# \w character class.) +[[test]] +name = "unicode5-not" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[5, 5]] + +[[test]] +name = "unicode5-not-only-ascii" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false + +# This gets no matches since \B only matches in the presence of valid UTF-8 +# when Unicode is enabled, even when UTF-8 mode is disabled. +[[test]] +name = "unicode5-not-noutf8" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [] +unescape = true +utf8 = false + +# But this DOES get matches since \B in ASCII mode only looks at individual +# bytes. +[[test]] +name = "unicode5-not-noutf8-only-ascii" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unescape = true +unicode = false +utf8 = false + +# Some tests of no particular significance. +[[test]] +name = "unicode6" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456 quux 789" +matches = [[4, 7], [12, 15], [21, 24]] + +[[test]] +name = "unicode7" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar a456 quux 789" +matches = [[4, 7], [22, 25]] + +[[test]] +name = "unicode8" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456a quux 789" +matches = [[4, 7], [22, 25]] + +# A variant of the problem described here: +# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667 +[[test]] +name = "alt-with-assertion-repetition" +regex = '(?:\b|%)+' +haystack = "z%" +bounds = [1, 2] +anchored = true +matches = [[1, 1]] |