diff options
Diffstat (limited to 'vendor/regex-automata/tests/data/bytes.toml')
-rw-r--r-- | vendor/regex-automata/tests/data/bytes.toml | 235 |
1 files changed, 0 insertions, 235 deletions
diff --git a/vendor/regex-automata/tests/data/bytes.toml b/vendor/regex-automata/tests/data/bytes.toml deleted file mode 100644 index eb3a0942e..000000000 --- a/vendor/regex-automata/tests/data/bytes.toml +++ /dev/null @@ -1,235 +0,0 @@ -# These are tests specifically crafted for regexes that can match arbitrary -# bytes. In some cases, we also test the Unicode variant as well, just because -# it's good sense to do so. But also, these tests aren't really about Unicode, -# but whether matches are only reported at valid UTF-8 boundaries. For most -# tests in this entire collection, utf8 = true. But for these tests, we use -# utf8 = false. - -[[tests]] -name = "word-boundary-ascii" -regex = ' \b' -input = " δ" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-unicode" -regex = ' \b' -input = " δ" -matches = [[0, 1]] -unicode = true -utf8 = false - -[[tests]] -name = "word-boundary-ascii-not" -regex = ' \B' -input = " δ" -matches = [[0, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-unicode-not" -regex = ' \B' -input = " δ" -matches = [] -unicode = true -utf8 = false - -[[tests]] -name = "perl-word-ascii" -regex = '\w+' -input = "aδ" -matches = [[0, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "perl-word-unicode" -regex = '\w+' -input = "aδ" -matches = [[0, 3]] -unicode = true -utf8 = false - -[[tests]] -name = "perl-decimal-ascii" -regex = '\d+' -input = "1२३9" -matches = [[0, 1], [7, 8]] -unicode = false -utf8 = false - -[[tests]] -name = "perl-decimal-unicode" -regex = '\d+' -input = "1२३9" -matches = [[0, 8]] -unicode = true -utf8 = false - -[[tests]] -name = "perl-whitespace-ascii" -regex = '\s+' -input = " \u1680" -matches = [[0, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "perl-whitespace-unicode" -regex = '\s+' -input = " \u1680" -matches = [[0, 4]] -unicode = true -utf8 = false - -# The first `(.+)` matches two Unicode codepoints, but can't match the 5th -# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and -# matches. -[[tests]] -name = "mixed-dot" -regex = '(.+)(?-u)(.+)' -input = '\xCE\x93\xCE\x94\xFF' -captures = [ - [[0, 5], [0, 4], [4, 5]], -] -unescape = true -unicode = true -utf8 = false - -[[tests]] -name = "case-one-ascii" -regex = 'a' -input = "A" -matches = [[0, 1]] -case_insensitive = true -unicode = false -utf8 = false - -[[tests]] -name = "case-one-unicode" -regex = 'a' -input = "A" -matches = [[0, 1]] -case_insensitive = true -unicode = true -utf8 = false - -[[tests]] -name = "case-class-simple-ascii" -regex = '[a-z]+' -input = "AaAaA" -matches = [[0, 5]] -case_insensitive = true -unicode = false -utf8 = false - -[[tests]] -name = "case-class-ascii" -regex = '[a-z]+' -input = "aA\u212AaA" -matches = [[0, 2], [5, 7]] -case_insensitive = true -unicode = false -utf8 = false - -[[tests]] -name = "case-class-unicode" -regex = '[a-z]+' -input = "aA\u212AaA" -matches = [[0, 7]] -case_insensitive = true -unicode = true -utf8 = false - -[[tests]] -name = "negate-ascii" -regex = '[^a]' -input = "δ" -matches = [[0, 1], [1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "negate-unicode" -regex = '[^a]' -input = "δ" -matches = [[0, 2]] -unicode = true -utf8 = false - -# When utf8=true, this won't match, because the implicit '.*?' prefix is -# Unicode aware and will refuse to match through invalid UTF-8 bytes. -[[tests]] -name = "dotstar-prefix-ascii" -regex = 'a' -input = '\xFFa' -matches = [[1, 2]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "dotstar-prefix-unicode" -regex = 'a' -input = '\xFFa' -matches = [[1, 2]] -unescape = true -unicode = true -utf8 = false - -[[tests]] -name = "null-bytes" -regex = '(?P<cstr>[^\x00]+)\x00' -input = 'foo\x00' -captures = [ - [[0, 4], [0, 3]], -] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "invalid-utf8-anchor-100" -regex = '\xCC?^' -input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' -matches = [[0, 0]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "invalid-utf8-anchor-200" -regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' -input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' -matches = [[22, 22]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "invalid-utf8-anchor-300" -regex = '^|ddp\xff\xffdddddlQd@\x80' -input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' -matches = [[0, 0]] -unescape = true -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-ascii-100" -regex = '\Bx\B' -input = "áxβ" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "word-boundary-ascii-200" -regex = '\B' -input = "0\U0007EF5E" -matches = [[2, 2], [3, 3], [4, 4], [5, 5]] -unicode = false -utf8 = false |