summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/tests/data/bytes.toml
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/tests/data/bytes.toml')
-rw-r--r--vendor/regex-automata/tests/data/bytes.toml235
1 files changed, 0 insertions, 235 deletions
diff --git a/vendor/regex-automata/tests/data/bytes.toml b/vendor/regex-automata/tests/data/bytes.toml
deleted file mode 100644
index eb3a0942e..000000000
--- a/vendor/regex-automata/tests/data/bytes.toml
+++ /dev/null
@@ -1,235 +0,0 @@
-# These are tests specifically crafted for regexes that can match arbitrary
-# bytes. In some cases, we also test the Unicode variant as well, just because
-# it's good sense to do so. But also, these tests aren't really about Unicode,
-# but whether matches are only reported at valid UTF-8 boundaries. For most
-# tests in this entire collection, utf8 = true. But for these tests, we use
-# utf8 = false.
-
-[[tests]]
-name = "word-boundary-ascii"
-regex = ' \b'
-input = " δ"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "word-boundary-unicode"
-regex = ' \b'
-input = " δ"
-matches = [[0, 1]]
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "word-boundary-ascii-not"
-regex = ' \B'
-input = " δ"
-matches = [[0, 1]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "word-boundary-unicode-not"
-regex = ' \B'
-input = " δ"
-matches = []
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "perl-word-ascii"
-regex = '\w+'
-input = "aδ"
-matches = [[0, 1]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "perl-word-unicode"
-regex = '\w+'
-input = "aδ"
-matches = [[0, 3]]
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "perl-decimal-ascii"
-regex = '\d+'
-input = "1२३9"
-matches = [[0, 1], [7, 8]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "perl-decimal-unicode"
-regex = '\d+'
-input = "1२३9"
-matches = [[0, 8]]
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "perl-whitespace-ascii"
-regex = '\s+'
-input = " \u1680"
-matches = [[0, 1]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "perl-whitespace-unicode"
-regex = '\s+'
-input = " \u1680"
-matches = [[0, 4]]
-unicode = true
-utf8 = false
-
-# The first `(.+)` matches two Unicode codepoints, but can't match the 5th
-# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
-# matches.
-[[tests]]
-name = "mixed-dot"
-regex = '(.+)(?-u)(.+)'
-input = '\xCE\x93\xCE\x94\xFF'
-captures = [
- [[0, 5], [0, 4], [4, 5]],
-]
-unescape = true
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "case-one-ascii"
-regex = 'a'
-input = "A"
-matches = [[0, 1]]
-case_insensitive = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "case-one-unicode"
-regex = 'a'
-input = "A"
-matches = [[0, 1]]
-case_insensitive = true
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "case-class-simple-ascii"
-regex = '[a-z]+'
-input = "AaAaA"
-matches = [[0, 5]]
-case_insensitive = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "case-class-ascii"
-regex = '[a-z]+'
-input = "aA\u212AaA"
-matches = [[0, 2], [5, 7]]
-case_insensitive = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "case-class-unicode"
-regex = '[a-z]+'
-input = "aA\u212AaA"
-matches = [[0, 7]]
-case_insensitive = true
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "negate-ascii"
-regex = '[^a]'
-input = "δ"
-matches = [[0, 1], [1, 2]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "negate-unicode"
-regex = '[^a]'
-input = "δ"
-matches = [[0, 2]]
-unicode = true
-utf8 = false
-
-# When utf8=true, this won't match, because the implicit '.*?' prefix is
-# Unicode aware and will refuse to match through invalid UTF-8 bytes.
-[[tests]]
-name = "dotstar-prefix-ascii"
-regex = 'a'
-input = '\xFFa'
-matches = [[1, 2]]
-unescape = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "dotstar-prefix-unicode"
-regex = 'a'
-input = '\xFFa'
-matches = [[1, 2]]
-unescape = true
-unicode = true
-utf8 = false
-
-[[tests]]
-name = "null-bytes"
-regex = '(?P<cstr>[^\x00]+)\x00'
-input = 'foo\x00'
-captures = [
- [[0, 4], [0, 3]],
-]
-unescape = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "invalid-utf8-anchor-100"
-regex = '\xCC?^'
-input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
-matches = [[0, 0]]
-unescape = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "invalid-utf8-anchor-200"
-regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$'
-input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
-matches = [[22, 22]]
-unescape = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "invalid-utf8-anchor-300"
-regex = '^|ddp\xff\xffdddddlQd@\x80'
-input = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
-matches = [[0, 0]]
-unescape = true
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "word-boundary-ascii-100"
-regex = '\Bx\B'
-input = "áxβ"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "word-boundary-ascii-200"
-regex = '\B'
-input = "0\U0007EF5E"
-matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
-unicode = false
-utf8 = false