summary | refs | log | tree | commit | diff | stats
path: root/vendor/regex-automata/tests/data/word-boundary.toml
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-automata/tests/data/word-boundary.toml')
-rw-r--r-- vendor/regex-automata/tests/data/word-boundary.toml 771
1 files changed, 0 insertions, 771 deletions
diff --git a/vendor/regex-automata/tests/data/word-boundary.toml b/vendor/regex-automata/tests/data/word-boundary.toml
deleted file mode 100644
index e84b25c2a..000000000
--- a/vendor/regex-automata/tests/data/word-boundary.toml
+++ /dev/null
@@ -1,771 +0,0 @@
-# Some of these are cribbed from RE2's test suite.
-
-# These test \b. Below are tests for \B.
-[[tests]]
-name = "wb1"
-regex = '\b'
-input = ""
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb2"
-regex = '\b'
-input = "a"
-matches = [[0, 0], [1, 1]]
-unicode = false
-
-[[tests]]
-name = "wb3"
-regex = '\b'
-input = "ab"
-matches = [[0, 0], [2, 2]]
-unicode = false
-
-[[tests]]
-name = "wb4"
-regex = '^\b'
-input = "ab"
-matches = [[0, 0]]
-unicode = false
-
-[[tests]]
-name = "wb5"
-regex = '\b$'
-input = "ab"
-matches = [[2, 2]]
-unicode = false
-
-[[tests]]
-name = "wb6"
-regex = '^\b$'
-input = "ab"
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb7"
-regex = '\bbar\b'
-input = "nobar bar foo bar"
-matches = [[6, 9], [14, 17]]
-unicode = false
-
-[[tests]]
-name = "wb8"
-regex = 'a\b'
-input = "faoa x"
-matches = [[3, 4]]
-unicode = false
-
-[[tests]]
-name = "wb9"
-regex = '\bbar'
-input = "bar x"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb10"
-regex = '\bbar'
-input = "foo\nbar x"
-matches = [[4, 7]]
-unicode = false
-
-[[tests]]
-name = "wb11"
-regex = 'bar\b'
-input = "foobar"
-matches = [[3, 6]]
-unicode = false
-
-[[tests]]
-name = "wb12"
-regex = 'bar\b'
-input = "foobar\nxxx"
-matches = [[3, 6]]
-unicode = false
-
-[[tests]]
-name = "wb13"
-regex = '(foo|bar|[A-Z])\b'
-input = "foo"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb14"
-regex = '(foo|bar|[A-Z])\b'
-input = "foo\n"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb15"
-regex = '\b(foo|bar|[A-Z])'
-input = "foo"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb16"
-regex = '\b(foo|bar|[A-Z])\b'
-input = "X"
-matches = [[0, 1]]
-unicode = false
-
-[[tests]]
-name = "wb17"
-regex = '\b(foo|bar|[A-Z])\b'
-input = "XY"
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb18"
-regex = '\b(foo|bar|[A-Z])\b'
-input = "bar"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb19"
-regex = '\b(foo|bar|[A-Z])\b'
-input = "foo"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb20"
-regex = '\b(foo|bar|[A-Z])\b'
-input = "foo\n"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb21"
-regex = '\b(foo|bar|[A-Z])\b'
-input = "ffoo bbar N x"
-matches = [[10, 11]]
-unicode = false
-
-[[tests]]
-name = "wb22"
-regex = '\b(fo|foo)\b'
-input = "fo"
-matches = [[0, 2]]
-unicode = false
-
-[[tests]]
-name = "wb23"
-regex = '\b(fo|foo)\b'
-input = "foo"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb24"
-regex = '\b\b'
-input = ""
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb25"
-regex = '\b\b'
-input = "a"
-matches = [[0, 0], [1, 1]]
-unicode = false
-
-[[tests]]
-name = "wb26"
-regex = '\b$'
-input = ""
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb27"
-regex = '\b$'
-input = "x"
-matches = [[1, 1]]
-unicode = false
-
-[[tests]]
-name = "wb28"
-regex = '\b$'
-input = "y x"
-matches = [[3, 3]]
-unicode = false
-
-[[tests]]
-name = "wb29"
-regex = '(?-u:\b).$'
-input = "x"
-matches = [[0, 1]]
-
-[[tests]]
-name = "wb30"
-regex = '^\b(fo|foo)\b'
-input = "fo"
-matches = [[0, 2]]
-unicode = false
-
-[[tests]]
-name = "wb31"
-regex = '^\b(fo|foo)\b'
-input = "foo"
-matches = [[0, 3]]
-unicode = false
-
-[[tests]]
-name = "wb32"
-regex = '^\b$'
-input = ""
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb33"
-regex = '^\b$'
-input = "x"
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb34"
-regex = '^(?-u:\b).$'
-input = "x"
-matches = [[0, 1]]
-
-[[tests]]
-name = "wb35"
-regex = '^(?-u:\b).(?-u:\b)$'
-input = "x"
-matches = [[0, 1]]
-
-[[tests]]
-name = "wb36"
-regex = '^^^^^\b$$$$$'
-input = ""
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb37"
-regex = '^^^^^(?-u:\b).$$$$$'
-input = "x"
-matches = [[0, 1]]
-
-[[tests]]
-name = "wb38"
-regex = '^^^^^\b$$$$$'
-input = "x"
-matches = []
-unicode = false
-
-[[tests]]
-name = "wb39"
-regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
-input = "x"
-matches = [[0, 1]]
-
-[[tests]]
-name = "wb40"
-regex = '(?-u:\b).+(?-u:\b)'
-input = "$$abc$$"
-matches = [[2, 5]]
-
-[[tests]]
-name = "wb41"
-regex = '\b'
-input = "a b c"
-matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
-unicode = false
-
-[[tests]]
-name = "wb42"
-regex = '\bfoo\b'
-input = "zzz foo zzz"
-matches = [[4, 7]]
-unicode = false
-
-[[tests]]
-name = "wb43"
-regex = '\b^'
-input = "ab"
-matches = [[0, 0]]
-unicode = false
-
-[[tests]]
-name = "wb44"
-regex = '$\b'
-input = "ab"
-matches = [[2, 2]]
-unicode = false
-
-
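-# Tests for \B. Note that the ASCII-only \B (unicode = false) is not allowed
-# when UTF-8 mode is enabled, so these tests also set utf8 = false. This is
-# because an ASCII \B can match at positions that are not UTF-8 codepoint
-# boundaries, i.e. between the bytes of a multi-byte sequence.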
-[[tests]]
-name = "nb1"
-regex = '\Bfoo\B'
-input = "n foo xfoox that"
-matches = [[7, 10]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb2"
-regex = 'a\B'
-input = "faoa x"
-matches = [[1, 2]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb3"
-regex = '\Bbar'
-input = "bar x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb4"
-regex = '\Bbar'
-input = "foo\nbar x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb5"
-regex = 'bar\B'
-input = "foobar"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb6"
-regex = 'bar\B'
-input = "foobar\nxxx"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb7"
-regex = '(foo|bar|[A-Z])\B'
-input = "foox"
-matches = [[0, 3]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb8"
-regex = '(foo|bar|[A-Z])\B'
-input = "foo\n"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb9"
-regex = '\B'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb10"
-regex = '\B'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb11"
-regex = '\B(foo|bar|[A-Z])'
-input = "foo"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb12"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "xXy"
-matches = [[1, 2]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb13"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "XY"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb14"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "XYZ"
-matches = [[1, 2]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb15"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "abara"
-matches = [[1, 4]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb16"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "xfoo_"
-matches = [[1, 4]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb17"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "xfoo\n"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb18"
-regex = '\B(foo|bar|[A-Z])\B'
-input = "foo bar vNX"
-matches = [[9, 10]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb19"
-regex = '\B(fo|foo)\B'
-input = "xfoo"
-matches = [[1, 3]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb20"
-regex = '\B(foo|fo)\B'
-input = "xfooo"
-matches = [[1, 4]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb21"
-regex = '\B\B'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb22"
-regex = '\B\B'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb23"
-regex = '\B$'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb24"
-regex = '\B$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb25"
-regex = '\B$'
-input = "y x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb26"
-regex = '\B.$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb27"
-regex = '^\B(fo|foo)\B'
-input = "fo"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb28"
-regex = '^\B(fo|foo)\B'
-input = "fo"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb29"
-regex = '^\B'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb30"
-regex = '^\B'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb31"
-regex = '^\B\B'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb32"
-regex = '^\B\B'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb33"
-regex = '^\B$'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb34"
-regex = '^\B$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb35"
-regex = '^\B.$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb36"
-regex = '^\B.\B$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb37"
-regex = '^^^^^\B$$$$$'
-input = ""
-matches = [[0, 0]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb38"
-regex = '^^^^^\B.$$$$$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "nb39"
-regex = '^^^^^\B$$$$$'
-input = "x"
-matches = []
-unicode = false
-utf8 = false
-
-
-# unicode1* and unicode2* work for both Unicode and ASCII because all matches
-# are reported as byte offsets, and « and » are not word characters at either
-# the character or byte level.
-[[tests]]
-name = "unicode1"
-regex = '\bx\b'
-input = "«x"
-matches = [[2, 3]]
-
-[[tests]]
-name = "unicode1-only-ascii"
-regex = '\bx\b'
-input = "«x"
-matches = [[2, 3]]
-unicode = false
-
-[[tests]]
-name = "unicode2"
-regex = '\bx\b'
-input = "x»"
-matches = [[0, 1]]
-
-[[tests]]
-name = "unicode2-only-ascii"
-regex = '\bx\b'
-input = "x»"
-matches = [[0, 1]]
-unicode = false
-
-# ASCII word boundaries are completely oblivious to Unicode characters, so
-# even though β is a word character, an ASCII \b sees a word boundary between
-# it and an adjacent ASCII word character. (The ASCII \b only looks at the
-# leading byte of β.) For Unicode \b, the results are precisely inverted.
-[[tests]]
-name = "unicode3"
-regex = '\bx\b'
-input = 'áxβ'
-matches = []
-
-[[tests]]
-name = "unicode3-only-ascii"
-regex = '\bx\b'
-input = 'áxβ'
-matches = [[2, 3]]
-unicode = false
-
-[[tests]]
-name = "unicode4"
-regex = '\Bx\B'
-input = 'áxβ'
-matches = [[2, 3]]
-
-[[tests]]
-name = "unicode4-only-ascii"
-regex = '\Bx\B'
-input = 'áxβ'
-matches = []
-unicode = false
-utf8 = false
-
-# The same as the unicode5-not* tests below, but with \b instead of \B as a
-# sanity check.
-[[tests]]
-name = "unicode5"
-regex = '\b'
-input = "0\U0007EF5E"
-matches = [[0, 0], [1, 1]]
-
-[[tests]]
-name = "unicode5-only-ascii"
-regex = '\b'
-input = "0\U0007EF5E"
-matches = [[0, 0], [1, 1]]
-unicode = false
-utf8 = false
-
-[[tests]]
-name = "unicode5-noutf8"
-regex = '\b'
-input = '0\xFF\xFF\xFF\xFF'
-matches = [[0, 0], [1, 1]]
-unescape = true
-utf8 = false
-
-[[tests]]
-name = "unicode5-noutf8-only-ascii"
-regex = '\b'
-input = '0\xFF\xFF\xFF\xFF'
-matches = [[0, 0], [1, 1]]
-unescape = true
-unicode = false
-utf8 = false
-
-# Weird special case to ensure that ASCII \B treats each individual code unit
-# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
-# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
-# \w character class.)
-[[tests]]
-name = "unicode5-not"
-regex = '\B'
-input = "0\U0007EF5E"
-matches = [[5, 5]]
-
-[[tests]]
-name = "unicode5-not-only-ascii"
-regex = '\B'
-input = "0\U0007EF5E"
-matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
-unicode = false
-utf8 = false
-
-# This gets no matches since \B only matches in the presence of valid UTF-8
-# when Unicode is enabled, even when UTF-8 mode is disabled.
-[[tests]]
-name = "unicode5-not-noutf8"
-regex = '\B'
-input = '0\xFF\xFF\xFF\xFF'
-matches = []
-unescape = true
-utf8 = false
-
-# But this DOES get matches since \B in ASCII mode only looks at individual
-# bytes.
-[[tests]]
-name = "unicode5-not-noutf8-only-ascii"
-regex = '\B'
-input = '0\xFF\xFF\xFF\xFF'
-matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
-unescape = true
-unicode = false
-utf8 = false
-
-# Some tests of no particular significance.
-[[tests]]
-name = "unicode6"
-regex = '\b[0-9]+\b'
-input = "foo 123 bar 456 quux 789"
-matches = [[4, 7], [12, 15], [21, 24]]
-
-[[tests]]
-name = "unicode7"
-regex = '\b[0-9]+\b'
-input = "foo 123 bar a456 quux 789"
-matches = [[4, 7], [22, 25]]
-
-[[tests]]
-name = "unicode8"
-regex = '\b[0-9]+\b'
-input = "foo 123 bar 456a quux 789"
-matches = [[4, 7], [22, 25]]