diff options
Diffstat (limited to 'vendor/regex-automata/tests/data/word-boundary.toml')
-rw-r--r-- | vendor/regex-automata/tests/data/word-boundary.toml | 771 |
1 files changed, 0 insertions, 771 deletions
diff --git a/vendor/regex-automata/tests/data/word-boundary.toml b/vendor/regex-automata/tests/data/word-boundary.toml deleted file mode 100644 index e84b25c2a..000000000 --- a/vendor/regex-automata/tests/data/word-boundary.toml +++ /dev/null @@ -1,771 +0,0 @@ -# Some of these are cribbed from RE2's test suite. - -# These test \b. Below are tests for \B. -[[tests]] -name = "wb1" -regex = '\b' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb2" -regex = '\b' -input = "a" -matches = [[0, 0], [1, 1]] -unicode = false - -[[tests]] -name = "wb3" -regex = '\b' -input = "ab" -matches = [[0, 0], [2, 2]] -unicode = false - -[[tests]] -name = "wb4" -regex = '^\b' -input = "ab" -matches = [[0, 0]] -unicode = false - -[[tests]] -name = "wb5" -regex = '\b$' -input = "ab" -matches = [[2, 2]] -unicode = false - -[[tests]] -name = "wb6" -regex = '^\b$' -input = "ab" -matches = [] -unicode = false - -[[tests]] -name = "wb7" -regex = '\bbar\b' -input = "nobar bar foo bar" -matches = [[6, 9], [14, 17]] -unicode = false - -[[tests]] -name = "wb8" -regex = 'a\b' -input = "faoa x" -matches = [[3, 4]] -unicode = false - -[[tests]] -name = "wb9" -regex = '\bbar' -input = "bar x" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb10" -regex = '\bbar' -input = "foo\nbar x" -matches = [[4, 7]] -unicode = false - -[[tests]] -name = "wb11" -regex = 'bar\b' -input = "foobar" -matches = [[3, 6]] -unicode = false - -[[tests]] -name = "wb12" -regex = 'bar\b' -input = "foobar\nxxx" -matches = [[3, 6]] -unicode = false - -[[tests]] -name = "wb13" -regex = '(foo|bar|[A-Z])\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb14" -regex = '(foo|bar|[A-Z])\b' -input = "foo\n" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb15" -regex = '\b(foo|bar|[A-Z])' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb16" -regex = '\b(foo|bar|[A-Z])\b' -input = "X" -matches = [[0, 1]] -unicode = false - -[[tests]] -name = "wb17" -regex = '\b(foo|bar|[A-Z])\b' -input = "XY" -matches = [] -unicode = false - -[[tests]] -name = "wb18" -regex = '\b(foo|bar|[A-Z])\b' -input = "bar" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb19" -regex = '\b(foo|bar|[A-Z])\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb20" -regex = '\b(foo|bar|[A-Z])\b' -input = "foo\n" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb21" -regex = '\b(foo|bar|[A-Z])\b' -input = "ffoo bbar N x" -matches = [[10, 11]] -unicode = false - -[[tests]] -name = "wb22" -regex = '\b(fo|foo)\b' -input = "fo" -matches = [[0, 2]] -unicode = false - -[[tests]] -name = "wb23" -regex = '\b(fo|foo)\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb24" -regex = '\b\b' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb25" -regex = '\b\b' -input = "a" -matches = [[0, 0], [1, 1]] -unicode = false - -[[tests]] -name = "wb26" -regex = '\b$' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb27" -regex = '\b$' -input = "x" -matches = [[1, 1]] -unicode = false - -[[tests]] -name = "wb28" -regex = '\b$' -input = "y x" -matches = [[3, 3]] -unicode = false - -[[tests]] -name = "wb29" -regex = '(?-u:\b).$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb30" -regex = '^\b(fo|foo)\b' -input = "fo" -matches = [[0, 2]] -unicode = false - -[[tests]] -name = "wb31" -regex = '^\b(fo|foo)\b' -input = "foo" -matches = [[0, 3]] -unicode = false - -[[tests]] -name = "wb32" -regex = '^\b$' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb33" -regex = '^\b$' -input = "x" -matches = [] -unicode = false - -[[tests]] -name = "wb34" -regex = '^(?-u:\b).$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb35" -regex = '^(?-u:\b).(?-u:\b)$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb36" -regex = '^^^^^\b$$$$$' -input = "" -matches = [] -unicode = false - -[[tests]] -name = "wb37" -regex = '^^^^^(?-u:\b).$$$$$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb38" -regex = '^^^^^\b$$$$$' -input = "x" -matches = [] -unicode = false - -[[tests]] -name = "wb39" -regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' -input = "x" -matches = [[0, 1]] - -[[tests]] -name = "wb40" -regex = '(?-u:\b).+(?-u:\b)' -input = "$$abc$$" -matches = [[2, 5]] - -[[tests]] -name = "wb41" -regex = '\b' -input = "a b c" -matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] -unicode = false - -[[tests]] -name = "wb42" -regex = '\bfoo\b' -input = "zzz foo zzz" -matches = [[4, 7]] -unicode = false - -[[tests]] -name = "wb43" -regex = '\b^' -input = "ab" -matches = [[0, 0]] -unicode = false - -[[tests]] -name = "wb44" -regex = '$\b' -input = "ab" -matches = [[2, 2]] -unicode = false - - -# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we -# have to disable it for most of these tests. This is because \B can match at -# non-UTF-8 boundaries. -[[tests]] -name = "nb1" -regex = '\Bfoo\B' -input = "n foo xfoox that" -matches = [[7, 10]] -unicode = false -utf8 = false - -[[tests]] -name = "nb2" -regex = 'a\B' -input = "faoa x" -matches = [[1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "nb3" -regex = '\Bbar' -input = "bar x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb4" -regex = '\Bbar' -input = "foo\nbar x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb5" -regex = 'bar\B' -input = "foobar" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb6" -regex = 'bar\B' -input = "foobar\nxxx" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb7" -regex = '(foo|bar|[A-Z])\B' -input = "foox" -matches = [[0, 3]] -unicode = false -utf8 = false - -[[tests]] -name = "nb8" -regex = '(foo|bar|[A-Z])\B' -input = "foo\n" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb9" -regex = '\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb10" -regex = '\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb11" -regex = '\B(foo|bar|[A-Z])' -input = "foo" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb12" -regex = '\B(foo|bar|[A-Z])\B' -input = "xXy" -matches = [[1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "nb13" -regex = '\B(foo|bar|[A-Z])\B' -input = "XY" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb14" -regex = '\B(foo|bar|[A-Z])\B' -input = "XYZ" -matches = [[1, 2]] -unicode = false -utf8 = false - -[[tests]] -name = "nb15" -regex = '\B(foo|bar|[A-Z])\B' -input = "abara" -matches = [[1, 4]] -unicode = false -utf8 = false - -[[tests]] -name = "nb16" -regex = '\B(foo|bar|[A-Z])\B' -input = "xfoo_" -matches = [[1, 4]] -unicode = false -utf8 = false - -[[tests]] -name = "nb17" -regex = '\B(foo|bar|[A-Z])\B' -input = "xfoo\n" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb18" -regex = '\B(foo|bar|[A-Z])\B' -input = "foo bar vNX" -matches = [[9, 10]] -unicode = false -utf8 = false - -[[tests]] -name = "nb19" -regex = '\B(fo|foo)\B' -input = "xfoo" -matches = [[1, 3]] -unicode = false -utf8 = false - -[[tests]] -name = "nb20" -regex = '\B(foo|fo)\B' -input = "xfooo" -matches = [[1, 4]] -unicode = false -utf8 = false - -[[tests]] -name = "nb21" -regex = '\B\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb22" -regex = '\B\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb23" -regex = '\B$' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb24" -regex = '\B$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb25" -regex = '\B$' -input = "y x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb26" -regex = '\B.$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb27" -regex = '^\B(fo|foo)\B' -input = "fo" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb28" -regex = '^\B(fo|foo)\B' -input = "fo" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb29" -regex = '^\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb30" -regex = '^\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb31" -regex = '^\B\B' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb32" -regex = '^\B\B' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb33" -regex = '^\B$' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb34" -regex = '^\B$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb35" -regex = '^\B.$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb36" -regex = '^\B.\B$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb37" -regex = '^^^^^\B$$$$$' -input = "" -matches = [[0, 0]] -unicode = false -utf8 = false - -[[tests]] -name = "nb38" -regex = '^^^^^\B.$$$$$' -input = "x" -matches = [] -unicode = false -utf8 = false - -[[tests]] -name = "nb39" -regex = '^^^^^\B$$$$$' -input = "x" -matches = [] -unicode = false -utf8 = false - - -# unicode1* and unicode2* work for both Unicode and ASCII because all matches -# are reported as byte offsets, and « and » do not correspond to word -# boundaries at either the character or byte level. -[[tests]] -name = "unicode1" -regex = '\bx\b' -input = "«x" -matches = [[2, 3]] - -[[tests]] -name = "unicode1-only-ascii" -regex = '\bx\b' -input = "«x" -matches = [[2, 3]] -unicode = false - -[[tests]] -name = "unicode2" -regex = '\bx\b' -input = "x»" -matches = [[0, 1]] - -[[tests]] -name = "unicode2-only-ascii" -regex = '\bx\b' -input = "x»" -matches = [[0, 1]] -unicode = false - -# ASCII word boundaries are completely oblivious to Unicode characters, so -# even though β is a character, an ASCII \b treats it as a word boundary -# when it is adjacent to another ASCII character. (The ASCII \b only looks -# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. -[[tests]] -name = "unicode3" -regex = '\bx\b' -input = 'áxβ' -matches = [] - -[[tests]] -name = "unicode3-only-ascii" -regex = '\bx\b' -input = 'áxβ' -matches = [[2, 3]] -unicode = false - -[[tests]] -name = "unicode4" -regex = '\Bx\B' -input = 'áxβ' -matches = [[2, 3]] - -[[tests]] -name = "unicode4-only-ascii" -regex = '\Bx\B' -input = 'áxβ' -matches = [] -unicode = false -utf8 = false - -# The same as above, but with \b instead of \B as a sanity check. -[[tests]] -name = "unicode5" -regex = '\b' -input = "0\U0007EF5E" -matches = [[0, 0], [1, 1]] - -[[tests]] -name = "unicode5-only-ascii" -regex = '\b' -input = "0\U0007EF5E" -matches = [[0, 0], [1, 1]] -unicode = false -utf8 = false - -[[tests]] -name = "unicode5-noutf8" -regex = '\b' -input = '0\xFF\xFF\xFF\xFF' -matches = [[0, 0], [1, 1]] -unescape = true -utf8 = false - -[[tests]] -name = "unicode5-noutf8-only-ascii" -regex = '\b' -input = '0\xFF\xFF\xFF\xFF' -matches = [[0, 0], [1, 1]] -unescape = true -unicode = false -utf8 = false - -# Weird special case to ensure that ASCII \B treats each individual code unit -# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary -# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the -# \w character class.) -[[tests]] -name = "unicode5-not" -regex = '\B' -input = "0\U0007EF5E" -matches = [[5, 5]] - -[[tests]] -name = "unicode5-not-only-ascii" -regex = '\B' -input = "0\U0007EF5E" -matches = [[2, 2], [3, 3], [4, 4], [5, 5]] -unicode = false -utf8 = false - -# This gets no matches since \B only matches in the presence of valid UTF-8 -# when Unicode is enabled, even when UTF-8 mode is disabled. -[[tests]] -name = "unicode5-not-noutf8" -regex = '\B' -input = '0\xFF\xFF\xFF\xFF' -matches = [] -unescape = true -utf8 = false - -# But this DOES get matches since \B in ASCII mode only looks at individual -# bytes. -[[tests]] -name = "unicode5-not-noutf8-only-ascii" -regex = '\B' -input = '0\xFF\xFF\xFF\xFF' -matches = [[2, 2], [3, 3], [4, 4], [5, 5]] -unescape = true -unicode = false -utf8 = false - -# Some tests of no particular significance. -[[tests]] -name = "unicode6" -regex = '\b[0-9]+\b' -input = "foo 123 bar 456 quux 789" -matches = [[4, 7], [12, 15], [21, 24]] - -[[tests]] -name = "unicode7" -regex = '\b[0-9]+\b' -input = "foo 123 bar a456 quux 789" -matches = [[4, 7], [22, 25]] - -[[tests]] -name = "unicode8" -regex = '\b[0-9]+\b' -input = "foo 123 bar 456a quux 789" -matches = [[4, 7], [22, 25]] |