diff options
Diffstat (limited to 'vendor/regex/testdata/anchored.toml')
-rw-r--r-- | vendor/regex/testdata/anchored.toml | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/vendor/regex/testdata/anchored.toml b/vendor/regex/testdata/anchored.toml new file mode 100644 index 0000000..0f2248d --- /dev/null +++ b/vendor/regex/testdata/anchored.toml @@ -0,0 +1,127 @@ +# These tests are specifically geared toward searches with 'anchored = true'. +# While they are interesting in their own right, they are particularly +# important for testing the one-pass DFA since the one-pass DFA can't work in +# unanchored contexts. +# +# Note that "anchored" in this context does not mean "^". Anchored searches are +# searches whose matches must begin at the start of the search, which may not +# be at the start of the haystack. That's why anchored searches---and there are +# some examples below---can still report multiple matches. This occurs when the +# matches are adjacent to one another. + +[[test]] +name = "greedy" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true + +# When a "earliest" search is used, greediness doesn't really exist because +# matches are reported as soon as they are known. +[[test]] +name = "greedy-earliest" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true +search-kind = "earliest" + +[[test]] +name = "nongreedy" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true + +# When "all" semantics are used, non-greediness doesn't exist since the longest +# possible match is always taken. +[[test]] +name = "nongreedy-all" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true +match-kind = "all" + +[[test]] +name = "word-boundary-unicode-01" +regex = '\b\w+\b' +haystack = 'βββ☃' +matches = [[0, 6]] +anchored = true + +[[test]] +name = "word-boundary-nounicode-01" +regex = '\b\w+\b' +haystack = 'abcβ' +matches = [[0, 3]] +anchored = true +unicode = false + +# Tests that '.c' doesn't match 'abc' when performing an anchored search from +# the beginning of the haystack. This test found two different bugs in the +# PikeVM and the meta engine. +[[test]] +name = "no-match-at-start" +regex = '.c' +haystack = 'abc' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-bounds" +regex = '.c' +haystack = 'aabc' +bounds = [1, 4] +matches = [] +anchored = true + +# This is like no-match-at-start, but hits the "reverse inner" optimization +# inside the meta engine. (no-match-at-start hits the "reverse suffix" +# optimization.) +[[test]] +name = "no-match-at-start-reverse-inner" +regex = '.c[a-z]' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-inner-bounds" +regex = '.c[a-z]' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true + +# Same as no-match-at-start, but applies to the meta engine's "reverse +# anchored" optimization. +[[test]] +name = "no-match-at-start-reverse-anchored" +regex = '.c[a-z]$' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-anchored-bounds" +regex = '.c[a-z]$' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true |