# These tests are specifically geared toward searches with 'anchored = true'.
# While they are interesting in their own right, they are particularly
# important for testing the one-pass DFA since the one-pass DFA can't work in
# unanchored contexts.
#
# Note that "anchored" in this context does not mean "^". Anchored searches are
# searches whose matches must begin at the start of the search, which may not
# be at the start of the haystack. That's why anchored searches---and there are
# some examples below---can still report multiple matches. This occurs when the
# matches are adjacent to one another.

# A greedy repetition consumes all three copies of 'abc' in a single match.
# Each entry in 'matches' lists the overall span followed by the span of the
# capture group, which here is the last repetition.
[[test]]
name = "greedy"
regex = '(abc)+'
haystack = "abcabcabc"
matches = [
  [[0, 9], [6, 9]],
]
anchored = true

# When an "earliest" search is used, greediness doesn't really exist because
# matches are reported as soon as they are known.
[[test]]
name = "greedy-earliest"
regex = '(abc)+'
haystack = "abcabcabc"
matches = [
  [[0, 3], [0, 3]],
  [[3, 6], [3, 6]],
  [[6, 9], [6, 9]],
]
anchored = true
search-kind = "earliest"

# A non-greedy repetition stops after one copy of 'abc'. The three matches are
# adjacent to one another, so all of them are reported even though the search
# is anchored.
[[test]]
name = "nongreedy"
regex = '(abc)+?'
haystack = "abcabcabc"
matches = [
  [[0, 3], [0, 3]],
  [[3, 6], [3, 6]],
  [[6, 9], [6, 9]],
]
anchored = true

# When "all" semantics are used, non-greediness doesn't exist since the longest
# possible match is always taken.
[[test]]
name = "nongreedy-all"
regex = '(abc)+?'
haystack = "abcabcabc"
matches = [
  [[0, 9], [6, 9]],
]
anchored = true
match-kind = "all"

# Match offsets are byte offsets: each 'β' is two bytes in UTF-8, so the word
# made of three of them spans [0, 6]. The trailing '☃' is not part of the
# match.
[[test]]
name = "word-boundary-unicode-01"
regex = '\b\w+\b'
haystack = 'βββ☃'
matches = [[0, 6]]
anchored = true

# With 'unicode = false', the match covers only the ASCII letters 'abc' and
# stops before 'β'.
[[test]]
name = "word-boundary-nounicode-01"
regex = '\b\w+\b'
haystack = 'abcβ'
matches = [[0, 3]]
anchored = true
unicode = false

# Tests that '.c' doesn't match 'abc' when performing an anchored search from
# the beginning of the haystack. This test found two different bugs in the
# PikeVM and the meta engine.
[[test]]
name = "no-match-at-start"
regex = '.c'
haystack = 'abc'
matches = []
anchored = true

# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-bounds"
regex = '.c'
haystack = 'aabc'
bounds = [1, 4]
matches = []
anchored = true

# This is like no-match-at-start, but hits the "reverse inner" optimization
# inside the meta engine. (no-match-at-start hits the "reverse suffix"
# optimization.)
[[test]]
name = "no-match-at-start-reverse-inner"
regex = '.c[a-z]'
haystack = 'abcz'
matches = []
anchored = true

# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-reverse-inner-bounds"
regex = '.c[a-z]'
haystack = 'aabcz'
bounds = [1, 5]
matches = []
anchored = true

# Same as no-match-at-start, but applies to the meta engine's "reverse
# anchored" optimization.
[[test]]
name = "no-match-at-start-reverse-anchored"
regex = '.c[a-z]$'
haystack = 'abcz'
matches = []
anchored = true

# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-reverse-anchored-bounds"
regex = '.c[a-z]$'
haystack = 'aabcz'
bounds = [1, 5]
matches = []
anchored = true