97 lines
2.5 KiB
TOML
97 lines
2.5 KiB
TOML
# This tests that we can switch the line terminator to the NUL byte.
|
|
[[test]]
|
|
name = "nul"
|
|
regex = '(?m)^[a-z]+$'
|
|
# TOML literal (single-quoted) strings do not process escapes, so each \x00
# below is stored as four plain characters in this file.
haystack = '\x00abc\x00'
|
|
# Presumably [start, end) byte offsets: once each \x00 is decoded to one byte,
# 1..4 covers exactly "abc" -- TODO confirm offset convention with the harness.
matches = [[1, 4]]
|
|
# NOTE(review): presumably tells the harness to decode the \x00 escapes in the
# literal strings of this test into raw bytes -- confirm against the harness.
unescape = true
|
|
line-terminator = '\x00'
|
|
|
|
# This tests that '.' will not match the configured line terminator, but will
|
|
# match \n.
|
|
[[test]]
|
|
name = "dot-changes-with-line-terminator"
|
|
regex = '.'
|
|
# Two escapes: the configured terminator (\x00) followed by a line feed (\n).
# They are written in a literal string, so TOML itself leaves them undecoded.
haystack = '\x00\n'
|
|
# The single expected match is the second escape (\n); the first (\x00) is the
# configured terminator and is expected to be skipped by '.'.
matches = [[1, 2]]
|
|
# NOTE(review): presumably makes the harness decode \x00 and \n into single
# bytes, which is what the offsets above rely on -- confirm.
unescape = true
|
|
line-terminator = '\x00'
|
|
|
|
# This tests that when we switch the line terminator, \n is no longer
|
|
# recognized as the terminator.
|
|
[[test]]
|
|
name = "not-line-feed"
|
|
regex = '(?m)^[a-z]+$'
|
|
# "abc" is delimited by \n here, not by the configured terminator (\x00).
haystack = '\nabc\n'
|
|
# Empty on purpose: with \x00 as the terminator, the \n around "abc" must not
# act as a line boundary for ^/$, and \n itself is not in [a-z].
matches = []
|
|
# NOTE(review): presumably makes the harness decode the \n and \x00 escapes
# written in the literal strings -- confirm against the harness.
unescape = true
|
|
line-terminator = '\x00'
|
|
|
|
# This tests that we can set the line terminator to a non-ASCII byte and have
|
|
# it behave as expected.
|
|
[[test]]
|
|
name = "non-ascii"
|
|
regex = '(?m)^[a-z]+$'
|
|
# \xFF is written as an escape in a literal string; TOML leaves it undecoded.
haystack = '\xFFabc\xFF'
|
|
# Presumably [start, end) byte offsets: with each \xFF decoded to one byte,
# 1..4 covers exactly "abc" -- TODO confirm offset convention.
matches = [[1, 4]]
|
|
# NOTE(review): presumably makes the harness decode \xFF into the raw byte
# 0xFF -- confirm against the harness.
unescape = true
|
|
line-terminator = '\xFF'
|
|
# The byte 0xFF never occurs in well-formed UTF-8, so the decoded haystack is
# not valid UTF-8. NOTE(review): this key presumably lifts a UTF-8 requirement
# in the harness/engine -- confirm.
utf8 = false
|
|
|
|
# This tests that we can set the line terminator to a byte corresponding to a
|
|
# word character, and things work as expected.
|
|
[[test]]
|
|
name = "word-byte"
|
|
regex = '(?m)^[a-z]+$'
|
|
# 'Z' is upper case and therefore outside [a-z], so the class can only consume
# "abc" and never the terminator bytes on either side.
haystack = 'ZabcZ'
|
|
# Offsets 1..4 are the "abc" between the two 'Z' terminators.
matches = [[1, 4]]
|
|
# NOTE(review): neither the haystack nor the terminator contains a backslash
# escape, so this looks redundant here; presumably kept for uniformity with
# the other tests in this file -- confirm.
unescape = true
|
|
line-terminator = 'Z'
|
|
|
|
# This tests that we can set the line terminator to a byte corresponding to a
|
|
# non-word character, and things work as expected.
|
|
[[test]]
|
|
name = "non-word-byte"
|
|
regex = '(?m)^[a-z]+$'
|
|
# '%' is not in [a-z], so the class can only consume "abc" and never the
# terminator bytes on either side.
haystack = '%abc%'
|
|
# Offsets 1..4 are the "abc" between the two '%' terminators.
matches = [[1, 4]]
|
|
# NOTE(review): neither the haystack nor the terminator contains a backslash
# escape, so this looks redundant here; presumably kept for uniformity with
# the other tests in this file -- confirm.
unescape = true
|
|
line-terminator = '%'
|
|
|
|
# This combines "set line terminator to a word byte" with a word boundary
|
|
# assertion, which should result in no match even though ^/$ matches.
|
|
[[test]]
|
|
name = "word-boundary"
|
|
# Same shape as the "word-byte" pattern, with \b added right after ^ and right
# before $.
regex = '(?m)^\b[a-z]+\b$'
|
|
haystack = 'ZabcZ'
|
|
# Empty on purpose: at offset 1 the neighbours are 'Z' and 'a', both word
# characters, so \b cannot hold there even though 'Z' is the line terminator
# and ^ does. The same applies to \b before $ at offset 4 ('c' and 'Z').
matches = []
|
|
# NOTE(review): no backslash escapes appear in the haystack or terminator, so
# this looks redundant here -- confirm.
unescape = true
|
|
line-terminator = 'Z'
|
|
|
|
# Like 'word-boundary', but does an anchored search at the point where ^
|
|
# matches, but where \b should not.
|
|
[[test]]
|
|
name = "word-boundary-at"
|
|
regex = '(?m)^\b[a-z]+\b$'
|
|
haystack = 'ZabcZ'
|
|
# Empty on purpose: \b must still fail at offset 1, where 'Z' and 'a' are both
# word characters, even when the search starts exactly there.
matches = []
|
|
# Limits the search to offsets 1..4, i.e. the "abc" between the terminators.
# NOTE(review): the 'Z' bytes outside the bounds are presumably still visible
# to ^, $ and \b as surrounding context -- confirm against the harness.
bounds = [1, 4]
|
|
# Match must begin at the start of the bounds (offset 1), the position where ^
# holds but \b should not.
anchored = true
|
|
# NOTE(review): no backslash escapes appear in the haystack or terminator, so
# this looks redundant here -- confirm.
unescape = true
|
|
line-terminator = 'Z'
|
|
|
|
# Like 'word-boundary-at', but flips the word boundary to a negation. This
|
|
# in particular tests a tricky case in DFA engines, where they must consider
|
|
# explicitly that a starting configuration from a custom line terminator may
|
|
# also require setting the "is from word byte" flag on a state. Otherwise,
|
|
# it's treated as "not from a word byte," which would result in \B not matching
|
|
# here when it should.
|
|
[[test]]
|
|
name = "not-word-boundary-at"
|
|
# Same as the "word-boundary-at" pattern with each \b negated to \B.
regex = '(?m)^\B[a-z]+\B$'
|
|
haystack = 'ZabcZ'
|
|
# \B is expected to hold at offsets 1 and 4 because each sits between two word
# characters ('Z'/'a' and 'c'/'Z'), so the whole "abc" span matches.
matches = [[1, 4]]
|
|
# Limits the search to offsets 1..4, i.e. the "abc" between the terminators.
# NOTE(review): the expected match only makes sense if the 'Z' bytes outside
# the bounds are still consulted as context for ^, $ and \B -- confirm.
bounds = [1, 4]
|
|
# Match must begin at the start of the bounds (offset 1), immediately after
# the leading terminator byte.
anchored = true
|
|
# NOTE(review): no backslash escapes appear in the haystack or terminator, so
# this looks redundant here -- confirm.
unescape = true
|
|
line-terminator = 'Z'
|