# This is a basic test that checks ^ and $ treat \r\n as a single line
# terminator. If ^ and $ only treated \n as a line terminator, then this would
# only match 'xyz' at the end of the haystack.
[[test]]
name = "basic"
regex = '(?mR)^[a-z]+$'
haystack = "abc\r\ndef\r\nxyz"
matches = [[0, 3], [5, 8], [10, 13]]

# Tests that a CRLF-aware '^$' assertion does not match between CR and LF.
# The haystack contains no empty lines, so no match is expected at all.
[[test]]
name = "start-end-non-empty"
regex = '(?mR)^$'
haystack = "abc\r\ndef\r\nxyz"
matches = []

# Tests that a CRLF-aware '^$' assertion matches the empty string, just like
# a non-CRLF-aware '^$' assertion.
[[test]]
name = "start-end-empty"
regex = '(?mR)^$'
haystack = ""
matches = [[0, 0]]

# Tests that a CRLF-aware '^$' assertion matches the empty string preceding
# and following a line terminator.
[[test]]
name = "start-end-before-after"
regex = '(?mR)^$'
haystack = "\r\n"
matches = [[0, 0], [2, 2]]

# Tests that a CRLF-aware '^' assertion does not split a line terminator.
[[test]]
name = "start-no-split"
regex = '(?mR)^'
haystack = "abc\r\ndef\r\nxyz"
matches = [[0, 0], [5, 5], [10, 10]]

# Same as 'start-no-split' above, but with adjacent runs of line terminators.
[[test]]
name = "start-no-split-adjacent"
regex = '(?mR)^'
haystack = "\r\n\r\n\r\n"
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]

# Same as 'start-no-split-adjacent' above, but with adjacent runs of just
# carriage returns.
[[test]]
name = "start-no-split-adjacent-cr"
regex = '(?mR)^'
haystack = "\r\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

# Same as 'start-no-split-adjacent' above, but with adjacent runs of just
# line feeds.
[[test]]
name = "start-no-split-adjacent-lf"
regex = '(?mR)^'
haystack = "\n\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

# Tests that a CRLF-aware '$' assertion does not split a line terminator.
[[test]]
name = "end-no-split"
regex = '(?mR)$'
haystack = "abc\r\ndef\r\nxyz"
matches = [[3, 3], [8, 8], [13, 13]]

# Same as 'end-no-split' above, but with adjacent runs of line terminators.
[[test]]
name = "end-no-split-adjacent"
regex = '(?mR)$'
haystack = "\r\n\r\n\r\n"
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]

# Same as 'end-no-split-adjacent' above, but with adjacent runs of just
# carriage returns.
[[test]]
name = "end-no-split-adjacent-cr"
regex = '(?mR)$'
haystack = "\r\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

# Same as 'end-no-split-adjacent' above, but with adjacent runs of just
# line feeds.
[[test]]
name = "end-no-split-adjacent-lf"
regex = '(?mR)$'
haystack = "\n\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note
# that this doesn't require multi-line mode to be enabled.
[[test]]
name = "dot-no-crlf"
regex = '(?R).'
haystack = "\r\n\r\n\r\n"
matches = []

# This is a test that caught a bug in the one-pass DFA where it (amazingly) was
# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy &
# paste bug. We insert an empty capture group here because it provokes the meta
# regex engine to first find a match and then trip over a panic because the
# one-pass DFA erroneously says there is no match.
[[test]]
name = "onepass-wrong-crlf-with-capture"
regex = '(?Rm:().$)'
haystack = "ZZ\r"
matches = [[[1, 2], [1, 1]]]

# This is like onepass-wrong-crlf-with-capture above, except it sets up the
# test so that it can be run by the one-pass DFA directly. (i.e., Make it
# anchored and start the search at the right place.)
[[test]]
name = "onepass-wrong-crlf-anchored"
regex = '(?Rm:.$)'
haystack = "ZZ\r"
matches = [[1, 2]]
anchored = true
bounds = [1, 3]