third_party/rust/regex/testdata/line-terminator.toml


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

# This tests that we can switch the line terminator to the NUL byte.
#
# After unescaping, the haystack is NUL, 'abc', NUL. The multi-line anchors
# match next to the NUL bytes, so the only match is 'abc' at [1, 4].
[[test]]
name = "nul"
regex = '(?m)^[a-z]+$'
haystack = '\x00abc\x00'
matches = [[1, 4]]
unescape = true
line-terminator = '\x00'

# This tests that '.' will not match the configured line terminator, but will
# match \n.
#
# The NUL byte at offset 0 is the terminator and is therefore skipped, which
# leaves the \n at [1, 2] as the sole match.
[[test]]
name = "dot-changes-with-line-terminator"
regex = '.'
haystack = '\x00\n'
matches = [[1, 2]]
unescape = true
line-terminator = '\x00'

# This tests that when we switch the line terminator, \n is no longer
# recognized as the terminator.
#
# This uses the same regex as the 'nul' test, but 'abc' is delimited by \n
# while the terminator is NUL. Since [a-z] does not match \n either, no match
# is expected.
[[test]]
name = "not-line-feed"
regex = '(?m)^[a-z]+$'
haystack = '\nabc\n'
matches = []
unescape = true
line-terminator = '\x00'

# This tests that we can set the line terminator to a non-ASCII byte and have
# it behave as expected.
#
# NOTE: the byte \xFF never appears in valid UTF-8, which is presumably why
# this test sets 'utf8 = false'.
[[test]]
name = "non-ascii"
regex = '(?m)^[a-z]+$'
haystack = '\xFFabc\xFF'
matches = [[1, 4]]
unescape = true
line-terminator = '\xFF'
utf8 = false

# This tests that we can set the line terminator to a byte corresponding to a
# word character, and things work as expected.
#
# 'Z' is a word byte, but it is not matched by [a-z], so it can only act as
# the terminator on either side of 'abc'.
[[test]]
name = "word-byte"
regex = '(?m)^[a-z]+$'
haystack = 'ZabcZ'
matches = [[1, 4]]
unescape = true
line-terminator = 'Z'

# This tests that we can set the line terminator to a byte corresponding to a
# non-word character, and things work as expected.
#
# This mirrors the 'word-byte' test with '%' in place of 'Z', and expects the
# same match offsets.
[[test]]
name = "non-word-byte"
regex = '(?m)^[a-z]+$'
haystack = '%abc%'
matches = [[1, 4]]
unescape = true
line-terminator = '%'

# This combines "set line terminator to a word byte" with a word boundary
# assertion, which should result in no match even though ^/$ matches.
#
# At offset 1, the bytes on both sides ('Z' and 'a') are word bytes, so \b
# fails there. The same holds at offset 4 ('c' and 'Z').
[[test]]
name = "word-boundary"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
unescape = true
line-terminator = 'Z'

# Like 'word-boundary', but does an anchored search at the point where ^
# matches, but where \b should not.
#
# The search is limited to 'abc' via 'bounds', but the 'Z' just before offset
# 1 is still a word byte, so \b must still fail there and no match is
# expected.
[[test]]
name = "word-boundary-at"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'

# Like 'word-boundary-at', but flips the word boundary to a negation. This
# in particular tests a tricky case in DFA engines, where they must consider
# explicitly that a starting configuration from a custom line terminator may
# also require setting the "is from word byte" flag on a state. Otherwise,
# it's treated as "not from a word byte," which would result in \B not matching
# here when it should.
#
# Both \B assertions sit between two word bytes ('Z'/'a' at offset 1 and
# 'c'/'Z' at offset 4), so the expected match is 'abc' at [1, 4].
[[test]]
name = "not-word-boundary-at"
regex = '(?m)^\B[a-z]+\B$'
haystack = 'ZabcZ'
matches = [[1, 4]]
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'