summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex/testdata/line-terminator.toml
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/regex/testdata/line-terminator.toml
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/regex/testdata/line-terminator.toml')
-rw-r--r--third_party/rust/regex/testdata/line-terminator.toml97
1 files changed, 97 insertions, 0 deletions
diff --git a/third_party/rust/regex/testdata/line-terminator.toml b/third_party/rust/regex/testdata/line-terminator.toml
new file mode 100644
index 0000000000..4de72de31e
--- /dev/null
+++ b/third_party/rust/regex/testdata/line-terminator.toml
@@ -0,0 +1,97 @@
+# This tests that we can switch the line terminator to the NUL byte.
+[[test]]
+name = "nul"
+regex = '(?m)^[a-z]+$'
+haystack = '\x00abc\x00'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that '.' will not match the configured line terminator, but will
+# match \n.
+[[test]]
+name = "dot-changes-with-line-terminator"
+regex = '.'
+haystack = '\x00\n'
+matches = [[1, 2]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that when we switch the line terminator, \n is no longer
+# recognized as the terminator.
+[[test]]
+name = "not-line-feed"
+regex = '(?m)^[a-z]+$'
+haystack = '\nabc\n'
+matches = []
+unescape = true
+line-terminator = '\x00'
+
+# This tests that we can set the line terminator to a non-ASCII byte and have
+# it behave as expected.
+[[test]]
+name = "non-ascii"
+regex = '(?m)^[a-z]+$'
+haystack = '\xFFabc\xFF'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\xFF'
+utf8 = false
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# word character, and things work as expected.
+[[test]]
+name = "word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+unescape = true
+line-terminator = 'Z'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# non-word character, and things work as expected.
+[[test]]
+name = "non-word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = '%abc%'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '%'
+
+# This combines "set line terminator to a word byte" with a word boundary
+# assertion, which should result in no match even though ^/$ matches.
+[[test]]
+name = "word-boundary"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary', but does an anchored search starting at the offset
+# where ^ matches but \b should not.
+[[test]]
+name = "word-boundary-at"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary-at', but flips the word boundary to a negation. This
+# in particular tests a tricky case in DFA engines, where they must consider
+# explicitly that a starting configuration from a custom line terminator may
+# also require setting the "is from word byte" flag on a state. Otherwise,
+# it's treated as "not from a word byte," which would result in \B not matching
+# here when it should.
+[[test]]
+name = "not-word-boundary-at"
+regex = '(?m)^\B[a-z]+\B$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'