summary refs log tree commit diff stats
path: root/vendor/regex/testdata/anchored.toml
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex/testdata/anchored.toml')
-rw-r--r-- vendor/regex/testdata/anchored.toml 127
1 files changed, 127 insertions, 0 deletions
diff --git a/vendor/regex/testdata/anchored.toml b/vendor/regex/testdata/anchored.toml
new file mode 100644
index 0000000..0f2248d
--- /dev/null
+++ b/vendor/regex/testdata/anchored.toml
@@ -0,0 +1,127 @@
+# These tests are specifically geared toward searches with 'anchored = true'.
+# While they are interesting in their own right, they are particularly
+# important for testing the one-pass DFA since the one-pass DFA can't work in
+# unanchored contexts.
+#
+# Note that "anchored" in this context does not mean "^". Anchored searches are
+# searches whose matches must begin at the start of the search, which may not
+# be at the start of the haystack. That's why anchored searches---and there are
+# some examples below---can still report multiple matches. This occurs when the
+# matches are adjacent to one another.
+
+[[test]]
+name = "greedy"
+regex = '(abc)+'
+haystack = "abcabcabc"
+matches = [
+ [[0, 9], [6, 9]],
+]
+anchored = true
+
+# When an "earliest" search is used, greediness doesn't really exist because
+# matches are reported as soon as they are known.
+[[test]]
+name = "greedy-earliest"
+regex = '(abc)+'
+haystack = "abcabcabc"
+matches = [
+ [[0, 3], [0, 3]],
+ [[3, 6], [3, 6]],
+ [[6, 9], [6, 9]],
+]
+anchored = true
+search-kind = "earliest"
+
+[[test]]
+name = "nongreedy"
+regex = '(abc)+?'
+haystack = "abcabcabc"
+matches = [
+ [[0, 3], [0, 3]],
+ [[3, 6], [3, 6]],
+ [[6, 9], [6, 9]],
+]
+anchored = true
+
+# When "all" semantics are used, non-greediness doesn't exist since the longest
+# possible match is always taken.
+[[test]]
+name = "nongreedy-all"
+regex = '(abc)+?'
+haystack = "abcabcabc"
+matches = [
+ [[0, 9], [6, 9]],
+]
+anchored = true
+match-kind = "all"
+
+[[test]]
+name = "word-boundary-unicode-01"
+regex = '\b\w+\b'
+haystack = 'βββ☃'
+matches = [[0, 6]]
+anchored = true
+
+[[test]]
+name = "word-boundary-nounicode-01"
+regex = '\b\w+\b'
+haystack = 'abcβ'
+matches = [[0, 3]]
+anchored = true
+unicode = false
+
+# Tests that '.c' doesn't match 'abc' when performing an anchored search from
+# the beginning of the haystack. This test found two different bugs in the
+# PikeVM and the meta engine.
+[[test]]
+name = "no-match-at-start"
+regex = '.c'
+haystack = 'abc'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-bounds"
+regex = '.c'
+haystack = 'aabc'
+bounds = [1, 4]
+matches = []
+anchored = true
+
+# This is like no-match-at-start, but hits the "reverse inner" optimization
+# inside the meta engine. (no-match-at-start hits the "reverse suffix"
+# optimization.)
+[[test]]
+name = "no-match-at-start-reverse-inner"
+regex = '.c[a-z]'
+haystack = 'abcz'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-reverse-inner-bounds"
+regex = '.c[a-z]'
+haystack = 'aabcz'
+bounds = [1, 5]
+matches = []
+anchored = true
+
+# Same as no-match-at-start, but applies to the meta engine's "reverse
+# anchored" optimization.
+[[test]]
+name = "no-match-at-start-reverse-anchored"
+regex = '.c[a-z]$'
+haystack = 'abcz'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-reverse-anchored-bounds"
+regex = '.c[a-z]$'
+haystack = 'aabcz'
+bounds = [1, 5]
+matches = []
+anchored = true