summaryrefslogtreecommitdiffstats
path: root/vendor/regex/testdata/regression.toml
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--vendor/regex/testdata/regression.toml830
1 files changed, 830 insertions, 0 deletions
diff --git a/vendor/regex/testdata/regression.toml b/vendor/regex/testdata/regression.toml
new file mode 100644
index 0000000..53b0701
--- /dev/null
+++ b/vendor/regex/testdata/regression.toml
@@ -0,0 +1,830 @@
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-100"
+regex = '(*)'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-200"
+regex = '(?:?)'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-300"
+regex = '(?)'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-400"
+regex = '*'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/75
+[[test]]
+name = "unsorted-binary-search-100"
+regex = '(?i-u)[a_]+'
+haystack = "A_"
+matches = [[0, 2]]
+
+# See: https://github.com/rust-lang/regex/issues/75
+[[test]]
+name = "unsorted-binary-search-200"
+regex = '(?i-u)[A_]+'
+haystack = "a_"
+matches = [[0, 2]]
+
+# See: https://github.com/rust-lang/regex/issues/76
+[[test]]
+name = "unicode-case-lower-nocase-flag"
+regex = '(?i)\p{Ll}+'
+haystack = "ΛΘΓΔα"
+matches = [[0, 10]]
+
+# See: https://github.com/rust-lang/regex/issues/99
+[[test]]
+name = "negated-char-class-100"
+regex = '(?i)[^x]'
+haystack = "x"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/99
+[[test]]
+name = "negated-char-class-200"
+regex = '(?i)[^x]'
+haystack = "X"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/101
+[[test]]
+name = "ascii-word-underscore"
+regex = '[[:word:]]'
+haystack = "_"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/129
+[[test]]
+name = "captures-repeat"
+regex = '([a-f]){2}(?P<foo>[x-z])'
+haystack = "abx"
+matches = [
+ [[0, 3], [1, 2], [2, 3]],
+]
+
+# See: https://github.com/rust-lang/regex/issues/153
+[[test]]
+name = "alt-in-alt-100"
+regex = 'ab?|$'
+haystack = "az"
+matches = [[0, 1], [2, 2]]
+
+# See: https://github.com/rust-lang/regex/issues/153
+[[test]]
+name = "alt-in-alt-200"
+regex = '^(?:.*?)(?:\n|\r\n?|$)'
+haystack = "ab\rcd"
+matches = [[0, 3]]
+
+# See: https://github.com/rust-lang/regex/issues/169
+[[test]]
+name = "leftmost-first-prefix"
+regex = 'z*azb'
+haystack = "azb"
+matches = [[0, 3]]
+
+# See: https://github.com/rust-lang/regex/issues/191
+[[test]]
+name = "many-alternates"
+regex = '1|2|3|4|5|6|7|8|9|10|int'
+haystack = "int"
+matches = [[0, 3]]
+
+# See: https://github.com/rust-lang/regex/issues/204
+[[test]]
+name = "word-boundary-alone-100"
+regex = '\b'
+haystack = "Should this (work?)"
+matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]]
+
+# See: https://github.com/rust-lang/regex/issues/204
+[[test]]
+name = "word-boundary-alone-200"
+regex = '\b'
+haystack = "a b c"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+# See: https://github.com/rust-lang/regex/issues/264
+[[test]]
+name = "word-boundary-ascii-no-capture"
+regex = '\B'
+haystack = "\U00028F3E"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/264
+[[test]]
+name = "word-boundary-ascii-capture"
+regex = '(?:\B)'
+haystack = "\U00028F3E"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/268
+[[test]]
+name = "partial-anchor"
+regex = '^a|b'
+haystack = "ba"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "endl-or-word-boundary"
+regex = '(?m:$)|(?-u:\b)'
+haystack = "\U0006084E"
+matches = [[4, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "zero-or-end"
+regex = '(?i-u:\x00)|$'
+haystack = "\U000E682F"
+matches = [[4, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "y-or-endl"
+regex = '(?i-u:y)|(?m:$)'
+haystack = "\U000B4331"
+matches = [[4, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "word-boundary-start-x"
+regex = '(?u:\b)^(?-u:X)'
+haystack = "X"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "word-boundary-ascii-start-x"
+regex = '(?-u:\b)^(?-u:X)'
+haystack = "X"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "end-not-word-boundary"
+regex = '$\B'
+haystack = "\U0005C124\U000B576C"
+matches = [[8, 8]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/280
+[[test]]
+name = "partial-anchor-alternate-begin"
+regex = '^a|z'
+haystack = "yyyyya"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/280
+[[test]]
+name = "partial-anchor-alternate-end"
+regex = 'a$|z'
+haystack = "ayyyyy"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/289
+[[test]]
+name = "lits-unambiguous-100"
+regex = '(?:ABC|CDA|BC)X'
+haystack = "CDAX"
+matches = [[0, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/291
+[[test]]
+name = "lits-unambiguous-200"
+regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$'
+haystack = "CIMG2341"
+matches = [
+ [[0, 8], [0, 4], [], [0, 4], [4, 8]],
+]
+
+# See: https://github.com/rust-lang/regex/issues/303
+#
+# 2022-09-19: This has now been "properly" fixed in that empty character
+# classes are fully supported as something that can never match. This test
+# used to be marked as 'compiles = false', but now it works.
+[[test]]
+name = "negated-full-byte-range"
+regex = '[^\x00-\xFF]'
+haystack = ""
+matches = []
+compiles = true
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/321
+[[test]]
+name = "strange-anchor-non-complete-prefix"
+regex = 'a^{2}'
+haystack = ""
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/321
+[[test]]
+name = "strange-anchor-non-complete-suffix"
+regex = '${2}a'
+haystack = ""
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/334
+# See: https://github.com/rust-lang/regex/issues/557
+[[test]]
+name = "captures-after-dfa-premature-end-100"
+regex = 'a(b*(X|$))?'
+haystack = "abcbX"
+matches = [
+ [[0, 1], [], []],
+]
+
+# See: https://github.com/rust-lang/regex/issues/334
+# See: https://github.com/rust-lang/regex/issues/557
+[[test]]
+name = "captures-after-dfa-premature-end-200"
+regex = 'a(bc*(X|$))?'
+haystack = "abcbX"
+matches = [
+ [[0, 1], [], []],
+]
+
+# See: https://github.com/rust-lang/regex/issues/334
+# See: https://github.com/rust-lang/regex/issues/557
+[[test]]
+name = "captures-after-dfa-premature-end-300"
+regex = '(aa$)?'
+haystack = "aaz"
+matches = [
+ [[0, 0], []],
+ [[1, 1], []],
+ [[2, 2], []],
+ [[3, 3], []],
+]
+
+# Plucked from "Why aren’t regular expressions a lingua franca? an empirical
+# study on the re-use and portability of regular expressions", The ACM Joint
+# European Software Engineering Conference and Symposium on the Foundations of
+# Software Engineering (ESEC/FSE), 2019.
+#
+# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909
+[[test]]
+name = "captures-after-dfa-premature-end-400"
+regex = '(a)\d*\.?\d+\b'
+haystack = "a0.0c"
+matches = [
+ [[0, 2], [0, 1]],
+]
+
+# See: https://github.com/rust-lang/regex/issues/437
+[[test]]
+name = "literal-panic"
+regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+'
+haystack = "test"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/527
+[[test]]
+name = "empty-flag-expr"
+regex = '(?:(?:(?x)))'
+haystack = ""
+matches = [[0, 0]]
+
+# See: https://github.com/rust-lang/regex/issues/533
+#[[tests]]
+#name = "blank-matches-nothing-between-space-and-tab"
+#regex = '[[:blank:]]'
+#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
+#match = false
+#unescape = true
+
+# See: https://github.com/rust-lang/regex/issues/533
+#[[tests]]
+#name = "blank-matches-nothing-between-space-and-tab-inverted"
+#regex = '^[[:^blank:]]+$'
+#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
+#match = true
+#unescape = true
+
+# See: https://github.com/rust-lang/regex/issues/555
+[[test]]
+name = "invalid-repetition"
+regex = '(?m){1,1}'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/640
+[[test]]
+name = "flags-are-unset"
+regex = '(?:(?i)foo)|Bar'
+haystack = "foo Foo bar Bar"
+matches = [[0, 3], [4, 7], [12, 15]]
+
+# Note that 'Ј' is not 'j', but cyrillic Je
+# https://en.wikipedia.org/wiki/Je_(Cyrillic)
+#
+# See: https://github.com/rust-lang/regex/issues/659
+[[test]]
+name = "empty-group-with-unicode"
+regex = '(?:)Ј01'
+haystack = 'zЈ01'
+matches = [[1, 5]]
+
+# See: https://github.com/rust-lang/regex/issues/579
+[[test]]
+name = "word-boundary-weird"
+regex = '\b..\b'
+haystack = "I have 12, he has 2!"
+matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
+
+# See: https://github.com/rust-lang/regex/issues/579
+[[test]]
+name = "word-boundary-weird-ascii"
+regex = '\b..\b'
+haystack = "I have 12, he has 2!"
+matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/579
+[[test]]
+name = "word-boundary-weird-minimal-ascii"
+regex = '\b..\b'
+haystack = "az,,b"
+matches = [[0, 2], [2, 4]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1203
+[[test]]
+name = "reverse-suffix-100"
+regex = '[0-4][0-4][0-4]000'
+haystack = "153.230000"
+matches = [[4, 10]]
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1203
+[[test]]
+name = "reverse-suffix-200"
+regex = '[0-9][0-9][0-9]000'
+haystack = "153.230000\n"
+matches = [[4, 10]]
+
+# This is a tricky case for the reverse suffix optimization, because it
+# finds the 'foobar' match but the reverse scan must fail to find a match by
+# correctly dealing with the word boundary following the 'foobar' literal when
+# computing the start state.
+#
+# This test exists because I tried to break the following assumption that
+# is currently in the code: that if a suffix is found and the reverse scan
+# succeeds, then it's guaranteed that there is an overall match. Namely, the
+# 'is_match' routine does *not* do another forward scan in this case because of
+# this assumption.
+[[test]]
+name = "reverse-suffix-300"
+regex = '\w+foobar\b'
+haystack = "xyzfoobarZ"
+matches = []
+unicode = false
+utf8 = false
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1247
+[[test]]
+name = "stops"
+regex = '\bs(?:[ab])'
+haystack = 's\xE4'
+matches = []
+unescape = true
+utf8 = false
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1247
+[[test]]
+name = "stops-ascii"
+regex = '(?-u:\b)s(?:[ab])'
+haystack = 's\xE4'
+matches = []
+unescape = true
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/850
+[[test]]
+name = "adjacent-line-boundary-100"
+regex = '(?m)^(?:[^ ]+?)$'
+haystack = "line1\nline2"
+matches = [[0, 5], [6, 11]]
+
+# Continued.
+[[test]]
+name = "adjacent-line-boundary-200"
+regex = '(?m)^(?:[^ ]+?)$'
+haystack = "A\nB"
+matches = [[0, 1], [2, 3]]
+
+# There is no issue for this bug.
+[[test]]
+name = "anchored-prefix-100"
+regex = '^a[[:^space:]]'
+haystack = "a "
+matches = []
+
+# There is no issue for this bug.
+[[test]]
+name = "anchored-prefix-200"
+regex = '^a[[:^space:]]'
+haystack = "foo boo a"
+matches = []
+
+# There is no issue for this bug.
+[[test]]
+name = "anchored-prefix-300"
+regex = '^-[a-z]'
+haystack = "r-f"
+matches = []
+
+# Tests that a possible Aho-Corasick optimization works correctly. It only
+# kicks in when we have a lot of literals. By "works correctly," we mean that
+# leftmost-first match semantics are properly respected. That is, samwise
+# should match, not sam.
+#
+# There is no issue for this bug.
+[[test]]
+name = "aho-corasick-100"
+regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z'
+haystack = "samwise"
+matches = [[0, 7]]
+
+# See: https://github.com/rust-lang/regex/issues/921
+[[test]]
+name = "interior-anchor-capture"
+regex = '(a$)b$'
+haystack = 'ab'
+matches = []
+
+# I found this bug in the course of adding some of the regexes that Ruff uses
+# to rebar. It turns out that the lazy DFA was finding a match that was being
+# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack.
+#
+# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52
+[[test]]
+name = "ruff-whitespace-around-keywords"
+regex = '^(a|ab)$'
+haystack = "ab"
+anchored = true
+unicode = false
+utf8 = true
+matches = [[[0, 2], [0, 2]]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-0"
+regex = '(?:(?-u:\b)|(?u:h))+'
+haystack = "h"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-1"
+regex = '(?u:\B)'
+haystack = "鋸"
+unicode = true
+utf8 = false
+matches = []
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-2"
+regex = '(?:(?u:\b)|(?s-u:.))+'
+haystack = "oB"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 2]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-3"
+regex = '(?:(?-u:\B)|(?su:.))+'
+haystack = "\U000FEF80"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-3-utf8"
+regex = '(?:(?-u:\B)|(?su:.))+'
+haystack = "\U000FEF80"
+unicode = true
+utf8 = true
+matches = [[0, 0], [4, 4]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-4"
+regex = '(?m:$)(?m:^)(?su:.)'
+haystack = "\n‣"
+unicode = true
+utf8 = false
+matches = [[0, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-5"
+regex = '(?m:$)^(?m:^)'
+haystack = "\n"
+unicode = true
+utf8 = false
+matches = [[0, 0]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-6"
+regex = '(?P<kp>(?iu:do)(?m:$))*'
+haystack = "dodo"
+unicode = true
+utf8 = false
+matches = [
+ [[0, 0], []],
+ [[1, 1], []],
+ [[2, 4], [2, 4]],
+]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-7"
+regex = '(?u:\B)'
+haystack = "䡁"
+unicode = true
+utf8 = false
+matches = []
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-8"
+regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+'
+haystack = "0"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-9"
+regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)'
+haystack = "\n\n"
+unicode = true
+utf8 = false
+matches = [
+ [[1, 2], [1, 2]],
+]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-10"
+regex = '(?m:$)(?m:$)^(?su:.)'
+haystack = "\n\u0081¨\u200a"
+unicode = true
+utf8 = false
+matches = [[0, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-11"
+regex = '(?-u:\B)(?m:^)'
+haystack = "0\n"
+unicode = true
+utf8 = false
+matches = [[2, 2]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-12"
+regex = '(?:(?u:\b)|(?-u:.))+'
+haystack = "0"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/969
+[[test]]
+name = "i969"
+regex = 'c.*d\z'
+haystack = "ababcd"
+bounds = [4, 6]
+search-kind = "earliest"
+matches = [[4, 6]]
+
+# I found this during the regex-automata migration. This is the fowler basic
+# 154 test, but without anchored = true and without a match limit.
+#
+# This test caught a subtle bug in the hybrid reverse DFA search, where it
+# would skip over the termination condition if it entered a start state. This
+# was a double bug. Firstly, the reverse DFA shouldn't have had start states
+# specialized in the first place, and thus it shouldn't have possible to detect
+# that the DFA had entered a start state. The second bug was that the start
+# state handling was incorrect by jumping over the termination condition.
+[[test]]
+name = "fowler-basic154-unanchored"
+regex = '''a([bc]*)c*'''
+haystack = '''abc'''
+matches = [[[0, 3], [1, 3]]]
+
+# From: https://github.com/rust-lang/regex/issues/981
+#
+# This was never really a problem in the new architecture because the
+# regex-automata engines are far more principled about how they deal with
+# look-around. (This was one of the many reasons I wanted to re-work the
+# original regex crate engines.)
+[[test]]
+name = "word-boundary-interact-poorly-with-literal-optimizations"
+regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))'
+haystack = 'ubi-Darwin-x86_64.tar.gz'
+matches = []
+
+# This was found during fuzz testing of regex. It provoked a panic in the meta
+# engine as a result of the reverse suffix optimization. Namely, it hit a case
+# where a suffix match was found, a corresponding reverse match was found, but
+# the forward search turned up no match. The forward search should always match
+# if the suffix and reverse search match.
+#
+# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy
+# and fully compiled) engines. It was caused by a mishandling of the collection
+# of NFA state IDs in the generic determinization code (which is why both types
+# of DFA were impacted). Namely, when a fail state was encountered (that's the
+# `[^\s\S]` in the pattern below), then it would just stop collecting states.
+# But that's not correct since a later state could lead to a match.
+[[test]]
+name = "impossible-branch"
+regex = '.*[^\s\S]A|B'
+haystack = "B"
+matches = [[0, 1]]
+
+# This was found during fuzz testing in regex-lite. The regex crate never
+# suffered from this bug, but it causes regex-lite to incorrectly compile
+# captures.
+[[test]]
+name = "captures-wrong-order"
+regex = '(a){0}(a)'
+haystack = 'a'
+matches = [[[0, 1], [], [0, 1]]]
+
+# This tests a bug in how quit states are handled in the DFA. At some point
+# during development, the DFAs were tweaked slightly such that if they hit
+# a quit state (which means, they hit a byte that the caller configured should
+# stop the search), then it might not return an error necessarily. Namely, if a
+# match had already been found, then it would be returned instead of an error.
+#
+# But this is actually wrong! Why? Because even though a match had been found,
+# it wouldn't be fully correct to return it once a quit state has been seen
+# because you can't determine whether the match offset returned is the correct
+# greedy/leftmost-first match. Since you can't complete the search as requested
+# by the caller, the DFA should just stop and return an error.
+#
+# Interestingly, this does seem to produce an unavoidable difference between
+# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs.
+# The former will stop immediately once a match is known to occur and return
+# 'Ok(true)', where as the latter could find the match but quit with an
+# 'Err(..)' first.
+#
+# Thankfully, I believe this inconsistency between 'is_match()' and 'find()'
+# cannot be observed in the higher level meta regex API because it specifically
+# will try another engine that won't fail in the case of a DFA failing.
+#
+# This regression happened in the regex crate rewrite, but before anything got
+# released.
+[[test]]
+name = "negated-unicode-word-boundary-dfa-fail"
+regex = '\B.*'
+haystack = "!\u02D7"
+matches = [[0, 3]]
+
+# This failure was found in the *old* regex crate (prior to regex 1.9), but
+# I didn't investigate why. My best guess is that it's a literal optimization
+# bug. It didn't occur in the rewrite.
+[[test]]
+name = "missed-match"
+regex = 'e..+e.ee>'
+haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>'
+matches = [[1, 26]]
+
+# This test came from the 'ignore' crate and tripped a bug in how accelerated
+# DFA states were handled in an overlapping search.
+[[test]]
+name = "regex-to-glob"
+regex = ['(?-u)^path1/[^/]*$']
+haystack = "path1/foo"
+matches = [[0, 9]]
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# See: https://github.com/rust-lang/regex/issues/1060
+[[test]]
+name = "reverse-inner-plus-shorter-than-expected"
+regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
+haystack = '102:12:39'
+matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
+
+# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex
+# to demonstrate the extent of the rot. Sigh.
+#
+# See: https://github.com/rust-lang/regex/issues/1060
+[[test]]
+name = "reverse-inner-short"
+regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
+haystack = '102:12:39'
+matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
+
+# This regression test was found via the RegexSet APIs. It triggered a
+# particular code path where a regex was compiled with 'All' match semantics
+# (to support overlapping search), but got funneled down into a standard
+# leftmost search when calling 'is_match'. This is fine on its own, but the
+# leftmost search will use a prefilter and that's where this went awry.
+#
+# Namely, since 'All' semantics were used, the aho-corasick prefilter was
+# incorrectly compiled with 'Standard' semantics. This was wrong because
+# 'Standard' immediately attempts to report a match at every position, even if
+# that would mean reporting a match past the leftmost match before reporting
+# the leftmost match. This breaks the prefilter contract of never having false
+# negatives and leads overall to the engine not finding a match.
+#
+# See: https://github.com/rust-lang/regex/issues/1070
+[[test]]
+name = "prefilter-with-aho-corasick-standard-semantics"
+regex = '(?m)^ *v [0-9]'
+haystack = 'v 0'
+matches = [
+ { id = 0, spans = [[0, 3]] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+unicode = true
+utf8 = true
+
+# This tests that the PikeVM and the meta regex agree on a particular regex.
+# This test previously failed when the ad hoc engines inside the meta engine
+# did not handle quit states correctly. Namely, the Unicode word boundary here
+# combined with a non-ASCII codepoint provokes the quit state. The ad hoc
+# engines were previously returning a match even after entering the quit state
+# if a match had been previously detected, but this is incorrect. The reason
+# is that if a quit state is found, then the search must give up *immediately*
+# because it prevents the search from finding the "proper" leftmost-first
+# match. If it instead returns a match that has been found, it risks reporting
+# an improper match, as it did in this case.
+#
+# See: https://github.com/rust-lang/regex/issues/1046
+[[test]]
+name = "non-prefix-literal-quit-state"
+regex = '.+\b\n'
+haystack = "β77\n"
+matches = [[0, 5]]
+
+# This is a regression test for some errant HIR interval set operations that
+# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The
+# issue here is that the HIR produced from the regex had out-of-order ranges.
+#
+# See: https://github.com/rust-lang/regex/issues/1103
+# Ref: https://github.com/rust-lang/regex/pull/1051
+# Ref: https://github.com/rust-lang/regex/pull/1102
+[[test]]
+name = "hir-optimization-out-of-order-class"
+regex = '^[[:alnum:]./-]+$'
+haystack = "a-b"
+matches = [[0, 3]]
+
+# This is a regression test for an improper reverse suffix optimization. This
+# occurred when I "broadened" the applicability of the optimization to include
+# multiple possible literal suffixes instead of only sticking to a non-empty
+# longest common suffix. It turns out that, at least given how the reverse
+# suffix optimization works, we need to stick to the longest common suffix for
+# now.
+#
+# See: https://github.com/rust-lang/regex/issues/1110
+# See also: https://github.com/astral-sh/ruff/pull/7980
+[[test]]
+name = 'improper-reverse-suffix-optimization'
+regex = '(\\N\{[^}]+})|([{}])'
+haystack = 'hiya \N{snowman} bye'
+matches = [[[5, 16], [5, 16], []]]