diff options
Diffstat (limited to 'vendor/regex/testdata/word-boundary-special.toml')
-rw-r--r-- | vendor/regex/testdata/word-boundary-special.toml | 687 |
1 files changed, 687 insertions, 0 deletions
diff --git a/vendor/regex/testdata/word-boundary-special.toml b/vendor/regex/testdata/word-boundary-special.toml new file mode 100644 index 0000000..2b5a2a0 --- /dev/null +++ b/vendor/regex/testdata/word-boundary-special.toml @@ -0,0 +1,687 @@ +# These tests are for the "special" word boundary assertions. That is, +# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty +# assertions for more niche use cases, but hitting those cases without these +# assertions is difficult. For example, \b{start-half} and \b{end-half} are +# used to implement the -w/--word-regexp flag in a grep program. + +# Tests for (?-u:\b{start}) + +[[test]] +name = "word-start-ascii-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-start-ascii-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[4, 4]] +unicode = false + +[[test]] +name = "word-start-ascii-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = false + +# Tests for (?-u:\b{end}) + +[[test]] +name = "word-end-ascii-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = false + +[[test]] +name = "word-end-ascii-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[1, 1]] +unicode = false + +# Tests for \b{start} + +[[test]] +name = "word-start-unicode-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end} + +[[test]] +name = "word-end-unicode-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Tests for (?-u:\b{start-half}) + +[[test]] +name = "word-start-half-ascii-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "word-start-half-ascii-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = false + +[[test]] +name = "word-start-half-ascii-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060-noutf8" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +[[test]] +name = "word-start-half-ascii-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-half-ascii-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-start-half-ascii-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-start-half-ascii-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0], [5, 5]] +unicode = false + +# Tests for (?-u:\b{end-half}) + +[[test]] +name = "word-end-half-ascii-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-half-ascii-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = false + +[[test]] +name = "word-end-half-ascii-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-end-half-ascii-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-half-ascii-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-end-half-ascii-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-end-half-ascii-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[0, 0], [5, 5]] +unicode = false + +[[test]] +name = "word-end-half-ascii-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[1, 1], [5, 5]] +unicode = false + +# Tests for \b{start-half} + +[[test]] +name = "word-start-half-unicode-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = true + +[[test]] +name = "word-start-half-unicode-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = true + +[[test]] +name = "word-start-half-unicode-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-half-unicode-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [6, 6]] +unicode = true + +[[test]] +name = "word-start-half-unicode-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [8, 8]] +unicode = true + +[[test]] +name = "word-start-half-unicode-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end-half} + +[[test]] +name = "word-end-half-unicode-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-half-unicode-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = true + +[[test]] +name = "word-end-half-unicode-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-end-half-unicode-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-half-unicode-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [5, 5], [6, 6]] +unicode = true + +[[test]] +name = "word-end-half-unicode-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[4, 4], [8, 8]] +unicode = true + +[[test]] +name = "word-end-half-unicode-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-half-unicode-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Specialty tests. + +# Since \r is special cased in the start state computation (to deal with CRLF +# mode), this test ensures that the correct start state is computed when the +# pattern starts with a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-carriage" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Since \n is also special cased in the start state computation, this test +# ensures that the correct start state is computed when the pattern starts with +# a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-linefeed" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\nabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Like the carriage return test above, but with a custom line terminator. +[[test]] +name = "word-start-half-ascii-customlineterm" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC!abc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '!' |