diff options
Diffstat (limited to 'vendor/regex/testdata/unicode.toml')
-rw-r--r-- | vendor/regex/testdata/unicode.toml | 517 |
1 files changed, 517 insertions, 0 deletions
# Extracted from the diff that adds vendor/regex/testdata/unicode.toml
# (new file, mode 100644).
#
# Each [[test]] table pairs a pattern (`regex`) with an input (`haystack`) and
# the spans it is expected to match (`matches`). The spans are consistent with
# byte offsets into the UTF-8 encoded haystack: "☃" is one code point but
# three bytes, and its expected span is [0, 3].

# Basic Unicode literal support.
[[test]]
name = "literal1"
regex = '☃'
haystack = "☃"
matches = [[0, 3]]

[[test]]
name = "literal2"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]

# Same as literal2, but with case-insensitive matching switched on.
[[test]]
name = "literal3"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]
case-insensitive = true

# An uppercase pattern is expected to match the lowercase haystack.
[[test]]
name = "literal4"
regex = 'Δ'
haystack = "δ"
matches = [[0, 2]]
case-insensitive = true

# Unicode word boundaries.
#
# The haystacks for wb-200 and wb-400 end in U+1680 (OGHAM SPACE MARK). It is
# written as an escape because the raw character is indistinguishable from an
# ASCII space when reading the file; other space separators in this file
# ("\u3000", "\u205F") are escaped the same way.
[[test]]
name = "wb-100"
regex = '\d\b'
haystack = "6δ"
matches = []

[[test]]
name = "wb-200"
regex = '\d\b'
haystack = "6\u1680"
matches = [[0, 1]]

[[test]]
name = "wb-300"
regex = '\d\B'
haystack = "6δ"
matches = [[0, 1]]

[[test]]
name = "wb-400"
regex = '\d\B'
haystack = "6\u1680"
matches = []

# Unicode character class support.
[[test]]
name = "class1"
regex = '[☃Ⅰ]+'
haystack = "☃"
matches = [[0, 3]]

[[test]]
name = "class2"
regex = '\pN'
haystack = "Ⅰ"
matches = [[0, 3]]

# 3 + 1 + 3 + 1 bytes: the Roman numerals are three bytes each.
[[test]]
name = "class3"
regex = '\pN+'
haystack = "Ⅰ1Ⅱ2"
matches = [[0, 8]]

[[test]]
name = "class4"
regex = '\PN+'
haystack = "abⅠ"
matches = [[0, 2]]

[[test]]
name = "class5"
regex = '[\PN]+'
haystack = "abⅠ"
matches = [[0, 2]]

# Negating the negated class selects the number again.
[[test]]
name = "class6"
regex = '[^\PN]+'
haystack = "abⅠ"
matches = [[2, 5]]

[[test]]
name = "class7"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 8]]

# With case-insensitive matching the trailing lowercase letter is included.
[[test]]
name = "class8"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
case-insensitive = true

[[test]]
name = "class9"
regex = '\pL+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]

[[test]]
name = "class10"
regex = '\p{Ll}+'
haystack = "ΛΘΓΔα"
matches = [[8, 10]]

# Unicode aware "Perl" character classes.
# \w, \d, \s and their negations. Expected spans are consistent with byte
# offsets into the UTF-8 encoded haystack (for example "dδd" is 1 + 2 + 1
# bytes, giving [0, 4]).
[[test]]
name = "perl1"
regex = '\w+'
haystack = "dδd"
matches = [[0, 4]]

[[test]]
name = "perl2"
regex = '\w+'
haystack = "⥡"
matches = []

[[test]]
name = "perl3"
regex = '\W+'
haystack = "⥡"
matches = [[0, 3]]

[[test]]
name = "perl4"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]

[[test]]
name = "perl5"
regex = '\d+'
haystack = "Ⅱ"
matches = []

[[test]]
name = "perl6"
regex = '\D+'
haystack = "Ⅱ"
matches = [[0, 3]]

# U+1680 (OGHAM SPACE MARK) is a three-byte whitespace character. It is
# escaped so it cannot be mistaken for, or silently replaced by, an ASCII
# space, which would contradict the [0, 3] span.
[[test]]
name = "perl7"
regex = '\s+'
haystack = "\u1680"
matches = [[0, 3]]

[[test]]
name = "perl8"
regex = '\s+'
haystack = "☃"
matches = []

[[test]]
name = "perl9"
regex = '\S+'
haystack = "☃"
matches = [[0, 3]]

# Specific tests for Unicode general category classes.

# U+FF21 (FULLWIDTH LATIN CAPITAL LETTER A) occupies three UTF-8 bytes, which
# is what the [0, 3] span requires; an ASCII "A" would only span [0, 1]. The
# escape keeps width normalisation from folding it to ASCII.
[[test]]
name = "class-gencat1"
regex = '\p{Cased_Letter}'
haystack = "\uFF21"
matches = [[0, 3]]

[[test]]
name = "class-gencat2"
regex = '\p{Close_Punctuation}'
haystack = "❯"
matches = [[0, 3]]

[[test]]
name = "class-gencat3"
regex = '\p{Connector_Punctuation}'
haystack = "⁀"
matches = [[0, 3]]

[[test]]
name = "class-gencat4"
regex = '\p{Control}'
haystack = "\u009F"
matches = [[0, 2]]

# U+FFE1 (FULLWIDTH POUND SIGN) occupies three UTF-8 bytes, matching the
# [0, 3] span; U+00A3 "£" is only two bytes. Escaped for the same reason as
# class-gencat1.
[[test]]
name = "class-gencat5"
regex = '\p{Currency_Symbol}'
haystack = "\uFFE1"
matches = [[0, 3]]

[[test]]
name = "class-gencat6"
regex = '\p{Dash_Punctuation}'
haystack = "〰"
matches = [[0, 3]]

[[test]]
name = "class-gencat7"
regex = '\p{Decimal_Number}'
haystack = "𑓙"
matches = [[0, 4]]

[[test]]
name = "class-gencat8"
regex = '\p{Enclosing_Mark}'
haystack = "\uA672"
matches = [[0, 3]]

[[test]]
name = "class-gencat9"
regex = '\p{Final_Punctuation}'
haystack = "⸡"
matches = [[0, 3]]

[[test]]
name = "class-gencat10"
regex = '\p{Format}'
haystack = "\U000E007F"
matches = [[0, 4]]

[[test]]
name = "class-gencat11"
regex = '\p{Initial_Punctuation}'
haystack = "⸜"
matches = [[0, 3]]

[[test]]
name = "class-gencat12"
regex = '\p{Letter}'
haystack = "Έ"
matches = [[0, 2]]

# General category classes, continued. Each haystack is a single code point
# and the expected span covers all of its UTF-8 bytes.
[[test]]
name = "class-gencat13"
regex = '\p{Letter_Number}'
haystack = "ↂ"
matches = [[0, 3]]

[[test]]
name = "class-gencat14"
regex = '\p{Line_Separator}'
haystack = "\u2028"
matches = [[0, 3]]

[[test]]
name = "class-gencat15"
regex = '\p{Lowercase_Letter}'
haystack = "ϛ"
matches = [[0, 2]]

[[test]]
name = "class-gencat16"
regex = '\p{Mark}'
haystack = "\U000E01EF"
matches = [[0, 4]]

[[test]]
name = "class-gencat17"
regex = '\p{Math}'
haystack = "⋿"
matches = [[0, 3]]

[[test]]
name = "class-gencat18"
regex = '\p{Modifier_Letter}'
haystack = "𖭃"
matches = [[0, 4]]

[[test]]
name = "class-gencat19"
regex = '\p{Modifier_Symbol}'
haystack = "🏿"
matches = [[0, 4]]

[[test]]
name = "class-gencat20"
regex = '\p{Nonspacing_Mark}'
haystack = "\U0001E94A"
matches = [[0, 4]]

[[test]]
name = "class-gencat21"
regex = '\p{Number}'
haystack = "⓿"
matches = [[0, 3]]

[[test]]
name = "class-gencat22"
regex = '\p{Open_Punctuation}'
haystack = "⦅"
matches = [[0, 3]]

[[test]]
name = "class-gencat23"
regex = '\p{Other}'
haystack = "\u0BC9"
matches = [[0, 3]]

[[test]]
name = "class-gencat24"
regex = '\p{Other_Letter}'
haystack = "ꓷ"
matches = [[0, 3]]

[[test]]
name = "class-gencat25"
regex = '\p{Other_Number}'
haystack = "㉏"
matches = [[0, 3]]

[[test]]
name = "class-gencat26"
regex = '\p{Other_Punctuation}'
haystack = "𞥞"
matches = [[0, 4]]

[[test]]
name = "class-gencat27"
regex = '\p{Other_Symbol}'
haystack = "⅌"
matches = [[0, 3]]

[[test]]
name = "class-gencat28"
regex = '\p{Paragraph_Separator}'
haystack = "\u2029"
matches = [[0, 3]]

[[test]]
name = "class-gencat29"
regex = '\p{Private_Use}'
haystack = "\U0010FFFD"
matches = [[0, 4]]

[[test]]
name = "class-gencat30"
regex = '\p{Punctuation}'
haystack = "𑁍"
matches = [[0, 4]]

[[test]]
name = "class-gencat31"
regex = '\p{Separator}'
haystack = "\u3000"
matches = [[0, 3]]

[[test]]
name = "class-gencat32"
regex = '\p{Space_Separator}'
haystack = "\u205F"
matches = [[0, 3]]

[[test]]
name = "class-gencat33"
regex = '\p{Spacing_Mark}'
haystack = "\U00016F7E"
matches = [[0, 4]]

[[test]]
name = "class-gencat34"
regex = '\p{Symbol}'
haystack = "⯈"
matches = [[0, 3]]

[[test]]
name = "class-gencat35"
regex = '\p{Titlecase_Letter}'
haystack = "ῼ"
matches = [[0, 3]]

[[test]]
name = "class-gencat36"
regex = '\p{Unassigned}'
haystack = "\U0010FFFF"
matches = [[0, 4]]

[[test]]
name = "class-gencat37"
regex = '\p{Uppercase_Letter}'
haystack = "Ꝋ"
matches = [[0, 3]]

# Tests for Unicode emoji properties.
# The property name is spelled with different capitalisation across cases
# ("Emoji" and "emoji").
[[test]]
name = "class-emoji1"
regex = '\p{Emoji}'
haystack = "\u23E9"
matches = [[0, 3]]

[[test]]
name = "class-emoji2"
regex = '\p{emoji}'
haystack = "\U0001F21A"
matches = [[0, 4]]

[[test]]
name = "class-emoji3"
regex = '\p{extendedpictographic}'
haystack = "\U0001FA6E"
matches = [[0, 4]]

[[test]]
name = "class-emoji4"
regex = '\p{extendedpictographic}'
haystack = "\U0001FFFD"
matches = [[0, 4]]

# Tests for Unicode grapheme cluster properties.
# Covers the long property name, the "gcb" abbreviation, an abbreviated value
# ("ri"), and a bare value with no "name=" prefix ("regionalindicator").
[[test]]
name = "class-gcb1"
regex = '\p{grapheme_cluster_break=prepend}'
haystack = "\U00011D46"
matches = [[0, 4]]

[[test]]
name = "class-gcb2"
regex = '\p{gcb=regional_indicator}'
haystack = "\U0001F1E6"
matches = [[0, 4]]

[[test]]
name = "class-gcb3"
regex = '\p{gcb=ri}'
haystack = "\U0001F1E7"
matches = [[0, 4]]

[[test]]
name = "class-gcb4"
regex = '\p{regionalindicator}'
haystack = "\U0001F1FF"
matches = [[0, 4]]

[[test]]
name = "class-gcb5"
regex = '\p{gcb=lvt}'
haystack = "\uC989"
matches = [[0, 3]]

[[test]]
name = "class-gcb6"
regex = '\p{gcb=zwj}'
haystack = "\u200D"
matches = [[0, 3]]

# Tests for Unicode word boundary properties.
# Word_Break property classes. The first two cases spell the same property
# and value in long form ("word_break=Hebrew_Letter") and in abbreviated,
# lowercase form ("wb=hebrewletter") against the same haystack.
[[test]]
name = "class-word-break1"
regex = '\p{word_break=Hebrew_Letter}'
haystack = "\uFB46"
matches = [[0, 3]]

[[test]]
name = "class-word-break2"
regex = '\p{wb=hebrewletter}'
haystack = "\uFB46"
matches = [[0, 3]]

[[test]]
name = "class-word-break3"
regex = '\p{wb=ExtendNumLet}'
haystack = "\uFF3F"
matches = [[0, 3]]

[[test]]
name = "class-word-break4"
regex = '\p{wb=WSegSpace}'
haystack = "\u3000"
matches = [[0, 3]]

[[test]]
name = "class-word-break5"
regex = '\p{wb=numeric}'
haystack = "\U0001E950"
matches = [[0, 4]]

# Tests for Unicode sentence boundary properties.
# As above, the long ("sentence_break=Lower") and abbreviated ("sb=lower")
# spellings are both exercised against the same haystack.
[[test]]
name = "class-sentence-break1"
regex = '\p{sentence_break=Lower}'
haystack = "\u0469"
matches = [[0, 2]]

[[test]]
name = "class-sentence-break2"
regex = '\p{sb=lower}'
haystack = "\u0469"
matches = [[0, 2]]

[[test]]
name = "class-sentence-break3"
regex = '\p{sb=Close}'
haystack = "\uFF60"
matches = [[0, 3]]

[[test]]
name = "class-sentence-break4"
regex = '\p{sb=Close}'
haystack = "\U0001F677"
matches = [[0, 4]]

[[test]]
name = "class-sentence-break5"
regex = '\p{sb=SContinue}'
haystack = "\uFF64"
matches = [[0, 3]]