diff options
Diffstat (limited to 'vendor/regex-automata/tests/data/unicode.toml')
-rw-r--r-- | vendor/regex-automata/tests/data/unicode.toml | 514 |
1 files changed, 0 insertions, 514 deletions
diff --git a/vendor/regex-automata/tests/data/unicode.toml b/vendor/regex-automata/tests/data/unicode.toml deleted file mode 100644 index 016bbfd9b..000000000 --- a/vendor/regex-automata/tests/data/unicode.toml +++ /dev/null @@ -1,514 +0,0 @@ -# Basic Unicode literal support. -[[tests]] -name = "literal1" -regex = '☃' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "literal2" -regex = '☃+' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "literal3" -regex = '(?i)☃+' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "literal4" -regex = '(?i)Δ' -input = "δ" -matches = [[0, 2]] - -# Unicode word boundaries. -[[tests]] -name = "wb-100" -regex = '\d\b' -input = "6δ" -matches = [] - -[[tests]] -name = "wb-200" -regex = '\d\b' -input = "6 " -matches = [[0, 1]] - -[[tests]] -name = "wb-300" -regex = '\d\B' -input = "6δ" -matches = [[0, 1]] - -[[tests]] -name = "wb-400" -regex = '\d\B' -input = "6 " -matches = [] - -# Unicode character class support. -[[tests]] -name = "class1" -regex = '[☃Ⅰ]+' -input = "☃" -matches = [[0, 3]] - -[[tests]] -name = "class2" -regex = '\pN' -input = "Ⅰ" -matches = [[0, 3]] - -[[tests]] -name = "class3" -regex = '\pN+' -input = "Ⅰ1Ⅱ2" -matches = [[0, 8]] - -[[tests]] -name = "class4" -regex = '\PN+' -input = "abⅠ" -matches = [[0, 2]] - -[[tests]] -name = "class5" -regex = '[\PN]+' -input = "abⅠ" -matches = [[0, 2]] - -[[tests]] -name = "class6" -regex = '[^\PN]+' -input = "abⅠ" -matches = [[2, 5]] - -[[tests]] -name = "class7" -regex = '\p{Lu}+' -input = "ΛΘΓΔα" -matches = [[0, 8]] - -[[tests]] -name = "class8" -regex = '(?i)\p{Lu}+' -input = "ΛΘΓΔα" -matches = [[0, 10]] - -[[tests]] -name = "class9" -regex = '\pL+' -input = "ΛΘΓΔα" -matches = [[0, 10]] - -[[tests]] -name = "class10" -regex = '\p{Ll}+' -input = "ΛΘΓΔα" -matches = [[8, 10]] - -# Unicode aware "Perl" character classes. -[[tests]] -name = "perl1" -regex = '\w+' -input = "dδd" -matches = [[0, 4]] - -[[tests]] -name = "perl2" -regex = '\w+' -input = "⥡" -matches = [] - -[[tests]] -name = "perl3" -regex = '\W+' -input = "⥡" -matches = [[0, 3]] - -[[tests]] -name = "perl4" -regex = '\d+' -input = "1२३9" -matches = [[0, 8]] - -[[tests]] -name = "perl5" -regex = '\d+' -input = "Ⅱ" -matches = [] - -[[tests]] -name = "perl6" -regex = '\D+' -input = "Ⅱ" -matches = [[0, 3]] - -[[tests]] -name = "perl7" -regex = '\s+' -input = " " -matches = [[0, 3]] - -[[tests]] -name = "perl8" -regex = '\s+' -input = "☃" -matches = [] - -[[tests]] -name = "perl9" -regex = '\S+' -input = "☃" -matches = [[0, 3]] - -# Specific tests for Unicode general category classes. -[[tests]] -name = "class-gencat1" -regex = '\p{Cased_Letter}' -input = "A" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat2" -regex = '\p{Close_Punctuation}' -input = "❯" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat3" -regex = '\p{Connector_Punctuation}' -input = "⁀" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat4" -regex = '\p{Control}' -input = "\u009F" -matches = [[0, 2]] - -[[tests]] -name = "class-gencat5" -regex = '\p{Currency_Symbol}' -input = "£" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat6" -regex = '\p{Dash_Punctuation}' -input = "〰" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat7" -regex = '\p{Decimal_Number}' -input = "𑓙" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat8" -regex = '\p{Enclosing_Mark}' -input = "\uA672" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat9" -regex = '\p{Final_Punctuation}' -input = "⸡" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat10" -regex = '\p{Format}' -input = "\U000E007F" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat11" -regex = '\p{Initial_Punctuation}' -input = "⸜" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat12" -regex = '\p{Letter}' -input = "Έ" -matches = [[0, 2]] - -[[tests]] -name = "class-gencat13" -regex = '\p{Letter_Number}' -input = "ↂ" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat14" -regex = '\p{Line_Separator}' -input = "\u2028" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat15" -regex = '\p{Lowercase_Letter}' -input = "ϛ" -matches = [[0, 2]] - -[[tests]] -name = "class-gencat16" -regex = '\p{Mark}' -input = "\U000E01EF" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat17" -regex = '\p{Math}' -input = "⋿" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat18" -regex = '\p{Modifier_Letter}' -input = "𖭃" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat19" -regex = '\p{Modifier_Symbol}' -input = "🏿" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat20" -regex = '\p{Nonspacing_Mark}' -input = "\U0001E94A" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat21" -regex = '\p{Number}' -input = "⓿" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat22" -regex = '\p{Open_Punctuation}' -input = "⦅" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat23" -regex = '\p{Other}' -input = "\u0BC9" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat24" -regex = '\p{Other_Letter}' -input = "ꓷ" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat25" -regex = '\p{Other_Number}' -input = "㉏" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat26" -regex = '\p{Other_Punctuation}' -input = "𞥞" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat27" -regex = '\p{Other_Symbol}' -input = "⅌" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat28" -regex = '\p{Paragraph_Separator}' -input = "\u2029" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat29" -regex = '\p{Private_Use}' -input = "\U0010FFFD" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat30" -regex = '\p{Punctuation}' -input = "𑁍" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat31" -regex = '\p{Separator}' -input = "\u3000" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat32" -regex = '\p{Space_Separator}' -input = "\u205F" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat33" -regex = '\p{Spacing_Mark}' -input = "\U00016F7E" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat34" -regex = '\p{Symbol}' -input = "⯈" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat35" -regex = '\p{Titlecase_Letter}' -input = "ῼ" -matches = [[0, 3]] - -[[tests]] -name = "class-gencat36" -regex = '\p{Unassigned}' -input = "\U0010FFFF" -matches = [[0, 4]] - -[[tests]] -name = "class-gencat37" -regex = '\p{Uppercase_Letter}' -input = "Ꝋ" -matches = [[0, 3]] - - -# Tests for Unicode emoji properties. -[[tests]] -name = "class-emoji1" -regex = '\p{Emoji}' -input = "\u23E9" -matches = [[0, 3]] - -[[tests]] -name = "class-emoji2" -regex = '\p{emoji}' -input = "\U0001F21A" -matches = [[0, 4]] - -[[tests]] -name = "class-emoji3" -regex = '\p{extendedpictographic}' -input = "\U0001FA6E" -matches = [[0, 4]] - -[[tests]] -name = "class-emoji4" -regex = '\p{extendedpictographic}' -input = "\U0001FFFD" -matches = [[0, 4]] - - -# Tests for Unicode grapheme cluster properties. -[[tests]] -name = "class-gcb1" -regex = '\p{grapheme_cluster_break=prepend}' -input = "\U00011D46" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb2" -regex = '\p{gcb=regional_indicator}' -input = "\U0001F1E6" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb3" -regex = '\p{gcb=ri}' -input = "\U0001F1E7" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb4" -regex = '\p{regionalindicator}' -input = "\U0001F1FF" -matches = [[0, 4]] - -[[tests]] -name = "class-gcb5" -regex = '\p{gcb=lvt}' -input = "\uC989" -matches = [[0, 3]] - -[[tests]] -name = "class-gcb6" -regex = '\p{gcb=zwj}' -input = "\u200D" -matches = [[0, 3]] - -# Tests for Unicode word boundary properties. -[[tests]] -name = "class-word-break1" -regex = '\p{word_break=Hebrew_Letter}' -input = "\uFB46" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break2" -regex = '\p{wb=hebrewletter}' -input = "\uFB46" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break3" -regex = '\p{wb=ExtendNumLet}' -input = "\uFF3F" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break4" -regex = '\p{wb=WSegSpace}' -input = "\u3000" -matches = [[0, 3]] - -[[tests]] -name = "class-word-break5" -regex = '\p{wb=numeric}' -input = "\U0001E950" -matches = [[0, 4]] - -# Tests for Unicode sentence boundary properties. -[[tests]] -name = "class-sentence-break1" -regex = '\p{sentence_break=Lower}' -input = "\u0469" -matches = [[0, 2]] - -[[tests]] -name = "class-sentence-break2" -regex = '\p{sb=lower}' -input = "\u0469" -matches = [[0, 2]] - -[[tests]] -name = "class-sentence-break3" -regex = '\p{sb=Close}' -input = "\uFF60" -matches = [[0, 3]] - -[[tests]] -name = "class-sentence-break4" -regex = '\p{sb=Close}' -input = "\U0001F677" -matches = [[0, 4]] - -[[tests]] -name = "class-sentence-break5" -regex = '\p{sb=SContinue}' -input = "\uFF64" -matches = [[0, 3]] |