diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:18:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:18:32 +0000 |
commit | 4547b622d8d29df964fa2914213088b148c498fc (patch) | |
tree | 9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/regex-automata/tests/data/unicode.toml | |
parent | Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz rustc-4547b622d8d29df964fa2914213088b148c498fc.zip |
Merging upstream version 1.67.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/tests/data/unicode.toml')
-rw-r--r-- | vendor/regex-automata/tests/data/unicode.toml | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/vendor/regex-automata/tests/data/unicode.toml b/vendor/regex-automata/tests/data/unicode.toml new file mode 100644 index 000000000..016bbfd9b --- /dev/null +++ b/vendor/regex-automata/tests/data/unicode.toml @@ -0,0 +1,514 @@ +# Basic Unicode literal support. +[[tests]] +name = "literal1" +regex = '☃' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "literal2" +regex = '☃+' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "literal3" +regex = '(?i)☃+' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "literal4" +regex = '(?i)Δ' +input = "δ" +matches = [[0, 2]] + +# Unicode word boundaries. +[[tests]] +name = "wb-100" +regex = '\d\b' +input = "6δ" +matches = [] + +[[tests]] +name = "wb-200" +regex = '\d\b' +input = "6 " +matches = [[0, 1]] + +[[tests]] +name = "wb-300" +regex = '\d\B' +input = "6δ" +matches = [[0, 1]] + +[[tests]] +name = "wb-400" +regex = '\d\B' +input = "6 " +matches = [] + +# Unicode character class support. +[[tests]] +name = "class1" +regex = '[☃Ⅰ]+' +input = "☃" +matches = [[0, 3]] + +[[tests]] +name = "class2" +regex = '\pN' +input = "Ⅰ" +matches = [[0, 3]] + +[[tests]] +name = "class3" +regex = '\pN+' +input = "Ⅰ1Ⅱ2" +matches = [[0, 8]] + +[[tests]] +name = "class4" +regex = '\PN+' +input = "abⅠ" +matches = [[0, 2]] + +[[tests]] +name = "class5" +regex = '[\PN]+' +input = "abⅠ" +matches = [[0, 2]] + +[[tests]] +name = "class6" +regex = '[^\PN]+' +input = "abⅠ" +matches = [[2, 5]] + +[[tests]] +name = "class7" +regex = '\p{Lu}+' +input = "ΛΘΓΔα" +matches = [[0, 8]] + +[[tests]] +name = "class8" +regex = '(?i)\p{Lu}+' +input = "ΛΘΓΔα" +matches = [[0, 10]] + +[[tests]] +name = "class9" +regex = '\pL+' +input = "ΛΘΓΔα" +matches = [[0, 10]] + +[[tests]] +name = "class10" +regex = '\p{Ll}+' +input = "ΛΘΓΔα" +matches = [[8, 10]] + +# Unicode aware "Perl" character classes. +[[tests]] +name = "perl1" +regex = '\w+' +input = "dδd" +matches = [[0, 4]] + +[[tests]] +name = "perl2" +regex = '\w+' +input = "⥡" +matches = [] + +[[tests]] +name = "perl3" +regex = '\W+' +input = "⥡" +matches = [[0, 3]] + +[[tests]] +name = "perl4" +regex = '\d+' +input = "1२३9" +matches = [[0, 8]] + +[[tests]] +name = "perl5" +regex = '\d+' +input = "Ⅱ" +matches = [] + +[[tests]] +name = "perl6" +regex = '\D+' +input = "Ⅱ" +matches = [[0, 3]] + +[[tests]] +name = "perl7" +regex = '\s+' +input = " " +matches = [[0, 3]] + +[[tests]] +name = "perl8" +regex = '\s+' +input = "☃" +matches = [] + +[[tests]] +name = "perl9" +regex = '\S+' +input = "☃" +matches = [[0, 3]] + +# Specific tests for Unicode general category classes. +[[tests]] +name = "class-gencat1" +regex = '\p{Cased_Letter}' +input = "A" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat2" +regex = '\p{Close_Punctuation}' +input = "❯" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat3" +regex = '\p{Connector_Punctuation}' +input = "⁀" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat4" +regex = '\p{Control}' +input = "\u009F" +matches = [[0, 2]] + +[[tests]] +name = "class-gencat5" +regex = '\p{Currency_Symbol}' +input = "£" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat6" +regex = '\p{Dash_Punctuation}' +input = "〰" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat7" +regex = '\p{Decimal_Number}' +input = "𑓙" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat8" +regex = '\p{Enclosing_Mark}' +input = "\uA672" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat9" +regex = '\p{Final_Punctuation}' +input = "⸡" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat10" +regex = '\p{Format}' +input = "\U000E007F" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat11" +regex = '\p{Initial_Punctuation}' +input = "⸜" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat12" +regex = '\p{Letter}' +input = "Έ" +matches = [[0, 2]] + +[[tests]] +name = "class-gencat13" +regex = '\p{Letter_Number}' +input = "ↂ" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat14" +regex = '\p{Line_Separator}' +input = "\u2028" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat15" +regex = '\p{Lowercase_Letter}' +input = "ϛ" +matches = [[0, 2]] + +[[tests]] +name = "class-gencat16" +regex = '\p{Mark}' +input = "\U000E01EF" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat17" +regex = '\p{Math}' +input = "⋿" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat18" +regex = '\p{Modifier_Letter}' +input = "𖭃" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat19" +regex = '\p{Modifier_Symbol}' +input = "🏿" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat20" +regex = '\p{Nonspacing_Mark}' +input = "\U0001E94A" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat21" +regex = '\p{Number}' +input = "⓿" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat22" +regex = '\p{Open_Punctuation}' +input = "⦅" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat23" +regex = '\p{Other}' +input = "\u0BC9" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat24" +regex = '\p{Other_Letter}' +input = "ꓷ" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat25" +regex = '\p{Other_Number}' +input = "㉏" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat26" +regex = '\p{Other_Punctuation}' +input = "𞥞" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat27" +regex = '\p{Other_Symbol}' +input = "⅌" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat28" +regex = '\p{Paragraph_Separator}' +input = "\u2029" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat29" +regex = '\p{Private_Use}' +input = "\U0010FFFD" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat30" +regex = '\p{Punctuation}' +input = "𑁍" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat31" +regex = '\p{Separator}' +input = "\u3000" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat32" +regex = '\p{Space_Separator}' +input = "\u205F" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat33" +regex = '\p{Spacing_Mark}' +input = "\U00016F7E" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat34" +regex = '\p{Symbol}' +input = "⯈" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat35" +regex = '\p{Titlecase_Letter}' +input = "ῼ" +matches = [[0, 3]] + +[[tests]] +name = "class-gencat36" +regex = '\p{Unassigned}' +input = "\U0010FFFF" +matches = [[0, 4]] + +[[tests]] +name = "class-gencat37" +regex = '\p{Uppercase_Letter}' +input = "Ꝋ" +matches = [[0, 3]] + + +# Tests for Unicode emoji properties. +[[tests]] +name = "class-emoji1" +regex = '\p{Emoji}' +input = "\u23E9" +matches = [[0, 3]] + +[[tests]] +name = "class-emoji2" +regex = '\p{emoji}' +input = "\U0001F21A" +matches = [[0, 4]] + +[[tests]] +name = "class-emoji3" +regex = '\p{extendedpictographic}' +input = "\U0001FA6E" +matches = [[0, 4]] + +[[tests]] +name = "class-emoji4" +regex = '\p{extendedpictographic}' +input = "\U0001FFFD" +matches = [[0, 4]] + + +# Tests for Unicode grapheme cluster properties. +[[tests]] +name = "class-gcb1" +regex = '\p{grapheme_cluster_break=prepend}' +input = "\U00011D46" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb2" +regex = '\p{gcb=regional_indicator}' +input = "\U0001F1E6" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb3" +regex = '\p{gcb=ri}' +input = "\U0001F1E7" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb4" +regex = '\p{regionalindicator}' +input = "\U0001F1FF" +matches = [[0, 4]] + +[[tests]] +name = "class-gcb5" +regex = '\p{gcb=lvt}' +input = "\uC989" +matches = [[0, 3]] + +[[tests]] +name = "class-gcb6" +regex = '\p{gcb=zwj}' +input = "\u200D" +matches = [[0, 3]] + +# Tests for Unicode word boundary properties. +[[tests]] +name = "class-word-break1" +regex = '\p{word_break=Hebrew_Letter}' +input = "\uFB46" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break2" +regex = '\p{wb=hebrewletter}' +input = "\uFB46" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break3" +regex = '\p{wb=ExtendNumLet}' +input = "\uFF3F" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break4" +regex = '\p{wb=WSegSpace}' +input = "\u3000" +matches = [[0, 3]] + +[[tests]] +name = "class-word-break5" +regex = '\p{wb=numeric}' +input = "\U0001E950" +matches = [[0, 4]] + +# Tests for Unicode sentence boundary properties. +[[tests]] +name = "class-sentence-break1" +regex = '\p{sentence_break=Lower}' +input = "\u0469" +matches = [[0, 2]] + +[[tests]] +name = "class-sentence-break2" +regex = '\p{sb=lower}' +input = "\u0469" +matches = [[0, 2]] + +[[tests]] +name = "class-sentence-break3" +regex = '\p{sb=Close}' +input = "\uFF60" +matches = [[0, 3]] + +[[tests]] +name = "class-sentence-break4" +regex = '\p{sb=Close}' +input = "\U0001F677" +matches = [[0, 4]] + +[[tests]] +name = "class-sentence-break5" +regex = '\p{sb=SContinue}' +input = "\uFF64" +matches = [[0, 3]] |