summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/tests/data/unicode.toml
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:18:32 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:18:32 +0000
commit4547b622d8d29df964fa2914213088b148c498fc (patch)
tree9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/regex-automata/tests/data/unicode.toml
parentReleasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff)
downloadrustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz
rustc-4547b622d8d29df964fa2914213088b148c498fc.zip
Merging upstream version 1.67.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/tests/data/unicode.toml')
-rw-r--r--vendor/regex-automata/tests/data/unicode.toml514
1 files changed, 514 insertions, 0 deletions
diff --git a/vendor/regex-automata/tests/data/unicode.toml b/vendor/regex-automata/tests/data/unicode.toml
new file mode 100644
index 000000000..016bbfd9b
--- /dev/null
+++ b/vendor/regex-automata/tests/data/unicode.toml
@@ -0,0 +1,514 @@
+# Basic Unicode literal support.
+[[tests]]
+name = "literal1"
+regex = '☃'
+input = "☃"
+matches = [[0, 3]]
+
+[[tests]]
+name = "literal2"
+regex = '☃+'
+input = "☃"
+matches = [[0, 3]]
+
+[[tests]]
+name = "literal3"
+regex = '(?i)☃+'
+input = "☃"
+matches = [[0, 3]]
+
+[[tests]]
+name = "literal4"
+regex = '(?i)Δ'
+input = "δ"
+matches = [[0, 2]]
+
+# Unicode word boundaries.
+[[tests]]
+name = "wb-100"
+regex = '\d\b'
+input = "6δ"
+matches = []
+
+[[tests]]
+name = "wb-200"
+regex = '\d\b'
+input = "6 "
+matches = [[0, 1]]
+
+[[tests]]
+name = "wb-300"
+regex = '\d\B'
+input = "6δ"
+matches = [[0, 1]]
+
+[[tests]]
+name = "wb-400"
+regex = '\d\B'
+input = "6 "
+matches = []
+
+# Unicode character class support.
+[[tests]]
+name = "class1"
+regex = '[☃Ⅰ]+'
+input = "☃"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class2"
+regex = '\pN'
+input = "Ⅰ"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class3"
+regex = '\pN+'
+input = "Ⅰ1Ⅱ2"
+matches = [[0, 8]]
+
+[[tests]]
+name = "class4"
+regex = '\PN+'
+input = "abⅠ"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class5"
+regex = '[\PN]+'
+input = "abⅠ"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class6"
+regex = '[^\PN]+'
+input = "abⅠ"
+matches = [[2, 5]]
+
+[[tests]]
+name = "class7"
+regex = '\p{Lu}+'
+input = "ΛΘΓΔα"
+matches = [[0, 8]]
+
+[[tests]]
+name = "class8"
+regex = '(?i)\p{Lu}+'
+input = "ΛΘΓΔα"
+matches = [[0, 10]]
+
+[[tests]]
+name = "class9"
+regex = '\pL+'
+input = "ΛΘΓΔα"
+matches = [[0, 10]]
+
+[[tests]]
+name = "class10"
+regex = '\p{Ll}+'
+input = "ΛΘΓΔα"
+matches = [[8, 10]]
+
+# Unicode aware "Perl" character classes.
+[[tests]]
+name = "perl1"
+regex = '\w+'
+input = "dδd"
+matches = [[0, 4]]
+
+[[tests]]
+name = "perl2"
+regex = '\w+'
+input = "⥡"
+matches = []
+
+[[tests]]
+name = "perl3"
+regex = '\W+'
+input = "⥡"
+matches = [[0, 3]]
+
+[[tests]]
+name = "perl4"
+regex = '\d+'
+input = "1२३9"
+matches = [[0, 8]]
+
+[[tests]]
+name = "perl5"
+regex = '\d+'
+input = "Ⅱ"
+matches = []
+
+[[tests]]
+name = "perl6"
+regex = '\D+'
+input = "Ⅱ"
+matches = [[0, 3]]
+
+[[tests]]
+name = "perl7"
+regex = '\s+'
+input = " "
+matches = [[0, 3]]
+
+[[tests]]
+name = "perl8"
+regex = '\s+'
+input = "☃"
+matches = []
+
+[[tests]]
+name = "perl9"
+regex = '\S+'
+input = "☃"
+matches = [[0, 3]]
+
+# Specific tests for Unicode general category classes.
+[[tests]]
+name = "class-gencat1"
+regex = '\p{Cased_Letter}'
+input = "A"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat2"
+regex = '\p{Close_Punctuation}'
+input = "❯"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat3"
+regex = '\p{Connector_Punctuation}'
+input = "⁀"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat4"
+regex = '\p{Control}'
+input = "\u009F"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class-gencat5"
+regex = '\p{Currency_Symbol}'
+input = "£"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat6"
+regex = '\p{Dash_Punctuation}'
+input = "〰"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat7"
+regex = '\p{Decimal_Number}'
+input = "𑓙"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat8"
+regex = '\p{Enclosing_Mark}'
+input = "\uA672"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat9"
+regex = '\p{Final_Punctuation}'
+input = "⸡"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat10"
+regex = '\p{Format}'
+input = "\U000E007F"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat11"
+regex = '\p{Initial_Punctuation}'
+input = "⸜"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat12"
+regex = '\p{Letter}'
+input = "Έ"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class-gencat13"
+regex = '\p{Letter_Number}'
+input = "ↂ"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat14"
+regex = '\p{Line_Separator}'
+input = "\u2028"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat15"
+regex = '\p{Lowercase_Letter}'
+input = "ϛ"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class-gencat16"
+regex = '\p{Mark}'
+input = "\U000E01EF"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat17"
+regex = '\p{Math}'
+input = "⋿"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat18"
+regex = '\p{Modifier_Letter}'
+input = "𖭃"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat19"
+regex = '\p{Modifier_Symbol}'
+input = "🏿"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat20"
+regex = '\p{Nonspacing_Mark}'
+input = "\U0001E94A"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat21"
+regex = '\p{Number}'
+input = "⓿"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat22"
+regex = '\p{Open_Punctuation}'
+input = "⦅"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat23"
+regex = '\p{Other}'
+input = "\u0BC9"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat24"
+regex = '\p{Other_Letter}'
+input = "ꓷ"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat25"
+regex = '\p{Other_Number}'
+input = "㉏"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat26"
+regex = '\p{Other_Punctuation}'
+input = "𞥞"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat27"
+regex = '\p{Other_Symbol}'
+input = "⅌"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat28"
+regex = '\p{Paragraph_Separator}'
+input = "\u2029"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat29"
+regex = '\p{Private_Use}'
+input = "\U0010FFFD"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat30"
+regex = '\p{Punctuation}'
+input = "𑁍"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat31"
+regex = '\p{Separator}'
+input = "\u3000"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat32"
+regex = '\p{Space_Separator}'
+input = "\u205F"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat33"
+regex = '\p{Spacing_Mark}'
+input = "\U00016F7E"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat34"
+regex = '\p{Symbol}'
+input = "⯈"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat35"
+regex = '\p{Titlecase_Letter}'
+input = "ῼ"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gencat36"
+regex = '\p{Unassigned}'
+input = "\U0010FFFF"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gencat37"
+regex = '\p{Uppercase_Letter}'
+input = "Ꝋ"
+matches = [[0, 3]]
+
+
+# Tests for Unicode emoji properties.
+[[tests]]
+name = "class-emoji1"
+regex = '\p{Emoji}'
+input = "\u23E9"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-emoji2"
+regex = '\p{emoji}'
+input = "\U0001F21A"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-emoji3"
+regex = '\p{extendedpictographic}'
+input = "\U0001FA6E"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-emoji4"
+regex = '\p{extendedpictographic}'
+input = "\U0001FFFD"
+matches = [[0, 4]]
+
+
+# Tests for Unicode grapheme cluster properties.
+[[tests]]
+name = "class-gcb1"
+regex = '\p{grapheme_cluster_break=prepend}'
+input = "\U00011D46"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gcb2"
+regex = '\p{gcb=regional_indicator}'
+input = "\U0001F1E6"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gcb3"
+regex = '\p{gcb=ri}'
+input = "\U0001F1E7"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gcb4"
+regex = '\p{regionalindicator}'
+input = "\U0001F1FF"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-gcb5"
+regex = '\p{gcb=lvt}'
+input = "\uC989"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-gcb6"
+regex = '\p{gcb=zwj}'
+input = "\u200D"
+matches = [[0, 3]]
+
+# Tests for Unicode word boundary properties.
+[[tests]]
+name = "class-word-break1"
+regex = '\p{word_break=Hebrew_Letter}'
+input = "\uFB46"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-word-break2"
+regex = '\p{wb=hebrewletter}'
+input = "\uFB46"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-word-break3"
+regex = '\p{wb=ExtendNumLet}'
+input = "\uFF3F"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-word-break4"
+regex = '\p{wb=WSegSpace}'
+input = "\u3000"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-word-break5"
+regex = '\p{wb=numeric}'
+input = "\U0001E950"
+matches = [[0, 4]]
+
+# Tests for Unicode sentence boundary properties.
+[[tests]]
+name = "class-sentence-break1"
+regex = '\p{sentence_break=Lower}'
+input = "\u0469"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class-sentence-break2"
+regex = '\p{sb=lower}'
+input = "\u0469"
+matches = [[0, 2]]
+
+[[tests]]
+name = "class-sentence-break3"
+regex = '\p{sb=Close}'
+input = "\uFF60"
+matches = [[0, 3]]
+
+[[tests]]
+name = "class-sentence-break4"
+regex = '\p{sb=Close}'
+input = "\U0001F677"
+matches = [[0, 4]]
+
+[[tests]]
+name = "class-sentence-break5"
+regex = '\p{sb=SContinue}'
+input = "\uFF64"
+matches = [[0, 3]]