summaryrefslogtreecommitdiffstats
path: root/vendor/pest/src/unicode/mod.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:19:03 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:19:03 +0000
commit64d98f8ee037282c35007b64c2649055c56af1db (patch)
tree5492bcf97fce41ee1c0b1cc2add283f3e66cdab0 /vendor/pest/src/unicode/mod.rs
parentAdding debian version 1.67.1+dfsg1-1. (diff)
downloadrustc-64d98f8ee037282c35007b64c2649055c56af1db.tar.xz
rustc-64d98f8ee037282c35007b64c2649055c56af1db.zip
Merging upstream version 1.68.2+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/pest/src/unicode/mod.rs')
-rw-r--r--vendor/pest/src/unicode/mod.rs259
1 files changed, 244 insertions, 15 deletions
diff --git a/vendor/pest/src/unicode/mod.rs b/vendor/pest/src/unicode/mod.rs
index 912a7ecee..8a56bd6e8 100644
--- a/vendor/pest/src/unicode/mod.rs
+++ b/vendor/pest/src/unicode/mod.rs
@@ -7,24 +7,51 @@
use alloc::boxed::Box;
+macro_rules! property_functions {
+ ($module:ident, $property_names:ident, [$(
+ $prop:ident,
+ )*]) => {
+ #[allow(unused)]
+ mod $module;
+ // unicode::ALPHABETIC('a')
+ $(pub fn $prop(c: char) -> bool {
+ self::$module::$prop.contains_char(c)
+ })*
+
+ pub static $property_names: &[&str] = &[
+ $(stringify!($prop),)*
+ ];
+ };
+}
+
macro_rules! char_property_functions {
+ // For define custom property names
{$(
mod $module:ident;
- [$(
+ static $property_names:ident = [$(
$prop:ident,
)*];
)*} => {$(
- #[allow(unused)]
- mod $module;
- $(pub fn $prop(c: char) -> bool {
- self::$module::$prop.contains_char(c)
- })*
+ property_functions!($module, $property_names, [$(
+ $prop,
+ )*]);
+ )*};
+ // For define property by copy BY_NAME values from `ucd-generate` generated.
+ {$(
+ mod $module:ident;
+ static $property_names:ident = [$(
+ ($_name:tt, $prop:ident),
+ )*];
+ )*} => {$(
+ property_functions!($module, $property_names, [$(
+ $prop,
+ )*]);
)*};
}
char_property_functions! {
mod binary;
- [
+ static BINARY_PROPERTY_NAMES = [
// ASCII_HEX_DIGIT, // let this one be stripped out -- the full trie is wasteful for ASCII
ALPHABETIC, BIDI_CONTROL, CASE_IGNORABLE, CASED, CHANGES_WHEN_CASEFOLDED,
CHANGES_WHEN_CASEMAPPED, CHANGES_WHEN_LOWERCASED, CHANGES_WHEN_TITLECASED,
@@ -38,19 +65,215 @@ char_property_functions! {
REGIONAL_INDICATOR, SENTENCE_TERMINAL, SOFT_DOTTED, TERMINAL_PUNCTUATION, UNIFIED_IDEOGRAPH,
UPPERCASE, VARIATION_SELECTOR, WHITE_SPACE, XID_CONTINUE, XID_START,
];
+}
+char_property_functions! {
mod category;
- [
- CASED_LETTER, CLOSE_PUNCTUATION, CONNECTOR_PUNCTUATION, CONTROL, CURRENCY_SYMBOL,
- DASH_PUNCTUATION, DECIMAL_NUMBER, ENCLOSING_MARK, FINAL_PUNCTUATION, FORMAT,
- INITIAL_PUNCTUATION, LETTER, LETTER_NUMBER, LINE_SEPARATOR, LOWERCASE_LETTER, MARK,
- MATH_SYMBOL, MODIFIER_LETTER, MODIFIER_SYMBOL, NONSPACING_MARK, NUMBER, OPEN_PUNCTUATION,
- OTHER, OTHER_LETTER, OTHER_NUMBER, OTHER_PUNCTUATION, OTHER_SYMBOL, PARAGRAPH_SEPARATOR,
- PRIVATE_USE, PUNCTUATION, SEPARATOR, SPACE_SEPARATOR, SPACING_MARK, SURROGATE, SYMBOL,
- TITLECASE_LETTER, UNASSIGNED, UPPERCASE_LETTER,
+ // Copy from category::BY_NAME
+ static CATEGORY_PROPERTY_NAMES = [
+ ("Cased_Letter", CASED_LETTER), ("Close_Punctuation", CLOSE_PUNCTUATION),
+ ("Connector_Punctuation", CONNECTOR_PUNCTUATION), ("Control", CONTROL),
+ ("Currency_Symbol", CURRENCY_SYMBOL),
+ ("Dash_Punctuation", DASH_PUNCTUATION), ("Decimal_Number", DECIMAL_NUMBER),
+ ("Enclosing_Mark", ENCLOSING_MARK),
+ ("Final_Punctuation", FINAL_PUNCTUATION), ("Format", FORMAT),
+ ("Initial_Punctuation", INITIAL_PUNCTUATION), ("Letter", LETTER),
+ ("Letter_Number", LETTER_NUMBER), ("Line_Separator", LINE_SEPARATOR),
+ ("Lowercase_Letter", LOWERCASE_LETTER), ("Mark", MARK),
+ ("Math_Symbol", MATH_SYMBOL), ("Modifier_Letter", MODIFIER_LETTER),
+ ("Modifier_Symbol", MODIFIER_SYMBOL), ("Nonspacing_Mark", NONSPACING_MARK),
+ ("Number", NUMBER), ("Open_Punctuation", OPEN_PUNCTUATION),
+ ("Other", OTHER), ("Other_Letter", OTHER_LETTER),
+ ("Other_Number", OTHER_NUMBER), ("Other_Punctuation", OTHER_PUNCTUATION),
+ ("Other_Symbol", OTHER_SYMBOL),
+ ("Paragraph_Separator", PARAGRAPH_SEPARATOR), ("Private_Use", PRIVATE_USE),
+ ("Punctuation", PUNCTUATION), ("Separator", SEPARATOR),
+ ("Space_Separator", SPACE_SEPARATOR), ("Spacing_Mark", SPACING_MARK),
+ ("Surrogate", SURROGATE), ("Symbol", SYMBOL),
+ ("Titlecase_Letter", TITLECASE_LETTER), ("Unassigned", UNASSIGNED),
+ ("Uppercase_Letter", UPPERCASE_LETTER),
+ ];
+
+ mod script;
+ // Copy from script::BY_NAME
+ static SCRIPT_PROPERTY_NAMES = [
+ ("Adlam", ADLAM),
+ ("Ahom", AHOM),
+ ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
+ ("Arabic", ARABIC),
+ ("Armenian", ARMENIAN),
+ ("Avestan", AVESTAN),
+ ("Balinese", BALINESE),
+ ("Bamum", BAMUM),
+ ("Bassa_Vah", BASSA_VAH),
+ ("Batak", BATAK),
+ ("Bengali", BENGALI),
+ ("Bhaiksuki", BHAIKSUKI),
+ ("Bopomofo", BOPOMOFO),
+ ("Brahmi", BRAHMI),
+ ("Braille", BRAILLE),
+ ("Buginese", BUGINESE),
+ ("Buhid", BUHID),
+ ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+ ("Carian", CARIAN),
+ ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+ ("Chakma", CHAKMA),
+ ("Cham", CHAM),
+ ("Cherokee", CHEROKEE),
+ ("Chorasmian", CHORASMIAN),
+ ("Common", COMMON),
+ ("Coptic", COPTIC),
+ ("Cuneiform", CUNEIFORM),
+ ("Cypriot", CYPRIOT),
+ ("Cypro_Minoan", CYPRO_MINOAN),
+ ("Cyrillic", CYRILLIC),
+ ("Deseret", DESERET),
+ ("Devanagari", DEVANAGARI),
+ ("Dives_Akuru", DIVES_AKURU),
+ ("Dogra", DOGRA),
+ ("Duployan", DUPLOYAN),
+ ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
+ ("Elbasan", ELBASAN),
+ ("Elymaic", ELYMAIC),
+ ("Ethiopic", ETHIOPIC),
+ ("Georgian", GEORGIAN),
+ ("Glagolitic", GLAGOLITIC),
+ ("Gothic", GOTHIC),
+ ("Grantha", GRANTHA),
+ ("Greek", GREEK),
+ ("Gujarati", GUJARATI),
+ ("Gunjala_Gondi", GUNJALA_GONDI),
+ ("Gurmukhi", GURMUKHI),
+ ("Han", HAN),
+ ("Hangul", HANGUL),
+ ("Hanifi_Rohingya", HANIFI_ROHINGYA),
+ ("Hanunoo", HANUNOO),
+ ("Hatran", HATRAN),
+ ("Hebrew", HEBREW),
+ ("Hiragana", HIRAGANA),
+ ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
+ ("Inherited", INHERITED),
+ ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+ ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
+ ("Javanese", JAVANESE),
+ ("Kaithi", KAITHI),
+ ("Kannada", KANNADA),
+ ("Katakana", KATAKANA),
+ ("Kawi", KAWI),
+ ("Kayah_Li", KAYAH_LI),
+ ("Kharoshthi", KHAROSHTHI),
+ ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
+ ("Khmer", KHMER),
+ ("Khojki", KHOJKI),
+ ("Khudawadi", KHUDAWADI),
+ ("Lao", LAO),
+ ("Latin", LATIN),
+ ("Lepcha", LEPCHA),
+ ("Limbu", LIMBU),
+ ("Linear_A", LINEAR_A),
+ ("Linear_B", LINEAR_B),
+ ("Lisu", LISU),
+ ("Lycian", LYCIAN),
+ ("Lydian", LYDIAN),
+ ("Mahajani", MAHAJANI),
+ ("Makasar", MAKASAR),
+ ("Malayalam", MALAYALAM),
+ ("Mandaic", MANDAIC),
+ ("Manichaean", MANICHAEAN),
+ ("Marchen", MARCHEN),
+ ("Masaram_Gondi", MASARAM_GONDI),
+ ("Medefaidrin", MEDEFAIDRIN),
+ ("Meetei_Mayek", MEETEI_MAYEK),
+ ("Mende_Kikakui", MENDE_KIKAKUI),
+ ("Meroitic_Cursive", MEROITIC_CURSIVE),
+ ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
+ ("Miao", MIAO),
+ ("Modi", MODI),
+ ("Mongolian", MONGOLIAN),
+ ("Mro", MRO),
+ ("Multani", MULTANI),
+ ("Myanmar", MYANMAR),
+ ("Nabataean", NABATAEAN),
+ ("Nag_Mundari", NAG_MUNDARI),
+ ("Nandinagari", NANDINAGARI),
+ ("New_Tai_Lue", NEW_TAI_LUE),
+ ("Newa", NEWA),
+ ("Nko", NKO),
+ ("Nushu", NUSHU),
+ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
+ ("Ogham", OGHAM),
+ ("Ol_Chiki", OL_CHIKI),
+ ("Old_Hungarian", OLD_HUNGARIAN),
+ ("Old_Italic", OLD_ITALIC),
+ ("Old_North_Arabian", OLD_NORTH_ARABIAN),
+ ("Old_Permic", OLD_PERMIC),
+ ("Old_Persian", OLD_PERSIAN),
+ ("Old_Sogdian", OLD_SOGDIAN),
+ ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+ ("Old_Turkic", OLD_TURKIC),
+ ("Old_Uyghur", OLD_UYGHUR),
+ ("Oriya", ORIYA),
+ ("Osage", OSAGE),
+ ("Osmanya", OSMANYA),
+ ("Pahawh_Hmong", PAHAWH_HMONG),
+ ("Palmyrene", PALMYRENE),
+ ("Pau_Cin_Hau", PAU_CIN_HAU),
+ ("Phags_Pa", PHAGS_PA),
+ ("Phoenician", PHOENICIAN),
+ ("Psalter_Pahlavi", PSALTER_PAHLAVI),
+ ("Rejang", REJANG),
+ ("Runic", RUNIC),
+ ("Samaritan", SAMARITAN),
+ ("Saurashtra", SAURASHTRA),
+ ("Sharada", SHARADA),
+ ("Shavian", SHAVIAN),
+ ("Siddham", SIDDHAM),
+ ("SignWriting", SIGNWRITING),
+ ("Sinhala", SINHALA),
+ ("Sogdian", SOGDIAN),
+ ("Sora_Sompeng", SORA_SOMPENG),
+ ("Soyombo", SOYOMBO),
+ ("Sundanese", SUNDANESE),
+ ("Syloti_Nagri", SYLOTI_NAGRI),
+ ("Syriac", SYRIAC),
+ ("Tagalog", TAGALOG),
+ ("Tagbanwa", TAGBANWA),
+ ("Tai_Le", TAI_LE),
+ ("Tai_Tham", TAI_THAM),
+ ("Tai_Viet", TAI_VIET),
+ ("Takri", TAKRI),
+ ("Tamil", TAMIL),
+ ("Tangsa", TANGSA),
+ ("Tangut", TANGUT),
+ ("Telugu", TELUGU),
+ ("Thaana", THAANA),
+ ("Thai", THAI),
+ ("Tibetan", TIBETAN),
+ ("Tifinagh", TIFINAGH),
+ ("Tirhuta", TIRHUTA),
+ ("Toto", TOTO),
+ ("Ugaritic", UGARITIC),
+ ("Vai", VAI),
+ ("Vithkuqi", VITHKUQI),
+ ("Wancho", WANCHO),
+ ("Warang_Citi", WARANG_CITI),
+ ("Yezidi", YEZIDI),
+ ("Yi", YI),
+ ("Zanabazar_Square", ZANABAZAR_SQUARE),
];
}
+/// Return all available unicode property names
+pub fn unicode_property_names() -> Box<dyn Iterator<Item = &'static str>> {
+ Box::new(
+ BINARY_PROPERTY_NAMES
+ .iter()
+ .map(|name| *name)
+ .chain(CATEGORY_PROPERTY_NAMES.iter().map(|name| *name))
+ .chain(SCRIPT_PROPERTY_NAMES.iter().map(|name| *name)),
+ )
+}
+
pub fn by_name(name: &str) -> Option<Box<dyn Fn(char) -> bool>> {
for property in binary::BY_NAME {
if name == property.0.to_uppercase() {
@@ -64,5 +287,11 @@ pub fn by_name(name: &str) -> Option<Box<dyn Fn(char) -> bool>> {
}
}
+ for property in script::BY_NAME {
+ if name == property.0.to_uppercase() {
+ return Some(Box::new(move |c| property.1.contains_char(c)));
+ }
+ }
+
None
}