1
0
Fork 0
firefox/third_party/rust/icu_properties/src/sets.rs
Daniel Baumann 5e9a113729
Adding upstream version 140.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-25 09:37:52 +02:00

2387 lines
83 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! The functions in this module return a [`CodePointSetData`] containing
//! the set of characters with a particular Unicode property.
//!
//! The descriptions of most properties are taken from [`TR44`], the documentation for the
//! Unicode Character Database. Some properties are instead defined in [`TR18`], the
//! documentation for Unicode regular expressions. In particular, Annex C of this document
//! defines properties for POSIX compatibility.
//!
//! [`CodePointSetData`]: crate::sets::CodePointSetData
//! [`TR44`]: https://www.unicode.org/reports/tr44
//! [`TR18`]: https://www.unicode.org/reports/tr18
use crate::error::PropertiesError;
use crate::provider::*;
use crate::*;
use core::iter::FromIterator;
use core::ops::RangeInclusive;
use icu_collections::codepointinvlist::CodePointInversionList;
use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
use icu_provider::prelude::*;
//
// CodePointSet* structs, impls, & macros
// (a set with only code points)
//
/// A wrapper around code point set data. It is returned by APIs that return Unicode
/// property data in a set-like form, ex: a set of code points sharing the same
/// value for a Unicode property. Access its data via the borrowed version,
/// [`CodePointSetDataBorrowed`], obtained with [`CodePointSetData::as_borrowed()`].
#[derive(Debug)]
pub struct CodePointSetData {
// Payload held under the erased marker, so this one struct can carry the data of
// any code point set property.
data: DataPayload<ErasedSetlikeMarker>,
}
/// Private marker type for [`CodePointSetData`].
///
/// Every property-specific payload is stored under this single marker, which lets
/// [`CodePointSetData`] work for all set properties at once.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedSetlikeMarker;
// Uses the same yokeable type that `CodePointSetData::from_data` requires of its
// input marker.
impl DataMarker for ErasedSetlikeMarker {
type Yokeable = PropertyCodePointSetV1<'static>;
}
impl CodePointSetData {
    /// Returns a [`CodePointSetDataBorrowed`] view of this data, which is the type
    /// that offers the query methods.
    ///
    /// This owned version is returned by functions that use a runtime data provider.
    #[inline]
    pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> {
        let set = self.data.get();
        CodePointSetDataBorrowed { set }
    }

    /// Builds a [`CodePointSetData`] from an already-loaded payload.
    ///
    /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead.
    pub fn from_data<M>(data: DataPayload<M>) -> Self
    where
        M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
    {
        // Re-tag the payload with the erased marker used by the `data` field.
        let erased: DataPayload<ErasedSetlikeMarker> = data.cast();
        Self { data: erased }
    }

    /// Builds a [`CodePointSetData`] that owns the given [`CodePointInversionList`].
    pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self {
        let payload = DataPayload::<ErasedSetlikeMarker>::from_owned(
            PropertyCodePointSetV1::from_code_point_inversion_list(set),
        );
        Self::from_data(payload)
    }

    /// Borrows the underlying data as a [`CodePointInversionList`], if that is possible.
    ///
    /// The data backing this is extensible and supports multiple implementations.
    /// Currently it is always [`CodePointInversionList`]; however in the future more
    /// backends may be added, and users may select which at data generation time.
    ///
    /// `None` is returned when the backing data cannot be exposed as a
    /// [`CodePointInversionList`], or cannot be exposed within the expected constant
    /// time constraint.
    pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> {
        let property_set = self.data.get();
        property_set.as_code_point_inversion_list()
    }

    /// Produces a [`CodePointInversionList`] for this data, borrowing if possible and
    /// otherwise allocating a new [`CodePointInversionList`].
    ///
    /// The data backing this is extensible and supports multiple implementations.
    /// Currently it is always [`CodePointInversionList`]; however in the future more
    /// backends may be added, and users may select which at data generation time.
    ///
    /// The performance of the conversion to this specific return type will vary
    /// depending on the data structure that is backing `self`.
    pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> {
        let property_set = self.data.get();
        property_set.to_code_point_inversion_list()
    }
}
/// A borrowed wrapper around code point set data, returned by
/// [`CodePointSetData::as_borrowed()`]. More efficient to query.
///
/// The type is `Copy`, so its query methods take `self` by value.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
// Reference to the property set that all queries are forwarded to.
set: &'a PropertyCodePointSetV1<'a>,
}
impl CodePointSetDataBorrowed<'static> {
/// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`].
///
/// Only available when the borrow is `'static`; the returned value wraps that same
/// reference via `DataPayload::from_static_ref`.
///
/// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some
/// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`].
pub const fn static_to_owned(self) -> CodePointSetData {
CodePointSetData {
data: DataPayload::from_static_ref(self.set),
}
}
}
impl<'a> CodePointSetDataBorrowed<'a> {
    /// Check if the set contains a character
    ///
    /// ```rust
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    #[inline]
    pub fn contains(self, ch: char) -> bool {
        self.set.contains(ch)
    }

    /// Check if the set contains a character as a UTF32 code unit
    ///
    /// ```rust
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains32(0x0A69)); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains32(0x00C4)); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    #[inline]
    pub fn contains32(self, ch: u32) -> bool {
        self.set.contains32(ch)
    }

    // The first line below was previously a plain `//` comment, which rustdoc drops;
    // it is now part of the rendered documentation.
    /// Yields an [`Iterator`] returning the ranges of the code points that are
    /// included in the [`CodePointSetData`]
    ///
    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
    /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    /// let mut ranges = alphabetic.iter_ranges();
    ///
    /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z'
    /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z'
    /// ```
    #[inline]
    pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        self.set.iter_ranges()
    }

    /// Yields an [`Iterator`] returning the ranges of the code points that are
    /// *not* included in the [`CodePointSetData`]
    ///
    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
    /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    /// let mut ranges = alphabetic.iter_ranges_complemented();
    ///
    /// // The gaps around the first two alphabetic ranges ('A'..'Z' and 'a'..'z').
    /// assert_eq!(Some(0x0000..=0x0040), ranges.next()); // everything before 'A'
    /// assert_eq!(Some(0x005B..=0x0060), ranges.next()); // between 'Z' and 'a'
    /// ```
    #[inline]
    pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        self.set.iter_ranges_complemented()
    }
}
//
// UnicodeSet* structs, impls, & macros
// (a set with code points + strings)
//
/// A wrapper around `UnicodeSet` data (characters and strings).
///
/// Query it through the borrowed version, [`UnicodeSetDataBorrowed`], obtained with
/// [`UnicodeSetData::as_borrowed()`].
#[derive(Debug)]
pub struct UnicodeSetData {
// Payload held under the erased marker, independent of the marker it was loaded with.
data: DataPayload<ErasedUnicodeSetlikeMarker>,
}
/// Private marker type for [`UnicodeSetData`]; payloads are cast to this marker
/// before being stored.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedUnicodeSetlikeMarker;
// Uses the same yokeable type that `UnicodeSetData::from_data` requires of its
// input marker.
impl DataMarker for ErasedUnicodeSetlikeMarker {
type Yokeable = PropertyUnicodeSetV1<'static>;
}
impl UnicodeSetData {
    /// Returns a [`UnicodeSetDataBorrowed`] view of this data that can be queried.
    ///
    /// This avoids a potential small underlying cost per API call (ex: `contains()`)
    /// by consolidating it up front.
    #[inline]
    pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> {
        let set = self.data.get();
        UnicodeSetDataBorrowed { set }
    }

    /// Builds a [`UnicodeSetData`] from an already-loaded payload.
    ///
    /// Typically it is preferable to use getters instead.
    pub fn from_data<M>(data: DataPayload<M>) -> Self
    where
        M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>,
    {
        // Re-tag the payload with the erased marker used by the `data` field.
        let erased: DataPayload<ErasedUnicodeSetlikeMarker> = data.cast();
        Self { data: erased }
    }

    /// Builds a [`UnicodeSetData`] that owns the given
    /// [`CodePointInversionListAndStringList`].
    pub fn from_code_point_inversion_list_string_list(
        set: CodePointInversionListAndStringList<'static>,
    ) -> Self {
        let payload = DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(
            PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set),
        );
        Self::from_data(payload)
    }

    /// Borrows the underlying data as a [`CodePointInversionListAndStringList`], if
    /// that is possible.
    ///
    /// The data backing this is extensible and supports multiple implementations.
    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the
    /// future more backends may be added, and users may select which at data
    /// generation time.
    ///
    /// `None` is returned when the backing data cannot be exposed as a
    /// [`CodePointInversionListAndStringList`], or cannot be exposed within the
    /// expected constant time constraint.
    pub fn as_code_point_inversion_list_string_list(
        &self,
    ) -> Option<&CodePointInversionListAndStringList<'_>> {
        let property_set = self.data.get();
        property_set.as_code_point_inversion_list_string_list()
    }

    /// Produces a [`CodePointInversionListAndStringList`] for this data, borrowing if
    /// possible and otherwise allocating a new [`CodePointInversionListAndStringList`].
    ///
    /// The data backing this is extensible and supports multiple implementations.
    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the
    /// future more backends may be added, and users may select which at data
    /// generation time.
    ///
    /// The performance of the conversion to this specific return type will vary
    /// depending on the data structure that is backing `self`.
    pub fn to_code_point_inversion_list_string_list(
        &self,
    ) -> CodePointInversionListAndStringList<'_> {
        let property_set = self.data.get();
        property_set.to_code_point_inversion_list_string_list()
    }
}
/// A borrowed wrapper around `UnicodeSet` data (characters and strings), returned by
/// [`UnicodeSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct UnicodeSetDataBorrowed<'a> {
// Reference to the property set that all queries are forwarded to.
set: &'a PropertyUnicodeSetV1<'a>,
}
// Query methods; each one forwards directly to the wrapped `PropertyUnicodeSetV1`.
// NOTE(review): `contains` takes `self` by value while `contains32` and `contains_char`
// take `&self`. Both forms are callable the same way because the type is `Copy`.
impl<'a> UnicodeSetDataBorrowed<'a> {
/// Check if the set contains the string. Strings consisting of one character
/// are treated as a character/code point.
///
/// This matches ICU behavior for ICU's `UnicodeSet`.
#[inline]
pub fn contains(self, s: &str) -> bool {
self.set.contains(s)
}
/// Check if the set contains a character as a UTF32 code unit
#[inline]
pub fn contains32(&self, cp: u32) -> bool {
self.set.contains32(cp)
}
/// Check if the set contains the code point corresponding to the Rust character.
#[inline]
pub fn contains_char(&self, ch: char) -> bool {
self.set.contains_char(ch)
}
}
impl UnicodeSetDataBorrowed<'static> {
/// Cheaply converts a [`UnicodeSetDataBorrowed<'static>`] into a [`UnicodeSetData`].
///
/// Only available when the borrow is `'static`; the returned value wraps that same
/// reference via `DataPayload::from_static_ref`.
///
/// Note: Due to branching and indirection, using [`UnicodeSetData`] might inhibit some
/// compile-time optimizations that are possible with [`UnicodeSetDataBorrowed`].
pub const fn static_to_owned(self) -> UnicodeSetData {
UnicodeSetData {
data: DataPayload::from_static_ref(self.set),
}
}
}
/// Loads the payload for the keyed marker `M` from `provider` and wraps it in a
/// [`CodePointSetData`].
///
/// The request passed to the provider is `Default::default()`.
///
/// # Errors
///
/// An error from `provider.load`, or from taking the payload out of the response,
/// is converted into a [`PropertiesError`] and returned.
pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError>
where
    M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
    P: DataProvider<M> + ?Sized,
{
    let response = provider.load(Default::default())?;
    let payload = response.take_payload()?;
    Ok(CodePointSetData::from_data(payload))
}
//
// Binary property getter fns
// (data as code point sets)
//
// Declares, for one binary property, a pair of getters:
//   * `$funcname(provider)` loads the set through `load_set_data` from a caller-supplied
//     `DataProvider<$keyed_data_marker>` and returns an owned `CodePointSetData`;
//   * `$constname()` is a `const fn`, compiled only with the `compiled_data` feature,
//     that borrows the baked singleton `$singleton_name`.
// The doc attributes written at the call site are attached to `$constname`; `$funcname`
// gets a generated doc line that points back at `$constname`.
macro_rules! make_code_point_set_property {
(
// currently unused
property: $property:expr;
// currently unused
marker: $marker_name:ident;
keyed_data_marker: $keyed_data_marker:ty;
func:
// At least one doc attribute is required at the call site.
$(#[$doc:meta])+
$cvis:vis const fn $constname:ident() => $singleton_name:ident;
$vis:vis fn $funcname:ident();
) => {
#[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
///
/// Note that this will return an owned version of the data. Functionality is available on
/// the borrowed version, accessible through [`CodePointSetData::as_borrowed`].
$vis fn $funcname(
provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
) -> Result<CodePointSetData, PropertiesError> {
load_set_data(provider)
}
$(#[$doc])*
#[cfg(feature = "compiled_data")]
$cvis const fn $constname() -> CodePointSetDataBorrowed<'static> {
CodePointSetDataBorrowed {
set: crate::provider::Baked::$singleton_name,
}
}
}
}
// `ASCII_Hex_Digit`: generates `ascii_hex_digit()` (compiled data) and
// `load_ascii_hex_digit()` (custom provider).
make_code_point_set_property! {
property: "ASCII_Hex_Digit";
marker: AsciiHexDigitProperty;
keyed_data_marker: AsciiHexDigitV1Marker;
func:
/// ASCII characters commonly used for the representation of hexadecimal numbers
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let ascii_hex_digit = sets::ascii_hex_digit();
///
/// assert!(ascii_hex_digit.contains('3'));
/// assert!(!ascii_hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
/// assert!(ascii_hex_digit.contains('A'));
/// assert!(!ascii_hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
/// ```
pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1;
pub fn load_ascii_hex_digit();
}
// `Alnum`: generates `alnum()` (compiled data) and `load_alnum()` (custom provider).
make_code_point_set_property! {
property: "Alnum";
marker: AlnumProperty;
keyed_data_marker: AlnumV1Marker;
func:
/// Characters with the Alphabetic or Decimal_Number property.
/// This is defined for POSIX compatibility.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1;
pub fn load_alnum();
}
// `Alphabetic`: generates `alphabetic()` (compiled data) and `load_alphabetic()`
// (custom provider).
make_code_point_set_property! {
property: "Alphabetic";
marker: AlphabeticProperty;
keyed_data_marker: AlphabeticV1Marker;
func:
/// Alphabetic characters
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let alphabetic = sets::alphabetic();
///
/// assert!(!alphabetic.contains('3'));
/// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
/// assert!(alphabetic.contains('A'));
/// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
/// ```
pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1;
pub fn load_alphabetic();
}
// `Bidi_Control`: generates `bidi_control()` (compiled data) and `load_bidi_control()`
// (custom provider).
make_code_point_set_property! {
property: "Bidi_Control";
marker: BidiControlProperty;
keyed_data_marker: BidiControlV1Marker;
func:
/// Format control characters which have specific functions in the Unicode Bidirectional
/// Algorithm
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let bidi_control = sets::bidi_control();
///
/// assert!(bidi_control.contains32(0x200F)); // RIGHT-TO-LEFT MARK
/// assert!(!bidi_control.contains('ش')); // U+0634 ARABIC LETTER SHEEN
/// ```
pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1;
pub fn load_bidi_control();
}
// `Bidi_Mirrored`: generates `bidi_mirrored()` (compiled data) and `load_bidi_mirrored()`
// (custom provider).
make_code_point_set_property! {
property: "Bidi_Mirrored";
marker: BidiMirroredProperty;
keyed_data_marker: BidiMirroredV1Marker;
func:
/// Characters that are mirrored in bidirectional text
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let bidi_mirrored = sets::bidi_mirrored();
///
/// assert!(bidi_mirrored.contains('['));
/// assert!(bidi_mirrored.contains(']'));
/// assert!(bidi_mirrored.contains('∑')); // U+2211 N-ARY SUMMATION
/// assert!(!bidi_mirrored.contains('ཉ')); // U+0F49 TIBETAN LETTER NYA
/// ```
pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1;
pub fn load_bidi_mirrored();
}
// `Blank`: generates `blank()` (compiled data) and `load_blank()` (custom provider).
make_code_point_set_property! {
property: "Blank";
marker: BlankProperty;
keyed_data_marker: BlankV1Marker;
func:
/// Horizontal whitespace characters
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn blank() => SINGLETON_PROPS_BLANK_V1;
pub fn load_blank();
}
// `Cased`: generates `cased()` (compiled data) and `load_cased()` (custom provider).
make_code_point_set_property! {
property: "Cased";
marker: CasedProperty;
keyed_data_marker: CasedV1Marker;
func:
/// Uppercase, lowercase, and titlecase characters
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let cased = sets::cased();
///
/// assert!(cased.contains('Ꙡ')); // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
/// assert!(!cased.contains('ދ')); // U+078B THAANA LETTER DHAALU
/// ```
pub const fn cased() => SINGLETON_PROPS_CASED_V1;
pub fn load_cased();
}
// `Case_Ignorable`: generates `case_ignorable()` (compiled data) and
// `load_case_ignorable()` (custom provider).
make_code_point_set_property! {
property: "Case_Ignorable";
marker: CaseIgnorableProperty;
keyed_data_marker: CaseIgnorableV1Marker;
func:
/// Characters which are ignored for casing purposes
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let case_ignorable = sets::case_ignorable();
///
/// assert!(case_ignorable.contains(':'));
/// assert!(!case_ignorable.contains('λ')); // U+03BB GREEK SMALL LETTER LAMDA
/// ```
pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1;
pub fn load_case_ignorable();
}
// `Full_Composition_Exclusion`: generates `full_composition_exclusion()` (compiled data)
// and `load_full_composition_exclusion()` (custom provider).
make_code_point_set_property! {
property: "Full_Composition_Exclusion";
marker: FullCompositionExclusionProperty;
keyed_data_marker: FullCompositionExclusionV1Marker;
func:
/// Characters that are excluded from composition.
/// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1;
pub fn load_full_composition_exclusion();
}
// `Changes_When_Casefolded`: generates `changes_when_casefolded()` (compiled data) and
// `load_changes_when_casefolded()` (custom provider).
make_code_point_set_property! {
property: "Changes_When_Casefolded";
marker: ChangesWhenCasefoldedProperty;
keyed_data_marker: ChangesWhenCasefoldedV1Marker;
func:
/// Characters whose normalized forms are not stable under case folding
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let changes_when_casefolded = sets::changes_when_casefolded();
///
/// assert!(changes_when_casefolded.contains('ß')); // U+00DF LATIN SMALL LETTER SHARP S
/// assert!(!changes_when_casefolded.contains('ᜉ')); // U+1709 TAGALOG LETTER PA
/// ```
pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1;
pub fn load_changes_when_casefolded();
}
// `Changes_When_Casemapped`: generates `changes_when_casemapped()` (compiled data) and
// `load_changes_when_casemapped()` (custom provider).
make_code_point_set_property! {
property: "Changes_When_Casemapped";
marker: ChangesWhenCasemappedProperty;
keyed_data_marker: ChangesWhenCasemappedV1Marker;
func:
/// Characters which may change when they undergo case mapping
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1;
pub fn load_changes_when_casemapped();
}
// `Changes_When_NFKC_Casefolded`: generates `changes_when_nfkc_casefolded()` (compiled
// data) and `load_changes_when_nfkc_casefolded()` (custom provider).
make_code_point_set_property! {
property: "Changes_When_NFKC_Casefolded";
marker: ChangesWhenNfkcCasefoldedProperty;
keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker;
func:
/// Characters which are not identical to their NFKC_Casefold mapping
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let changes_when_nfkc_casefolded = sets::changes_when_nfkc_casefolded();
///
/// assert!(changes_when_nfkc_casefolded.contains('🄵')); // U+1F135 SQUARED LATIN CAPITAL LETTER F
/// assert!(!changes_when_nfkc_casefolded.contains('f'));
/// ```
pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1;
pub fn load_changes_when_nfkc_casefolded();
}
// `Changes_When_Lowercased`: generates `changes_when_lowercased()` (compiled data) and
// `load_changes_when_lowercased()` (custom provider).
make_code_point_set_property! {
property: "Changes_When_Lowercased";
marker: ChangesWhenLowercasedProperty;
keyed_data_marker: ChangesWhenLowercasedV1Marker;
func:
/// Characters whose normalized forms are not stable under a toLowercase mapping
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let changes_when_lowercased = sets::changes_when_lowercased();
///
/// assert!(changes_when_lowercased.contains('Ⴔ')); // U+10B4 GEORGIAN CAPITAL LETTER PHAR
/// assert!(!changes_when_lowercased.contains('ფ')); // U+10E4 GEORGIAN LETTER PHAR
/// ```
pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1;
pub fn load_changes_when_lowercased();
}
// `Changes_When_Titlecased`: generates `changes_when_titlecased()` (compiled data) and
// `load_changes_when_titlecased()` (custom provider).
make_code_point_set_property! {
property: "Changes_When_Titlecased";
marker: ChangesWhenTitlecasedProperty;
keyed_data_marker: ChangesWhenTitlecasedV1Marker;
func:
/// Characters whose normalized forms are not stable under a toTitlecase mapping
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let changes_when_titlecased = sets::changes_when_titlecased();
///
/// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE
/// assert!(!changes_when_titlecased.contains('Æ')); // U+00C6 LATIN CAPITAL LETTER AE
/// ```
pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1;
pub fn load_changes_when_titlecased();
}
// `Changes_When_Uppercased`: generates `changes_when_uppercased()` (compiled data) and
// `load_changes_when_uppercased()` (custom provider).
make_code_point_set_property! {
property: "Changes_When_Uppercased";
marker: ChangesWhenUppercasedProperty;
keyed_data_marker: ChangesWhenUppercasedV1Marker;
func:
/// Characters whose normalized forms are not stable under a toUppercase mapping
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let changes_when_uppercased = sets::changes_when_uppercased();
///
/// assert!(changes_when_uppercased.contains('ւ')); // U+0582 ARMENIAN SMALL LETTER YIWN
/// assert!(!changes_when_uppercased.contains('Ւ')); // U+0552 ARMENIAN CAPITAL LETTER YIWN
/// ```
pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1;
pub fn load_changes_when_uppercased();
}
// `Dash`: generates `dash()` (compiled data) and `load_dash()` (custom provider).
make_code_point_set_property! {
property: "Dash";
marker: DashProperty;
keyed_data_marker: DashV1Marker;
func:
/// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
/// their compatibility equivalents
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let dash = sets::dash();
///
/// assert!(dash.contains('⸺')); // U+2E3A TWO-EM DASH
/// assert!(dash.contains('-')); // U+002D
/// assert!(!dash.contains('=')); // U+003D
/// ```
pub const fn dash() => SINGLETON_PROPS_DASH_V1;
pub fn load_dash();
}
// `Deprecated`: generates `deprecated()` (compiled data) and `load_deprecated()`
// (custom provider).
make_code_point_set_property! {
property: "Deprecated";
marker: DeprecatedProperty;
keyed_data_marker: DeprecatedV1Marker;
func:
/// Deprecated characters. No characters will ever be removed from the standard, but the
/// usage of deprecated characters is strongly discouraged.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let deprecated = sets::deprecated();
///
/// assert!(deprecated.contains('ឣ')); // U+17A3 KHMER INDEPENDENT VOWEL QAQ
/// assert!(!deprecated.contains('A'));
/// ```
pub const fn deprecated() => SINGLETON_PROPS_DEP_V1;
pub fn load_deprecated();
}
// `Default_Ignorable_Code_Point`: generates `default_ignorable_code_point()` (compiled
// data) and `load_default_ignorable_code_point()` (custom provider).
make_code_point_set_property! {
property: "Default_Ignorable_Code_Point";
marker: DefaultIgnorableCodePointProperty;
keyed_data_marker: DefaultIgnorableCodePointV1Marker;
func:
/// For programmatic determination of default ignorable code points. New characters that
/// should be ignored in rendering (unless explicitly supported) will be assigned in these
/// ranges, permitting programs to correctly handle the default rendering of such
/// characters when not otherwise supported.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let default_ignorable_code_point = sets::default_ignorable_code_point();
///
/// assert!(default_ignorable_code_point.contains32(0x180B)); // MONGOLIAN FREE VARIATION SELECTOR ONE
/// assert!(!default_ignorable_code_point.contains('E'));
/// ```
pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1;
pub fn load_default_ignorable_code_point();
}
// `Diacritic`: generates `diacritic()` (compiled data) and `load_diacritic()`
// (custom provider).
make_code_point_set_property! {
property: "Diacritic";
marker: DiacriticProperty;
keyed_data_marker: DiacriticV1Marker;
func:
/// Characters that linguistically modify the meaning of another character to which they apply
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let diacritic = sets::diacritic();
///
/// assert!(diacritic.contains('\u{05B3}')); // HEBREW POINT HATAF QAMATS
/// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF
/// ```
pub const fn diacritic() => SINGLETON_PROPS_DIA_V1;
pub fn load_diacritic();
}
// `Emoji_Modifier_Base`: generates `emoji_modifier_base()` (compiled data) and
// `load_emoji_modifier_base()` (custom provider).
make_code_point_set_property! {
property: "Emoji_Modifier_Base";
marker: EmojiModifierBaseProperty;
keyed_data_marker: EmojiModifierBaseV1Marker;
func:
/// Characters that can serve as a base for emoji modifiers
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji_modifier_base = sets::emoji_modifier_base();
///
/// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST
/// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN
/// ```
pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1;
pub fn load_emoji_modifier_base();
}
// `Emoji_Component`: generates `emoji_component()` (compiled data) and
// `load_emoji_component()` (custom provider).
make_code_point_set_property! {
property: "Emoji_Component";
marker: EmojiComponentProperty;
keyed_data_marker: EmojiComponentV1Marker;
func:
/// Characters used in emoji sequences that normally do not appear on emoji keyboards as
/// separate choices, such as base characters for emoji keycaps
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji_component = sets::emoji_component();
///
/// assert!(emoji_component.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
/// assert!(emoji_component.contains32(0x20E3)); // COMBINING ENCLOSING KEYCAP
/// assert!(emoji_component.contains('7'));
/// assert!(!emoji_component.contains('T'));
/// ```
pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1;
pub fn load_emoji_component();
}
// `Emoji_Modifier`: generates `emoji_modifier()` (compiled data) and
// `load_emoji_modifier()` (custom provider).
make_code_point_set_property! {
property: "Emoji_Modifier";
marker: EmojiModifierProperty;
keyed_data_marker: EmojiModifierV1Marker;
func:
/// Characters that are emoji modifiers
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji_modifier = sets::emoji_modifier();
///
/// assert!(emoji_modifier.contains32(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4
/// assert!(!emoji_modifier.contains32(0x200C)); // ZERO WIDTH NON-JOINER
/// ```
pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1;
pub fn load_emoji_modifier();
}
// `Emoji`: generates `emoji()` (compiled data) and `load_emoji()` (custom provider).
make_code_point_set_property! {
property: "Emoji";
marker: EmojiProperty;
keyed_data_marker: EmojiV1Marker;
func:
/// Characters that are emoji
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::emoji();
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1;
pub fn load_emoji();
}
// `Emoji_Presentation`: generates `emoji_presentation()` (compiled data) and
// `load_emoji_presentation()` (custom provider).
make_code_point_set_property! {
property: "Emoji_Presentation";
marker: EmojiPresentationProperty;
keyed_data_marker: EmojiPresentationV1Marker;
func:
/// Characters that have emoji presentation by default
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji_presentation = sets::emoji_presentation();
///
/// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
/// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
/// ```
pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1;
pub fn load_emoji_presentation();
}
// `Extender`: generates `extender()` (compiled data) and `load_extender()`
// (custom provider).
make_code_point_set_property! {
property: "Extender";
marker: ExtenderProperty;
keyed_data_marker: ExtenderV1Marker;
func:
/// Characters whose principal function is to extend the value of a preceding alphabetic
/// character or to extend the shape of adjacent characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let extender = sets::extender();
///
/// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK
/// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
/// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT
/// ```
pub const fn extender() => SINGLETON_PROPS_EXT_V1;
pub fn load_extender();
}
// `Extended_Pictographic`: generates `extended_pictographic()` (compiled data) and
// `load_extended_pictographic()` (custom provider).
make_code_point_set_property! {
property: "Extended_Pictographic";
marker: ExtendedPictographicProperty;
keyed_data_marker: ExtendedPictographicV1Marker;
func:
/// Pictographic symbols, as well as reserved ranges in blocks largely associated with
/// emoji characters
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let extended_pictographic = sets::extended_pictographic();
///
/// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
/// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
/// ```
pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1;
pub fn load_extended_pictographic();
}
// `Graph`: generates `graph()` (compiled data) and `load_graph()` (custom provider).
make_code_point_set_property! {
property: "Graph";
marker: GraphProperty;
keyed_data_marker: GraphV1Marker;
func:
/// Visible characters.
/// This is defined for POSIX compatibility.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn graph() => SINGLETON_PROPS_GRAPH_V1;
pub fn load_graph();
}
// `Grapheme_Base`: generates `grapheme_base()` (compiled data) and `load_grapheme_base()`
// (custom provider).
make_code_point_set_property! {
property: "Grapheme_Base";
marker: GraphemeBaseProperty;
keyed_data_marker: GraphemeBaseV1Marker;
func:
/// Property used together with the definition of Standard Korean Syllable Block to define
/// "Grapheme base". See D58 in Chapter 3, Conformance in the Unicode Standard.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let grapheme_base = sets::grapheme_base();
///
/// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA
/// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
/// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
/// ```
pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1;
pub fn load_grapheme_base();
}
make_code_point_set_property! {
property: "Grapheme_Extend";
marker: GraphemeExtendProperty;
keyed_data_marker: GraphemeExtendV1Marker;
func:
/// Property used to define "Grapheme extender". See D59 in Chapter 3, *Conformance*, of the
/// Unicode Standard.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let grapheme_extend = sets::grapheme_extend();
///
/// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
/// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
/// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
/// ```
pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1;
pub fn load_grapheme_extend();
}
make_code_point_set_property! {
property: "Grapheme_Link";
marker: GraphemeLinkProperty;
keyed_data_marker: GraphemeLinkV1Marker;
func:
/// Deprecated property. Formerly proposed for programmatic determination of grapheme
/// cluster boundaries.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1;
pub fn load_grapheme_link();
}
make_code_point_set_property! {
property: "Hex_Digit";
marker: HexDigitProperty;
keyed_data_marker: HexDigitV1Marker;
func:
/// Characters commonly used for the representation of hexadecimal numbers, plus their
/// compatibility equivalents.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let hex_digit = sets::hex_digit();
///
/// assert!(hex_digit.contains('0'));
/// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
/// assert!(hex_digit.contains('f'));
/// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F
/// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
/// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
/// ```
pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1;
pub fn load_hex_digit();
}
make_code_point_set_property! {
property: "Hyphen";
marker: HyphenProperty;
keyed_data_marker: HyphenV1Marker;
func:
/// Deprecated property. Dashes which are used to mark connections between pieces of
/// words, plus the Katakana middle dot.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1;
pub fn load_hyphen();
}
make_code_point_set_property! {
property: "Id_Continue";
marker: IdContinueProperty;
keyed_data_marker: IdContinueV1Marker;
func:
/// Characters that can come after the first character in an identifier. If using NFKC to
/// fold differences between characters, use [`xid_continue`] (or [`load_xid_continue`])
/// instead. See
/// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
/// more details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let id_continue = sets::id_continue();
///
/// assert!(id_continue.contains('x'));
/// assert!(id_continue.contains('1'));
/// assert!(id_continue.contains('_'));
/// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
/// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
/// assert!(id_continue.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
/// ```
pub const fn id_continue() => SINGLETON_PROPS_IDC_V1;
pub fn load_id_continue();
}
make_code_point_set_property! {
property: "Ideographic";
marker: IdeographicProperty;
keyed_data_marker: IdeographicV1Marker;
func:
/// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
/// ideographs, or related siniform ideographs.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let ideographic = sets::ideographic();
///
/// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
/// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
/// ```
pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1;
pub fn load_ideographic();
}
make_code_point_set_property! {
property: "Id_Start";
marker: IdStartProperty;
keyed_data_marker: IdStartV1Marker;
func:
/// Characters that can begin an identifier. If using NFKC to fold differences between
/// characters, use [`xid_start`] (or [`load_xid_start`]) instead. See [`Unicode Standard Annex
/// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let id_start = sets::id_start();
///
/// assert!(id_start.contains('x'));
/// assert!(!id_start.contains('1'));
/// assert!(!id_start.contains('_'));
/// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
/// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
/// assert!(id_start.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
/// ```
pub const fn id_start() => SINGLETON_PROPS_IDS_V1;
pub fn load_id_start();
}
make_code_point_set_property! {
property: "Ids_Binary_Operator";
marker: IdsBinaryOperatorProperty;
keyed_data_marker: IdsBinaryOperatorV1Marker;
func:
/// Characters used in Ideographic Description Sequences (the binary operators, going by
/// the property name).
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let ids_binary_operator = sets::ids_binary_operator();
///
/// assert!(ids_binary_operator.contains32(0x2FF5)); // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
/// assert!(!ids_binary_operator.contains32(0x3006)); // U+3006 IDEOGRAPHIC CLOSING MARK
/// ```
pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1;
pub fn load_ids_binary_operator();
}
make_code_point_set_property! {
property: "Ids_Trinary_Operator";
marker: IdsTrinaryOperatorProperty;
keyed_data_marker: IdsTrinaryOperatorV1Marker;
func:
/// Characters used in Ideographic Description Sequences (the trinary operators, going by
/// the property name).
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let ids_trinary_operator = sets::ids_trinary_operator();
///
/// assert!(ids_trinary_operator.contains32(0x2FF2)); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
/// assert!(ids_trinary_operator.contains32(0x2FF3)); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
/// assert!(!ids_trinary_operator.contains32(0x2FF4)); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
/// assert!(!ids_trinary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
/// assert!(!ids_trinary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK
/// ```
pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1;
pub fn load_ids_trinary_operator();
}
make_code_point_set_property! {
property: "Join_Control";
marker: JoinControlProperty;
keyed_data_marker: JoinControlV1Marker;
func:
/// Format control characters which have specific functions for control of cursive joining
/// and ligation.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let join_control = sets::join_control();
///
/// assert!(join_control.contains32(0x200C)); // ZERO WIDTH NON-JOINER
/// assert!(join_control.contains32(0x200D)); // ZERO WIDTH JOINER
/// assert!(!join_control.contains32(0x200E)); // LEFT-TO-RIGHT MARK
/// ```
pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1;
pub fn load_join_control();
}
make_code_point_set_property! {
property: "Logical_Order_Exception";
marker: LogicalOrderExceptionProperty;
keyed_data_marker: LogicalOrderExceptionV1Marker;
func:
/// A small number of spacing vowel letters occurring in certain Southeast Asian scripts
/// such as Thai and Lao.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let logical_order_exception = sets::logical_order_exception();
///
/// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
/// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
/// ```
pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1;
pub fn load_logical_order_exception();
}
make_code_point_set_property! {
property: "Lowercase";
marker: LowercaseProperty;
keyed_data_marker: LowercaseV1Marker;
func:
/// Lowercase characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let lowercase = sets::lowercase();
///
/// assert!(lowercase.contains('a'));
/// assert!(!lowercase.contains('A'));
/// ```
pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1;
pub fn load_lowercase();
}
make_code_point_set_property! {
property: "Math";
marker: MathProperty;
keyed_data_marker: MathV1Marker;
func:
/// Characters used in mathematical notation.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let math = sets::math();
///
/// assert!(math.contains('='));
/// assert!(math.contains('+'));
/// assert!(!math.contains('-'));
/// assert!(math.contains('\u{2212}')); // U+2212 MINUS SIGN
/// assert!(!math.contains('/'));
/// assert!(math.contains('\u{2215}')); // U+2215 DIVISION SLASH
/// ```
pub const fn math() => SINGLETON_PROPS_MATH_V1;
pub fn load_math();
}
make_code_point_set_property! {
property: "Noncharacter_Code_Point";
marker: NoncharacterCodePointProperty;
keyed_data_marker: NoncharacterCodePointV1Marker;
func:
/// Code points permanently reserved for internal use.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let noncharacter_code_point = sets::noncharacter_code_point();
///
/// assert!(noncharacter_code_point.contains32(0xFDD0)); // U+FDD0 <noncharacter>
/// assert!(noncharacter_code_point.contains32(0xFFFF)); // U+FFFF <noncharacter>
/// assert!(!noncharacter_code_point.contains32(0x10000)); // U+10000 LINEAR B SYLLABLE B008 A
/// ```
pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1;
pub fn load_noncharacter_code_point();
}
make_code_point_set_property! {
property: "NFC_Inert";
marker: NfcInertProperty;
keyed_data_marker: NfcInertV1Marker;
func:
/// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1;
pub fn load_nfc_inert();
}
make_code_point_set_property! {
property: "NFD_Inert";
marker: NfdInertProperty;
keyed_data_marker: NfdInertV1Marker;
func:
/// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1;
pub fn load_nfd_inert();
}
make_code_point_set_property! {
property: "NFKC_Inert";
marker: NfkcInertProperty;
keyed_data_marker: NfkcInertV1Marker;
func:
/// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1;
pub fn load_nfkc_inert();
}
make_code_point_set_property! {
property: "NFKD_Inert";
marker: NfkdInertProperty;
keyed_data_marker: NfkdInertV1Marker;
func:
/// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1;
pub fn load_nfkd_inert();
}
make_code_point_set_property! {
property: "Pattern_Syntax";
marker: PatternSyntaxProperty;
keyed_data_marker: PatternSyntaxV1Marker;
func:
/// Characters used as syntax in patterns (such as regular expressions). See [`Unicode
/// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
/// details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let pattern_syntax = sets::pattern_syntax();
///
/// assert!(pattern_syntax.contains('{')); // U+007B LEFT CURLY BRACKET
/// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
/// assert!(!pattern_syntax.contains('0')); // U+0030 DIGIT ZERO
/// ```
pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1;
pub fn load_pattern_syntax();
}
make_code_point_set_property! {
property: "Pattern_White_Space";
marker: PatternWhiteSpaceProperty;
keyed_data_marker: PatternWhiteSpaceV1Marker;
func:
/// Characters used as whitespace in patterns (such as regular expressions). See
/// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
/// more details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let pattern_white_space = sets::pattern_white_space();
///
/// assert!(pattern_white_space.contains(' ')); // U+0020 SPACE
/// assert!(pattern_white_space.contains32(0x2029)); // U+2029 PARAGRAPH SEPARATOR
/// assert!(pattern_white_space.contains32(0x000A)); // U+000A LINE FEED (a.k.a. NEW LINE)
/// assert!(!pattern_white_space.contains32(0x00A0)); // U+00A0 NO-BREAK SPACE
/// ```
pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1;
pub fn load_pattern_white_space();
}
make_code_point_set_property! {
property: "Prepended_Concatenation_Mark";
marker: PrependedConcatenationMarkProperty;
keyed_data_marker: PrependedConcatenationMarkV1Marker;
func:
/// A small class of visible format controls, which precede and then span a sequence of
/// other characters, usually digits.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1;
pub fn load_prepended_concatenation_mark();
}
make_code_point_set_property! {
property: "Print";
marker: PrintProperty;
keyed_data_marker: PrintV1Marker;
func:
/// Printable characters (visible characters and whitespace).
/// This is defined for POSIX compatibility.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn print() => SINGLETON_PROPS_PRINT_V1;
pub fn load_print();
}
make_code_point_set_property! {
property: "Quotation_Mark";
marker: QuotationMarkProperty;
keyed_data_marker: QuotationMarkV1Marker;
func:
/// Punctuation characters that function as quotation marks.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let quotation_mark = sets::quotation_mark();
///
/// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE
/// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
/// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN
/// ```
pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1;
pub fn load_quotation_mark();
}
make_code_point_set_property! {
property: "Radical";
marker: RadicalProperty;
keyed_data_marker: RadicalV1Marker;
func:
/// Characters used in the definition of Ideographic Description Sequences.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let radical = sets::radical();
///
/// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
/// // Written as an escape: the compatibility ideograph is visually identical to U+4E39.
/// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
/// ```
pub const fn radical() => SINGLETON_PROPS_RADICAL_V1;
pub fn load_radical();
}
make_code_point_set_property! {
property: "Regional_Indicator";
marker: RegionalIndicatorProperty;
keyed_data_marker: RegionalIndicatorV1Marker;
func:
/// Regional indicator characters, U+1F1E6..U+1F1FF.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let regional_indicator = sets::regional_indicator();
///
/// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
/// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
/// assert!(!regional_indicator.contains('T'));
/// ```
pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1;
pub fn load_regional_indicator();
}
make_code_point_set_property! {
property: "Soft_Dotted";
marker: SoftDottedProperty;
keyed_data_marker: SoftDottedV1Marker;
func:
/// Characters with a "soft dot", like i or j. An accent placed on these characters causes
/// the dot to disappear.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let soft_dotted = sets::soft_dotted();
///
/// // Written as an escape: this Cyrillic letter is easily confused with Latin `i`.
/// assert!(soft_dotted.contains('\u{0456}')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
/// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
/// ```
pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1;
pub fn load_soft_dotted();
}
make_code_point_set_property! {
property: "Segment_Starter";
marker: SegmentStarterProperty;
keyed_data_marker: SegmentStarterV1Marker;
func:
/// Characters that are starters in terms of Unicode normalization and combining character
/// sequences.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1;
pub fn load_segment_starter();
}
make_code_point_set_property! {
property: "Case_Sensitive";
marker: CaseSensitiveProperty;
keyed_data_marker: CaseSensitiveV1Marker;
func:
/// Characters that are either the source of a case mapping or in the target of a case
/// mapping.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1;
pub fn load_case_sensitive();
}
make_code_point_set_property! {
property: "Sentence_Terminal";
marker: SentenceTerminalProperty;
keyed_data_marker: SentenceTerminalV1Marker;
func:
/// Punctuation characters that generally mark the end of sentences.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let sentence_terminal = sets::sentence_terminal();
///
/// assert!(sentence_terminal.contains('.'));
/// assert!(sentence_terminal.contains('?'));
/// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
/// assert!(!sentence_terminal.contains(','));
/// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
/// ```
pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1;
pub fn load_sentence_terminal();
}
make_code_point_set_property! {
property: "Terminal_Punctuation";
marker: TerminalPunctuationProperty;
keyed_data_marker: TerminalPunctuationV1Marker;
func:
/// Punctuation characters that generally mark the end of textual units.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let terminal_punctuation = sets::terminal_punctuation();
///
/// assert!(terminal_punctuation.contains('.'));
/// assert!(terminal_punctuation.contains('?'));
/// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
/// assert!(terminal_punctuation.contains(','));
/// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
/// ```
pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1;
pub fn load_terminal_punctuation();
}
make_code_point_set_property! {
property: "Unified_Ideograph";
marker: UnifiedIdeographProperty;
keyed_data_marker: UnifiedIdeographV1Marker;
func:
/// A property which specifies the exact set of Unified CJK Ideographs in the standard.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let unified_ideograph = sets::unified_ideograph();
///
/// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
/// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
/// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178
/// ```
pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1;
pub fn load_unified_ideograph();
}
make_code_point_set_property! {
property: "Uppercase";
marker: UppercaseProperty;
keyed_data_marker: UppercaseV1Marker;
func:
/// Uppercase characters.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let uppercase = sets::uppercase();
///
/// assert!(uppercase.contains('U'));
/// assert!(!uppercase.contains('u'));
/// ```
pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1;
pub fn load_uppercase();
}
make_code_point_set_property! {
property: "Variation_Selector";
marker: VariationSelectorProperty;
keyed_data_marker: VariationSelectorV1Marker;
func:
/// Characters that are Variation Selectors.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let variation_selector = sets::variation_selector();
///
/// assert!(variation_selector.contains32(0x180D)); // U+180D MONGOLIAN FREE VARIATION SELECTOR THREE
/// assert!(!variation_selector.contains32(0x303E)); // U+303E IDEOGRAPHIC VARIATION INDICATOR
/// assert!(variation_selector.contains32(0xFE0F)); // U+FE0F VARIATION SELECTOR-16
/// assert!(!variation_selector.contains32(0xFE10)); // U+FE10 PRESENTATION FORM FOR VERTICAL COMMA
/// assert!(variation_selector.contains32(0xE01EF)); // U+E01EF VARIATION SELECTOR-256
/// ```
pub const fn variation_selector() => SINGLETON_PROPS_VS_V1;
pub fn load_variation_selector();
}
make_code_point_set_property! {
property: "White_Space";
marker: WhiteSpaceProperty;
keyed_data_marker: WhiteSpaceV1Marker;
func:
/// Spaces, separator characters and other control characters which should be treated by
/// programming languages as "white space" for the purpose of parsing elements.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let white_space = sets::white_space();
///
/// assert!(white_space.contains(' ')); // U+0020 SPACE
/// assert!(white_space.contains32(0x000A)); // U+000A LINE FEED (a.k.a. NEW LINE)
/// assert!(white_space.contains32(0x00A0)); // U+00A0 NO-BREAK SPACE
/// assert!(!white_space.contains32(0x200B)); // U+200B ZERO WIDTH SPACE
/// ```
pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1;
pub fn load_white_space();
}
make_code_point_set_property! {
property: "Xdigit";
marker: XdigitProperty;
keyed_data_marker: XdigitV1Marker;
func:
/// Hexadecimal digits.
/// This is defined for POSIX compatibility.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1;
pub fn load_xdigit();
}
make_code_point_set_property! {
property: "XID_Continue";
marker: XidContinueProperty;
keyed_data_marker: XidContinueV1Marker;
func:
/// Characters that can come after the first character in an identifier. This is the
/// variant to use instead of [`id_continue`] when NFKC is used to fold differences between
/// characters. See [`Unicode Standard Annex
/// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let xid_continue = sets::xid_continue();
///
/// assert!(xid_continue.contains('x'));
/// assert!(xid_continue.contains('1'));
/// assert!(xid_continue.contains('_'));
/// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
/// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
/// assert!(!xid_continue.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
/// ```
pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1;
pub fn load_xid_continue();
}
make_code_point_set_property! {
property: "XID_Start";
marker: XidStartProperty;
keyed_data_marker: XidStartV1Marker;
func:
/// Characters that can begin an identifier. This is the variant to use instead of
/// [`id_start`] when NFKC is used to fold differences between characters. See [`Unicode
/// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
/// details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let xid_start = sets::xid_start();
///
/// assert!(xid_start.contains('x'));
/// assert!(!xid_start.contains('1'));
/// assert!(!xid_start.contains('_'));
/// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA
/// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
/// assert!(!xid_start.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
/// ```
pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1;
pub fn load_xid_start();
}
//
// Binary property getter fns
// (data as sets of strings + code points)
//
// Generates the two getters for a binary property whose data is a set of strings plus
// code points:
//
// - `$funcname(provider)`: loads the data for `$keyed_data_marker` from a caller-supplied
//   `DataProvider` and wraps it in a `UnicodeSetData`, or returns a `PropertiesError`.
// - `$constname()`: a `const fn`, only compiled with the `compiled_data` feature, that
//   returns a `UnicodeSetDataBorrowed<'static>` over the baked singleton `$singleton`.
//
// The doc attributes passed after `func:` are attached to `$constname` only; `$funcname`
// gets a generated one-line doc that links back to `$constname`.
macro_rules! make_unicode_set_property {
(
// currently unused
property: $property:expr;
// currently unused
marker: $marker_name:ident;
// Data marker type the provider must be able to serve.
keyed_data_marker: $keyed_data_marker:ty;
func:
// At least one doc attribute is required for the compiled-data getter.
$(#[$doc:meta])+
// Visibility and name of the compiled-data getter, and the baked singleton it exposes.
$cvis:vis const fn $constname:ident() => $singleton:ident;
// Visibility and name of the provider-based loader.
$vis:vis fn $funcname:ident();
) => {
#[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
$vis fn $funcname(
provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
) -> Result<UnicodeSetData, PropertiesError> {
// Load with a default request, take the payload out of the response, and wrap it;
// `?` converts a failure of either step into the function's error type.
Ok(provider.load(Default::default()).and_then(DataResponse::take_payload).map(UnicodeSetData::from_data)?)
}
$(#[$doc])*
#[cfg(feature = "compiled_data")]
$cvis const fn $constname() -> UnicodeSetDataBorrowed<'static> {
UnicodeSetDataBorrowed {
set: crate::provider::Baked::$singleton
}
}
}
}
make_unicode_set_property! {
property: "Basic_Emoji";
marker: BasicEmojiProperty;
keyed_data_marker: BasicEmojiV1Marker;
func:
/// Characters and character sequences intended for general-purpose, independent, direct input.
/// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
/// details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let basic_emoji = sets::basic_emoji();
///
/// assert!(!basic_emoji.contains32(0x0020)); // U+0020 SPACE
/// assert!(!basic_emoji.contains_char('\n'));
/// assert!(basic_emoji.contains_char('🦃')); // U+1F983 TURKEY
/// assert!(basic_emoji.contains("\u{1F983}")); // the same code point, looked up as a string
/// assert!(basic_emoji.contains("\u{1F6E4}\u{FE0F}")); // railway track
/// assert!(!basic_emoji.contains("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
/// ```
pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1;
pub fn load_basic_emoji();
}
//
// Enumerated property getter fns
//
/// A version of [`for_general_category_group()`] that uses custom data provided by a [`DataProvider`].
///
/// The General_Category map is loaded from `provider`, and every range whose category bit
/// is set in `enum_val` is collected into the returned set.
///
/// # Errors
///
/// Propagates any error returned by [`maps::load_general_category()`].
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
pub fn load_for_general_category_group(
    provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized),
    enum_val: GeneralCategoryGroup,
) -> Result<CodePointSetData, PropertiesError> {
    let payload = maps::load_general_category(provider)?;
    let category_map = payload.as_borrowed();
    // Keep only the ranges whose category bit is present in the group's mask.
    let selected_ranges = category_map.iter_ranges().filter_map(|entry| {
        if (1 << entry.value as u32) & enum_val.0 != 0 {
            Some(entry.range)
        } else {
            None
        }
    });
    let inversion_list = CodePointInversionList::from_iter(selected_ranges);
    Ok(CodePointSetData::from_code_point_inversion_list(
        inversion_list,
    ))
}
/// Return a [`CodePointSetData`] for a value or a grouping of values of the General_Category property. See [`GeneralCategoryGroup`].
///
/// The set contains every code point range of the compiled General_Category map whose
/// category bit is set in `enum_val`.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    // Keep only the ranges whose category bit is present in the group's mask.
    let selected_ranges = maps::general_category()
        .iter_ranges()
        .filter_map(|entry| {
            if (1 << entry.value as u32) & enum_val.0 != 0 {
                Some(entry.range)
            } else {
                None
            }
        });
    let inversion_list = CodePointInversionList::from_iter(selected_ranges);
    CodePointSetData::from_code_point_inversion_list(inversion_list)
}
/// Returns a type capable of looking up values for a property specified as a string, as long as it is a
/// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
///
/// This handles every property required by ECMA-262 `/u` regular expressions, except for:
///
/// - `Script` and `General_Category`: handle these directly with [`maps::load_general_category()`] and
/// [`maps::load_script()`],
/// using property values parsed via [`GeneralCategory::get_name_to_enum_mapper()`] and [`Script::get_name_to_enum_mapper()`]
/// if necessary.
/// - `Script_Extensions`: handle this directly using APIs from [`crate::script`], like [`script::load_script_with_extensions_unstable()`]
/// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`],
/// using property values parsed via [`GeneralCategoryGroup::get_name_to_enum_mapper()`] if necessary
/// - `Assigned`, `Any`, and `ASCII` pseudoproperties: Handle these using their equivalent sets:
/// - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
/// - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
/// - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
/// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262,
/// simply create the corresponding `GeneralCategory` set.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::load_for_ecma262("Emoji").expect("loading data failed");
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
///
/// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
use crate::runtime::UnicodeProperty;
let prop = if let Some(prop) = UnicodeProperty::parse_ecma262_name(name) {
prop
} else {
return Err(PropertiesError::UnexpectedPropertyName);
};
Ok(match prop {
UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
UnicodeProperty::Alphabetic => alphabetic(),
UnicodeProperty::BidiControl => bidi_control(),
UnicodeProperty::BidiMirrored => bidi_mirrored(),
UnicodeProperty::CaseIgnorable => case_ignorable(),
UnicodeProperty::Cased => cased(),
UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
UnicodeProperty::Dash => dash(),
UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
UnicodeProperty::Deprecated => deprecated(),
UnicodeProperty::Diacritic => diacritic(),
UnicodeProperty::Emoji => emoji(),
UnicodeProperty::EmojiComponent => emoji_component(),
UnicodeProperty::EmojiModifier => emoji_modifier(),
UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
UnicodeProperty::EmojiPresentation => emoji_presentation(),
UnicodeProperty::ExtendedPictographic => extended_pictographic(),
UnicodeProperty::Extender => extender(),
UnicodeProperty::GraphemeBase => grapheme_base(),
UnicodeProperty::GraphemeExtend => grapheme_extend(),
UnicodeProperty::HexDigit => hex_digit(),
UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
UnicodeProperty::IdContinue => id_continue(),
UnicodeProperty::IdStart => id_start(),
UnicodeProperty::Ideographic => ideographic(),
UnicodeProperty::JoinControl => join_control(),
UnicodeProperty::LogicalOrderException => logical_order_exception(),
UnicodeProperty::Lowercase => lowercase(),
UnicodeProperty::Math => math(),
UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
UnicodeProperty::PatternSyntax => pattern_syntax(),
UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
UnicodeProperty::QuotationMark => quotation_mark(),
UnicodeProperty::Radical => radical(),
UnicodeProperty::RegionalIndicator => regional_indicator(),
UnicodeProperty::SentenceTerminal => sentence_terminal(),
UnicodeProperty::SoftDotted => soft_dotted(),
UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
UnicodeProperty::Uppercase => uppercase(),
UnicodeProperty::VariationSelector => variation_selector(),
UnicodeProperty::WhiteSpace => white_space(),
UnicodeProperty::XidContinue => xid_continue(),
UnicodeProperty::XidStart => xid_start(),
_ => return Err(PropertiesError::UnexpectedPropertyName),
})
}
// Invokes `icu_provider::gen_any_buffer_data_constructors!` for the `load_for_ecma262` family of
// constructors, passing the four function names listed under `functions`.
//
// NOTE(review): `load_for_ecma262` and `load_for_ecma262_unstable` are both written by hand in
// this file, so the macro is presumably what provides `load_for_ecma262_with_any_provider` and
// `load_for_ecma262_with_buffer_provider` — confirm against the macro definition in
// `icu_provider`.
icu_provider::gen_any_buffer_data_constructors!(
locale: skip,
name: &str,
result: Result<CodePointSetData, PropertiesError>,
// NOTE(review): presumably this `cfg(skip)` attribute is attached to the macro-generated
// `load_for_ecma262` so that it is compiled out in favour of the hand-written one, whose return
// type (`CodePointSetDataBorrowed<'static>`) differs from `result` above — TODO confirm.
#[cfg(skip)]
functions: [
load_for_ecma262,
load_for_ecma262_with_any_provider,
load_for_ecma262_with_buffer_provider,
load_for_ecma262_unstable,
]
);
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)]
pub fn load_for_ecma262_unstable<P>(
    provider: &P,
    name: &str,
) -> Result<CodePointSetData, PropertiesError>
where
    P: ?Sized
        + DataProvider<AsciiHexDigitV1Marker>
        + DataProvider<AlphabeticV1Marker>
        + DataProvider<BidiControlV1Marker>
        + DataProvider<BidiMirroredV1Marker>
        + DataProvider<CaseIgnorableV1Marker>
        + DataProvider<CasedV1Marker>
        + DataProvider<ChangesWhenCasefoldedV1Marker>
        + DataProvider<ChangesWhenCasemappedV1Marker>
        + DataProvider<ChangesWhenLowercasedV1Marker>
        + DataProvider<ChangesWhenNfkcCasefoldedV1Marker>
        + DataProvider<ChangesWhenTitlecasedV1Marker>
        + DataProvider<ChangesWhenUppercasedV1Marker>
        + DataProvider<DashV1Marker>
        + DataProvider<DefaultIgnorableCodePointV1Marker>
        + DataProvider<DeprecatedV1Marker>
        + DataProvider<DiacriticV1Marker>
        + DataProvider<EmojiV1Marker>
        + DataProvider<EmojiComponentV1Marker>
        + DataProvider<EmojiModifierV1Marker>
        + DataProvider<EmojiModifierBaseV1Marker>
        + DataProvider<EmojiPresentationV1Marker>
        + DataProvider<ExtendedPictographicV1Marker>
        + DataProvider<ExtenderV1Marker>
        + DataProvider<GraphemeBaseV1Marker>
        + DataProvider<GraphemeExtendV1Marker>
        + DataProvider<HexDigitV1Marker>
        + DataProvider<IdsBinaryOperatorV1Marker>
        + DataProvider<IdsTrinaryOperatorV1Marker>
        + DataProvider<IdContinueV1Marker>
        + DataProvider<IdStartV1Marker>
        + DataProvider<IdeographicV1Marker>
        + DataProvider<JoinControlV1Marker>
        + DataProvider<LogicalOrderExceptionV1Marker>
        + DataProvider<LowercaseV1Marker>
        + DataProvider<MathV1Marker>
        + DataProvider<NoncharacterCodePointV1Marker>
        + DataProvider<PatternSyntaxV1Marker>
        + DataProvider<PatternWhiteSpaceV1Marker>
        + DataProvider<QuotationMarkV1Marker>
        + DataProvider<RadicalV1Marker>
        + DataProvider<RegionalIndicatorV1Marker>
        + DataProvider<SentenceTerminalV1Marker>
        + DataProvider<SoftDottedV1Marker>
        + DataProvider<TerminalPunctuationV1Marker>
        + DataProvider<UnifiedIdeographV1Marker>
        + DataProvider<UppercaseV1Marker>
        + DataProvider<VariationSelectorV1Marker>
        + DataProvider<WhiteSpaceV1Marker>
        + DataProvider<XidContinueV1Marker>
        + DataProvider<XidStartV1Marker>,
{
    use crate::runtime::UnicodeProperty as Prop;

    // Dispatch on the parsed name. Both a name that does not parse (`None`) and a parsed property
    // with no loader listed here end up in the catch-all arm and yield the same error.
    match Prop::parse_ecma262_name(name) {
        Some(Prop::AsciiHexDigit) => load_ascii_hex_digit(provider),
        Some(Prop::Alphabetic) => load_alphabetic(provider),
        Some(Prop::BidiControl) => load_bidi_control(provider),
        Some(Prop::BidiMirrored) => load_bidi_mirrored(provider),
        Some(Prop::CaseIgnorable) => load_case_ignorable(provider),
        Some(Prop::Cased) => load_cased(provider),
        Some(Prop::ChangesWhenCasefolded) => load_changes_when_casefolded(provider),
        Some(Prop::ChangesWhenCasemapped) => load_changes_when_casemapped(provider),
        Some(Prop::ChangesWhenLowercased) => load_changes_when_lowercased(provider),
        Some(Prop::ChangesWhenNfkcCasefolded) => load_changes_when_nfkc_casefolded(provider),
        Some(Prop::ChangesWhenTitlecased) => load_changes_when_titlecased(provider),
        Some(Prop::ChangesWhenUppercased) => load_changes_when_uppercased(provider),
        Some(Prop::Dash) => load_dash(provider),
        Some(Prop::DefaultIgnorableCodePoint) => load_default_ignorable_code_point(provider),
        Some(Prop::Deprecated) => load_deprecated(provider),
        Some(Prop::Diacritic) => load_diacritic(provider),
        Some(Prop::Emoji) => load_emoji(provider),
        Some(Prop::EmojiComponent) => load_emoji_component(provider),
        Some(Prop::EmojiModifier) => load_emoji_modifier(provider),
        Some(Prop::EmojiModifierBase) => load_emoji_modifier_base(provider),
        Some(Prop::EmojiPresentation) => load_emoji_presentation(provider),
        Some(Prop::ExtendedPictographic) => load_extended_pictographic(provider),
        Some(Prop::Extender) => load_extender(provider),
        Some(Prop::GraphemeBase) => load_grapheme_base(provider),
        Some(Prop::GraphemeExtend) => load_grapheme_extend(provider),
        Some(Prop::HexDigit) => load_hex_digit(provider),
        Some(Prop::IdsBinaryOperator) => load_ids_binary_operator(provider),
        Some(Prop::IdsTrinaryOperator) => load_ids_trinary_operator(provider),
        Some(Prop::IdContinue) => load_id_continue(provider),
        Some(Prop::IdStart) => load_id_start(provider),
        Some(Prop::Ideographic) => load_ideographic(provider),
        Some(Prop::JoinControl) => load_join_control(provider),
        Some(Prop::LogicalOrderException) => load_logical_order_exception(provider),
        Some(Prop::Lowercase) => load_lowercase(provider),
        Some(Prop::Math) => load_math(provider),
        Some(Prop::NoncharacterCodePoint) => load_noncharacter_code_point(provider),
        Some(Prop::PatternSyntax) => load_pattern_syntax(provider),
        Some(Prop::PatternWhiteSpace) => load_pattern_white_space(provider),
        Some(Prop::QuotationMark) => load_quotation_mark(provider),
        Some(Prop::Radical) => load_radical(provider),
        Some(Prop::RegionalIndicator) => load_regional_indicator(provider),
        Some(Prop::SentenceTerminal) => load_sentence_terminal(provider),
        Some(Prop::SoftDotted) => load_soft_dotted(provider),
        Some(Prop::TerminalPunctuation) => load_terminal_punctuation(provider),
        Some(Prop::UnifiedIdeograph) => load_unified_ideograph(provider),
        Some(Prop::Uppercase) => load_uppercase(provider),
        Some(Prop::VariationSelector) => load_variation_selector(provider),
        Some(Prop::WhiteSpace) => load_white_space(provider),
        Some(Prop::XidContinue) => load_xid_continue(provider),
        Some(Prop::XidStart) => load_xid_start(provider),
        _ => Err(PropertiesError::UnexpectedPropertyName),
    }
}
#[cfg(test)]
mod tests {
    // The `Number` group set must contain digits from several scripts and must not contain a
    // Latin letter.
    #[test]
    fn test_general_category() {
        use icu::properties::sets;
        use icu::properties::GeneralCategoryGroup;

        let digits_data = sets::for_general_category_group(GeneralCategoryGroup::Number);
        let digits = digits_data.as_borrowed();

        assert!(digits.contains('5'));
        assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE
        assert!(!digits.contains('A'));
    }

    // The set derived from `Script::Thai` must contain Thai letters and digits, and must exclude
    // both a Latin letter and the baht sign.
    #[test]
    fn test_script() {
        use icu::properties::maps;
        use icu::properties::Script;

        let thai_data = maps::script().get_set_for_value(Script::Thai);
        let thai = thai_data.as_borrowed();

        assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO
        assert!(!thai.contains('A'));
        assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
    }

    // Each `GeneralCategoryGroup` set must be exactly the union of the sets of its member
    // `GeneralCategory` values.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::{maps, sets};
        use icu::properties::{GeneralCategory, GeneralCategoryGroup};
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;

        // Builds the union of the per-subcategory sets and compares its inversion list with the
        // one backing the group's own set.
        let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let category_set = sets::for_general_category_group(category);
            let category_set = category_set
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            let mut builder = CodePointInversionListBuilder::new();
            for &subcategory in subcategories {
                let gc_set_data = maps::general_category().get_set_for_value(subcategory);
                let gc_set = gc_set_data.as_borrowed();
                for range in gc_set.iter_ranges() {
                    builder.add_range32(&range);
                }
            }
            let combined_set = builder.build();

            // Carry the group under test in the failure message itself so a mismatch is
            // attributable without relying on captured stdout.
            assert_eq!(
                category_set.get_inversion_list_vec(),
                combined_set.get_inversion_list_vec(),
                "{category:?} {subcategories:?}"
            );
        };

        test_group(
            GeneralCategoryGroup::Letter,
            &[
                GeneralCategory::UppercaseLetter,
                GeneralCategory::LowercaseLetter,
                GeneralCategory::TitlecaseLetter,
                GeneralCategory::ModifierLetter,
                GeneralCategory::OtherLetter,
            ],
        );
        test_group(
            GeneralCategoryGroup::Other,
            &[
                GeneralCategory::Control,
                GeneralCategory::Format,
                GeneralCategory::Unassigned,
                GeneralCategory::PrivateUse,
                GeneralCategory::Surrogate,
            ],
        );
        test_group(
            GeneralCategoryGroup::Mark,
            &[
                GeneralCategory::SpacingMark,
                GeneralCategory::EnclosingMark,
                GeneralCategory::NonspacingMark,
            ],
        );
        test_group(
            GeneralCategoryGroup::Number,
            &[
                GeneralCategory::DecimalNumber,
                GeneralCategory::LetterNumber,
                GeneralCategory::OtherNumber,
            ],
        );
        test_group(
            GeneralCategoryGroup::Punctuation,
            &[
                GeneralCategory::ConnectorPunctuation,
                GeneralCategory::DashPunctuation,
                GeneralCategory::ClosePunctuation,
                GeneralCategory::FinalPunctuation,
                GeneralCategory::InitialPunctuation,
                GeneralCategory::OtherPunctuation,
                GeneralCategory::OpenPunctuation,
            ],
        );
        test_group(
            GeneralCategoryGroup::Symbol,
            &[
                GeneralCategory::CurrencySymbol,
                GeneralCategory::ModifierSymbol,
                GeneralCategory::MathSymbol,
                GeneralCategory::OtherSymbol,
            ],
        );
        test_group(
            GeneralCategoryGroup::Separator,
            &[
                GeneralCategory::LineSeparator,
                GeneralCategory::ParagraphSeparator,
                GeneralCategory::SpaceSeparator,
            ],
        );
    }

    // Surrogate code points are not valid `char`s, so membership is checked through the `u32`
    // lookup at the first, a middle, and the last surrogate value.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::maps;
        use icu::properties::GeneralCategory;

        let surrogates_data =
            maps::general_category().get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogates_data.as_borrowed();

        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));
        assert!(!surrogates.contains('A'));
    }
}