// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). // Provider structs must be stable #![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. //! //!
//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, //! including in SemVer minor releases. While the serde representation of data structs is guaranteed //! to be stable, their Rust representation might not be. Use with caution. //!
//! //! Read more about data providers: [`icu_provider`] pub mod names; use crate::script::ScriptWithExt; use crate::Script; use core::ops::RangeInclusive; use core::str; use icu_collections::codepointinvlist::CodePointInversionList; use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; use icu_provider::prelude::*; use icu_provider::{DataKeyMetadata, FallbackPriority}; use zerofrom::ZeroFrom; use zerovec::{VarZeroVec, ZeroSlice, ZeroVecError}; #[cfg(feature = "compiled_data")] #[derive(Debug)] /// Baked data /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. In particular, the `DataProvider` implementations are only /// guaranteed to match with this version's `*_unstable` providers. Use with caution. ///
pub struct Baked; #[cfg(feature = "compiled_data")] const _: () = { pub mod icu { pub use crate as properties; pub use icu_collections as collections; pub use icu_locid_transform as locid_transform; } icu_properties_data::make_provider!(Baked); icu_properties_data::impl_propnames_from_gcb_v1!(Baked); icu_properties_data::impl_propnames_from_bc_v1!(Baked); icu_properties_data::impl_propnames_from_ccc_v1!(Baked); icu_properties_data::impl_propnames_from_ea_v1!(Baked); icu_properties_data::impl_propnames_from_gc_v1!(Baked); icu_properties_data::impl_propnames_from_gcm_v1!(Baked); icu_properties_data::impl_propnames_from_insc_v1!(Baked); icu_properties_data::impl_propnames_from_lb_v1!(Baked); icu_properties_data::impl_propnames_from_sb_v1!(Baked); icu_properties_data::impl_propnames_from_sc_v1!(Baked); icu_properties_data::impl_propnames_from_wb_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_bc_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_ea_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_gc_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_gcb_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_insc_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_lb_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_sb_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_sc_v1!(Baked); icu_properties_data::impl_propnames_to_long_linear_wb_v1!(Baked); icu_properties_data::impl_propnames_to_long_sparse_ccc_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_bc_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_ea_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_gc_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_gcb_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_insc_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_lb_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_sb_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear_wb_v1!(Baked); icu_properties_data::impl_propnames_to_short_linear4_sc_v1!(Baked); icu_properties_data::impl_propnames_to_short_sparse_ccc_v1!(Baked); icu_properties_data::impl_props_ahex_v1!(Baked); icu_properties_data::impl_props_alnum_v1!(Baked); icu_properties_data::impl_props_alpha_v1!(Baked); icu_properties_data::impl_props_basic_emoji_v1!(Baked); icu_properties_data::impl_props_bc_v1!(Baked); icu_properties_data::impl_props_bidi_c_v1!(Baked); icu_properties_data::impl_props_bidi_m_v1!(Baked); icu_properties_data::impl_props_bidiauxiliaryprops_v1!(Baked); icu_properties_data::impl_props_blank_v1!(Baked); icu_properties_data::impl_props_cased_v1!(Baked); icu_properties_data::impl_props_ccc_v1!(Baked); icu_properties_data::impl_props_ci_v1!(Baked); icu_properties_data::impl_props_comp_ex_v1!(Baked); icu_properties_data::impl_props_cwcf_v1!(Baked); icu_properties_data::impl_props_cwcm_v1!(Baked); icu_properties_data::impl_props_cwkcf_v1!(Baked); icu_properties_data::impl_props_cwl_v1!(Baked); icu_properties_data::impl_props_cwt_v1!(Baked); icu_properties_data::impl_props_cwu_v1!(Baked); icu_properties_data::impl_props_dash_v1!(Baked); icu_properties_data::impl_props_dep_v1!(Baked); icu_properties_data::impl_props_di_v1!(Baked); icu_properties_data::impl_props_dia_v1!(Baked); icu_properties_data::impl_props_ea_v1!(Baked); icu_properties_data::impl_props_ebase_v1!(Baked); icu_properties_data::impl_props_ecomp_v1!(Baked); icu_properties_data::impl_props_emod_v1!(Baked); icu_properties_data::impl_props_emoji_v1!(Baked); icu_properties_data::impl_props_epres_v1!(Baked); icu_properties_data::impl_props_exemplarchars_auxiliary_v1!(Baked); icu_properties_data::impl_props_exemplarchars_index_v1!(Baked); icu_properties_data::impl_props_exemplarchars_main_v1!(Baked); icu_properties_data::impl_props_exemplarchars_numbers_v1!(Baked); icu_properties_data::impl_props_exemplarchars_punctuation_v1!(Baked); icu_properties_data::impl_props_ext_v1!(Baked); icu_properties_data::impl_props_extpict_v1!(Baked); icu_properties_data::impl_props_gc_v1!(Baked); icu_properties_data::impl_props_gcb_v1!(Baked); icu_properties_data::impl_props_gr_base_v1!(Baked); icu_properties_data::impl_props_gr_ext_v1!(Baked); icu_properties_data::impl_props_gr_link_v1!(Baked); icu_properties_data::impl_props_graph_v1!(Baked); icu_properties_data::impl_props_hex_v1!(Baked); icu_properties_data::impl_props_hyphen_v1!(Baked); icu_properties_data::impl_props_idc_v1!(Baked); icu_properties_data::impl_props_ideo_v1!(Baked); icu_properties_data::impl_props_ids_v1!(Baked); icu_properties_data::impl_props_idsb_v1!(Baked); icu_properties_data::impl_props_idst_v1!(Baked); icu_properties_data::impl_props_insc_v1!(Baked); icu_properties_data::impl_props_join_c_v1!(Baked); icu_properties_data::impl_props_lb_v1!(Baked); icu_properties_data::impl_props_loe_v1!(Baked); icu_properties_data::impl_props_lower_v1!(Baked); icu_properties_data::impl_props_math_v1!(Baked); icu_properties_data::impl_props_nchar_v1!(Baked); icu_properties_data::impl_props_nfcinert_v1!(Baked); icu_properties_data::impl_props_nfdinert_v1!(Baked); icu_properties_data::impl_props_nfkcinert_v1!(Baked); icu_properties_data::impl_props_nfkdinert_v1!(Baked); icu_properties_data::impl_props_pat_syn_v1!(Baked); icu_properties_data::impl_props_pat_ws_v1!(Baked); icu_properties_data::impl_props_pcm_v1!(Baked); icu_properties_data::impl_props_print_v1!(Baked); icu_properties_data::impl_props_qmark_v1!(Baked); icu_properties_data::impl_props_radical_v1!(Baked); icu_properties_data::impl_props_ri_v1!(Baked); icu_properties_data::impl_props_sb_v1!(Baked); icu_properties_data::impl_props_sc_v1!(Baked); icu_properties_data::impl_props_scx_v1!(Baked); icu_properties_data::impl_props_sd_v1!(Baked); icu_properties_data::impl_props_segstart_v1!(Baked); icu_properties_data::impl_props_sensitive_v1!(Baked); icu_properties_data::impl_props_sterm_v1!(Baked); icu_properties_data::impl_props_term_v1!(Baked); icu_properties_data::impl_props_uideo_v1!(Baked); icu_properties_data::impl_props_upper_v1!(Baked); icu_properties_data::impl_props_vs_v1!(Baked); icu_properties_data::impl_props_wb_v1!(Baked); icu_properties_data::impl_props_wspace_v1!(Baked); icu_properties_data::impl_props_xdigit_v1!(Baked); icu_properties_data::impl_props_xidc_v1!(Baked); icu_properties_data::impl_props_xids_v1!(Baked); }; // include the specialized structs for the compact representation of Bidi property data pub mod bidi_data; /// A set of characters which share a particular property value. /// /// This data enum is extensible, more backends may be added in the future. /// Old data can be used with newer code but not vice versa. /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. While the serde representation of data structs is guaranteed /// to be stable, their Rust representation might not be. Use with caution. ///
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_properties::provider), )] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[non_exhaustive] pub enum PropertyCodePointSetV1<'data> { /// The set of characters, represented as an inversion list InversionList(#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionList<'data>), // new variants should go BELOW existing ones // Serde serializes based on variant name and index in the enum // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant } /// A map efficiently storing data about individual characters. /// /// This data enum is extensible, more backends may be added in the future. /// Old data can be used with newer code but not vice versa. /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. While the serde representation of data structs is guaranteed /// to be stable, their Rust representation might not be. Use with caution. ///
#[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_properties::provider), )] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[non_exhaustive] pub enum PropertyCodePointMapV1<'data, T: TrieValue> { /// A codepoint trie storing the data CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>), // new variants should go BELOW existing ones // Serde serializes based on variant name and index in the enum // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant } /// A set of characters and strings which share a particular property value. /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. While the serde representation of data structs is guaranteed /// to be stable, their Rust representation might not be. Use with caution. ///
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_properties::provider), )] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[non_exhaustive] pub enum PropertyUnicodeSetV1<'data> { /// A set representing characters in an inversion list, and the strings in a list. CPInversionListStrList( #[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>, ), // new variants should go BELOW existing ones // Serde serializes based on variant name and index in the enum // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant } impl<'data> PropertyUnicodeSetV1<'data> { #[inline] pub(crate) fn contains(&self, s: &str) -> bool { match *self { Self::CPInversionListStrList(ref l) => l.contains(s), } } #[inline] pub(crate) fn contains32(&self, cp: u32) -> bool { match *self { Self::CPInversionListStrList(ref l) => l.contains32(cp), } } #[inline] pub(crate) fn contains_char(&self, ch: char) -> bool { match *self { Self::CPInversionListStrList(ref l) => l.contains_char(ch), } } #[inline] pub(crate) fn from_code_point_inversion_list_string_list( l: CodePointInversionListAndStringList<'static>, ) -> Self { Self::CPInversionListStrList(l) } #[inline] pub(crate) fn as_code_point_inversion_list_string_list( &'_ self, ) -> Option<&'_ CodePointInversionListAndStringList<'data>> { match *self { Self::CPInversionListStrList(ref l) => Some(l), // any other backing data structure that cannot return a CPInversionListStrList in O(1) time should return None } } #[inline] pub(crate) fn to_code_point_inversion_list_string_list( &self, ) -> CodePointInversionListAndStringList<'_> { match *self { Self::CPInversionListStrList(ref t) => ZeroFrom::zero_from(t), } } } /// A struct that efficiently stores `Script` and `Script_Extensions` property data. /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. While the serde representation of data structs is guaranteed /// to be stable, their Rust representation might not be. Use with caution. ///
#[icu_provider::data_struct(marker( ScriptWithExtensionsPropertyV1Marker, "props/scx@1", singleton ))] #[derive(Debug, Eq, PartialEq, Clone)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_properties::provider), )] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] pub struct ScriptWithExtensionsPropertyV1<'data> { /// Note: The `ScriptWithExt` values in this array will assume a 12-bit layout. The 2 /// higher order bits 11..10 will indicate how to deduce the Script value and /// Script_Extensions value, nearly matching the representation /// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h): /// /// | High order 2 bits value | Script | Script_Extensions | /// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------| /// | 3 | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits | /// | 2 | Script=Inherited | Entire sub-array, index given by lower 10 bits | /// | 1 | Script=Common | Entire sub-array, index given by lower 10 bits | /// | 0 | Value in lower 10 bits | `[ Script value ]` single-element array | /// /// When the lower 10 bits of the value are used as an index, that index is /// used for the outer-level vector of the nested `extensions` structure. #[cfg_attr(feature = "serde", serde(borrow))] pub trie: CodePointTrie<'data, ScriptWithExt>, /// This companion structure stores Script_Extensions values, which are /// themselves arrays / vectors. This structure only stores the values for /// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. The /// sub-vector represents the Script_Extensions array value for a code point, /// and may also indicate Script value, as described for the `trie` field. #[cfg_attr(feature = "serde", serde(borrow))] pub extensions: VarZeroVec<'data, ZeroSlice