// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use core::cmp::Ordering; use core::default::Default; use core::fmt; use core::fmt::Debug; use icu_locid::extensions::unicode as unicode_ext; use icu_locid::subtags::{Language, Region, Script, Variants}; use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; use writeable::{LengthHint, Writeable}; #[cfg(doc)] use icu_locid::subtags::Variant; /// The request type passed into all data provider implementations. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] #[allow(clippy::exhaustive_structs)] // this type is stable pub struct DataRequest<'a> { /// The locale for which to load data. /// /// If locale fallback is enabled, the resulting data may be from a different locale /// than the one requested here. pub locale: &'a DataLocale, /// Metadata that may affect the behavior of the data provider. pub metadata: DataRequestMetadata, } impl fmt::Display for DataRequest<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(&self.locale, f) } } /// Metadata for data requests. This is currently empty, but it may be extended with options /// for tuning locale fallback, buffer layout, and so forth. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[non_exhaustive] pub struct DataRequestMetadata; /// The main locale type used by the ICU4X data provider. /// /// [`DataLocale`] contains less functionality than [`Locale`] but more than /// [`LanguageIdentifier`] for better size and performance while still meeting /// the needs of the ICU4X data pipeline. /// /// # Examples /// /// Convert a [`Locale`] to a [`DataLocale`] and back: /// /// ``` /// use icu_locid::locale; /// use icu_provider::DataLocale; /// /// let locale1 = locale!("en-u-ca-buddhist"); /// let data_locale = DataLocale::from(locale1); /// let locale2 = data_locale.into_locale(); /// /// assert_eq!(locale2.to_string(), "en-u-ca-buddhist"); /// ``` /// /// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more /// efficient than cloning the [`Locale`], but less efficient than converting an owned /// [`Locale`]: /// /// ``` /// use icu_locid::locale; /// use icu_provider::DataLocale; /// /// let locale1 = locale!("en-u-ca-buddhist"); /// let data_locale = DataLocale::from(&locale1); /// let locale2 = data_locale.into_locale(); /// /// assert_eq!(locale1, locale2); /// ``` /// /// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: /// /// ``` /// use icu_locid::langid; /// use icu_provider::DataLocale; /// /// let langid1 = langid!("es-CA-valencia"); /// let data_locale = DataLocale::from(langid1); /// let langid2 = data_locale.get_langid(); /// /// assert_eq!(langid2.to_string(), "es-CA-valencia"); /// ``` /// /// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data /// lookup and fallback. This may change in the future. /// /// ``` /// use icu_locid::Locale; /// use icu_provider::DataLocale; /// /// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" /// .parse::() /// .unwrap(); /// let data_locale = DataLocale::from(locale); /// /// assert_eq!(data_locale.to_string(), "hi-u-ca-buddhist"); /// ``` #[derive(PartialEq, Clone, Default, Eq, Hash)] pub struct DataLocale { langid: LanguageIdentifier, keywords: unicode_ext::Keywords, } impl<'a> Default for &'a DataLocale { fn default() -> Self { static DEFAULT: DataLocale = DataLocale { langid: LanguageIdentifier::UND, keywords: unicode_ext::Keywords::new(), }; &DEFAULT } } impl fmt::Debug for DataLocale { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "DataLocale{{{}}}", self) } } impl Writeable for DataLocale { fn write_to(&self, sink: &mut W) -> core::fmt::Result { self.langid.write_to(sink)?; if !self.keywords.is_empty() { sink.write_str("-u-")?; self.keywords.write_to(sink)?; } Ok(()) } fn writeable_length_hint(&self) -> LengthHint { self.langid.writeable_length_hint() + if !self.keywords.is_empty() { self.keywords.writeable_length_hint() + 3 } else { LengthHint::exact(0) } } fn write_to_string(&self) -> alloc::borrow::Cow { if self.keywords.is_empty() { return self.langid.write_to_string(); } let mut string = alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); let _ = self.write_to(&mut string); alloc::borrow::Cow::Owned(string) } } writeable::impl_display_with_writeable!(DataLocale); impl From for DataLocale { fn from(langid: LanguageIdentifier) -> Self { Self { langid, keywords: unicode_ext::Keywords::new(), } } } impl From for DataLocale { fn from(locale: Locale) -> Self { Self { langid: locale.id, keywords: locale.extensions.unicode.keywords, } } } impl From<&LanguageIdentifier> for DataLocale { fn from(langid: &LanguageIdentifier) -> Self { Self { langid: langid.clone(), keywords: unicode_ext::Keywords::new(), } } } impl From<&Locale> for DataLocale { fn from(locale: &Locale) -> Self { Self { langid: locale.id.clone(), keywords: locale.extensions.unicode.keywords.clone(), } } } impl DataLocale { /// Compare this [`DataLocale`] with BCP-47 bytes. /// /// The return value is equivalent to what would happen if you first converted this /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. /// /// This function is case-sensitive and results in a *total order*, so it is appropriate for /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. /// /// # Examples /// /// ``` /// use icu_locid::Locale; /// use icu_provider::DataLocale; /// use std::cmp::Ordering; /// /// let bcp47_strings: &[&str] = &[ /// "ca-ES", /// "ca-ES-u-ca-buddhist", /// "ca-ES-valencia", /// "pl-Latn-PL", /// "und", /// "und-fonipa", /// "und-u-ca-hebrew", /// "und-u-ca-japanese", /// "zh", /// ]; /// /// for ab in bcp47_strings.windows(2) { /// let a = ab[0]; /// let b = ab[1]; /// assert!(a.cmp(b) == Ordering::Less); /// let a_loc: DataLocale = a.parse::().unwrap().into(); /// assert_eq!(a, a_loc.to_string()); /// assert!( /// a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal, /// "{} == {}", /// a, /// a /// ); /// assert!( /// a_loc.strict_cmp(b.as_bytes()) == Ordering::Less, /// "{} < {}", /// a, /// b /// ); /// let b_loc: DataLocale = b.parse::().unwrap().into(); /// assert_eq!(b, b_loc.to_string()); /// assert!( /// b_loc.strict_cmp(b.as_bytes()) == Ordering::Equal, /// "{} == {}", /// b, /// b /// ); /// assert!( /// b_loc.strict_cmp(a.as_bytes()) == Ordering::Greater, /// "{} > {}", /// b, /// a /// ); /// } /// ``` pub fn strict_cmp(&self, other: &[u8]) -> Ordering { let subtags = other.split(|b| *b == b'-'); let mut subtag_result = self.langid.strict_cmp_iter(subtags); if self.has_unicode_ext() { let mut subtags = match subtag_result { SubtagOrderingResult::Subtags(s) => s, SubtagOrderingResult::Ordering(o) => return o, }; match subtags.next() { Some(b"u") => (), Some(s) => return s.cmp(b"u").reverse(), None => return Ordering::Greater, } subtag_result = self.keywords.strict_cmp_iter(subtags); } subtag_result.end() } } impl DataLocale { /// Returns whether this [`DataLocale`] has all empty fields (no components). pub fn is_empty(&self) -> bool { self == <&DataLocale>::default() } /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. /// /// Note that this only checks the language identifier; extension keywords may also be set. /// To check the entire `DataLocale`, use [`DataLocale::is_empty()`]. pub fn is_langid_und(&self) -> bool { self.langid == LanguageIdentifier::UND } /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. /// /// This may allocate memory if there are variant subtags. If you need only the language, /// script, and/or region subtag, use the specific getters for those subtags: /// /// - [`DataLocale::language()`] /// - [`DataLocale::script()`] /// - [`DataLocale::region()`] /// /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] /// and then access the `id` field. /// /// # Examples /// /// ``` /// use icu_locid::langid; /// use icu_provider::prelude::*; /// /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); /// /// let req_no_langid = DataRequest { /// locale: &Default::default(), /// metadata: Default::default(), /// }; /// /// let req_with_langid = DataRequest { /// locale: &langid!("ar-EG").into(), /// metadata: Default::default(), /// }; /// /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); /// ``` pub fn get_langid(&self) -> LanguageIdentifier { self.langid.clone() } /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. #[inline] pub fn set_langid(&mut self, lid: LanguageIdentifier) { self.langid = lid; } /// Converts this [`DataLocale`] into a [`Locale`]. /// /// See also [`DataLocale::get_langid()`]. /// /// # Examples /// /// ``` /// use icu_locid::{ /// langid, subtags_language as language, subtags_region as region, Locale, /// }; /// use icu_provider::prelude::*; /// /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47"); /// let locale: DataLocale = locale.into(); /// /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); /// assert_eq!(locale.get_langid(), langid!("it-IT")); /// assert_eq!(locale.language(), language!("it")); /// assert_eq!(locale.script(), None); /// assert_eq!(locale.region(), Some(region!("IT"))); /// /// let locale = locale.into_locale(); /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); /// ``` pub fn into_locale(self) -> Locale { let mut loc = Locale { id: self.langid, ..Default::default() }; loc.extensions.unicode.keywords = self.keywords; loc } /// Returns the [`Language`] for this [`DataLocale`]. #[inline] pub fn language(&self) -> Language { self.langid.language } /// Returns the [`Language`] for this [`DataLocale`]. #[inline] pub fn set_language(&mut self, language: Language) { self.langid.language = language; } /// Returns the [`Script`] for this [`DataLocale`]. #[inline] pub fn script(&self) -> Option