// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{DataError, DataErrorKind}; use core::cmp::Ordering; use core::default::Default; use core::fmt; use core::fmt::Debug; use core::hash::Hash; use core::str::FromStr; use icu_locid::extensions::unicode as unicode_ext; use icu_locid::subtags::{Language, Region, Script, Variants}; use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; use writeable::{LengthHint, Writeable}; #[cfg(feature = "experimental")] use alloc::string::String; #[cfg(feature = "experimental")] use core::ops::Deref; #[cfg(feature = "experimental")] use icu_locid::extensions::private::Subtag; #[cfg(feature = "experimental")] use tinystr::TinyAsciiStr; #[cfg(doc)] use icu_locid::subtags::Variant; /// The request type passed into all data provider implementations. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] #[allow(clippy::exhaustive_structs)] // this type is stable pub struct DataRequest<'a> { /// The locale for which to load data. /// /// If locale fallback is enabled, the resulting data may be from a different locale /// than the one requested here. pub locale: &'a DataLocale, /// Metadata that may affect the behavior of the data provider. pub metadata: DataRequestMetadata, } impl fmt::Display for DataRequest<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(&self.locale, f) } } /// Metadata for data requests. This is currently empty, but it may be extended with options /// for tuning locale fallback, buffer layout, and so forth. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[non_exhaustive] pub struct DataRequestMetadata { /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks. pub silent: bool, } /// A locale type optimized for use in fallbacking and the ICU4X data pipeline. /// /// [`DataLocale`] contains less functionality than [`Locale`] but more than /// [`LanguageIdentifier`] for better size and performance while still meeting /// the needs of the ICU4X data pipeline. /// /// # Examples /// /// Convert a [`Locale`] to a [`DataLocale`] and back: /// /// ``` /// use icu_locid::locale; /// use icu_provider::DataLocale; /// /// let locale = locale!("en-u-ca-buddhist"); /// let data_locale = DataLocale::from(locale); /// let locale = data_locale.into_locale(); /// /// assert_eq!(locale, locale!("en-u-ca-buddhist")); /// ``` /// /// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more /// efficient than cloning the [`Locale`], but less efficient than converting an owned /// [`Locale`]: /// /// ``` /// use icu_locid::locale; /// use icu_provider::DataLocale; /// /// let locale1 = locale!("en-u-ca-buddhist"); /// let data_locale = DataLocale::from(&locale1); /// let locale2 = data_locale.into_locale(); /// /// assert_eq!(locale1, locale2); /// ``` /// /// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: /// /// ``` /// use icu_locid::langid; /// use icu_provider::DataLocale; /// /// let langid = langid!("es-CA-valencia"); /// let data_locale = DataLocale::from(langid); /// let langid = data_locale.get_langid(); /// /// assert_eq!(langid, langid!("es-CA-valencia")); /// ``` /// /// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data /// lookup and fallback. This may change in the future. /// /// ``` /// use icu_locid::{locale, Locale}; /// use icu_provider::DataLocale; /// /// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" /// .parse::() /// .unwrap(); /// let data_locale = DataLocale::from(locale); /// /// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist")); /// ``` #[derive(PartialEq, Clone, Default, Eq, Hash)] pub struct DataLocale { langid: LanguageIdentifier, keywords: unicode_ext::Keywords, #[cfg(feature = "experimental")] aux: Option, } impl<'a> Default for &'a DataLocale { fn default() -> Self { static DEFAULT: DataLocale = DataLocale { langid: LanguageIdentifier::UND, keywords: unicode_ext::Keywords::new(), #[cfg(feature = "experimental")] aux: None, }; &DEFAULT } } impl fmt::Debug for DataLocale { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "DataLocale{{{self}}}") } } impl Writeable for DataLocale { fn write_to(&self, sink: &mut W) -> core::fmt::Result { self.langid.write_to(sink)?; if !self.keywords.is_empty() { sink.write_str("-u-")?; self.keywords.write_to(sink)?; } #[cfg(feature = "experimental")] if let Some(aux) = self.aux.as_ref() { sink.write_str("-x-")?; aux.write_to(sink)?; } Ok(()) } fn writeable_length_hint(&self) -> LengthHint { let mut length_hint = self.langid.writeable_length_hint(); if !self.keywords.is_empty() { length_hint += self.keywords.writeable_length_hint() + 3; } #[cfg(feature = "experimental")] if let Some(aux) = self.aux.as_ref() { length_hint += aux.writeable_length_hint() + 3; } length_hint } fn write_to_string(&self) -> alloc::borrow::Cow { #[cfg_attr(not(feature = "experimental"), allow(unused_mut))] let mut is_only_langid = self.keywords.is_empty(); #[cfg(feature = "experimental")] { is_only_langid = is_only_langid && self.aux.is_none(); } if is_only_langid { return self.langid.write_to_string(); } let mut string = alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); let _ = self.write_to(&mut string); alloc::borrow::Cow::Owned(string) } } writeable::impl_display_with_writeable!(DataLocale); impl From for DataLocale { fn from(langid: LanguageIdentifier) -> Self { Self { langid, keywords: unicode_ext::Keywords::new(), #[cfg(feature = "experimental")] aux: None, } } } impl From for DataLocale { fn from(locale: Locale) -> Self { Self { langid: locale.id, keywords: locale.extensions.unicode.keywords, #[cfg(feature = "experimental")] aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), } } } impl From<&LanguageIdentifier> for DataLocale { fn from(langid: &LanguageIdentifier) -> Self { Self { langid: langid.clone(), keywords: unicode_ext::Keywords::new(), #[cfg(feature = "experimental")] aux: None, } } } impl From<&Locale> for DataLocale { fn from(locale: &Locale) -> Self { Self { langid: locale.id.clone(), keywords: locale.extensions.unicode.keywords.clone(), #[cfg(feature = "experimental")] aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), } } } impl FromStr for DataLocale { type Err = DataError; fn from_str(s: &str) -> Result { let locale = Locale::from_str(s).map_err(|e| { DataErrorKind::KeyLocaleSyntax .into_error() .with_display_context(s) .with_display_context(&e) })?; Ok(DataLocale::from(locale)) } } impl DataLocale { /// Compare this [`DataLocale`] with BCP-47 bytes. /// /// The return value is equivalent to what would happen if you first converted this /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. /// /// This function is case-sensitive and results in a *total order*, so it is appropriate for /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. /// /// # Examples /// /// ``` /// use icu_locid::Locale; /// use icu_provider::DataLocale; /// use std::cmp::Ordering; /// /// let bcp47_strings: &[&str] = &[ /// "ca", /// "ca-ES", /// "ca-ES-u-ca-buddhist", /// "ca-ES-valencia", /// "ca-ES-x-gbp", /// "ca-ES-x-gbp-short", /// "ca-ES-x-usd", /// "ca-ES-xyzabc", /// "ca-x-eur", /// "cat", /// "pl-Latn-PL", /// "und", /// "und-fonipa", /// "und-u-ca-hebrew", /// "und-u-ca-japanese", /// "und-x-mxn", /// "zh", /// ]; /// /// for ab in bcp47_strings.windows(2) { /// let a = ab[0]; /// let b = ab[1]; /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b); /// let a_loc: DataLocale = a.parse().unwrap(); /// assert_eq!( /// a_loc.strict_cmp(a.as_bytes()), /// Ordering::Equal, /// "strict_cmp: {} == {}", /// a_loc, /// a /// ); /// assert_eq!( /// a_loc.strict_cmp(b.as_bytes()), /// Ordering::Less, /// "strict_cmp: {} < {}", /// a_loc, /// b /// ); /// let b_loc: DataLocale = b.parse().unwrap(); /// assert_eq!( /// b_loc.strict_cmp(b.as_bytes()), /// Ordering::Equal, /// "strict_cmp: {} == {}", /// b_loc, /// b /// ); /// assert_eq!( /// b_loc.strict_cmp(a.as_bytes()), /// Ordering::Greater, /// "strict_cmp: {} > {}", /// b_loc, /// a /// ); /// } /// ``` /// /// Comparison against invalid strings: /// /// ``` /// use icu_provider::DataLocale; /// /// let invalid_strings: &[&str] = &[ /// // Less than "ca-ES" /// "CA", /// "ar-x-gbp-FOO", /// // Greater than "ca-ES-x-gbp" /// "ca_ES", /// "ca-ES-x-gbp-FOO", /// ]; /// /// let data_locale = "ca-ES-x-gbp".parse::().unwrap(); /// /// for s in invalid_strings.iter() { /// let expected_ordering = "ca-ES-x-gbp".cmp(s); /// let actual_ordering = data_locale.strict_cmp(s.as_bytes()); /// assert_eq!(expected_ordering, actual_ordering, "{}", s); /// } /// ``` pub fn strict_cmp(&self, other: &[u8]) -> Ordering { let subtags = other.split(|b| *b == b'-'); let mut subtag_result = self.langid.strict_cmp_iter(subtags); if self.has_unicode_ext() { let mut subtags = match subtag_result { SubtagOrderingResult::Subtags(s) => s, SubtagOrderingResult::Ordering(o) => return o, }; match subtags.next() { Some(b"u") => (), Some(s) => return s.cmp(b"u").reverse(), None => return Ordering::Greater, } subtag_result = self.keywords.strict_cmp_iter(subtags); } #[cfg(feature = "experimental")] if let Some(aux) = self.get_aux() { let mut subtags = match subtag_result { SubtagOrderingResult::Subtags(s) => s, SubtagOrderingResult::Ordering(o) => return o, }; match subtags.next() { Some(b"x") => (), Some(s) => return s.cmp(b"x").reverse(), None => return Ordering::Greater, } subtag_result = aux.strict_cmp_iter(subtags); } subtag_result.end() } } impl DataLocale { /// Returns whether this [`DataLocale`] has all empty fields (no components). /// /// See also: /// /// - [`DataLocale::is_und()`] /// - [`DataLocale::is_langid_und()`] /// /// # Examples /// /// ``` /// use icu_provider::DataLocale; /// /// assert!("und".parse::().unwrap().is_empty()); /// assert!(!"und-u-ca-buddhist" /// .parse::() /// .unwrap() /// .is_empty()); /// assert!(!"und-x-aux".parse::().unwrap().is_empty()); /// assert!(!"ca-ES".parse::().unwrap().is_empty()); /// ``` pub fn is_empty(&self) -> bool { self == <&DataLocale>::default() } /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. /// /// This ignores auxiliary keys. /// /// See also: /// /// - [`DataLocale::is_empty()`] /// - [`DataLocale::is_langid_und()`] /// /// # Examples /// /// ``` /// use icu_provider::DataLocale; /// /// assert!("und".parse::().unwrap().is_und()); /// assert!(!"und-u-ca-buddhist".parse::().unwrap().is_und()); /// assert!("und-x-aux".parse::().unwrap().is_und()); /// assert!(!"ca-ES".parse::().unwrap().is_und()); /// ``` pub fn is_und(&self) -> bool { self.langid == LanguageIdentifier::UND && self.keywords.is_empty() } /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. /// /// This ignores extension keywords and auxiliary keys. /// /// See also: /// /// - [`DataLocale::is_empty()`] /// - [`DataLocale::is_und()`] /// /// # Examples /// /// ``` /// use icu_provider::DataLocale; /// /// assert!("und".parse::().unwrap().is_langid_und()); /// assert!("und-u-ca-buddhist" /// .parse::() /// .unwrap() /// .is_langid_und()); /// assert!("und-x-aux".parse::().unwrap().is_langid_und()); /// assert!(!"ca-ES".parse::().unwrap().is_langid_und()); /// ``` pub fn is_langid_und(&self) -> bool { self.langid == LanguageIdentifier::UND } /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. /// /// This may allocate memory if there are variant subtags. If you need only the language, /// script, and/or region subtag, use the specific getters for those subtags: /// /// - [`DataLocale::language()`] /// - [`DataLocale::script()`] /// - [`DataLocale::region()`] /// /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] /// and then access the `id` field. /// /// # Examples /// /// ``` /// use icu_locid::langid; /// use icu_provider::prelude::*; /// /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); /// /// let req_no_langid = DataRequest { /// locale: &Default::default(), /// metadata: Default::default(), /// }; /// /// let req_with_langid = DataRequest { /// locale: &langid!("ar-EG").into(), /// metadata: Default::default(), /// }; /// /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); /// ``` pub fn get_langid(&self) -> LanguageIdentifier { self.langid.clone() } /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. #[inline] pub fn set_langid(&mut self, lid: LanguageIdentifier) { self.langid = lid; } /// Converts this [`DataLocale`] into a [`Locale`]. /// /// See also [`DataLocale::get_langid()`]. /// /// # Examples /// /// ``` /// use icu_locid::{ /// langid, locale, /// subtags::{language, region}, /// Locale, /// }; /// use icu_provider::prelude::*; /// /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into(); /// /// assert_eq!(locale.get_langid(), langid!("it-IT")); /// assert_eq!(locale.language(), language!("it")); /// assert_eq!(locale.script(), None); /// assert_eq!(locale.region(), Some(region!("IT"))); /// /// let locale = locale.into_locale(); /// assert_eq!(locale, locale!("it-IT-u-ca-coptic")); /// ``` /// /// Auxiliary keys are retained: /// /// ``` /// use icu_locid::Locale; /// use icu_provider::prelude::*; /// use writeable::assert_writeable_eq; /// /// let locale: Locale = "und-u-nu-arab-x-gbp".parse().unwrap(); /// let data_locale = DataLocale::from(locale); /// assert_writeable_eq!(data_locale, "und-u-nu-arab-x-gbp"); /// /// let recovered_locale = data_locale.into_locale(); /// assert_writeable_eq!(recovered_locale, "und-u-nu-arab-x-gbp"); /// ``` pub fn into_locale(self) -> Locale { let mut loc = Locale { id: self.langid, ..Default::default() }; loc.extensions.unicode.keywords = self.keywords; #[cfg(feature = "experimental")] if let Some(aux) = self.aux { loc.extensions.private = icu_locid::extensions::private::Private::from_vec_unchecked(aux.iter().collect()); } loc } /// Returns the [`Language`] for this [`DataLocale`]. #[inline] pub fn language(&self) -> Language { self.langid.language } /// Returns the [`Language`] for this [`DataLocale`]. #[inline] pub fn set_language(&mut self, language: Language) { self.langid.language = language; } /// Returns the [`Script`] for this [`DataLocale`]. #[inline] pub fn script(&self) -> Option