diff options
Diffstat (limited to 'vendor/icu_provider/src/request.rs')
-rw-r--r-- | vendor/icu_provider/src/request.rs | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/icu_provider/src/request.rs b/vendor/icu_provider/src/request.rs new file mode 100644 index 000000000..7f6bb5911 --- /dev/null +++ b/vendor/icu_provider/src/request.rs @@ -0,0 +1,513 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::cmp::Ordering; +use core::default::Default; +use core::fmt; +use core::fmt::Debug; +use icu_locid::extensions::unicode as unicode_ext; +use icu_locid::subtags::{Language, Region, Script, Variants}; +use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; +use writeable::{LengthHint, Writeable}; + +#[cfg(doc)] +use icu_locid::subtags::Variant; + +/// The request type passed into all data provider implementations. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +#[allow(clippy::exhaustive_structs)] // this type is stable +pub struct DataRequest<'a> { + /// The locale for which to load data. + /// + /// If locale fallback is enabled, the resulting data may be from a different locale + /// than the one requested here. + pub locale: &'a DataLocale, + /// Metadata that may affect the behavior of the data provider. + pub metadata: DataRequestMetadata, +} + +impl fmt::Display for DataRequest<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.locale, f) + } +} + +/// Metadata for data requests. This is currently empty, but it may be extended with options +/// for tuning locale fallback, buffer layout, and so forth. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataRequestMetadata; + +/// The main locale type used by the ICU4X data provider. +/// +/// [`DataLocale`] contains less functionality than [`Locale`] but more than +/// [`LanguageIdentifier`] for better size and performance while still meeting +/// the needs of the ICU4X data pipeline. 
+/// +/// # Examples +/// +/// Convert a [`Locale`] to a [`DataLocale`] and back: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale2.to_string(), "en-u-ca-buddhist"); +/// ``` +/// +/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more +/// efficient than cloning the [`Locale`], but less efficient than converting an owned +/// [`Locale`]: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(&locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale1, locale2); +/// ``` +/// +/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: +/// +/// ``` +/// use icu_locid::langid; +/// use icu_provider::DataLocale; +/// +/// let langid1 = langid!("es-CA-valencia"); +/// let data_locale = DataLocale::from(langid1); +/// let langid2 = data_locale.get_langid(); +/// +/// assert_eq!(langid2.to_string(), "es-CA-valencia"); +/// ``` +/// +/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data +/// lookup and fallback. This may change in the future. 
+/// +/// ``` +/// use icu_locid::Locale; +/// use icu_provider::DataLocale; +/// +/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" +/// .parse::<Locale>() +/// .unwrap(); +/// let data_locale = DataLocale::from(locale); +/// +/// assert_eq!(data_locale.to_string(), "hi-u-ca-buddhist"); +/// ``` +#[derive(PartialEq, Clone, Default, Eq, Hash)] +pub struct DataLocale { + langid: LanguageIdentifier, + keywords: unicode_ext::Keywords, +} + +impl<'a> Default for &'a DataLocale { + fn default() -> Self { + static DEFAULT: DataLocale = DataLocale { + langid: LanguageIdentifier::UND, + keywords: unicode_ext::Keywords::new(), + }; + &DEFAULT + } +} + +impl fmt::Debug for DataLocale { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DataLocale{{{}}}", self) + } +} + +impl Writeable for DataLocale { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.langid.write_to(sink)?; + if !self.keywords.is_empty() { + sink.write_str("-u-")?; + self.keywords.write_to(sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.langid.writeable_length_hint() + + if !self.keywords.is_empty() { + self.keywords.writeable_length_hint() + 3 + } else { + LengthHint::exact(0) + } + } + + fn write_to_string(&self) -> alloc::borrow::Cow<str> { + if self.keywords.is_empty() { + return self.langid.write_to_string(); + } + let mut string = + alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); + let _ = self.write_to(&mut string); + alloc::borrow::Cow::Owned(string) + } +} + +writeable::impl_display_with_writeable!(DataLocale); + +impl From<LanguageIdentifier> for DataLocale { + fn from(langid: LanguageIdentifier) -> Self { + Self { + langid, + keywords: unicode_ext::Keywords::new(), + } + } +} + +impl From<Locale> for DataLocale { + fn from(locale: Locale) -> Self { + Self { + langid: locale.id, + keywords: locale.extensions.unicode.keywords, + } + } +} + +impl 
From<&LanguageIdentifier> for DataLocale { + fn from(langid: &LanguageIdentifier) -> Self { + Self { + langid: langid.clone(), + keywords: unicode_ext::Keywords::new(), + } + } +} + +impl From<&Locale> for DataLocale { + fn from(locale: &Locale) -> Self { + Self { + langid: locale.id.clone(), + keywords: locale.extensions.unicode.keywords.clone(), + } + } +} + +impl DataLocale { + /// Compare this [`DataLocale`] with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted this + /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::DataLocale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = &[ + /// "ca-ES", + /// "ca-ES-u-ca-buddhist", + /// "ca-ES-valencia", + /// "pl-Latn-PL", + /// "und", + /// "und-fonipa", + /// "und-u-ca-hebrew", + /// "und-u-ca-japanese", + /// "zh", + /// ]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert!(a.cmp(b) == Ordering::Less); + /// let a_loc: DataLocale = a.parse::<Locale>().unwrap().into(); + /// assert_eq!(a, a_loc.to_string()); + /// assert!( + /// a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal, + /// "{} == {}", + /// a, + /// a + /// ); + /// assert!( + /// a_loc.strict_cmp(b.as_bytes()) == Ordering::Less, + /// "{} < {}", + /// a, + /// b + /// ); + /// let b_loc: DataLocale = b.parse::<Locale>().unwrap().into(); + /// assert_eq!(b, b_loc.to_string()); + /// assert!( + /// b_loc.strict_cmp(b.as_bytes()) == Ordering::Equal, + /// "{} == {}", + /// b, + /// b + /// ); + /// assert!( + /// b_loc.strict_cmp(a.as_bytes()) == Ordering::Greater, + /// "{} > {}", + /// b, + /// a + /// ); + /// } + /// ``` 
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + let subtags = other.split(|b| *b == b'-'); + let mut subtag_result = self.langid.strict_cmp_iter(subtags); + if self.has_unicode_ext() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"u") => (), + Some(s) => return s.cmp(b"u").reverse(), + None => return Ordering::Greater, + } + subtag_result = self.keywords.strict_cmp_iter(subtags); + } + subtag_result.end() + } +} + +impl DataLocale { + /// Returns whether this [`DataLocale`] has all empty fields (no components). + pub fn is_empty(&self) -> bool { + self == <&DataLocale>::default() + } + + /// Returns whether the [`LanguageIdentifier`] associated with this [`DataLocale`] is `und`. + /// + /// Note that this only checks the language identifier; extension keywords may also be set. + /// To check the entire `DataLocale`, use [`DataLocale::is_empty()`]. + pub fn is_langid_und(&self) -> bool { + self.langid == LanguageIdentifier::UND + } + + /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. + /// + /// This may allocate memory if there are variant subtags. If you need only the language, + /// script, and/or region subtag, use the specific getters for those subtags: + /// + /// - [`DataLocale::language()`] + /// - [`DataLocale::script()`] + /// - [`DataLocale::region()`] + /// + /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] + /// and then access the `id` field.
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::langid; + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// + /// let req_no_langid = DataRequest { + /// locale: &Default::default(), + /// metadata: Default::default(), + /// }; + /// + /// let req_with_langid = DataRequest { + /// locale: &langid!("ar-EG").into(), + /// metadata: Default::default(), + /// }; + /// + /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); + /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); + /// ``` + pub fn get_langid(&self) -> LanguageIdentifier { + self.langid.clone() + } + + /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. + #[inline] + pub fn set_langid(&mut self, lid: LanguageIdentifier) { + self.langid = lid; + } + + /// Converts this [`DataLocale`] into a [`Locale`]. + /// + /// See also [`DataLocale::get_langid()`]. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// langid, subtags_language as language, subtags_region as region, Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47"); + /// let locale: DataLocale = locale.into(); + /// + /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); + /// assert_eq!(locale.get_langid(), langid!("it-IT")); + /// assert_eq!(locale.language(), language!("it")); + /// assert_eq!(locale.script(), None); + /// assert_eq!(locale.region(), Some(region!("IT"))); + /// + /// let locale = locale.into_locale(); + /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); + /// ``` + pub fn into_locale(self) -> Locale { + let mut loc = Locale { + id: self.langid, + ..Default::default() + }; + loc.extensions.unicode.keywords = self.keywords; + loc + } + + /// Returns the [`Language`] for this [`DataLocale`]. 
+ #[inline] + pub fn language(&self) -> Language { + self.langid.language + } + + /// Sets the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn set_language(&mut self, language: Language) { + self.langid.language = language; + } + + /// Returns the [`Script`] for this [`DataLocale`]. + #[inline] + pub fn script(&self) -> Option<Script> { + self.langid.script + } + + /// Sets the [`Script`] for this [`DataLocale`]. + #[inline] + pub fn set_script(&mut self, script: Option<Script>) { + self.langid.script = script; + } + + /// Returns the [`Region`] for this [`DataLocale`]. + #[inline] + pub fn region(&self) -> Option<Region> { + self.langid.region + } + + /// Sets the [`Region`] for this [`DataLocale`]. + #[inline] + pub fn set_region(&mut self, region: Option<Region>) { + self.langid.region = region; + } + + /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`]. + #[inline] + pub fn has_variants(&self) -> bool { + !self.langid.variants.is_empty() + } + + /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously. + #[inline] + pub fn set_variants(&mut self, variants: Variants) { + self.langid.variants = variants; + } + + /// Removes all [`Variant`] subtags in this [`DataLocale`], returning the old [`Variants`]. + #[inline] + pub fn clear_variants(&mut self) -> Variants { + self.langid.variants.clear() + } + + /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`]. + #[inline] + pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> { + self.keywords.get(key).cloned() + } + + /// Returns whether there are any Unicode extension keywords in this [`DataLocale`]. + #[inline] + pub fn has_unicode_ext(&self) -> bool { + !self.keywords.is_empty() + } + + /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`].
+ #[inline] + pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool { + self.keywords.contains_key(key) + } + + /// Returns whether this [`DataLocale`] contains a Unicode extension keyword + /// with the specified key and value. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// extensions_unicode_key as key, extensions_unicode_value as value, + /// Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47"); + /// let locale: DataLocale = locale.into(); + /// + /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None); + /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic"))); + /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),)); + /// ``` + #[inline] + pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool { + self.keywords.get(key) == Some(value) + } + + /// Sets a Unicode extension keyword on this [`DataLocale`], returning the old value if any. + #[inline] + pub fn set_unicode_ext( + &mut self, + key: unicode_ext::Key, + value: unicode_ext::Value, + ) -> Option<unicode_ext::Value> { + self.keywords.set(key, value) + } + + /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning + /// the value if it was present. + #[inline] + pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> { + self.keywords.remove(key) + } + + /// Retains a subset of keywords as specified by the predicate function.
+ #[inline] + pub fn retain_unicode_ext<F>(&mut self, predicate: F) + where + F: FnMut(&unicode_ext::Key) -> bool, + { + self.keywords.retain_by_key(predicate) + } +} + +#[test] +fn test_data_locale_to_string() { + struct TestCase { + pub locale: DataLocale, + pub expected: &'static str, + } + + for cas in [ + TestCase { + locale: Locale::UND.into(), + expected: "und", + }, + TestCase { + locale: "und-u-cu-gbp".parse::<Locale>().unwrap().into(), + expected: "und-u-cu-gbp", + }, + TestCase { + locale: "en-ZA-u-cu-gbp".parse::<Locale>().unwrap().into(), + expected: "en-ZA-u-cu-gbp", + }, + ] { + assert_eq!(cas.expected, cas.locale.to_string()); + writeable::assert_writeable_eq!(&cas.locale, cas.expected); + } +} |