From 4e8199b572f2035b7749cba276ece3a26630d23e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:21 +0200 Subject: Adding upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_provider/src/request.rs | 513 +++++++++++++++++++++++++++++++++++++ 1 file changed, 513 insertions(+) create mode 100644 vendor/icu_provider/src/request.rs (limited to 'vendor/icu_provider/src/request.rs') diff --git a/vendor/icu_provider/src/request.rs b/vendor/icu_provider/src/request.rs new file mode 100644 index 000000000..7f6bb5911 --- /dev/null +++ b/vendor/icu_provider/src/request.rs @@ -0,0 +1,513 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::cmp::Ordering; +use core::default::Default; +use core::fmt; +use core::fmt::Debug; +use icu_locid::extensions::unicode as unicode_ext; +use icu_locid::subtags::{Language, Region, Script, Variants}; +use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; +use writeable::{LengthHint, Writeable}; + +#[cfg(doc)] +use icu_locid::subtags::Variant; + +/// The request type passed into all data provider implementations. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +#[allow(clippy::exhaustive_structs)] // this type is stable +pub struct DataRequest<'a> { + /// The locale for which to load data. + /// + /// If locale fallback is enabled, the resulting data may be from a different locale + /// than the one requested here. + pub locale: &'a DataLocale, + /// Metadata that may affect the behavior of the data provider. + pub metadata: DataRequestMetadata, +} + +impl fmt::Display for DataRequest<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.locale, f) + } +} + +/// Metadata for data requests. 
This is currently empty, but it may be extended with options +/// for tuning locale fallback, buffer layout, and so forth. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +pub struct DataRequestMetadata; + +/// The main locale type used by the ICU4X data provider. +/// +/// [`DataLocale`] contains less functionality than [`Locale`] but more than +/// [`LanguageIdentifier`] for better size and performance while still meeting +/// the needs of the ICU4X data pipeline. +/// +/// # Examples +/// +/// Convert a [`Locale`] to a [`DataLocale`] and back: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale2.to_string(), "en-u-ca-buddhist"); +/// ``` +/// +/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more +/// efficient than cloning the [`Locale`], but less efficient than converting an owned +/// [`Locale`]: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_provider::DataLocale; +/// +/// let locale1 = locale!("en-u-ca-buddhist"); +/// let data_locale = DataLocale::from(&locale1); +/// let locale2 = data_locale.into_locale(); +/// +/// assert_eq!(locale1, locale2); +/// ``` +/// +/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: +/// +/// ``` +/// use icu_locid::langid; +/// use icu_provider::DataLocale; +/// +/// let langid1 = langid!("es-CA-valencia"); +/// let data_locale = DataLocale::from(langid1); +/// let langid2 = data_locale.get_langid(); +/// +/// assert_eq!(langid2.to_string(), "es-CA-valencia"); +/// ``` +/// +/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data +/// lookup and fallback. This may change in the future. 
+/// +/// ``` +/// use icu_locid::Locale; +/// use icu_provider::DataLocale; +/// +/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" +/// .parse::<Locale>() +/// .unwrap(); +/// let data_locale = DataLocale::from(locale); +/// +/// assert_eq!(data_locale.to_string(), "hi-u-ca-buddhist"); +/// ``` +#[derive(PartialEq, Clone, Default, Eq, Hash)] +pub struct DataLocale { + langid: LanguageIdentifier, + keywords: unicode_ext::Keywords, +} + +impl<'a> Default for &'a DataLocale { + fn default() -> Self { + static DEFAULT: DataLocale = DataLocale { + langid: LanguageIdentifier::UND, + keywords: unicode_ext::Keywords::new(), + }; + &DEFAULT + } +} + +impl fmt::Debug for DataLocale { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DataLocale{{{}}}", self) + } +} + +impl Writeable for DataLocale { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.langid.write_to(sink)?; + if !self.keywords.is_empty() { + sink.write_str("-u-")?; + self.keywords.write_to(sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.langid.writeable_length_hint() + + if !self.keywords.is_empty() { + self.keywords.writeable_length_hint() + 3 + } else { + LengthHint::exact(0) + } + } + + fn write_to_string(&self) -> alloc::borrow::Cow<str> { + if self.keywords.is_empty() { + return self.langid.write_to_string(); + } + let mut string = + alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); + let _ = self.write_to(&mut string); + alloc::borrow::Cow::Owned(string) + } +} + +writeable::impl_display_with_writeable!(DataLocale); + +impl From<LanguageIdentifier> for DataLocale { + fn from(langid: LanguageIdentifier) -> Self { + Self { + langid, + keywords: unicode_ext::Keywords::new(), + } + } +} + +impl From<Locale> for DataLocale { + fn from(locale: Locale) -> Self { + Self { + langid: locale.id, + keywords: locale.extensions.unicode.keywords, + } + } +} + +impl From<&LanguageIdentifier> for DataLocale { + fn from(langid: &LanguageIdentifier) -> Self 
{ + Self { + langid: langid.clone(), + keywords: unicode_ext::Keywords::new(), + } + } +} + +impl From<&Locale> for DataLocale { + fn from(locale: &Locale) -> Self { + Self { + langid: locale.id.clone(), + keywords: locale.extensions.unicode.keywords.clone(), + } + } +} + +impl DataLocale { + /// Compare this [`DataLocale`] with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted this + /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_provider::DataLocale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = &[ + /// "ca-ES", + /// "ca-ES-u-ca-buddhist", + /// "ca-ES-valencia", + /// "pl-Latn-PL", + /// "und", + /// "und-fonipa", + /// "und-u-ca-hebrew", + /// "und-u-ca-japanese", + /// "zh", + /// ]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert!(a.cmp(b) == Ordering::Less); + /// let a_loc: DataLocale = a.parse::<Locale>().unwrap().into(); + /// assert_eq!(a, a_loc.to_string()); + /// assert!( + /// a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal, + /// "{} == {}", + /// a, + /// a + /// ); + /// assert!( + /// a_loc.strict_cmp(b.as_bytes()) == Ordering::Less, + /// "{} < {}", + /// a, + /// b + /// ); + /// let b_loc: DataLocale = b.parse::<Locale>().unwrap().into(); + /// assert_eq!(b, b_loc.to_string()); + /// assert!( + /// b_loc.strict_cmp(b.as_bytes()) == Ordering::Equal, + /// "{} == {}", + /// b, + /// b + /// ); + /// assert!( + /// b_loc.strict_cmp(a.as_bytes()) == Ordering::Greater, + /// "{} > {}", + /// b, + /// a + /// ); + /// } + /// ``` + pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + let subtags = other.split(|b| *b == b'-'); + let 
mut subtag_result = self.langid.strict_cmp_iter(subtags); + if self.has_unicode_ext() { + let mut subtags = match subtag_result { + SubtagOrderingResult::Subtags(s) => s, + SubtagOrderingResult::Ordering(o) => return o, + }; + match subtags.next() { + Some(b"u") => (), + Some(s) => return s.cmp(b"u").reverse(), + None => return Ordering::Greater, + } + subtag_result = self.keywords.strict_cmp_iter(subtags); + } + subtag_result.end() + } +} + +impl DataLocale { + /// Returns whether this [`DataLocale`] has all empty fields (no components). + pub fn is_empty(&self) -> bool { + self == <&DataLocale>::default() + } + + /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. + /// + /// Note that this only checks the language identifier; extension keywords may also be set. + /// To check the entire `DataLocale`, use [`DataLocale::is_empty()`]. + pub fn is_langid_und(&self) -> bool { + self.langid == LanguageIdentifier::UND + } + + /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. + /// + /// This may allocate memory if there are variant subtags. If you need only the language, + /// script, and/or region subtag, use the specific getters for those subtags: + /// + /// - [`DataLocale::language()`] + /// - [`DataLocale::script()`] + /// - [`DataLocale::region()`] + /// + /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] + /// and then access the `id` field. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::langid; + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// + /// let req_no_langid = DataRequest { + /// locale: &Default::default(), + /// metadata: Default::default(), + /// }; + /// + /// let req_with_langid = DataRequest { + /// locale: &langid!("ar-EG").into(), + /// metadata: Default::default(), + /// }; + /// + /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); + /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); + /// ``` + pub fn get_langid(&self) -> LanguageIdentifier { + self.langid.clone() + } + + /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. + #[inline] + pub fn set_langid(&mut self, lid: LanguageIdentifier) { + self.langid = lid; + } + + /// Converts this [`DataLocale`] into a [`Locale`]. + /// + /// See also [`DataLocale::get_langid()`]. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::{ + /// langid, subtags_language as language, subtags_region as region, Locale, + /// }; + /// use icu_provider::prelude::*; + /// + /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47"); + /// let locale: DataLocale = locale.into(); + /// + /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); + /// assert_eq!(locale.get_langid(), langid!("it-IT")); + /// assert_eq!(locale.language(), language!("it")); + /// assert_eq!(locale.script(), None); + /// assert_eq!(locale.region(), Some(region!("IT"))); + /// + /// let locale = locale.into_locale(); + /// assert_eq!(locale.to_string(), "it-IT-u-ca-coptic"); + /// ``` + pub fn into_locale(self) -> Locale { + let mut loc = Locale { + id: self.langid, + ..Default::default() + }; + loc.extensions.unicode.keywords = self.keywords; + loc + } + + /// Returns the [`Language`] for this [`DataLocale`]. 
+ #[inline] + pub fn language(&self) -> Language { + self.langid.language + } + + /// Sets the [`Language`] for this [`DataLocale`]. + #[inline] + pub fn set_language(&mut self, language: Language) { + self.langid.language = language; + } + + /// Returns the [`Script`] for this [`DataLocale`]. + #[inline] + pub fn script(&self) -> Option