summaryrefslogtreecommitdiffstats
path: root/vendor/icu_locid/src/locale.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_locid/src/locale.rs')
-rw-r--r--vendor/icu_locid/src/locale.rs528
1 files changed, 528 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/locale.rs b/vendor/icu_locid/src/locale.rs
new file mode 100644
index 000000000..d7040d31a
--- /dev/null
+++ b/vendor/icu_locid/src/locale.rs
@@ -0,0 +1,528 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ordering::SubtagOrderingResult;
+use crate::parser::{
+ get_subtag_iterator, parse_locale,
+ parse_locale_with_single_variant_single_keyword_unicode_keyword_extension, ParserError,
+ ParserMode,
+};
+use crate::{extensions, subtags, LanguageIdentifier};
+use alloc::string::String;
+use alloc::string::ToString;
+use core::cmp::Ordering;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A core struct representing a [`Unicode Locale Identifier`].
+///
+/// A locale is made of two parts:
+/// * Unicode Language Identifier
+/// * A set of Unicode Extensions
+///
+/// [`Locale`] wraps a [`LanguageIdentifier`] (available through the `id` field) and,
+/// on top of that, is able to parse, manipulate and serialize Unicode extension fields.
+///
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Key, Value};
+/// use icu::locid::{subtags::*, Locale};
+///
+/// let loc: Locale = "en-US-u-ca-buddhist".parse().expect("Failed to parse.");
+///
+/// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(loc.id.script, None);
+/// assert_eq!(loc.id.region, "US".parse::<Region>().ok());
+/// assert_eq!(loc.id.variants.len(), 0);
+/// assert_eq!(loc.to_string(), "en-US-u-ca-buddhist");
+///
+/// let key: Key = "ca".parse().expect("Parsing key failed.");
+/// let value: Value = "buddhist".parse().expect("Parsing value failed.");
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+/// ```
+///
+/// # Parsing
+///
+/// Unicode recognizes three levels of standard conformance for a locale:
+///
+/// * *well-formed* - syntactically correct
+/// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
+/// * *canonical* - valid and no deprecated codes or structure.
+///
+/// At the moment parsing normalizes a well-formed locale identifier converting
+/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
+///
+/// Any bogus subtags will cause the parsing to fail with an error.
+/// No subtag validation or canonicalization is performed.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{subtags::*, Locale};
+///
+/// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12"
+/// .parse()
+/// .expect("Failed to parse.");
+///
+/// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(loc.id.script, "Latn".parse::<Script>().ok());
+/// assert_eq!(loc.id.region, "US".parse::<Region>().ok());
+/// assert_eq!(
+/// loc.id.variants.get(0),
+/// "valencia".parse::<Variant>().ok().as_ref()
+/// );
+/// ```
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier
+// `Debug` is intentionally not derived here: it is implemented by hand elsewhere in this file
+// by delegating to `Writeable`.
+#[derive(Default, PartialEq, Eq, Clone, Hash)]
+#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
+pub struct Locale {
+ /// The basic language/script/region components in the locale identifier along with any variants.
+ pub id: LanguageIdentifier,
+ /// Any extensions present in the locale identifier (for example the Unicode keywords,
+ /// reachable as `extensions.unicode.keywords`).
+ pub extensions: extensions::Extensions,
+}
+
+// Guards against accidental growth of the locale types.
+//
+// The expected values bake in 64-bit pointer-sized fields (a `Vec`, for instance, is three
+// machine words, i.e. 24 bytes), so the test is compiled only for 64-bit targets instead of
+// failing spuriously elsewhere.
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_sizes() {
+ use core::mem::size_of;
+
+ // Language identifier and its subtags.
+ assert_eq!(size_of::<subtags::Language>(), 3);
+ assert_eq!(size_of::<subtags::Script>(), 4);
+ assert_eq!(size_of::<subtags::Region>(), 3);
+ assert_eq!(size_of::<subtags::Variant>(), 8);
+ assert_eq!(size_of::<subtags::Variants>(), 32);
+ assert_eq!(size_of::<LanguageIdentifier>(), 48);
+
+ // Transform extension.
+ assert_eq!(size_of::<extensions::transform::Transform>(), 72);
+ assert_eq!(size_of::<Option<LanguageIdentifier>>(), 48);
+ assert_eq!(size_of::<extensions::transform::Fields>(), 24);
+
+ // Unicode, other and private extensions, then the aggregate.
+ assert_eq!(size_of::<extensions::unicode::Attributes>(), 24);
+ assert_eq!(size_of::<extensions::unicode::Keywords>(), 48);
+ assert_eq!(size_of::<Vec<extensions::other::Other>>(), 24);
+ assert_eq!(size_of::<extensions::private::Private>(), 24);
+ assert_eq!(size_of::<extensions::Extensions>(), 192);
+
+ // The full locale: identifier plus extensions.
+ assert_eq!(size_of::<Locale>(), 240);
+}
+
+impl Locale {
+ /// A constructor which takes a UTF-8 byte slice, parses it and
+ /// produces a well-formed [`Locale`].
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`ParserError`] if the bytes cannot be parsed as a locale identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc = Locale::try_from_bytes("en-US-u-hc-h12".as_bytes())
+ /// .expect("Parsing failed.");
+ ///
+ /// assert_eq!(loc.to_string(), "en-US-u-hc-h12");
+ /// ```
+ pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ parse_locale(v)
+ }
+
+ /// The default undefined locale "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// assert_eq!(Locale::default(), Locale::UND);
+ /// assert_eq!("und", Locale::UND.to_string());
+ /// ```
+ pub const UND: Self = Self {
+ id: LanguageIdentifier::UND,
+ extensions: extensions::Extensions::new(),
+ };
+
+ /// This is a best-effort operation that performs all available levels of canonicalization.
+ ///
+ /// At the moment the operation will normalize casing and the separator, but in the future
+ /// it may also validate and update from deprecated subtags to canonical ones.
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`ParserError`] if `input` cannot be parsed by [`Locale::try_from_bytes`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// assert_eq!(
+ /// Locale::canonicalize("pL_latn_pl-U-HC-H12"),
+ /// Ok("pl-Latn-PL-u-hc-h12".to_string())
+ /// );
+ /// ```
+ pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> {
+ let locale = Self::try_from_bytes(input.as_ref())?;
+ Ok(locale.to_string())
+ }
+
+ /// Compare this [`Locale`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`Locale`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-Latn-PL",
+ /// "und",
+ /// "und-fonipa",
+ /// "und-t-m0-true",
+ /// "und-u-ca-hebrew",
+ /// "und-u-ca-japanese",
+ /// "zh",
+ /// ];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_loc = a.parse::<Locale>().unwrap();
+ /// assert_eq!(a, a_loc.to_string());
+ /// assert!(a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_loc.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+ // Only `-` is treated as a separator here; the input is compared as-is, not normalized.
+ self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+ }
+
+ /// Compare this [`Locale`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`Locale::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// As soon as a subtag differs (or `subtags` runs out first) the result is
+ /// [`SubtagOrderingResult::Ordering`]; if every subtag of `self` matched, the partially
+ /// consumed iterator is handed back in [`SubtagOrderingResult::Subtags`].
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] =
+ /// &[b"ca", b"ES", b"valencia", b"u", b"ca", b"hebrew"];
+ ///
+ /// let loc = locale!("ca-ES-valencia-u-ca-hebrew");
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = locale!("ca-ES-valencia");
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = locale!("ca-ES-valencia-u-nu-arab");
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+ I: Iterator<Item = &'l [u8]>,
+ {
+ // The error channel of `for_each_subtag_str` carries the first non-equal ordering out
+ // of the traversal.
+ let outcome = self.for_each_subtag_str(&mut |subtag| match subtags.next() {
+ Some(other) => match subtag.as_bytes().cmp(other) {
+ Ordering::Equal => Ok(()),
+ not_equal => Err(not_equal),
+ },
+ // `subtags` ran out while `self` still has subtags left, so `self` sorts after it.
+ None => Err(Ordering::Greater),
+ });
+ match outcome {
+ Ok(()) => SubtagOrderingResult::Subtags(subtags),
+ Err(o) => SubtagOrderingResult::Ordering(o),
+ }
+ }
+
+ /// Compare this `Locale` with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string to a `Locale` and then performed a structural comparison.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-LaTn-pL",
+ /// "uNd",
+ /// "UND-FONIPA",
+ /// "UnD-t-m0-TrUe",
+ /// "uNd-u-CA-Japanese",
+ /// "ZH",
+ /// ];
+ ///
+ /// for a in bcp47_strings {
+ /// assert!(a.parse::<Locale>().unwrap().normalizing_eq(a));
+ /// }
+ /// ```
+ pub fn normalizing_eq(&self, other: &str) -> bool {
+ // Pulls the next subtag from `$iter`, parses it as `$T` and checks it against
+ // `$expected`; a missing or unparsable subtag counts as a mismatch.
+ macro_rules! subtag_matches {
+ ($T:ty, $iter:ident, $expected:expr) => {
+ $iter
+ .next()
+ .map_or(false, |b| <$T>::try_from_bytes(b) == Ok($expected))
+ };
+ }
+
+ let mut iter = get_subtag_iterator(other.as_bytes());
+ if !subtag_matches!(subtags::Language, iter, self.id.language) {
+ return false;
+ }
+ // Optional components are only consumed from `other` when `self` has them; a stray
+ // subtag in `other` is caught by a later parse or by the final exhaustion check.
+ if let Some(ref script) = self.id.script {
+ if !subtag_matches!(subtags::Script, iter, *script) {
+ return false;
+ }
+ }
+ if let Some(ref region) = self.id.region {
+ if !subtag_matches!(subtags::Region, iter, *region) {
+ return false;
+ }
+ }
+ for variant in self.id.variants.iter() {
+ if !subtag_matches!(subtags::Variant, iter, *variant) {
+ return false;
+ }
+ }
+ if !self.extensions.is_empty() {
+ // Parse whatever is left of `other` as extensions and compare structurally.
+ match extensions::Extensions::try_from_iter(&mut iter) {
+ Ok(parsed) if self.extensions == parsed => {}
+ _ => return false,
+ }
+ }
+ // Any leftover subtag means `other` carries more than `self` does.
+ iter.next().is_none()
+ }
+
+ // Const-evaluable parser entry point returning the raw components instead of a `Locale`;
+ // it accepts at most one variant and one Unicode keyword, as the name and return type state.
+ // NOTE(review): presumably the entry point used by the `locale!` macro — confirm.
+ #[doc(hidden)]
+ #[allow(clippy::type_complexity)]
+ pub const fn try_from_bytes_with_single_variant_single_keyword_unicode_extension(
+ v: &[u8],
+ ) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+ ),
+ ParserError,
+ > {
+ parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(
+ v,
+ ParserMode::Locale,
+ )
+ }
+
+ /// Calls `f` for the subtags of the language identifier and then for those of the
+ /// extensions, stopping at (and returning) the first error `f` produces.
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.id.for_each_subtag_str(f)?;
+ self.extensions.for_each_subtag_str(f)?;
+ Ok(())
+ }
+}
+
+/// Enables `"…".parse::<Locale>()` by forwarding the string's bytes to
+/// [`Locale::try_from_bytes`].
+impl FromStr for Locale {
+ type Err = ParserError;
+
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ let bytes = s.as_bytes();
+ Self::try_from_bytes(bytes)
+ }
+}
+
+/// Wraps a [`LanguageIdentifier`] in a [`Locale`] whose extensions are left at their default.
+impl From<LanguageIdentifier> for Locale {
+ fn from(id: LanguageIdentifier) -> Self {
+ // Same struct-update form as the other `From` impls in this file.
+ Self {
+ id,
+ ..Default::default()
+ }
+ }
+}
+
+/// Extracts the [`LanguageIdentifier`] from a [`Locale`], discarding its extensions.
+impl From<Locale> for LanguageIdentifier {
+ fn from(loc: Locale) -> Self {
+ let Locale { id, extensions: _ } = loc;
+ id
+ }
+}
+
+/// Borrows the [`LanguageIdentifier`] part (`id`) of the locale; the extensions are not exposed.
+impl AsRef<LanguageIdentifier> for Locale {
+ fn as_ref(&self) -> &LanguageIdentifier {
+ &self.id
+ }
+}
+
+/// Mutably borrows the [`LanguageIdentifier`] part (`id`) of the locale, so it can be edited
+/// in place without touching the extensions.
+impl AsMut<LanguageIdentifier> for Locale {
+ fn as_mut(&mut self) -> &mut LanguageIdentifier {
+ &mut self.id
+ }
+}
+
+/// Debug output is the same text that the `Writeable` implementation produces
+/// (for example `und` for [`Locale::UND`]), rather than a field-by-field dump.
+impl core::fmt::Debug for Locale {
+ fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ writeable::Writeable::write_to(self, formatter)
+ }
+}
+
+// NOTE(review): presumably generates the `Writeable` (and string-formatting) impls for `Locale`
+// on top of `Locale::for_each_subtag_str`; the `_no_test` suffix suggests the macro's built-in
+// test is skipped in favour of `test_writeable` below — confirm against the macro definition.
+impl_writeable_for_each_subtag_str_no_test!(Locale);
+
+// Checks that writing a locale reproduces the (already normalized) tag it was parsed from.
+#[test]
+fn test_writeable() {
+ use writeable::assert_writeable_eq;
+
+ // Tags that must survive a parse/write round trip unchanged.
+ const ROUND_TRIP_TAGS: &[&str] = &[
+ "und-001",
+ "und-Mymr",
+ "my-Mymr-MM",
+ "my-Mymr-MM-posix",
+ "zh-macos-posix",
+ "my-t-my-d0-zawgyi",
+ "ar-SA-u-ca-islamic-civil",
+ "en-001-x-foo-bar",
+ "und-t-m0-true",
+ ];
+
+ // The undefined-locale constant needs no parsing.
+ assert_writeable_eq!(Locale::UND, "und");
+
+ for tag in ROUND_TRIP_TAGS {
+ assert_writeable_eq!(tag.parse::<Locale>().unwrap(), *tag);
+ }
+}
+
+/// Builds a [`Locale`] from a language subtag by way of a [`LanguageIdentifier`]; the
+/// extensions are left at their default.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_language as language;
+/// use icu::locid::Locale;
+///
+/// let language = language!("en");
+/// let loc = Locale::from(language);
+///
+/// assert_eq!(loc.id.language, language);
+/// assert_eq!(loc.to_string(), "en");
+/// ```
+impl From<subtags::Language> for Locale {
+ fn from(language: subtags::Language) -> Self {
+ Self::from(LanguageIdentifier::from(language))
+ }
+}
+
+/// Builds a [`Locale`] from an optional script subtag by way of a [`LanguageIdentifier`]; the
+/// extensions are left at their default.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_script as script;
+/// use icu::locid::Locale;
+///
+/// let script = script!("latn");
+/// let loc = Locale::from(Some(script));
+///
+/// assert_eq!(loc.id.script.unwrap(), script);
+/// assert_eq!(loc.to_string(), "und-Latn");
+/// ```
+impl From<Option<subtags::Script>> for Locale {
+ fn from(script: Option<subtags::Script>) -> Self {
+ Self::from(LanguageIdentifier::from(script))
+ }
+}
+
+/// Builds a [`Locale`] from an optional region subtag by way of a [`LanguageIdentifier`]; the
+/// extensions are left at their default.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_region as region;
+/// use icu::locid::Locale;
+///
+/// let region = region!("US");
+/// let loc = Locale::from(Some(region));
+///
+/// assert_eq!(loc.id.region.unwrap(), region);
+/// assert_eq!(loc.to_string(), "und-US");
+/// ```
+impl From<Option<subtags::Region>> for Locale {
+ fn from(region: Option<subtags::Region>) -> Self {
+ Self::from(LanguageIdentifier::from(region))
+ }
+}
+
+/// Builds a [`Locale`] from a `(language, script, region)` triple by way of a
+/// [`LanguageIdentifier`]; the extensions are left at their default.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{
+/// subtags_language as language, subtags_region as region,
+/// subtags_script as script,
+/// };
+///
+/// let lang = language!("en");
+/// let script = script!("Latn");
+/// let region = region!("US");
+/// let loc = Locale::from((lang, Some(script), Some(region)));
+///
+/// assert_eq!(loc.id.language, lang);
+/// assert_eq!(loc.id.script.unwrap(), script);
+/// assert_eq!(loc.id.region.unwrap(), region);
+/// assert_eq!(loc.id.variants.len(), 0);
+/// assert_eq!(loc.to_string(), "en-Latn-US");
+/// ```
+impl
+ From<(
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ )> for Locale
+{
+ fn from(
+ lsr: (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ ),
+ ) -> Self {
+ Self::from(LanguageIdentifier::from(lsr))
+ }
+}