From 4e8199b572f2035b7749cba276ece3a26630d23e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:21 +0200 Subject: Adding upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_locid/src/subtags/language.rs | 107 ++++++++++++++++++++++++ vendor/icu_locid/src/subtags/mod.rs | 58 +++++++++++++ vendor/icu_locid/src/subtags/region.rs | 61 ++++++++++++++ vendor/icu_locid/src/subtags/script.rs | 32 ++++++++ vendor/icu_locid/src/subtags/variant.rs | 34 ++++++++ vendor/icu_locid/src/subtags/variants.rs | 134 +++++++++++++++++++++++++++++++ 6 files changed, 426 insertions(+) create mode 100644 vendor/icu_locid/src/subtags/language.rs create mode 100644 vendor/icu_locid/src/subtags/mod.rs create mode 100644 vendor/icu_locid/src/subtags/region.rs create mode 100644 vendor/icu_locid/src/subtags/script.rs create mode 100644 vendor/icu_locid/src/subtags/variant.rs create mode 100644 vendor/icu_locid/src/subtags/variants.rs (limited to 'vendor/icu_locid/src/subtags') diff --git a/vendor/icu_locid/src/subtags/language.rs b/vendor/icu_locid/src/subtags/language.rs new file mode 100644 index 000000000..a5ec8d76e --- /dev/null +++ b/vendor/icu_locid/src/subtags/language.rs @@ -0,0 +1,107 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A language subtag (examples: `"en"`, `"csb"`, `"zh"`, `"und"`, etc.) + /// + /// [`Language`] represents a Unicode base language code conformant to the + /// [`unicode_language_id`] field of the Language and Locale Identifier. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Language; + /// + /// let language: Language = + /// "en".parse().expect("Failed to parse a language subtag."); + /// ``` + /// + /// If the [`Language`] has no value assigned, it serializes to a string `"und"`, which + /// can be then parsed back to an empty [`Language`] field. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Language; + /// + /// assert_eq!(Language::default().as_str(), "und"); + /// ``` + /// + /// `Notice`: ICU4X uses a narrow form of language subtag of 2-3 characters. + /// The specification allows language subtag to optionally also be 5-8 characters + /// but that form has not been used and ICU4X does not support it right now. + /// + /// [`unicode_language_id`]: https://unicode.org/reports/tr35/#unicode_language_id + Language, + subtags::Language, + subtags_language, + 2..=3, + s, + s.is_ascii_alphabetic(), + s.to_ascii_lowercase(), + s.is_ascii_alphabetic_lowercase(), + InvalidLanguage, + ["en", "foo"], + ["419", "german", "en1"], +); + +impl Language { + /// The default undefined language "und". Same as [`default()`](Default::default()). + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Language; + /// + /// assert_eq!(Language::default(), Language::UND); + /// assert_eq!("und", Language::UND.to_string()); + /// ``` + pub const UND: Self = unsafe { Self::from_raw_unchecked(*b"und") }; + + /// Resets the [`Language`] subtag to an empty one (equal to `"und"`). + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Language; + /// + /// let mut lang: Language = "csb".parse().expect("Parsing failed."); + /// + /// assert_eq!(lang.as_str(), "csb"); + /// + /// lang.clear(); + /// + /// assert_eq!(lang.as_str(), "und"); + /// ``` + #[inline] + pub fn clear(&mut self) { + *self = Self::UND + } + + /// Tests if the [`Language`] subtag is empty (equal to `"und"`). 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Language; + /// + /// let mut lang: Language = "und".parse().expect("Parsing failed."); + /// + /// assert!(lang.is_empty()); + /// + /// lang.clear(); + /// + /// assert!(lang.is_empty()); + /// ``` + #[inline] + pub fn is_empty(self) -> bool { + self == Self::UND + } +} + +impl Default for Language { + fn default() -> Language { + Language::UND + } +} diff --git a/vendor/icu_locid/src/subtags/mod.rs b/vendor/icu_locid/src/subtags/mod.rs new file mode 100644 index 000000000..bd243a321 --- /dev/null +++ b/vendor/icu_locid/src/subtags/mod.rs @@ -0,0 +1,58 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Language Identifier and Locale contains a set of subtags +//! which represent different fields of the structure. +//! +//! * [`Language`] is the only mandatory field, which when empty, +//! takes the value `und`. +//! * [`Script`] is an optional field representing the written script used by the locale. +//! * [`Region`] is the region used by the locale. +//! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the +//! variant adjustments used by the locale. +//! +//! Subtags can be used in isolation, and all basic operations such as parsing, syntax canonicalization +//! and serialization are supported on each individual subtag, but most commonly +//! they are used to construct a [`LanguageIdentifier`] instance. +//! +//! [`Variants`] is a special structure which contains a list of [`Variant`] subtags. +//! It is wrapped around to allow for sorting and deduplication of variants, which +//! is one of the required steps of language identifier and locale syntax canonicalization. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::subtags::{Language, Region, Script, Variant}; +//! +//! 
let language: Language = +//! "en".parse().expect("Failed to parse a language subtag."); +//! let script: Script = +//! "arab".parse().expect("Failed to parse a script subtag."); +//! let region: Region = +//! "cn".parse().expect("Failed to parse a region subtag."); +//! let variant: Variant = +//! "MacOS".parse().expect("Failed to parse a variant subtag."); +//! +//! assert_eq!(language.as_str(), "en"); +//! assert_eq!(script.as_str(), "Arab"); +//! assert_eq!(region.as_str(), "CN"); +//! assert_eq!(variant.as_str(), "macos"); +//! ``` +//! +//! `Notice`: The subtags are canonicalized on parsing. That means +//! that all operations work on a canonicalized version of the subtag +//! and serialization is very cheap. +//! +//! [`LanguageIdentifier`]: super::LanguageIdentifier +mod language; +mod region; +mod script; +mod variant; +mod variants; + +pub use language::Language; +pub use region::Region; +pub use script::Script; +pub use variant::Variant; +pub use variants::Variants; diff --git a/vendor/icu_locid/src/subtags/region.rs b/vendor/icu_locid/src/subtags/region.rs new file mode 100644 index 000000000..f605937ce --- /dev/null +++ b/vendor/icu_locid/src/subtags/region.rs @@ -0,0 +1,61 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A region subtag (examples: `"US"`, `"CN"`, `"AR"` etc.) + /// + /// [`Region`] represents a Unicode region subtag conformant to the + /// [`unicode_region_id`] field of the Language and Locale Identifier. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Region; + /// + /// let region: Region = + /// "DE".parse().expect("Failed to parse a region subtag."); + /// ``` + /// + /// [`unicode_region_id`]: https://unicode.org/reports/tr35/#unicode_region_id + Region, + subtags::Region, + subtags_region, + 2..=3, + s, + if s.len() == 2 { + s.is_ascii_alphabetic() + } else { + s.is_ascii_numeric() + }, + if s.len() == 2 { + s.to_ascii_uppercase() + } else { + s + }, + if s.len() == 2 { + s.is_ascii_alphabetic_uppercase() + } else { + s.is_ascii_numeric() + }, + InvalidSubtag, + ["FR", "123"], + ["12", "FRA", "b2"], +); + +impl Region { + /// Returns true if the Region has an alphabetic code. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Region; + /// + /// let region = Region::try_from_bytes(b"us").expect("Parsing failed."); + /// + /// assert!(region.is_alphabetic()); + /// ``` + pub fn is_alphabetic(&self) -> bool { + self.0.len() == 2 + } +} diff --git a/vendor/icu_locid/src/subtags/script.rs b/vendor/icu_locid/src/subtags/script.rs new file mode 100644 index 000000000..05eb63d1c --- /dev/null +++ b/vendor/icu_locid/src/subtags/script.rs @@ -0,0 +1,32 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A script subtag (examples: `"Latn"`, `"Arab"`, etc.) + /// + /// [`Script`] represents a Unicode script subtag conformant to the + /// [`unicode_script_id`] field of the Language and Locale Identifier. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Script; + /// + /// let script: Script = + /// "Latn".parse().expect("Failed to parse a script subtag."); + /// ``` + /// + /// [`unicode_script_id`]: https://unicode.org/reports/tr35/#unicode_script_id + Script, + subtags::Script, + subtags_script, + 4..=4, + s, + s.is_ascii_alphabetic(), + s.to_ascii_titlecase(), + s.is_ascii_alphabetic_titlecase(), + InvalidSubtag, + ["Latn"], + ["Latin"], +); diff --git a/vendor/icu_locid/src/subtags/variant.rs b/vendor/icu_locid/src/subtags/variant.rs new file mode 100644 index 000000000..96fd7500e --- /dev/null +++ b/vendor/icu_locid/src/subtags/variant.rs @@ -0,0 +1,34 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A variant subtag (examples: `"macos"`, `"posix"`, `"1996"` etc.) + /// + /// [`Variant`] represents a Unicode variant subtag conformant to the + /// [`unicode_variant_id`] field of the Language and Locale Identifier. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Variant; + /// + /// let variant: Variant = + /// "macos".parse().expect("Failed to parse a variant subtag."); + /// ``` + /// + /// [`unicode_variant_id`]: https://unicode.org/reports/tr35/#unicode_variant_id + Variant, + subtags::Variant, + subtags_variant, + 4..=8, + s, + s.is_ascii_alphanumeric() && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()), + s.to_ascii_lowercase(), + s.is_ascii_lowercase() + && s.is_ascii_alphanumeric() + && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()), + InvalidSubtag, + ["posix", "1996"], + ["yes"], +); diff --git a/vendor/icu_locid/src/subtags/variants.rs b/vendor/icu_locid/src/subtags/variants.rs new file mode 100644 index 000000000..bbff9ebac --- /dev/null +++ b/vendor/icu_locid/src/subtags/variants.rs @@ -0,0 +1,134 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::Variant; +use crate::helpers::ShortVec; + +use alloc::vec::Vec; +use core::ops::Deref; + +/// A list of variants (examples: `["macos", "posix"]`, etc.) +/// +/// [`Variants`] stores a list of [`Variant`] subtags in a canonical form +/// by sorting and deduplicating them. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::subtags::{Variant, Variants}; +/// +/// let variant1: Variant = +/// "posix".parse().expect("Failed to parse a variant subtag."); +/// +/// let variant2: Variant = +/// "macos".parse().expect("Failed to parse a variant subtag."); +/// let mut v = vec![variant1, variant2]; +/// v.sort(); +/// v.dedup(); +/// +/// let variants: Variants = Variants::from_vec_unchecked(v); +/// assert_eq!(variants.to_string(), "macos-posix"); +/// ``` +#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Variants(ShortVec<Variant>); + +impl Variants { + /// Returns a new empty list of variants. 
Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::Variants; + /// + /// assert_eq!(Variants::new(), Variants::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(ShortVec::new()) + } + + /// Creates a new [`Variants`] set from a single [`Variant`]. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::{Variant, Variants}; + /// + /// let variant: Variant = "posix".parse().expect("Parsing failed."); + /// let variants = Variants::from_variant(variant); + /// ``` + #[inline] + pub const fn from_variant(variant: Variant) -> Self { + Self(ShortVec::new_single(variant)) + } + + /// Creates a new [`Variants`] set from a [`Vec`]. + /// The caller is expected to provide a sorted and deduplicated vector as + /// an input. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::{Variant, Variants}; + /// + /// let variant1: Variant = "posix".parse().expect("Parsing failed."); + /// let variant2: Variant = "macos".parse().expect("Parsing failed."); + /// let mut v = vec![variant1, variant2]; + /// v.sort(); + /// v.dedup(); + /// + /// let variants = Variants::from_vec_unchecked(v); + /// ``` + /// + /// Notice: For performance- and memory-constrained environments, it is recommended + /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort) + /// and [`dedup`](Vec::dedup()). + pub fn from_vec_unchecked(input: Vec<Variant>) -> Self { + Self(ShortVec::from(input)) + } + + /// Empties the [`Variants`] list. + /// + /// Returns the old list. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::subtags::{Variant, Variants}; + /// + /// let variant1: Variant = "posix".parse().expect("Parsing failed."); + /// let variant2: Variant = "macos".parse().expect("Parsing failed."); + /// let mut v = vec![variant1, variant2]; + /// v.sort(); + /// v.dedup(); + /// + /// let mut variants: Variants = Variants::from_vec_unchecked(v); + /// + /// assert_eq!(variants.to_string(), "macos-posix"); + /// + /// variants.clear(); + /// + /// assert_eq!(variants.to_string(), ""); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + self.deref().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +impl_writeable_for_subtag_list!(Variants, "macos", "posix"); + +impl Deref for Variants { + type Target = [Variant]; + + fn deref(&self) -> &[Variant] { + self.0.as_slice() + } +} -- cgit v1.2.3