summaryrefslogtreecommitdiffstats
path: root/vendor/icu_locid/src/subtags
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_locid/src/subtags')
-rw-r--r--vendor/icu_locid/src/subtags/language.rs107
-rw-r--r--vendor/icu_locid/src/subtags/mod.rs58
-rw-r--r--vendor/icu_locid/src/subtags/region.rs61
-rw-r--r--vendor/icu_locid/src/subtags/script.rs32
-rw-r--r--vendor/icu_locid/src/subtags/variant.rs34
-rw-r--r--vendor/icu_locid/src/subtags/variants.rs134
6 files changed, 426 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/subtags/language.rs b/vendor/icu_locid/src/subtags/language.rs
new file mode 100644
index 000000000..a5ec8d76e
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/language.rs
@@ -0,0 +1,107 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A language subtag (examples: `"en"`, `"csb"`, `"zh"`, `"und"`, etc.)
+ ///
+ /// [`Language`] represents a Unicode base language code conformant to the
+ /// [`unicode_language_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let language: Language =
+ /// "en".parse().expect("Failed to parse a language subtag.");
+ /// ```
+ ///
+ /// If the [`Language`] has no value assigned, it serializes to a string `"und"`, which
+ /// can then be parsed back to an empty [`Language`] field.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// assert_eq!(Language::default().as_str(), "und");
+ /// ```
+ ///
+ /// `Notice`: ICU4X uses a narrow form of language subtag of 2-3 characters.
+ /// The specification allows language subtag to optionally also be 5-8 characters
+ /// but that form has not been used and ICU4X does not support it right now.
+ ///
+ /// [`unicode_language_id`]: https://unicode.org/reports/tr35/#unicode_language_id
+ Language,
+ subtags::Language,
+ subtags_language,
+ 2..=3, // accepted length range, matching the 2-3 character note above
+ s, // name the three expressions below use for the candidate subtag
+ s.is_ascii_alphabetic(), // presumably the validity check applied while parsing -- confirm in the macro
+ s.to_ascii_lowercase(), // presumably the canonicalization step -- confirm in the macro
+ s.is_ascii_alphabetic_lowercase(), // presumably the "already canonical" check -- confirm in the macro
+ InvalidLanguage, // presumably the error reported on failure -- confirm in the macro
+ ["en", "foo"], // look like samples expected to parse -- confirm in the macro
+ ["419", "german", "en1"], // look like samples expected to be rejected -- confirm in the macro
+);
+
+impl Language {
+ /// The default undefined language "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// assert_eq!(Language::default(), Language::UND);
+ /// assert_eq!("und", Language::UND.to_string());
+ /// ```
+ pub const UND: Self = unsafe { Self::from_raw_unchecked(*b"und") }; // SAFETY (assumed -- confirm against the macro-generated from_raw_unchecked): "und" is three lowercase ASCII letters, which fits the 2..=3 lowercase-alphabetic form declared for Language.
+
+ /// Resets the [`Language`] subtag to an empty one (equal to `"und"`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let mut lang: Language = "csb".parse().expect("Parsing failed.");
+ ///
+ /// assert_eq!(lang.as_str(), "csb");
+ ///
+ /// lang.clear();
+ ///
+ /// assert_eq!(lang.as_str(), "und");
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ *self = Self::UND // overwrite in place; "empty" is represented by the UND value
+ }
+
+ /// Tests if the [`Language`] subtag is empty (equal to `"und"`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let mut lang: Language = "und".parse().expect("Parsing failed.");
+ ///
+ /// assert!(lang.is_empty());
+ ///
+ /// lang.clear();
+ ///
+ /// assert!(lang.is_empty());
+ /// ```
+ #[inline]
+ pub fn is_empty(self) -> bool {
+ self == Self::UND // there is no separate "absent" state: empty means equal to UND
+ }
+}
+
+impl Default for Language {
+ fn default() -> Language {
+ Language::UND // the default is the undefined language "und"
+ }
+}
diff --git a/vendor/icu_locid/src/subtags/mod.rs b/vendor/icu_locid/src/subtags/mod.rs
new file mode 100644
index 000000000..bd243a321
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/mod.rs
@@ -0,0 +1,58 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Language Identifier and Locale contain a set of subtags
+//! which represent different fields of the structure.
+//!
+//! * [`Language`] is the only mandatory field, which when empty,
+//! takes the value `und`.
+//! * [`Script`] is an optional field representing the written script used by the locale.
+//! * [`Region`] is an optional field representing the region used by the locale.
+//! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the
+//! variant adjustments used by the locale.
+//!
+//! Subtags can be used in isolation, and all basic operations such as parsing, syntax canonicalization
+//! and serialization are supported on each individual subtag, but most commonly
+//! they are used to construct a [`LanguageIdentifier`] instance.
+//!
+//! [`Variants`] is a special structure which contains a list of [`Variant`] subtags.
+//! It is wrapped around to allow for sorting and deduplication of variants, which
+//! is one of the required steps of language identifier and locale syntax canonicalization.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::subtags::{Language, Region, Script, Variant};
+//!
+//! let language: Language =
+//! "en".parse().expect("Failed to parse a language subtag.");
+//! let script: Script =
+//! "arab".parse().expect("Failed to parse a script subtag.");
+//! let region: Region =
+//! "cn".parse().expect("Failed to parse a region subtag.");
+//! let variant: Variant =
+//! "MacOS".parse().expect("Failed to parse a variant subtag.");
+//!
+//! assert_eq!(language.as_str(), "en");
+//! assert_eq!(script.as_str(), "Arab");
+//! assert_eq!(region.as_str(), "CN");
+//! assert_eq!(variant.as_str(), "macos");
+//! ```
+//!
+//! `Notice`: The subtags are canonicalized on parsing. That means
+//! that all operations work on a canonicalized version of the subtag
+//! and serialization is very cheap.
+//!
+//! [`LanguageIdentifier`]: super::LanguageIdentifier
+mod language;
+mod region;
+mod script;
+mod variant;
+mod variants;
+
+pub use language::Language;
+pub use region::Region;
+pub use script::Script;
+pub use variant::Variant;
+pub use variants::Variants;
diff --git a/vendor/icu_locid/src/subtags/region.rs b/vendor/icu_locid/src/subtags/region.rs
new file mode 100644
index 000000000..f605937ce
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/region.rs
@@ -0,0 +1,61 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A region subtag (examples: `"US"`, `"CN"`, `"AR"` etc.)
+ ///
+ /// [`Region`] represents a Unicode region code conformant to the
+ /// [`unicode_region_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ ///
+ /// let region: Region =
+ /// "DE".parse().expect("Failed to parse a region subtag.");
+ /// ```
+ ///
+ /// [`unicode_region_id`]: https://unicode.org/reports/tr35/#unicode_region_id
+ Region,
+ subtags::Region,
+ subtags_region,
+ 2..=3, // accepted length range
+ s, // name the three expressions below use for the candidate subtag
+ if s.len() == 2 { // presumably the validity check: 2 characters must be letters, otherwise digits
+ s.is_ascii_alphabetic()
+ } else {
+ s.is_ascii_numeric()
+ },
+ if s.len() == 2 { // presumably the canonicalization step: letter codes are uppercased, numeric codes kept as-is
+ s.to_ascii_uppercase()
+ } else {
+ s
+ },
+ if s.len() == 2 { // presumably the "already canonical" check -- confirm in the macro
+ s.is_ascii_alphabetic_uppercase()
+ } else {
+ s.is_ascii_numeric()
+ },
+ InvalidSubtag, // presumably the error reported on failure -- confirm in the macro
+ ["FR", "123"], // look like samples expected to parse -- confirm in the macro
+ ["12", "FRA", "b2"], // look like samples expected to be rejected -- confirm in the macro
+);
+
+impl Region {
+ /// Returns true if the Region has an alphabetic code.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ ///
+ /// let region = Region::try_from_bytes(b"us").expect("Parsing failed.");
+ ///
+ /// assert!(region.is_alphabetic());
+ /// ```
+ pub fn is_alphabetic(&self) -> bool {
+ self.0.len() == 2 // length alone decides: 2-character regions are checked as letters, longer ones as digits
+ }
+}
diff --git a/vendor/icu_locid/src/subtags/script.rs b/vendor/icu_locid/src/subtags/script.rs
new file mode 100644
index 000000000..05eb63d1c
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/script.rs
@@ -0,0 +1,32 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A script subtag (examples: `"Latn"`, `"Arab"`, etc.)
+ ///
+ /// [`Script`] represents a Unicode script code conformant to the
+ /// [`unicode_script_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Script;
+ ///
+ /// let script: Script =
+ /// "Latn".parse().expect("Failed to parse a script subtag.");
+ /// ```
+ ///
+ /// [`unicode_script_id`]: https://unicode.org/reports/tr35/#unicode_script_id
+ Script,
+ subtags::Script,
+ subtags_script,
+ 4..=4, // accepted length range: exactly four
+ s, // name the three expressions below use for the candidate subtag
+ s.is_ascii_alphabetic(), // presumably the validity check applied while parsing -- confirm in the macro
+ s.to_ascii_titlecase(), // presumably the canonicalization step -- confirm in the macro
+ s.is_ascii_alphabetic_titlecase(), // presumably the "already canonical" check -- confirm in the macro
+ InvalidSubtag, // presumably the error reported on failure -- confirm in the macro
+ ["Latn"], // looks like a sample expected to parse -- confirm in the macro
+ ["Latin"], // looks like a sample expected to be rejected -- confirm in the macro
+);
diff --git a/vendor/icu_locid/src/subtags/variant.rs b/vendor/icu_locid/src/subtags/variant.rs
new file mode 100644
index 000000000..96fd7500e
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/variant.rs
@@ -0,0 +1,34 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A variant subtag (examples: `"macos"`, `"posix"`, `"1996"` etc.)
+ ///
+ /// [`Variant`] represents a Unicode variant code conformant to the
+ /// [`unicode_variant_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Variant;
+ ///
+ /// let variant: Variant =
+ /// "macos".parse().expect("Failed to parse a variant subtag.");
+ /// ```
+ ///
+ /// [`unicode_variant_id`]: https://unicode.org/reports/tr35/#unicode_variant_id
+ Variant,
+ subtags::Variant,
+ subtags_variant,
+ 4..=8, // accepted length range
+ s, // name the three expressions below use for the candidate subtag
+ s.is_ascii_alphanumeric() && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()), // alphanumeric; a 4-character variant must additionally start with a digit
+ s.to_ascii_lowercase(), // presumably the canonicalization step -- confirm in the macro
+ s.is_ascii_lowercase() // presumably the "already canonical" check: same rule as above plus lowercase
+ && s.is_ascii_alphanumeric()
+ && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()),
+ InvalidSubtag, // presumably the error reported on failure -- confirm in the macro
+ ["posix", "1996"], // look like samples expected to parse -- confirm in the macro
+ ["yes"], // looks like a sample expected to be rejected -- confirm in the macro
+);
diff --git a/vendor/icu_locid/src/subtags/variants.rs b/vendor/icu_locid/src/subtags/variants.rs
new file mode 100644
index 000000000..bbff9ebac
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/variants.rs
@@ -0,0 +1,134 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::Variant;
+use crate::helpers::ShortVec;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// A list of variants (examples: `["macos", "posix"]`, etc.)
+///
+/// [`Variants`] stores a list of [`Variant`] subtags that is expected to be in canonical
+/// form, i.e. sorted and deduplicated (the example below does this before constructing it).
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags::{Variant, Variants};
+///
+/// let variant1: Variant =
+/// "posix".parse().expect("Failed to parse a variant subtag.");
+///
+/// let variant2: Variant =
+/// "macos".parse().expect("Failed to parse a variant subtag.");
+/// let mut v = vec![variant1, variant2];
+/// v.sort();
+/// v.dedup();
+///
+/// let variants: Variants = Variants::from_vec_unchecked(v);
+/// assert_eq!(variants.to_string(), "macos-posix");
+/// ```
+#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Variants(ShortVec<Variant>); // newtype over the crate-local ShortVec helper
+
+impl Variants {
+ /// Returns a new empty list of variants. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Variants;
+ ///
+ /// assert_eq!(Variants::new(), Variants::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(ShortVec::new())
+ }
+
+ /// Creates a new [`Variants`] set from a single [`Variant`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{Variant, Variants};
+ ///
+ /// let variant: Variant = "posix".parse().expect("Parsing failed.");
+ /// let variants = Variants::from_variant(variant);
+ /// ```
+ #[inline]
+ pub const fn from_variant(variant: Variant) -> Self {
+ Self(ShortVec::new_single(variant)) // a single element is trivially sorted and deduplicated
+ }
+
+ /// Creates a new [`Variants`] set from a [`Vec`].
+ /// The caller is expected to provide sorted and deduplicated vector as
+ /// an input.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{Variant, Variants};
+ ///
+ /// let variant1: Variant = "posix".parse().expect("Parsing failed.");
+ /// let variant2: Variant = "macos".parse().expect("Parsing failed.");
+ /// let mut v = vec![variant1, variant2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let variants = Variants::from_vec_unchecked(v);
+ /// ```
+ ///
+ /// Notice: For performance- and memory-constrained environments, it is recommended
+ /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
+ /// and [`dedup`](Vec::dedup()).
+ pub fn from_vec_unchecked(input: Vec<Variant>) -> Self {
+ Self(ShortVec::from(input)) // "unchecked": this function itself neither sorts nor deduplicates the input
+ }
+
+ /// Empties the [`Variants`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{Variant, Variants};
+ ///
+ /// let variant1: Variant = "posix".parse().expect("Parsing failed.");
+ /// let variant2: Variant = "macos".parse().expect("Parsing failed.");
+ /// let mut v = vec![variant1, variant2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let mut variants: Variants = Variants::from_vec_unchecked(v);
+ ///
+ /// assert_eq!(variants.to_string(), "macos-posix");
+ ///
+ /// variants.clear();
+ ///
+ /// assert_eq!(variants.to_string(), "");
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self) // leaves the Default value behind and hands back the previous contents
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> // calls `f` with each variant's string form, in stored order
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.deref().iter().map(|t| t.as_str()).try_for_each(f) // try_for_each stops at, and returns, the first Err from `f`
+ }
+}
+
+impl_writeable_for_subtag_list!(Variants, "macos", "posix"); // NOTE(review): presumably generates the string-writing impl for Variants, with the literals as sample subtags -- confirm in the macro
+
+impl Deref for Variants {
+ type Target = [Variant]; // callers get slice methods on Variants through auto-deref
+
+ fn deref(&self) -> &[Variant] {
+ self.0.as_slice()
+ }
+}