diff options
Diffstat (limited to 'third_party/rust/oxilangtag/src')
-rw-r--r-- | third_party/rust/oxilangtag/src/lib.rs | 923 |
1 files changed, 923 insertions, 0 deletions
diff --git a/third_party/rust/oxilangtag/src/lib.rs b/third_party/rust/oxilangtag/src/lib.rs new file mode 100644 index 0000000000..1fd913e13c --- /dev/null +++ b/third_party/rust/oxilangtag/src/lib.rs @@ -0,0 +1,923 @@ +#![doc = include_str!("../README.md")] +#![deny(unsafe_code)] + +#[cfg(feature = "serde")] +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::{Borrow, Cow}; +use std::cmp::Ordering; +use std::error::Error; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::iter::once; +use std::ops::Deref; +use std::str::{FromStr, Split}; + +/// A [RFC 5646](https://tools.ietf.org/html/rfc5646) language tag. +/// +/// ``` +/// use oxilangtag::LanguageTag; +/// +/// let language_tag = LanguageTag::parse("en-us").unwrap(); +/// assert_eq!(language_tag.into_inner(), "en-us") +/// ``` +#[derive(Copy, Clone)] +pub struct LanguageTag<T> { + tag: T, + positions: TagElementsPositions, +} + +impl<T: Deref<Target = str>> LanguageTag<T> { + /// Parses a language tag acccording to [RFC 5646](https://tools.ietf.org/html/rfc5646). + /// and checks if the tag is ["well-formed"](https://tools.ietf.org/html/rfc5646#section-2.2.9). + /// + /// This operation keeps internally the `tag` parameter and does not allocate on the heap. + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("en-us").unwrap(); + /// assert_eq!(language_tag.into_inner(), "en-us") + /// ``` + pub fn parse(tag: T) -> Result<Self, LanguageTagParseError> { + let positions = parse_language_tag(&tag, &mut VoidOutputBuffer::default())?; + Ok(Self { tag, positions }) + } + + /// Returns the underlying language tag representation. + #[inline] + pub fn as_str(&self) -> &str { + &self.tag + } + + /// Returns the underlying language tag representation. + #[inline] + pub fn into_inner(self) -> T { + self.tag + } + + /// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap(); + /// assert_eq!(language_tag.primary_language(), "zh"); + /// ``` + #[inline] + pub fn primary_language(&self) -> &str { + &self.tag[..self.positions.language_end] + } + + /// Returns the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2). + /// + /// Valid language tags have at most one extended language. + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap(); + /// assert_eq!(language_tag.extended_language(), Some("cmn")); + /// ``` + #[inline] + pub fn extended_language(&self) -> Option<&str> { + if self.positions.language_end == self.positions.extlang_end { + None + } else { + Some(&self.tag[self.positions.language_end + 1..self.positions.extlang_end]) + } + } + + /// Iterates on the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2). + /// + /// Valid language tags have at most one extended language. + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap(); + /// assert_eq!(language_tag.extended_language_subtags().collect::<Vec<_>>(), vec!["cmn"]); + /// ``` + #[inline] + pub fn extended_language_subtags(&self) -> impl Iterator<Item = &str> { + self.extended_language().unwrap_or("").split_terminator('-') + } + + /// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1) + /// and its [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap(); + /// assert_eq!(language_tag.full_language(), "zh-cmn"); + /// ``` + #[inline] + pub fn full_language(&self) -> &str { + &self.tag[..self.positions.extlang_end] + } + + /// Returns the [script subtag](https://tools.ietf.org/html/rfc5646#section-2.2.3). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap(); + /// assert_eq!(language_tag.script(), Some("Hans")); + /// ``` + #[inline] + pub fn script(&self) -> Option<&str> { + if self.positions.extlang_end == self.positions.script_end { + None + } else { + Some(&self.tag[self.positions.extlang_end + 1..self.positions.script_end]) + } + } + + /// Returns the [region subtag](https://tools.ietf.org/html/rfc5646#section-2.2.4). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap(); + /// assert_eq!(language_tag.region(), Some("CN")); + /// ``` + #[inline] + pub fn region(&self) -> Option<&str> { + if self.positions.script_end == self.positions.region_end { + None + } else { + Some(&self.tag[self.positions.script_end + 1..self.positions.region_end]) + } + } + + /// Returns the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap(); + /// assert_eq!(language_tag.variant(), Some("pinyin")); + /// ``` + #[inline] + pub fn variant(&self) -> Option<&str> { + if self.positions.region_end == self.positions.variant_end { + None + } else { + Some(&self.tag[self.positions.region_end + 1..self.positions.variant_end]) + } + } + + /// Iterates on the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap(); + /// assert_eq!(language_tag.variant_subtags().collect::<Vec<_>>(), vec!["pinyin"]); + /// ``` + #[inline] + pub fn variant_subtags(&self) -> impl Iterator<Item = &str> { + self.variant().unwrap_or("").split_terminator('-') + } + + /// Returns the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap(); + /// assert_eq!(language_tag.extension(), Some("u-co-phonebk")); + /// ``` + #[inline] + pub fn extension(&self) -> Option<&str> { + if self.positions.variant_end == self.positions.extension_end { + None + } else { + Some(&self.tag[self.positions.variant_end + 1..self.positions.extension_end]) + } + } + + /// Iterates on the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap(); + /// assert_eq!(language_tag.extension_subtags().collect::<Vec<_>>(), vec![('u', "co-phonebk")]); + /// ``` + #[inline] + pub fn extension_subtags(&self) -> impl Iterator<Item = (char, &str)> { + match self.extension() { + Some(parts) => ExtensionsIterator::new(parts), + None => ExtensionsIterator::new(""), + } + } + + /// Returns the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap(); + /// assert_eq!(language_tag.private_use(), Some("x-foo-bar")); + /// ``` + #[inline] + pub fn private_use(&self) -> Option<&str> { + if self.tag.starts_with("x-") { + Some(&self.tag) + } else if self.positions.extension_end == self.tag.len() { + None + } else { + Some(&self.tag[self.positions.extension_end + 1..]) + } + } + + /// Iterates on the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7). + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap(); + /// assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["foo", "bar"]); + /// ``` + #[inline] + pub fn private_use_subtags(&self) -> impl Iterator<Item = &str> { + self.private_use() + .map(|part| &part[2..]) + .unwrap_or("") + .split_terminator('-') + } +} + +impl LanguageTag<String> { + /// Parses a language tag acccording to [RFC 5646](https://tools.ietf.org/html/rfc5646) + /// and normalizes its case. + /// + /// This parser accepts the language tags that are "well-formed" according to + /// [RFC 5646](https://tools.ietf.org/html/rfc5646#section-2.2.9). + /// + /// This operation does heap allocation. + /// + /// ``` + /// use oxilangtag::LanguageTag; + /// + /// let language_tag = LanguageTag::parse_and_normalize("en-us").unwrap(); + /// assert_eq!(language_tag.into_inner(), "en-US") + /// ``` + pub fn parse_and_normalize(tag: &str) -> Result<Self, LanguageTagParseError> { + let mut output_buffer = String::with_capacity(tag.len()); + let positions = parse_language_tag(tag, &mut output_buffer)?; + Ok(Self { + tag: output_buffer, + positions, + }) + } +} + +impl<Lft: PartialEq<Rhs>, Rhs> PartialEq<LanguageTag<Rhs>> for LanguageTag<Lft> { + #[inline] + fn eq(&self, other: &LanguageTag<Rhs>) -> bool { + self.tag.eq(&other.tag) + } +} + +impl<T: PartialEq<str>> PartialEq<str> for LanguageTag<T> { + #[inline] + fn eq(&self, other: &str) -> bool { + self.tag.eq(other) + } +} + +impl<'a, T: PartialEq<&'a str>> PartialEq<&'a str> for LanguageTag<T> { + #[inline] + fn eq(&self, other: &&'a str) -> bool { + self.tag.eq(other) + } +} + +impl<T: PartialEq<String>> PartialEq<String> for LanguageTag<T> { + #[inline] + fn eq(&self, other: &String) -> bool { + self.tag.eq(other) + } +} + +impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<Cow<'a, str>> for LanguageTag<T> { + #[inline] + fn eq(&self, other: &Cow<'a, str>) -> bool { + self.tag.eq(other) + } +} + +impl<T: PartialEq<str>> PartialEq<LanguageTag<T>> for str { + #[inline] + fn eq(&self, other: &LanguageTag<T>) -> bool { + other.tag.eq(self) + } +} + +impl<'a, T: PartialEq<&'a str>> PartialEq<LanguageTag<T>> for &'a str { + #[inline] + fn eq(&self, other: &LanguageTag<T>) -> bool { + other.tag.eq(self) + } +} + +impl<T: PartialEq<String>> PartialEq<LanguageTag<T>> for String { + #[inline] + fn eq(&self, other: &LanguageTag<T>) -> bool { + other.tag.eq(self) + } +} + +impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<LanguageTag<T>> for Cow<'a, str> { + #[inline] + fn eq(&self, other: &LanguageTag<T>) -> bool { + other.tag.eq(self) + } +} + +impl<T: Eq> Eq for LanguageTag<T> {} + +impl<T: Hash> Hash for LanguageTag<T> { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + self.tag.hash(state) + } +} + +impl<T: PartialOrd> PartialOrd for LanguageTag<T> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.tag.partial_cmp(&other.tag) + } +} + +impl<T: Ord> Ord for LanguageTag<T> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.tag.cmp(&other.tag) + } +} + +impl<T: Deref<Target = str>> Deref for LanguageTag<T> { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + self.tag.deref() + } +} + +impl<T: AsRef<str>> AsRef<str> for LanguageTag<T> { + #[inline] + fn as_ref(&self) -> &str { + self.tag.as_ref() + } +} + +impl<T: Borrow<str>> Borrow<str> for LanguageTag<T> { + #[inline] + fn borrow(&self) -> &str { + self.tag.borrow() + } +} + +impl<T: fmt::Debug> fmt::Debug for LanguageTag<T> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.tag.fmt(f) + } +} + +impl<T: fmt::Display> fmt::Display for LanguageTag<T> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.tag.fmt(f) + } +} + +impl FromStr for LanguageTag<String> { + type Err = LanguageTagParseError; + + #[inline] + fn from_str(tag: &str) -> Result<Self, LanguageTagParseError> { + Self::parse_and_normalize(tag) + } +} + +impl<'a> From<LanguageTag<&'a str>> for LanguageTag<String> { + #[inline] + fn from(tag: LanguageTag<&'a str>) -> Self { + Self { + tag: tag.tag.into(), + positions: tag.positions, + } + } +} + +impl<'a> From<LanguageTag<Cow<'a, str>>> for LanguageTag<String> { + #[inline] + fn from(tag: LanguageTag<Cow<'a, str>>) -> Self { + Self { + tag: tag.tag.into(), + positions: tag.positions, + } + } +} + +impl From<LanguageTag<Box<str>>> for LanguageTag<String> { + #[inline] + fn from(tag: LanguageTag<Box<str>>) -> Self { + Self { + tag: tag.tag.into(), + positions: tag.positions, + } + } +} + +impl<'a> From<LanguageTag<&'a str>> for LanguageTag<Cow<'a, str>> { + #[inline] + fn from(tag: LanguageTag<&'a str>) -> Self { + Self { + tag: tag.tag.into(), + positions: tag.positions, + } + } +} + +impl<'a> From<LanguageTag<String>> for LanguageTag<Cow<'a, str>> { + #[inline] + fn from(tag: LanguageTag<String>) -> Self { + Self { + tag: tag.tag.into(), + positions: tag.positions, + } + } +} + +#[cfg(feature = "serde")] +impl<T: Serialize> Serialize for LanguageTag<T> { + fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + self.tag.serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, T: Deref<Target = str> + Deserialize<'de>> Deserialize<'de> for LanguageTag<T> { + fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<LanguageTag<T>, D::Error> { + use serde::de::Error; + + Self::parse(T::deserialize(deserializer)?).map_err(D::Error::custom) + } +} + +/// An error raised during [`LanguageTag`](struct.LanguageTag.html) validation. +#[derive(Debug)] +pub struct LanguageTagParseError { + kind: TagParseErrorKind, +} + +impl fmt::Display for LanguageTagParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.kind { + TagParseErrorKind::EmptyExtension => { + write!(f, "If an extension subtag is present, it must not be empty") + } + TagParseErrorKind::EmptyPrivateUse => { + write!(f, "If the `x` subtag is present, it must not be empty") + } + TagParseErrorKind::ForbiddenChar => { + write!(f, "The langtag contains a char not allowed") + } + TagParseErrorKind::InvalidSubtag => write!( + f, + "A subtag fails to parse, it does not match any other subtags" + ), + TagParseErrorKind::InvalidLanguage => write!(f, "The given language subtag is invalid"), + TagParseErrorKind::SubtagTooLong => { + write!(f, "A subtag may be eight characters in length at maximum") + } + TagParseErrorKind::EmptySubtag => write!(f, "A subtag should not be empty"), + TagParseErrorKind::TooManyExtlangs => { + write!(f, "At maximum three extlangs are allowed") + } + } + } +} + +impl Error for LanguageTagParseError {} + +#[derive(Debug)] +enum TagParseErrorKind { + /// If an extension subtag is present, it must not be empty. + EmptyExtension, + /// If the `x` subtag is present, it must not be empty. + EmptyPrivateUse, + /// The langtag contains a char that is not A-Z, a-z, 0-9 or the dash. + ForbiddenChar, + /// A subtag fails to parse, it does not match any other subtags. + InvalidSubtag, + /// The given language subtag is invalid. + InvalidLanguage, + /// A subtag may be eight characters in length at maximum. + SubtagTooLong, + /// A subtag should not be empty. + EmptySubtag, + /// At maximum three extlangs are allowed, but zero to one extlangs are preferred. + TooManyExtlangs, +} + +#[derive(Copy, Clone, Debug)] +struct TagElementsPositions { + language_end: usize, + extlang_end: usize, + script_end: usize, + region_end: usize, + variant_end: usize, + extension_end: usize, +} + +trait OutputBuffer: Extend<char> { + fn push(&mut self, c: char); + + fn push_str(&mut self, s: &str); +} + +#[derive(Default)] +struct VoidOutputBuffer {} + +impl OutputBuffer for VoidOutputBuffer { + #[inline] + fn push(&mut self, _: char) {} + + #[inline] + fn push_str(&mut self, _: &str) {} +} + +impl Extend<char> for VoidOutputBuffer { + #[inline] + fn extend<T: IntoIterator<Item = char>>(&mut self, _: T) {} +} + +impl OutputBuffer for String { + #[inline] + fn push(&mut self, c: char) { + self.push(c); + } + + #[inline] + fn push_str(&mut self, s: &str) { + self.push_str(s); + } +} + +/// Parses language tag following [the RFC5646 grammar](https://tools.ietf.org/html/rfc5646#section-2.1) +fn parse_language_tag( + input: &str, + output: &mut impl OutputBuffer, +) -> Result<TagElementsPositions, LanguageTagParseError> { + //grandfathered tags + if let Some(tag) = GRANDFATHEREDS + .iter() + .find(|record| record.eq_ignore_ascii_case(input)) + { + output.push_str(tag); + Ok(TagElementsPositions { + language_end: tag.len(), + extlang_end: tag.len(), + script_end: tag.len(), + region_end: tag.len(), + variant_end: tag.len(), + extension_end: tag.len(), + }) + } else if input.starts_with("x-") || input.starts_with("X-") { + // private use + if !is_alphanumeric_or_dash(input) { + Err(LanguageTagParseError { + kind: TagParseErrorKind::ForbiddenChar, + }) + } else if input.len() == 2 { + Err(LanguageTagParseError { + kind: TagParseErrorKind::EmptyPrivateUse, + }) + } else { + output.extend(input.chars().map(|c| c.to_ascii_lowercase())); + Ok(TagElementsPositions { + language_end: input.len(), + extlang_end: input.len(), + script_end: input.len(), + region_end: input.len(), + variant_end: input.len(), + extension_end: input.len(), + }) + } + } else { + parse_langtag(input, output) + } +} + +/// Handles normal tags. +fn parse_langtag( + input: &str, + output: &mut impl OutputBuffer, +) -> Result<TagElementsPositions, LanguageTagParseError> { + #[derive(PartialEq, Eq)] + enum State { + Start, + AfterLanguage, + AfterExtLang, + AfterScript, + AfterRegion, + InExtension { expected: bool }, + InPrivateUse { expected: bool }, + } + + let mut state = State::Start; + let mut language_end = 0; + let mut extlang_end = 0; + let mut script_end = 0; + let mut region_end = 0; + let mut variant_end = 0; + let mut extension_end = 0; + let mut extlangs_count = 0; + for (subtag, end) in SubTagIterator::new(input) { + if subtag.is_empty() { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::EmptySubtag, + }); + } + if subtag.len() > 8 { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::SubtagTooLong, + }); + } + if state == State::Start { + // Primary language + if subtag.len() < 2 || !is_alphabetic(subtag) { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::InvalidLanguage, + }); + } + language_end = end; + output.extend(to_lowercase(subtag)); + if subtag.len() < 4 { + // extlangs are only allowed for short language tags + state = State::AfterLanguage; + } else { + state = State::AfterExtLang; + } + } else if let State::InPrivateUse { .. } = state { + if !is_alphanumeric(subtag) { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::InvalidSubtag, + }); + } + output.push('-'); + output.extend(to_lowercase(subtag)); + state = State::InPrivateUse { expected: false }; + } else if subtag == "x" || subtag == "X" { + // We make sure extension is found + if let State::InExtension { expected: true } = state { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::EmptyExtension, + }); + } + output.push('-'); + output.push('x'); + state = State::InPrivateUse { expected: true }; + } else if subtag.len() == 1 && is_alphanumeric(subtag) { + // We make sure extension is found + if let State::InExtension { expected: true } = state { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::EmptyExtension, + }); + } + let extension_tag = subtag.chars().next().unwrap().to_ascii_lowercase(); + output.push('-'); + output.push(extension_tag); + state = State::InExtension { expected: true }; + } else if let State::InExtension { .. } = state { + if !is_alphanumeric(subtag) { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::InvalidSubtag, + }); + } + extension_end = end; + output.push('-'); + output.extend(to_lowercase(subtag)); + state = State::InExtension { expected: false }; + } else if state == State::AfterLanguage && subtag.len() == 3 && is_alphabetic(subtag) { + extlangs_count += 1; + if extlangs_count > 3 { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::TooManyExtlangs, + }); + } + // valid extlangs + extlang_end = end; + output.push('-'); + output.extend(to_lowercase(subtag)); + } else if (state == State::AfterLanguage || state == State::AfterExtLang) + && subtag.len() == 4 + && is_alphabetic(subtag) + { + // Script + script_end = end; + output.push('-'); + output.extend(to_uppercase_first(subtag)); + state = State::AfterScript; + } else if (state == State::AfterLanguage + || state == State::AfterExtLang + || state == State::AfterScript) + && (subtag.len() == 2 && is_alphabetic(subtag) + || subtag.len() == 3 && is_numeric(subtag)) + { + // Region + region_end = end; + output.push('-'); + output.extend(to_uppercase(subtag)); + state = State::AfterRegion; + } else if (state == State::AfterLanguage + || state == State::AfterExtLang + || state == State::AfterScript + || state == State::AfterRegion) + && is_alphanumeric(subtag) + && (subtag.len() >= 5 && is_alphabetic(&subtag[0..1]) + || subtag.len() >= 4 && is_numeric(&subtag[0..1])) + { + // Variant + variant_end = end; + output.push('-'); + output.extend(to_lowercase(subtag)); + state = State::AfterRegion; + } else { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::InvalidSubtag, + }); + } + } + + //We make sure we are in a correct final state + if let State::InExtension { expected: true } = state { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::EmptyExtension, + }); + } + if let State::InPrivateUse { expected: true } = state { + return Err(LanguageTagParseError { + kind: TagParseErrorKind::EmptyPrivateUse, + }); + } + + //We make sure we have not skipped anyone + if extlang_end < language_end { + extlang_end = language_end; + } + if script_end < extlang_end { + script_end = extlang_end; + } + if region_end < script_end { + region_end = script_end; + } + if variant_end < region_end { + variant_end = region_end; + } + if extension_end < variant_end { + extension_end = variant_end; + } + + Ok(TagElementsPositions { + language_end, + extlang_end, + script_end, + region_end, + variant_end, + extension_end, + }) +} + +struct ExtensionsIterator<'a> { + input: &'a str, +} + +impl<'a> ExtensionsIterator<'a> { + fn new(input: &'a str) -> Self { + Self { input } + } +} + +impl<'a> Iterator for ExtensionsIterator<'a> { + type Item = (char, &'a str); + + fn next(&mut self) -> Option<(char, &'a str)> { + let mut parts_iterator = self.input.split_terminator('-'); + let singleton = parts_iterator.next()?.chars().next().unwrap(); + let mut content_size: usize = 2; + for part in parts_iterator { + if part.len() == 1 { + let content = &self.input[2..content_size - 1]; + self.input = &self.input[content_size..]; + return Some((singleton, content)); + } else { + content_size += part.len() + 1; + } + } + let result = self.input.get(2..).map(|content| (singleton, content)); + self.input = ""; + result + } +} + +struct SubTagIterator<'a> { + split: Split<'a, char>, + position: usize, +} + +impl<'a> SubTagIterator<'a> { + #[inline] + fn new(input: &'a str) -> Self { + Self { + split: input.split('-'), + position: 0, + } + } +} + +impl<'a> Iterator for SubTagIterator<'a> { + type Item = (&'a str, usize); + + #[inline] + fn next(&mut self) -> Option<(&'a str, usize)> { + let tag = self.split.next()?; + let tag_end = self.position + tag.len(); + self.position = tag_end + 1; + Some((tag, tag_end)) + } +} + +#[inline] +fn is_alphabetic(s: &str) -> bool { + s.chars().all(|x| x.is_ascii_alphabetic()) +} + +#[inline] +fn is_numeric(s: &str) -> bool { + s.chars().all(|x| x.is_ascii_digit()) +} + +#[inline] +fn is_alphanumeric(s: &str) -> bool { + s.chars().all(|x| x.is_ascii_alphanumeric()) +} + +#[inline] +fn is_alphanumeric_or_dash(s: &str) -> bool { + s.chars().all(|x| x.is_ascii_alphanumeric() || x == '-') +} + +#[inline] +fn to_uppercase(s: &str) -> impl Iterator<Item = char> + '_ { + s.chars().map(|c| c.to_ascii_uppercase()) +} + +// Beware: panics if s.len() == 0 (should never happen in our code) +#[inline] +fn to_uppercase_first(s: &str) -> impl Iterator<Item = char> + '_ { + let mut chars = s.chars(); + once(chars.next().unwrap().to_ascii_uppercase()).chain(chars.map(|c| c.to_ascii_lowercase())) +} + +#[inline] +fn to_lowercase(s: &str) -> impl Iterator<Item = char> + '_ { + s.chars().map(|c| c.to_ascii_lowercase()) +} + +const GRANDFATHEREDS: [&str; 26] = [ + "art-lojban", + "cel-gaulish", + "en-GB-oed", + "i-ami", + "i-bnn", + "i-default", + "i-enochian", + "i-hak", + "i-klingon", + "i-lux", + "i-mingo", + "i-navajo", + "i-pwn", + "i-tao", + "i-tay", + "i-tsu", + "no-bok", + "no-nyn", + "sgn-BE-FR", + "sgn-BE-NL", + "sgn-CH-DE", + "zh-guoyu", + "zh-hakka", + "zh-min", + "zh-min-nan", + "zh-xiang", +]; |