summaryrefslogtreecommitdiffstats
path: root/third_party/rust/oxilangtag
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/oxilangtag
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/oxilangtag')
-rw-r--r--third_party/rust/oxilangtag/.cargo-checksum.json1
-rw-r--r--third_party/rust/oxilangtag/CHANGELOG.md16
-rw-r--r--third_party/rust/oxilangtag/Cargo.toml39
-rw-r--r--third_party/rust/oxilangtag/LICENSE19
-rw-r--r--third_party/rust/oxilangtag/README.md46
-rw-r--r--third_party/rust/oxilangtag/benches/lib.rs77
-rw-r--r--third_party/rust/oxilangtag/deny.toml11
-rw-r--r--third_party/rust/oxilangtag/src/lib.rs923
-rw-r--r--third_party/rust/oxilangtag/tests/lib.rs722
9 files changed, 1854 insertions, 0 deletions
diff --git a/third_party/rust/oxilangtag/.cargo-checksum.json b/third_party/rust/oxilangtag/.cargo-checksum.json
new file mode 100644
index 0000000000..979e09920e
--- /dev/null
+++ b/third_party/rust/oxilangtag/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"CHANGELOG.md":"3d0f3240ed450d19b894dd8715e20bbec50a14eb0d357df8c09a4af1f19fc831","Cargo.toml":"b8414a40b2cdeb5b34dc4b7e79a5e192b56b953d9db1a762dbf3e8728074dd6a","LICENSE":"3fe41c99abc306c2cd34a9365b1810035ae93335ebf4736c0240b469b3f410eb","README.md":"fc98b140225bc0521a136c2c1ed8146f7398349a36d52481f97d8ec2b7679619","benches/lib.rs":"61c94b95e005c0df25ff740ddc7801d65f68bd6e00c0b8aca7eeb66b103f9eea","deny.toml":"fce6beebdde75e3950abfd230b5110d485f2daf5a333cc77b447669593fa7c62","src/lib.rs":"92c85f535a42b8dde8c2f3078c61e4e1580d326ac621eba2f410bdee521be41d","tests/lib.rs":"9927c137f39094cfd8fbcf56069a047818112374148e8950fd73708e9ae0382a"},"package":"8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"} \ No newline at end of file
diff --git a/third_party/rust/oxilangtag/CHANGELOG.md b/third_party/rust/oxilangtag/CHANGELOG.md
new file mode 100644
index 0000000000..dd34d7e49b
--- /dev/null
+++ b/third_party/rust/oxilangtag/CHANGELOG.md
@@ -0,0 +1,16 @@
+# Changelog
+
+## [0.1.3] - 2022-03-26
+
+### Added
+- `LanguageTag` now implements the Serde `Serialize` and `Deserialize` traits if the `serde` crate is present.
+ The serialization is a plain string.
+
+
+## [0.1.2] - 2021-04-16
+
+### Added
+- `LanguageTag` struct with a parser, case normalization and components accessors.
+
+### Changed
+- Proper attribution from [`language-tags`](https://github.com/pyfisch/rust-language-tags/).
diff --git a/third_party/rust/oxilangtag/Cargo.toml b/third_party/rust/oxilangtag/Cargo.toml
new file mode 100644
index 0000000000..eda4c2d70a
--- /dev/null
+++ b/third_party/rust/oxilangtag/Cargo.toml
@@ -0,0 +1,39 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "oxilangtag"
+version = "0.1.3"
+authors = ["Tpt <thomas@pellissier-tanon.fr>"]
+description = "Simple and fast implementation of language tag normalization and validation\n"
+readme = "README.md"
+keywords = ["language-tag", "BCP47"]
+license = "MIT"
+repository = "https://github.com/oxigraph/oxilangtag"
+[package.metadata.docs.rs]
+all-features = true
+
+[[bench]]
+name = "lib"
+harness = false
+[dependencies.serde]
+version = "1"
+optional = true
+[dev-dependencies.criterion]
+version = "0.3"
+
+[dev-dependencies.serde_test]
+version = "1"
+
+[features]
+default = []
+serialize = ["serde"]
diff --git a/third_party/rust/oxilangtag/LICENSE b/third_party/rust/oxilangtag/LICENSE
new file mode 100644
index 0000000000..155b0903ee
--- /dev/null
+++ b/third_party/rust/oxilangtag/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2015-2021 Pyfisch Tpt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/rust/oxilangtag/README.md b/third_party/rust/oxilangtag/README.md
new file mode 100644
index 0000000000..933e8e011c
--- /dev/null
+++ b/third_party/rust/oxilangtag/README.md
@@ -0,0 +1,46 @@
+oxilangtag
+==========
+
+[![actions status](https://github.com/oxigraph/oxilangtag/workflows/build/badge.svg)](https://github.com/oxigraph/oxilangtag/actions)
+[![Latest Version](https://img.shields.io/crates/v/oxilangtag.svg)](https://crates.io/crates/oxilangtag)
+[![Released API docs](https://docs.rs/oxilangtag/badge.svg)](https://docs.rs/oxilangtag)
+
+OxiLangTag is a Rust library for validating and normalizing language tags following [RFC 5646](https://tools.ietf.org/html/rfc5646)
+([BCP 47](https://tools.ietf.org/html/bcp47)).
+
+It is a fork of the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) focusing on [RDF use cases](https://www.w3.org/TR/rdf11-primer/).
+You might find the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) crate more convenient.
+
+It allows language tag validation without any heap allocation.
+Getters are also provided to easily retrieve the various language tag components.
+
+If [`serde`](https://serde.rs/) is available, `LanguageTag` implements the `Serialize` and `Deserialize` traits and encodes the language tag as a string.
+
+Example:
+```rust
+use oxilangtag::LanguageTag;
+
+// Parsing and validation
+let language_tag = LanguageTag::parse("zh-cmn-Hans-CN-x-test").unwrap();
+assert_eq!(language_tag.as_str(), "zh-cmn-Hans-CN-x-test");
+
+// Language tag components
+assert_eq!(language_tag.primary_language(), "zh");
+assert_eq!(language_tag.extended_language(), Some("cmn"));
+assert_eq!(language_tag.full_language(), "zh-cmn");
+assert_eq!(language_tag.script(), Some("Hans"));
+assert_eq!(language_tag.region(), Some("CN"));
+assert_eq!(language_tag.extension(), None);
+assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["test"]);
+```
+
+## License
+
+This project is licensed under the MIT license ([LICENSE](LICENSE) or `<http://opensource.org/licenses/MIT>`).
+
+It is based on the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) crate by [pyfisch](https://github.com/pyfisch) under MIT license.
+
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxilangtag by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
diff --git a/third_party/rust/oxilangtag/benches/lib.rs b/third_party/rust/oxilangtag/benches/lib.rs
new file mode 100644
index 0000000000..2bc5a9722a
--- /dev/null
+++ b/third_party/rust/oxilangtag/benches/lib.rs
@@ -0,0 +1,77 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use oxilangtag::LanguageTag;
+
+fn bench_language_tag_parse(c: &mut Criterion) {
+ let examples = [
+ "fr",
+ "fr-Latn",
+ "fr-fra",
+ "fr-Latn-FR",
+ "fr-Latn-419",
+ "fr-FR",
+ "ax-TZ",
+ "fr-shadok",
+ "fr-y-myext-myext2",
+ "fra-Latn",
+ "fra",
+ "fra-FX",
+ "i-klingon",
+ "I-kLINgon",
+ "no-bok",
+ "fr-Lat",
+ "mn-Cyrl-MN",
+ "mN-cYrL-Mn",
+ "fr-Latn-CA",
+ "en-US",
+ "fr-Latn-CA",
+ "i-enochian",
+ "x-fr-CH",
+ "sr-Latn-CS",
+ "es-419",
+ "sl-nedis",
+ "de-CH-1996",
+ "de-Latg-1996",
+ "sl-IT-nedis",
+ "en-a-bbb-x-a-ccc",
+ "de-a-value",
+ "en-Latn-GB-boont-r-extended-sequence-x-private",
+ "en-x-US",
+ "az-Arab-x-AZE-derbend",
+ "es-Latn-CO-x-private",
+ "en-US-boont",
+ "ab-x-abc-x-abc",
+ "ab-x-abc-a-a",
+ "i-default",
+ "i-klingon",
+ "abcd-Latn",
+ "AaBbCcDd-x-y-any-x",
+ "en",
+ "de-AT",
+ "es-419",
+ "de-CH-1901",
+ "sr-Cyrl",
+ "sr-Cyrl-CS",
+ "sl-Latn-IT-rozaj",
+ "en-US-x-twain",
+ "zh-cmn",
+ "zh-cmn-Hant",
+ "zh-cmn-Hant-HK",
+ "zh-gan",
+ "zh-yue-Hant-HK",
+ "xr-lxs-qut",
+ "xr-lqt-qu",
+ "xr-p-lze",
+ ];
+
+ c.bench_function("language tag parse tests", |b| {
+ b.iter(|| {
+ for tag in examples.iter() {
+ LanguageTag::parse(*tag).unwrap();
+ }
+ })
+ });
+}
+
+criterion_group!(language_tag, bench_language_tag_parse);
+
+criterion_main!(language_tag);
diff --git a/third_party/rust/oxilangtag/deny.toml b/third_party/rust/oxilangtag/deny.toml
new file mode 100644
index 0000000000..6b5133355b
--- /dev/null
+++ b/third_party/rust/oxilangtag/deny.toml
@@ -0,0 +1,11 @@
+[licenses]
+unlicensed = "deny"
+allow = [
+ "MIT",
+ "Apache-2.0"
+]
+default = "deny"
+
+[bans]
+multiple-versions = "warn"
+wildcards = "deny"
diff --git a/third_party/rust/oxilangtag/src/lib.rs b/third_party/rust/oxilangtag/src/lib.rs
new file mode 100644
index 0000000000..1fd913e13c
--- /dev/null
+++ b/third_party/rust/oxilangtag/src/lib.rs
@@ -0,0 +1,923 @@
+#![doc = include_str!("../README.md")]
+#![deny(unsafe_code)]
+
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::borrow::{Borrow, Cow};
+use std::cmp::Ordering;
+use std::error::Error;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::iter::once;
+use std::ops::Deref;
+use std::str::{FromStr, Split};
+
+/// A [RFC 5646](https://tools.ietf.org/html/rfc5646) language tag.
+///
+/// ```
+/// use oxilangtag::LanguageTag;
+///
+/// let language_tag = LanguageTag::parse("en-us").unwrap();
+/// assert_eq!(language_tag.into_inner(), "en-us")
+/// ```
+#[derive(Copy, Clone)]
+pub struct LanguageTag<T> {
+ tag: T,
+ positions: TagElementsPositions,
+}
+
+impl<T: Deref<Target = str>> LanguageTag<T> {
+ /// Parses a language tag according to [RFC 5646](https://tools.ietf.org/html/rfc5646)
+ /// and checks if the tag is ["well-formed"](https://tools.ietf.org/html/rfc5646#section-2.2.9).
+ ///
+ /// This operation keeps internally the `tag` parameter exactly as given (its case is
+ /// not normalized) and does not allocate on the heap.
+ ///
+ /// Returns a `LanguageTagParseError` if the parser rejects the tag.
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("en-us").unwrap();
+ /// assert_eq!(language_tag.into_inner(), "en-us")
+ /// ```
+ pub fn parse(tag: T) -> Result<Self, LanguageTagParseError> {
+ // The normalized output is thrown away here: only the component positions are kept.
+ let positions = parse_language_tag(&tag, &mut VoidOutputBuffer::default())?;
+ Ok(Self { tag, positions })
+ }
+
+ /// Returns the underlying language tag representation.
+ #[inline]
+ pub fn as_str(&self) -> &str {
+ &self.tag
+ }
+
+ /// Returns the underlying language tag representation.
+ #[inline]
+ pub fn into_inner(self) -> T {
+ self.tag
+ }
+
+ /// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+ /// assert_eq!(language_tag.primary_language(), "zh");
+ /// ```
+ #[inline]
+ pub fn primary_language(&self) -> &str {
+ &self.tag[..self.positions.language_end]
+ }
+
+ /// Returns the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
+ ///
+ /// Valid language tags have at most one extended language.
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+ /// assert_eq!(language_tag.extended_language(), Some("cmn"));
+ /// ```
+ #[inline]
+ pub fn extended_language(&self) -> Option<&str> {
+ if self.positions.language_end == self.positions.extlang_end {
+ None
+ } else {
+ Some(&self.tag[self.positions.language_end + 1..self.positions.extlang_end])
+ }
+ }
+
+ /// Iterates on the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
+ ///
+ /// Valid language tags have at most one extended language.
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+ /// assert_eq!(language_tag.extended_language_subtags().collect::<Vec<_>>(), vec!["cmn"]);
+ /// ```
+ #[inline]
+ pub fn extended_language_subtags(&self) -> impl Iterator<Item = &str> {
+ self.extended_language().unwrap_or("").split_terminator('-')
+ }
+
+ /// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1)
+ /// and its [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+ /// assert_eq!(language_tag.full_language(), "zh-cmn");
+ /// ```
+ #[inline]
+ pub fn full_language(&self) -> &str {
+ &self.tag[..self.positions.extlang_end]
+ }
+
+ /// Returns the [script subtag](https://tools.ietf.org/html/rfc5646#section-2.2.3).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+ /// assert_eq!(language_tag.script(), Some("Hans"));
+ /// ```
+ #[inline]
+ pub fn script(&self) -> Option<&str> {
+ if self.positions.extlang_end == self.positions.script_end {
+ None
+ } else {
+ Some(&self.tag[self.positions.extlang_end + 1..self.positions.script_end])
+ }
+ }
+
+ /// Returns the [region subtag](https://tools.ietf.org/html/rfc5646#section-2.2.4).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+ /// assert_eq!(language_tag.region(), Some("CN"));
+ /// ```
+ #[inline]
+ pub fn region(&self) -> Option<&str> {
+ if self.positions.script_end == self.positions.region_end {
+ None
+ } else {
+ Some(&self.tag[self.positions.script_end + 1..self.positions.region_end])
+ }
+ }
+
+ /// Returns the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap();
+ /// assert_eq!(language_tag.variant(), Some("pinyin"));
+ /// ```
+ #[inline]
+ pub fn variant(&self) -> Option<&str> {
+ if self.positions.region_end == self.positions.variant_end {
+ None
+ } else {
+ Some(&self.tag[self.positions.region_end + 1..self.positions.variant_end])
+ }
+ }
+
+ /// Iterates on the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap();
+ /// assert_eq!(language_tag.variant_subtags().collect::<Vec<_>>(), vec!["pinyin"]);
+ /// ```
+ #[inline]
+ pub fn variant_subtags(&self) -> impl Iterator<Item = &str> {
+ self.variant().unwrap_or("").split_terminator('-')
+ }
+
+ /// Returns the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap();
+ /// assert_eq!(language_tag.extension(), Some("u-co-phonebk"));
+ /// ```
+ #[inline]
+ pub fn extension(&self) -> Option<&str> {
+ if self.positions.variant_end == self.positions.extension_end {
+ None
+ } else {
+ Some(&self.tag[self.positions.variant_end + 1..self.positions.extension_end])
+ }
+ }
+
+ /// Iterates on the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6),
+ /// yielding each extension as a `(singleton, content)` pair.
+ ///
+ /// Nothing is yielded when the tag has no extension.
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap();
+ /// assert_eq!(language_tag.extension_subtags().collect::<Vec<_>>(), vec![('u', "co-phonebk")]);
+ /// ```
+ #[inline]
+ pub fn extension_subtags(&self) -> impl Iterator<Item = (char, &str)> {
+     // An absent extension part is iterated as the empty string, which yields nothing.
+     ExtensionsIterator::new(self.extension().unwrap_or(""))
+ }
+
+ /// Returns the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7),
+ /// including the leading `x` singleton, or `None` if the tag has no private use part.
+ ///
+ /// A tag made only of private use subtags (e.g. `x-foo`) is returned whole. The `x`
+ /// singleton is matched in both cases because `parse` stores the tag as given, so
+ /// such a tag may start with `X-`.
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap();
+ /// assert_eq!(language_tag.private_use(), Some("x-foo-bar"));
+ ///
+ /// let language_tag = LanguageTag::parse("X-foo-bar").unwrap();
+ /// assert_eq!(language_tag.private_use(), Some("X-foo-bar"));
+ /// ```
+ #[inline]
+ pub fn private_use(&self) -> Option<&str> {
+     if self.tag.starts_with("x-") || self.tag.starts_with("X-") {
+         // Whole-tag private use: the parser sets every position to `tag.len()`,
+         // so the generic branches below would wrongly report "no private use".
+         Some(&self.tag)
+     } else if self.positions.extension_end == self.tag.len() {
+         None
+     } else {
+         // Skip the `-` separating the previous component from the `x` singleton.
+         Some(&self.tag[self.positions.extension_end + 1..])
+     }
+ }
+
+ /// Iterates on the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7).
+ ///
+ /// ```
+ /// use oxilangtag::LanguageTag;
+ ///
+ /// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap();
+ /// assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["foo", "bar"]);
+ /// ```
+ #[inline]
+ pub fn private_use_subtags(&self) -> impl Iterator<Item = &str> {
+ self.private_use()
+ .map(|part| &part[2..])
+ .unwrap_or("")
+ .split_terminator('-')
+ }
+}
+
+impl LanguageTag<String> {
+    /// Parses a language tag according to [RFC 5646](https://tools.ietf.org/html/rfc5646)
+    /// and normalizes its case.
+    ///
+    /// Tags that are "well-formed" in the sense of
+    /// [RFC 5646](https://tools.ietf.org/html/rfc5646#section-2.2.9) are accepted.
+    ///
+    /// The normalized tag is written into a newly heap-allocated `String`.
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse_and_normalize("en-us").unwrap();
+    /// assert_eq!(language_tag.into_inner(), "en-US")
+    /// ```
+    pub fn parse_and_normalize(tag: &str) -> Result<Self, LanguageTagParseError> {
+        // Reserve the input length up front for the normalized copy.
+        let mut normalized = String::with_capacity(tag.len());
+        parse_language_tag(tag, &mut normalized).map(|positions| Self {
+            tag: normalized,
+            positions,
+        })
+    }
+}
+
+impl<Lft: PartialEq<Rhs>, Rhs> PartialEq<LanguageTag<Rhs>> for LanguageTag<Lft> {
+ #[inline]
+ fn eq(&self, other: &LanguageTag<Rhs>) -> bool {
+ self.tag.eq(&other.tag)
+ }
+}
+
+impl<T: PartialEq<str>> PartialEq<str> for LanguageTag<T> {
+ #[inline]
+ fn eq(&self, other: &str) -> bool {
+ self.tag.eq(other)
+ }
+}
+
+impl<'a, T: PartialEq<&'a str>> PartialEq<&'a str> for LanguageTag<T> {
+ #[inline]
+ fn eq(&self, other: &&'a str) -> bool {
+ self.tag.eq(other)
+ }
+}
+
+impl<T: PartialEq<String>> PartialEq<String> for LanguageTag<T> {
+ #[inline]
+ fn eq(&self, other: &String) -> bool {
+ self.tag.eq(other)
+ }
+}
+
+impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<Cow<'a, str>> for LanguageTag<T> {
+ #[inline]
+ fn eq(&self, other: &Cow<'a, str>) -> bool {
+ self.tag.eq(other)
+ }
+}
+
+impl<T: PartialEq<str>> PartialEq<LanguageTag<T>> for str {
+ #[inline]
+ fn eq(&self, other: &LanguageTag<T>) -> bool {
+ other.tag.eq(self)
+ }
+}
+
+impl<'a, T: PartialEq<&'a str>> PartialEq<LanguageTag<T>> for &'a str {
+ #[inline]
+ fn eq(&self, other: &LanguageTag<T>) -> bool {
+ other.tag.eq(self)
+ }
+}
+
+impl<T: PartialEq<String>> PartialEq<LanguageTag<T>> for String {
+ #[inline]
+ fn eq(&self, other: &LanguageTag<T>) -> bool {
+ other.tag.eq(self)
+ }
+}
+
+impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<LanguageTag<T>> for Cow<'a, str> {
+ #[inline]
+ fn eq(&self, other: &LanguageTag<T>) -> bool {
+ other.tag.eq(self)
+ }
+}
+
+impl<T: Eq> Eq for LanguageTag<T> {}
+
+impl<T: Hash> Hash for LanguageTag<T> {
+ #[inline]
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.tag.hash(state)
+ }
+}
+
+impl<T: PartialOrd> PartialOrd for LanguageTag<T> {
+ #[inline]
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.tag.partial_cmp(&other.tag)
+ }
+}
+
+impl<T: Ord> Ord for LanguageTag<T> {
+ #[inline]
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.tag.cmp(&other.tag)
+ }
+}
+
+impl<T: Deref<Target = str>> Deref for LanguageTag<T> {
+ type Target = str;
+
+ #[inline]
+ fn deref(&self) -> &str {
+ self.tag.deref()
+ }
+}
+
+impl<T: AsRef<str>> AsRef<str> for LanguageTag<T> {
+ #[inline]
+ fn as_ref(&self) -> &str {
+ self.tag.as_ref()
+ }
+}
+
+impl<T: Borrow<str>> Borrow<str> for LanguageTag<T> {
+ #[inline]
+ fn borrow(&self) -> &str {
+ self.tag.borrow()
+ }
+}
+
+impl<T: fmt::Debug> fmt::Debug for LanguageTag<T> {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.tag.fmt(f)
+ }
+}
+
+impl<T: fmt::Display> fmt::Display for LanguageTag<T> {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.tag.fmt(f)
+ }
+}
+
+impl FromStr for LanguageTag<String> {
+ type Err = LanguageTagParseError;
+
+ #[inline]
+ fn from_str(tag: &str) -> Result<Self, LanguageTagParseError> {
+ Self::parse_and_normalize(tag)
+ }
+}
+
+impl<'a> From<LanguageTag<&'a str>> for LanguageTag<String> {
+ #[inline]
+ fn from(tag: LanguageTag<&'a str>) -> Self {
+ Self {
+ tag: tag.tag.into(),
+ positions: tag.positions,
+ }
+ }
+}
+
+impl<'a> From<LanguageTag<Cow<'a, str>>> for LanguageTag<String> {
+ #[inline]
+ fn from(tag: LanguageTag<Cow<'a, str>>) -> Self {
+ Self {
+ tag: tag.tag.into(),
+ positions: tag.positions,
+ }
+ }
+}
+
+impl From<LanguageTag<Box<str>>> for LanguageTag<String> {
+ #[inline]
+ fn from(tag: LanguageTag<Box<str>>) -> Self {
+ Self {
+ tag: tag.tag.into(),
+ positions: tag.positions,
+ }
+ }
+}
+
+impl<'a> From<LanguageTag<&'a str>> for LanguageTag<Cow<'a, str>> {
+ #[inline]
+ fn from(tag: LanguageTag<&'a str>) -> Self {
+ Self {
+ tag: tag.tag.into(),
+ positions: tag.positions,
+ }
+ }
+}
+
+impl<'a> From<LanguageTag<String>> for LanguageTag<Cow<'a, str>> {
+ #[inline]
+ fn from(tag: LanguageTag<String>) -> Self {
+ Self {
+ tag: tag.tag.into(),
+ positions: tag.positions,
+ }
+ }
+}
+
+#[cfg(feature = "serde")]
+impl<T: Serialize> Serialize for LanguageTag<T> {
+ fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+ self.tag.serialize(serializer)
+ }
+}
+
+#[cfg(feature = "serde")]
+impl<'de, T: Deref<Target = str> + Deserialize<'de>> Deserialize<'de> for LanguageTag<T> {
+ fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<LanguageTag<T>, D::Error> {
+ use serde::de::Error;
+
+ Self::parse(T::deserialize(deserializer)?).map_err(D::Error::custom)
+ }
+}
+
+/// An error raised during [`LanguageTag`](struct.LanguageTag.html) validation.
+#[derive(Debug)]
+pub struct LanguageTagParseError {
+ kind: TagParseErrorKind,
+}
+
+impl fmt::Display for LanguageTagParseError {
+    /// Writes the human-readable explanation associated with the kind of parse failure.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Every message is a fixed string, so pick it first and emit it in one place.
+        let message = match self.kind {
+            TagParseErrorKind::EmptyExtension => {
+                "If an extension subtag is present, it must not be empty"
+            }
+            TagParseErrorKind::EmptyPrivateUse => {
+                "If the `x` subtag is present, it must not be empty"
+            }
+            TagParseErrorKind::ForbiddenChar => "The langtag contains a char not allowed",
+            TagParseErrorKind::InvalidSubtag => {
+                "A subtag fails to parse, it does not match any other subtags"
+            }
+            TagParseErrorKind::InvalidLanguage => "The given language subtag is invalid",
+            TagParseErrorKind::SubtagTooLong => {
+                "A subtag may be eight characters in length at maximum"
+            }
+            TagParseErrorKind::EmptySubtag => "A subtag should not be empty",
+            TagParseErrorKind::TooManyExtlangs => "At maximum three extlangs are allowed",
+        };
+        f.write_str(message)
+    }
+}
+
+impl Error for LanguageTagParseError {}
+
+#[derive(Debug)]
+enum TagParseErrorKind {
+ /// If an extension subtag is present, it must not be empty.
+ EmptyExtension,
+ /// If the `x` subtag is present, it must not be empty.
+ EmptyPrivateUse,
+ /// The langtag contains a char that is not A-Z, a-z, 0-9 or the dash.
+ ForbiddenChar,
+ /// A subtag fails to parse, it does not match any other subtags.
+ InvalidSubtag,
+ /// The given language subtag is invalid.
+ InvalidLanguage,
+ /// A subtag may be eight characters in length at maximum.
+ SubtagTooLong,
+ /// A subtag should not be empty.
+ EmptySubtag,
+ /// At maximum three extlangs are allowed, but zero to one extlangs are preferred.
+ TooManyExtlangs,
+}
+
+#[derive(Copy, Clone, Debug)]
+struct TagElementsPositions {
+ language_end: usize,
+ extlang_end: usize,
+ script_end: usize,
+ region_end: usize,
+ variant_end: usize,
+ extension_end: usize,
+}
+
+trait OutputBuffer: Extend<char> {
+ fn push(&mut self, c: char);
+
+ fn push_str(&mut self, s: &str);
+}
+
+#[derive(Default)]
+struct VoidOutputBuffer {}
+
+impl OutputBuffer for VoidOutputBuffer {
+ #[inline]
+ fn push(&mut self, _: char) {}
+
+ #[inline]
+ fn push_str(&mut self, _: &str) {}
+}
+
+impl Extend<char> for VoidOutputBuffer {
+ #[inline]
+ fn extend<T: IntoIterator<Item = char>>(&mut self, _: T) {}
+}
+
+impl OutputBuffer for String {
+ #[inline]
+ fn push(&mut self, c: char) {
+ self.push(c);
+ }
+
+ #[inline]
+ fn push_str(&mut self, s: &str) {
+ self.push_str(s);
+ }
+}
+
+/// Parses a language tag following [the RFC5646 grammar](https://tools.ietf.org/html/rfc5646#section-2.1).
+///
+/// Three cases are distinguished:
+/// * grandfathered tags (compared ignoring ASCII case) are written to `output` with the
+///   spelling of the `GRANDFATHEREDS` table;
+/// * tags starting with `x-` or `X-` are written to `output` lowercased as a whole;
+/// * every other tag is delegated to `parse_langtag`.
+///
+/// In the first two cases all component boundaries are set to the end of the tag.
+fn parse_language_tag(
+    input: &str,
+    output: &mut impl OutputBuffer,
+) -> Result<TagElementsPositions, LanguageTagParseError> {
+    // Positions for a tag handled as one opaque unit: every boundary is its end.
+    let whole_tag = |len: usize| TagElementsPositions {
+        language_end: len,
+        extlang_end: len,
+        script_end: len,
+        region_end: len,
+        variant_end: len,
+        extension_end: len,
+    };
+
+    // Grandfathered tags
+    let grandfathered = GRANDFATHEREDS
+        .iter()
+        .find(|candidate| candidate.eq_ignore_ascii_case(input));
+    if let Some(canonical) = grandfathered {
+        output.push_str(canonical);
+        return Ok(whole_tag(canonical.len()));
+    }
+
+    // Normal tags
+    if !(input.starts_with("x-") || input.starts_with("X-")) {
+        return parse_langtag(input, output);
+    }
+
+    // Private use only
+    if !is_alphanumeric_or_dash(input) {
+        return Err(LanguageTagParseError {
+            kind: TagParseErrorKind::ForbiddenChar,
+        });
+    }
+    if input.len() == 2 {
+        return Err(LanguageTagParseError {
+            kind: TagParseErrorKind::EmptyPrivateUse,
+        });
+    }
+    output.extend(input.chars().map(|c| c.to_ascii_lowercase()));
+    Ok(whole_tag(input.len()))
+}
+
+/// Handles normal tags.
+fn parse_langtag(
+ input: &str,
+ output: &mut impl OutputBuffer,
+) -> Result<TagElementsPositions, LanguageTagParseError> {
+ #[derive(PartialEq, Eq)]
+ enum State {
+ Start,
+ AfterLanguage,
+ AfterExtLang,
+ AfterScript,
+ AfterRegion,
+ InExtension { expected: bool },
+ InPrivateUse { expected: bool },
+ }
+
+ let mut state = State::Start;
+ let mut language_end = 0;
+ let mut extlang_end = 0;
+ let mut script_end = 0;
+ let mut region_end = 0;
+ let mut variant_end = 0;
+ let mut extension_end = 0;
+ let mut extlangs_count = 0;
+ for (subtag, end) in SubTagIterator::new(input) {
+ if subtag.is_empty() {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::EmptySubtag,
+ });
+ }
+ if subtag.len() > 8 {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::SubtagTooLong,
+ });
+ }
+ if state == State::Start {
+ // Primary language
+ if subtag.len() < 2 || !is_alphabetic(subtag) {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::InvalidLanguage,
+ });
+ }
+ language_end = end;
+ output.extend(to_lowercase(subtag));
+ if subtag.len() < 4 {
+ // extlangs are only allowed for short language tags
+ state = State::AfterLanguage;
+ } else {
+ state = State::AfterExtLang;
+ }
+ } else if let State::InPrivateUse { .. } = state {
+ if !is_alphanumeric(subtag) {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::InvalidSubtag,
+ });
+ }
+ output.push('-');
+ output.extend(to_lowercase(subtag));
+ state = State::InPrivateUse { expected: false };
+ } else if subtag == "x" || subtag == "X" {
+ // We make sure extension is found
+ if let State::InExtension { expected: true } = state {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::EmptyExtension,
+ });
+ }
+ output.push('-');
+ output.push('x');
+ state = State::InPrivateUse { expected: true };
+ } else if subtag.len() == 1 && is_alphanumeric(subtag) {
+ // We make sure extension is found
+ if let State::InExtension { expected: true } = state {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::EmptyExtension,
+ });
+ }
+ let extension_tag = subtag.chars().next().unwrap().to_ascii_lowercase();
+ output.push('-');
+ output.push(extension_tag);
+ state = State::InExtension { expected: true };
+ } else if let State::InExtension { .. } = state {
+ if !is_alphanumeric(subtag) {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::InvalidSubtag,
+ });
+ }
+ extension_end = end;
+ output.push('-');
+ output.extend(to_lowercase(subtag));
+ state = State::InExtension { expected: false };
+ } else if state == State::AfterLanguage && subtag.len() == 3 && is_alphabetic(subtag) {
+ extlangs_count += 1;
+ if extlangs_count > 3 {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::TooManyExtlangs,
+ });
+ }
+ // valid extlangs
+ extlang_end = end;
+ output.push('-');
+ output.extend(to_lowercase(subtag));
+ } else if (state == State::AfterLanguage || state == State::AfterExtLang)
+ && subtag.len() == 4
+ && is_alphabetic(subtag)
+ {
+ // Script
+ script_end = end;
+ output.push('-');
+ output.extend(to_uppercase_first(subtag));
+ state = State::AfterScript;
+ } else if (state == State::AfterLanguage
+ || state == State::AfterExtLang
+ || state == State::AfterScript)
+ && (subtag.len() == 2 && is_alphabetic(subtag)
+ || subtag.len() == 3 && is_numeric(subtag))
+ {
+ // Region
+ region_end = end;
+ output.push('-');
+ output.extend(to_uppercase(subtag));
+ state = State::AfterRegion;
+ } else if (state == State::AfterLanguage
+ || state == State::AfterExtLang
+ || state == State::AfterScript
+ || state == State::AfterRegion)
+ && is_alphanumeric(subtag)
+ && (subtag.len() >= 5 && is_alphabetic(&subtag[0..1])
+ || subtag.len() >= 4 && is_numeric(&subtag[0..1]))
+ {
+ // Variant
+ variant_end = end;
+ output.push('-');
+ output.extend(to_lowercase(subtag));
+ state = State::AfterRegion;
+ } else {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::InvalidSubtag,
+ });
+ }
+ }
+
+ //We make sure we are in a correct final state
+ if let State::InExtension { expected: true } = state {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::EmptyExtension,
+ });
+ }
+ if let State::InPrivateUse { expected: true } = state {
+ return Err(LanguageTagParseError {
+ kind: TagParseErrorKind::EmptyPrivateUse,
+ });
+ }
+
+ //We make sure we have not skipped anyone
+ if extlang_end < language_end {
+ extlang_end = language_end;
+ }
+ if script_end < extlang_end {
+ script_end = extlang_end;
+ }
+ if region_end < script_end {
+ region_end = script_end;
+ }
+ if variant_end < region_end {
+ variant_end = region_end;
+ }
+ if extension_end < variant_end {
+ extension_end = variant_end;
+ }
+
+ Ok(TagElementsPositions {
+ language_end,
+ extlang_end,
+ script_end,
+ region_end,
+ variant_end,
+ extension_end,
+ })
+}
+
+struct ExtensionsIterator<'a> {
+ input: &'a str,
+}
+
+impl<'a> ExtensionsIterator<'a> {
+ fn new(input: &'a str) -> Self {
+ Self { input }
+ }
+}
+
+impl<'a> Iterator for ExtensionsIterator<'a> {
+    type Item = (char, &'a str);
+
+    /// Yields the next `(singleton, content)` pair and advances past it.
+    ///
+    /// The remaining input is expected to look like `<singleton>-<subtag>(-<subtag>)*`,
+    /// optionally followed by further extensions; a one-character subtag marks the
+    /// start of the next extension.
+    fn next(&mut self) -> Option<(char, &'a str)> {
+        let remaining = self.input;
+        let mut subtags = remaining.split_terminator('-');
+        let singleton = subtags.next()?.chars().next().unwrap();
+        // Byte offset just past the subtags consumed so far (initially past "<singleton>-").
+        let mut next_start = 2;
+        for subtag in subtags {
+            if subtag.len() != 1 {
+                next_start += subtag.len() + 1;
+                continue;
+            }
+            // Next singleton reached: the content stops before the `-` preceding it.
+            let content = &remaining[2..next_start - 1];
+            self.input = &remaining[next_start..];
+            return Some((singleton, content));
+        }
+        // Last extension: everything after "<singleton>-" is its content.
+        self.input = "";
+        remaining.get(2..).map(|content| (singleton, content))
+    }
+}
+
+struct SubTagIterator<'a> {
+ split: Split<'a, char>,
+ position: usize,
+}
+
+impl<'a> SubTagIterator<'a> {
+ #[inline]
+ fn new(input: &'a str) -> Self {
+ Self {
+ split: input.split('-'),
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for SubTagIterator<'a> {
+    type Item = (&'a str, usize);
+
+    /// Yields the next `-`-separated subtag together with the byte offset of its end
+    /// in the original input.
+    #[inline]
+    fn next(&mut self) -> Option<(&'a str, usize)> {
+        self.split.next().map(|subtag| {
+            let end = self.position + subtag.len();
+            // The following subtag starts right after the `-` separator.
+            self.position = end + 1;
+            (subtag, end)
+        })
+    }
+}
+
+#[inline]
+fn is_alphabetic(s: &str) -> bool {
+ s.chars().all(|x| x.is_ascii_alphabetic())
+}
+
+#[inline]
+fn is_numeric(s: &str) -> bool {
+ s.chars().all(|x| x.is_ascii_digit())
+}
+
+#[inline]
+fn is_alphanumeric(s: &str) -> bool {
+ s.chars().all(|x| x.is_ascii_alphanumeric())
+}
+
+#[inline]
+fn is_alphanumeric_or_dash(s: &str) -> bool {
+ s.chars().all(|x| x.is_ascii_alphanumeric() || x == '-')
+}
+
+#[inline]
+fn to_uppercase(s: &str) -> impl Iterator<Item = char> + '_ {
+ s.chars().map(|c| c.to_ascii_uppercase())
+}
+
+// Beware: panics if s.len() == 0 (should never happen in our code)
+#[inline]
+fn to_uppercase_first(s: &str) -> impl Iterator<Item = char> + '_ {
+ let mut chars = s.chars();
+ once(chars.next().unwrap().to_ascii_uppercase()).chain(chars.map(|c| c.to_ascii_lowercase()))
+}
+
+#[inline]
+fn to_lowercase(s: &str) -> impl Iterator<Item = char> + '_ {
+ s.chars().map(|c| c.to_ascii_lowercase())
+}
+
+const GRANDFATHEREDS: [&str; 26] = [
+ "art-lojban",
+ "cel-gaulish",
+ "en-GB-oed",
+ "i-ami",
+ "i-bnn",
+ "i-default",
+ "i-enochian",
+ "i-hak",
+ "i-klingon",
+ "i-lux",
+ "i-mingo",
+ "i-navajo",
+ "i-pwn",
+ "i-tao",
+ "i-tay",
+ "i-tsu",
+ "no-bok",
+ "no-nyn",
+ "sgn-BE-FR",
+ "sgn-BE-NL",
+ "sgn-CH-DE",
+ "zh-guoyu",
+ "zh-hakka",
+ "zh-min",
+ "zh-min-nan",
+ "zh-xiang",
+];
diff --git a/third_party/rust/oxilangtag/tests/lib.rs b/third_party/rust/oxilangtag/tests/lib.rs
new file mode 100644
index 0000000000..6850f0bd30
--- /dev/null
+++ b/third_party/rust/oxilangtag/tests/lib.rs
@@ -0,0 +1,722 @@
+use oxilangtag::LanguageTag;
+use serde_test::{assert_de_tokens, assert_de_tokens_error};
+#[cfg(feature = "serde")]
+use serde_test::{assert_tokens, Token};
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
+
+ // Tests from RFC 5646 2.1.1
+ // Normalization must produce the same casing whatever casing the input used.
+ #[test]
+ fn test_formatting() {
+     // (input, expected normalized form)
+     let cases = [
+         ("mn-Cyrl-MN", "mn-Cyrl-MN"),
+         ("MN-cYRL-mn", "mn-Cyrl-MN"),
+         ("mN-cYrL-Mn", "mn-Cyrl-MN"),
+         ("en-CA-x-ca", "en-CA-x-ca"),
+         ("sgn-BE-FR", "sgn-BE-FR"),
+         ("az-Latn-x-latn", "az-Latn-x-latn"),
+         ("i-ami", "i-ami"),
+         ("I-AMI", "i-ami"),
+         ("SL-AFB-lATN-005-nEdis", "sl-afb-Latn-005-nedis"),
+     ];
+     for &(input, expected) in cases.iter() {
+         let tag = LanguageTag::parse_and_normalize(input).unwrap();
+         assert_eq!(expected, tag.as_str());
+     }
+ }
+
+ // Tests from RFC 5646 2.2.1
+ // Private-use-only and grandfathered tags report the whole tag as their
+ // primary language.
+ #[test]
+ fn test_primary_language() {
+     // (input, expected primary language)
+     let cases = [
+         ("fr", "fr"),
+         ("de", "de"),
+         ("x-fr-CH", "x-fr-ch"),
+         ("i-klingon", "i-klingon"),
+         ("i-bnn", "i-bnn"),
+         ("zh-hakka", "zh-hakka"),
+     ];
+     for &(input, expected) in cases.iter() {
+         let tag = LanguageTag::parse_and_normalize(input).unwrap();
+         assert_eq!(expected, tag.primary_language());
+     }
+ }
+
+ // Tests from RFC 5646 2.2.2
+ #[test]
+ fn test_extended_language() {
+     // input -> (full language, primary language, extended language, extlang subtags)
+     let cases: Vec<(&str, (&str, &str, Option<&str>, Vec<&str>))> = vec![
+         ("zh", ("zh", "zh", None, vec![])),
+         ("zh-gan", ("zh-gan", "zh", Some("gan"), vec!["gan"])),
+         (
+             "zh-gan-foo",
+             ("zh-gan-foo", "zh", Some("gan-foo"), vec!["gan", "foo"]),
+         ),
+         ("zh-min-nan", ("zh-min-nan", "zh-min-nan", None, vec![])),
+         ("i-tsu", ("i-tsu", "i-tsu", None, vec![])),
+         ("zh-CN", ("zh", "zh", None, vec![])),
+         ("zh-gan-CN", ("zh-gan", "zh", Some("gan"), vec!["gan"])),
+         ("ar-afb", ("ar-afb", "ar", Some("afb"), vec!["afb"])),
+     ];
+     for (input, expected) in cases {
+         let tag: LanguageTag<String> = input.parse().unwrap();
+         let actual = (
+             tag.full_language(),
+             tag.primary_language(),
+             tag.extended_language(),
+             tag.extended_language_subtags().collect::<Vec<&str>>(),
+         );
+         assert_eq!(expected, actual);
+     }
+ }
+
+ // Tests from RFC 5646 2.2.3
+ #[test]
+ fn test_script() {
+     // input -> (primary language, script)
+     let cases = [
+         ("sr-Latn", ("sr", Some("Latn"))),
+         ("ar-afb-Latn", ("ar", Some("Latn"))),
+     ];
+     for &(input, expected) in cases.iter() {
+         let tag: LanguageTag<String> = input.parse().unwrap();
+         assert_eq!(expected, (tag.primary_language(), tag.script()));
+     }
+ }
+
+ // Tests from RFC 5646 2.2.4
+ #[test]
+ fn test_region() {
+     // input -> (primary language, script, region)
+     let cases: [(&str, (&str, Option<&str>, Option<&str>)); 5] = [
+         ("de-AT", ("de", None, Some("AT"))),
+         ("sr-Latn-RS", ("sr", Some("Latn"), Some("RS"))),
+         ("es-419", ("es", None, Some("419"))),
+         ("ar-DE", ("ar", None, Some("DE"))),
+         ("ar-005", ("ar", None, Some("005"))),
+     ];
+     for &(input, expected) in cases.iter() {
+         let tag: LanguageTag<String> = input.parse().unwrap();
+         let actual = (tag.primary_language(), tag.script(), tag.region());
+         assert_eq!(expected, actual);
+     }
+ }
+
+ // Tests from RFC 5646 2.2.5
+ #[test]
+ fn test_variant() {
+     // input -> (primary language, variant, variant subtags)
+     let cases: Vec<(&str, (&str, Option<&str>, Vec<&str>))> = vec![
+         ("sl", ("sl", None, vec![])),
+         ("sl-nedis", ("sl", Some("nedis"), vec!["nedis"])),
+         ("de-CH-1996", ("de", Some("1996"), vec!["1996"])),
+         ("art-lojban", ("art-lojban", None, vec![])),
+     ];
+     for (input, expected) in cases {
+         let tag: LanguageTag<String> = input.parse().unwrap();
+         let actual = (
+             tag.primary_language(),
+             tag.variant(),
+             tag.variant_subtags().collect::<Vec<&str>>(),
+         );
+         assert_eq!(expected, actual);
+     }
+ }
+
+ // Tests from RFC 5646 2.2.6
+ #[test]
+ fn test_extension() {
+     // input -> (primary language, extension, (singleton, value) pairs)
+     let cases: Vec<(&str, (&str, Option<&str>, Vec<(char, &str)>))> = vec![
+         ("en", ("en", None, vec![])),
+         (
+             "en-a-bbb-x-a-ccc",
+             ("en", Some("a-bbb"), vec![('a', "bbb")]),
+         ),
+         (
+             "en-a-babble-b-warble",
+             (
+                 "en",
+                 Some("a-babble-b-warble"),
+                 vec![('a', "babble"), ('b', "warble")],
+             ),
+         ),
+         ("fr-a-Latn", ("fr", Some("a-latn"), vec![('a', "latn")])),
+         (
+             "en-Latn-GB-boont-r-extended-sequence-x-private",
+             (
+                 "en",
+                 Some("r-extended-sequence"),
+                 vec![('r', "extended-sequence")],
+             ),
+         ),
+         (
+             "en-r-az-r-qt",
+             ("en", Some("r-az-r-qt"), vec![('r', "az"), ('r', "qt")]),
+         ),
+         ("i-tsu", ("i-tsu", None, vec![])),
+     ];
+     for (input, expected) in cases {
+         let tag: LanguageTag<String> = input.parse().unwrap();
+         let actual = (
+             tag.primary_language(),
+             tag.extension(),
+             tag.extension_subtags().collect::<Vec<(char, &str)>>(),
+         );
+         assert_eq!(expected, actual);
+     }
+ }
+
+ // Tests from RFC 5646 2.2.7
+ #[test]
+ fn test_privateuse() {
+     // input -> (primary language, private use, private-use subtags)
+     let cases: Vec<(&str, (&str, Option<&str>, Vec<&str>))> = vec![
+         ("en", ("en", None, vec![])),
+         ("en-x-US", ("en", Some("x-us"), vec!["us"])),
+         ("el-x-koine", ("el", Some("x-koine"), vec!["koine"])),
+         ("x-fr-ch", ("x-fr-ch", Some("x-fr-ch"), vec!["fr", "ch"])),
+         (
+             "es-x-foobar-AT-007",
+             ("es", Some("x-foobar-at-007"), vec!["foobar", "at", "007"]),
+         ),
+     ];
+     for (input, expected) in cases {
+         let tag: LanguageTag<String> = input.parse().unwrap();
+         let actual = (
+             tag.primary_language(),
+             tag.private_use(),
+             tag.private_use_subtags().collect::<Vec<&str>>(),
+         );
+         assert_eq!(expected, actual);
+     }
+ }
+
+ // Normalized output is checked through `as_str()` and, for one tag, through
+ // `to_string()`.
+ #[test]
+ fn test_fmt() {
+     // (input, expected normalized form), compared via `as_str()`
+     let via_as_str = [
+         ("ar-arb-Latn-DE-nedis-foobar", "ar-arb-Latn-DE-nedis-foobar"),
+         ("ar-arb-latn-de-nedis-foobar", "ar-arb-Latn-DE-nedis-foobar"),
+         ("AR-ARB-LATN-DE-NEDIS-FOOBAR", "ar-arb-Latn-DE-nedis-foobar"),
+         ("xx-z-foo-a-bar-F-spam-b-eggs", "xx-z-foo-a-bar-f-spam-b-eggs"),
+         (
+             "MgxQa-ywEp-8lcW-7bvT-h-dP1Md-0h7-0Z3ir",
+             "mgxqa-Ywep-8lcw-7bvt-h-dp1md-0h7-0z3ir",
+         ),
+     ];
+     for &(input, expected) in via_as_str.iter() {
+         let tag = LanguageTag::parse_and_normalize(input).unwrap();
+         assert_eq!(expected, tag.as_str());
+     }
+
+     // This one goes through `to_string()` rather than `as_str()`.
+     let private_use = LanguageTag::parse_and_normalize("HkgnmerM-x-e5-zf-VdDjcpz-1V6").unwrap();
+     assert_eq!("hkgnmerm-x-e5-zf-vddjcpz-1v6", private_use.to_string());
+ }
+
+ // Non-ASCII characters must be rejected, even in a private-use section.
+ #[test]
+ fn test_unicode() {
+     let outcome = LanguageTag::parse("zh-x-Üńìcødê");
+     assert!(outcome.is_err());
+ }
+
+ // Normalized tags compare equal regardless of input casing, and differ when
+ // their subtags differ.
+ #[test]
+ fn test_cmp() {
+     let first_casing = LanguageTag::parse_and_normalize("dE-AraB-lY").unwrap();
+     let second_casing = LanguageTag::parse_and_normalize("DE-aRaB-LY").unwrap();
+     assert_eq!(first_casing, second_casing);
+
+     let language_only = LanguageTag::parse_and_normalize("zh").unwrap();
+     let with_script = LanguageTag::parse_and_normalize("zh-Latn").unwrap();
+     assert_ne!(language_only, with_script);
+ }
+
+ // http://www.langtag.net/test-suites/well-formed-tags.txt
+ // Every tag below must be accepted by `LanguageTag::parse`.
+ #[test]
+ fn test_wellformed_tags() {
+     let tags = [
+         "fr",
+         "fr-Latn",
+         "fr-fra", // Extended tag
+         "fr-Latn-FR",
+         "fr-Latn-419",
+         "fr-FR",
+         "ax-TZ", // Not in the registry, but well-formed
+         "fr-shadok", // Variant
+         "fr-y-myext-myext2",
+         "fra-Latn", // ISO 639 can be 3-letters
+         "fra",
+         "fra-FX",
+         "i-klingon", // grandfathered with singleton
+         "I-kLINgon", // tags are case-insensitive...
+         "no-bok", // grandfathered without singleton
+         "fr-Lat", // Extended
+         "mn-Cyrl-MN",
+         "mN-cYrL-Mn",
+         "fr-Latn-CA",
+         "en-US",
+         "fr-Latn-CA",
+         "i-enochian", // Grandfathered
+         "x-fr-CH",
+         "sr-Latn-CS",
+         "es-419",
+         "sl-nedis",
+         "de-CH-1996",
+         "de-Latg-1996",
+         "sl-IT-nedis",
+         "en-a-bbb-x-a-ccc",
+         "de-a-value",
+         "en-Latn-GB-boont-r-extended-sequence-x-private",
+         "en-x-US",
+         "az-Arab-x-AZE-derbend",
+         "es-Latn-CO-x-private",
+         "en-US-boont",
+         "ab-x-abc-x-abc", // anything goes after x
+         "ab-x-abc-a-a", // ditto
+         "i-default", // grandfathered
+         "i-klingon", // grandfathered
+         "abcd-Latn", // Language of 4 chars reserved for future use
+         "AaBbCcDd-x-y-any-x", // Language of 5-8 chars, registered
+         "en",
+         "de-AT",
+         "es-419",
+         "de-CH-1901",
+         "sr-Cyrl",
+         "sr-Cyrl-CS",
+         "sl-Latn-IT-rozaj",
+         "en-US-x-twain",
+         "zh-cmn",
+         "zh-cmn-Hant",
+         "zh-cmn-Hant-HK",
+         "zh-gan",
+         "zh-yue-Hant-HK",
+         "xr-lxs-qut", // extlangS
+         "xr-lqt-qu", // extlang + region
+         "xr-p-lze", // Extension
+     ];
+     for tag in tags.iter().copied() {
+         if let Err(error) = LanguageTag::parse(tag) {
+             panic!(
+                 "{} should be considered well-formed but returned error {}",
+                 tag, error
+             );
+         }
+     }
+ }
+
+ // http://www.langtag.net/test-suites/broken-tags.txt
+ // Every tag below must be rejected by `LanguageTag::parse`.
+ #[test]
+ fn test_broken_tags() {
+     let tags = [
+         "",
+         "f",
+         "f-Latn",
+         "fr-Latn-F",
+         "a-value",
+         "tlh-a-b-foo",
+         "i-notexist", // grandfathered but not registered: always invalid
+         "abcdefghi-012345678",
+         "ab-abc-abc-abc-abc",
+         "ab-abcd-abc",
+         "ab-ab-abc",
+         "ab-123-abc",
+         "a-Hant-ZH",
+         "a1-Hant-ZH",
+         "ab-abcde-abc",
+         "ab-1abc-abc",
+         "ab-ab-abcd",
+         "ab-123-abcd",
+         "ab-abcde-abcd",
+         "ab-1abc-abcd",
+         "ab-a-b",
+         "ab-a-x",
+         "ab--ab",
+         "ab-abc-",
+         "-ab-abc",
+         "abcd-efg",
+         "aabbccddE",
+     ];
+     for tag in tags.iter().copied() {
+         if let Ok(parsed) = LanguageTag::parse(tag) {
+             panic!(
+                 "{} should be considered not well-formed but returned result {:?}",
+                 tag, parsed
+             );
+         }
+     }
+ }
+
+ // Randomly generated tags that must all be accepted by `LanguageTag::parse`.
+ #[test]
+ fn test_random_good_tags() {
+     // http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagTest.txt
+     let tags = [
+         "zszLDm-sCVS-es-x-gn762vG-83-S-mlL",
+         "IIJdFI-cfZv",
+         "kbAxSgJ-685",
+         "tbutP",
+         "hDL-595",
+         "dUf-iUjq-0hJ4P-5YkF-WD8fk",
+         "FZAABA-FH",
+         "xZ-lh-4QfM5z9J-1eG4-x-K-R6VPr2z",
+         "Fyi",
+         "SeI-DbaG",
+         "ch-xwFn",
+         "OeC-GPVI",
+         "JLzvUSi",
+         "Fxh-hLAs",
+         "pKHzCP-sgaO-554",
+         "eytqeW-hfgH-uQ",
+         "ydn-zeOP-PR",
+         "uoWmBM-yHCf-JE",
+         "xwYem",
+         "zie",
+         "Re-wjSv-Ey-i-XE-E-JjWTEB8-f-DLSH-NVzLH-AtnFGWoH-SIDE",
+         "Ri-063-c-u6v-ZfhkToTB-C-IFfmv-XT-j-rdyYFMhK-h-pY-D5-Oh6FqBhL-hcXt-v-WdpNx71-\
K-c74m4-eBTT7-JdH7Q1Z",
+         "ji",
+         "IM-487",
+         "EPZ-zwcB",
+         "GauwEcwo",
+         "kDEP",
+         "FwDYt-TNvo",
+         "ottqP-KLES-x-9-i9",
+         "fcflR-grQQ",
+         "TvFwdu-kYhs",
+         "WE-336",
+         "MgxQa-ywEp-8lcW-7bvT-h-dP1Md-0h7-0Z3ir-K-Srkm-kA-7LXM-Z-whb2MiO-2mNsvbLm-W3O\
-4r-U-KceIxHdI-gvMVgUBV-2uRUni-J0-7C8yTK2",
+         "Hyr-B-evMtVoB1-mtsVZf-vQMV-gM-I-rr-kvLzg-f-lAUK-Qb36Ne-Z-7eFzOD-mv6kKf-l-miZ\
7U3-k-XDGtNQG",
+         "ybrlCpzy",
+         "PTow-w-cAQ51-8Xd6E-cumicgt-WpkZv3NY-q-ORYPRy-v-A4jL4A-iNEqQZZ-sjKn-W-N1F-pzy\
c-xP5eWz-LmsCiCcZ",
+         "ih-DlPR-PE",
+         "Krf-362",
+         "WzaD",
+         "EPaOnB-gHHn",
+         "XYta",
+         "NZ-RgOO-tR",
+         "at-FE",
+         "Tpc-693",
+         "YFp",
+         "gRQrQULo",
+         "pVomZ-585",
+         "laSu-ZcAq-338",
+         "gCW",
+         "PydSwHRI-TYfF",
+         "zKmWDD",
+         "X-bCrL5RL",
+         "HK",
+         "YMKGcLY",
+         "GDJ-nHYa-bw-X-ke-rohH5GfS-LdJKsGVe",
+         "tfOxdau-yjge-489-a-oB-I8Csb-1ESaK1v-VFNz-N-FT-ZQyn-On2-I-hu-vaW3-jIQb-vg0U-h\
Ul-h-dO6KuJqB-U-tde2L-P3gHUY-vnl5c-RyO-H-gK1-zDPu-VF1oeh8W-kGzzvBbW-yuAJZ",
+         "LwDux",
+         "Zl-072",
+         "Ri-Ar",
+         "vocMSwo-cJnr-288",
+         "kUWq-gWfQ-794",
+         "YyzqKL-273",
+         "Xrw-ZHwH-841-9foT-ESSZF-6OqO-0knk-991U-9p3m-b-JhiV-0Kq7Y-h-cxphLb-cDlXUBOQ-X\
-4Ti-jty94yPp",
+         "en-GB-oed",
+         "LEuZl-so",
+         "HyvBvFi-cCAl-X-irMQA-Pzt-H",
+         "uDbsrAA-304",
+         "wTS",
+         "IWXS",
+         "XvDqNkSn-jRDR",
+         "gX-Ycbb-iLphEks-AQ1aJ5",
+         "FbSBz-VLcR-VL",
+         "JYoVQOP-Iytp",
+         "gDSoDGD-lq-v-7aFec-ag-k-Z4-0kgNxXC-7h",
+         "Bjvoayy-029",
+         "qSDJd",
+         "qpbQov",
+         "fYIll-516",
+         "GfgLyfWE-EHtB",
+         "Wc-ZMtk",
+         "cgh-VEYK",
+         "WRZs-AaFd-yQ",
+         "eSb-CpsZ-788",
+         "YVwFU",
+         "JSsHiQhr-MpjT-381",
+         "LuhtJIQi-JKYt",
+         "vVTvS-RHcP",
+         "SY",
+         "fSf-EgvQfI-ktWoG-8X5z-63PW",
+         "NOKcy",
+         "OjJb-550",
+         "KB",
+         "qzKBv-zDKk-589",
+         "Jr",
+         "Acw-GPXf-088",
+         "WAFSbos",
+         "HkgnmerM-x-e5-zf-VdDjcpz-1V6",
+         "UAfYflJU-uXDc-YV",
+         "x-CHsHx-VDcOUAur-FqagDTx-H-V0e74R",
+         "uZIAZ-Xmbh-pd",
+     ];
+     for tag in tags.iter().copied() {
+         if let Err(error) = LanguageTag::parse(tag) {
+             panic!(
+                 "{} should be considered well-formed but returned error {}",
+                 tag, error
+             );
+         }
+     }
+ }
+
+ // Randomly generated tags that must all be rejected by `LanguageTag::parse`.
+ #[test]
+ fn test_random_bad_tags() {
+     // http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagTest.txt
+     let tags = [
+         "EdY-z_H791Xx6_m_kj",
+         "qWt85_8S0-L_rbBDq0gl_m_O_zsAx_nRS",
+         "VzyL2",
+         "T_VFJq-L-0JWuH_u2_VW-hK-kbE",
+         "u-t",
+         "Q-f_ZVJXyc-doj_k-i",
+         "JWB7gNa_K-5GB-25t_W-s-ZbGVwDu1-H3E",
+         "b-2T-Qob_L-C9v_2CZxK86",
+         "fQTpX_0_4Vg_L3L_g7VtALh2",
+         "S-Z-E_J",
+         "f6wsq-02_i-F",
+         "9_GcUPq_G",
+         "QjsIy_9-0-7_Dv2yPV09_D-JXWXM",
+         "D_se-f-k",
+         "ON47Wv1_2_W",
+         "f-z-R_s-ha",
+         "N3APeiw_195_Bx2-mM-pf-Z-Ip5lXWa-5r",
+         "IRjxU-E_6kS_D_b1b_H",
+         "NB-3-5-AyW_FQ-9hB-TrRJg3JV_3C",
+         "yF-3a_V_FoJQAHeL_Z-Mc-u",
+         "n_w_bbunOG_1-s-tJMT5je",
+         "Q-AEWE_X",
+         "57b1O_k_R6MU_sb",
+         "hK_65J_i-o_SI-Y",
+         "wB4B7u_5I2_I_NZPI",
+         "J24Nb_q_d-zE",
+         "v6-dHjJmvPS_IEb-x_A-O-i",
+         "8_8_dl-ZgBr84u-P-E",
+         "nIn-xD7EVhe_C",
+         "5_N-6P_x7Of_Lo_6_YX_R",
+         "0_46Oo0sZ-YNwiU8Wr_d-M-pg1OriV",
+         "laiY-5",
+         "K-8Mdd-j_ila0sSpo_aO8_J",
+         "wNATtSL-Cp4_gPa_fD41_9z",
+         "H_FGz5V8_n6rrcoz0_1O6d-kH-7-N",
+         "wDOrnHU-odqJ_vWl",
+         "gP_qO-I-jH",
+         "h",
+         "dJ0hX-o_csBykEhU-F",
+         "L-Vf7_BV_eRJ5goSF_Kp",
+         "y-oF-chnavU-H",
+         "9FkG-8Q-8_v",
+         "W_l_NDQqI-O_SFSAOVq",
+         "kDG3fzXw",
+         "t-nsSp-7-t-mUK2",
+         "Yw-F",
+         "1-S_3_l",
+         "u-v_brn-Y",
+         "4_ft_3ZPZC5lA_D",
+         "n_dR-QodsqJnh_e",
+         "Hwvt-bSwZwj_KL-hxg0m-3_hUG",
+         "mQHzvcV-UL-o2O_1KhUJQo_G2_uryk3-a",
+         "b-UTn33HF",
+         "r-Ep-jY-aFM_N_H",
+         "K-k-krEZ0gwD_k_ua-9dm3Oy-s_v",
+         "XS_oS-p",
+         "EIx_h-zf5",
+         "p_z-0_i-omQCo3B",
+         "1_q0N_jo_9",
+         "0Ai-6-S",
+         "L-LZEp_HtW",
+         "Zj-A4JD_2A5Aj7_b-m3",
+         "x",
+         "p-qPuXQpp_d-jeKifB-c-7_G-X",
+         "X94cvJ_A",
+         "F2D25R_qk_W-w_Okf_kx",
+         "rc-f",
+         "D",
+         "gD_WrDfxmF-wu-E-U4t",
+         "Z_BN9O4_D9-D_0E_KnCwZF-84b-19",
+         "T-8_g-u-0_E",
+         "lXTtys9j_X_A_m-vtNiNMw_X_b-C6Nr",
+         "V_Ps-4Y-S",
+         "X5wGEA",
+         "mIbHFf_ALu4_Jo1Z1",
+         "ET-TacYx_c",
+         "Z-Lm5cAP_ri88-d_q_fi8-x",
+         "rTi2ah-4j_j_4AlxTs6m_8-g9zqncIf-N5",
+         "FBaLB85_u-0NxhAy-ZU_9c",
+         "x_j_l-5_aV95_s_tY_jp4",
+         "PL768_D-m7jNWjfD-Nl_7qvb_bs_8_Vg",
+         "9-yOc-gbh",
+         "6DYxZ_SL-S_Ye",
+         "ZCa-U-muib-6-d-f_oEh_O",
+         "Qt-S-o8340F_f_aGax-c-jbV0gfK_p",
+         "WE_SzOI_OGuoBDk-gDp",
+         "cs-Y_9",
+         "m1_uj",
+         "Y-ob_PT",
+         "li-B",
+         "f-2-7-9m_f8den_J_T_d",
+         "p-Os0dua-H_o-u",
+         "L",
+         "rby-w",
+     ];
+     for tag in tags.iter().copied() {
+         if let Ok(parsed) = LanguageTag::parse(tag) {
+             panic!(
+                 "{} should be considered not well-formed but returned result {:?}",
+                 tag, parsed
+             );
+         }
+     }
+ }
+
+ // A tag parsed without normalization compares and hashes like the exact
+ // string it was parsed from, case-sensitively.
+ #[test]
+ fn test_eq() {
+     let tag = LanguageTag::parse("en-fr").unwrap();
+
+     // Comparison with string slices, in both operand orders.
+     assert_eq!(tag, "en-fr");
+     assert_ne!(tag, "en-FR");
+     assert_eq!("en-fr", tag);
+
+     // Hashing agrees with hashing the same string.
+     let same_text = hash("en-fr");
+     assert_eq!(hash(&tag), same_text);
+     let other_casing = hash("en-FR");
+     assert_ne!(hash(&tag), other_casing);
+ }
+
+ // Feeds `value` into a fresh `DefaultHasher` and returns the resulting digest.
+ fn hash(value: impl Hash) -> u64 {
+     let mut state = DefaultHasher::new();
+     Hash::hash(&value, &mut state);
+     state.finish()
+ }
+
+ // `str` methods can be called directly on a tag.
+ #[test]
+ fn test_str() {
+     let tag = LanguageTag::parse("en-fr").unwrap();
+     let has_language_prefix = tag.starts_with("en-");
+     assert!(has_language_prefix);
+ }
+
+ // Serde support: a tag maps to a single string token, and deserializing a
+ // malformed tag reports the parser's error message.
+ #[cfg(feature = "serde")]
+ #[test]
+ fn test_serd_impl() {
+     let borrowed = LanguageTag::parse("en-us").unwrap();
+     assert_tokens(&borrowed, &[Token::BorrowedStr("en-us")]);
+
+     let owned = LanguageTag::parse("en-US".to_string()).unwrap();
+     assert_tokens(&owned, &[Token::String("en-US")]);
+     // An owned tag can also be deserialized from a borrowed string token.
+     assert_de_tokens(&owned, &[Token::BorrowedStr("en-US")]);
+
+     assert_de_tokens_error::<LanguageTag<String>>(
+         &[Token::String("verybadvalue")],
+         "A subtag may be eight characters in length at maximum",
+     );
+ }