summaryrefslogtreecommitdiffstats
path: root/vendor/icu_locid_transform
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_locid_transform')
-rw-r--r--vendor/icu_locid_transform/.cargo-checksum.json1
-rw-r--r--vendor/icu_locid_transform/Cargo.toml128
-rw-r--r--vendor/icu_locid_transform/LICENSE44
-rw-r--r--vendor/icu_locid_transform/README.md77
-rw-r--r--vendor/icu_locid_transform/benches/fixtures/locales.json41
-rw-r--r--vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json88
-rw-r--r--vendor/icu_locid_transform/benches/helpers/mod.rs15
-rw-r--r--vendor/icu_locid_transform/benches/locale_canonicalizer.rs99
-rw-r--r--vendor/icu_locid_transform/src/canonicalizer.rs618
-rw-r--r--vendor/icu_locid_transform/src/directionality.rs231
-rw-r--r--vendor/icu_locid_transform/src/error.rs27
-rw-r--r--vendor/icu_locid_transform/src/expander.rs722
-rw-r--r--vendor/icu_locid_transform/src/fallback/algorithms.rs487
-rw-r--r--vendor/icu_locid_transform/src/fallback/mod.rs300
-rw-r--r--vendor/icu_locid_transform/src/lib.rs116
-rw-r--r--vendor/icu_locid_transform/src/provider/canonicalizer.rs81
-rw-r--r--vendor/icu_locid_transform/src/provider/directionality.rs36
-rw-r--r--vendor/icu_locid_transform/src/provider/expander.rs243
-rw-r--r--vendor/icu_locid_transform/src/provider/fallback.rs102
-rw-r--r--vendor/icu_locid_transform/src/provider/mod.rs98
-rw-r--r--vendor/icu_locid_transform/tests/fixtures/canonicalize.json444
-rw-r--r--vendor/icu_locid_transform/tests/fixtures/maximize.json182
-rw-r--r--vendor/icu_locid_transform/tests/fixtures/minimize.json26
-rw-r--r--vendor/icu_locid_transform/tests/fixtures/mod.rs12
-rw-r--r--vendor/icu_locid_transform/tests/helpers/mod.rs15
-rw-r--r--vendor/icu_locid_transform/tests/locale_canonicalizer.rs82
26 files changed, 4315 insertions, 0 deletions
diff --git a/vendor/icu_locid_transform/.cargo-checksum.json b/vendor/icu_locid_transform/.cargo-checksum.json
new file mode 100644
index 000000000..ff2371e3f
--- /dev/null
+++ b/vendor/icu_locid_transform/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"334f54b489b88e4808da4fa355ddf773b86971570d4bb0360a876e3437cb962d","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"3ff3f2e2f9e5d4c5786132838576edef42a12c5529d5f080370f24aa6246bb92","benches/fixtures/locales.json":"9846601a29874baf140cac1252d4624fadc30182fec106d17f008ece886b9185","benches/fixtures/uncanonicalized-locales.json":"a866ed318b92f79d8853567e79b373c02984967023f5f39161140544e71b0c72","benches/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","benches/locale_canonicalizer.rs":"477af27aa35385d107e19b8e8a0452466a69d20e147a63631b78634c17712fed","src/canonicalizer.rs":"7770767fad20f38aaae19382430451573293ccdeac587b2d063163b870781086","src/directionality.rs":"a031a9d55ffe827c86400637b7302dc424c708dcc52ea667504a33a16db822c2","src/error.rs":"486fda8a0e9b7bb5822bbb0defb51145364d6053b8d60b88ef71e4b2bcd6699d","src/expander.rs":"d3ef487a416425ea6fb2ce7bf08b7487e180a580002e54ce30d5524cfd7514e2","src/fallback/algorithms.rs":"47625130cd5a04cf085dd0494591e117ba204a9d2eb649788b0ff96773cc6e21","src/fallback/mod.rs":"71ca2f23e410863010a62a48bba8a943763f7d49c12bf80b451a1b9295484e44","src/lib.rs":"5390facdc3df7e5ec5ab842bf59d4d13383d77d93a722685231a1d271cfba944","src/provider/canonicalizer.rs":"f848dbbc906b5f3be0b6384f5a2f26178898822a5c37334a57b12db8e1af0ed9","src/provider/directionality.rs":"fc516f501254af444cfa010d3c87aeea032dd6eccf5f82301c050ed3df2e05b1","src/provider/expander.rs":"6903d16138ada8216e0341d984126dcc1f6fac21468144e8140fc217b164572e","src/provider/fallback.rs":"d567e3d49261cac9de35825b3d57204d49068558f10579121f0bf0c42090c9cc","src/provider/mod.rs":"ce8e29eda7128747d489371118d1cf2c0e2740662eb8c6a55310dff86c5641cc","tests/fixtures/canonicalize.json":"3dc2f661b04e4c9ecced70fc1b98a504eb5f5a0067b38665b10e50c25174bc4a","tests/fixtures/maximize.json":"8137359060218572bcaf5e56825346fdcb600e2189378ef4be836ba0a7295b66","tests/fixtures/minimize.json":"3bb6f19c5525818212388dcbf
778064e7f73d2c32a8a7e8c58d618583a77121a","tests/fixtures/mod.rs":"18a900aa4f74120b7e7e64fcb09eae38a16504d66e23f752e743dcd9b1ad6530","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/locale_canonicalizer.rs":"1ebf7320f422b65cc3cc50468abdc8f08128feba85d936f5beb456b0b052a91d"},"package":"6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7"} \ No newline at end of file
diff --git a/vendor/icu_locid_transform/Cargo.toml b/vendor/icu_locid_transform/Cargo.toml
new file mode 100644
index 000000000..b083c54a2
--- /dev/null
+++ b/vendor/icu_locid_transform/Cargo.toml
@@ -0,0 +1,128 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.66"
+name = "icu_locid_transform"
+version = "1.3.2"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "API for Unicode Language and Locale Identifiers canonicalization"
+homepage = "https://icu4x.unicode.org"
+readme = "README.md"
+categories = ["internationalization"]
+license-file = "LICENSE"
+repository = "https://github.com/unicode-org/icu4x"
+
+[package.metadata.cargo-all-features]
+denylist = ["bench"]
+skip_optional_dependencies = true
+
+[package.metadata.docs.rs]
+all-features = true
+
+[lib]
+bench = false
+
+[[test]]
+name = "locale_canonicalizer"
+required-features = ["serde"]
+
+[[bench]]
+name = "locale_canonicalizer"
+harness = false
+
+[dependencies.databake]
+version = "0.1.6"
+features = ["derive"]
+optional = true
+default-features = false
+
+[dependencies.displaydoc]
+version = "0.2.3"
+default-features = false
+
+[dependencies.icu_locid]
+version = "~1.3.2"
+features = ["zerovec"]
+default-features = false
+
+[dependencies.icu_locid_transform_data]
+version = "~1.3.2"
+optional = true
+default-features = false
+
+[dependencies.icu_provider]
+version = "~1.3.2"
+features = ["macros"]
+default-features = false
+
+[dependencies.serde]
+version = "1.0"
+features = [
+ "derive",
+ "alloc",
+]
+optional = true
+default-features = false
+
+[dependencies.tinystr]
+version = "0.7.3"
+features = [
+ "alloc",
+ "zerovec",
+]
+default-features = false
+
+[dependencies.zerovec]
+version = "0.10.0"
+features = ["yoke"]
+default-features = false
+
+[dev-dependencies.serde]
+version = "1.0"
+features = ["derive"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+
+[features]
+bench = ["serde"]
+compiled_data = ["dep:icu_locid_transform_data"]
+datagen = [
+ "serde",
+ "dep:databake",
+ "zerovec/databake",
+ "icu_locid/databake",
+ "tinystr/databake",
+]
+default = ["compiled_data"]
+serde = [
+ "dep:serde",
+ "icu_locid/serde",
+ "tinystr/serde",
+ "zerovec/serde",
+ "icu_provider/serde",
+]
+std = []
+
+[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion]
+version = "0.4"
diff --git a/vendor/icu_locid_transform/LICENSE b/vendor/icu_locid_transform/LICENSE
new file mode 100644
index 000000000..9845aa5f4
--- /dev/null
+++ b/vendor/icu_locid_transform/LICENSE
@@ -0,0 +1,44 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2023 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/vendor/icu_locid_transform/README.md b/vendor/icu_locid_transform/README.md
new file mode 100644
index 000000000..b97fea239
--- /dev/null
+++ b/vendor/icu_locid_transform/README.md
@@ -0,0 +1,77 @@
+# icu_locid_transform [![crates.io](https://img.shields.io/crates/v/icu_locid_transform)](https://crates.io/crates/icu_locid_transform)
+
+<!-- cargo-rdme start -->
+
+Canonicalization of locale identifiers based on [`CLDR`] data.
+
+This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/))
+and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+
+It currently supports locale canonicalization based upon the canonicalization
+algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`],
+as well as the minimize and maximize likely subtags algorithms
+as described in [`UTS #35: Unicode LDML 3. Likely Subtags`].
+
+The maximize method potentially updates a passed in locale in place
+depending upon the results of running the 'Add Likely Subtags' algorithm
+from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+
+The minimize method potentially updates a passed in locale in place
+depending upon the results of running the 'Remove Likely Subtags' algorithm from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+
+## Examples
+
+```rust
+use icu::locid::Locale;
+use icu::locid_transform::{LocaleCanonicalizer, TransformResult};
+
+let lc = LocaleCanonicalizer::new();
+
+let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc"
+ .parse()
+ .expect("parse failed");
+assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::<Locale>().unwrap());
+```
+
+```rust
+use icu::locid::locale;
+use icu::locid_transform::{LocaleExpander, TransformResult};
+
+let lc = LocaleExpander::new();
+
+let mut locale = locale!("zh-CN");
+assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+assert_eq!(locale, locale!("zh-Hans-CN"));
+
+let mut locale = locale!("zh-Hant-TW");
+assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+assert_eq!(locale, locale!("zh-Hant-TW"));
+```
+
+```rust
+use icu::locid::locale;
+use icu::locid_transform::{LocaleExpander, TransformResult};
+use writeable::assert_writeable_eq;
+
+let lc = LocaleExpander::new();
+
+let mut locale = locale!("zh-Hans-CN");
+assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+assert_eq!(locale, locale!("zh"));
+
+let mut locale = locale!("zh");
+assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+assert_eq!(locale, locale!("zh"));
+```
+
+[`ICU4X`]: ../icu/index.html
+[`CLDR`]: http://cldr.unicode.org/
+[`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags
+[`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/vendor/icu_locid_transform/benches/fixtures/locales.json b/vendor/icu_locid_transform/benches/fixtures/locales.json
new file mode 100644
index 000000000..0e8ba8b79
--- /dev/null
+++ b/vendor/icu_locid_transform/benches/fixtures/locales.json
@@ -0,0 +1,41 @@
+[
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "it",
+ "zh-Hans-CN",
+ "de-AT",
+ "pl",
+ "fr-FR",
+ "de-AT",
+ "sr-Cyrl-SR",
+ "nb-NO",
+ "fr-FR",
+ "mk",
+ "uk",
+ "und-PL",
+ "und-Latn-AM",
+ "ug-Cyrl",
+ "sr-ME",
+ "mn-Mong",
+ "lif-Limb",
+ "gan",
+ "zh-Hant",
+ "yue-Hans",
+ "unr",
+ "unr-Deva",
+ "und-Thai-CN",
+ "ug-Cyrl",
+ "en-Latn-DE",
+ "pl-FR",
+ "de-CH",
+ "tuq",
+ "sr-ME",
+ "ng",
+ "klx",
+ "kk-Arab",
+ "en-Cyrl",
+ "und-Cyrl-UK",
+ "und-Arab",
+ "und-Arab-FO"
+]
diff --git a/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json b/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json
new file mode 100644
index 000000000..18eadbce6
--- /dev/null
+++ b/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json
@@ -0,0 +1,88 @@
+[
+ "cka",
+ "cze",
+ "gfx",
+ "sgn-BR",
+ "sgn-DD",
+ "tam",
+ "und-aaland",
+ "nob-bokmal",
+ "no-nynorsk",
+ "und-Qaai",
+ "en-554",
+ "en-084",
+ "art-lojban",
+ "zh-guoyu",
+ "zh-hakka",
+ "zh-xiang",
+ "aar-x-private",
+ "heb-x-private",
+ "ces",
+ "hy-arevela",
+ "hy-arevmda",
+ "cel-gaulish",
+ "ja-latn-hepburn-heploc",
+ "ja-Latn-fonipa-hepburn-heploc",
+ "und-Armn-SU",
+ "sh",
+ "sh-Cyrl",
+ "cnr",
+ "cnr-BA",
+ "ru-SU",
+ "ru-810",
+ "en-SU",
+ "en-810",
+ "und-SU",
+ "und-810",
+ "und-Latn-SU",
+ "und-Latn-810",
+ "hy-SU",
+ "hy-810",
+ "und-Armn-SU",
+ "und-Armn-810",
+ "sr-CS",
+ "sr-Latn-CS",
+ "sr-Cyrl-CS",
+ "az-NT",
+ "sl-t-sl-rozaj-biske-1994",
+ "DE-T-M0-DIN-K0-QWERTZ",
+ "en-t-m0-true",
+ "en-t-iw",
+ "und-u-rg-no23",
+ "und-u-rg-cn11",
+ "und-u-rg-cz10a",
+ "und-u-rg-fra",
+ "und-u-rg-frg",
+ "und-u-rg-lud",
+ "und-NO-u-rg-no23",
+ "und-CN-u-rg-cn11",
+ "und-CZ-u-rg-cz10a",
+ "und-FR-u-rg-fra",
+ "und-FR-u-rg-frg",
+ "und-u-rg-lud",
+ "und-u-sd-no23",
+ "und-u-sd-cn11",
+ "und-u-sd-cz10a",
+ "und-u-sd-fra",
+ "hy-arevela",
+ "hy-Armn-arevela",
+ "hy-AM-arevela",
+ "hy-arevela-fonipa",
+ "hy-fonipa-arevela",
+ "hy-arevmda",
+ "hy-Armn-arevmda",
+ "hy-AM-arevmda",
+ "hy-arevmda-fonipa",
+ "hy-fonipa-arevmda",
+ "ja-Latn-hepburn-heploc",
+ "ja-Latn-JP-hepburn-heploc",
+ "sv-aaland",
+ "el-polytoni",
+ "ja-Latn-alalc97-hepburn-heploc",
+ "ja-Latn-hepburn-alalc97-heploc",
+ "ja-Latn-hepburn-heploc-alalc97",
+ "ja-Latn-heploc-hepburn",
+ "ja-Latn-heploc",
+ "ja-Latn-aaland-heploc",
+ "ja-Latn-heploc-polytoni"
+]
diff --git a/vendor/icu_locid_transform/benches/helpers/mod.rs b/vendor/icu_locid_transform/benches/helpers/mod.rs
new file mode 100644
index 000000000..d250c510c
--- /dev/null
+++ b/vendor/icu_locid_transform/benches/helpers/mod.rs
@@ -0,0 +1,15 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+pub fn read_fixture<T>(path: &str) -> Result<T, Error> // Reads the JSON file at `path` and deserializes it into a `T`.
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?; // A failure to open the file is returned to the caller unchanged.
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?) // `?` converts a serde_json error into the `io::Error` return type.
+}
diff --git a/vendor/icu_locid_transform/benches/locale_canonicalizer.rs b/vendor/icu_locid_transform/benches/locale_canonicalizer.rs
new file mode 100644
index 000000000..1ea8df6b3
--- /dev/null
+++ b/vendor/icu_locid_transform/benches/locale_canonicalizer.rs
@@ -0,0 +1,99 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use icu_locid::Locale;
+use icu_locid_transform::LocaleCanonicalizer;
+use icu_locid_transform::LocaleExpander;
+
+fn canonicalize_bench(c: &mut Criterion) { // Benchmarks `canonicalize` over the uncanonicalized-locales fixture.
+ let lc = LocaleCanonicalizer::new();
+
+ let mut group = c.benchmark_group("uncanonicalized");
+
+ let path = "./benches/fixtures/uncanonicalized-locales.json"; // Relative path: resolved against the process working directory.
+ let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture");
+ let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); // Panics if a fixture entry does not parse as a `Locale`.
+
+ group.bench_function("clone", |b| { // Baseline: clones every locale without canonicalizing it.
+ b.iter(|| {
+ for locale in &locales {
+ let _ = black_box(locale).clone();
+ }
+ })
+ });
+
+ group.bench_function("canonicalize", |b| { // Clone plus canonicalize; compare against the "clone" baseline above.
+ b.iter(|| {
+ for locale in &locales {
+ let mut locale = black_box(locale).clone(); // `canonicalize` mutates in place, so work on a copy.
+ lc.canonicalize(&mut locale);
+ }
+ })
+ });
+
+ group.finish();
+}
+
+fn canonicalize_noop_bench(c: &mut Criterion) { // Benchmarks `canonicalize` over the locales.json fixture.
+ let lc = LocaleCanonicalizer::new();
+
+ let mut group = c.benchmark_group("canonicalized");
+
+ // None of these locales require canonicalization, so this measures the cost of calling
+ // the canonicalizer on locales that will not be modified.
+ let path = "./benches/fixtures/locales.json"; // Relative path: resolved against the process working directory.
+ let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture");
+ let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); // Panics if a fixture entry does not parse as a `Locale`.
+
+ group.bench_function("clone", |b| { // Baseline: clones every locale without canonicalizing it.
+ b.iter(|| {
+ for locale in &locales {
+ let _ = black_box(locale).clone();
+ }
+ })
+ });
+
+ group.bench_function("canonicalize", |b| { // Clone plus canonicalize; compare against the "clone" baseline above.
+ b.iter(|| {
+ for locale in &locales {
+ let mut locale = black_box(locale).clone(); // `canonicalize` takes `&mut`, so work on a copy.
+ lc.canonicalize(&mut locale);
+ }
+ })
+ });
+
+ group.finish();
+}
+
+fn maximize_bench(c: &mut Criterion) { // Benchmarks `LocaleExpander::maximize` over the locales.json fixture.
+ let lc = LocaleExpander::new();
+
+ let mut group = c.benchmark_group("likelysubtags");
+
+ let path = "./benches/fixtures/locales.json"; // Relative path: resolved against the process working directory.
+ let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture");
+ let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); // Panics if a fixture entry does not parse as a `Locale`.
+
+ group.bench_function("maximize", |b| { // No separate "clone" baseline in this group: the timing includes the clone.
+ b.iter(|| {
+ for locale in &locales {
+ let mut locale = locale.clone(); // `maximize` mutates in place, so work on a copy.
+ lc.maximize(black_box(&mut locale));
+ }
+ })
+ });
+
+ group.finish();
+}
+
+criterion_group!(
+ benches, // Name of the group; passed to `criterion_main!` below.
+ canonicalize_bench,
+ canonicalize_noop_bench,
+ maximize_bench
+);
+criterion_main!(benches); // Entry point for the bench target (Cargo.toml sets `harness = false` for it).
diff --git a/vendor/icu_locid_transform/src/canonicalizer.rs b/vendor/icu_locid_transform/src/canonicalizer.rs
new file mode 100644
index 000000000..5a3782638
--- /dev/null
+++ b/vendor/icu_locid_transform/src/canonicalizer.rs
@@ -0,0 +1,618 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! The collection of code for locale canonicalization.
+
+use crate::provider::*;
+use crate::LocaleTransformError;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+
+use crate::LocaleExpander;
+use crate::TransformResult;
+use icu_locid::subtags::{Language, Region, Script};
+use icu_locid::{
+ extensions::unicode::key,
+ subtags::{language, Variant, Variants},
+ LanguageIdentifier, Locale,
+};
+use icu_provider::prelude::*;
+use tinystr::TinyAsciiStr;
+
+/// Implements the algorithm defined in *[UTS #35: Annex C, LocaleId Canonicalization]*.
+///
+/// # Examples
+///
+/// ```
+/// use icu_locid::Locale;
+/// use icu_locid_transform::{LocaleCanonicalizer, TransformResult};
+///
+/// let lc = LocaleCanonicalizer::new();
+///
+/// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap();
+/// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap());
+/// ```
+///
+/// [UTS #35: Annex C, LocaleId Canonicalization]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization
+#[derive(Debug)]
+pub struct LocaleCanonicalizer {
+ /// Alias tables (language, script, region, variant, ...) consulted by `canonicalize`.
+ aliases: DataPayload<AliasesV1Marker>,
+ /// Likely subtags expander; `canonicalize` maximizes through it to choose a region for complex region aliases.
+ expander: LocaleExpander,
+}
+
+#[inline]
+fn uts35_rule_matches<'a, I>( // Returns true when `source` satisfies every constraint of an alias rule.
+ source: &Locale,
+ language: Language,
+ script: Option<Script>,
+ region: Option<Region>,
+ raw_variants: I, // Rule variants; the subset check below expects them in sorted order.
+) -> bool
+where
+ I: Iterator<Item = &'a str>,
+{
+ (language.is_empty() || language == source.id.language) // An empty rule language matches any source language.
+ && (script.is_none() || script == source.id.script) // `None` means the rule does not constrain the script.
+ && (region.is_none() || region == source.id.region) // `None` means the rule does not constrain the region.
+ && {
+ // Checks if variants are a subset of source variants.
+ // As both iterators are sorted, this can be done linearly.
+ let mut source_variants = source.id.variants.iter();
+ 'outer: for it in raw_variants {
+ for cand in source_variants.by_ref() { // `by_ref` keeps the scan position across rule variants.
+ match cand.strict_cmp(it.as_bytes()) {
+ Ordering::Equal => {
+ continue 'outer; // Found this rule variant; move on to the next one.
+ }
+ Ordering::Less => {} // Source variant sorts before the wanted one: keep scanning.
+ _ => {
+ return false; // Scan moved past where the wanted variant would be: it is absent.
+ }
+ }
+ }
+ return false; // Source variants exhausted before this rule variant was found.
+ }
+ true
+ }
+}
+
+fn uts35_replacement<'a, I>( // Rewrites `source` in place from `replacement`, following the matched rule's shape.
+ source: &mut Locale,
+ ruletype_has_language: bool,
+ ruletype_has_script: bool,
+ ruletype_has_region: bool,
+ ruletype_variants: Option<I>, // Variants named by the rule; `None` leaves the source variants untouched.
+ replacement: &LanguageIdentifier,
+) where
+ I: Iterator<Item = &'a str>,
+{
+ if ruletype_has_language || (source.id.language.is_empty() && !replacement.language.is_empty()) // Overwrite if the rule named a language, else only fill a gap.
+ {
+ source.id.language = replacement.language;
+ }
+ if ruletype_has_script || (source.id.script.is_none() && replacement.script.is_some()) { // A script named by the rule is overwritten, possibly with `None`.
+ source.id.script = replacement.script;
+ }
+ if ruletype_has_region || (source.id.region.is_none() && replacement.region.is_some()) { // Same policy as for the script.
+ source.id.region = replacement.region;
+ }
+ if let Some(skips) = ruletype_variants {
+ // The rule matches if the ruletype variants are a subset of the source variants.
+ // This means ja-Latn-fonipa-hepburn-heploc matches against the rule for
+ // hepburn-heploc and is canonicalized to ja-Latn-alalc97-fonipa
+
+ // We're merging three sorted deduped iterators into a new sequence:
+ // sources - skips + replacements
+
+ let mut sources = source.id.variants.iter().copied().peekable();
+ let mut replacements = replacement.variants.iter().copied().peekable();
+ let mut skips = skips.peekable();
+
+ let mut variants: Vec<Variant> = Vec::new();
+
+ loop {
+ match (sources.peek(), skips.peek(), replacements.peek()) {
+ (Some(&source), Some(skip), _)
+ if source.strict_cmp(skip.as_bytes()) == Ordering::Greater =>
+ {
+ skips.next(); // The skip sorts before the current source variant: discard it.
+ }
+ (Some(&source), Some(skip), _)
+ if source.strict_cmp(skip.as_bytes()) == Ordering::Equal =>
+ {
+ skips.next();
+ sources.next(); // Consumed without being pushed: this variant is removed.
+ }
+ (Some(&source), _, Some(&replacement))
+ if replacement.cmp(&source) == Ordering::Less =>
+ {
+ variants.push(replacement); // The replacement variant sorts first: emit it.
+ replacements.next();
+ }
+ (Some(&source), _, Some(&replacement))
+ if replacement.cmp(&source) == Ordering::Equal =>
+ {
+ variants.push(source); // Present on both sides: emit once, advance both.
+ sources.next();
+ replacements.next();
+ }
+ (Some(&source), _, _) => {
+ variants.push(source); // Otherwise the source variant is kept.
+ sources.next();
+ }
+ (None, _, Some(&replacement)) => {
+ variants.push(replacement); // Sources exhausted: append the remaining replacements.
+ replacements.next();
+ }
+ (None, _, None) => {
+ break; // Nothing left to emit; any remaining skips are ignored.
+ }
+ }
+ }
+ source.id.variants = Variants::from_vec_unchecked(variants); // Unchecked: relies on the merge above yielding sorted, deduplicated output.
+ }
+}
+
+#[inline]
+fn uts35_check_language_rules( // Applies a language alias to `locale`, if one exists; reports whether it did.
+ locale: &mut Locale,
+ alias_data: &DataPayload<AliasesV1Marker>,
+) -> TransformResult {
+ if !locale.id.language.is_empty() {
+ let lang: TinyAsciiStr<3> = locale.id.language.into();
+ let replacement = if lang.len() == 2 { // Two-letter and other-length codes are looked up in separate tables.
+ alias_data
+ .get()
+ .language_len2
+ .get(&lang.resize().to_unvalidated())
+ } else {
+ alias_data.get().language_len3.get(&lang.to_unvalidated())
+ };
+
+ if let Some(replacement) = replacement {
+ if let Ok(langid) = replacement.parse() { // An unparsable replacement is ignored and falls through to `Unmodified`.
+ uts35_replacement::<core::iter::Empty<&str>>(
+ locale, true, false, false, None, &langid, // Only the language is forced; variants are left alone (`None`).
+ );
+ return TransformResult::Modified;
+ }
+ }
+ }
+
+ TransformResult::Unmodified
+}
+
+fn is_iter_sorted<I, T>(mut iter: I) -> bool // Returns true if no item is greater than the item that follows it.
+where
+ I: Iterator<Item = T>,
+ T: PartialOrd,
+{
+ if let Some(mut last) = iter.next() { // An empty iterator skips the loop and counts as sorted.
+ for curr in iter {
+ if last > curr { // Equal neighbours are accepted; only a strict decrease fails.
+ return false;
+ }
+ last = curr;
+ }
+ }
+ true
+}
+
+#[cfg(feature = "compiled_data")] // `new()` is itself gated on this feature, so the impl must be too.
+impl Default for LocaleCanonicalizer {
+ fn default() -> Self {
+ Self::new() // Same as the compiled-data constructor.
+ }
+}
+
+impl LocaleCanonicalizer {
+ /// Creates a [`LocaleCanonicalizer`] from compiled data, using an extended [`LocaleExpander`].
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ #[cfg(feature = "compiled_data")]
+ pub const fn new() -> Self {
+ Self::new_with_expander(LocaleExpander::new_extended()) // Delegates; only the choice of expander is made here.
+ }
+
+ // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
+ pub fn try_new_with_any_provider(
+ provider: &(impl AnyProvider + ?Sized),
+ ) -> Result<LocaleCanonicalizer, LocaleTransformError> {
+ let expander = LocaleExpander::try_new_with_any_provider(provider)?; // Expander is built from the same provider; its error is propagated.
+ Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander) // The alias data is loaded through the downcasting wrapper.
+ }
+
+ // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
+ #[cfg(feature = "serde")]
+ pub fn try_new_with_buffer_provider(
+ provider: &(impl BufferProvider + ?Sized),
+ ) -> Result<LocaleCanonicalizer, LocaleTransformError> {
+ let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; // Expander is built from the same provider; its error is propagated.
+ Self::try_new_with_expander_unstable(&provider.as_deserializing(), expander) // The alias data is loaded through the deserializing wrapper.
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleCanonicalizer, LocaleTransformError>
+ where
+ P: DataProvider<AliasesV1Marker> // Alias data, loaded by `try_new_with_expander_unstable`.
+ + DataProvider<LikelySubtagsForLanguageV1Marker>
+ + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+ + ?Sized,
+ {
+ let expander = LocaleExpander::try_new_unstable(provider)?; // Expander is built from the same provider; its error is propagated.
+ Self::try_new_with_expander_unstable(provider, expander)
+ }
+
+ /// Creates a [`LocaleCanonicalizer`] from the caller's [`LocaleExpander`] and compiled alias data.
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ #[cfg(feature = "compiled_data")]
+ pub const fn new_with_expander(expander: LocaleExpander) -> Self {
+ Self {
+ aliases: DataPayload::from_static_ref( // Wraps the baked static alias data in a payload.
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_ALIASES_V1,
+ ),
+ expander,
+ }
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)]
+ pub fn try_new_with_expander_unstable<P>(
+ provider: &P,
+ expander: LocaleExpander,
+ ) -> Result<LocaleCanonicalizer, LocaleTransformError>
+ where
+ P: DataProvider<AliasesV1Marker> + ?Sized, // Only alias data is needed; the expander is supplied by the caller.
+ {
+ let aliases: DataPayload<AliasesV1Marker> =
+ provider.load(Default::default())?.take_payload()?; // Default request; a load error or a missing payload is returned as the error.
+
+ Ok(LocaleCanonicalizer { aliases, expander })
+ }
+
+ icu_provider::gen_any_buffer_data_constructors!(
+ locale: skip,
+ options: LocaleExpander,
+ error: LocaleTransformError,
+ #[cfg(skip)] // NOTE(review): presumably tells the macro not to emit `new_with_expander` (hand-written above) — confirm against the macro definition.
+ functions: [
+ new_with_expander,
+ try_new_with_expander_with_any_provider,
+ try_new_with_expander_with_buffer_provider,
+ try_new_with_expander_unstable,
+ Self,
+ ]
+ );
+
+ /// The canonicalize method potentially updates a passed in locale in place
+ /// depending upon the results of running the canonicalization algorithm
+ /// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>.
+ ///
+ /// Some BCP47 canonicalization data is not part of the CLDR json package. Because
+ /// of this, some canonicalizations are not performed, e.g. the canonicalization of
+ /// `und-u-ca-islamicc` to `und-u-ca-islamic-civil`. This will be fixed in a future
+ /// release once the missing data has been added to the CLDR json data. See:
+ /// <https://github.com/unicode-org/icu4x/issues/746>
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::Locale;
+ /// use icu_locid_transform::{LocaleCanonicalizer, TransformResult};
+ ///
+ /// let lc = LocaleCanonicalizer::new();
+ ///
+ /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap();
+ /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+ /// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap());
+ /// ```
+ pub fn canonicalize(&self, locale: &mut Locale) -> TransformResult {
+ let mut result = TransformResult::Unmodified;
+
+ // This loops until we get a 'fixed point', where applying the rules does not
+ // result in any more changes.
+ 'outer: loop {
+ // These are linear searches due to the ordering imposed by the canonicalization
+ // rules, where rules with more variants should be considered first. With the
+ // current data in CLDR, we will only do this for locales which have variants,
+ // or new rules which we haven't special-cased yet (of which there are fewer
+ // than 20).
+ if !locale.id.variants.is_empty() {
+ // These language/variant combinations have around 20 rules
+ for StrStrPair(raw_lang_variants, raw_to) in self
+ .aliases
+ .get()
+ .language_variants
+ .iter()
+ .map(zerofrom::ZeroFrom::zero_from)
+ {
+ let (raw_lang, raw_variants) = {
+ let mut subtags = raw_lang_variants.split('-');
+ (
+ // str::split can't return empty iterators
+ unsafe { subtags.next().unwrap_unchecked() },
+ subtags,
+ )
+ };
+ if is_iter_sorted(raw_variants.clone()) {
+ if let Ok(lang) = raw_lang.parse::<Language>() {
+ if uts35_rule_matches(locale, lang, None, None, raw_variants.clone()) {
+ if let Ok(to) = raw_to.parse() {
+ uts35_replacement(
+ locale,
+ !lang.is_empty(),
+ false,
+ false,
+ Some(raw_variants),
+ &to,
+ );
+ result = TransformResult::Modified;
+ continue 'outer;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ // These are absolute fallbacks, and currently empty.
+ for StrStrPair(raw_from, raw_to) in self
+ .aliases
+ .get()
+ .language
+ .iter()
+ .map(zerofrom::ZeroFrom::zero_from)
+ {
+ if let Ok(from) = raw_from.parse::<LanguageIdentifier>() {
+ if uts35_rule_matches(
+ locale,
+ from.language,
+ from.script,
+ from.region,
+ from.variants.iter().map(Variant::as_str),
+ ) {
+ if let Ok(to) = raw_to.parse() {
+ uts35_replacement(
+ locale,
+ !from.language.is_empty(),
+ from.script.is_some(),
+ from.region.is_some(),
+ Some(from.variants.iter().map(Variant::as_str)),
+ &to,
+ );
+ result = TransformResult::Modified;
+ continue 'outer;
+ }
+ }
+ }
+ }
+ }
+
+ if !locale.id.language.is_empty() {
+ // If the region is specified, check sgn-region rules first
+ if let Some(region) = locale.id.region {
+ if locale.id.language == language!("sgn") {
+ if let Some(&sgn_lang) = self
+ .aliases
+ .get()
+ .sgn_region
+ .get(&region.into_tinystr().to_unvalidated())
+ {
+ uts35_replacement::<core::iter::Empty<&str>>(
+ locale,
+ true,
+ false,
+ true,
+ None,
+ &sgn_lang.into(),
+ );
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+ }
+
+ if uts35_check_language_rules(locale, &self.aliases) == TransformResult::Modified {
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+
+ if let Some(script) = locale.id.script {
+ if let Some(&replacement) = self
+ .aliases
+ .get()
+ .script
+ .get(&script.into_tinystr().to_unvalidated())
+ {
+ locale.id.script = Some(replacement);
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+
+ if let Some(region) = locale.id.region {
+ let replacement = if region.is_alphabetic() {
+ self.aliases
+ .get()
+ .region_alpha
+ .get(&region.into_tinystr().resize().to_unvalidated())
+ } else {
+ self.aliases
+ .get()
+ .region_num
+ .get(&region.into_tinystr().to_unvalidated())
+ };
+ if let Some(&replacement) = replacement {
+ locale.id.region = Some(replacement);
+ result = TransformResult::Modified;
+ continue;
+ }
+
+ if let Some(regions) = self
+ .aliases
+ .get()
+ .complex_region
+ .get(&region.into_tinystr().to_unvalidated())
+ {
+ // Skip if regions are empty
+ if let Some(default_region) = regions.get(0) {
+ let mut maximized = LanguageIdentifier {
+ language: locale.id.language,
+ script: locale.id.script,
+ region: None,
+ variants: Variants::default(),
+ };
+
+ locale.id.region = Some(
+ match (self.expander.maximize(&mut maximized), maximized.region) {
+ (TransformResult::Modified, Some(candidate))
+ if regions.iter().any(|x| x == candidate) =>
+ {
+ candidate
+ }
+ _ => default_region,
+ },
+ );
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+ }
+
+ if !locale.id.variants.is_empty() {
+ let mut modified = Vec::new();
+ let mut unmodified = Vec::new();
+ for &variant in locale.id.variants.iter() {
+ if let Some(&updated) = self
+ .aliases
+ .get()
+ .variant
+ .get(&variant.into_tinystr().to_unvalidated())
+ {
+ modified.push(updated);
+ } else {
+ unmodified.push(variant);
+ }
+ }
+
+ if !modified.is_empty() {
+ modified.extend(unmodified);
+ modified.sort();
+ modified.dedup();
+ locale.id.variants = Variants::from_vec_unchecked(modified);
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+
+ // Nothing matched in this iteration, we're done.
+ break;
+ }
+
+ // Handle Locale extensions in their own loops, because these rules do not interact
+ // with each other.
+ if let Some(lang) = &locale.extensions.transform.lang {
+ let mut tlang: Locale = lang.clone().into();
+ let mut matched = false;
+ loop {
+ if uts35_check_language_rules(&mut tlang, &self.aliases)
+ == TransformResult::Modified
+ {
+ result = TransformResult::Modified;
+ matched = true;
+ continue;
+ }
+
+ break;
+ }
+
+ if matched {
+ locale.extensions.transform.lang = Some(tlang.id);
+ }
+ }
+
+ // The `rg` region override and `sd` regional subdivision keys may contain
+ // language codes that require canonicalization.
+ for key in &[key!("rg"), key!("sd")] {
+ if let Some(value) = locale.extensions.unicode.keywords.get_mut(key) {
+ if let &[only_value] = value.as_tinystr_slice() {
+ if let Some(modified_value) = self
+ .aliases
+ .get()
+ .subdivision
+ .get(&only_value.resize().to_unvalidated())
+ {
+ if let Ok(modified_value) = modified_value.parse() {
+ *value = modified_value;
+ result = TransformResult::Modified;
+ }
+ }
+ }
+ }
+ }
+
+ result
+ }
+}
+
+ /// Runs `uts35_rule_matches` over a table of (locale, rule, expected outcome) cases.
+ #[test]
+ fn test_uts35_rule_matches() {
+     let cases = [
+         ("ja", "und", true),
+         ("und-heploc-hepburn", "und-hepburn", true),
+         ("ja-heploc-hepburn", "und-hepburn", true),
+         ("ja-hepburn", "und-hepburn-heploc", false),
+     ];
+     for (source, rule, expected) in cases {
+         // Shadow the raw string with the parsed locale so the failure message prints the parsed form.
+         let source = source.parse().unwrap();
+         let rule_id: LanguageIdentifier = rule.parse().unwrap();
+         let matched = uts35_rule_matches(
+             &source,
+             rule_id.language,
+             rule_id.script,
+             rule_id.region,
+             rule_id.variants.iter().map(Variant::as_str),
+         );
+         assert_eq!(matched, expected, "{source}");
+     }
+ }
+
+ /// Applies a (rule source -> rule replacement) pair to a locale and compares with the expected locale.
+ #[test]
+ fn test_uts35_replacement() {
+     let cases = [
+         (
+             "ja-Latn-fonipa-hepburn-heploc",
+             "und-hepburn-heploc",
+             "und-alalc97",
+             "ja-Latn-alalc97-fonipa",
+         ),
+         ("sgn-DD", "und-DD", "und-DE", "sgn-DE"),
+         ("sgn-DE", "sgn-DE", "gsg", "gsg"),
+     ];
+     for (input, from_rule, to_rule, expected) in cases {
+         let mut locale = input.parse().unwrap();
+         let from: LanguageIdentifier = from_rule.parse().unwrap();
+         let to = to_rule.parse().unwrap();
+         let expected: Locale = expected.parse().unwrap();
+         // The three flags say which subtags the source rule itself specifies.
+         uts35_replacement(
+             &mut locale,
+             !from.language.is_empty(),
+             from.script.is_some(),
+             from.region.is_some(),
+             Some(from.variants.iter().map(Variant::as_str)),
+             &to,
+         );
+         assert_eq!(expected, locale);
+     }
+ }
diff --git a/vendor/icu_locid_transform/src/directionality.rs b/vendor/icu_locid_transform/src/directionality.rs
new file mode 100644
index 000000000..8a6c243b8
--- /dev/null
+++ b/vendor/icu_locid_transform/src/directionality.rs
@@ -0,0 +1,231 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::provider::*;
+use crate::{LocaleExpander, LocaleTransformError};
+use icu_locid::subtags::Script;
+use icu_locid::LanguageIdentifier;
+use icu_provider::prelude::*;
+
+ /// Represents the direction of a script.
+ ///
+ /// [`LocaleDirectionality`] can be used to get this information.
+ // `#[non_exhaustive]`: code outside this crate cannot match on this enum exhaustively.
+ #[derive(Debug, PartialEq, Eq, Clone, Copy)]
+ #[non_exhaustive]
+ pub enum Direction {
+ /// The script is left-to-right.
+ LeftToRight,
+ /// The script is right-to-left.
+ RightToLeft,
+ }
+
+ /// Provides methods to determine the direction of a locale.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{Direction, LocaleDirectionality};
+ ///
+ /// let ld = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(ld.get(&locale!("en")), Some(Direction::LeftToRight));
+ /// ```
+ #[derive(Debug)]
+ pub struct LocaleDirectionality {
+ // Script direction data; its `rtl` and `ltr` lists are binary-searched by script.
+ script_direction: DataPayload<ScriptDirectionV1Marker>,
+ // Used to find the likely script of a locale before the direction lookup.
+ expander: LocaleExpander,
+ }
+
+impl LocaleDirectionality {
+ /// Creates a [`LocaleDirectionality`] from compiled data.
+ ///
+ /// This includes limited likely subtags data, see [`LocaleExpander::new()`].
+ #[cfg(feature = "compiled_data")]
+ pub const fn new() -> Self {
+ // Same as `new_with_expander`, using the default `LocaleExpander::new()` expander.
+ Self::new_with_expander(LocaleExpander::new())
+ }
+
+ // Hand-written instead of generated: the bounds on `try_new_unstable` don't suffice here.
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
+ pub fn try_new_with_any_provider(
+     provider: &(impl AnyProvider + ?Sized),
+ ) -> Result<LocaleDirectionality, LocaleTransformError> {
+     // Build the expander first; only if that succeeds is the script direction data loaded.
+     LocaleExpander::try_new_with_any_provider(provider).and_then(|expander| {
+         Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander)
+     })
+ }
+
+ // Hand-written instead of generated: the bounds on `try_new_unstable` don't suffice here.
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
+ #[cfg(feature = "serde")]
+ pub fn try_new_with_buffer_provider(
+     provider: &(impl BufferProvider + ?Sized),
+ ) -> Result<LocaleDirectionality, LocaleTransformError> {
+     // Build the expander first; only if that succeeds is the script direction data loaded.
+     LocaleExpander::try_new_with_buffer_provider(provider).and_then(|expander| {
+         Self::try_new_with_expander_unstable(&provider.as_deserializing(), expander)
+     })
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleDirectionality, LocaleTransformError>
+ where
+     P: DataProvider<ScriptDirectionV1Marker>
+         + DataProvider<LikelySubtagsForLanguageV1Marker>
+         + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+         + ?Sized,
+ {
+     // The same provider backs both the (non-extended) expander and the script direction data.
+     LocaleExpander::try_new_unstable(provider)
+         .and_then(|expander| Self::try_new_with_expander_unstable(provider, expander))
+ }
+
+ /// Creates a [`LocaleDirectionality`] with a custom [`LocaleExpander`] and compiled data.
+ ///
+ /// This allows using [`LocaleExpander::new_extended()`] with data for all locales.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{
+ /// Direction, LocaleDirectionality, LocaleExpander,
+ /// };
+ ///
+ /// let ld_default = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(ld_default.get(&locale!("jbn")), None);
+ ///
+ /// let expander = LocaleExpander::new_extended();
+ /// let ld_extended = LocaleDirectionality::new_with_expander(expander);
+ ///
+ /// assert_eq!(
+ /// ld_extended.get(&locale!("jbn")),
+ /// Some(Direction::RightToLeft)
+ /// );
+ /// ```
+ #[cfg(feature = "compiled_data")]
+ pub const fn new_with_expander(expander: LocaleExpander) -> Self {
+ // Script direction data always comes from the baked singleton; only the expander is caller-supplied.
+ LocaleDirectionality {
+ script_direction: DataPayload::from_static_ref(
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1,
+ ),
+ expander,
+ }
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)]
+ pub fn try_new_with_expander_unstable<P>(
+     provider: &P,
+     expander: LocaleExpander,
+ ) -> Result<LocaleDirectionality, LocaleTransformError>
+ where
+     P: DataProvider<ScriptDirectionV1Marker> + ?Sized,
+ {
+     // Loading and unwrapping the payload can each fail; `?` returns either failure to the caller.
+     let response = provider.load(Default::default())?;
+     let script_direction = response.take_payload()?;
+
+     Ok(LocaleDirectionality {
+         script_direction,
+         expander,
+     })
+ }
+
+ /// Returns the script direction of the given locale.
+ ///
+ /// Note that the direction is a property of the script of a locale, not of the language. As such,
+ /// when given a locale without an associated script tag (i.e., `locale!("en")` vs. `locale!("en-Latn")`),
+ /// this method first tries to infer the script using the language and region before returning its direction.
+ ///
+ /// If you already have a script struct and want to get its direction, you should use
+ /// `Locale::from(Some(my_script))` and call this method.
+ ///
+ /// This method will return `None` if either a locale's script cannot be determined, or there is no information
+ /// for the script.
+ ///
+ /// # Examples
+ ///
+ /// Using an existing locale:
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{Direction, LocaleDirectionality};
+ ///
+ /// let ld = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(ld.get(&locale!("en-US")), Some(Direction::LeftToRight));
+ ///
+ /// assert_eq!(ld.get(&locale!("ar")), Some(Direction::RightToLeft));
+ ///
+ /// assert_eq!(ld.get(&locale!("en-Arab")), Some(Direction::RightToLeft));
+ ///
+ /// assert_eq!(ld.get(&locale!("foo")), None);
+ /// ```
+ ///
+ /// Using a script directly:
+ ///
+ /// ```
+ /// use icu_locid::subtags::script;
+ /// use icu_locid::Locale;
+ /// use icu_locid_transform::{Direction, LocaleDirectionality};
+ ///
+ /// let ld = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(
+ /// ld.get(&Locale::from(Some(script!("Latn")))),
+ /// Some(Direction::LeftToRight)
+ /// );
+ /// ```
+ pub fn get(&self, locale: impl AsRef<LanguageIdentifier>) -> Option<Direction> {
+     // Without a script (explicit or inferred by the expander) there is nothing to look up.
+     let script = self.expander.get_likely_script(locale.as_ref())?;
+
+     // The left-to-right list is consulted first; the right-to-left list only if that misses.
+     if self.script_in_ltr(script) {
+         return Some(Direction::LeftToRight);
+     }
+     if self.script_in_rtl(script) {
+         return Some(Direction::RightToLeft);
+     }
+     // The script appears in neither list.
+     None
+ }
+
+ /// Reports whether the given locale is written right-to-left.
+ ///
+ /// A `false` result is ambiguous: the locale is either left-to-right, or this
+ /// [`LocaleDirectionality`] holds no data for it. Use [`LocaleDirectionality::get`]
+ /// when those two cases must be told apart.
+ ///
+ /// See [`LocaleDirectionality::get`] for more information.
+ pub fn is_right_to_left(&self, locale: impl AsRef<LanguageIdentifier>) -> bool {
+     match self.expander.get_likely_script(locale.as_ref()) {
+         Some(script) => self.script_in_rtl(script),
+         // No script could be determined for the locale.
+         None => false,
+     }
+ }
+
+ /// Reports whether the given locale is written left-to-right.
+ ///
+ /// A `false` result is ambiguous: the locale is either right-to-left, or this
+ /// [`LocaleDirectionality`] holds no data for it. Use [`LocaleDirectionality::get`]
+ /// when those two cases must be told apart.
+ ///
+ /// See [`LocaleDirectionality::get`] for more information.
+ pub fn is_left_to_right(&self, locale: impl AsRef<LanguageIdentifier>) -> bool {
+     match self.expander.get_likely_script(locale.as_ref()) {
+         Some(script) => self.script_in_ltr(script),
+         // No script could be determined for the locale.
+         None => false,
+     }
+ }
+
+ /// Binary-searches the `rtl` list of the script direction data for `script`.
+ fn script_in_rtl(&self, script: Script) -> bool {
+     let needle = script.into_tinystr().to_unvalidated();
+     let directions = self.script_direction.get();
+     directions.rtl.binary_search(&needle).is_ok()
+ }
+
+ /// Binary-searches the `ltr` list of the script direction data for `script`.
+ fn script_in_ltr(&self, script: Script) -> bool {
+     let needle = script.into_tinystr().to_unvalidated();
+     let directions = self.script_direction.get();
+     directions.ltr.binary_search(&needle).is_ok()
+ }
+}
diff --git a/vendor/icu_locid_transform/src/error.rs b/vendor/icu_locid_transform/src/error.rs
new file mode 100644
index 000000000..a59f838be
--- /dev/null
+++ b/vendor/icu_locid_transform/src/error.rs
@@ -0,0 +1,27 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::fmt::Debug;
+use displaydoc::Display;
+use icu_provider::DataError;
+
+ // `std::error::Error` is only implemented when the `std` feature is enabled.
+ #[cfg(feature = "std")]
+ impl std::error::Error for LocaleTransformError {}
+
+ /// A list of error outcomes for various operations in this module.
+ ///
+ /// Re-exported as [`Error`](crate::Error).
+ #[derive(Display, Debug, Copy, Clone, PartialEq)]
+ #[non_exhaustive]
+ pub enum LocaleTransformError {
+ /// An error originating inside of the [data provider](icu_provider).
+ // The `{0}` displaydoc format prints the wrapped `DataError` as-is.
+ #[displaydoc("{0}")]
+ Data(DataError),
+ }
+
+ /// Wraps a provider [`DataError`] in [`LocaleTransformError::Data`], which is what lets `?`
+ /// propagate data errors out of the constructors.
+ impl From<DataError> for LocaleTransformError {
+     fn from(source: DataError) -> Self {
+         LocaleTransformError::Data(source)
+     }
+ }
diff --git a/vendor/icu_locid_transform/src/expander.rs b/vendor/icu_locid_transform/src/expander.rs
new file mode 100644
index 000000000..56f204c32
--- /dev/null
+++ b/vendor/icu_locid_transform/src/expander.rs
@@ -0,0 +1,722 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{provider::*, LocaleTransformError};
+
+use core::mem;
+use icu_locid::subtags::{Language, Region, Script};
+use icu_locid::LanguageIdentifier;
+use icu_provider::prelude::*;
+
+use crate::TransformResult;
+
+/// Implements the *Add Likely Subtags* and *Remove Likely Subtags*
+/// algorithms as defined in *[UTS #35: Likely Subtags]*.
+///
+/// # Examples
+///
+/// Add likely subtags:
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{LocaleExpander, TransformResult};
+///
+/// let lc = LocaleExpander::new();
+///
+/// let mut locale = locale!("zh-CN");
+/// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, locale!("zh-Hans-CN"));
+///
+/// let mut locale = locale!("zh-Hant-TW");
+/// assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+/// assert_eq!(locale, locale!("zh-Hant-TW"));
+/// ```
+///
+/// Remove likely subtags:
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{LocaleExpander, TransformResult};
+///
+/// let lc = LocaleExpander::new();
+///
+/// let mut locale = locale!("zh-Hans-CN");
+/// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, locale!("zh"));
+///
+/// let mut locale = locale!("zh");
+/// assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+/// assert_eq!(locale, locale!("zh"));
+/// ```
+///
+/// Normally, only CLDR locales with Basic or higher coverage are included. To include more
+/// locales for maximization, use [`try_new_extended`](Self::try_new_extended_unstable):
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{LocaleExpander, TransformResult};
+///
+/// let lc = LocaleExpander::new_extended();
+///
+/// let mut locale = locale!("atj");
+/// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, locale!("atj-Latn-CA"));
+/// ```
+///
+/// [UTS #35: Likely Subtags]: https://www.unicode.org/reports/tr35/#Likely_Subtags
+ #[derive(Debug, Clone)]
+ pub struct LocaleExpander {
+ // Likely subtags keyed by language (alone, or with script / region); also holds the `und` default.
+ likely_subtags_l: DataPayload<LikelySubtagsForLanguageV1Marker>,
+ // Likely subtags keyed by script and/or region.
+ likely_subtags_sr: DataPayload<LikelySubtagsForScriptRegionV1Marker>,
+ // Optional extended data, consulted only when the two tables above have no entry.
+ likely_subtags_ext: Option<DataPayload<LikelySubtagsExtendedV1Marker>>,
+ }
+
+ // Borrowed view of the three `LocaleExpander` payloads, built by `LocaleExpander::as_borrowed`.
+ struct LocaleExpanderBorrowed<'a> {
+ likely_subtags_l: &'a LikelySubtagsForLanguageV1<'a>,
+ likely_subtags_sr: &'a LikelySubtagsForScriptRegionV1<'a>,
+ // `None` when the expander was built without extended data.
+ likely_subtags_ext: Option<&'a LikelySubtagsExtendedV1<'a>>,
+ }
+
+ // Every lookup checks the primary table first and falls back to the extended table
+ // (when one is present) only on a miss.
+ impl LocaleExpanderBorrowed<'_> {
+     /// Likely (script, region) for a language.
+     fn get_l(&self, l: Language) -> Option<(Script, Region)> {
+         let key = &l.into_tinystr().to_unvalidated();
+         if let Some(hit) = self.likely_subtags_l.language.get_copied(key) {
+             return Some(hit);
+         }
+         self.likely_subtags_ext?.language.get_copied(key)
+     }
+
+     /// Likely region for a language + script pair.
+     fn get_ls(&self, l: Language, s: Script) -> Option<Region> {
+         let key = &(
+             l.into_tinystr().to_unvalidated(),
+             s.into_tinystr().to_unvalidated(),
+         );
+         if let Some(hit) = self.likely_subtags_l.language_script.get_copied(key) {
+             return Some(hit);
+         }
+         self.likely_subtags_ext?.language_script.get_copied(key)
+     }
+
+     /// Likely script for a language + region pair.
+     fn get_lr(&self, l: Language, r: Region) -> Option<Script> {
+         let key = &(
+             l.into_tinystr().to_unvalidated(),
+             r.into_tinystr().to_unvalidated(),
+         );
+         if let Some(hit) = self.likely_subtags_l.language_region.get_copied(key) {
+             return Some(hit);
+         }
+         self.likely_subtags_ext?.language_region.get_copied(key)
+     }
+
+     /// Likely (language, region) for a script.
+     fn get_s(&self, s: Script) -> Option<(Language, Region)> {
+         let key = &s.into_tinystr().to_unvalidated();
+         if let Some(hit) = self.likely_subtags_sr.script.get_copied(key) {
+             return Some(hit);
+         }
+         self.likely_subtags_ext?.script.get_copied(key)
+     }
+
+     /// Likely language for a script + region pair.
+     fn get_sr(&self, s: Script, r: Region) -> Option<Language> {
+         let key = &(
+             s.into_tinystr().to_unvalidated(),
+             r.into_tinystr().to_unvalidated(),
+         );
+         if let Some(hit) = self.likely_subtags_sr.script_region.get_copied(key) {
+             return Some(hit);
+         }
+         self.likely_subtags_ext?.script_region.get_copied(key)
+     }
+
+     /// Likely (language, script) for a region.
+     fn get_r(&self, r: Region) -> Option<(Language, Script)> {
+         let key = &r.into_tinystr().to_unvalidated();
+         if let Some(hit) = self.likely_subtags_sr.region.get_copied(key) {
+             return Some(hit);
+         }
+         self.likely_subtags_ext?.region.get_copied(key)
+     }
+
+     /// The `und` (language, script, region) triple stored in the language table.
+     fn get_und(&self) -> (Language, Script, Region) {
+         self.likely_subtags_l.und
+     }
+ }
+
+ /// Fills in the subtags `langid` is missing from the supplied values and reports whether
+ /// anything was written. Subtags that `langid` already has are never overwritten.
+ #[inline]
+ fn update_langid(
+     language: Language,
+     script: Option<Script>,
+     region: Option<Region>,
+     langid: &mut LanguageIdentifier,
+ ) -> TransformResult {
+     let fill_language = langid.language.is_empty() && !language.is_empty();
+     if fill_language {
+         langid.language = language;
+     }
+
+     let fill_script = langid.script.is_none() && script.is_some();
+     if fill_script {
+         langid.script = script;
+     }
+
+     let fill_region = langid.region.is_none() && region.is_some();
+     if fill_region {
+         langid.region = region;
+     }
+
+     if fill_language || fill_script || fill_region {
+         TransformResult::Modified
+     } else {
+         TransformResult::Unmodified
+     }
+ }
+
+impl LocaleExpander {
+ /// Creates a [`LocaleExpander`] with compiled data for commonly-used locales
+ /// (locales with *Basic* or higher [CLDR coverage]).
+ ///
+ /// Use this constructor if you want limited likely subtags for data-oriented use cases.
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ ///
+ /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
+ #[cfg(feature = "compiled_data")]
+ pub const fn new() -> Self {
+ LocaleExpander {
+ likely_subtags_l: DataPayload::from_static_ref(
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1,
+ ),
+ likely_subtags_sr: DataPayload::from_static_ref(
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1,
+ ),
+ // No extended payload: lookups consult only the two tables above.
+ likely_subtags_ext: None,
+ }
+ }
+
+ /// Creates a [`LocaleExpander`] with compiled data for all locales.
+ ///
+ /// Use this constructor if you want to include data for all locales, including ones
+ /// that may not have data for other services (i.e. [CLDR coverage] below *Basic*).
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ ///
+ /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
+ #[cfg(feature = "compiled_data")]
+ pub const fn new_extended() -> Self {
+ LocaleExpander {
+ likely_subtags_l: DataPayload::from_static_ref(
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1,
+ ),
+ likely_subtags_sr: DataPayload::from_static_ref(
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1,
+ ),
+ // Differs from `new()` only in this field: the extended table is attached as a fallback.
+ likely_subtags_ext: Some(DataPayload::from_static_ref(
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1,
+ )),
+ }
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_extended)]
+ pub fn try_new_extended_unstable<P>(
+     provider: &P,
+ ) -> Result<LocaleExpander, LocaleTransformError>
+ where
+     P: DataProvider<LikelySubtagsForLanguageV1Marker>
+         + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+         + DataProvider<LikelySubtagsExtendedV1Marker>
+         + ?Sized,
+ {
+     // Payloads are loaded in field order (language, script/region, extended);
+     // the first failure is returned to the caller.
+     Ok(LocaleExpander {
+         likely_subtags_l: provider.load(Default::default())?.take_payload()?,
+         likely_subtags_sr: provider.load(Default::default())?.take_payload()?,
+         likely_subtags_ext: Some(provider.load(Default::default())?.take_payload()?),
+     })
+ }
+
+ // NOTE(review): presumably expands to the `try_new_extended_with_any_provider` and
+ // `try_new_extended_with_buffer_provider` constructors named in the list below, on top of
+ // `try_new_extended_unstable` — confirm against the macro definition in `icu_provider`.
+ icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: LocaleTransformError,
+ #[cfg(skip)]
+ functions: [
+ new_extended,
+ try_new_extended_with_any_provider,
+ try_new_extended_with_buffer_provider,
+ try_new_extended_unstable,
+ Self
+ ]);
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
+ pub fn try_new_with_any_provider(
+     provider: &(impl AnyProvider + ?Sized),
+ ) -> Result<LocaleExpander, LocaleTransformError> {
+     // `try_new_compat` is bounded on `DataProvider`, so it is handed the downcasting adapter.
+     let downcasting = provider.as_downcasting();
+     Self::try_new_compat(&downcasting)
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
+ #[cfg(feature = "serde")]
+ pub fn try_new_with_buffer_provider(
+     provider: &(impl BufferProvider + ?Sized),
+ ) -> Result<LocaleExpander, LocaleTransformError> {
+     // `try_new_compat` is bounded on `DataProvider`, so it is handed the deserializing adapter.
+     let deserializing = provider.as_deserializing();
+     Self::try_new_compat(&deserializing)
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleExpander, LocaleTransformError>
+ where
+     P: DataProvider<LikelySubtagsForLanguageV1Marker>
+         + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+         + ?Sized,
+ {
+     // The language payload is loaded first, then script/region; the first failure is returned.
+     // No extended payload is requested by this constructor.
+     Ok(LocaleExpander {
+         likely_subtags_l: provider.load(Default::default())?.take_payload()?,
+         likely_subtags_sr: provider.load(Default::default())?.take_payload()?,
+         likely_subtags_ext: None,
+     })
+ }
+
+ // Builds an expander from whichever likely-subtags keys the provider can serve:
+ // the split language / script-region keys (plus the optional extended key) when both
+ // split keys load, otherwise the single `LikelySubtagsV1Marker` key.
+ fn try_new_compat<P>(provider: &P) -> Result<LocaleExpander, LocaleTransformError>
+ where
+ P: DataProvider<LikelySubtagsForLanguageV1Marker>
+ + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+ + DataProvider<LikelySubtagsExtendedV1Marker>
+ + DataProvider<LikelySubtagsV1Marker>
+ + ?Sized,
+ {
+ // All three loads are attempted up front and kept as `Result`s so the match below can
+ // decide what to do with failures. The marker of each load is not spelled out here;
+ // presumably it is inferred from the struct field each payload ends up in.
+ let payload_l = provider
+ .load(Default::default())
+ .and_then(DataResponse::take_payload);
+ let payload_sr = provider
+ .load(Default::default())
+ .and_then(DataResponse::take_payload);
+ let payload_ext = provider
+ .load(Default::default())
+ .and_then(DataResponse::take_payload);
+
+ let (likely_subtags_l, likely_subtags_sr, likely_subtags_ext) =
+ match (payload_l, payload_sr, payload_ext) {
+ // Both split keys loaded; a missing extended key is not an error.
+ (Ok(l), Ok(sr), Err(_)) => (l, sr, None),
+ (Ok(l), Ok(sr), Ok(ext)) => (l, sr, Some(ext)),
+ // Either split key failed: load the combined key instead and project it into
+ // the two split payloads. No extended data in this case. A failure here is
+ // the error the caller sees.
+ _ => {
+ let result: DataPayload<LikelySubtagsV1Marker> =
+ provider.load(Default::default())?.take_payload()?;
+ (
+ result.map_project_cloned(|st, _| {
+ LikelySubtagsForLanguageV1::clone_from_borrowed(st)
+ }),
+ result.map_project(|st, _| st.into()),
+ None,
+ )
+ }
+ };
+
+ Ok(LocaleExpander {
+ likely_subtags_l,
+ likely_subtags_sr,
+ likely_subtags_ext,
+ })
+ }
+
+ /// Borrows the data behind each payload so the lookup helpers can work on plain references.
+ fn as_borrowed(&self) -> LocaleExpanderBorrowed {
+     let likely_subtags_l = self.likely_subtags_l.get();
+     let likely_subtags_sr = self.likely_subtags_sr.get();
+     // Stays `None` when the expander carries no extended payload.
+     let likely_subtags_ext = self.likely_subtags_ext.as_ref().map(|ext| ext.get());
+
+     LocaleExpanderBorrowed {
+         likely_subtags_l,
+         likely_subtags_sr,
+         likely_subtags_ext,
+     }
+ }
+
+ /// Runs the 'Add Likely Subtags' algorithm from
+ /// <https://www.unicode.org/reports/tr35/#Likely_Subtags> on the passed-in locale,
+ /// updating it in place depending on the result.
+ ///
+ /// If running the algorithm produces a different locale, the locale argument is
+ /// updated in place to match, and the method returns
+ /// [`TransformResult::Modified`]. Otherwise, the method returns
+ /// [`TransformResult::Unmodified`] and the locale argument is unchanged.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{LocaleExpander, TransformResult};
+ ///
+ /// let lc = LocaleExpander::new();
+ ///
+ /// let mut locale = locale!("zh-CN");
+ /// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+ /// assert_eq!(locale, locale!("zh-Hans-CN"));
+ ///
+ /// let mut locale = locale!("zh-Hant-TW");
+ /// assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+ /// assert_eq!(locale, locale!("zh-Hant-TW"));
+ /// ```
+ pub fn maximize<T: AsMut<LanguageIdentifier>>(&self, mut langid: T) -> TransformResult {
+     let langid = langid.as_mut();
+     // Snapshot the input subtags; `update_langid` only ever fills in the missing ones.
+     let (language, script, region) = (langid.language, langid.script, langid.region);
+     let has_language = !language.is_empty();
+
+     // Already fully specified: nothing to add.
+     if has_language && script.is_some() && region.is_some() {
+         return TransformResult::Unmodified;
+     }
+
+     let data = self.as_borrowed();
+
+     // Lookups keyed by language: language+region, then language+script, then language alone.
+     if has_language {
+         if let Some(likely_script) = region.and_then(|r| data.get_lr(language, r)) {
+             return update_langid(Language::UND, Some(likely_script), None, langid);
+         }
+         if let Some(likely_region) = script.and_then(|s| data.get_ls(language, s)) {
+             return update_langid(Language::UND, None, Some(likely_region), langid);
+         }
+         if let Some((likely_script, likely_region)) = data.get_l(language) {
+             return update_langid(
+                 Language::UND,
+                 Some(likely_script),
+                 Some(likely_region),
+                 langid,
+             );
+         }
+     }
+
+     // Lookups keyed by script: script+region, then script alone.
+     if let Some(script) = script {
+         if let Some(likely_language) = region.and_then(|r| data.get_sr(script, r)) {
+             return update_langid(likely_language, None, None, langid);
+         }
+         if let Some((likely_language, likely_region)) = data.get_s(script) {
+             return update_langid(likely_language, None, Some(likely_region), langid);
+         }
+     }
+
+     // Lookup keyed by region alone.
+     if let Some((likely_language, likely_script)) = region.and_then(|r| data.get_r(r)) {
+         return update_langid(likely_language, Some(likely_script), None, langid);
+     }
+
+     // Nothing matched: fall back to the `und` defaults.
+     let (und_language, und_script, und_region) = data.get_und();
+     update_langid(und_language, Some(und_script), Some(und_region), langid)
+ }
+
+ /// Runs the 'Remove Likely Subtags' algorithm from
+ /// <https://www.unicode.org/reports/tr35/#Likely_Subtags> on the passed-in locale,
+ /// updating it in place.
+ ///
+ /// If the result of running the algorithm would result in a new locale, the
+ /// locale argument is updated in place to match the result, and the method
+ /// returns [`TransformResult::Modified`]. Otherwise, the method
+ /// returns [`TransformResult::Unmodified`] and the locale argument is
+ /// unchanged.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{LocaleExpander, TransformResult};
+ ///
+ /// let lc = LocaleExpander::new();
+ ///
+ /// let mut locale = locale!("zh-Hans-CN");
+ /// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+ /// assert_eq!(locale, locale!("zh"));
+ ///
+ /// let mut locale = locale!("zh");
+ /// assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+ /// assert_eq!(locale, locale!("zh"));
+ /// ```
+ pub fn minimize<T: AsMut<LanguageIdentifier>>(&self, mut langid: T) -> TransformResult {
+     let langid = langid.as_mut();
+
+     let mut max = langid.clone();
+     self.maximize(&mut max);
+     // Variants take no part in the comparisons below. `mem::take` already leaves
+     // `max.variants` empty, so no separate `clear()` is needed.
+     let variants = mem::take(&mut max.variants);
+
+     // Candidate (script, region) pairs, shortest form first: language only,
+     // language + region, language + script. The first candidate that maximizes
+     // back to `max` is the minimized form.
+     let candidates = [(None, None), (None, max.region), (max.script, None)];
+     let mut trial = max.clone();
+     for (script, region) in candidates {
+         trial.script = script;
+         trial.region = region;
+         self.maximize(&mut trial);
+         if trial != max {
+             continue;
+         }
+
+         let already_minimal = langid.language == max.language
+             && langid.script == script
+             && langid.region == region;
+         if already_minimal {
+             return TransformResult::Unmodified;
+         }
+         langid.language = max.language;
+         langid.script = script;
+         langid.region = region;
+         langid.variants = variants;
+         return TransformResult::Modified;
+     }
+
+     // No shorter form round-trips, so the result is the fully maximized identifier.
+     let already_maximal = langid.language == max.language
+         && langid.script == max.script
+         && langid.region == max.region;
+     if already_maximal {
+         return TransformResult::Unmodified;
+     }
+     langid.language = max.language;
+     langid.script = max.script;
+     langid.region = max.region;
+     TransformResult::Modified
+ }
+
+ // TODO(3492): consider turning this and a future get_likely_region/get_likely_language public
+ /// Returns the script of `langid` if it has one, otherwise the script inferred from its
+ /// language and region (if any can be inferred).
+ #[inline]
+ pub(crate) fn get_likely_script<T: AsRef<LanguageIdentifier>>(
+     &self,
+     langid: T,
+ ) -> Option<Script> {
+     let langid = langid.as_ref();
+     match langid.script {
+         explicit @ Some(_) => explicit,
+         None => self.infer_likely_script(langid.language, langid.region),
+     }
+ }
+
+ /// Infers a script from a language and an optional region, or returns `None` when the
+ /// likely-subtags data has no matching entry.
+ fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> {
+     let data = self.as_borrowed();
+
+     // Lookups run from most to least specific (same order as `LocaleExpander::maximize`):
+     //   1. language + region
+     //   2. language
+     //   3. region
+     // Every level has to be tried, because e.g. for "en-US" the default script is
+     // associated with "en" but not with "en-US".
+     let by_language = || -> Option<Script> {
+         if language == Language::UND {
+             return None;
+         }
+         region
+             // 1. both language and region are known
+             .and_then(|r| data.get_lr(language, r))
+             // 2. region is unknown, or knowing it did not help
+             .or_else(|| data.get_l(language).map(|(script, _)| script))
+     };
+
+     // 3. language is unknown, or knowing it did not help; `None` if the region fails too
+     by_language().or_else(|| {
+         region
+             .and_then(|r| data.get_r(r))
+             .map(|(_, script)| script)
+     })
+ }
+}
+
+#[cfg(feature = "serde")]
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use icu_locid::locale;
+
+ // Test provider that refuses to serve the data keys listed in `keys`.
+ struct RejectByKeyProvider {
+ keys: Vec<DataKey>,
+ }
+
+ // Serves the four likely-subtags keys from baked data, except for keys on the reject list.
+ impl AnyProvider for RejectByKeyProvider {
+ fn load_any(&self, key: DataKey, _: DataRequest) -> Result<AnyResponse, DataError> {
+ // Rejected keys fail with `MissingDataKey`, as if the provider did not have them.
+ if self.keys.contains(&key) {
+ return Err(DataErrorKind::MissingDataKey.with_str_context("rejected"));
+ }
+
+ let l = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1;
+ let ext = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1;
+ let sr = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1;
+
+ let payload = if key.hashed() == LikelySubtagsV1Marker::KEY.hashed() {
+ // The combined payload is assembled on the fly: the language-keyed maps merge
+ // `l` and `ext`, while `script_region`, `script` and `region` are taken from `ext` only.
+ DataPayload::<LikelySubtagsV1Marker>::from_owned(LikelySubtagsV1 {
+ language_script: l
+ .language_script
+ .iter_copied()
+ .chain(ext.language_script.iter_copied())
+ .collect(),
+ language_region: l
+ .language_region
+ .iter_copied()
+ .chain(ext.language_region.iter_copied())
+ .collect(),
+ language: l
+ .language
+ .iter_copied()
+ .chain(ext.language.iter_copied())
+ .collect(),
+ script_region: ext.script_region.clone(),
+ script: ext.script.clone(),
+ region: ext.region.clone(),
+ und: l.und,
+ })
+ .wrap_into_any_payload()
+ } else if key.hashed() == LikelySubtagsForLanguageV1Marker::KEY.hashed() {
+ DataPayload::<LikelySubtagsForLanguageV1Marker>::from_static_ref(l)
+ .wrap_into_any_payload()
+ } else if key.hashed() == LikelySubtagsExtendedV1Marker::KEY.hashed() {
+ DataPayload::<LikelySubtagsExtendedV1Marker>::from_static_ref(ext)
+ .wrap_into_any_payload()
+ } else if key.hashed() == LikelySubtagsForScriptRegionV1Marker::KEY.hashed() {
+ DataPayload::<LikelySubtagsForScriptRegionV1Marker>::from_static_ref(sr)
+ .wrap_into_any_payload()
+ } else {
+ // Any other key is unknown to this provider.
+ return Err(DataErrorKind::MissingDataKey.into_error());
+ };
+
+ Ok(AnyResponse {
+ payload: Some(payload),
+ metadata: Default::default(),
+ })
+ }
+ }
+
+ /// All three split keys are rejected, so only the combined `LikelySubtagsV1Marker` key is served.
+ #[test]
+ fn test_old_keys() {
+     let rejected = vec![
+         LikelySubtagsForLanguageV1Marker::KEY,
+         LikelySubtagsForScriptRegionV1Marker::KEY,
+         LikelySubtagsExtendedV1Marker::KEY,
+     ];
+     let provider = RejectByKeyProvider { keys: rejected };
+     let expander = LocaleExpander::try_new_with_any_provider(&provider)
+         .expect("should create with old keys");
+
+     let mut zh_cn = locale!("zh-CN");
+     let outcome = expander.maximize(&mut zh_cn);
+     assert_eq!(outcome, TransformResult::Modified);
+     assert_eq!(zh_cn, locale!("zh-Hans-CN"));
+ }
+
+ /// Only the combined `LikelySubtagsV1Marker` key is rejected; the split keys are all served.
+ #[test]
+ fn test_new_keys() {
+     let rejected = vec![LikelySubtagsV1Marker::KEY];
+     let provider = RejectByKeyProvider { keys: rejected };
+     let expander = LocaleExpander::try_new_with_any_provider(&provider)
+         .expect("should create with new keys");
+
+     let mut zh_cn = locale!("zh-CN");
+     let outcome = expander.maximize(&mut zh_cn);
+     assert_eq!(outcome, TransformResult::Modified);
+     assert_eq!(zh_cn, locale!("zh-Hans-CN"));
+ }
+
+ /// Only the script/region key is rejected, so the combined key and just one of the two
+ /// split keys it replaces are available. Not sure if this is a useful test.
+ #[test]
+ fn test_mixed_keys() {
+     let rejected = vec![LikelySubtagsForScriptRegionV1Marker::KEY];
+     let provider = RejectByKeyProvider { keys: rejected };
+     let expander = LocaleExpander::try_new_with_any_provider(&provider)
+         .expect("should create with mixed keys");
+
+     let mut zh_cn = locale!("zh-CN");
+     let outcome = expander.maximize(&mut zh_cn);
+     assert_eq!(outcome, TransformResult::Modified);
+     assert_eq!(zh_cn, locale!("zh-Hans-CN"));
+ }
+
+ /// With both split keys and the combined key rejected, construction has to fail.
+ #[test]
+ fn test_no_keys() {
+     let rejected = vec![
+         LikelySubtagsForLanguageV1Marker::KEY,
+         LikelySubtagsForScriptRegionV1Marker::KEY,
+         LikelySubtagsV1Marker::KEY,
+     ];
+     let provider = RejectByKeyProvider { keys: rejected };
+     assert!(
+         LocaleExpander::try_new_with_any_provider(&provider).is_err(),
+         "should not create: no data present"
+     );
+ }
+
+ /// The extended key and the combined key are rejected: only the two small split keys
+ /// are served, which must be enough to build a working expander.
+ #[test]
+ fn test_new_small_keys() {
+     // Include the new small keys but not the extended key
+     let provider = RejectByKeyProvider {
+         keys: vec![
+             LikelySubtagsExtendedV1Marker::KEY,
+             LikelySubtagsV1Marker::KEY,
+         ],
+     };
+     // The failure message names this scenario, so a failure is not mistaken for `test_mixed_keys`.
+     let lc = LocaleExpander::try_new_with_any_provider(&provider)
+         .expect("should create with new small keys");
+     let mut locale = locale!("zh-CN");
+     assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+     assert_eq!(locale, locale!("zh-Hans-CN"));
+ }
+}
diff --git a/vendor/icu_locid_transform/src/fallback/algorithms.rs b/vendor/icu_locid_transform/src/fallback/algorithms.rs
new file mode 100644
index 000000000..c3a3d08ca
--- /dev/null
+++ b/vendor/icu_locid_transform/src/fallback/algorithms.rs
@@ -0,0 +1,487 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use icu_locid::extensions::unicode::{key, Key};
+use icu_locid::subtags::Language;
+use icu_locid::LanguageIdentifier;
+use icu_provider::FallbackPriority;
+
+use super::*;
+
+const SUBDIVISION_KEY: Key = key!("sd");
+
+ impl<'a> LocaleFallbackerWithConfig<'a> {
+ pub(crate) fn normalize(&self, locale: &mut DataLocale) { // Rewrites `locale` in place (steps 1-3 below); called by `fallback_for` before iteration starts.
+ let language = locale.language();
+ // 1. Populate the region (required for region fallback only)
+ if self.config.priority == FallbackPriority::Region && locale.region().is_none() {
+ // 1a. First look for region based on language+script
+ if let Some(script) = locale.script() {
+ locale.set_region(
+ self.likely_subtags
+ .ls2r
+ .get_2d(
+ &language.into_tinystr().to_unvalidated(),
+ &script.into_tinystr().to_unvalidated(),
+ )
+ .copied(),
+ );
+ }
+ // 1b. If that fails, try language only
+ if locale.region().is_none() {
+ locale.set_region(
+ self.likely_subtags
+ .l2r
+ .get(&language.into_tinystr().to_unvalidated())
+ .copied(),
+ );
+ }
+ }
+ // 2. Remove the script if it is implied by the other subtags
+ if let Some(script) = locale.script() {
+ let default_script = self
+ .likely_subtags
+ .l2s
+ .get_copied(&language.into_tinystr().to_unvalidated())
+ .unwrap_or(DEFAULT_SCRIPT); // no entry for this language: use DEFAULT_SCRIPT
+ if let Some(region) = locale.region() {
+ if script
+ == self
+ .likely_subtags
+ .lr2s
+ .get_copied_2d(
+ &language.into_tinystr().to_unvalidated(),
+ &region.into_tinystr().to_unvalidated(),
+ )
+ .unwrap_or(default_script) // no language+region entry: compare against the language's default script
+ {
+ locale.set_script(None);
+ }
+ } else if script == default_script {
+ locale.set_script(None);
+ }
+ }
+ // 3. Remove irrelevant extension subtags
+ locale.retain_unicode_ext(|key| {
+ match *key {
+ // Always retain -u-sd
+ SUBDIVISION_KEY => true,
+ // Retain the query-specific keyword
+ _ if Some(*key) == self.config.extension_key => true,
+ // Drop all others
+ _ => false,
+ }
+ });
+ // 4. If there is an invalid "sd" subtag, drop it (not implemented in this function)
+ // For now, ignore it, and let fallback do it for us
+ }
+ }
+
+ impl<'a> LocaleFallbackIteratorInner<'a> {
+ pub fn step(&mut self, locale: &mut DataLocale) { // Advances `locale` by one fallback step, dispatching on the configured priority.
+ match self.config.priority {
+ FallbackPriority::Language => self.step_language(locale),
+ FallbackPriority::Region => self.step_region(locale),
+ // TODO(#1964): Change the collation fallback rules to be different
+ // from the language fallback rules.
+ FallbackPriority::Collation => self.step_language(locale),
+ // This case should not normally happen, but `FallbackPriority` is non_exhaustive.
+ // Make it go directly to `und`.
+ _ => {
+ debug_assert!(
+ false,
+ "Unknown FallbackPriority: {:?}",
+ self.config.priority
+ );
+ *locale = Default::default()
+ }
+ }
+ }
+
+ fn step_language(&mut self, locale: &mut DataLocale) { // Language-priority step: each numbered stage returns as soon as it has changed `locale`.
+ // 1. Remove the extension fallback keyword, saving its value for a later restore
+ if let Some(extension_key) = self.config.extension_key {
+ if let Some(value) = locale.remove_unicode_ext(&extension_key) {
+ self.backup_extension = Some(value);
+ return;
+ }
+ }
+ // 2. Remove the subdivision keyword, saving its value for a later restore
+ if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) {
+ self.backup_subdivision = Some(value);
+ return;
+ }
+ // 3. Assert that the locale is a language identifier
+ debug_assert!(!locale.has_unicode_ext());
+ // 4. Remove variants, saving them for a later restore
+ if locale.has_variants() {
+ self.backup_variants = Some(locale.clear_variants());
+ return;
+ }
+ // 5. Check for parent override
+ if let Some(parent) = self.get_explicit_parent(locale) {
+ locale.set_langid(parent);
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ // 6. Add the script subtag if necessary
+ if locale.script().is_none() {
+ if let Some(region) = locale.region() {
+ let language = locale.language();
+ if let Some(script) = self.likely_subtags.lr2s.get_copied_2d(
+ &language.into_tinystr().to_unvalidated(),
+ &region.into_tinystr().to_unvalidated(),
+ ) {
+ locale.set_script(Some(script));
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ }
+ }
+ // 7. Remove region
+ if locale.region().is_some() {
+ locale.set_region(None);
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ // 8. Remove language+script
+ debug_assert!(!locale.language().is_empty()); // don't call .step() on und
+ locale.set_script(None);
+ locale.set_language(Language::UND);
+ }
+
+ fn step_region(&mut self, locale: &mut DataLocale) { // Region-priority step: same stages 1-4 as `step_language`, then language+script is dropped before the region.
+ // 1. Remove the extension fallback keyword, saving its value for a later restore
+ if let Some(extension_key) = self.config.extension_key {
+ if let Some(value) = locale.remove_unicode_ext(&extension_key) {
+ self.backup_extension = Some(value);
+ return;
+ }
+ }
+ // 2. Remove the subdivision keyword, saving its value for a later restore
+ if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) {
+ self.backup_subdivision = Some(value);
+ return;
+ }
+ // 3. Assert that the locale is a language identifier
+ debug_assert!(!locale.has_unicode_ext());
+ // 4. Remove variants, saving them for a later restore
+ if locale.has_variants() {
+ self.backup_variants = Some(locale.clear_variants());
+ return;
+ }
+ // 5. Remove language+script
+ if !locale.language().is_empty() || locale.script().is_some() {
+ locale.set_script(None);
+ locale.set_language(Language::UND);
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ // 6. Remove region
+ debug_assert!(locale.region().is_some()); // don't call .step() on und
+ locale.set_region(None);
+ }
+
+ fn restore_extensions_variants(&mut self, locale: &mut DataLocale) { // Re-attaches whatever earlier steps stashed (extension keyword, subdivision, variants) and clears the stash.
+ if let Some(value) = self.backup_extension.take() {
+ #[allow(clippy::unwrap_used)] // not reachable unless extension_key is present
+ locale.set_unicode_ext(self.config.extension_key.unwrap(), value);
+ }
+ if let Some(value) = self.backup_subdivision.take() {
+ locale.set_unicode_ext(SUBDIVISION_KEY, value);
+ }
+ if let Some(variants) = self.backup_variants.take() {
+ locale.set_variants(variants);
+ }
+ }
+
+ fn get_explicit_parent(&self, locale: &DataLocale) -> Option<LanguageIdentifier> { // Looks up an explicit parent for `locale`; the supplement (if any) is consulted before the main parents data.
+ self.supplement
+ .and_then(|supplement| {
+ supplement
+ .parents
+ .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse())
+ })
+ .or_else(|| {
+ self.parents
+ .parents
+ .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse())
+ })
+ .map(LanguageIdentifier::from)
+ }
+ }
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use icu_locid::Locale;
+ use std::str::FromStr;
+ use writeable::Writeable;
+
+ struct TestCase { // One row of the fallback test table consumed by `test_fallback`.
+ input: &'static str, // locale string parsed with `Locale::from_str`
+ requires_data: bool, // true: run against the fallbacker with data; false: the one built by `new_without_data`
+ extension_key: Option<Key>, // copied into `LocaleFallbackConfig::extension_key`
+ fallback_supplement: Option<LocaleFallbackSupplement>, // copied into `LocaleFallbackConfig::fallback_supplement`
+ // Note: The first entry in each chain is the normalized locale; the final `und` is checked separately
+ expected_language_chain: &'static [&'static str],
+ expected_region_chain: &'static [&'static str],
+ }
+
+ // TODO: Consider loading these from a JSON file
+ const TEST_CASES: &[TestCase] = &[
+ TestCase {
+ input: "en-u-hc-h12-sd-usca",
+ requires_data: false,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-u-sd-usca", "en"],
+ expected_region_chain: &["en-u-sd-usca", "en", "und-u-sd-usca"],
+ },
+ TestCase {
+ input: "en-US-u-hc-h12-sd-usca",
+ requires_data: false,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ input: "en-US-fonipa-u-hc-h12-sd-usca",
+ requires_data: false,
+ extension_key: Some(key!("hc")),
+ fallback_supplement: None,
+ expected_language_chain: &[
+ "en-US-fonipa-u-hc-h12-sd-usca",
+ "en-US-fonipa-u-sd-usca",
+ "en-US-fonipa",
+ "en-US",
+ "en-fonipa-u-hc-h12-sd-usca",
+ "en-fonipa-u-sd-usca",
+ "en-fonipa",
+ "en",
+ ],
+ expected_region_chain: &[
+ "en-US-fonipa-u-hc-h12-sd-usca",
+ "en-US-fonipa-u-sd-usca",
+ "en-US-fonipa",
+ "en-US",
+ "und-US-fonipa-u-hc-h12-sd-usca",
+ "und-US-fonipa-u-sd-usca",
+ "und-US-fonipa",
+ "und-US",
+ ],
+ },
+ TestCase {
+ input: "en-u-hc-h12-sd-usca",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ input: "en-Latn-u-sd-usca",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ input: "en-Latn-US-u-sd-usca",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ // NOTE: -u-rg is not yet supported; when it is, this test should be updated
+ input: "en-u-rg-gbxxxx",
+ requires_data: false,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en"],
+ expected_region_chain: &["en"],
+ },
+ TestCase {
+ input: "sr-ME",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"],
+ expected_region_chain: &["sr-ME", "und-ME"],
+ },
+ TestCase {
+ input: "sr-Latn-ME",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"],
+ expected_region_chain: &["sr-ME", "und-ME"],
+ },
+ TestCase {
+ input: "sr-ME-fonipa",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &[
+ "sr-ME-fonipa",
+ "sr-ME",
+ "sr-Latn-ME-fonipa",
+ "sr-Latn-ME",
+ "sr-Latn-fonipa",
+ "sr-Latn",
+ ],
+ expected_region_chain: &["sr-ME-fonipa", "sr-ME", "und-ME-fonipa", "und-ME"],
+ },
+ TestCase {
+ input: "sr-RS",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-RS", "sr"],
+ expected_region_chain: &["sr-RS", "und-RS"],
+ },
+ TestCase {
+ input: "sr-Cyrl-RS",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-RS", "sr"],
+ expected_region_chain: &["sr-RS", "und-RS"],
+ },
+ TestCase {
+ input: "sr-Latn-RS",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-Latn-RS", "sr-Latn"],
+ expected_region_chain: &["sr-Latn-RS", "und-RS"],
+ },
+ TestCase {
+ input: "de-Latn-LI",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["de-LI", "de"],
+ expected_region_chain: &["de-LI", "und-LI"],
+ },
+ TestCase {
+ input: "ca-ES-valencia",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["ca-ES-valencia", "ca-ES", "ca-valencia", "ca"],
+ expected_region_chain: &["ca-ES-valencia", "ca-ES", "und-ES-valencia", "und-ES"],
+ },
+ TestCase {
+ input: "es-AR",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["es-AR", "es-419", "es"],
+ expected_region_chain: &["es-AR", "und-AR"],
+ },
+ TestCase {
+ input: "hi-IN",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["hi-IN", "hi"],
+ expected_region_chain: &["hi-IN", "und-IN"],
+ },
+ TestCase {
+ input: "hi-Latn-IN",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["hi-Latn-IN", "hi-Latn", "en-IN", "en-001", "en"],
+ expected_region_chain: &["hi-Latn-IN", "und-IN"],
+ },
+ TestCase {
+ input: "zh-CN",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ // Note: "zh-Hans" is not reachable because it is the default script for "zh".
+ // The fallback algorithm does not visit the language-script bundle when the
+ // script is the default for the language
+ expected_language_chain: &["zh-CN", "zh"],
+ expected_region_chain: &["zh-CN", "und-CN"],
+ },
+ TestCase {
+ input: "zh-TW",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["zh-TW", "zh-Hant-TW", "zh-Hant"],
+ expected_region_chain: &["zh-TW", "und-TW"],
+ },
+ TestCase {
+ input: "yue-HK",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["yue-HK", "yue"],
+ expected_region_chain: &["yue-HK", "und-HK"],
+ },
+ TestCase {
+ input: "yue-HK",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: Some(LocaleFallbackSupplement::Collation),
+ // TODO(#1964): add "zh" as a target.
+ expected_language_chain: &["yue-HK", "yue", "zh-Hant"],
+ expected_region_chain: &["yue-HK", "und-HK"],
+ },
+ ];
+
+ #[test]
+ fn test_fallback() { // Runs every `TEST_CASES` row under both Language and Region priority and checks the whole fallback chain.
+ let fallbacker_no_data = LocaleFallbacker::new_without_data();
+ let fallbacker_no_data = fallbacker_no_data.as_borrowed();
+ let fallbacker_with_data = LocaleFallbacker::new(); // `new()` already returns the borrowed form
+ for cas in TEST_CASES {
+ for (priority, expected_chain) in [
+ (
+ LocaleFallbackPriority::Language,
+ cas.expected_language_chain,
+ ),
+ (LocaleFallbackPriority::Region, cas.expected_region_chain),
+ ] {
+ let mut config = LocaleFallbackConfig::default();
+ config.priority = priority;
+ config.extension_key = cas.extension_key;
+ config.fallback_supplement = cas.fallback_supplement;
+ let fallbacker = if cas.requires_data {
+ fallbacker_with_data
+ } else {
+ fallbacker_no_data
+ };
+ let mut it = fallbacker
+ .for_config(config)
+ .fallback_for(Locale::from_str(cas.input).unwrap().into());
+ for &expected in expected_chain { // compare the current locale, then advance one step
+ assert_eq!(
+ expected,
+ &*it.get().write_to_string(),
+ "{:?} ({:?})",
+ cas.input,
+ priority
+ );
+ it.step();
+ }
+ assert_eq!( // once the expected chain is exhausted the iterator must be at `und`
+ "und",
+ &*it.get().write_to_string(),
+ "{:?} ({:?})",
+ cas.input,
+ priority
+ );
+ }
+ }
+ }
+}
diff --git a/vendor/icu_locid_transform/src/fallback/mod.rs b/vendor/icu_locid_transform/src/fallback/mod.rs
new file mode 100644
index 000000000..6b13e0201
--- /dev/null
+++ b/vendor/icu_locid_transform/src/fallback/mod.rs
@@ -0,0 +1,300 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Tools for locale fallback, enabling arbitrary input locales to be mapped into the nearest
+//! locale with data.
+//!
+//! The algorithm implemented in this module is called [Flexible Vertical Fallback](
+//! https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit).
+//! Watch [#2243](https://github.com/unicode-org/icu4x/issues/2243) to track improvements to
+//! this algorithm and steps to enshrine the algorithm in CLDR.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu_locid::locale;
+//! use icu_locid_transform::LocaleFallbacker;
+//!
+//! // Set up a LocaleFallbacker with data.
+//! let fallbacker = LocaleFallbacker::new();
+//!
+ //! // Create a LocaleFallbackIterator with a default configuration.
+//! // By default, uses language priority with no additional extension keywords.
+//! let mut fallback_iterator = fallbacker
+//! .for_config(Default::default())
+//! .fallback_for(locale!("hi-Latn-IN").into());
+//!
+//! // Run the algorithm and check the results.
+//! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("en-IN").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("en-001").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("en").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("und").into());
+//! ```
+
+use crate::provider::*;
+use icu_locid::extensions::unicode::Value;
+use icu_locid::subtags::Variants;
+use icu_provider::prelude::*;
+
+#[doc(inline)]
+pub use icu_provider::fallback::*;
+
+mod algorithms;
+
+ /// Implements the algorithm defined in *[UTS #35: Locale Inheritance and Matching]*.
+ ///
+ /// Note that this implementation performs some additional steps compared to the *UTS #35*
+ /// algorithm, see *[the design doc]* for a detailed description, and [#2243](
+ /// https://github.com/unicode-org/icu4x/issues/2243) to track alignment with *UTS #35*.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::fallback::LocaleFallbacker;
+ ///
+ /// // Set up a LocaleFallbacker with data.
+ /// let fallbacker = LocaleFallbacker::new();
+ ///
+ /// // Create a LocaleFallbackIterator with a default configuration.
+ /// // By default, uses language priority with no additional extension keywords.
+ /// let mut fallback_iterator = fallbacker
+ /// .for_config(Default::default())
+ /// .fallback_for(locale!("hi-Latn-IN").into());
+ ///
+ /// // Run the algorithm and check the results.
+ /// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into());
+ /// fallback_iterator.step();
+ /// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into());
+ /// fallback_iterator.step();
+ /// assert_eq!(fallback_iterator.get(), &locale!("en-IN").into());
+ /// fallback_iterator.step();
+ /// assert_eq!(fallback_iterator.get(), &locale!("en-001").into());
+ /// fallback_iterator.step();
+ /// assert_eq!(fallback_iterator.get(), &locale!("en").into());
+ /// fallback_iterator.step();
+ /// assert_eq!(fallback_iterator.get(), &locale!("und").into());
+ /// ```
+ ///
+ /// [UTS #35: Locale Inheritance and Matching]: https://www.unicode.org/reports/tr35/#Locale_Inheritance
+ /// [the design doc]: https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit
+ #[doc(hidden)]
+ #[derive(Debug, Clone, PartialEq)]
+ pub struct LocaleFallbacker {
+ likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>,
+ parents: DataPayload<LocaleFallbackParentsV1Marker>,
+ collation_supplement: Option<DataPayload<CollationFallbackSupplementV1Marker>>, // `None` when built without data or when the provider lacks the supplement key
+ }
+
+ /// Borrowed version of [`LocaleFallbacker`]: holds references to the same three data structs.
+ #[derive(Debug, Clone, Copy, PartialEq)]
+ pub struct LocaleFallbackerBorrowed<'a> {
+ likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
+ parents: &'a LocaleFallbackParentsV1<'a>,
+ collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, // handed on by `for_config` only when the config asks for the collation supplement
+ }
+
+ /// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`].
+ #[derive(Debug, Clone, Copy, PartialEq)]
+ pub struct LocaleFallbackerWithConfig<'a> {
+ likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
+ parents: &'a LocaleFallbackParentsV1<'a>,
+ supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, // the supplement selected by `for_config`, if any
+ config: LocaleFallbackConfig,
+ }
+
+ /// Inner iteration type. Does not own the item under fallback.
+ #[derive(Debug)]
+ struct LocaleFallbackIteratorInner<'a> {
+ likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
+ parents: &'a LocaleFallbackParentsV1<'a>,
+ supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
+ config: LocaleFallbackConfig,
+ backup_extension: Option<Value>, // extension keyword value removed by a step, pending restore
+ backup_subdivision: Option<Value>, // `-u-sd` value removed by a step, pending restore
+ backup_variants: Option<Variants>, // variants removed by a step, pending restore
+ }
+
+ /// Iteration type for locale fallback operations.
+ ///
+ /// Because the `Iterator` trait does not allow items to borrow from the iterator, this type does
+ /// not implement that trait. Instead, use `.step()` and `.get()`.
+ #[derive(Debug)]
+ pub struct LocaleFallbackIterator<'a, 'b> {
+ current: DataLocale, // the locale currently under fallback, owned by the iterator
+ inner: LocaleFallbackIteratorInner<'a>,
+ phantom: core::marker::PhantomData<&'b ()>, // the only use of the `'b` lifetime parameter in this struct
+ }
+
+ impl LocaleFallbacker {
+ /// Creates a borrowed fallbacker ([`LocaleFallbackerBorrowed`]) backed by compiled fallback data (likely subtags, parent locales, and the collation supplement).
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ #[cfg(feature = "compiled_data")]
+ #[allow(clippy::new_ret_no_self)] // keeping constructors together
+ pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> {
+ let tickstatic = LocaleFallbackerBorrowed {
+ likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1,
+ parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1,
+ collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1),
+ };
+ // Manual covariance: the zeromaps keep the compiler from doing this conversion itself. NOTE(review): presumably this only changes the lifetime parameter (`'static` to `'a`) -- confirm.
+ unsafe { core::mem::transmute(tickstatic) }
+ }
+
+ icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError,
+ #[cfg(skip)]
+ functions: [
+ new,
+ try_new_with_any_provider,
+ try_new_with_buffer_provider,
+ try_new_unstable,
+ Self
+ ]);
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
+ where
+ P: DataProvider<LocaleFallbackLikelySubtagsV1Marker>
+ + DataProvider<LocaleFallbackParentsV1Marker>
+ + DataProvider<CollationFallbackSupplementV1Marker>
+ + ?Sized,
+ {
+ let likely_subtags = provider.load(Default::default())?.take_payload()?; // required: any load error is returned to the caller
+ let parents = provider.load(Default::default())?.take_payload()?; // required: any load error is returned to the caller
+ let collation_supplement = match DataProvider::<CollationFallbackSupplementV1Marker>::load(
+ provider,
+ Default::default(),
+ ) {
+ Ok(response) => Some(response.take_payload()?),
+ // It is expected that not all keys are present, so a missing key is not an error
+ Err(DataError {
+ kind: DataErrorKind::MissingDataKey,
+ ..
+ }) => None,
+ Err(e) => return Err(e), // any other error kind is returned to the caller
+ };
+ Ok(LocaleFallbacker {
+ likely_subtags,
+ parents,
+ collation_supplement,
+ })
+ }
+
+ /// Creates a [`LocaleFallbacker`] without fallback data. Using this constructor may result in
+ /// surprising behavior, especially in multi-script languages.
+ pub fn new_without_data() -> Self {
+ LocaleFallbacker {
+ likely_subtags: DataPayload::from_owned(Default::default()),
+ parents: DataPayload::from_owned(Default::default()),
+ collation_supplement: None,
+ }
+ }
+
+ /// Associates a configuration with this fallbacker.
+ #[inline]
+ pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig {
+ self.as_borrowed().for_config(config)
+ }
+
+ /// Derives a configuration from a [`DataKey`] and associates it
+ /// with this fallbacker.
+ #[inline]
+ #[doc(hidden)] // will be removed in 2.0
+ pub fn for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig {
+ self.for_config(data_key.fallback_config())
+ }
+
+ /// Creates a borrowed version of this fallbacker for performance.
+ pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed {
+ LocaleFallbackerBorrowed {
+ likely_subtags: self.likely_subtags.get(),
+ parents: self.parents.get(),
+ collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()),
+ }
+ }
+ }
+
+ impl<'a> LocaleFallbackerBorrowed<'a> {
+ /// Associates a configuration with this fallbacker, selecting the supplement the configuration asks for.
+ #[inline]
+ pub const fn for_config(self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig<'a> {
+ LocaleFallbackerWithConfig {
+ likely_subtags: self.likely_subtags,
+ parents: self.parents,
+ supplement: match config.fallback_supplement {
+ Some(LocaleFallbackSupplement::Collation) => self.collation_supplement,
+ _ => None, // no supplement requested, or one this code has no data for
+ },
+ config,
+ }
+ }
+ }
+
+ impl LocaleFallbackerBorrowed<'static> {
+ /// Cheaply converts a `LocaleFallbackerBorrowed<'static>` into a `LocaleFallbacker` by wrapping each static reference in a [`DataPayload`].
+ pub const fn static_to_owned(self) -> LocaleFallbacker {
+ LocaleFallbacker {
+ likely_subtags: DataPayload::from_static_ref(self.likely_subtags),
+ parents: DataPayload::from_static_ref(self.parents),
+ collation_supplement: match self.collation_supplement { // written as a `match`; NOTE(review): presumably because `Option::map` is not usable in a `const fn` -- confirm
+ None => None,
+ Some(x) => Some(DataPayload::from_static_ref(x)),
+ },
+ }
+ }
+ }
+
+ impl<'a> LocaleFallbackerWithConfig<'a> {
+ /// Creates an iterator based on a [`DataLocale`].
+ ///
+ /// If you have a [`Locale`](icu_locid::Locale), call `.into()` to get a [`DataLocale`].
+ ///
+ /// When first initialized, the locale is normalized according to the fallback algorithm,
+ /// so the first value returned by `.get()` may differ from the locale passed in.
+ pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> {
+ self.normalize(&mut locale);
+ LocaleFallbackIterator {
+ current: locale,
+ inner: LocaleFallbackIteratorInner {
+ likely_subtags: self.likely_subtags,
+ parents: self.parents,
+ supplement: self.supplement,
+ config: self.config,
+ backup_extension: None, // nothing has been stripped yet
+ backup_subdivision: None,
+ backup_variants: None,
+ },
+ phantom: core::marker::PhantomData,
+ }
+ }
+ }
+
+ impl LocaleFallbackIterator<'_, '_> {
+ /// Borrows the current [`DataLocale`] under fallback.
+ pub fn get(&self) -> &DataLocale {
+ &self.current
+ }
+
+ /// Takes the current [`DataLocale`] under fallback, consuming the iterator.
+ pub fn take(self) -> DataLocale {
+ self.current
+ }
+
+ /// Performs one step of the locale fallback algorithm and returns `self` so calls can be chained.
+ ///
+ /// The fallback is completed once the inner [`DataLocale`] becomes `und`.
+ pub fn step(&mut self) -> &mut Self {
+ self.inner.step(&mut self.current);
+ self
+ }
+ }
diff --git a/vendor/icu_locid_transform/src/lib.rs b/vendor/icu_locid_transform/src/lib.rs
new file mode 100644
index 000000000..4c4e34aea
--- /dev/null
+++ b/vendor/icu_locid_transform/src/lib.rs
@@ -0,0 +1,116 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Canonicalization of locale identifiers based on [`CLDR`] data.
+//!
+//! This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/))
+//! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+//!
+//! It currently supports locale canonicalization based upon the canonicalization
+//! algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`],
+//! as well as the minimize and maximize likely subtags algorithms
+//! as described in [`UTS #35: Unicode LDML 3. Likely Subtags`].
+//!
+ //! The maximize method potentially updates a passed in locale in place
+ //! depending upon the results of running the 'Add Likely Subtags' algorithm
+ //! from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+//!
+ //! The minimize method likewise potentially updates a passed in locale in place, depending upon the results of running the
+ //! 'Remove Likely Subtags' algorithm from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::Locale;
+//! use icu::locid_transform::{LocaleCanonicalizer, TransformResult};
+//!
+//! let lc = LocaleCanonicalizer::new();
+//!
+//! let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc"
+//! .parse()
+//! .expect("parse failed");
+//! assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+//! assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::<Locale>().unwrap());
+//! ```
+//!
+//! ```
+//! use icu::locid::locale;
+//! use icu::locid_transform::{LocaleExpander, TransformResult};
+//!
+//! let lc = LocaleExpander::new();
+//!
+//! let mut locale = locale!("zh-CN");
+//! assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+//! assert_eq!(locale, locale!("zh-Hans-CN"));
+//!
+//! let mut locale = locale!("zh-Hant-TW");
+//! assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+//! assert_eq!(locale, locale!("zh-Hant-TW"));
+//! ```
+//!
+//! ```
+//! use icu::locid::locale;
+//! use icu::locid_transform::{LocaleExpander, TransformResult};
+//! use writeable::assert_writeable_eq;
+//!
+//! let lc = LocaleExpander::new();
+//!
+//! let mut locale = locale!("zh-Hans-CN");
+//! assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+//! assert_eq!(locale, locale!("zh"));
+//!
+//! let mut locale = locale!("zh");
+//! assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+//! assert_eq!(locale, locale!("zh"));
+//! ```
+//!
+//! [`ICU4X`]: ../icu/index.html
+//! [`CLDR`]: http://cldr.unicode.org/
+ //! [`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags
+ //! [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization
+
+// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, feature = "std")), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::indexing_slicing,
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums,
+ missing_debug_implementations,
+ )
+)]
+#![warn(missing_docs)]
+
+extern crate alloc;
+
+mod canonicalizer;
+mod directionality;
+mod error;
+mod expander;
+pub mod fallback;
+pub mod provider;
+
+pub use canonicalizer::LocaleCanonicalizer;
+pub use directionality::{Direction, LocaleDirectionality};
+pub use error::LocaleTransformError;
+pub use expander::LocaleExpander;
+#[doc(inline)]
+pub use fallback::LocaleFallbacker;
+
+ /// Used to track the result of a transformation operation that potentially modifies its argument in place.
+ #[derive(Debug, PartialEq)]
+ #[allow(clippy::exhaustive_enums)] // this enum is stable
+ pub enum TransformResult {
+ /// The transformation (canonicalize, maximize, or minimize) modified the locale.
+ Modified,
+ /// The transformation (canonicalize, maximize, or minimize) did not modify the locale.
+ Unmodified,
+ }
+
+#[doc(no_inline)]
+pub use LocaleTransformError as Error;
diff --git a/vendor/icu_locid_transform/src/provider/canonicalizer.rs b/vendor/icu_locid_transform/src/provider/canonicalizer.rs
new file mode 100644
index 000000000..7638bba1f
--- /dev/null
+++ b/vendor/icu_locid_transform/src/provider/canonicalizer.rs
@@ -0,0 +1,81 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_locid::subtags::{Language, Region, Script, Variant};
+use icu_provider::prelude::*;
+use tinystr::UnvalidatedTinyAsciiStr;
+use zerovec::{VarZeroVec, ZeroMap, ZeroSlice};
+
+ #[icu_provider::data_struct(marker(AliasesV1Marker, "locid_transform/aliases@1", singleton))]
+ #[derive(PartialEq, Clone, Default)]
+ #[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+ )]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+ #[yoke(prove_covariance_manually)]
+ /// This alias data is used for locale canonicalization. Each field defines a
+ /// mapping from an old identifier to a new identifier, based upon the rules
+ /// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data
+ /// is stored in sorted order, allowing for binary search to identify rules to
+ /// apply. It is broken down into smaller vectors based upon some characteristic
+ /// of the data, to help avoid unnecessary searches. For example, the `sgn_region`
+ /// field contains aliases for sign language and region, so that it is not
+ /// necessary to search the data unless the input is a sign language.
+ ///
+ /// The algorithm in tr35 is not guaranteed to terminate on data other than what
+ /// is currently in CLDR. For this reason, it is not a good idea to attempt to add
+ /// or modify aliases for use in this structure.
+ ///
+ /// <div class="stab unstable">
+ /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+ /// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+ /// to be stable, their Rust representation might not be. Use with caution.
+ /// </div>
+ // TODO: Use validated types as value types
+ #[derive(Debug)]
+ pub struct AliasesV1<'data> {
+ /// `[language(-variant)+\] -> [langid]`
+ /// This is not a map as it's searched linearly according to the canonicalization rules.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
+ /// `sgn-[region] -> [language]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>,
+ /// `[language{2}] -> [langid]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>,
+ /// `[language{3}] -> [langid]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>,
+ /// `[langid] -> [langid]`
+ /// This is not a map as it's searched linearly according to the canonicalization rules.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
+
+ /// `[script] -> [script]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, Script>,
+
+ /// `[region{2}] -> [region]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>,
+ /// `[region{3}] -> [region]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>,
+
+ /// `[region] -> [region]+`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>,
+
+ /// `[variant] -> [variant]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>,
+
+ /// `[value{7}] -> [value{7}]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
+ }
diff --git a/vendor/icu_locid_transform/src/provider/directionality.rs b/vendor/icu_locid_transform/src/provider/directionality.rs
new file mode 100644
index 000000000..568248180
--- /dev/null
+++ b/vendor/icu_locid_transform/src/provider/directionality.rs
@@ -0,0 +1,36 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_provider::prelude::*;
+use zerovec::ZeroVec;
+
+#[icu_provider::data_struct(marker(
+ ScriptDirectionV1Marker,
+ "locid_transform/script_dir@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// Script directionality data: two lists of scripts (right-to-left and left-to-right) used to determine the script directionality of a locale.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct ScriptDirectionV1<'data> {
+ /// Scripts in right-to-left direction, stored as unvalidated script subtags.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub rtl: ZeroVec<'data, UnvalidatedScript>,
+ /// Scripts in left-to-right direction, stored as unvalidated script subtags.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub ltr: ZeroVec<'data, UnvalidatedScript>,
+}
diff --git a/vendor/icu_locid_transform/src/provider/expander.rs b/vendor/icu_locid_transform/src/provider/expander.rs
new file mode 100644
index 000000000..2f624b40a
--- /dev/null
+++ b/vendor/icu_locid_transform/src/provider/expander.rs
@@ -0,0 +1,243 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_locid::subtags::{Language, Region, Script};
+use icu_provider::prelude::*;
+use zerovec::ZeroMap;
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsV1Marker,
+ "locid_transform/likelysubtags@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for the minimize and maximize operations.
+/// Each field defines a mapping from an old identifier to a new identifier,
+/// based upon the rules in
+/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+///
+/// The data is broken down into smaller maps (one per combination of subtags) based upon the rules
+/// defined for the likely subtags maximize algorithm.
+///
+/// For efficiency, only the relevant part of the LanguageIdentifier is stored
+/// for searching and replacing. E.g., the `language_script` field is used to store
+/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
+/// region.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsV1<'data> {
+ /// Language and script: maps a (language, script) pair to the likely region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
+ /// Language and region: maps a (language, region) pair to the likely script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
+ /// Just language: maps a language to the likely (script, region) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
+ /// Script and region: maps a (script, region) pair to the likely language.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
+ /// Just script: maps a script to the likely (language, region) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
+ /// Just region: maps a region to the likely (language, script) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
+ /// Undefined: a single (language, script, region) triple; presumably the result for `und` input with none of these subtags (confirm against the expander).
+ pub und: (Language, Script, Region),
+}
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsForLanguageV1Marker,
+ "locid_transform/likelysubtags_l@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for the minimize and maximize operations.
+/// Each field defines a mapping from an old identifier to a new identifier,
+/// based upon the rules in
+/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+///
+/// The data is broken down into smaller maps (one per combination of subtags) based upon the rules
+/// defined for the likely subtags maximize algorithm.
+///
+/// For efficiency, only the relevant part of the LanguageIdentifier is stored
+/// for searching and replacing. E.g., the `language_script` field is used to store
+/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
+/// region.
+///
+/// This struct contains mappings when the input contains a language subtag.
+/// Also see [`LikelySubtagsForScriptRegionV1`].
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsForLanguageV1<'data> {
+ /// Language and script: maps a (language, script) pair to the likely region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
+ /// Language and region: maps a (language, region) pair to the likely script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
+ /// Just language: maps a language to the likely (script, region) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
+ /// Undefined: a single (language, script, region) triple; presumably the result for `und` input with none of these subtags (confirm against the expander).
+ pub und: (Language, Script, Region),
+}
+
+impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForLanguageV1<'data> {
+    /// Keeps the three language-keyed maps and the `und` entry of `other`;
+    /// the remaining fields of `other` are discarded.
+    fn from(other: LikelySubtagsV1<'data>) -> Self {
+        let LikelySubtagsV1 {
+            language_script,
+            language_region,
+            language,
+            und,
+            ..
+        } = other;
+        Self {
+            language_script,
+            language_region,
+            language,
+            und,
+        }
+    }
+}
+
+impl<'data> LikelySubtagsForLanguageV1<'data> {
+    /// Builds the language-keyed subset from a borrowed [`LikelySubtagsV1`],
+    /// cloning the three maps and copying the `und` entry.
+    pub(crate) fn clone_from_borrowed(other: &LikelySubtagsV1<'data>) -> Self {
+        let language_script = other.language_script.clone();
+        let language_region = other.language_region.clone();
+        let language = other.language.clone();
+        let und = other.und;
+        Self {
+            language_script,
+            language_region,
+            language,
+            und,
+        }
+    }
+}
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsForScriptRegionV1Marker,
+ "locid_transform/likelysubtags_sr@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for the minimize and maximize operations.
+/// Each field defines a mapping from an old identifier to a new identifier,
+/// based upon the rules in
+/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+///
+/// The data is broken down into smaller maps (one per combination of subtags) based upon the rules
+/// defined for the likely subtags maximize algorithm.
+///
+/// For efficiency, only the relevant part of the LanguageIdentifier is stored
+/// for searching and replacing. E.g., the `script_region` field is used to store
+/// rules for `LanguageIdentifier`s that contain a script and a region, but not a
+/// language.
+///
+/// This struct contains mappings when the input does not contain a language subtag.
+/// Also see [`LikelySubtagsForLanguageV1`].
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsForScriptRegionV1<'data> {
+ /// Script and region: maps a (script, region) pair to the likely language.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
+ /// Just script: maps a script to the likely (language, region) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
+ /// Just region: maps a region to the likely (language, script) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
+}
+
+impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForScriptRegionV1<'data> {
+    /// Keeps the three maps of `other` that are not keyed by language;
+    /// the remaining fields of `other` are discarded.
+    fn from(other: LikelySubtagsV1<'data>) -> Self {
+        let LikelySubtagsV1 {
+            script_region,
+            script,
+            region,
+            ..
+        } = other;
+        Self {
+            script_region,
+            script,
+            region,
+        }
+    }
+}
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsExtendedV1Marker,
+ "locid_transform/likelysubtags_ext@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for full coverage of locales, including ones that
+/// don't otherwise have data in the Common Locale Data Repository (CLDR).
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsExtendedV1<'data> {
+ /// Language and script: maps a (language, script) pair to the likely region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
+ /// Language and region: maps a (language, region) pair to the likely script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
+ /// Just language: maps a language to the likely (script, region) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
+ /// Script and region: maps a (script, region) pair to the likely language.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
+ /// Just script: maps a script to the likely (language, region) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
+ /// Just region: maps a region to the likely (language, script) pair.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
+}
+
+impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsExtendedV1<'data> {
+    /// Moves all six maps out of `other`; only its `und` entry is discarded.
+    fn from(other: LikelySubtagsV1<'data>) -> Self {
+        let LikelySubtagsV1 {
+            language_script,
+            language_region,
+            language,
+            script_region,
+            script,
+            region,
+            und: _,
+        } = other;
+        Self {
+            language_script,
+            language_region,
+            language,
+            script_region,
+            script,
+            region,
+        }
+    }
+}
diff --git a/vendor/icu_locid_transform/src/provider/fallback.rs b/vendor/icu_locid_transform/src/provider/fallback.rs
new file mode 100644
index 000000000..c0635afe7
--- /dev/null
+++ b/vendor/icu_locid_transform/src/provider/fallback.rs
@@ -0,0 +1,102 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_locid::extensions::unicode::Key;
+use icu_locid::subtags::{region, script, Language, Region, Script};
+use icu_provider::prelude::*;
+use zerovec::ule::UnvalidatedStr;
+use zerovec::ZeroMap;
+use zerovec::ZeroMap2d;
+
+/// Locale fallback rules derived from likely subtags data: per-language default scripts and regions, plus language-region and language-script overrides.
+#[icu_provider::data_struct(marker(
+ LocaleFallbackLikelySubtagsV1Marker,
+ "fallback/likelysubtags@1",
+ singleton
+))]
+#[derive(Default, Clone, PartialEq, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+pub struct LocaleFallbackLikelySubtagsV1<'data> {
+ /// Map from language to the default script in that language. Languages whose default script
+ /// is `Latn` are not included in the map for data size savings.
+ ///
+ /// Example: "zh" defaults to "Hans", which is in this map.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub l2s: ZeroMap<'data, UnvalidatedLanguage, Script>,
+ /// Map from language-region pairs to a script. Only populated if the script is different
+ /// from the one in `l2s` for that language.
+ ///
+ /// Example: "zh-TW" defaults to "Hant", which is in this map.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub lr2s: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedRegion, Script>,
+ /// Map from language to the default region in that language. Languages whose default region
+ /// is `ZZ` are not included in the map for data size savings.
+ ///
+ /// Example: "zh" defaults to "CN".
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub l2r: ZeroMap<'data, UnvalidatedLanguage, Region>,
+ /// Map from language-script pairs to a region. Only populated if the region is different
+ /// from the one in `l2r` for that language.
+ ///
+ /// Example: "zh-Hant" defaults to "TW".
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub ls2r: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedScript, Region>,
+}
+
+/// `Latn` is the most common script, so it is defaulted for data size savings (languages whose default script is `Latn` are left out of `l2s`).
+pub const DEFAULT_SCRIPT: Script = script!("Latn");
+
+/// `ZZ` is the most common region, so it is defaulted for data size savings (languages whose default region is `ZZ` are left out of `l2r`).
+pub const DEFAULT_REGION: Region = region!("ZZ");
+
+/// Locale fallback rules derived from CLDR parent locales data.
+#[icu_provider::data_struct(marker(
+ LocaleFallbackParentsV1Marker,
+ "fallback/parents@1",
+ singleton
+))]
+#[derive(Default, Clone, PartialEq, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+pub struct LocaleFallbackParentsV1<'data> {
+ /// Map from language identifier to language identifier, indicating that the language on the
+ /// left should inherit from the language on the right. Keys are unvalidated strings; values are (language, optional script, optional region) tuples.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>,
+}
+
+/// Key-specific supplemental fallback data; the marker declared here is the collation (`co`) supplement.
+#[icu_provider::data_struct(marker(
+ CollationFallbackSupplementV1Marker,
+ "fallback/supplement/co@1",
+ singleton,
+))]
+#[derive(Default, Clone, PartialEq, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+pub struct LocaleFallbackSupplementV1<'data> {
+ /// Additional parent locales to supplement the common ones; keys are unvalidated strings, values are (language, optional script, optional region) tuples.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>,
+ /// Default values for Unicode extension keywords, stored as a two-dimensional map keyed first by extension `Key` and then by an unvalidated string.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub unicode_extension_defaults: ZeroMap2d<'data, Key, UnvalidatedStr, UnvalidatedStr>,
+}
diff --git a/vendor/icu_locid_transform/src/provider/mod.rs b/vendor/icu_locid_transform/src/provider/mod.rs
new file mode 100644
index 000000000..337bd3f0c
--- /dev/null
+++ b/vendor/icu_locid_transform/src/provider/mod.rs
@@ -0,0 +1,98 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// Provider structs must be stable
+#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
+
+//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
+//!
+//! <div class="stab unstable">
+//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
+//! to be stable, their Rust representation might not be. Use with caution.
+//! </div>
+//!
+//! Read more about data providers: [`icu_provider`]
+
+mod canonicalizer;
+pub use canonicalizer::*;
+mod directionality;
+pub use directionality::*;
+mod expander;
+pub use expander::*;
+mod fallback;
+pub use fallback::*;
+
+#[cfg(feature = "compiled_data")]
+#[derive(Debug)]
+/// Baked data: a unit struct to which the `icu_locid_transform_data` macros invoked right after it are applied (only with the `compiled_data` feature).
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
+/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
+/// </div>
+pub struct Baked;
+
+#[cfg(feature = "compiled_data")]
+const _: () = {
+ pub mod icu {
+ pub use crate as locid_transform;
+ pub use icu_locid as locid;
+ }
+ icu_locid_transform_data::make_provider!(Baked);
+ icu_locid_transform_data::impl_fallback_likelysubtags_v1!(Baked);
+ icu_locid_transform_data::impl_fallback_parents_v1!(Baked);
+ icu_locid_transform_data::impl_fallback_supplement_co_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_aliases_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_likelysubtags_ext_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_likelysubtags_l_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_likelysubtags_sr_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_script_dir_v1!(Baked);
+};
+
+use alloc::borrow::Cow;
+use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
+
+// We use raw TinyAsciiStrs for map keys, as we then don't have to
+// validate them as subtags on deserialization. Map lookup can be
+// done even if they are not valid tags (an invalid key will just
+// become inaccessible). Unlike the others, `SemivalidatedSubdivision` is a plain `TinyAsciiStr<7>`.
+type UnvalidatedLanguage = UnvalidatedTinyAsciiStr<3>;
+type UnvalidatedScript = UnvalidatedTinyAsciiStr<4>;
+type UnvalidatedRegion = UnvalidatedTinyAsciiStr<3>;
+type UnvalidatedVariant = UnvalidatedTinyAsciiStr<8>;
+type UnvalidatedSubdivision = UnvalidatedTinyAsciiStr<7>;
+type SemivalidatedSubdivision = TinyAsciiStr<7>;
+
+// LanguageIdentifier doesn't have an AsULE implementation, so we have
+// to store strs (or pairs of strs, as `StrStrPairVarULE`) and parse when needed.
+type UnvalidatedLanguageIdentifier = str;
+type UnvalidatedLanguageIdentifierPair = StrStrPairVarULE;
+
+#[zerovec::make_varule(StrStrPairVarULE)]
+#[zerovec::derive(Debug)]
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
+#[cfg_attr(
+ feature = "serde",
+ derive(serde::Deserialize),
+ zerovec::derive(Deserialize)
+)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ zerovec::derive(Serialize),
+ databake(path = icu_locid_transform::provider),
+)]
+/// A pair of strings with an `EncodeAsVarULE` implementation; the `make_varule` attribute is given `StrStrPairVarULE` as the name of the generated type.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+pub struct StrStrPair<'a>(
+ #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
+ #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
+);
diff --git a/vendor/icu_locid_transform/tests/fixtures/canonicalize.json b/vendor/icu_locid_transform/tests/fixtures/canonicalize.json
new file mode 100644
index 000000000..89e6cec05
--- /dev/null
+++ b/vendor/icu_locid_transform/tests/fixtures/canonicalize.json
@@ -0,0 +1,444 @@
+[
+ {
+ "input": "es",
+ "output": "es"
+ },
+ {
+ "input": "en-CA",
+ "output": "en-CA"
+ },
+ {
+ "input": "cka",
+ "output": "cmr"
+ },
+ {
+ "input": "cze",
+ "output": "cs"
+ },
+ {
+ "input": "gfx",
+ "output": "vaj"
+ },
+ {
+ "input": "sgn-BR",
+ "output": "bzs"
+ },
+ {
+ "input": "sgn-DD",
+ "output": "gsg"
+ },
+ {
+ "input": "tam",
+ "output": "ta"
+ },
+ {
+ "input": "und-aaland",
+ "output": "und-AX"
+ },
+ {
+ "input": "nob-bokmal",
+ "output": "nb"
+ },
+ {
+ "input": "no-nynorsk",
+ "output": "nn"
+ },
+ {
+ "input": "und-Qaai",
+ "output": "und-Zinh"
+ },
+ {
+ "input": "en-554",
+ "output": "en-NZ"
+ },
+ {
+ "input": "en-084",
+ "output": "en-BZ"
+ },
+ {
+ "input": "art-lojban",
+ "output": "jbo"
+ },
+ {
+ "input": "zh-guoyu",
+ "output": "zh"
+ },
+ {
+ "input": "zh-hakka",
+ "output": "hak"
+ },
+ {
+ "input": "zh-xiang",
+ "output": "hsn"
+ },
+ {
+ "input": "aar-x-private",
+ "output": "aa-x-private"
+ },
+ {
+ "input": "heb-x-private",
+ "output": "he-x-private"
+ },
+ {
+ "input": "ces",
+ "output": "cs"
+ },
+ {
+ "input": "hy-arevela",
+ "output": "hy"
+ },
+ {
+ "input": "hy-arevmda",
+ "output": "hyw"
+ },
+ {
+ "input": "cel-gaulish",
+ "output": "xtg"
+ },
+ {
+ "input": "ja-latn-hepburn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-fonipa-hepburn-heploc",
+ "output": "ja-Latn-alalc97-fonipa"
+ },
+ {
+ "input": "und-Armn-SU",
+ "output": "und-Armn-AM"
+ },
+ {
+ "input": "sh",
+ "output": "sr-Latn"
+ },
+ {
+ "input": "sh-Cyrl",
+ "output": "sr-Cyrl"
+ },
+ {
+ "input": "cnr",
+ "output": "sr-ME"
+ },
+ {
+ "input": "cnr-BA",
+ "output": "sr-BA"
+ },
+ {
+ "input": "ru-SU",
+ "output": "ru-RU"
+ },
+ {
+ "input": "ru-810",
+ "output": "ru-RU"
+ },
+ {
+ "input": "en-SU",
+ "output": "en-RU"
+ },
+ {
+ "input": "en-810",
+ "output": "en-RU"
+ },
+ {
+ "input": "und-SU",
+ "output": "und-RU"
+ },
+ {
+ "input": "und-810",
+ "output": "und-RU"
+ },
+ {
+ "input": "und-Latn-SU",
+ "output": "und-Latn-RU"
+ },
+ {
+ "input": "und-Latn-810",
+ "output": "und-Latn-RU"
+ },
+ {
+ "input": "hy-SU",
+ "output": "hy-AM"
+ },
+ {
+ "input": "hy-810",
+ "output": "hy-AM"
+ },
+ {
+ "input": "und-Armn-SU",
+ "output": "und-Armn-AM"
+ },
+ {
+ "input": "und-Armn-810",
+ "output": "und-Armn-AM"
+ },
+ {
+ "input": "sr-CS",
+ "output": "sr-RS"
+ },
+ {
+ "input": "sr-Latn-CS",
+ "output": "sr-Latn-RS"
+ },
+ {
+ "input": "sr-Cyrl-CS",
+ "output": "sr-Cyrl-RS"
+ },
+ {
+ "input": "az-NT",
+ "output": "az-SA"
+ },
+ {
+ "input": "sl-t-sl-rozaj-biske-1994",
+ "output": "sl-t-sl-1994-biske-rozaj"
+ },
+ {
+ "input": "DE-T-M0-DIN-K0-QWERTZ",
+ "output": "de-t-k0-qwertz-m0-din"
+ },
+ {
+ "input": "en-t-m0-true",
+ "output": "en-t-m0-true"
+ },
+ {
+ "input": "en-t-iw",
+ "output": "en-t-he"
+ },
+ {
+ "input": "und-u-rg-no23",
+ "output": "und-u-rg-no50"
+ },
+ {
+ "input": "und-u-rg-cn11",
+ "output": "und-u-rg-cnbj"
+ },
+ {
+ "input": "und-u-rg-cz10a",
+ "output": "und-u-rg-cz110"
+ },
+ {
+ "input": "und-u-rg-fra",
+ "output": "und-u-rg-frges"
+ },
+ {
+ "input": "und-u-rg-frg",
+ "output": "und-u-rg-frges"
+ },
+ {
+ "input": "und-u-rg-lud",
+ "output": "und-u-rg-lucl"
+ },
+ {
+ "input": "und-NO-u-rg-no23",
+ "output": "und-NO-u-rg-no50"
+ },
+ {
+ "input": "und-CN-u-rg-cn11",
+ "output": "und-CN-u-rg-cnbj"
+ },
+ {
+ "input": "und-CZ-u-rg-cz10a",
+ "output": "und-CZ-u-rg-cz110"
+ },
+ {
+ "input": "und-FR-u-rg-fra",
+ "output": "und-FR-u-rg-frges"
+ },
+ {
+ "input": "und-FR-u-rg-frg",
+ "output": "und-FR-u-rg-frges"
+ },
+ {
+ "input": "und-u-rg-lud",
+ "output": "und-u-rg-lucl"
+ },
+ {
+ "input": "und-u-sd-no23",
+ "output": "und-u-sd-no50"
+ },
+ {
+ "input": "und-u-sd-cn11",
+ "output": "und-u-sd-cnbj"
+ },
+ {
+ "input": "und-u-sd-cz10a",
+ "output": "und-u-sd-cz110"
+ },
+ {
+ "input": "und-u-sd-fra",
+ "output": "und-u-sd-frges"
+ },
+ {
+ "input": "hy-arevela",
+ "output": "hy"
+ },
+ {
+ "input": "hy-Armn-arevela",
+ "output": "hy-Armn"
+ },
+ {
+ "input": "hy-AM-arevela",
+ "output": "hy-AM"
+ },
+ {
+ "input": "hy-arevela-fonipa",
+ "output": "hy-fonipa"
+ },
+ {
+ "input": "hy-fonipa-arevela",
+ "output": "hy-fonipa"
+ },
+ {
+ "input": "hy-arevmda",
+ "output": "hyw"
+ },
+ {
+ "input": "hy-Armn-arevmda",
+ "output": "hyw-Armn"
+ },
+ {
+ "input": "hy-AM-arevmda",
+ "output": "hyw-AM"
+ },
+ {
+ "input": "hy-arevmda-fonipa",
+ "output": "hyw-fonipa"
+ },
+ {
+ "input": "hy-fonipa-arevmda",
+ "output": "hyw-fonipa"
+ },
+ {
+ "input": "ja-Latn-hepburn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-JP-hepburn-heploc",
+ "output": "ja-Latn-JP-alalc97"
+ },
+ {
+ "input": "sv-aaland",
+ "output": "sv-AX"
+ },
+ {
+ "input": "el-polytoni",
+ "output": "el-polyton"
+ },
+ {
+ "input": "ja-Latn-alalc97-hepburn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-hepburn-alalc97-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-hepburn-heploc-alalc97",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-heploc-hepburn",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-aaland-heploc",
+ "output": "ja-Latn-AX-alalc97"
+ },
+ {
+ "input": "ja-Latn-heploc-polytoni",
+ "output": "ja-Latn-alalc97-polyton"
+ },
+ {
+ "input": "und-Latn-t-und-hani-m0-names",
+ "output": "und-Latn-t-und-hani-m0-prprname",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ca-ethiopian-amete-alem",
+ "output": "und-u-ca-ethioaa",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ca-islamicc",
+ "output": "und-u-ca-islamic-civil",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ks-primary",
+ "output": "und-u-ks-level1",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ks-tertiary",
+ "output": "und-u-ks-level3",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ms-imperial",
+ "output": "und-u-ms-uksystem",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kb-yes",
+ "output": "und-u-kb",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kc-yes",
+ "output": "und-u-kc",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kh-yes",
+ "output": "und-u-kh",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kk-yes",
+ "output": "und-u-kk",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kn-yes",
+ "output": "und-u-kn",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ka-yes",
+ "output": "und-u-ka-yes",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-cnckg",
+ "output": "und-u-tz-cnsha",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-eire",
+ "output": "und-u-tz-iedub",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-est",
+ "output": "und-u-tz-utcw05",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-gmt0",
+ "output": "und-u-tz-gmt",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-uct",
+ "output": "und-u-tz-utc",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-zulu",
+ "output": "und-u-tz-utc",
+ "disabled": true
+ }
+]
\ No newline at end of file
diff --git a/vendor/icu_locid_transform/tests/fixtures/maximize.json b/vendor/icu_locid_transform/tests/fixtures/maximize.json
new file mode 100644
index 000000000..86b9b2fe8
--- /dev/null
+++ b/vendor/icu_locid_transform/tests/fixtures/maximize.json
@@ -0,0 +1,182 @@
+[
+ {
+ "input": "en-US",
+ "output": "en-Latn-US"
+ },
+ {
+ "input": "en-GB",
+ "output": "en-Latn-GB"
+ },
+ {
+ "input": "es-AR",
+ "output": "es-Latn-AR"
+ },
+ {
+ "input": "it",
+ "output": "it-Latn-IT"
+ },
+ {
+ "input": "zh-Hans-CN",
+ "output": "zh-Hans-CN"
+ },
+ {
+ "input": "de-AT",
+ "output": "de-Latn-AT"
+ },
+ {
+ "input": "pl",
+ "output": "pl-Latn-PL"
+ },
+ {
+ "input": "fr-FR",
+ "output": "fr-Latn-FR"
+ },
+ {
+ "input": "de-AT",
+ "output": "de-Latn-AT"
+ },
+ {
+ "input": "sr-Cyrl-SR",
+ "output": "sr-Cyrl-SR"
+ },
+ {
+ "input": "nb-NO",
+ "output": "nb-Latn-NO"
+ },
+ {
+ "input": "fr-FR",
+ "output": "fr-Latn-FR"
+ },
+ {
+ "input": "mk",
+ "output": "mk-Cyrl-MK"
+ },
+ {
+ "input": "uk",
+ "output": "uk-Cyrl-UA"
+ },
+ {
+ "input": "und-PL",
+ "output": "pl-Latn-PL"
+ },
+ {
+ "input": "und-Latn-AM",
+ "output": "ku-Latn-AM"
+ },
+ {
+ "input": "ug-Cyrl",
+ "output": "ug-Cyrl-KZ"
+ },
+ {
+ "input": "sr-ME",
+ "output": "sr-Latn-ME"
+ },
+ {
+ "input": "mn-Mong",
+ "output": "mn-Mong-CN"
+ },
+ {
+ "input": "lif-Limb",
+ "output": "lif-Limb-IN"
+ },
+ {
+ "input": "gan",
+ "output": "gan-Hans-CN"
+ },
+ {
+ "input": "zh-Hant",
+ "output": "zh-Hant-TW"
+ },
+ {
+ "input": "yue-Hans",
+ "output": "yue-Hans-CN"
+ },
+ {
+ "input": "unr",
+ "output": "unr-Beng-IN"
+ },
+ {
+ "input": "unr-Deva",
+ "output": "unr-Deva-NP"
+ },
+ {
+ "input": "und-Thai-CN",
+ "output": "lcp-Thai-CN"
+ },
+ {
+ "input": "ug-Cyrl",
+ "output": "ug-Cyrl-KZ"
+ },
+ {
+ "input": "en-Latn-DE",
+ "output": "en-Latn-DE"
+ },
+ {
+ "input": "pl-FR",
+ "output": "pl-Latn-FR"
+ },
+ {
+ "input": "de-CH",
+ "output": "de-Latn-CH"
+ },
+ {
+ "input": "tuq",
+ "output": "tuq-Latn-ZZ"
+ },
+ {
+ "input": "sr-ME",
+ "output": "sr-Latn-ME"
+ },
+ {
+ "input": "ng",
+ "output": "ng-Latn-NA"
+ },
+ {
+ "input": "klx",
+ "output": "klx-Latn-ZZ"
+ },
+ {
+ "input": "kk-Arab",
+ "output": "kk-Arab-CN"
+ },
+ {
+ "input": "en-Cyrl",
+ "output": "en-Cyrl-US"
+ },
+ {
+ "input": "und-Cyrl-UK",
+ "output": "ru-Cyrl-UK"
+ },
+ {
+ "input": "und-Arab",
+ "output": "ar-Arab-EG"
+ },
+ {
+ "input": "und-Arab-FO",
+ "output": "ar-Arab-FO"
+ },
+ {
+ "input": "zh-TW",
+ "output": "zh-Hant-TW"
+ },
+ {
+ "input": "und",
+ "output": "en-Latn-US"
+ },
+ {
+ "input": "zh-SG",
+ "output": "zh-Hans-SG"
+ },
+ {
+ "input": "und-TW",
+ "output": "zh-Hant-TW"
+ },
+ {
+ "input": "zh-hant-u-nu-Chinese-hc-h24",
+ "output": "zh-Hant-TW-u-hc-h24-nu-chinese"
+ },
+ {
+ "input": "und-latn-AQ",
+ "output": "und-Latn-AQ"
+ }
+]
diff --git a/vendor/icu_locid_transform/tests/fixtures/minimize.json b/vendor/icu_locid_transform/tests/fixtures/minimize.json
new file mode 100644
index 000000000..6c225e230
--- /dev/null
+++ b/vendor/icu_locid_transform/tests/fixtures/minimize.json
@@ -0,0 +1,26 @@
+[
+ {
+ "input": "zh-Hant",
+ "output": "zh-TW"
+ },
+ {
+ "input": "en-Latn-US",
+ "output": "en"
+ },
+ {
+ "input": "en",
+ "output": "en"
+ },
+ {
+ "input": "und",
+ "output": "en"
+ },
+ {
+ "input": "es-ES-preeuro",
+ "output": "es-preeuro"
+ },
+ {
+ "input": "zh-Hant-TW-u-hc-h24-nu-chinese",
+ "output": "zh-TW-u-hc-h24-nu-chinese"
+ }
+]
diff --git a/vendor/icu_locid_transform/tests/fixtures/mod.rs b/vendor/icu_locid_transform/tests/fixtures/mod.rs
new file mode 100644
index 000000000..70dfb0381
--- /dev/null
+++ b/vendor/icu_locid_transform/tests/fixtures/mod.rs
@@ -0,0 +1,12 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use serde::Deserialize;
+
+#[derive(Deserialize)]
+pub struct CanonicalizationTest { // One entry of a JSON fixture file (canonicalize, maximize or minimize).
+ pub input: String, // Locale string that the test parses and hands to the transform.
+ pub output: String, // Expected string form of the locale after the transform.
+ pub disabled: Option<bool>, // `Some(true)` makes the test loops skip this entry; most fixture entries omit the key.
+}
diff --git a/vendor/icu_locid_transform/tests/helpers/mod.rs b/vendor/icu_locid_transform/tests/helpers/mod.rs
new file mode 100644
index 000000000..d250c510c
--- /dev/null
+++ b/vendor/icu_locid_transform/tests/helpers/mod.rs
@@ -0,0 +1,15 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+/// Opens the JSON file at `path` and deserializes its whole content into `T`.
+///
+/// Both failure modes are reported as an I/O [`Error`]: failing to open the
+/// file, and a `serde_json` error converted into an I/O error.
+pub fn read_fixture<T>(path: &str) -> Result<T, Error>
+where
+    T: serde::de::DeserializeOwned,
+{
+    let buffered = File::open(path).map(BufReader::new)?;
+    serde_json::from_reader(buffered).map_err(Error::from)
+}
diff --git a/vendor/icu_locid_transform/tests/locale_canonicalizer.rs b/vendor/icu_locid_transform/tests/locale_canonicalizer.rs
new file mode 100644
index 000000000..06e360f53
--- /dev/null
+++ b/vendor/icu_locid_transform/tests/locale_canonicalizer.rs
@@ -0,0 +1,82 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use icu_locid::Locale;
+use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, TransformResult};
+use writeable::assert_writeable_eq;
+
+/// Checks `LocaleExpander::maximize` against every enabled case in
+/// `tests/fixtures/maximize.json`.
+///
+/// For each case the written form of the transformed locale must equal the
+/// expected `output`, and the returned `TransformResult` must agree with
+/// whether the locale actually differs from its parsed input.
+#[test]
+fn test_maximize() {
+    let expander = LocaleExpander::new_extended();
+
+    let path = "./tests/fixtures/maximize.json";
+    let testcases: Vec<fixtures::CanonicalizationTest> =
+        helpers::read_fixture(path).expect("Failed to read a fixture");
+
+    for case in testcases {
+        if let Some(true) = case.disabled {
+            continue;
+        }
+        // An unparsable fixture input is a test-data bug; report it with the
+        // same message `test_canonicalize` uses instead of a bare `unwrap`.
+        let mut locale: Locale = case.input.parse().expect("Unable to parse input");
+        let unmodified = locale.clone();
+        let result = expander.maximize(&mut locale);
+        assert_writeable_eq!(locale, case.output);
+        // The reported result must match what really happened to the locale.
+        if result == TransformResult::Modified {
+            assert_ne!(locale, unmodified);
+        } else {
+            assert_eq!(locale, unmodified);
+        }
+    }
+}
+
+/// Checks `LocaleExpander::minimize` against every enabled case in
+/// `tests/fixtures/minimize.json`.
+///
+/// For each case the written form of the transformed locale must equal the
+/// expected `output`, and the returned `TransformResult` must agree with
+/// whether the locale actually differs from its parsed input.
+#[test]
+fn test_minimize() {
+    let expander = LocaleExpander::new_extended();
+
+    let path = "./tests/fixtures/minimize.json";
+    let testcases: Vec<fixtures::CanonicalizationTest> =
+        helpers::read_fixture(path).expect("Failed to read a fixture");
+
+    for case in testcases {
+        if let Some(true) = case.disabled {
+            continue;
+        }
+        // An unparsable fixture input is a test-data bug; report it with the
+        // same message `test_canonicalize` uses instead of a bare `unwrap`.
+        let mut locale: Locale = case.input.parse().expect("Unable to parse input");
+        let unmodified = locale.clone();
+        let result = expander.minimize(&mut locale);
+        assert_writeable_eq!(locale, case.output);
+        // The reported result must match what really happened to the locale.
+        if result == TransformResult::Modified {
+            assert_ne!(locale, unmodified);
+        } else {
+            assert_eq!(locale, unmodified);
+        }
+    }
+}
+
+/// Runs `LocaleCanonicalizer::canonicalize` over every enabled case in
+/// `tests/fixtures/canonicalize.json`.
+///
+/// Two things are verified per case: the canonicalized locale writes out as
+/// the expected `output`, and the returned `TransformResult` is `Modified`
+/// exactly when the locale no longer equals its parsed input.
+#[test]
+fn test_canonicalize() {
+    let canonicalizer = LocaleCanonicalizer::new();
+
+    let fixture_path = "./tests/fixtures/canonicalize.json";
+    let cases: Vec<fixtures::CanonicalizationTest> =
+        helpers::read_fixture(fixture_path).expect("Failed to read a fixture");
+
+    // Cases explicitly flagged `disabled: true` are dropped up front.
+    for case in cases.into_iter().filter(|c| c.disabled != Some(true)) {
+        let original: Locale = case.input.parse().expect("Unable to parse input");
+        let mut locale = original.clone();
+        let outcome = canonicalizer.canonicalize(&mut locale);
+        assert_writeable_eq!(locale, case.output);
+        if matches!(outcome, TransformResult::Modified) {
+            assert_ne!(locale, original);
+        } else {
+            assert_eq!(locale, original);
+        }
+    }
+}