diff options
Diffstat (limited to 'vendor/icu_locid_transform')
26 files changed, 4315 insertions, 0 deletions
diff --git a/vendor/icu_locid_transform/.cargo-checksum.json b/vendor/icu_locid_transform/.cargo-checksum.json new file mode 100644 index 000000000..ff2371e3f --- /dev/null +++ b/vendor/icu_locid_transform/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"334f54b489b88e4808da4fa355ddf773b86971570d4bb0360a876e3437cb962d","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"3ff3f2e2f9e5d4c5786132838576edef42a12c5529d5f080370f24aa6246bb92","benches/fixtures/locales.json":"9846601a29874baf140cac1252d4624fadc30182fec106d17f008ece886b9185","benches/fixtures/uncanonicalized-locales.json":"a866ed318b92f79d8853567e79b373c02984967023f5f39161140544e71b0c72","benches/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","benches/locale_canonicalizer.rs":"477af27aa35385d107e19b8e8a0452466a69d20e147a63631b78634c17712fed","src/canonicalizer.rs":"7770767fad20f38aaae19382430451573293ccdeac587b2d063163b870781086","src/directionality.rs":"a031a9d55ffe827c86400637b7302dc424c708dcc52ea667504a33a16db822c2","src/error.rs":"486fda8a0e9b7bb5822bbb0defb51145364d6053b8d60b88ef71e4b2bcd6699d","src/expander.rs":"d3ef487a416425ea6fb2ce7bf08b7487e180a580002e54ce30d5524cfd7514e2","src/fallback/algorithms.rs":"47625130cd5a04cf085dd0494591e117ba204a9d2eb649788b0ff96773cc6e21","src/fallback/mod.rs":"71ca2f23e410863010a62a48bba8a943763f7d49c12bf80b451a1b9295484e44","src/lib.rs":"5390facdc3df7e5ec5ab842bf59d4d13383d77d93a722685231a1d271cfba944","src/provider/canonicalizer.rs":"f848dbbc906b5f3be0b6384f5a2f26178898822a5c37334a57b12db8e1af0ed9","src/provider/directionality.rs":"fc516f501254af444cfa010d3c87aeea032dd6eccf5f82301c050ed3df2e05b1","src/provider/expander.rs":"6903d16138ada8216e0341d984126dcc1f6fac21468144e8140fc217b164572e","src/provider/fallback.rs":"d567e3d49261cac9de35825b3d57204d49068558f10579121f0bf0c42090c9cc","src/provider/mod.rs":"ce8e29eda7128747d489371118d1cf2c0e2740662eb8c6a55310dff86c5641cc","tests/fixtures/
canonicalize.json":"3dc2f661b04e4c9ecced70fc1b98a504eb5f5a0067b38665b10e50c25174bc4a","tests/fixtures/maximize.json":"8137359060218572bcaf5e56825346fdcb600e2189378ef4be836ba0a7295b66","tests/fixtures/minimize.json":"3bb6f19c5525818212388dcbf778064e7f73d2c32a8a7e8c58d618583a77121a","tests/fixtures/mod.rs":"18a900aa4f74120b7e7e64fcb09eae38a16504d66e23f752e743dcd9b1ad6530","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/locale_canonicalizer.rs":"1ebf7320f422b65cc3cc50468abdc8f08128feba85d936f5beb456b0b052a91d"},"package":"6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7"}
\ No newline at end of file diff --git a/vendor/icu_locid_transform/Cargo.toml b/vendor/icu_locid_transform/Cargo.toml new file mode 100644 index 000000000..b083c54a2 --- /dev/null +++ b/vendor/icu_locid_transform/Cargo.toml @@ -0,0 +1,128 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.66" +name = "icu_locid_transform" +version = "1.3.2" +authors = ["The ICU4X Project Developers"] +include = [ + "data/**/*", + "src/**/*", + "examples/**/*", + "benches/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md", +] +description = "API for Unicode Language and Locale Identifiers canonicalization" +homepage = "https://icu4x.unicode.org" +readme = "README.md" +categories = ["internationalization"] +license-file = "LICENSE" +repository = "https://github.com/unicode-org/icu4x" + +[package.metadata.cargo-all-features] +denylist = ["bench"] +skip_optional_dependencies = true + +[package.metadata.docs.rs] +all-features = true + +[lib] +bench = false + +[[test]] +name = "locale_canonicalizer" +required-features = ["serde"] + +[[bench]] +name = "locale_canonicalizer" +harness = false + +[dependencies.databake] +version = "0.1.6" +features = ["derive"] +optional = true +default-features = false + +[dependencies.displaydoc] +version = "0.2.3" +default-features = false + +[dependencies.icu_locid] +version = "~1.3.2" +features = ["zerovec"] +default-features = false + +[dependencies.icu_locid_transform_data] +version = "~1.3.2" +optional = true +default-features = false + +[dependencies.icu_provider] 
+version = "~1.3.2" +features = ["macros"] +default-features = false + +[dependencies.serde] +version = "1.0" +features = [ + "derive", + "alloc", +] +optional = true +default-features = false + +[dependencies.tinystr] +version = "0.7.3" +features = [ + "alloc", + "zerovec", +] +default-features = false + +[dependencies.zerovec] +version = "0.10.0" +features = ["yoke"] +default-features = false + +[dev-dependencies.serde] +version = "1.0" +features = ["derive"] + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +bench = ["serde"] +compiled_data = ["dep:icu_locid_transform_data"] +datagen = [ + "serde", + "dep:databake", + "zerovec/databake", + "icu_locid/databake", + "tinystr/databake", +] +default = ["compiled_data"] +serde = [ + "dep:serde", + "icu_locid/serde", + "tinystr/serde", + "zerovec/serde", + "icu_provider/serde", +] +std = [] + +[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion] +version = "0.4" diff --git a/vendor/icu_locid_transform/LICENSE b/vendor/icu_locid_transform/LICENSE new file mode 100644 index 000000000..9845aa5f4 --- /dev/null +++ b/vendor/icu_locid_transform/LICENSE @@ -0,0 +1,44 @@ +UNICODE LICENSE V3 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 2020-2023 Unicode, Inc. + +NOTICE TO USER: Carefully read the following legal agreement. BY +DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR +SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT +DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of data files and any associated documentation (the "Data Files") or +software and any associated documentation (the "Software") to deal in the +Data Files or Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, and/or sell +copies of the Data Files or Software, and to permit persons to whom the +Data Files or Software are furnished to do so, provided that either (a) +this copyright and permission notice appear with all copies of the Data +Files or Software, or (b) this copyright and permission notice appear in +associated Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF +THIRD PARTY RIGHTS. + +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE +BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, +OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA +FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall +not be used in advertising or otherwise to promote the sale, use or other +dealings in these Data Files or Software without prior written +authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. 
diff --git a/vendor/icu_locid_transform/README.md b/vendor/icu_locid_transform/README.md new file mode 100644 index 000000000..b97fea239 --- /dev/null +++ b/vendor/icu_locid_transform/README.md @@ -0,0 +1,77 @@ +# icu_locid_transform [![crates.io](https://img.shields.io/crates/v/icu_locid_transform)](https://crates.io/crates/icu_locid_transform) + +<!-- cargo-rdme start --> + +Canonicalization of locale identifiers based on [`CLDR`] data. + +This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/)) +and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. + +It currently supports locale canonicalization based upon the canonicalization +algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`], +as well as the minimize and maximize likely subtags algorithms +as described in [`UTS #35: Unicode LDML 3. Likely Subtags`]. + +The maximize method potentially updates a passed in locale in place +depending upon the results of running the 'Add Likely Subtags' algorithm +from [`UTS #35: Unicode LDML 3. Likely Subtags`]. + +The minimize method likewise updates a passed in locale in place, depending upon the results of running the +'Remove Likely Subtags' algorithm from [`UTS #35: Unicode LDML 3. Likely Subtags`]. 
+ +## Examples + +```rust +use icu::locid::Locale; +use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; + +let lc = LocaleCanonicalizer::new(); + +let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" + .parse() + .expect("parse failed"); +assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); +assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::<Locale>().unwrap()); +``` + +```rust +use icu::locid::locale; +use icu::locid_transform::{LocaleExpander, TransformResult}; + +let lc = LocaleExpander::new(); + +let mut locale = locale!("zh-CN"); +assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); +assert_eq!(locale, locale!("zh-Hans-CN")); + +let mut locale = locale!("zh-Hant-TW"); +assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified); +assert_eq!(locale, locale!("zh-Hant-TW")); +``` + +```rust +use icu::locid::locale; +use icu::locid_transform::{LocaleExpander, TransformResult}; +use writeable::assert_writeable_eq; + +let lc = LocaleExpander::new(); + +let mut locale = locale!("zh-Hans-CN"); +assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); +assert_eq!(locale, locale!("zh")); + +let mut locale = locale!("zh"); +assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified); +assert_eq!(locale, locale!("zh")); +``` + +[`ICU4X`]: ../icu/index.html +[`CLDR`]: http://cldr.unicode.org/ +[`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags +[`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization + +<!-- cargo-rdme end --> + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). 
diff --git a/vendor/icu_locid_transform/benches/fixtures/locales.json b/vendor/icu_locid_transform/benches/fixtures/locales.json new file mode 100644 index 000000000..0e8ba8b79 --- /dev/null +++ b/vendor/icu_locid_transform/benches/fixtures/locales.json @@ -0,0 +1,41 @@ +[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "und-PL", + "und-Latn-AM", + "ug-Cyrl", + "sr-ME", + "mn-Mong", + "lif-Limb", + "gan", + "zh-Hant", + "yue-Hans", + "unr", + "unr-Deva", + "und-Thai-CN", + "ug-Cyrl", + "en-Latn-DE", + "pl-FR", + "de-CH", + "tuq", + "sr-ME", + "ng", + "klx", + "kk-Arab", + "en-Cyrl", + "und-Cyrl-UK", + "und-Arab", + "und-Arab-FO" +] diff --git a/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json b/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json new file mode 100644 index 000000000..18eadbce6 --- /dev/null +++ b/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json @@ -0,0 +1,88 @@ +[ + "cka", + "cze", + "gfx", + "sgn-BR", + "sgn-DD", + "tam", + "und-aaland", + "nob-bokmal", + "no-nynorsk", + "und-Qaai", + "en-554", + "en-084", + "art-lojban", + "zh-guoyu", + "zh-hakka", + "zh-xiang", + "aar-x-private", + "heb-x-private", + "ces", + "hy-arevela", + "hy-arevmda", + "cel-gaulish", + "ja-latn-hepburn-heploc", + "ja-Latn-fonipa-hepburn-heploc", + "und-Armn-SU", + "sh", + "sh-Cyrl", + "cnr", + "cnr-BA", + "ru-SU", + "ru-810", + "en-SU", + "en-810", + "und-SU", + "und-810", + "und-Latn-SU", + "und-Latn-810", + "hy-SU", + "hy-810", + "und-Armn-SU", + "und-Armn-810", + "sr-CS", + "sr-Latn-CS", + "sr-Cyrl-CS", + "az-NT", + "sl-t-sl-rozaj-biske-1994", + "DE-T-M0-DIN-K0-QWERTZ", + "en-t-m0-true", + "en-t-iw", + "und-u-rg-no23", + "und-u-rg-cn11", + "und-u-rg-cz10a", + "und-u-rg-fra", + "und-u-rg-frg", + "und-u-rg-lud", + "und-NO-u-rg-no23", + "und-CN-u-rg-cn11", + "und-CZ-u-rg-cz10a", + "und-FR-u-rg-fra", + 
"und-FR-u-rg-frg", + "und-u-rg-lud", + "und-u-sd-no23", + "und-u-sd-cn11", + "und-u-sd-cz10a", + "und-u-sd-fra", + "hy-arevela", + "hy-Armn-arevela", + "hy-AM-arevela", + "hy-arevela-fonipa", + "hy-fonipa-arevela", + "hy-arevmda", + "hy-Armn-arevmda", + "hy-AM-arevmda", + "hy-arevmda-fonipa", + "hy-fonipa-arevmda", + "ja-Latn-hepburn-heploc", + "ja-Latn-JP-hepburn-heploc", + "sv-aaland", + "el-polytoni", + "ja-Latn-alalc97-hepburn-heploc", + "ja-Latn-hepburn-alalc97-heploc", + "ja-Latn-hepburn-heploc-alalc97", + "ja-Latn-heploc-hepburn", + "ja-Latn-heploc", + "ja-Latn-aaland-heploc", + "ja-Latn-heploc-polytoni" +] diff --git a/vendor/icu_locid_transform/benches/helpers/mod.rs b/vendor/icu_locid_transform/benches/helpers/mod.rs new file mode 100644 index 000000000..d250c510c --- /dev/null +++ b/vendor/icu_locid_transform/benches/helpers/mod.rs @@ -0,0 +1,15 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use std::fs::File; +use std::io::{BufReader, Error}; + +pub fn read_fixture<T>(path: &str) -> Result<T, Error> +where + T: serde::de::DeserializeOwned, +{ + let file = File::open(path)?; + let reader = BufReader::new(file); + Ok(serde_json::from_reader(reader)?) +} diff --git a/vendor/icu_locid_transform/benches/locale_canonicalizer.rs b/vendor/icu_locid_transform/benches/locale_canonicalizer.rs new file mode 100644 index 000000000..1ea8df6b3 --- /dev/null +++ b/vendor/icu_locid_transform/benches/locale_canonicalizer.rs @@ -0,0 +1,99 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use icu_locid::Locale; +use icu_locid_transform::LocaleCanonicalizer; +use icu_locid_transform::LocaleExpander; + +fn canonicalize_bench(c: &mut Criterion) { + let lc = LocaleCanonicalizer::new(); + + let mut group = c.benchmark_group("uncanonicalized"); + + let path = "./benches/fixtures/uncanonicalized-locales.json"; + let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture"); + let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); + + group.bench_function("clone", |b| { + b.iter(|| { + for locale in &locales { + let _ = black_box(locale).clone(); + } + }) + }); + + group.bench_function("canonicalize", |b| { + b.iter(|| { + for locale in &locales { + let mut locale = black_box(locale).clone(); + lc.canonicalize(&mut locale); + } + }) + }); + + group.finish(); +} + +fn canonicalize_noop_bench(c: &mut Criterion) { + let lc = LocaleCanonicalizer::new(); + + let mut group = c.benchmark_group("canonicalized"); + + // None of these locales require canonicalization, so this measures the cost of calling + // the canonicalizer on locales that will not be modified. 
+ let path = "./benches/fixtures/locales.json"; + let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture"); + let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); + + group.bench_function("clone", |b| { + b.iter(|| { + for locale in &locales { + let _ = black_box(locale).clone(); + } + }) + }); + + group.bench_function("canonicalize", |b| { + b.iter(|| { + for locale in &locales { + let mut locale = black_box(locale).clone(); + lc.canonicalize(&mut locale); + } + }) + }); + + group.finish(); +} + +fn maximize_bench(c: &mut Criterion) { + let lc = LocaleExpander::new(); + + let mut group = c.benchmark_group("likelysubtags"); + + let path = "./benches/fixtures/locales.json"; + let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture"); + let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); + + group.bench_function("maximize", |b| { + b.iter(|| { + for locale in &locales { + let mut locale = locale.clone(); + lc.maximize(black_box(&mut locale)); + } + }) + }); + + group.finish(); +} + +criterion_group!( + benches, + canonicalize_bench, + canonicalize_noop_bench, + maximize_bench +); +criterion_main!(benches); diff --git a/vendor/icu_locid_transform/src/canonicalizer.rs b/vendor/icu_locid_transform/src/canonicalizer.rs new file mode 100644 index 000000000..5a3782638 --- /dev/null +++ b/vendor/icu_locid_transform/src/canonicalizer.rs @@ -0,0 +1,618 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! The collection of code for locale canonicalization. 
+ +use crate::provider::*; +use crate::LocaleTransformError; +use alloc::vec::Vec; +use core::cmp::Ordering; + +use crate::LocaleExpander; +use crate::TransformResult; +use icu_locid::subtags::{Language, Region, Script}; +use icu_locid::{ + extensions::unicode::key, + subtags::{language, Variant, Variants}, + LanguageIdentifier, Locale, +}; +use icu_provider::prelude::*; +use tinystr::TinyAsciiStr; + +/// Implements the algorithm defined in *[UTS #35: Annex C, LocaleId Canonicalization]*. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::Locale; +/// use icu_locid_transform::{LocaleCanonicalizer, TransformResult}; +/// +/// let lc = LocaleCanonicalizer::new(); +/// +/// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); +/// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); +/// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap()); +/// ``` +/// +/// [UTS #35: Annex C, LocaleId Canonicalization]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization +#[derive(Debug)] +pub struct LocaleCanonicalizer { + /// Data to support canonicalization. + aliases: DataPayload<AliasesV1Marker>, + /// Likely subtags implementation for delegation. + expander: LocaleExpander, +} + +#[inline] +fn uts35_rule_matches<'a, I>( + source: &Locale, + language: Language, + script: Option<Script>, + region: Option<Region>, + raw_variants: I, +) -> bool +where + I: Iterator<Item = &'a str>, +{ + (language.is_empty() || language == source.id.language) + && (script.is_none() || script == source.id.script) + && (region.is_none() || region == source.id.region) + && { + // Checks if variants are a subset of source variants. + // As both iterators are sorted, this can be done linearly. 
+ let mut source_variants = source.id.variants.iter(); + 'outer: for it in raw_variants { + for cand in source_variants.by_ref() { + match cand.strict_cmp(it.as_bytes()) { + Ordering::Equal => { + continue 'outer; + } + Ordering::Less => {} + _ => { + return false; + } + } + } + return false; + } + true + } +} + +fn uts35_replacement<'a, I>( + source: &mut Locale, + ruletype_has_language: bool, + ruletype_has_script: bool, + ruletype_has_region: bool, + ruletype_variants: Option<I>, + replacement: &LanguageIdentifier, +) where + I: Iterator<Item = &'a str>, +{ + if ruletype_has_language || (source.id.language.is_empty() && !replacement.language.is_empty()) + { + source.id.language = replacement.language; + } + if ruletype_has_script || (source.id.script.is_none() && replacement.script.is_some()) { + source.id.script = replacement.script; + } + if ruletype_has_region || (source.id.region.is_none() && replacement.region.is_some()) { + source.id.region = replacement.region; + } + if let Some(skips) = ruletype_variants { + // The rule matches if the ruletype variants are a subset of the source variants. 
+ // This means ja-Latn-fonipa-hepburn-heploc matches against the rule for + // hepburn-heploc and is canonicalized to ja-Latn-alalc97-fonipa + + // We're merging three sorted deduped iterators into a new sequence: + // sources - skips + replacements + + let mut sources = source.id.variants.iter().copied().peekable(); + let mut replacements = replacement.variants.iter().copied().peekable(); + let mut skips = skips.peekable(); + + let mut variants: Vec<Variant> = Vec::new(); + + loop { + match (sources.peek(), skips.peek(), replacements.peek()) { + (Some(&source), Some(skip), _) + if source.strict_cmp(skip.as_bytes()) == Ordering::Greater => + { + skips.next(); + } + (Some(&source), Some(skip), _) + if source.strict_cmp(skip.as_bytes()) == Ordering::Equal => + { + skips.next(); + sources.next(); + } + (Some(&source), _, Some(&replacement)) + if replacement.cmp(&source) == Ordering::Less => + { + variants.push(replacement); + replacements.next(); + } + (Some(&source), _, Some(&replacement)) + if replacement.cmp(&source) == Ordering::Equal => + { + variants.push(source); + sources.next(); + replacements.next(); + } + (Some(&source), _, _) => { + variants.push(source); + sources.next(); + } + (None, _, Some(&replacement)) => { + variants.push(replacement); + replacements.next(); + } + (None, _, None) => { + break; + } + } + } + source.id.variants = Variants::from_vec_unchecked(variants); + } +} + +#[inline] +fn uts35_check_language_rules( + locale: &mut Locale, + alias_data: &DataPayload<AliasesV1Marker>, +) -> TransformResult { + if !locale.id.language.is_empty() { + let lang: TinyAsciiStr<3> = locale.id.language.into(); + let replacement = if lang.len() == 2 { + alias_data + .get() + .language_len2 + .get(&lang.resize().to_unvalidated()) + } else { + alias_data.get().language_len3.get(&lang.to_unvalidated()) + }; + + if let Some(replacement) = replacement { + if let Ok(langid) = replacement.parse() { + uts35_replacement::<core::iter::Empty<&str>>( + locale, true, 
false, false, None, &langid, + ); + return TransformResult::Modified; + } + } + } + + TransformResult::Unmodified +} + +fn is_iter_sorted<I, T>(mut iter: I) -> bool +where + I: Iterator<Item = T>, + T: PartialOrd, +{ + if let Some(mut last) = iter.next() { + for curr in iter { + if last > curr { + return false; + } + last = curr; + } + } + true +} + +#[cfg(feature = "compiled_data")] +impl Default for LocaleCanonicalizer { + fn default() -> Self { + Self::new() + } +} + +impl LocaleCanonicalizer { + /// A constructor which creates a [`LocaleCanonicalizer`] from compiled data. + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + #[cfg(feature = "compiled_data")] + pub const fn new() -> Self { + Self::new_with_expander(LocaleExpander::new_extended()) + } + + // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed + #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)] + pub fn try_new_with_any_provider( + provider: &(impl AnyProvider + ?Sized), + ) -> Result<LocaleCanonicalizer, LocaleTransformError> { + let expander = LocaleExpander::try_new_with_any_provider(provider)?; + Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander) + } + + // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed + #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)] + #[cfg(feature = "serde")] + pub fn try_new_with_buffer_provider( + provider: &(impl BufferProvider + ?Sized), + ) -> Result<LocaleCanonicalizer, LocaleTransformError> { + let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; + Self::try_new_with_expander_unstable(&provider.as_deserializing(), expander) + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] + pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleCanonicalizer, LocaleTransformError> + where + P: 
DataProvider<AliasesV1Marker> + + DataProvider<LikelySubtagsForLanguageV1Marker> + + DataProvider<LikelySubtagsForScriptRegionV1Marker> + + ?Sized, + { + let expander = LocaleExpander::try_new_unstable(provider)?; + Self::try_new_with_expander_unstable(provider, expander) + } + + /// Creates a [`LocaleCanonicalizer`] with a custom [`LocaleExpander`] and compiled data. + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + #[cfg(feature = "compiled_data")] + pub const fn new_with_expander(expander: LocaleExpander) -> Self { + Self { + aliases: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_ALIASES_V1, + ), + expander, + } + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)] + pub fn try_new_with_expander_unstable<P>( + provider: &P, + expander: LocaleExpander, + ) -> Result<LocaleCanonicalizer, LocaleTransformError> + where + P: DataProvider<AliasesV1Marker> + ?Sized, + { + let aliases: DataPayload<AliasesV1Marker> = + provider.load(Default::default())?.take_payload()?; + + Ok(LocaleCanonicalizer { aliases, expander }) + } + + icu_provider::gen_any_buffer_data_constructors!( + locale: skip, + options: LocaleExpander, + error: LocaleTransformError, + #[cfg(skip)] + functions: [ + new_with_expander, + try_new_with_expander_with_any_provider, + try_new_with_expander_with_buffer_provider, + try_new_with_expander_unstable, + Self, + ] + ); + + /// The canonicalize method potentially updates a passed in locale in place + /// depending upon the results of running the canonicalization algorithm + /// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. + /// + /// Some BCP47 canonicalization data is not part of the CLDR json package. Because + /// of this, some canonicalizations are not performed, e.g. the canonicalization of + /// `und-u-ca-islamicc` to `und-u-ca-islamic-civil`.
This will be fixed in a future + /// release once the missing data has been added to the CLDR json data. See: + /// <https://github.com/unicode-org/icu4x/issues/746> + /// + /// # Examples + /// + /// ``` + /// use icu_locid::Locale; + /// use icu_locid_transform::{LocaleCanonicalizer, TransformResult}; + /// + /// let lc = LocaleCanonicalizer::new(); + /// + /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); + /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); + /// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap()); + /// ``` + pub fn canonicalize(&self, locale: &mut Locale) -> TransformResult { + let mut result = TransformResult::Unmodified; + + // This loops until we get a 'fixed point', where applying the rules do not + // result in any more changes. + 'outer: loop { + // These are linear searches due to the ordering imposed by the canonicalization + // rules, where rules with more variants should be considered first. With the + // current data in CLDR, we will only do this for locales which have variants, + // or new rules which we haven't special-cased yet (of which there are fewer + // than 20). 
+ if !locale.id.variants.is_empty() { + // These language/variant combinations have around 20 rules + for StrStrPair(raw_lang_variants, raw_to) in self + .aliases + .get() + .language_variants + .iter() + .map(zerofrom::ZeroFrom::zero_from) + { + let (raw_lang, raw_variants) = { + let mut subtags = raw_lang_variants.split('-'); + ( + // str::split can't return empty iterators + unsafe { subtags.next().unwrap_unchecked() }, + subtags, + ) + }; + if is_iter_sorted(raw_variants.clone()) { + if let Ok(lang) = raw_lang.parse::<Language>() { + if uts35_rule_matches(locale, lang, None, None, raw_variants.clone()) { + if let Ok(to) = raw_to.parse() { + uts35_replacement( + locale, + !lang.is_empty(), + false, + false, + Some(raw_variants), + &to, + ); + result = TransformResult::Modified; + continue 'outer; + } + } + } + } + } + } else { + // These are absolute fallbacks, and currently empty. + for StrStrPair(raw_from, raw_to) in self + .aliases + .get() + .language + .iter() + .map(zerofrom::ZeroFrom::zero_from) + { + if let Ok(from) = raw_from.parse::<LanguageIdentifier>() { + if uts35_rule_matches( + locale, + from.language, + from.script, + from.region, + from.variants.iter().map(Variant::as_str), + ) { + if let Ok(to) = raw_to.parse() { + uts35_replacement( + locale, + !from.language.is_empty(), + from.script.is_some(), + from.region.is_some(), + Some(from.variants.iter().map(Variant::as_str)), + &to, + ); + result = TransformResult::Modified; + continue 'outer; + } + } + } + } + } + + if !locale.id.language.is_empty() { + // If the region is specified, check sgn-region rules first + if let Some(region) = locale.id.region { + if locale.id.language == language!("sgn") { + if let Some(&sgn_lang) = self + .aliases + .get() + .sgn_region + .get(&region.into_tinystr().to_unvalidated()) + { + uts35_replacement::<core::iter::Empty<&str>>( + locale, + true, + false, + true, + None, + &sgn_lang.into(), + ); + result = TransformResult::Modified; + continue; + } + } + } + + if 
uts35_check_language_rules(locale, &self.aliases) == TransformResult::Modified { + result = TransformResult::Modified; + continue; + } + } + + if let Some(script) = locale.id.script { + if let Some(&replacement) = self + .aliases + .get() + .script + .get(&script.into_tinystr().to_unvalidated()) + { + locale.id.script = Some(replacement); + result = TransformResult::Modified; + continue; + } + } + + if let Some(region) = locale.id.region { + let replacement = if region.is_alphabetic() { + self.aliases + .get() + .region_alpha + .get(&region.into_tinystr().resize().to_unvalidated()) + } else { + self.aliases + .get() + .region_num + .get(&region.into_tinystr().to_unvalidated()) + }; + if let Some(&replacement) = replacement { + locale.id.region = Some(replacement); + result = TransformResult::Modified; + continue; + } + + if let Some(regions) = self + .aliases + .get() + .complex_region + .get(&region.into_tinystr().to_unvalidated()) + { + // Skip if regions are empty + if let Some(default_region) = regions.get(0) { + let mut maximized = LanguageIdentifier { + language: locale.id.language, + script: locale.id.script, + region: None, + variants: Variants::default(), + }; + + locale.id.region = Some( + match (self.expander.maximize(&mut maximized), maximized.region) { + (TransformResult::Modified, Some(candidate)) + if regions.iter().any(|x| x == candidate) => + { + candidate + } + _ => default_region, + }, + ); + result = TransformResult::Modified; + continue; + } + } + } + + if !locale.id.variants.is_empty() { + let mut modified = Vec::new(); + let mut unmodified = Vec::new(); + for &variant in locale.id.variants.iter() { + if let Some(&updated) = self + .aliases + .get() + .variant + .get(&variant.into_tinystr().to_unvalidated()) + { + modified.push(updated); + } else { + unmodified.push(variant); + } + } + + if !modified.is_empty() { + modified.extend(unmodified); + modified.sort(); + modified.dedup(); + locale.id.variants = Variants::from_vec_unchecked(modified); + result 
= TransformResult::Modified; + continue; + } + } + + // Nothing matched in this iteration, we're done. + break; + } + + // Handle Locale extensions in their own loops, because these rules do not interact + // with each other. + if let Some(lang) = &locale.extensions.transform.lang { + let mut tlang: Locale = lang.clone().into(); + let mut matched = false; + loop { + if uts35_check_language_rules(&mut tlang, &self.aliases) + == TransformResult::Modified + { + result = TransformResult::Modified; + matched = true; + continue; + } + + break; + } + + if matched { + locale.extensions.transform.lang = Some(tlang.id); + } + } + + // The `rg` region override and `sd` regional subdivision keys may contain + // language codes that require canonicalization. + for key in &[key!("rg"), key!("sd")] { + if let Some(value) = locale.extensions.unicode.keywords.get_mut(key) { + if let &[only_value] = value.as_tinystr_slice() { + if let Some(modified_value) = self + .aliases + .get() + .subdivision + .get(&only_value.resize().to_unvalidated()) + { + if let Ok(modified_value) = modified_value.parse() { + *value = modified_value; + result = TransformResult::Modified; + } + } + } + } + } + + result + } +} + +#[test] +fn test_uts35_rule_matches() { + for (source, rule, result) in [ + ("ja", "und", true), + ("und-heploc-hepburn", "und-hepburn", true), + ("ja-heploc-hepburn", "und-hepburn", true), + ("ja-hepburn", "und-hepburn-heploc", false), + ] { + let source = source.parse().unwrap(); + let rule = rule.parse::<LanguageIdentifier>().unwrap(); + assert_eq!( + uts35_rule_matches( + &source, + rule.language, + rule.script, + rule.region, + rule.variants.iter().map(Variant::as_str), + ), + result, + "{source}" + ); + } +} + +#[test] +fn test_uts35_replacement() { + for (locale, rule_0, rule_1, result) in [ + ( + "ja-Latn-fonipa-hepburn-heploc", + "und-hepburn-heploc", + "und-alalc97", + "ja-Latn-alalc97-fonipa", + ), + ("sgn-DD", "und-DD", "und-DE", "sgn-DE"), + ("sgn-DE", "sgn-DE", "gsg", 
"gsg"), + ] { + let mut locale = locale.parse().unwrap(); + let rule_0 = rule_0.parse::<LanguageIdentifier>().unwrap(); + let rule_1 = rule_1.parse().unwrap(); + let result = result.parse::<Locale>().unwrap(); + uts35_replacement( + &mut locale, + !rule_0.language.is_empty(), + rule_0.script.is_some(), + rule_0.region.is_some(), + Some(rule_0.variants.iter().map(Variant::as_str)), + &rule_1, + ); + assert_eq!(result, locale); + } +} diff --git a/vendor/icu_locid_transform/src/directionality.rs b/vendor/icu_locid_transform/src/directionality.rs new file mode 100644 index 000000000..8a6c243b8 --- /dev/null +++ b/vendor/icu_locid_transform/src/directionality.rs @@ -0,0 +1,231 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::provider::*; +use crate::{LocaleExpander, LocaleTransformError}; +use icu_locid::subtags::Script; +use icu_locid::LanguageIdentifier; +use icu_provider::prelude::*; + +/// Represents the direction of a script. +/// +/// [`LocaleDirectionality`] can be used to get this information. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[non_exhaustive] +pub enum Direction { + /// The script is left-to-right. + LeftToRight, + /// The script is right-to-left. + RightToLeft, +} + +/// Provides methods to determine the direction of a locale. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_locid_transform::{Direction, LocaleDirectionality}; +/// +/// let ld = LocaleDirectionality::new(); +/// +/// assert_eq!(ld.get(&locale!("en")), Some(Direction::LeftToRight)); +/// ``` +#[derive(Debug)] +pub struct LocaleDirectionality { + script_direction: DataPayload<ScriptDirectionV1Marker>, + expander: LocaleExpander, +} + +impl LocaleDirectionality { + /// Creates a [`LocaleDirectionality`] from compiled data. 
+ /// + /// This includes limited likely subtags data, see [`LocaleExpander::new()`]. + #[cfg(feature = "compiled_data")] + pub const fn new() -> Self { + Self::new_with_expander(LocaleExpander::new()) + } + + // Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice + #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)] + pub fn try_new_with_any_provider( + provider: &(impl AnyProvider + ?Sized), + ) -> Result<LocaleDirectionality, LocaleTransformError> { + let expander = LocaleExpander::try_new_with_any_provider(provider)?; + Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander) + } + + // Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice + #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)] + #[cfg(feature = "serde")] + pub fn try_new_with_buffer_provider( + provider: &(impl BufferProvider + ?Sized), + ) -> Result<LocaleDirectionality, LocaleTransformError> { + let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; + Self::try_new_with_expander_unstable(&provider.as_deserializing(), expander) + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] + pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleDirectionality, LocaleTransformError> + where + P: DataProvider<ScriptDirectionV1Marker> + + DataProvider<LikelySubtagsForLanguageV1Marker> + + DataProvider<LikelySubtagsForScriptRegionV1Marker> + + ?Sized, + { + let expander = LocaleExpander::try_new_unstable(provider)?; + Self::try_new_with_expander_unstable(provider, expander) + } + + /// Creates a [`LocaleDirectionality`] with a custom [`LocaleExpander`] and compiled data. + /// + /// This allows using [`LocaleExpander::new_extended()`] with data for all locales. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::{ + /// Direction, LocaleDirectionality, LocaleExpander, + /// }; + /// + /// let ld_default = LocaleDirectionality::new(); + /// + /// assert_eq!(ld_default.get(&locale!("jbn")), None); + /// + /// let expander = LocaleExpander::new_extended(); + /// let ld_extended = LocaleDirectionality::new_with_expander(expander); + /// + /// assert_eq!( + /// ld_extended.get(&locale!("jbn")), + /// Some(Direction::RightToLeft) + /// ); + /// ``` + #[cfg(feature = "compiled_data")] + pub const fn new_with_expander(expander: LocaleExpander) -> Self { + LocaleDirectionality { + script_direction: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1, + ), + expander, + } + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)] + pub fn try_new_with_expander_unstable<P>( + provider: &P, + expander: LocaleExpander, + ) -> Result<LocaleDirectionality, LocaleTransformError> + where + P: DataProvider<ScriptDirectionV1Marker> + ?Sized, + { + let script_direction = provider.load(Default::default())?.take_payload()?; + + Ok(LocaleDirectionality { + script_direction, + expander, + }) + } + + /// Returns the script direction of the given locale. + /// + /// Note that the direction is a property of the script of a locale, not of the language. As such, + /// when given a locale without an associated script tag (i.e., `locale!("en")` vs. `locale!("en-Latn")`), + /// this method first tries to infer the script using the language and region before returning its direction. + /// + /// If you already have a script struct and want to get its direction, you should use + /// `Locale::from(Some(my_script))` and call this method. + /// + /// This method will return `None` if either a locale's script cannot be determined, or there is no information + /// for the script. 
+ /// + /// # Examples + /// + /// Using an existing locale: + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::{Direction, LocaleDirectionality}; + /// + /// let ld = LocaleDirectionality::new(); + /// + /// assert_eq!(ld.get(&locale!("en-US")), Some(Direction::LeftToRight)); + /// + /// assert_eq!(ld.get(&locale!("ar")), Some(Direction::RightToLeft)); + /// + /// assert_eq!(ld.get(&locale!("en-Arab")), Some(Direction::RightToLeft)); + /// + /// assert_eq!(ld.get(&locale!("foo")), None); + /// ``` + /// + /// Using a script directly: + /// + /// ``` + /// use icu_locid::subtags::script; + /// use icu_locid::Locale; + /// use icu_locid_transform::{Direction, LocaleDirectionality}; + /// + /// let ld = LocaleDirectionality::new(); + /// + /// assert_eq!( + /// ld.get(&Locale::from(Some(script!("Latn")))), + /// Some(Direction::LeftToRight) + /// ); + /// ``` + pub fn get(&self, locale: impl AsRef<LanguageIdentifier>) -> Option<Direction> { + let script = self.expander.get_likely_script(locale.as_ref())?; + + if self.script_in_ltr(script) { + Some(Direction::LeftToRight) + } else if self.script_in_rtl(script) { + Some(Direction::RightToLeft) + } else { + None + } + } + + /// Returns whether the given locale is right-to-left. + /// + /// Note that if this method returns `false`, the locale is either left-to-right or + /// the [`LocaleDirectionality`] does not include data for the locale. + /// You should use [`LocaleDirectionality::get`] if you need to differentiate between these cases. + /// + /// See [`LocaleDirectionality::get`] for more information. + pub fn is_right_to_left(&self, locale: impl AsRef<LanguageIdentifier>) -> bool { + self.expander + .get_likely_script(locale.as_ref()) + .map(|s| self.script_in_rtl(s)) + .unwrap_or(false) + } + + /// Returns whether the given locale is left-to-right. 
+ /// + /// Note that if this method returns `false`, the locale is either right-to-left or + /// the [`LocaleDirectionality`] does not include data for the locale. + /// You should use [`LocaleDirectionality::get`] if you need to differentiate between these cases. + /// + /// See [`LocaleDirectionality::get`] for more information. + pub fn is_left_to_right(&self, locale: impl AsRef<LanguageIdentifier>) -> bool { + self.expander + .get_likely_script(locale.as_ref()) + .map(|s| self.script_in_ltr(s)) + .unwrap_or(false) + } + + fn script_in_rtl(&self, script: Script) -> bool { + self.script_direction + .get() + .rtl + .binary_search(&script.into_tinystr().to_unvalidated()) + .is_ok() + } + + fn script_in_ltr(&self, script: Script) -> bool { + self.script_direction + .get() + .ltr + .binary_search(&script.into_tinystr().to_unvalidated()) + .is_ok() + } +} diff --git a/vendor/icu_locid_transform/src/error.rs b/vendor/icu_locid_transform/src/error.rs new file mode 100644 index 000000000..a59f838be --- /dev/null +++ b/vendor/icu_locid_transform/src/error.rs @@ -0,0 +1,27 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::fmt::Debug; +use displaydoc::Display; +use icu_provider::DataError; + +#[cfg(feature = "std")] +impl std::error::Error for LocaleTransformError {} + +/// A list of error outcomes for various operations in this module. +/// +/// Re-exported as [`Error`](crate::Error). +#[derive(Display, Debug, Copy, Clone, PartialEq)] +#[non_exhaustive] +pub enum LocaleTransformError { + /// An error originating inside of the [data provider](icu_provider). 
+ #[displaydoc("{0}")] + Data(DataError), +} + +impl From<DataError> for LocaleTransformError { + fn from(e: DataError) -> Self { + Self::Data(e) + } +} diff --git a/vendor/icu_locid_transform/src/expander.rs b/vendor/icu_locid_transform/src/expander.rs new file mode 100644 index 000000000..56f204c32 --- /dev/null +++ b/vendor/icu_locid_transform/src/expander.rs @@ -0,0 +1,722 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{provider::*, LocaleTransformError}; + +use core::mem; +use icu_locid::subtags::{Language, Region, Script}; +use icu_locid::LanguageIdentifier; +use icu_provider::prelude::*; + +use crate::TransformResult; + +/// Implements the *Add Likely Subtags* and *Remove Likely Subtags* +/// algorithms as defined in *[UTS #35: Likely Subtags]*. +/// +/// # Examples +/// +/// Add likely subtags: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_locid_transform::{LocaleExpander, TransformResult}; +/// +/// let lc = LocaleExpander::new(); +/// +/// let mut locale = locale!("zh-CN"); +/// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); +/// assert_eq!(locale, locale!("zh-Hans-CN")); +/// +/// let mut locale = locale!("zh-Hant-TW"); +/// assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified); +/// assert_eq!(locale, locale!("zh-Hant-TW")); +/// ``` +/// +/// Remove likely subtags: +/// +/// ``` +/// use icu_locid::locale; +/// use icu_locid_transform::{LocaleExpander, TransformResult}; +/// +/// let lc = LocaleExpander::new(); +/// +/// let mut locale = locale!("zh-Hans-CN"); +/// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); +/// assert_eq!(locale, locale!("zh")); +/// +/// let mut locale = locale!("zh"); +/// assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified); +/// assert_eq!(locale, locale!("zh")); +/// ``` +/// +/// 
Normally, only CLDR locales with Basic or higher coverage are included. To include more +/// locales for maximization, use [`try_new_extended`](Self::try_new_extended_unstable): +/// +/// ``` +/// use icu_locid::locale; +/// use icu_locid_transform::{LocaleExpander, TransformResult}; +/// +/// let lc = LocaleExpander::new_extended(); +/// +/// let mut locale = locale!("atj"); +/// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); +/// assert_eq!(locale, locale!("atj-Latn-CA")); +/// ``` +/// +/// [UTS #35: Likely Subtags]: https://www.unicode.org/reports/tr35/#Likely_Subtags +#[derive(Debug, Clone)] +pub struct LocaleExpander { + likely_subtags_l: DataPayload<LikelySubtagsForLanguageV1Marker>, + likely_subtags_sr: DataPayload<LikelySubtagsForScriptRegionV1Marker>, + likely_subtags_ext: Option<DataPayload<LikelySubtagsExtendedV1Marker>>, +} + +struct LocaleExpanderBorrowed<'a> { + likely_subtags_l: &'a LikelySubtagsForLanguageV1<'a>, + likely_subtags_sr: &'a LikelySubtagsForScriptRegionV1<'a>, + likely_subtags_ext: Option<&'a LikelySubtagsExtendedV1<'a>>, +} + +impl LocaleExpanderBorrowed<'_> { + fn get_l(&self, l: Language) -> Option<(Script, Region)> { + let key = &l.into_tinystr().to_unvalidated(); + self.likely_subtags_l.language.get_copied(key).or_else(|| { + self.likely_subtags_ext + .and_then(|ext| ext.language.get_copied(key)) + }) + } + + fn get_ls(&self, l: Language, s: Script) -> Option<Region> { + let key = &( + l.into_tinystr().to_unvalidated(), + s.into_tinystr().to_unvalidated(), + ); + self.likely_subtags_l + .language_script + .get_copied(key) + .or_else(|| { + self.likely_subtags_ext + .and_then(|ext| ext.language_script.get_copied(key)) + }) + } + + fn get_lr(&self, l: Language, r: Region) -> Option<Script> { + let key = &( + l.into_tinystr().to_unvalidated(), + r.into_tinystr().to_unvalidated(), + ); + self.likely_subtags_l + .language_region + .get_copied(key) + .or_else(|| { + self.likely_subtags_ext + .and_then(|ext| 
ext.language_region.get_copied(key)) + }) + } + + fn get_s(&self, s: Script) -> Option<(Language, Region)> { + let key = &s.into_tinystr().to_unvalidated(); + self.likely_subtags_sr.script.get_copied(key).or_else(|| { + self.likely_subtags_ext + .and_then(|ext| ext.script.get_copied(key)) + }) + } + + fn get_sr(&self, s: Script, r: Region) -> Option<Language> { + let key = &( + s.into_tinystr().to_unvalidated(), + r.into_tinystr().to_unvalidated(), + ); + self.likely_subtags_sr + .script_region + .get_copied(key) + .or_else(|| { + self.likely_subtags_ext + .and_then(|ext| ext.script_region.get_copied(key)) + }) + } + + fn get_r(&self, r: Region) -> Option<(Language, Script)> { + let key = &r.into_tinystr().to_unvalidated(); + self.likely_subtags_sr.region.get_copied(key).or_else(|| { + self.likely_subtags_ext + .and_then(|ext| ext.region.get_copied(key)) + }) + } + + fn get_und(&self) -> (Language, Script, Region) { + self.likely_subtags_l.und + } +} + +#[inline] +fn update_langid( + language: Language, + script: Option<Script>, + region: Option<Region>, + langid: &mut LanguageIdentifier, +) -> TransformResult { + let mut modified = false; + + if langid.language.is_empty() && !language.is_empty() { + langid.language = language; + modified = true; + } + + if langid.script.is_none() && script.is_some() { + langid.script = script; + modified = true; + } + + if langid.region.is_none() && region.is_some() { + langid.region = region; + modified = true; + } + + if modified { + TransformResult::Modified + } else { + TransformResult::Unmodified + } +} + +impl LocaleExpander { + /// Creates a [`LocaleExpander`] with compiled data for commonly-used locales + /// (locales with *Basic* or higher [CLDR coverage]). + /// + /// Use this constructor if you want limited likely subtags for data-oriented use cases. 
+ /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + /// + /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels + #[cfg(feature = "compiled_data")] + pub const fn new() -> Self { + LocaleExpander { + likely_subtags_l: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1, + ), + likely_subtags_sr: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1, + ), + likely_subtags_ext: None, + } + } + + /// Creates a [`LocaleExpander`] with compiled data for all locales. + /// + /// Use this constructor if you want to include data for all locales, including ones + /// that may not have data for other services (i.e. [CLDR coverage] below *Basic*). + /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + /// + /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels + #[cfg(feature = "compiled_data")] + pub const fn new_extended() -> Self { + LocaleExpander { + likely_subtags_l: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1, + ), + likely_subtags_sr: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1, + ), + likely_subtags_ext: Some(DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1, + )), + } + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_extended)] + pub fn try_new_extended_unstable<P>( + provider: &P, + ) -> Result<LocaleExpander, LocaleTransformError> + where + P: DataProvider<LikelySubtagsForLanguageV1Marker> + + DataProvider<LikelySubtagsForScriptRegionV1Marker> + + DataProvider<LikelySubtagsExtendedV1Marker> + + ?Sized, + { + let likely_subtags_l = 
provider.load(Default::default())?.take_payload()?; + let likely_subtags_sr = provider.load(Default::default())?.take_payload()?; + let likely_subtags_ext = Some(provider.load(Default::default())?.take_payload()?); + + Ok(LocaleExpander { + likely_subtags_l, + likely_subtags_sr, + likely_subtags_ext, + }) + } + + icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: LocaleTransformError, + #[cfg(skip)] + functions: [ + new_extended, + try_new_extended_with_any_provider, + try_new_extended_with_buffer_provider, + try_new_extended_unstable, + Self + ]); + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)] + pub fn try_new_with_any_provider( + provider: &(impl AnyProvider + ?Sized), + ) -> Result<LocaleExpander, LocaleTransformError> { + Self::try_new_compat(&provider.as_downcasting()) + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)] + #[cfg(feature = "serde")] + pub fn try_new_with_buffer_provider( + provider: &(impl BufferProvider + ?Sized), + ) -> Result<LocaleExpander, LocaleTransformError> { + Self::try_new_compat(&provider.as_deserializing()) + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] + pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleExpander, LocaleTransformError> + where + P: DataProvider<LikelySubtagsForLanguageV1Marker> + + DataProvider<LikelySubtagsForScriptRegionV1Marker> + + ?Sized, + { + let likely_subtags_l = provider.load(Default::default())?.take_payload()?; + let likely_subtags_sr = provider.load(Default::default())?.take_payload()?; + + Ok(LocaleExpander { + likely_subtags_l, + likely_subtags_sr, + likely_subtags_ext: None, + }) + } + + fn try_new_compat<P>(provider: &P) -> Result<LocaleExpander, LocaleTransformError> + where + P: DataProvider<LikelySubtagsForLanguageV1Marker> + + DataProvider<LikelySubtagsForScriptRegionV1Marker> + + DataProvider<LikelySubtagsExtendedV1Marker> + + DataProvider<LikelySubtagsV1Marker> + + 
?Sized, + { + let payload_l = provider + .load(Default::default()) + .and_then(DataResponse::take_payload); + let payload_sr = provider + .load(Default::default()) + .and_then(DataResponse::take_payload); + let payload_ext = provider + .load(Default::default()) + .and_then(DataResponse::take_payload); + + let (likely_subtags_l, likely_subtags_sr, likely_subtags_ext) = + match (payload_l, payload_sr, payload_ext) { + (Ok(l), Ok(sr), Err(_)) => (l, sr, None), + (Ok(l), Ok(sr), Ok(ext)) => (l, sr, Some(ext)), + _ => { + let result: DataPayload<LikelySubtagsV1Marker> = + provider.load(Default::default())?.take_payload()?; + ( + result.map_project_cloned(|st, _| { + LikelySubtagsForLanguageV1::clone_from_borrowed(st) + }), + result.map_project(|st, _| st.into()), + None, + ) + } + }; + + Ok(LocaleExpander { + likely_subtags_l, + likely_subtags_sr, + likely_subtags_ext, + }) + } + + fn as_borrowed(&self) -> LocaleExpanderBorrowed { + LocaleExpanderBorrowed { + likely_subtags_l: self.likely_subtags_l.get(), + likely_subtags_sr: self.likely_subtags_sr.get(), + likely_subtags_ext: self.likely_subtags_ext.as_ref().map(|p| p.get()), + } + } + + /// The maximize method potentially updates a passed-in locale in place + /// depending on the results of running the 'Add Likely Subtags' algorithm + /// from <https://www.unicode.org/reports/tr35/#Likely_Subtags>. + /// + /// If the result of running the algorithm would result in a new locale, the + /// locale argument is updated in place to match the result, and the method + /// returns [`TransformResult::Modified`]. Otherwise, the method + /// returns [`TransformResult::Unmodified`] and the locale argument is + /// unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::{LocaleExpander, TransformResult}; + /// + /// let lc = LocaleExpander::new(); + /// + /// let mut locale = locale!("zh-CN"); + /// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); + /// assert_eq!(locale, locale!("zh-Hans-CN")); + /// + /// let mut locale = locale!("zh-Hant-TW"); + /// assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified); + /// assert_eq!(locale, locale!("zh-Hant-TW")); + /// ``` + pub fn maximize<T: AsMut<LanguageIdentifier>>(&self, mut langid: T) -> TransformResult { + let langid = langid.as_mut(); + let data = self.as_borrowed(); + + if !langid.language.is_empty() && langid.script.is_some() && langid.region.is_some() { + return TransformResult::Unmodified; + } + + if !langid.language.is_empty() { + if let Some(region) = langid.region { + if let Some(script) = data.get_lr(langid.language, region) { + return update_langid(Language::UND, Some(script), None, langid); + } + } + if let Some(script) = langid.script { + if let Some(region) = data.get_ls(langid.language, script) { + return update_langid(Language::UND, None, Some(region), langid); + } + } + if let Some((script, region)) = data.get_l(langid.language) { + return update_langid(Language::UND, Some(script), Some(region), langid); + } + } + if let Some(script) = langid.script { + if let Some(region) = langid.region { + if let Some(language) = data.get_sr(script, region) { + return update_langid(language, None, None, langid); + } + } + if let Some((language, region)) = data.get_s(script) { + return update_langid(language, None, Some(region), langid); + } + } + if let Some(region) = langid.region { + if let Some((language, script)) = data.get_r(region) { + return update_langid(language, Some(script), None, langid); + } + } + + update_langid( + data.get_und().0, + Some(data.get_und().1), + Some(data.get_und().2), + langid, + ) + } + + /// This updates the passed-in locale in place so that it 
is the result of running the + /// 'Remove Likely Subtags' algorithm from + /// <https://www.unicode.org/reports/tr35/#Likely_Subtags>. + /// + /// If the result of running the algorithm would result in a new locale, the + /// locale argument is updated in place to match the result, and the method + /// returns [`TransformResult::Modified`]. Otherwise, the method + /// returns [`TransformResult::Unmodified`] and the locale argument is + /// unchanged. + /// + /// # Examples + /// + /// ``` + /// use icu_locid::locale; + /// use icu_locid_transform::{LocaleExpander, TransformResult}; + /// + /// let lc = LocaleExpander::new(); + /// + /// let mut locale = locale!("zh-Hans-CN"); + /// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); + /// assert_eq!(locale, locale!("zh")); + /// + /// let mut locale = locale!("zh"); + /// assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified); + /// assert_eq!(locale, locale!("zh")); + /// ``` + pub fn minimize<T: AsMut<LanguageIdentifier>>(&self, mut langid: T) -> TransformResult { + let langid = langid.as_mut(); + + let mut max = langid.clone(); + self.maximize(&mut max); + let variants = mem::take(&mut max.variants); + max.variants.clear(); + let mut trial = max.clone(); + + trial.script = None; + trial.region = None; + self.maximize(&mut trial); + if trial == max { + if langid.language != max.language || langid.script.is_some() || langid.region.is_some() + { + if langid.language != max.language { + langid.language = max.language + } + if langid.script.is_some() { + langid.script = None; + } + if langid.region.is_some() { + langid.region = None; + } + langid.variants = variants; + return TransformResult::Modified; + } else { + return TransformResult::Unmodified; + } + } + + trial.script = None; + trial.region = max.region; + self.maximize(&mut trial); + if trial == max { + if langid.language != max.language + || langid.script.is_some() + || langid.region != max.region + { + if langid.language != 
max.language { + langid.language = max.language + } + if langid.script.is_some() { + langid.script = None; + } + if langid.region != max.region { + langid.region = max.region; + } + langid.variants = variants; + return TransformResult::Modified; + } else { + return TransformResult::Unmodified; + } + } + + trial.script = max.script; + trial.region = None; + self.maximize(&mut trial); + if trial == max { + if langid.language != max.language + || langid.script != max.script + || langid.region.is_some() + { + if langid.language != max.language { + langid.language = max.language + } + if langid.script != max.script { + langid.script = max.script; + } + if langid.region.is_some() { + langid.region = None; + } + langid.variants = variants; + return TransformResult::Modified; + } else { + return TransformResult::Unmodified; + } + } + + if langid.language != max.language + || langid.script != max.script + || langid.region != max.region + { + if langid.language != max.language { + langid.language = max.language + } + if langid.script != max.script { + langid.script = max.script; + } + if langid.region != max.region { + langid.region = max.region; + } + TransformResult::Modified + } else { + TransformResult::Unmodified + } + } + + // TODO(3492): consider turning this and a future get_likely_region/get_likely_language public + #[inline] + pub(crate) fn get_likely_script<T: AsRef<LanguageIdentifier>>( + &self, + langid: T, + ) -> Option<Script> { + let langid = langid.as_ref(); + langid + .script + .or_else(|| self.infer_likely_script(langid.language, langid.region)) + } + + fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> { + let data = self.as_borrowed(); + + // proceed through _all possible cases_ in order of specificity + // (borrowed from LocaleExpander::maximize): + // 1. language + region + // 2. language + // 3. region + // we need to check all cases, because e.g. 
for "en-US" the default script is associated + // with "en" but not "en-US" + if language != Language::UND { + if let Some(region) = region { + // 1. we know both language and region + if let Some(script) = data.get_lr(language, region) { + return Some(script); + } + } + // 2. we know language, but we either do not know region or knowing region did not help + if let Some((script, _)) = data.get_l(language) { + return Some(script); + } + } + if let Some(region) = region { + // 3. we know region, but we either do not know language or knowing language did not help + if let Some((_, script)) = data.get_r(region) { + return Some(script); + } + } + // we could not figure out the script from the given locale + None + } +} + +#[cfg(feature = "serde")] +#[cfg(test)] +mod tests { + use super::*; + use icu_locid::locale; + + struct RejectByKeyProvider { + keys: Vec<DataKey>, + } + + impl AnyProvider for RejectByKeyProvider { + fn load_any(&self, key: DataKey, _: DataRequest) -> Result<AnyResponse, DataError> { + if self.keys.contains(&key) { + return Err(DataErrorKind::MissingDataKey.with_str_context("rejected")); + } + + let l = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1; + let ext = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1; + let sr = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1; + + let payload = if key.hashed() == LikelySubtagsV1Marker::KEY.hashed() { + DataPayload::<LikelySubtagsV1Marker>::from_owned(LikelySubtagsV1 { + language_script: l + .language_script + .iter_copied() + .chain(ext.language_script.iter_copied()) + .collect(), + language_region: l + .language_region + .iter_copied() + .chain(ext.language_region.iter_copied()) + .collect(), + language: l + .language + .iter_copied() + .chain(ext.language.iter_copied()) + .collect(), + script_region: ext.script_region.clone(), + script: ext.script.clone(), + region: ext.region.clone(), + und: l.und, + }) + .wrap_into_any_payload() + } else 
if key.hashed() == LikelySubtagsForLanguageV1Marker::KEY.hashed() { + DataPayload::<LikelySubtagsForLanguageV1Marker>::from_static_ref(l) + .wrap_into_any_payload() + } else if key.hashed() == LikelySubtagsExtendedV1Marker::KEY.hashed() { + DataPayload::<LikelySubtagsExtendedV1Marker>::from_static_ref(ext) + .wrap_into_any_payload() + } else if key.hashed() == LikelySubtagsForScriptRegionV1Marker::KEY.hashed() { + DataPayload::<LikelySubtagsForScriptRegionV1Marker>::from_static_ref(sr) + .wrap_into_any_payload() + } else { + return Err(DataErrorKind::MissingDataKey.into_error()); + }; + + Ok(AnyResponse { + payload: Some(payload), + metadata: Default::default(), + }) + } + } + + #[test] + fn test_old_keys() { + let provider = RejectByKeyProvider { + keys: vec![ + LikelySubtagsForLanguageV1Marker::KEY, + LikelySubtagsForScriptRegionV1Marker::KEY, + LikelySubtagsExtendedV1Marker::KEY, + ], + }; + let lc = LocaleExpander::try_new_with_any_provider(&provider) + .expect("should create with old keys"); + let mut locale = locale!("zh-CN"); + assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); + assert_eq!(locale, locale!("zh-Hans-CN")); + } + + #[test] + fn test_new_keys() { + let provider = RejectByKeyProvider { + keys: vec![LikelySubtagsV1Marker::KEY], + }; + let lc = LocaleExpander::try_new_with_any_provider(&provider) + .expect("should create with new keys"); + let mut locale = locale!("zh-CN"); + assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); + assert_eq!(locale, locale!("zh-Hans-CN")); + } + + #[test] + fn test_mixed_keys() { + // Include the old key and one of the new keys but not both new keys. + // Not sure if this is a useful test. 
+ let provider = RejectByKeyProvider { + keys: vec![LikelySubtagsForScriptRegionV1Marker::KEY], + }; + let lc = LocaleExpander::try_new_with_any_provider(&provider) + .expect("should create with mixed keys"); + let mut locale = locale!("zh-CN"); + assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); + assert_eq!(locale, locale!("zh-Hans-CN")); + } + + #[test] + fn test_no_keys() { + let provider = RejectByKeyProvider { + keys: vec![ + LikelySubtagsForLanguageV1Marker::KEY, + LikelySubtagsForScriptRegionV1Marker::KEY, + LikelySubtagsV1Marker::KEY, + ], + }; + if LocaleExpander::try_new_with_any_provider(&provider).is_ok() { + panic!("should not create: no data present") + }; + } + + #[test] + fn test_new_small_keys() { + // Include the new small keys but not the extended key + let provider = RejectByKeyProvider { + keys: vec![ + LikelySubtagsExtendedV1Marker::KEY, + LikelySubtagsV1Marker::KEY, + ], + }; + let lc = LocaleExpander::try_new_with_any_provider(&provider) + .expect("should create with mixed keys"); + let mut locale = locale!("zh-CN"); + assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); + assert_eq!(locale, locale!("zh-Hans-CN")); + } +} diff --git a/vendor/icu_locid_transform/src/fallback/algorithms.rs b/vendor/icu_locid_transform/src/fallback/algorithms.rs new file mode 100644 index 000000000..c3a3d08ca --- /dev/null +++ b/vendor/icu_locid_transform/src/fallback/algorithms.rs @@ -0,0 +1,487 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use icu_locid::extensions::unicode::{key, Key}; +use icu_locid::subtags::Language; +use icu_locid::LanguageIdentifier; +use icu_provider::FallbackPriority; + +use super::*; + +const SUBDIVISION_KEY: Key = key!("sd"); + +impl<'a> LocaleFallbackerWithConfig<'a> { + pub(crate) fn normalize(&self, locale: &mut DataLocale) { + let language = locale.language(); + // 1. Populate the region (required for region fallback only) + if self.config.priority == FallbackPriority::Region && locale.region().is_none() { + // 1a. First look for region based on language+script + if let Some(script) = locale.script() { + locale.set_region( + self.likely_subtags + .ls2r + .get_2d( + &language.into_tinystr().to_unvalidated(), + &script.into_tinystr().to_unvalidated(), + ) + .copied(), + ); + } + // 1b. If that fails, try language only + if locale.region().is_none() { + locale.set_region( + self.likely_subtags + .l2r + .get(&language.into_tinystr().to_unvalidated()) + .copied(), + ); + } + } + // 2. Remove the script if it is implied by the other subtags + if let Some(script) = locale.script() { + let default_script = self + .likely_subtags + .l2s + .get_copied(&language.into_tinystr().to_unvalidated()) + .unwrap_or(DEFAULT_SCRIPT); + if let Some(region) = locale.region() { + if script + == self + .likely_subtags + .lr2s + .get_copied_2d( + &language.into_tinystr().to_unvalidated(), + &region.into_tinystr().to_unvalidated(), + ) + .unwrap_or(default_script) + { + locale.set_script(None); + } + } else if script == default_script { + locale.set_script(None); + } + } + // 3. Remove irrelevant extension subtags + locale.retain_unicode_ext(|key| { + match *key { + // Always retain -u-sd + SUBDIVISION_KEY => true, + // Retain the query-specific keyword + _ if Some(*key) == self.config.extension_key => true, + // Drop all others + _ => false, + } + }); + // 4. 
If there is an invalid "sd" subtag, drop it + // For now, ignore it, and let fallback do it for us + } +} + +impl<'a> LocaleFallbackIteratorInner<'a> { + pub fn step(&mut self, locale: &mut DataLocale) { + match self.config.priority { + FallbackPriority::Language => self.step_language(locale), + FallbackPriority::Region => self.step_region(locale), + // TODO(#1964): Change the collation fallback rules to be different + // from the language fallback rules. + FallbackPriority::Collation => self.step_language(locale), + // This case should not normally happen, but `FallbackPriority` is non_exhaustive. + // Make it go directly to `und`. + _ => { + debug_assert!( + false, + "Unknown FallbackPriority: {:?}", + self.config.priority + ); + *locale = Default::default() + } + } + } + + fn step_language(&mut self, locale: &mut DataLocale) { + // 1. Remove the extension fallback keyword + if let Some(extension_key) = self.config.extension_key { + if let Some(value) = locale.remove_unicode_ext(&extension_key) { + self.backup_extension = Some(value); + return; + } + } + // 2. Remove the subdivision keyword + if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) { + self.backup_subdivision = Some(value); + return; + } + // 3. Assert that the locale is a language identifier + debug_assert!(!locale.has_unicode_ext()); + // 4. Remove variants + if locale.has_variants() { + self.backup_variants = Some(locale.clear_variants()); + return; + } + // 5. Check for parent override + if let Some(parent) = self.get_explicit_parent(locale) { + locale.set_langid(parent); + self.restore_extensions_variants(locale); + return; + } + // 6. 
Add the script subtag if necessary + if locale.script().is_none() { + if let Some(region) = locale.region() { + let language = locale.language(); + if let Some(script) = self.likely_subtags.lr2s.get_copied_2d( + &language.into_tinystr().to_unvalidated(), + &region.into_tinystr().to_unvalidated(), + ) { + locale.set_script(Some(script)); + self.restore_extensions_variants(locale); + return; + } + } + } + // 7. Remove region + if locale.region().is_some() { + locale.set_region(None); + self.restore_extensions_variants(locale); + return; + } + // 8. Remove language+script + debug_assert!(!locale.language().is_empty()); // don't call .step() on und + locale.set_script(None); + locale.set_language(Language::UND); + } + + fn step_region(&mut self, locale: &mut DataLocale) { + // 1. Remove the extension fallback keyword + if let Some(extension_key) = self.config.extension_key { + if let Some(value) = locale.remove_unicode_ext(&extension_key) { + self.backup_extension = Some(value); + return; + } + } + // 2. Remove the subdivision keyword + if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) { + self.backup_subdivision = Some(value); + return; + } + // 3. Assert that the locale is a language identifier + debug_assert!(!locale.has_unicode_ext()); + // 4. Remove variants + if locale.has_variants() { + self.backup_variants = Some(locale.clear_variants()); + return; + } + // 5. Remove language+script + if !locale.language().is_empty() || locale.script().is_some() { + locale.set_script(None); + locale.set_language(Language::UND); + self.restore_extensions_variants(locale); + return; + } + // 6. 
Remove region + debug_assert!(locale.region().is_some()); // don't call .step() on und + locale.set_region(None); + } + + fn restore_extensions_variants(&mut self, locale: &mut DataLocale) { + if let Some(value) = self.backup_extension.take() { + #[allow(clippy::unwrap_used)] // not reachable unless extension_key is present + locale.set_unicode_ext(self.config.extension_key.unwrap(), value); + } + if let Some(value) = self.backup_subdivision.take() { + locale.set_unicode_ext(SUBDIVISION_KEY, value); + } + if let Some(variants) = self.backup_variants.take() { + locale.set_variants(variants); + } + } + + fn get_explicit_parent(&self, locale: &DataLocale) -> Option<LanguageIdentifier> { + self.supplement + .and_then(|supplement| { + supplement + .parents + .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()) + }) + .or_else(|| { + self.parents + .parents + .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()) + }) + .map(LanguageIdentifier::from) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use icu_locid::Locale; + use std::str::FromStr; + use writeable::Writeable; + + struct TestCase { + input: &'static str, + requires_data: bool, + extension_key: Option<Key>, + fallback_supplement: Option<LocaleFallbackSupplement>, + // Note: The first entry in the chain is the normalized locale + expected_language_chain: &'static [&'static str], + expected_region_chain: &'static [&'static str], + } + + // TODO: Consider loading these from a JSON file + const TEST_CASES: &[TestCase] = &[ + TestCase { + input: "en-u-hc-h12-sd-usca", + requires_data: false, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["en-u-sd-usca", "en"], + expected_region_chain: &["en-u-sd-usca", "en", "und-u-sd-usca"], + }, + TestCase { + input: "en-US-u-hc-h12-sd-usca", + requires_data: false, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"], + expected_region_chain: 
&["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], + }, + TestCase { + input: "en-US-fonipa-u-hc-h12-sd-usca", + requires_data: false, + extension_key: Some(key!("hc")), + fallback_supplement: None, + expected_language_chain: &[ + "en-US-fonipa-u-hc-h12-sd-usca", + "en-US-fonipa-u-sd-usca", + "en-US-fonipa", + "en-US", + "en-fonipa-u-hc-h12-sd-usca", + "en-fonipa-u-sd-usca", + "en-fonipa", + "en", + ], + expected_region_chain: &[ + "en-US-fonipa-u-hc-h12-sd-usca", + "en-US-fonipa-u-sd-usca", + "en-US-fonipa", + "en-US", + "und-US-fonipa-u-hc-h12-sd-usca", + "und-US-fonipa-u-sd-usca", + "und-US-fonipa", + "und-US", + ], + }, + TestCase { + input: "en-u-hc-h12-sd-usca", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["en-u-sd-usca", "en"], + expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], + }, + TestCase { + input: "en-Latn-u-sd-usca", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["en-u-sd-usca", "en"], + expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], + }, + TestCase { + input: "en-Latn-US-u-sd-usca", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"], + expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"], + }, + TestCase { + // NOTE: -u-rg is not yet supported; when it is, this test should be updated + input: "en-u-rg-gbxxxx", + requires_data: false, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["en"], + expected_region_chain: &["en"], + }, + TestCase { + input: "sr-ME", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"], + expected_region_chain: &["sr-ME", "und-ME"], + }, + TestCase { + input: "sr-Latn-ME", + 
requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"], + expected_region_chain: &["sr-ME", "und-ME"], + }, + TestCase { + input: "sr-ME-fonipa", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &[ + "sr-ME-fonipa", + "sr-ME", + "sr-Latn-ME-fonipa", + "sr-Latn-ME", + "sr-Latn-fonipa", + "sr-Latn", + ], + expected_region_chain: &["sr-ME-fonipa", "sr-ME", "und-ME-fonipa", "und-ME"], + }, + TestCase { + input: "sr-RS", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["sr-RS", "sr"], + expected_region_chain: &["sr-RS", "und-RS"], + }, + TestCase { + input: "sr-Cyrl-RS", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["sr-RS", "sr"], + expected_region_chain: &["sr-RS", "und-RS"], + }, + TestCase { + input: "sr-Latn-RS", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["sr-Latn-RS", "sr-Latn"], + expected_region_chain: &["sr-Latn-RS", "und-RS"], + }, + TestCase { + input: "de-Latn-LI", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["de-LI", "de"], + expected_region_chain: &["de-LI", "und-LI"], + }, + TestCase { + input: "ca-ES-valencia", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["ca-ES-valencia", "ca-ES", "ca-valencia", "ca"], + expected_region_chain: &["ca-ES-valencia", "ca-ES", "und-ES-valencia", "und-ES"], + }, + TestCase { + input: "es-AR", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["es-AR", "es-419", "es"], + expected_region_chain: &["es-AR", "und-AR"], + }, + TestCase { + input: "hi-IN", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: 
&["hi-IN", "hi"], + expected_region_chain: &["hi-IN", "und-IN"], + }, + TestCase { + input: "hi-Latn-IN", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["hi-Latn-IN", "hi-Latn", "en-IN", "en-001", "en"], + expected_region_chain: &["hi-Latn-IN", "und-IN"], + }, + TestCase { + input: "zh-CN", + requires_data: true, + extension_key: None, + fallback_supplement: None, + // Note: "zh-Hans" is not reachable because it is the default script for "zh". + // The fallback algorithm does not visit the language-script bundle when the + // script is the default for the language + expected_language_chain: &["zh-CN", "zh"], + expected_region_chain: &["zh-CN", "und-CN"], + }, + TestCase { + input: "zh-TW", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["zh-TW", "zh-Hant-TW", "zh-Hant"], + expected_region_chain: &["zh-TW", "und-TW"], + }, + TestCase { + input: "yue-HK", + requires_data: true, + extension_key: None, + fallback_supplement: None, + expected_language_chain: &["yue-HK", "yue"], + expected_region_chain: &["yue-HK", "und-HK"], + }, + TestCase { + input: "yue-HK", + requires_data: true, + extension_key: None, + fallback_supplement: Some(LocaleFallbackSupplement::Collation), + // TODO(#1964): add "zh" as a target. 
+ expected_language_chain: &["yue-HK", "yue", "zh-Hant"], + expected_region_chain: &["yue-HK", "und-HK"], + }, + ]; + + #[test] + fn test_fallback() { + let fallbacker_no_data = LocaleFallbacker::new_without_data(); + let fallbacker_no_data = fallbacker_no_data.as_borrowed(); + let fallbacker_with_data = LocaleFallbacker::new(); + for cas in TEST_CASES { + for (priority, expected_chain) in [ + ( + LocaleFallbackPriority::Language, + cas.expected_language_chain, + ), + (LocaleFallbackPriority::Region, cas.expected_region_chain), + ] { + let mut config = LocaleFallbackConfig::default(); + config.priority = priority; + config.extension_key = cas.extension_key; + config.fallback_supplement = cas.fallback_supplement; + let fallbacker = if cas.requires_data { + fallbacker_with_data + } else { + fallbacker_no_data + }; + let mut it = fallbacker + .for_config(config) + .fallback_for(Locale::from_str(cas.input).unwrap().into()); + for &expected in expected_chain { + assert_eq!( + expected, + &*it.get().write_to_string(), + "{:?} ({:?})", + cas.input, + priority + ); + it.step(); + } + assert_eq!( + "und", + &*it.get().write_to_string(), + "{:?} ({:?})", + cas.input, + priority + ); + } + } + } +} diff --git a/vendor/icu_locid_transform/src/fallback/mod.rs b/vendor/icu_locid_transform/src/fallback/mod.rs new file mode 100644 index 000000000..6b13e0201 --- /dev/null +++ b/vendor/icu_locid_transform/src/fallback/mod.rs @@ -0,0 +1,300 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Tools for locale fallback, enabling arbitrary input locales to be mapped into the nearest +//! locale with data. +//! +//! The algorithm implemented in this module is called [Flexible Vertical Fallback]( +//! https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit). +//! 
Watch [#2243](https://github.com/unicode-org/icu4x/issues/2243) to track improvements to +//! this algorithm and steps to enshrine the algorithm in CLDR. +//! +//! # Examples +//! +//! ``` +//! use icu_locid::locale; +//! use icu_locid_transform::LocaleFallbacker; +//! +//! // Set up a LocaleFallbacker with data. +//! let fallbacker = LocaleFallbacker::new(); +//! +//! // Create a LocaleFallbackIterator with a default configuration. +//! // By default, uses language priority with no additional extension keywords. +//! let mut fallback_iterator = fallbacker +//! .for_config(Default::default()) +//! .fallback_for(locale!("hi-Latn-IN").into()); +//! +//! // Run the algorithm and check the results. +//! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into()); +//! fallback_iterator.step(); +//! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into()); +//! fallback_iterator.step(); +//! assert_eq!(fallback_iterator.get(), &locale!("en-IN").into()); +//! fallback_iterator.step(); +//! assert_eq!(fallback_iterator.get(), &locale!("en-001").into()); +//! fallback_iterator.step(); +//! assert_eq!(fallback_iterator.get(), &locale!("en").into()); +//! fallback_iterator.step(); +//! assert_eq!(fallback_iterator.get(), &locale!("und").into()); +//! ``` + +use crate::provider::*; +use icu_locid::extensions::unicode::Value; +use icu_locid::subtags::Variants; +use icu_provider::prelude::*; + +#[doc(inline)] +pub use icu_provider::fallback::*; + +mod algorithms; + +/// Implements the algorithm defined in *[UTS #35: Locale Inheritance and Matching]*. +/// +/// Note that this implementation performs some additional steps compared to the *UTS #35* +/// algorithm, see *[the design doc]* for a detailed description, and [#2243]( +/// https://github.com/unicode-org/icu4x/issues/2243) to track alignment with *UTS #35*. 
+/// +/// # Examples +/// +/// ``` +/// use icu_locid::locale; +/// use icu_locid_transform::fallback::LocaleFallbacker; +/// +/// // Set up a LocaleFallbacker with data. +/// let fallbacker = LocaleFallbacker::new(); +/// +/// // Create a LocaleFallbackIterator with a default configuration. +/// // By default, uses language priority with no additional extension keywords. +/// let mut fallback_iterator = fallbacker +/// .for_config(Default::default()) +/// .fallback_for(locale!("hi-Latn-IN").into()); +/// +/// // Run the algorithm and check the results. +/// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into()); +/// fallback_iterator.step(); +/// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into()); +/// fallback_iterator.step(); +/// assert_eq!(fallback_iterator.get(), &locale!("en-IN").into()); +/// fallback_iterator.step(); +/// assert_eq!(fallback_iterator.get(), &locale!("en-001").into()); +/// fallback_iterator.step(); +/// assert_eq!(fallback_iterator.get(), &locale!("en").into()); +/// fallback_iterator.step(); +/// assert_eq!(fallback_iterator.get(), &locale!("und").into()); +/// ``` +/// +/// [UTS #35: Locale Inheritance and Matching]: https://www.unicode.org/reports/tr35/#Locale_Inheritance +/// [the design doc]: https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit +#[doc(hidden)] +#[derive(Debug, Clone, PartialEq)] +pub struct LocaleFallbacker { + likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>, + parents: DataPayload<LocaleFallbackParentsV1Marker>, + collation_supplement: Option<DataPayload<CollationFallbackSupplementV1Marker>>, +} + +/// Borrowed version of [`LocaleFallbacker`]. 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub struct LocaleFallbackerBorrowed<'a> { + likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, + parents: &'a LocaleFallbackParentsV1<'a>, + collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, +} + +/// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`]. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct LocaleFallbackerWithConfig<'a> { + likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, + parents: &'a LocaleFallbackParentsV1<'a>, + supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, + config: LocaleFallbackConfig, +} + +/// Inner iteration type. Does not own the item under fallback. +#[derive(Debug)] +struct LocaleFallbackIteratorInner<'a> { + likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, + parents: &'a LocaleFallbackParentsV1<'a>, + supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, + config: LocaleFallbackConfig, + backup_extension: Option<Value>, + backup_subdivision: Option<Value>, + backup_variants: Option<Variants>, +} + +/// Iteration type for locale fallback operations. +/// +/// Because the `Iterator` trait does not allow items to borrow from the iterator, this class does +/// not implement that trait. Instead, use `.step()` and `.get()`. +#[derive(Debug)] +pub struct LocaleFallbackIterator<'a, 'b> { + current: DataLocale, + inner: LocaleFallbackIteratorInner<'a>, + phantom: core::marker::PhantomData<&'b ()>, +} + +impl LocaleFallbacker { + /// Creates a [`LocaleFallbacker`] with compiled fallback data (likely subtags and parent locales). 
+ /// + /// ✨ *Enabled with the `compiled_data` Cargo feature.* + /// + /// [📚 Help choosing a constructor](icu_provider::constructors) + #[cfg(feature = "compiled_data")] + #[allow(clippy::new_ret_no_self)] // keeping constructors together + pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> { + let tickstatic = LocaleFallbackerBorrowed { + likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1, + parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1, + collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1), + }; + // Shitty covariance because the zeromaps confuse the compiler + unsafe { core::mem::transmute(tickstatic) } + } + + icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError, + #[cfg(skip)] + functions: [ + new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self + ]); + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] + pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError> + where + P: DataProvider<LocaleFallbackLikelySubtagsV1Marker> + + DataProvider<LocaleFallbackParentsV1Marker> + + DataProvider<CollationFallbackSupplementV1Marker> + + ?Sized, + { + let likely_subtags = provider.load(Default::default())?.take_payload()?; + let parents = provider.load(Default::default())?.take_payload()?; + let collation_supplement = match DataProvider::<CollationFallbackSupplementV1Marker>::load( + provider, + Default::default(), + ) { + Ok(response) => Some(response.take_payload()?), + // It is expected that not all keys are present + Err(DataError { + kind: DataErrorKind::MissingDataKey, + .. + }) => None, + Err(e) => return Err(e), + }; + Ok(LocaleFallbacker { + likely_subtags, + parents, + collation_supplement, + }) + } + + /// Creates a [`LocaleFallbacker`] without fallback data. Using this constructor may result in + /// surprising behavior, especially in multi-script languages. 
+ pub fn new_without_data() -> Self { + LocaleFallbacker { + likely_subtags: DataPayload::from_owned(Default::default()), + parents: DataPayload::from_owned(Default::default()), + collation_supplement: None, + } + } + + /// Associates a configuration with this fallbacker. + #[inline] + pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig { + self.as_borrowed().for_config(config) + } + + /// Derives a configuration from a [`DataKey`] and associates it + /// with this fallbacker. + #[inline] + #[doc(hidden)] // will be removed in 2.0 + pub fn for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig { + self.for_config(data_key.fallback_config()) + } + + /// Creates a borrowed version of this fallbacker for performance. + pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed { + LocaleFallbackerBorrowed { + likely_subtags: self.likely_subtags.get(), + parents: self.parents.get(), + collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()), + } + } +} + +impl<'a> LocaleFallbackerBorrowed<'a> { + /// Associates a configuration with this fallbacker. + #[inline] + pub const fn for_config(self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig<'a> { + LocaleFallbackerWithConfig { + likely_subtags: self.likely_subtags, + parents: self.parents, + supplement: match config.fallback_supplement { + Some(LocaleFallbackSupplement::Collation) => self.collation_supplement, + _ => None, + }, + config, + } + } +} + +impl LocaleFallbackerBorrowed<'static> { + /// Cheaply converts a `LocaleFallbackerBorrowed<'static>` into a `LocaleFallbacker`. 
+ pub const fn static_to_owned(self) -> LocaleFallbacker { + LocaleFallbacker { + likely_subtags: DataPayload::from_static_ref(self.likely_subtags), + parents: DataPayload::from_static_ref(self.parents), + collation_supplement: match self.collation_supplement { + None => None, + Some(x) => Some(DataPayload::from_static_ref(x)), + }, + } + } +} + +impl<'a> LocaleFallbackerWithConfig<'a> { + /// Creates an iterator based on a [`DataLocale`]. + /// + /// If you have a [`Locale`](icu_locid::Locale), call `.into()` to get a [`DataLocale`]. + /// + /// When first initialized, the locale is normalized according to the fallback algorithm. + pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> { + self.normalize(&mut locale); + LocaleFallbackIterator { + current: locale, + inner: LocaleFallbackIteratorInner { + likely_subtags: self.likely_subtags, + parents: self.parents, + supplement: self.supplement, + config: self.config, + backup_extension: None, + backup_subdivision: None, + backup_variants: None, + }, + phantom: core::marker::PhantomData, + } + } +} + +impl LocaleFallbackIterator<'_, '_> { + /// Borrows the current [`DataLocale`] under fallback. + pub fn get(&self) -> &DataLocale { + &self.current + } + + /// Takes the current [`DataLocale`] under fallback. + pub fn take(self) -> DataLocale { + self.current + } + + /// Performs one step of the locale fallback algorithm. + /// + /// The fallback is completed once the inner [`DataLocale`] becomes `und`. + pub fn step(&mut self) -> &mut Self { + self.inner.step(&mut self.current); + self + } +} diff --git a/vendor/icu_locid_transform/src/lib.rs b/vendor/icu_locid_transform/src/lib.rs new file mode 100644 index 000000000..4c4e34aea --- /dev/null +++ b/vendor/icu_locid_transform/src/lib.rs @@ -0,0 +1,116 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Canonicalization of locale identifiers based on [`CLDR`] data. +//! +//! This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/)) +//! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. +//! +//! It currently supports locale canonicalization based upon the canonicalization +//! algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`], +//! as well as the minimize and maximize likely subtags algorithms +//! as described in [`UTS #35: Unicode LDML 3. Likely Subtags`]. +//! +//! The maximize method potentially updates a passed-in locale in place +//! depending upon the results of running the 'Add Likely Subtags' algorithm +//! from [`UTS #35: Unicode LDML 3. Likely Subtags`]. +//! +//! The minimize method likewise potentially updates a passed-in locale in place +//! depending upon the results of running the 'Remove Likely Subtags' algorithm +//! from [`UTS #35: Unicode LDML 3. Likely Subtags`]. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::Locale; +//! use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; +//! +//! let lc = LocaleCanonicalizer::new(); +//! +//! let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" +//! .parse() +//! .expect("parse failed"); +//! assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); +//! assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::<Locale>().unwrap()); +//! ``` +//! +//! ``` +//! use icu::locid::locale; +//! use icu::locid_transform::{LocaleExpander, TransformResult}; +//! +//! let lc = LocaleExpander::new(); +//! +//! let mut locale = locale!("zh-CN"); +//! assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); +//! assert_eq!(locale, locale!("zh-Hans-CN")); +//! +//! 
let mut locale = locale!("zh-Hant-TW"); +//! assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified); +//! assert_eq!(locale, locale!("zh-Hant-TW")); +//! ``` +//! +//! ``` +//! use icu::locid::locale; +//! use icu::locid_transform::{LocaleExpander, TransformResult}; +//! use writeable::assert_writeable_eq; +//! +//! let lc = LocaleExpander::new(); +//! +//! let mut locale = locale!("zh-Hans-CN"); +//! assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); +//! assert_eq!(locale, locale!("zh")); +//! +//! let mut locale = locale!("zh"); +//! assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified); +//! assert_eq!(locale, locale!("zh")); +//! ``` +//! +//! [`ICU4X`]: ../icu/index.html +//! [`CLDR`]: http://cldr.unicode.org/ +//! [`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags. +//! [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization, + +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + missing_debug_implementations, + ) +)] +#![warn(missing_docs)] + +extern crate alloc; + +mod canonicalizer; +mod directionality; +mod error; +mod expander; +pub mod fallback; +pub mod provider; + +pub use canonicalizer::LocaleCanonicalizer; +pub use directionality::{Direction, LocaleDirectionality}; +pub use error::LocaleTransformError; +pub use expander::LocaleExpander; +#[doc(inline)] +pub use fallback::LocaleFallbacker; + +/// Used to track the result of a transformation operation that potentially modifies its argument in place. 
+#[derive(Debug, PartialEq)] +#[allow(clippy::exhaustive_enums)] // this enum is stable +pub enum TransformResult { + /// The canonicalization operation modified the locale. + Modified, + /// The canonicalization operation did not modify the locale. + Unmodified, +} + +#[doc(no_inline)] +pub use LocaleTransformError as Error; diff --git a/vendor/icu_locid_transform/src/provider/canonicalizer.rs b/vendor/icu_locid_transform/src/provider/canonicalizer.rs new file mode 100644 index 000000000..7638bba1f --- /dev/null +++ b/vendor/icu_locid_transform/src/provider/canonicalizer.rs @@ -0,0 +1,81 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use icu_locid::subtags::{Language, Region, Script, Variant}; +use icu_provider::prelude::*; +use tinystr::UnvalidatedTinyAsciiStr; +use zerovec::{VarZeroVec, ZeroMap, ZeroSlice}; + +#[icu_provider::data_struct(marker(AliasesV1Marker, "locid_transform/aliases@1", singleton))] +#[derive(PartialEq, Clone, Default)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[yoke(prove_covariance_manually)] +/// This alias data is used for locale canonicalization. Each field defines a +/// mapping from an old identifier to a new identifier, based upon the rules in +/// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data +/// is stored in sorted order, allowing for binary search to identify rules to +/// apply. It is broken down into smaller vectors based upon some characteristic +/// of the data, to help avoid unnecessary searches. For example, the `sgn_region` +/// field contains aliases for sign language and region, so that it is not +/// necessary to search the data unless the input is a sign language. 
+/// +/// The algorithm in tr35 is not guaranteed to terminate on data other than what +/// is currently in CLDR. For this reason, it is not a good idea to attempt to add +/// or modify aliases for use in this structure. +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +// TODO: Use validated types as value types +#[derive(Debug)] +pub struct AliasesV1<'data> { + /// `[language(-variant)+\] -> [langid]` + /// This is not a map as it's searched linearly according to the canonicalization rules. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>, + /// `sgn-[region] -> [language]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>, + /// `[language{2}] -> [langid]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>, + /// `[language{3}] -> [langid]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>, + /// `[langid] -> [langid]` + /// This is not a map as it's searched linearly according to the canonicalization rules. 
+ #[cfg_attr(feature = "serde", serde(borrow))] + pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>, + + /// `[script] -> [script]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub script: ZeroMap<'data, UnvalidatedScript, Script>, + + /// `[region{2}] -> [region]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>, + /// `[region{3}] -> [region]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>, + + /// `[region] -> [region]+` + #[cfg_attr(feature = "serde", serde(borrow))] + pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>, + + /// `[variant] -> [variant]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>, + + /// `[value{7}] -> [value{7}]` + #[cfg_attr(feature = "serde", serde(borrow))] + pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>, +} diff --git a/vendor/icu_locid_transform/src/provider/directionality.rs b/vendor/icu_locid_transform/src/provider/directionality.rs new file mode 100644 index 000000000..568248180 --- /dev/null +++ b/vendor/icu_locid_transform/src/provider/directionality.rs @@ -0,0 +1,36 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use super::*; +use icu_provider::prelude::*; +use zerovec::ZeroVec; + +#[icu_provider::data_struct(marker( + ScriptDirectionV1Marker, + "locid_transform/script_dir@1", + singleton +))] +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +/// This directionality data is used to determine the script directionality of a locale. +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +#[yoke(prove_covariance_manually)] +pub struct ScriptDirectionV1<'data> { + /// Scripts in right-to-left direction. + #[cfg_attr(feature = "serde", serde(borrow))] + pub rtl: ZeroVec<'data, UnvalidatedScript>, + /// Scripts in left-to-right direction. + #[cfg_attr(feature = "serde", serde(borrow))] + pub ltr: ZeroVec<'data, UnvalidatedScript>, +} diff --git a/vendor/icu_locid_transform/src/provider/expander.rs b/vendor/icu_locid_transform/src/provider/expander.rs new file mode 100644 index 000000000..2f624b40a --- /dev/null +++ b/vendor/icu_locid_transform/src/provider/expander.rs @@ -0,0 +1,243 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use super::*; +use icu_locid::subtags::{Language, Region, Script}; +use icu_provider::prelude::*; +use zerovec::ZeroMap; + +#[icu_provider::data_struct(marker( + LikelySubtagsV1Marker, + "locid_transform/likelysubtags@1", + singleton +))] +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +/// This likely subtags data is used for the minimize and maximize operations. +/// Each field defines a mapping from an old identifier to a new identifier, +/// based upon the rules in +/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>. +/// +/// The data is stored is broken down into smaller vectors based upon the rules +/// defined for the likely subtags maximize algorithm. +/// +/// For efficiency, only the relevant part of the LanguageIdentifier is stored +/// for searching and replacing. E.g., the `language_script` field is used to store +/// rules for `LanguageIdentifier`s that contain a language and a script, but not a +/// region. +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +#[yoke(prove_covariance_manually)] +pub struct LikelySubtagsV1<'data> { + /// Language and script. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>, + /// Language and region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>, + /// Just language. 
+ #[cfg_attr(feature = "serde", serde(borrow))] + pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>, + /// Script and region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>, + /// Just script. + #[cfg_attr(feature = "serde", serde(borrow))] + pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>, + /// Just region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>, + /// Undefined. + pub und: (Language, Script, Region), +} + +#[icu_provider::data_struct(marker( + LikelySubtagsForLanguageV1Marker, + "locid_transform/likelysubtags_l@1", + singleton +))] +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +/// This likely subtags data is used for the minimize and maximize operations. +/// Each field defines a mapping from an old identifier to a new identifier, +/// based upon the rules in +/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>. +/// +/// The data is stored is broken down into smaller vectors based upon the rules +/// defined for the likely subtags maximize algorithm. +/// +/// For efficiency, only the relevant part of the LanguageIdentifier is stored +/// for searching and replacing. E.g., the `language_script` field is used to store +/// rules for `LanguageIdentifier`s that contain a language and a script, but not a +/// region. +/// +/// This struct contains mappings when the input contains a language subtag. +/// Also see [`LikelySubtagsForScriptRegionV1`]. +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. 
While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +#[yoke(prove_covariance_manually)] +pub struct LikelySubtagsForLanguageV1<'data> { + /// Language and script. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>, + /// Language and region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>, + /// Just language. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>, + /// Undefined. + pub und: (Language, Script, Region), +} + +impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForLanguageV1<'data> { + fn from(other: LikelySubtagsV1<'data>) -> Self { + Self { + language_script: other.language_script, + language_region: other.language_region, + language: other.language, + und: other.und, + } + } +} + +impl<'data> LikelySubtagsForLanguageV1<'data> { + pub(crate) fn clone_from_borrowed(other: &LikelySubtagsV1<'data>) -> Self { + Self { + language_script: other.language_script.clone(), + language_region: other.language_region.clone(), + language: other.language.clone(), + und: other.und, + } + } +} + +#[icu_provider::data_struct(marker( + LikelySubtagsForScriptRegionV1Marker, + "locid_transform/likelysubtags_sr@1", + singleton +))] +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +/// This likely subtags data is used for the minimize and maximize operations. +/// Each field defines a mapping from an old identifier to a new identifier, +/// based upon the rules in +/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>. 
+/// +/// The data is stored is broken down into smaller vectors based upon the rules +/// defined for the likely subtags maximize algorithm. +/// +/// For efficiency, only the relevant part of the LanguageIdentifier is stored +/// for searching and replacing. E.g., the `script_region` field is used to store +/// rules for `LanguageIdentifier`s that contain a script and a region, but not a +/// language. +/// +/// This struct contains mappings when the input does not contain a language subtag. +/// Also see [`LikelySubtagsForLanguageV1`]. +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +#[yoke(prove_covariance_manually)] +pub struct LikelySubtagsForScriptRegionV1<'data> { + /// Script and region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>, + /// Just script. + #[cfg_attr(feature = "serde", serde(borrow))] + pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>, + /// Just region. 
+ #[cfg_attr(feature = "serde", serde(borrow))] + pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>, +} + +impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForScriptRegionV1<'data> { + fn from(other: LikelySubtagsV1<'data>) -> Self { + Self { + script_region: other.script_region, + script: other.script, + region: other.region, + } + } +} + +#[icu_provider::data_struct(marker( + LikelySubtagsExtendedV1Marker, + "locid_transform/likelysubtags_ext@1", + singleton +))] +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +/// This likely subtags data is used for full coverage of locales, including ones that +/// don't otherwise have data in the Common Locale Data Repository (CLDR). +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +#[yoke(prove_covariance_manually)] +pub struct LikelySubtagsExtendedV1<'data> { + /// Language and script. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>, + /// Language and region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>, + /// Just language. + #[cfg_attr(feature = "serde", serde(borrow))] + pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>, + /// Script and region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>, + /// Just script. 
+ #[cfg_attr(feature = "serde", serde(borrow))] + pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>, + /// Just region. + #[cfg_attr(feature = "serde", serde(borrow))] + pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>, +} + +impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsExtendedV1<'data> { + fn from(other: LikelySubtagsV1<'data>) -> Self { + Self { + language_script: other.language_script, + language_region: other.language_region, + language: other.language, + script_region: other.script_region, + script: other.script, + region: other.region, + } + } +} diff --git a/vendor/icu_locid_transform/src/provider/fallback.rs b/vendor/icu_locid_transform/src/provider/fallback.rs new file mode 100644 index 000000000..c0635afe7 --- /dev/null +++ b/vendor/icu_locid_transform/src/provider/fallback.rs @@ -0,0 +1,102 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use icu_locid::extensions::unicode::Key; +use icu_locid::subtags::{region, script, Language, Region, Script}; +use icu_provider::prelude::*; +use zerovec::ule::UnvalidatedStr; +use zerovec::ZeroMap; +use zerovec::ZeroMap2d; + +/// Locale fallback rules derived from likely subtags data. +#[icu_provider::data_struct(marker( + LocaleFallbackLikelySubtagsV1Marker, + "fallback/likelysubtags@1", + singleton +))] +#[derive(Default, Clone, PartialEq, Debug)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[yoke(prove_covariance_manually)] +pub struct LocaleFallbackLikelySubtagsV1<'data> { + /// Map from language to the default script in that language. Languages whose default script + /// is `Latn` are not included in the map for data size savings. 
+ /// + /// Example: "zh" defaults to "Hans", which is in this map. + #[cfg_attr(feature = "serde", serde(borrow))] + pub l2s: ZeroMap<'data, UnvalidatedLanguage, Script>, + /// Map from language-region pairs to a script. Only populated if the script is different + /// from the one in `l2s` for that language. + /// + /// Example: "zh-TW" defaults to "Hant", which is in this map. + #[cfg_attr(feature = "serde", serde(borrow))] + pub lr2s: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedRegion, Script>, + /// Map from language to the default region in that language. Languages whose default region + /// is `ZZ` are not included in the map for data size savings. + /// + /// Example: "zh" defaults to "CN". + #[cfg_attr(feature = "serde", serde(borrow))] + pub l2r: ZeroMap<'data, UnvalidatedLanguage, Region>, + /// Map from language-script pairs to a region. Only populated if the region is different + /// from the one in `l2r` for that language. + /// + /// Example: "zh-Hant" defaults to "TW". + #[cfg_attr(feature = "serde", serde(borrow))] + pub ls2r: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedScript, Region>, +} + +/// `Latn` is the most common script, so it is defaulted for data size savings. +pub const DEFAULT_SCRIPT: Script = script!("Latn"); + +/// `ZZ` is the most common region, so it is defaulted for data size savings. +pub const DEFAULT_REGION: Region = region!("ZZ"); + +/// Locale fallback rules derived from CLDR parent locales data. 
+#[icu_provider::data_struct(marker( + LocaleFallbackParentsV1Marker, + "fallback/parents@1", + singleton +))] +#[derive(Default, Clone, PartialEq, Debug)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[yoke(prove_covariance_manually)] +pub struct LocaleFallbackParentsV1<'data> { + /// Map from language identifier to language identifier, indicating that the language on the + /// left should inherit from the language on the right. + #[cfg_attr(feature = "serde", serde(borrow))] + pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>, +} + +/// Key-specific supplemental fallback data. +#[icu_provider::data_struct(marker( + CollationFallbackSupplementV1Marker, + "fallback/supplement/co@1", + singleton, +))] +#[derive(Default, Clone, PartialEq, Debug)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_locid_transform::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[yoke(prove_covariance_manually)] +pub struct LocaleFallbackSupplementV1<'data> { + /// Additional parent locales to supplement the common ones. + #[cfg_attr(feature = "serde", serde(borrow))] + pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>, + /// Default values for Unicode extension keywords. + #[cfg_attr(feature = "serde", serde(borrow))] + pub unicode_extension_defaults: ZeroMap2d<'data, Key, UnvalidatedStr, UnvalidatedStr>, +} diff --git a/vendor/icu_locid_transform/src/provider/mod.rs b/vendor/icu_locid_transform/src/provider/mod.rs new file mode 100644 index 000000000..337bd3f0c --- /dev/null +++ b/vendor/icu_locid_transform/src/provider/mod.rs @@ -0,0 +1,98 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// Provider structs must be stable +#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] + +//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. +//! +//! <div class="stab unstable"> +//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +//! including in SemVer minor releases. While the serde representation of data structs is guaranteed +//! to be stable, their Rust representation might not be. Use with caution. +//! </div> +//! +//! Read more about data providers: [`icu_provider`] + +mod canonicalizer; +pub use canonicalizer::*; +mod directionality; +pub use directionality::*; +mod expander; +pub use expander::*; +mod fallback; +pub use fallback::*; + +#[cfg(feature = "compiled_data")] +#[derive(Debug)] +/// Baked data +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only +/// guaranteed to match with this version's `*_unstable` providers. Use with caution. 
+/// </div> +pub struct Baked; + +#[cfg(feature = "compiled_data")] +const _: () = { + pub mod icu { + pub use crate as locid_transform; + pub use icu_locid as locid; + } + icu_locid_transform_data::make_provider!(Baked); + icu_locid_transform_data::impl_fallback_likelysubtags_v1!(Baked); + icu_locid_transform_data::impl_fallback_parents_v1!(Baked); + icu_locid_transform_data::impl_fallback_supplement_co_v1!(Baked); + icu_locid_transform_data::impl_locid_transform_aliases_v1!(Baked); + icu_locid_transform_data::impl_locid_transform_likelysubtags_ext_v1!(Baked); + icu_locid_transform_data::impl_locid_transform_likelysubtags_l_v1!(Baked); + icu_locid_transform_data::impl_locid_transform_likelysubtags_sr_v1!(Baked); + icu_locid_transform_data::impl_locid_transform_script_dir_v1!(Baked); +}; + +use alloc::borrow::Cow; +use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr}; + +// We use raw TinyAsciiStrs for map keys, as we then don't have to +// validate them as subtags on deserialization. Map lookup can be +// done even if they are not valid tags (an invalid key will just +// become inaccessible). +type UnvalidatedLanguage = UnvalidatedTinyAsciiStr<3>; +type UnvalidatedScript = UnvalidatedTinyAsciiStr<4>; +type UnvalidatedRegion = UnvalidatedTinyAsciiStr<3>; +type UnvalidatedVariant = UnvalidatedTinyAsciiStr<8>; +type UnvalidatedSubdivision = UnvalidatedTinyAsciiStr<7>; +type SemivalidatedSubdivision = TinyAsciiStr<7>; + +// LanguageIdentifier doesn't have an AsULE implementation, so we have +// to store strs and parse when needed. 
+type UnvalidatedLanguageIdentifier = str; +type UnvalidatedLanguageIdentifierPair = StrStrPairVarULE; + +#[zerovec::make_varule(StrStrPairVarULE)] +#[zerovec::derive(Debug)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[cfg_attr( + feature = "serde", + derive(serde::Deserialize), + zerovec::derive(Deserialize) +)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + zerovec::derive(Serialize), + databake(path = icu_locid_transform::provider), +)] +/// A pair of strings with a EncodeAsVarULE implementation. +/// +/// <div class="stab unstable"> +/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, +/// including in SemVer minor releases. While the serde representation of data structs is guaranteed +/// to be stable, their Rust representation might not be. Use with caution. +/// </div> +pub struct StrStrPair<'a>( + #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>, + #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>, +); diff --git a/vendor/icu_locid_transform/tests/fixtures/canonicalize.json b/vendor/icu_locid_transform/tests/fixtures/canonicalize.json new file mode 100644 index 000000000..89e6cec05 --- /dev/null +++ b/vendor/icu_locid_transform/tests/fixtures/canonicalize.json @@ -0,0 +1,444 @@ +[ + { + "input": "es", + "output": "es" + }, + { + "input": "en-CA", + "output": "en-CA" + }, + { + "input": "cka", + "output": "cmr" + }, + { + "input": "cze", + "output": "cs" + }, + { + "input": "gfx", + "output": "vaj" + }, + { + "input": "sgn-BR", + "output": "bzs" + }, + { + "input": "sgn-DD", + "output": "gsg" + }, + { + "input": "tam", + "output": "ta" + }, + { + "input": "und-aaland", + "output": "und-AX" + }, + { + "input": "nob-bokmal", + "output": "nb" + }, + { + "input": "no-nynorsk", + "output": "nn" + }, + { + "input": "und-Qaai", + "output": "und-Zinh" + }, + { + "input": "en-554", + "output": "en-NZ" + }, + { + "input": "en-084", + "output": 
"en-BZ" + }, + { + "input": "art-lojban", + "output": "jbo" + }, + { + "input": "zh-guoyu", + "output": "zh" + }, + { + "input": "zh-hakka", + "output": "hak" + }, + { + "input": "zh-xiang", + "output": "hsn" + }, + { + "input": "aar-x-private", + "output": "aa-x-private" + }, + { + "input": "heb-x-private", + "output": "he-x-private" + }, + { + "input": "ces", + "output": "cs" + }, + { + "input": "hy-arevela", + "output": "hy" + }, + { + "input": "hy-arevmda", + "output": "hyw" + }, + { + "input": "cel-gaulish", + "output": "xtg" + }, + { + "input": "ja-latn-hepburn-heploc", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-fonipa-hepburn-heploc", + "output": "ja-Latn-alalc97-fonipa" + }, + { + "input": "und-Armn-SU", + "output": "und-Armn-AM" + }, + { + "input": "sh", + "output": "sr-Latn" + }, + { + "input": "sh-Cyrl", + "output": "sr-Cyrl" + }, + { + "input": "cnr", + "output": "sr-ME" + }, + { + "input": "cnr-BA", + "output": "sr-BA" + }, + { + "input": "ru-SU", + "output": "ru-RU" + }, + { + "input": "ru-810", + "output": "ru-RU" + }, + { + "input": "en-SU", + "output": "en-RU" + }, + { + "input": "en-810", + "output": "en-RU" + }, + { + "input": "und-SU", + "output": "und-RU" + }, + { + "input": "und-810", + "output": "und-RU" + }, + { + "input": "und-Latn-SU", + "output": "und-Latn-RU" + }, + { + "input": "und-Latn-810", + "output": "und-Latn-RU" + }, + { + "input": "hy-SU", + "output": "hy-AM" + }, + { + "input": "hy-810", + "output": "hy-AM" + }, + { + "input": "und-Armn-SU", + "output": "und-Armn-AM" + }, + { + "input": "und-Armn-810", + "output": "und-Armn-AM" + }, + { + "input": "sr-CS", + "output": "sr-RS" + }, + { + "input": "sr-Latn-CS", + "output": "sr-Latn-RS" + }, + { + "input": "sr-Cyrl-CS", + "output": "sr-Cyrl-RS" + }, + { + "input": "az-NT", + "output": "az-SA" + }, + { + "input": "sl-t-sl-rozaj-biske-1994", + "output": "sl-t-sl-1994-biske-rozaj" + }, + { + "input": "DE-T-M0-DIN-K0-QWERTZ", + "output": "de-t-k0-qwertz-m0-din" + }, + 
{ + "input": "en-t-m0-true", + "output": "en-t-m0-true" + }, + { + "input": "en-t-iw", + "output": "en-t-he" + }, + { + "input": "und-u-rg-no23", + "output": "und-u-rg-no50" + }, + { + "input": "und-u-rg-cn11", + "output": "und-u-rg-cnbj" + }, + { + "input": "und-u-rg-cz10a", + "output": "und-u-rg-cz110" + }, + { + "input": "und-u-rg-fra", + "output": "und-u-rg-frges" + }, + { + "input": "und-u-rg-frg", + "output": "und-u-rg-frges" + }, + { + "input": "und-u-rg-lud", + "output": "und-u-rg-lucl" + }, + { + "input": "und-NO-u-rg-no23", + "output": "und-NO-u-rg-no50" + }, + { + "input": "und-CN-u-rg-cn11", + "output": "und-CN-u-rg-cnbj" + }, + { + "input": "und-CZ-u-rg-cz10a", + "output": "und-CZ-u-rg-cz110" + }, + { + "input": "und-FR-u-rg-fra", + "output": "und-FR-u-rg-frges" + }, + { + "input": "und-FR-u-rg-frg", + "output": "und-FR-u-rg-frges" + }, + { + "input": "und-u-rg-lud", + "output": "und-u-rg-lucl" + }, + { + "input": "und-u-sd-no23", + "output": "und-u-sd-no50" + }, + { + "input": "und-u-sd-cn11", + "output": "und-u-sd-cnbj" + }, + { + "input": "und-u-sd-cz10a", + "output": "und-u-sd-cz110" + }, + { + "input": "und-u-sd-fra", + "output": "und-u-sd-frges" + }, + { + "input": "hy-arevela", + "output": "hy" + }, + { + "input": "hy-Armn-arevela", + "output": "hy-Armn" + }, + { + "input": "hy-AM-arevela", + "output": "hy-AM" + }, + { + "input": "hy-arevela-fonipa", + "output": "hy-fonipa" + }, + { + "input": "hy-fonipa-arevela", + "output": "hy-fonipa" + }, + { + "input": "hy-arevmda", + "output": "hyw" + }, + { + "input": "hy-Armn-arevmda", + "output": "hyw-Armn" + }, + { + "input": "hy-AM-arevmda", + "output": "hyw-AM" + }, + { + "input": "hy-arevmda-fonipa", + "output": "hyw-fonipa" + }, + { + "input": "hy-fonipa-arevmda", + "output": "hyw-fonipa" + }, + { + "input": "ja-Latn-hepburn-heploc", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-JP-hepburn-heploc", + "output": "ja-Latn-JP-alalc97" + }, + { + "input": "sv-aaland", + "output": "sv-AX" + 
}, + { + "input": "el-polytoni", + "output": "el-polyton" + }, + { + "input": "ja-Latn-alalc97-hepburn-heploc", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-hepburn-alalc97-heploc", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-hepburn-heploc-alalc97", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-heploc-hepburn", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-heploc", + "output": "ja-Latn-alalc97" + }, + { + "input": "ja-Latn-aaland-heploc", + "output": "ja-Latn-AX-alalc97" + }, + { + "input": "ja-Latn-heploc-polytoni", + "output": "ja-Latn-alalc97-polyton" + }, + { + "input": "und-Latn-t-und-hani-m0-names", + "output": "und-Latn-t-und-hani-m0-prprname", + "disabled": true + }, + { + "input": "und-u-ca-ethiopian-amete-alem", + "output": "und-u-ca-ethioaa", + "disabled": true + }, + { + "input": "und-u-ca-islamicc", + "output": "und-u-ca-islamic-civil", + "disabled": true + }, + { + "input": "und-u-ks-primary", + "output": "und-u-ks-level1", + "disabled": true + }, + { + "input": "und-u-ks-tertiary", + "output": "und-u-ks-level3", + "disabled": true + }, + { + "input": "und-u-ms-imperial", + "output": "und-u-ms-uksystem", + "disabled": true + }, + { + "input": "und-u-kb-yes", + "output": "und-u-kb", + "disabled": true + }, + { + "input": "und-u-kc-yes", + "output": "und-u-k", + "disabled": true + }, + { + "input": "und-u-kh-yes", + "output": "und-u-kh", + "disabled": true + }, + { + "input": "und-u-kk-yes", + "output": "und-u-kk", + "disabled": true + }, + { + "input": "und-u-kn-yes", + "output": "und-u-kn", + "disabled": true + }, + { + "input": "und-u-ka-yes", + "output": "und-u-ka-yes", + "disabled": true + }, + { + "input": "und-u-tz-cnckg", + "output": "und-u-tz-cnsha", + "disabled": true + }, + { + "input": "und-u-tz-eire", + "output": "und-u-tz-iedub", + "disabled": true + }, + { + "input": "und-u-tz-est", + "output": "und-u-tz-utcw05", + "disabled": true + }, + { + "input": "und-u-tz-gmt0", + "output": 
"und-u-tz-gmt", + "disabled": true + }, + { + "input": "und-u-tz-uct", + "output": "und-u-tz-utc", + "disabled": true + }, + { + "input": "und-u-tz-zulu", + "output": "und-u-tz-utc", + "disabled": true + } +]
\ No newline at end of file diff --git a/vendor/icu_locid_transform/tests/fixtures/maximize.json b/vendor/icu_locid_transform/tests/fixtures/maximize.json new file mode 100644 index 000000000..86b9b2fe8 --- /dev/null +++ b/vendor/icu_locid_transform/tests/fixtures/maximize.json @@ -0,0 +1,182 @@ +[ + { + "input": "en-US", + "output": "en-Latn-US" + }, + { + "input": "en-GB", + "output": "en-Latn-GB" + }, + { + "input": "es-AR", + "output": "es-Latn-AR" + }, + { + "input": "it", + "output": "it-Latn-IT" + }, + { + "input": "zh-Hans-CN", + "output": "zh-Hans-CN" + }, + { + "input": "de-AT", + "output": "de-Latn-AT" + }, + { + "input": "pl", + "output": "pl-Latn-PL" + }, + { + "input": "fr-FR", + "output": "fr-Latn-FR" + }, + { + "input": "de-AT", + "output": "de-Latn-AT" + }, + { + "input": "sr-Cyrl-SR", + "output": "sr-Cyrl-SR" + }, + { + "input": "nb-NO", + "output": "nb-Latn-NO" + }, + { + "input": "fr-FR", + "output": "fr-Latn-FR" + }, + { + "input": "mk", + "output": "mk-Cyrl-MK" + }, + { + "input": "uk", + "output": "uk-Cyrl-UA" + }, + { + "input": "und-PL", + "output": "pl-Latn-PL" + }, + { + "input": "und-Latn-AM", + "output": "ku-Latn-AM" + }, + { + "input": "ug-Cyrl", + "output": "ug-Cyrl-KZ" + }, + { + "input": "sr-ME", + "output": "sr-Latn-ME" + }, + { + "input": "mn-Mong", + "output": "mn-Mong-CN" + }, + { + "input": "lif-Limb", + "output": "lif-Limb-IN" + }, + { + "input": "gan", + "output": "gan-Hans-CN" + }, + { + "input": "zh-Hant", + "output": "zh-Hant-TW" + }, + { + "input": "yue-Hans", + "output": "yue-Hans-CN" + }, + { + "input": "unr", + "output": "unr-Beng-IN" + }, + { + "input": "unr-Deva", + "output": "unr-Deva-NP" + }, + { + "input": "und-Thai-CN", + "output": "lcp-Thai-CN" + }, + { + "input": "ug-Cyrl", + "output": "ug-Cyrl-KZ" + }, + { + "input": "en-Latn-DE", + "output": "en-Latn-DE" + }, + { + "input": "pl-FR", + "output": "pl-Latn-FR" + }, + { + "input": "de-CH", + "output": "de-Latn-CH" + }, + { + "input": "tuq", + "output": 
"tuq-Latn-ZZ" + }, + { + "input": "sr-ME", + "output": "sr-Latn-ME" + }, + { + "input": "ng", + "output": "ng-Latn-NA" + }, + { + "input": "klx", + "output": "klx-Latn-ZZ" + }, + { + "input": "kk-Arab", + "output": "kk-Arab-CN" + }, + { + "input": "en-Cyrl", + "output": "en-Cyrl-US" + }, + { + "input": "und-Cyrl-UK", + "output": "ru-Cyrl-UK" + }, + { + "input": "und-Arab", + "output": "ar-Arab-EG" + }, + { + "input": "und-Arab-FO", + "output": "ar-Arab-FO" + }, + { + "input": "zh-TW", + "output": "zh-Hant-TW" + }, + { + "input": "und", + "output": "en-Latn-US" + }, + { + "input": "zh-SG", + "output": "zh-Hans-SG" + }, + { + "input": "und-TW", + "output": "zh-Hant-TW" + }, + { + "input": "zh-hant-u-nu-Chinese-hc-h24", + "output": "zh-Hant-TW-u-hc-h24-nu-chinese" + }, + { + "input": "und-latn-AQ", + "output": "und-Latn-AQ" + } +] diff --git a/vendor/icu_locid_transform/tests/fixtures/minimize.json b/vendor/icu_locid_transform/tests/fixtures/minimize.json new file mode 100644 index 000000000..6c225e230 --- /dev/null +++ b/vendor/icu_locid_transform/tests/fixtures/minimize.json @@ -0,0 +1,26 @@ +[ + { + "input": "zh-Hant", + "output": "zh-TW" + }, + { + "input": "en-Latn-US", + "output": "en" + }, + { + "input": "en", + "output": "en" + }, + { + "input": "und", + "output": "en" + }, + { + "input": "es-ES-preeuro", + "output": "es-preeuro" + }, + { + "input": "zh-Hant-TW-u-hc-h24-nu-chinese", + "output": "zh-TW-u-hc-h24-nu-chinese" + } +] diff --git a/vendor/icu_locid_transform/tests/fixtures/mod.rs b/vendor/icu_locid_transform/tests/fixtures/mod.rs new file mode 100644 index 000000000..70dfb0381 --- /dev/null +++ b/vendor/icu_locid_transform/tests/fixtures/mod.rs @@ -0,0 +1,12 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use serde::Deserialize; + +#[derive(Deserialize)] +pub struct CanonicalizationTest { + pub input: String, + pub output: String, + pub disabled: Option<bool>, +} diff --git a/vendor/icu_locid_transform/tests/helpers/mod.rs b/vendor/icu_locid_transform/tests/helpers/mod.rs new file mode 100644 index 000000000..d250c510c --- /dev/null +++ b/vendor/icu_locid_transform/tests/helpers/mod.rs @@ -0,0 +1,15 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use std::fs::File; +use std::io::{BufReader, Error}; + +pub fn read_fixture<T>(path: &str) -> Result<T, Error> +where + T: serde::de::DeserializeOwned, +{ + let file = File::open(path)?; + let reader = BufReader::new(file); + Ok(serde_json::from_reader(reader)?) +} diff --git a/vendor/icu_locid_transform/tests/locale_canonicalizer.rs b/vendor/icu_locid_transform/tests/locale_canonicalizer.rs new file mode 100644 index 000000000..06e360f53 --- /dev/null +++ b/vendor/icu_locid_transform/tests/locale_canonicalizer.rs @@ -0,0 +1,82 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +mod fixtures; +mod helpers; + +use icu_locid::Locale; +use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, TransformResult}; +use writeable::assert_writeable_eq; + +#[test] +fn test_maximize() { + let lc = LocaleExpander::new_extended(); + + let path = "./tests/fixtures/maximize.json"; + let testcases: Vec<fixtures::CanonicalizationTest> = + helpers::read_fixture(path).expect("Failed to read a fixture"); + + for case in testcases { + if let Some(true) = case.disabled { + continue; + } + let mut locale: Locale = case.input.parse().unwrap(); + let unmodified = locale.clone(); + let result = lc.maximize(&mut locale); + assert_writeable_eq!(locale, case.output); + if result == TransformResult::Modified { + assert_ne!(locale, unmodified); + } else { + assert_eq!(locale, unmodified); + } + } +} + +#[test] +fn test_minimize() { + let lc = LocaleExpander::new_extended(); + + let path = "./tests/fixtures/minimize.json"; + let testcases: Vec<fixtures::CanonicalizationTest> = + helpers::read_fixture(path).expect("Failed to read a fixture"); + + for case in testcases { + if let Some(true) = case.disabled { + continue; + } + let mut locale: Locale = case.input.parse().unwrap(); + let unmodified = locale.clone(); + let result = lc.minimize(&mut locale); + assert_writeable_eq!(locale, case.output); + if result == TransformResult::Modified { + assert_ne!(locale, unmodified); + } else { + assert_eq!(locale, unmodified); + } + } +} + +#[test] +fn test_canonicalize() { + let lc = LocaleCanonicalizer::new(); + + let path = "./tests/fixtures/canonicalize.json"; + let testcases: Vec<fixtures::CanonicalizationTest> = + helpers::read_fixture(path).expect("Failed to read a fixture"); + + for case in testcases { + if let Some(true) = case.disabled { + continue; + } + let mut locale: Locale = case.input.parse().expect("Unable to parse input"); + let unmodified = locale.clone(); + let result = lc.canonicalize(&mut locale); + assert_writeable_eq!(locale, case.output); + if result 
== TransformResult::Modified { + assert_ne!(locale, unmodified); + } else { + assert_eq!(locale, unmodified); + } + } +} |