From ef24de24a82fe681581cc130f342363c47c0969a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 7 Jun 2024 07:48:48 +0200 Subject: Merging upstream version 1.75.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_locid_transform/.cargo-checksum.json | 1 + vendor/icu_locid_transform/Cargo.toml | 128 ++++ vendor/icu_locid_transform/LICENSE | 44 ++ vendor/icu_locid_transform/README.md | 77 +++ .../benches/fixtures/locales.json | 41 ++ .../benches/fixtures/uncanonicalized-locales.json | 88 +++ vendor/icu_locid_transform/benches/helpers/mod.rs | 15 + .../benches/locale_canonicalizer.rs | 99 +++ vendor/icu_locid_transform/src/canonicalizer.rs | 618 ++++++++++++++++++ vendor/icu_locid_transform/src/directionality.rs | 231 +++++++ vendor/icu_locid_transform/src/error.rs | 27 + vendor/icu_locid_transform/src/expander.rs | 722 +++++++++++++++++++++ .../icu_locid_transform/src/fallback/algorithms.rs | 487 ++++++++++++++ vendor/icu_locid_transform/src/fallback/mod.rs | 300 +++++++++ vendor/icu_locid_transform/src/lib.rs | 116 ++++ .../src/provider/canonicalizer.rs | 81 +++ .../src/provider/directionality.rs | 36 + .../icu_locid_transform/src/provider/expander.rs | 243 +++++++ .../icu_locid_transform/src/provider/fallback.rs | 102 +++ vendor/icu_locid_transform/src/provider/mod.rs | 98 +++ .../tests/fixtures/canonicalize.json | 444 +++++++++++++ .../tests/fixtures/maximize.json | 182 ++++++ .../tests/fixtures/minimize.json | 26 + vendor/icu_locid_transform/tests/fixtures/mod.rs | 12 + vendor/icu_locid_transform/tests/helpers/mod.rs | 15 + .../tests/locale_canonicalizer.rs | 82 +++ 26 files changed, 4315 insertions(+) create mode 100644 vendor/icu_locid_transform/.cargo-checksum.json create mode 100644 vendor/icu_locid_transform/Cargo.toml create mode 100644 vendor/icu_locid_transform/LICENSE create mode 100644 vendor/icu_locid_transform/README.md create mode 100644 vendor/icu_locid_transform/benches/fixtures/locales.json create mode 100644 vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json create mode 100644 vendor/icu_locid_transform/benches/helpers/mod.rs create mode 100644 vendor/icu_locid_transform/benches/locale_canonicalizer.rs create mode 100644 vendor/icu_locid_transform/src/canonicalizer.rs create mode 100644 vendor/icu_locid_transform/src/directionality.rs create mode 100644 vendor/icu_locid_transform/src/error.rs create mode 100644 vendor/icu_locid_transform/src/expander.rs create mode 100644 vendor/icu_locid_transform/src/fallback/algorithms.rs create mode 100644 vendor/icu_locid_transform/src/fallback/mod.rs create mode 100644 vendor/icu_locid_transform/src/lib.rs create mode 100644 vendor/icu_locid_transform/src/provider/canonicalizer.rs create mode 100644 vendor/icu_locid_transform/src/provider/directionality.rs create mode 100644 vendor/icu_locid_transform/src/provider/expander.rs create mode 100644 vendor/icu_locid_transform/src/provider/fallback.rs create mode 100644 vendor/icu_locid_transform/src/provider/mod.rs create mode 100644 vendor/icu_locid_transform/tests/fixtures/canonicalize.json create mode 100644 vendor/icu_locid_transform/tests/fixtures/maximize.json create mode 100644 vendor/icu_locid_transform/tests/fixtures/minimize.json create mode 100644 vendor/icu_locid_transform/tests/fixtures/mod.rs create mode 100644 vendor/icu_locid_transform/tests/helpers/mod.rs create mode 100644 vendor/icu_locid_transform/tests/locale_canonicalizer.rs (limited to 'vendor/icu_locid_transform') diff --git a/vendor/icu_locid_transform/.cargo-checksum.json b/vendor/icu_locid_transform/.cargo-checksum.json new file mode 100644 index 000000000..ff2371e3f --- /dev/null +++ b/vendor/icu_locid_transform/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"334f54b489b88e4808da4fa355ddf773b86971570d4bb0360a876e3437cb962d","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"3ff3f2e2f9e5d4c5786132838576edef42a12c5529d5f080370f24aa6246bb92","benches/fixtures/locales.json":"9846601a29874baf140cac1252d4624fadc30182fec106d17f008ece886b9185","benches/fixtures/uncanonicalized-locales.json":"a866ed318b92f79d8853567e79b373c02984967023f5f39161140544e71b0c72","benches/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","benches/locale_canonicalizer.rs":"477af27aa35385d107e19b8e8a0452466a69d20e147a63631b78634c17712fed","src/canonicalizer.rs":"7770767fad20f38aaae19382430451573293ccdeac587b2d063163b870781086","src/directionality.rs":"a031a9d55ffe827c86400637b7302dc424c708dcc52ea667504a33a16db822c2","src/error.rs":"486fda8a0e9b7bb5822bbb0defb51145364d6053b8d60b88ef71e4b2bcd6699d","src/expander.rs":"d3ef487a416425ea6fb2ce7bf08b7487e180a580002e54ce30d5524cfd7514e2","src/fallback/algorithms.rs":"47625130cd5a04cf085dd0494591e117ba204a9d2eb649788b0ff96773cc6e21","src/fallback/mod.rs":"71ca2f23e410863010a62a48bba8a943763f7d49c12bf80b451a1b9295484e44","src/lib.rs":"5390facdc3df7e5ec5ab842bf59d4d13383d77d93a722685231a1d271cfba944","src/provider/canonicalizer.rs":"f848dbbc906b5f3be0b6384f5a2f26178898822a5c37334a57b12db8e1af0ed9","src/provider/directionality.rs":"fc516f501254af444cfa010d3c87aeea032dd6eccf5f82301c050ed3df2e05b1","src/provider/expander.rs":"6903d16138ada8216e0341d984126dcc1f6fac21468144e8140fc217b164572e","src/provider/fallback.rs":"d567e3d49261cac9de35825b3d57204d49068558f10579121f0bf0c42090c9cc","src/provider/mod.rs":"ce8e29eda7128747d489371118d1cf2c0e2740662eb8c6a55310dff86c5641cc","tests/fixtures/canonicalize.json":"3dc2f661b04e4c9ecced70fc1b98a504eb5f5a0067b38665b10e50c25174bc4a","tests/fixtures/maximize.json":"8137359060218572bcaf5e56825346fdcb600e2189378ef4be836ba0a7295b66","tests/fixtures/minimize.json":"3bb6f19c5525818212388dcbf778064e7f73d2c32a8a7e8c58d618583a77121a","tests/fixtures/mod.rs":"18a900aa4f74120b7e7e64fcb09eae38a16504d66e23f752e743dcd9b1ad6530","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/locale_canonicalizer.rs":"1ebf7320f422b65cc3cc50468abdc8f08128feba85d936f5beb456b0b052a91d"},"package":"6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7"} \ No newline at end of file diff --git a/vendor/icu_locid_transform/Cargo.toml b/vendor/icu_locid_transform/Cargo.toml new file mode 100644 index 000000000..b083c54a2 --- /dev/null +++ b/vendor/icu_locid_transform/Cargo.toml @@ -0,0 +1,128 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.66" +name = "icu_locid_transform" +version = "1.3.2" +authors = ["The ICU4X Project Developers"] +include = [ + "data/**/*", + "src/**/*", + "examples/**/*", + "benches/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md", +] +description = "API for Unicode Language and Locale Identifiers canonicalization" +homepage = "https://icu4x.unicode.org" +readme = "README.md" +categories = ["internationalization"] +license-file = "LICENSE" +repository = "https://github.com/unicode-org/icu4x" + +[package.metadata.cargo-all-features] +denylist = ["bench"] +skip_optional_dependencies = true + +[package.metadata.docs.rs] +all-features = true + +[lib] +bench = false + +[[test]] +name = "locale_canonicalizer" +required-features = ["serde"] + +[[bench]] +name = "locale_canonicalizer" +harness = false + +[dependencies.databake] +version = "0.1.6" +features = ["derive"] +optional = true +default-features = false + +[dependencies.displaydoc] +version = "0.2.3" +default-features = false + +[dependencies.icu_locid] +version = "~1.3.2" +features = ["zerovec"] +default-features = false + +[dependencies.icu_locid_transform_data] +version = "~1.3.2" +optional = true +default-features = false + +[dependencies.icu_provider] +version = "~1.3.2" +features = ["macros"] +default-features = false + +[dependencies.serde] +version = "1.0" +features = [ + "derive", + "alloc", +] +optional = true +default-features = false + +[dependencies.tinystr] +version = "0.7.3" +features = [ + "alloc", + "zerovec", +] +default-features = false + +[dependencies.zerovec] +version = "0.10.0" +features = ["yoke"] +default-features = false + +[dev-dependencies.serde] +version = "1.0" +features = ["derive"] + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +bench = ["serde"] +compiled_data = ["dep:icu_locid_transform_data"] +datagen = [ + "serde", + "dep:databake", + "zerovec/databake", + "icu_locid/databake", + "tinystr/databake", +] +default = ["compiled_data"] +serde = [ + "dep:serde", + "icu_locid/serde", + "tinystr/serde", + "zerovec/serde", + "icu_provider/serde", +] +std = [] + +[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion] +version = "0.4" diff --git a/vendor/icu_locid_transform/LICENSE b/vendor/icu_locid_transform/LICENSE new file mode 100644 index 000000000..9845aa5f4 --- /dev/null +++ b/vendor/icu_locid_transform/LICENSE @@ -0,0 +1,44 @@ +UNICODE LICENSE V3 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 2020-2023 Unicode, Inc. + +NOTICE TO USER: Carefully read the following legal agreement. BY +DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR +SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT +DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of data files and any associated documentation (the "Data Files") or +software and any associated documentation (the "Software") to deal in the +Data Files or Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, and/or sell +copies of the Data Files or Software, and to permit persons to whom the +Data Files or Software are furnished to do so, provided that either (a) +this copyright and permission notice appear with all copies of the Data +Files or Software, or (b) this copyright and permission notice appear in +associated Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF +THIRD PARTY RIGHTS. + +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE +BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, +OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA +FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall +not be used in advertising or otherwise to promote the sale, use or other +dealings in these Data Files or Software without prior written +authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. diff --git a/vendor/icu_locid_transform/README.md b/vendor/icu_locid_transform/README.md new file mode 100644 index 000000000..b97fea239 --- /dev/null +++ b/vendor/icu_locid_transform/README.md @@ -0,0 +1,77 @@ +# icu_locid_transform [![crates.io](https://img.shields.io/crates/v/icu_locid_transform)](https://crates.io/crates/icu_locid_transform) + + + +Canonicalization of locale identifiers based on [`CLDR`] data. + +This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/)) +and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. + +It currently supports locale canonicalization based upon the canonicalization +algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`], +as well as the minimize and maximize likely subtags algorithms +as described in [`UTS #35: Unicode LDML 3. Likely Subtags`]. + +The maximize method potentially updates a passed in locale in place +depending up the results of running the 'Add Likely Subtags' algorithm +from [`UTS #35: Unicode LDML 3. Likely Subtags`]. + +This minimize method returns a new Locale that is the result of running the +'Remove Likely Subtags' algorithm from [`UTS #35: Unicode LDML 3. Likely Subtags`]. + +## Examples + +```rust +use icu::locid::Locale; +use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; + +let lc = LocaleCanonicalizer::new(); + +let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" + .parse() + .expect("parse failed"); +assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); +assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::().unwrap()); +``` + +```rust +use icu::locid::locale; +use icu::locid_transform::{LocaleExpander, TransformResult}; + +let lc = LocaleExpander::new(); + +let mut locale = locale!("zh-CN"); +assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); +assert_eq!(locale, locale!("zh-Hans-CN")); + +let mut locale = locale!("zh-Hant-TW"); +assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified); +assert_eq!(locale, locale!("zh-Hant-TW")); +``` + +```rust +use icu::locid::locale; +use icu::locid_transform::{LocaleExpander, TransformResult}; +use writeable::assert_writeable_eq; + +let lc = LocaleExpander::new(); + +let mut locale = locale!("zh-Hans-CN"); +assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); +assert_eq!(locale, locale!("zh")); + +let mut locale = locale!("zh"); +assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified); +assert_eq!(locale, locale!("zh")); +``` + +[`ICU4X`]: ../icu/index.html +[`CLDR`]: http://cldr.unicode.org/ +[`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags. +[`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization, + + + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/vendor/icu_locid_transform/benches/fixtures/locales.json b/vendor/icu_locid_transform/benches/fixtures/locales.json new file mode 100644 index 000000000..0e8ba8b79 --- /dev/null +++ b/vendor/icu_locid_transform/benches/fixtures/locales.json @@ -0,0 +1,41 @@ +[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "und-PL", + "und-Latn-AM", + "ug-Cyrl", + "sr-ME", + "mn-Mong", + "lif-Limb", + "gan", + "zh-Hant", + "yue-Hans", + "unr", + "unr-Deva", + "und-Thai-CN", + "ug-Cyrl", + "en-Latn-DE", + "pl-FR", + "de-CH", + "tuq", + "sr-ME", + "ng", + "klx", + "kk-Arab", + "en-Cyrl", + "und-Cyrl-UK", + "und-Arab", + "und-Arab-FO" +] diff --git a/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json b/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json new file mode 100644 index 000000000..18eadbce6 --- /dev/null +++ b/vendor/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json @@ -0,0 +1,88 @@ +[ + "cka", + "cze", + "gfx", + "sgn-BR", + "sgn-DD", + "tam", + "und-aaland", + "nob-bokmal", + "no-nynorsk", + "und-Qaai", + "en-554", + "en-084", + "art-lojban", + "zh-guoyu", + "zh-hakka", + "zh-xiang", + "aar-x-private", + "heb-x-private", + "ces", + "hy-arevela", + "hy-arevmda", + "cel-gaulish", + "ja-latn-hepburn-heploc", + "ja-Latn-fonipa-hepburn-heploc", + "und-Armn-SU", + "sh", + "sh-Cyrl", + "cnr", + "cnr-BA", + "ru-SU", + "ru-810", + "en-SU", + "en-810", + "und-SU", + "und-810", + "und-Latn-SU", + "und-Latn-810", + "hy-SU", + "hy-810", + "und-Armn-SU", + "und-Armn-810", + "sr-CS", + "sr-Latn-CS", + "sr-Cyrl-CS", + "az-NT", + "sl-t-sl-rozaj-biske-1994", + "DE-T-M0-DIN-K0-QWERTZ", + "en-t-m0-true", + "en-t-iw", + "und-u-rg-no23", + "und-u-rg-cn11", + "und-u-rg-cz10a", + "und-u-rg-fra", + "und-u-rg-frg", + "und-u-rg-lud", + "und-NO-u-rg-no23", + "und-CN-u-rg-cn11", + "und-CZ-u-rg-cz10a", + "und-FR-u-rg-fra", + "und-FR-u-rg-frg", + "und-u-rg-lud", + "und-u-sd-no23", + "und-u-sd-cn11", + "und-u-sd-cz10a", + "und-u-sd-fra", + "hy-arevela", + "hy-Armn-arevela", + "hy-AM-arevela", + "hy-arevela-fonipa", + "hy-fonipa-arevela", + "hy-arevmda", + "hy-Armn-arevmda", + "hy-AM-arevmda", + "hy-arevmda-fonipa", + "hy-fonipa-arevmda", + "ja-Latn-hepburn-heploc", + "ja-Latn-JP-hepburn-heploc", + "sv-aaland", + "el-polytoni", + "ja-Latn-alalc97-hepburn-heploc", + "ja-Latn-hepburn-alalc97-heploc", + "ja-Latn-hepburn-heploc-alalc97", + "ja-Latn-heploc-hepburn", + "ja-Latn-heploc", + "ja-Latn-aaland-heploc", + "ja-Latn-heploc-polytoni" +] diff --git a/vendor/icu_locid_transform/benches/helpers/mod.rs b/vendor/icu_locid_transform/benches/helpers/mod.rs new file mode 100644 index 000000000..d250c510c --- /dev/null +++ b/vendor/icu_locid_transform/benches/helpers/mod.rs @@ -0,0 +1,15 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use std::fs::File; +use std::io::{BufReader, Error}; + +pub fn read_fixture(path: &str) -> Result +where + T: serde::de::DeserializeOwned, +{ + let file = File::open(path)?; + let reader = BufReader::new(file); + Ok(serde_json::from_reader(reader)?) +} diff --git a/vendor/icu_locid_transform/benches/locale_canonicalizer.rs b/vendor/icu_locid_transform/benches/locale_canonicalizer.rs new file mode 100644 index 000000000..1ea8df6b3 --- /dev/null +++ b/vendor/icu_locid_transform/benches/locale_canonicalizer.rs @@ -0,0 +1,99 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use icu_locid::Locale; +use icu_locid_transform::LocaleCanonicalizer; +use icu_locid_transform::LocaleExpander; + +fn canonicalize_bench(c: &mut Criterion) { + let lc = LocaleCanonicalizer::new(); + + let mut group = c.benchmark_group("uncanonicalized"); + + let path = "./benches/fixtures/uncanonicalized-locales.json"; + let data: Vec = helpers::read_fixture(path).expect("Failed to read a fixture"); + let locales: Vec = data.iter().map(|s| s.parse().unwrap()).collect(); + + group.bench_function("clone", |b| { + b.iter(|| { + for locale in &locales { + let _ = black_box(locale).clone(); + } + }) + }); + + group.bench_function("canonicalize", |b| { + b.iter(|| { + for locale in &locales { + let mut locale = black_box(locale).clone(); + lc.canonicalize(&mut locale); + } + }) + }); + + group.finish(); +} + +fn canonicalize_noop_bench(c: &mut Criterion) { + let lc = LocaleCanonicalizer::new(); + + let mut group = c.benchmark_group("canonicalized"); + + // None of these locales require canonicalization, so this measures the cost of calling + // the canonicalizer on locales that will not be modified. + let path = "./benches/fixtures/locales.json"; + let data: Vec = helpers::read_fixture(path).expect("Failed to read a fixture"); + let locales: Vec = data.iter().map(|s| s.parse().unwrap()).collect(); + + group.bench_function("clone", |b| { + b.iter(|| { + for locale in &locales { + let _ = black_box(locale).clone(); + } + }) + }); + + group.bench_function("canonicalize", |b| { + b.iter(|| { + for locale in &locales { + let mut locale = black_box(locale).clone(); + lc.canonicalize(&mut locale); + } + }) + }); + + group.finish(); +} + +fn maximize_bench(c: &mut Criterion) { + let lc = LocaleExpander::new(); + + let mut group = c.benchmark_group("likelysubtags"); + + let path = "./benches/fixtures/locales.json"; + let data: Vec = helpers::read_fixture(path).expect("Failed to read a fixture"); + let locales: Vec = data.iter().map(|s| s.parse().unwrap()).collect(); + + group.bench_function("maximize", |b| { + b.iter(|| { + for locale in &locales { + let mut locale = locale.clone(); + lc.maximize(black_box(&mut locale)); + } + }) + }); + + group.finish(); +} + +criterion_group!( + benches, + canonicalize_bench, + canonicalize_noop_bench, + maximize_bench +); +criterion_main!(benches); diff --git a/vendor/icu_locid_transform/src/canonicalizer.rs b/vendor/icu_locid_transform/src/canonicalizer.rs new file mode 100644 index 000000000..5a3782638 --- /dev/null +++ b/vendor/icu_locid_transform/src/canonicalizer.rs @@ -0,0 +1,618 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! The collection of code for locale canonicalization. + +use crate::provider::*; +use crate::LocaleTransformError; +use alloc::vec::Vec; +use core::cmp::Ordering; + +use crate::LocaleExpander; +use crate::TransformResult; +use icu_locid::subtags::{Language, Region, Script}; +use icu_locid::{ + extensions::unicode::key, + subtags::{language, Variant, Variants}, + LanguageIdentifier, Locale, +}; +use icu_provider::prelude::*; +use tinystr::TinyAsciiStr; + +/// Implements the algorithm defined in *[UTS #35: Annex C, LocaleId Canonicalization]*. +/// +/// # Examples +/// +/// ``` +/// use icu_locid::Locale; +/// use icu_locid_transform::{LocaleCanonicalizer, TransformResult}; +/// +/// let lc = LocaleCanonicalizer::new(); +/// +/// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); +/// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); +/// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap()); +/// ``` +/// +/// [UTS #35: Annex C, LocaleId Canonicalization]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization +#[derive(Debug)] +pub struct LocaleCanonicalizer { + /// Data to support canonicalization. + aliases: DataPayload, + /// Likely subtags implementation for delegation. + expander: LocaleExpander, +} + +#[inline] +fn uts35_rule_matches<'a, I>( + source: &Locale, + language: Language, + script: Option