From 5363f350887b1e5b5dd21a86f88c8af9d7fea6da Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:25 +0200 Subject: Merging upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_locid/benches/fixtures/langid.json | 48 ++++++++++ vendor/icu_locid/benches/fixtures/locale.json | 26 ++++++ vendor/icu_locid/benches/fixtures/mod.rs | 25 ++++++ vendor/icu_locid/benches/fixtures/subtags.json | 18 ++++ vendor/icu_locid/benches/helpers/macros.rs | 110 +++++++++++++++++++++++ vendor/icu_locid/benches/helpers/mod.rs | 17 ++++ vendor/icu_locid/benches/iai_langid.rs | 118 +++++++++++++++++++++++++ vendor/icu_locid/benches/langid.rs | 93 +++++++++++++++++++ vendor/icu_locid/benches/locale.rs | 87 ++++++++++++++++++ vendor/icu_locid/benches/subtags.rs | 39 ++++++++ 10 files changed, 581 insertions(+) create mode 100644 vendor/icu_locid/benches/fixtures/langid.json create mode 100644 vendor/icu_locid/benches/fixtures/locale.json create mode 100644 vendor/icu_locid/benches/fixtures/mod.rs create mode 100644 vendor/icu_locid/benches/fixtures/subtags.json create mode 100644 vendor/icu_locid/benches/helpers/macros.rs create mode 100644 vendor/icu_locid/benches/helpers/mod.rs create mode 100644 vendor/icu_locid/benches/iai_langid.rs create mode 100644 vendor/icu_locid/benches/langid.rs create mode 100644 vendor/icu_locid/benches/locale.rs create mode 100644 vendor/icu_locid/benches/subtags.rs (limited to 'vendor/icu_locid/benches') diff --git a/vendor/icu_locid/benches/fixtures/langid.json b/vendor/icu_locid/benches/fixtures/langid.json new file mode 100644 index 000000000..43c56d5a2 --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/langid.json @@ -0,0 +1,48 @@ +{ + "canonicalized": [ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US" + ], + "casing": [ + "En_uS", + "EN-GB", + "ES-aR", + "iT", + "zH_HaNs_cN", + "dE-aT", + "Pl", + "FR-FR", + "de_AT", + "sR-CyrL_sr", + "NB-NO", + "fr_fr", + "Mk", + "uK", + "en-us", + "en_gb", + "ES-AR", + "tH", + "DE", + "ZH_cyrl_hN", + "eN-lAtN-uS" + ] +} diff --git a/vendor/icu_locid/benches/fixtures/locale.json b/vendor/icu_locid/benches/fixtures/locale.json new file mode 100644 index 000000000..f974a166f --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/locale.json @@ -0,0 +1,26 @@ +{ + "canonicalized": [ + "en-US-u-hc-h12", + "en-GB-u-ca-gregory-hc-h12", + "es-AR-x-private", + "th-u-ca-buddhist", + "de-u-co-phonebk-ka-shifted", + "ar-u-nu-native", + "ar-u-nu-latn", + "ja-t-it", + "ja-Kana-t-it", + "und-Latn-t-und-cyrl" + ], + "casing": [ + "en-US-U-hc-h12", + "en-GB-u-CA-gregory-hc-h12", + "es-AR-x-Private", + "th-u-ca-buDDhist", + "de-u-co-phonebk-KA-shifted", + "AR_U-NU-native", + "ar-u-nu-LaTN", + "jA-T-it", + "ja-kanA-T-IT", + "unD-Latn-T-und-cyrl" + ] +} diff --git a/vendor/icu_locid/benches/fixtures/mod.rs b/vendor/icu_locid/benches/fixtures/mod.rs new file mode 100644 index 000000000..006b22312 --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/mod.rs @@ -0,0 +1,25 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use serde::Deserialize; + +#[derive(Deserialize)] +pub struct SubtagData { + pub valid: Vec, + pub invalid: Vec, +} + +#[derive(Deserialize)] +pub struct Subtags { + pub language: SubtagData, + pub script: SubtagData, + pub region: SubtagData, + pub variant: SubtagData, +} + +#[derive(Deserialize)] +pub struct LocaleList { + pub canonicalized: Vec, + pub casing: Vec, +} diff --git a/vendor/icu_locid/benches/fixtures/subtags.json b/vendor/icu_locid/benches/fixtures/subtags.json new file mode 100644 index 000000000..cf8419cc9 --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/subtags.json @@ -0,0 +1,18 @@ +{ + "language": { + "valid": ["en", "it", "pl", "de", "fr", "cs", "csb", "und", "ru", "nb", "NB", "UK", "pL", "Zh", "ES"], + "invalid": ["", "1", "$", "a1", "1211", "as_sa^a", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + }, + "script": { + "valid": ["Latn", "latn", "Arab", "xxxx", "Flan", "fAlA", "oOoO", "pPlQ", "esta", "RUSS"], + "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + }, + "region": { + "valid": ["DE", "321", "zh", "IA", "fN", "rU", "ru", "RU", "Ru", "CN", "AR"], + "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + }, + "variant": { + "valid": ["macos", "MaCoS", "windows", "posix", "POSIX", "Posix", "linux", "lINUX", "mAcOs", "testing", "WWWWWW"], + "invalid": ["", "1", "$", "a1", "a211", "ass__aa", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + } +} diff --git a/vendor/icu_locid/benches/helpers/macros.rs b/vendor/icu_locid/benches/helpers/macros.rs new file mode 100644 index 000000000..848a360c4 --- /dev/null +++ b/vendor/icu_locid/benches/helpers/macros.rs @@ -0,0 +1,110 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#[macro_export] +macro_rules! overview { + ($c:expr, $struct:ident, $data_str:expr, $compare:expr) => { + $c.bench_function("overview", |b| { + b.iter(|| { + let mut values = vec![]; + for s in $data_str { + let value: Result<$struct, _> = black_box(s).parse(); + values.push(value.expect("Parsing failed")); + } + let _ = values + .iter() + .filter(|&v| v.normalizing_eq($compare)) + .count(); + + values + .iter() + .map(|v| v.to_string()) + .collect::>() + }) + }); + }; +} + +#[macro_export] +macro_rules! construct { + ($c:expr, $struct:ident, $struct_name:expr, $data_str:expr) => { + $c.bench_function($struct_name, |b| { + b.iter(|| { + for s in $data_str { + let _: Result<$struct, _> = black_box(s).parse(); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! to_string { + ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => { + $c.bench_function($struct_name, |b| { + b.iter(|| { + for s in $data { + let _ = black_box(s).to_string(); + } + }) + }); + $c.bench_function(std::concat!($struct_name, "/writeable"), |b| { + use writeable::Writeable; + b.iter(|| { + for s in $data { + let _ = black_box(s).write_to_string(); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! compare_struct { + ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => { + $c.bench_function(BenchmarkId::new("struct", $struct_name), |b| { + b.iter(|| { + for (lid1, lid2) in $data1.iter().zip($data2.iter()) { + let _ = black_box(lid1) == black_box(lid2); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! compare_str { + ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => { + $c.bench_function(BenchmarkId::new("str", $struct_name), |b| { + b.iter(|| { + for (lid, s) in $data1.iter().zip($data2.iter()) { + let _ = black_box(lid).normalizing_eq(&black_box(s)); + } + }) + }); + $c.bench_function(BenchmarkId::new("strict_cmp", $struct_name), |b| { + b.iter(|| { + for (lid, s) in $data1.iter().zip($data2.iter()) { + let _ = black_box(lid).strict_cmp(&black_box(s).as_str().as_bytes()); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! canonicalize { + ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => { + $c.bench_function($struct_name, |b| { + b.iter(|| { + for s in $data { + let _ = black_box(s).to_string(); + } + for s in $data { + let _ = $struct::canonicalize(black_box(s)); + } + }) + }); + }; +} diff --git a/vendor/icu_locid/benches/helpers/mod.rs b/vendor/icu_locid/benches/helpers/mod.rs new file mode 100644 index 000000000..27e455f7b --- /dev/null +++ b/vendor/icu_locid/benches/helpers/mod.rs @@ -0,0 +1,17 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod macros; + +use std::fs::File; +use std::io::{BufReader, Error}; + +pub fn read_fixture(path: &str) -> Result +where + T: serde::de::DeserializeOwned, +{ + let file = File::open(path)?; + let reader = BufReader::new(file); + Ok(serde_json::from_reader(reader)?) +} diff --git a/vendor/icu_locid/benches/iai_langid.rs b/vendor/icu_locid/benches/iai_langid.rs new file mode 100644 index 000000000..f964d1462 --- /dev/null +++ b/vendor/icu_locid/benches/iai_langid.rs @@ -0,0 +1,118 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use icu_locid::{ + langid, subtags_language as language, subtags_region as region, LanguageIdentifier, +}; + +const LIDS: &[LanguageIdentifier] = &[ + langid!("en"), + langid!("pl"), + langid!("fr-CA"), + langid!("zh-Hans"), + langid!("en-US"), + langid!("en-Latn-US"), + langid!("sr-Cyrl-BA"), +]; + +const LIDS_STR: &[&str] = &[ + "en", + "pl", + "fr-CA", + "zh-Hans", + "en-US", + "en-Latn-US", + "sr-Cyrl-BA", +]; + +fn bench_langid_constr() { + // Tests the instructions required to construct a LID from an str. + + let _: Vec = LIDS_STR + .iter() + .map(|l| l.parse().expect("Failed to parse")) + .collect(); +} + +fn bench_langid_compare_components() { + // Tests the cost of comparing LID components. + + let result = LIDS + .iter() + .filter(|l| l.language == language!("en") && l.region == Some(region!("US"))) + .count(); + + assert_eq!(result, 2); +} + +fn bench_langid_compare_components_str() { + // Tests the cost of comparing LID components to str. + + let result = LIDS + .iter() + .filter(|l| { + l.language == language!("en") && l.region.map(|r| r == region!("US")).unwrap_or(false) + }) + .count(); + + assert_eq!(result, 2); +} + +fn bench_langid_strict_cmp() { + // Tests the cost of comparing a langid against byte strings. + use core::cmp::Ordering; + + let lid = langid!("en_us"); + + let result = LIDS_STR + .iter() + .filter(|s| lid.strict_cmp(s.as_bytes()) == Ordering::Equal) + .count(); + + assert_eq!(result, 1); +} + +fn bench_langid_matching() { + // Tests matching a LID against other LIDs. + + let lid = langid!("en_us"); + + let count = LIDS.iter().filter(|l| lid == **l).count(); + assert_eq!(count, 1); +} + +fn bench_langid_matching_str() { + // Tests matching a LID against list of str. + + let lid = langid!("en_us"); + + let count = LIDS_STR.iter().filter(|&l| lid.normalizing_eq(l)).count(); + assert_eq!(count, 1); +} + +fn bench_langid_serialize() { + // Tests serialization of LIDs. + + let _: Vec = LIDS.iter().map(|l| l.to_string()).collect(); +} + +fn bench_langid_canonicalize() { + // Tests canonicalization of strings. + + let _: Vec = LIDS_STR + .iter() + .map(|l| LanguageIdentifier::canonicalize(l).expect("Canonicalization failed")) + .collect(); +} + +iai::main!( + bench_langid_constr, + bench_langid_compare_components, + bench_langid_compare_components_str, + bench_langid_strict_cmp, + bench_langid_matching, + bench_langid_matching_str, + bench_langid_serialize, + bench_langid_canonicalize, +); diff --git a/vendor/icu_locid/benches/langid.rs b/vendor/icu_locid/benches/langid.rs new file mode 100644 index 000000000..e5c9b6734 --- /dev/null +++ b/vendor/icu_locid/benches/langid.rs @@ -0,0 +1,93 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod fixtures; +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use icu_locid::LanguageIdentifier; + +fn langid_benches(c: &mut Criterion) { + let path = "./benches/fixtures/langid.json"; + let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture"); + + // Overview + { + let mut group = c.benchmark_group("langid"); + + overview!(group, LanguageIdentifier, &data.canonicalized, "en-US"); + + group.finish(); + } + + #[cfg(feature = "bench")] + { + use criterion::BenchmarkId; + + // Construct + { + let mut group = c.benchmark_group("langid/construct"); + + construct!(group, LanguageIdentifier, "langid", &data.canonicalized); + + group.finish(); + } + + // Stringify + { + let mut group = c.benchmark_group("langid/to_string"); + + let langids: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + to_string!(group, LanguageIdentifier, "langid", &langids); + + group.finish(); + } + + // Compare + { + let mut group = c.benchmark_group("langid/compare"); + + let langids: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + let langids2: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + compare_struct!(group, LanguageIdentifier, "langid", &langids, &langids2); + + compare_str!( + group, + LanguageIdentifier, + "langid", + &langids, + &data.canonicalized + ); + + group.finish(); + } + + // Canonicalize + { + let mut group = c.benchmark_group("langid/canonicalize"); + + canonicalize!(group, LanguageIdentifier, "langid", &data.casing); + + group.finish(); + } + } +} + +criterion_group!(benches, langid_benches,); +criterion_main!(benches); diff --git a/vendor/icu_locid/benches/locale.rs b/vendor/icu_locid/benches/locale.rs new file mode 100644 index 000000000..948fbb5e8 --- /dev/null +++ b/vendor/icu_locid/benches/locale.rs @@ -0,0 +1,87 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod fixtures; +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use icu_locid::Locale; + +fn locale_benches(c: &mut Criterion) { + let path = "./benches/fixtures/locale.json"; + let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture"); + + // Overview + { + let mut group = c.benchmark_group("locale"); + + overview!(group, Locale, &data.canonicalized, "en-US"); + + group.finish(); + } + + #[cfg(feature = "bench")] + { + use criterion::BenchmarkId; + + // Construct + { + let mut group = c.benchmark_group("locale/construct"); + + construct!(group, Locale, "locale", &data.canonicalized); + + group.finish(); + } + + // Stringify + { + let mut group = c.benchmark_group("locale/to_string"); + + let locales: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + to_string!(group, Locale, "locale", &locales); + + group.finish(); + } + + // Compare + { + let mut group = c.benchmark_group("locale/compare"); + + let locales: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + let locales2: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + compare_struct!(group, Locale, "locale", &locales, &locales2); + + compare_str!(group, Locale, "locale", &locales, &data.canonicalized); + + group.finish(); + } + + // Canonicalize + { + let mut group = c.benchmark_group("locale/canonicalize"); + + canonicalize!(group, Locale, "locale", &data.casing); + + group.finish(); + } + } +} + +criterion_group!(benches, locale_benches,); +criterion_main!(benches); diff --git a/vendor/icu_locid/benches/subtags.rs b/vendor/icu_locid/benches/subtags.rs new file mode 100644 index 000000000..4f81b71d2 --- /dev/null +++ b/vendor/icu_locid/benches/subtags.rs @@ -0,0 +1,39 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod fixtures; +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use icu_locid::subtags::{Language, Region, Script, Variant}; +use icu_locid::ParserError; + +macro_rules! subtag_bench { + ($c:expr, $name:expr, $subtag:ident, $data:expr) => { + $c.bench_function(&format!("subtags/{}/parse", $name), |b| { + b.iter(|| { + for s in &$data.valid { + let _: $subtag = black_box(s).parse().unwrap(); + } + for s in &$data.invalid { + let _: ParserError = black_box(s).parse::<$subtag>().unwrap_err(); + } + }) + }); + }; +} + +fn subtags_bench(c: &mut Criterion) { + let path = "./benches/fixtures/subtags.json"; + let data: fixtures::Subtags = helpers::read_fixture(path).expect("Failed to read a fixture"); + + subtag_bench!(c, "language", Language, data.language); + subtag_bench!(c, "script", Script, data.script); + subtag_bench!(c, "region", Region, data.region); + subtag_bench!(c, "variant", Variant, data.variant); +} + +criterion_group!(benches, subtags_bench,); +criterion_main!(benches); -- cgit v1.2.3