From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/unic-langid-impl/benches/canonicalize.rs | 50 +++++++++++ vendor/unic-langid-impl/benches/langid.rs | 84 ++++++++++++++++++ vendor/unic-langid-impl/benches/likely_subtags.rs | 103 ++++++++++++++++++++++ vendor/unic-langid-impl/benches/parser.rs | 68 ++++++++++++++ 4 files changed, 305 insertions(+) create mode 100644 vendor/unic-langid-impl/benches/canonicalize.rs create mode 100644 vendor/unic-langid-impl/benches/langid.rs create mode 100644 vendor/unic-langid-impl/benches/likely_subtags.rs create mode 100644 vendor/unic-langid-impl/benches/parser.rs (limited to 'vendor/unic-langid-impl/benches') diff --git a/vendor/unic-langid-impl/benches/canonicalize.rs b/vendor/unic-langid-impl/benches/canonicalize.rs new file mode 100644 index 000000000..a9e17e49a --- /dev/null +++ b/vendor/unic-langid-impl/benches/canonicalize.rs @@ -0,0 +1,50 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use unic_langid_impl::canonicalize; + +fn langid_canonicalize_bench(c: &mut Criterion) { + let strings = &[ + "En_uS", + "EN-GB", + "ES-aR", + "iT", + "zH_HaNs_cN", + "dE-aT", + "Pl", + "FR-FR", + "de_AT", + "sR-CyrL_sr", + "NB-NO", + "fr_fr", + "Mk", + "uK", + "en-us", + "en_gb", + "ES-AR", + "tH", + "DE", + "ZH_cyrl_hN", + "eN-lAtN-uS", + ]; + c.bench_function("langid_canonicalize", |b| { + b.iter(|| { + for s in strings { + let _ = canonicalize(black_box(s)); + } + }) + }); + c.bench_function("langid_canonicalize_from_bytes", |b| { + let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = canonicalize(black_box(s)); + } + }) + }); +} + +criterion_group!(benches, langid_canonicalize_bench,); +criterion_main!(benches); diff --git 
a/vendor/unic-langid-impl/benches/langid.rs b/vendor/unic-langid-impl/benches/langid.rs new file mode 100644 index 000000000..026c28860 --- /dev/null +++ b/vendor/unic-langid-impl/benches/langid.rs @@ -0,0 +1,84 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; +use criterion::Fun; + +use unic_langid_impl::subtags; +use unic_langid_impl::LanguageIdentifier; + +static STRINGS: &[&str] = &[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US", +]; + +fn language_identifier_construct_bench(c: &mut Criterion) { + let langids: Vec<LanguageIdentifier> = STRINGS + .iter() + .map(|s| -> LanguageIdentifier { s.parse().unwrap() }) + .collect(); + + let funcs = vec![ + Fun::new("from_str", |b, _| { + b.iter(|| { + for s in STRINGS { + let _: Result<LanguageIdentifier, _> = black_box(s).parse(); + } + }) + }), + Fun::new("from_bytes", |b, _| { + let slices: Vec<&[u8]> = STRINGS.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = LanguageIdentifier::from_bytes(black_box(s)); + } + }) + }), + Fun::new("from_parts", |b, langids: &Vec<LanguageIdentifier>| { + let entries: Vec<( + subtags::Language, + Option<subtags::Script>, + Option<subtags::Region>, + Vec<subtags::Variant>, + )> = langids + .iter() + .cloned() + .map(|langid| langid.into_parts()) + .collect(); + b.iter(|| { + for (language, script, region, variants) in &entries { + let _ = LanguageIdentifier::from_parts( + language.clone(), + script.clone(), + region.clone(), + variants, + ); + } + }) + }), + ]; + + c.bench_functions("language_identifier_construct", funcs, langids); +} + +criterion_group!(benches, language_identifier_construct_bench,); +criterion_main!(benches); diff --git a/vendor/unic-langid-impl/benches/likely_subtags.rs b/vendor/unic-langid-impl/benches/likely_subtags.rs new file mode 100644 index 000000000..a01666597 --- /dev/null +++ 
b/vendor/unic-langid-impl/benches/likely_subtags.rs @@ -0,0 +1,103 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use tinystr::{TinyStr4, TinyStr8}; +use unic_langid_impl::LanguageIdentifier; + +static STRINGS: &[&str] = &[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "und-PL", + "und-Latn-AM", + "ug-Cyrl", + "sr-ME", + "mn-Mong", + "lif-Limb", + "gan", + "zh-Hant", + "yue-Hans", + "unr", + "unr-Deva", + "und-Thai-CN", + "ug-Cyrl", + "en-Latn-DE", + "pl-FR", + "de-CH", + "tuq", + "sr-ME", + "ng", + "klx", + "kk-Arab", + "en-Cyrl", + "und-Cyrl-UK", + "und-Arab", + "und-Arab-FO", +]; + +fn maximize_bench(c: &mut Criterion) { + c.bench_function("maximize", move |b| { + b.iter(|| { + let langids: Vec<LanguageIdentifier> = STRINGS + .iter() + .map(|s| -> LanguageIdentifier { s.parse().unwrap() }) + .collect(); + for mut s in langids { + s.maximize(); + let _ = black_box(s.to_string()); + } + }) + }); +} + +fn extract_input(s: &str) -> (Option<TinyStr8>, Option<TinyStr4>, Option<TinyStr4>) { + let chunks: Vec<&str> = s.split("-").collect(); + let mut lang: Option<TinyStr8> = chunks.get(0).map(|s| s.parse().unwrap()); + let mut script: Option<TinyStr4> = chunks.get(1).map(|s| s.parse().unwrap()); + let mut region: Option<TinyStr4> = chunks.get(2).map(|s| s.parse().unwrap()); + if let Some(l) = lang { + if l.as_str() == "und" { + lang = None; + } + } + if let Some(s) = script { + if s.as_str().chars().count() == 2 { + region = script; + script = None; + } + } + (lang, script, region) +} + +fn raw_maximize_bench(c: &mut Criterion) { + let entries: Vec<(Option<TinyStr8>, Option<TinyStr4>, Option<TinyStr4>)> = + STRINGS.iter().map(|s| extract_input(s)).collect(); + + c.bench_function("raw_maximize", move |b| { + b.iter(|| { + for (lang, script, region) in &entries { + let _ = unic_langid_impl::likelysubtags::maximize( + lang.clone(), + script.clone(), + region.clone(), + ); + } + }) + }); +} + 
+criterion_group!(benches, maximize_bench, raw_maximize_bench,); +criterion_main!(benches); diff --git a/vendor/unic-langid-impl/benches/parser.rs b/vendor/unic-langid-impl/benches/parser.rs new file mode 100644 index 000000000..43c7a3282 --- /dev/null +++ b/vendor/unic-langid-impl/benches/parser.rs @@ -0,0 +1,68 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use unic_langid_impl::parser::parse_language_identifier; + +fn language_identifier_parser_bench(c: &mut Criterion) { + let strings = &[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + ]; + + c.bench_function("language_identifier_parser", |b| { + let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = parse_language_identifier(black_box(s)); + } + }) + }); +} + +fn language_identifier_parser_casing_bench(c: &mut Criterion) { + let strings = &[ + "En_uS", + "EN-GB", + "ES-aR", + "iT", + "zH_HaNs_cN", + "dE-aT", + "Pl", + "FR-FR", + "de_AT", + "sR-CyrL_sr", + "NB-NO", + "fr_fr", + "Mk", + "uK", + ]; + c.bench_function("language_identifier_parser_casing", |b| { + let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = parse_language_identifier(black_box(s)); + } + }) + }); +} + +criterion_group!( + benches, + language_identifier_parser_bench, + language_identifier_parser_casing_bench, +); +criterion_main!(benches); -- cgit v1.2.3