From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 21:33:14 +0200
Subject: Adding upstream version 115.7.0esr.

Signed-off-by: Daniel Baumann
---
 .../rust/unic-langid-impl/benches/canonicalize.rs | 50 ++++++++++++
 .../rust/unic-langid-impl/benches/langid.rs | 84 +++++++++++++++++++++
 .../unic-langid-impl/benches/likely_subtags.rs | 88 ++++++++++++++++++++++
 .../rust/unic-langid-impl/benches/parser.rs | 82 ++++++++++++++++++++
 4 files changed, 304 insertions(+)
 create mode 100644 third_party/rust/unic-langid-impl/benches/canonicalize.rs
 create mode 100644 third_party/rust/unic-langid-impl/benches/langid.rs
 create mode 100644 third_party/rust/unic-langid-impl/benches/likely_subtags.rs
 create mode 100644 third_party/rust/unic-langid-impl/benches/parser.rs

(limited to 'third_party/rust/unic-langid-impl/benches')

diff --git a/third_party/rust/unic-langid-impl/benches/canonicalize.rs b/third_party/rust/unic-langid-impl/benches/canonicalize.rs
new file mode 100644
index 0000000000..a9e17e49aa
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/canonicalize.rs
@@ -0,0 +1,50 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use unic_langid_impl::canonicalize;
+
+fn langid_canonicalize_bench(c: &mut Criterion) {
+    let strings = &[
+        "En_uS",
+        "EN-GB",
+        "ES-aR",
+        "iT",
+        "zH_HaNs_cN",
+        "dE-aT",
+        "Pl",
+        "FR-FR",
+        "de_AT",
+        "sR-CyrL_sr",
+        "NB-NO",
+        "fr_fr",
+        "Mk",
+        "uK",
+        "en-us",
+        "en_gb",
+        "ES-AR",
+        "tH",
+        "DE",
+        "ZH_cyrl_hN",
+        "eN-lAtN-uS",
+    ];
+    c.bench_function("langid_canonicalize", |b| {
+        b.iter(|| {
+            for s in strings {
+                let _ = canonicalize(black_box(s));
+            }
+        })
+    });
+    c.bench_function("langid_canonicalize_from_bytes", |b| {
+        let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect();
+        b.iter(|| {
+            for s in &slices {
+                let _ = canonicalize(black_box(s));
+            }
+        })
+    });
+}
+
+criterion_group!(benches, langid_canonicalize_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/unic-langid-impl/benches/langid.rs b/third_party/rust/unic-langid-impl/benches/langid.rs
new file mode 100644
index 0000000000..026c288602
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/langid.rs
@@ -0,0 +1,84 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+use criterion::Fun;
+
+use unic_langid_impl::subtags;
+use unic_langid_impl::LanguageIdentifier;
+
+static STRINGS: &[&str] = &[
+    "en-US",
+    "en-GB",
+    "es-AR",
+    "it",
+    "zh-Hans-CN",
+    "de-AT",
+    "pl",
+    "fr-FR",
+    "de-AT",
+    "sr-Cyrl-SR",
+    "nb-NO",
+    "fr-FR",
+    "mk",
+    "uk",
+    "en-US",
+    "en-GB",
+    "es-AR",
+    "th",
+    "de",
+    "zh-Cyrl-HN",
+    "en-Latn-US",
+];
+
+fn language_identifier_construct_bench(c: &mut Criterion) {
+    let langids: Vec<LanguageIdentifier> = STRINGS
+        .iter()
+        .map(|s| -> LanguageIdentifier { s.parse().unwrap() })
+        .collect();
+
+    let funcs = vec![
+        Fun::new("from_str", |b, _| {
+            b.iter(|| {
+                for s in STRINGS {
+                    let _: Result<LanguageIdentifier, _> = black_box(s).parse();
+                }
+            })
+        }),
+        Fun::new("from_bytes", |b, _| {
+            let slices: Vec<&[u8]> = STRINGS.iter().map(|s| s.as_bytes()).collect();
+            b.iter(|| {
+                for s in &slices {
+                    let _ = LanguageIdentifier::from_bytes(black_box(s));
+                }
+            })
+        }),
+        Fun::new("from_parts", |b, langids: &Vec<LanguageIdentifier>| {
+            let entries: Vec<(
+                subtags::Language,
+                Option<subtags::Script>,
+                Option<subtags::Region>,
+                Vec<subtags::Variant>,
+            )> = langids
+                .iter()
+                .cloned()
+                .map(|langid| langid.into_parts())
+                .collect();
+            b.iter(|| {
+                for (language, script, region, variants) in &entries {
+                    let _ = LanguageIdentifier::from_parts(
+                        language.clone(),
+                        script.clone(),
+                        region.clone(),
+                        variants,
+                    );
+                }
+            })
+        }),
+    ];
+
+    c.bench_functions("language_identifier_construct", funcs, langids);
+}
+
+criterion_group!(benches, language_identifier_construct_bench,);
+criterion_main!(benches);
diff --git 
a/third_party/rust/unic-langid-impl/benches/likely_subtags.rs b/third_party/rust/unic-langid-impl/benches/likely_subtags.rs
new file mode 100644
index 0000000000..3b7f8746ee
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/likely_subtags.rs
@@ -0,0 +1,88 @@
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use unic_langid_impl::subtags;
+use unic_langid_impl::LanguageIdentifier;
+
+static STRINGS: &[&str] = &[
+    "en-US",
+    "en-GB",
+    "es-AR",
+    "it",
+    "zh-Hans-CN",
+    "de-AT",
+    "pl",
+    "fr-FR",
+    "de-AT",
+    "sr-Cyrl-SR",
+    "nb-NO",
+    "fr-FR",
+    "mk",
+    "uk",
+    "und-PL",
+    "und-Latn-AM",
+    "ug-Cyrl",
+    "sr-ME",
+    "mn-Mong",
+    "lif-Limb",
+    "gan",
+    "zh-Hant",
+    "yue-Hans",
+    "unr",
+    "unr-Deva",
+    "und-Thai-CN",
+    "ug-Cyrl",
+    "en-Latn-DE",
+    "pl-FR",
+    "de-CH",
+    "tuq",
+    "sr-ME",
+    "ng",
+    "klx",
+    "kk-Arab",
+    "en-Cyrl",
+    "und-Cyrl-UK",
+    "und-Arab",
+    "und-Arab-FO",
+];
+
+fn maximize_bench(c: &mut Criterion) {
+    let langids: Vec<LanguageIdentifier> = STRINGS
+        .iter()
+        .map(|s| -> LanguageIdentifier { s.parse().unwrap() })
+        .collect();
+    c.bench_function("maximize", move |b| {
+        b.iter(|| {
+            for mut s in langids.clone().into_iter() {
+                s.maximize();
+            }
+        })
+    });
+}
+
+fn extract_input(
+    s: &str,
+) -> (
+    subtags::Language,
+    Option<subtags::Script>,
+    Option<subtags::Region>,
+) {
+    let langid: LanguageIdentifier = s.parse().unwrap();
+    (langid.language, langid.script, langid.region)
+}
+
+fn raw_maximize_bench(c: &mut Criterion) {
+    let entries: Vec<_> = STRINGS.iter().map(|s| extract_input(s)).collect();
+
+    c.bench_function("raw_maximize", move |b| {
+        b.iter(|| {
+            for (lang, script, region) in entries.clone().into_iter() {
+                let _ = unic_langid_impl::likelysubtags::maximize(lang, script, region);
+            }
+        })
+    });
+}
+
+criterion_group!(benches, maximize_bench, raw_maximize_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/unic-langid-impl/benches/parser.rs b/third_party/rust/unic-langid-impl/benches/parser.rs
new file mode 100644
index 0000000000..97abe833fb
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/parser.rs
@@ -0,0 +1,82 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use unic_langid_impl::parser::parse_language_identifier;
+
+fn language_identifier_parser_bench(c: &mut Criterion) {
+    let strings = &[
+        "en-US",
+        "en-GB",
+        "es-AR",
+        "it",
+        "zh-Hans-CN",
+        "de-AT",
+        "pl",
+        "fr-FR",
+        "de-AT",
+        "sr-Cyrl-SR",
+        "nb-NO",
+        "fr-FR",
+        "mk",
+        "uk",
+        "en-US",
+        "en-GB",
+        "es-AR",
+        "th",
+        "de",
+        "zh-Cyrl-HN",
+        "en-Latn-US",
+    ];
+
+    c.bench_function("language_identifier_parser", |b| {
+        let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect();
+        b.iter(|| {
+            for s in &slices {
+                let _ = parse_language_identifier(black_box(s));
+            }
+        })
+    });
+}
+
+fn language_identifier_parser_casing_bench(c: &mut Criterion) {
+    let strings = &[
+        "En_uS",
+        "EN-GB",
+        "ES-aR",
+        "iT",
+        "zH_HaNs_cN",
+        "dE-aT",
+        "Pl",
+        "FR-FR",
+        "de_AT",
+        "sR-CyrL_sr",
+        "NB-NO",
+        "fr_fr",
+        "Mk",
+        "uK",
+        "en-us",
+        "en_gb",
+        "ES-AR",
+        "tH",
+        "DE",
+        "ZH_cyrl_hN",
+        "eN-lAtN-uS",
+    ];
+    c.bench_function("language_identifier_parser_casing", |b| {
+        let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect();
+        b.iter(|| {
+            for s in &slices {
+                let _ = parse_language_identifier(black_box(s));
+            }
+        })
+    });
+}
+
+criterion_group!(
+    benches,
+    language_identifier_parser_bench,
+    language_identifier_parser_casing_bench,
+);
+criterion_main!(benches);
-- 
cgit v1.2.3