summary | refs | log | tree | commit | diff | stats
path: root/third_party/rust/unic-langid-impl/benches
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/unic-langid-impl/benches')
-rw-r--r-- third_party/rust/unic-langid-impl/benches/canonicalize.rs 50
-rw-r--r-- third_party/rust/unic-langid-impl/benches/langid.rs 84
-rw-r--r-- third_party/rust/unic-langid-impl/benches/likely_subtags.rs 88
-rw-r--r-- third_party/rust/unic-langid-impl/benches/parser.rs 82
4 files changed, 304 insertions, 0 deletions
diff --git a/third_party/rust/unic-langid-impl/benches/canonicalize.rs b/third_party/rust/unic-langid-impl/benches/canonicalize.rs
new file mode 100644
index 0000000000..a9e17e49aa
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/canonicalize.rs
@@ -0,0 +1,50 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use unic_langid_impl::canonicalize;
+
+/// Benchmarks `canonicalize` over a fixed set of language identifiers written
+/// with mixed casing and a mix of `-` and `_` separators.
+///
+/// Two benchmarks are registered on `c`:
+///
+/// * `langid_canonicalize` passes each input string by reference;
+/// * `langid_canonicalize_from_bytes` passes the same inputs as byte slices.
+fn langid_canonicalize_bench(c: &mut Criterion) {
+    let strings = &[
+        "En_uS",
+        "EN-GB",
+        "ES-aR",
+        "iT",
+        "zH_HaNs_cN",
+        "dE-aT",
+        "Pl",
+        "FR-FR",
+        "de_AT",
+        "sR-CyrL_sr",
+        "NB-NO",
+        "fr_fr",
+        "Mk",
+        "uK",
+        "en-us",
+        "en_gb",
+        "ES-AR",
+        "tH",
+        "DE",
+        "ZH_cyrl_hN",
+        "eN-lAtN-uS",
+    ];
+    c.bench_function("langid_canonicalize", |b| {
+        b.iter(|| {
+            for s in strings {
+                // The result goes through `black_box` too: an output that is
+                // never observed lets the optimizer drop the call being timed.
+                let _ = black_box(canonicalize(black_box(s)));
+            }
+        })
+    });
+    c.bench_function("langid_canonicalize_from_bytes", |b| {
+        // Converted once, outside the timed closure.
+        let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect();
+        b.iter(|| {
+            for s in &slices {
+                let _ = black_box(canonicalize(black_box(s)));
+            }
+        })
+    });
+}
+
+criterion_group!(benches, langid_canonicalize_bench,); // Group `benches` holds this file's only benchmark function.
+criterion_main!(benches); // Benchmark entry point, built from the `benches` group.
diff --git a/third_party/rust/unic-langid-impl/benches/langid.rs b/third_party/rust/unic-langid-impl/benches/langid.rs
new file mode 100644
index 0000000000..026c288602
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/langid.rs
@@ -0,0 +1,84 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+use criterion::Fun;
+
+use unic_langid_impl::subtags;
+use unic_langid_impl::LanguageIdentifier;
+
+static STRINGS: &[&str] = &[ // Input identifiers shared by the construction benchmarks in this file.
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "it",
+ "zh-Hans-CN",
+ "de-AT",
+ "pl",
+ "fr-FR",
+ "de-AT",
+ "sr-Cyrl-SR",
+ "nb-NO",
+ "fr-FR",
+ "mk",
+ "uk",
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "th",
+ "de",
+ "zh-Cyrl-HN",
+ "en-Latn-US",
+];
+
+/// Benchmarks three ways of building a `LanguageIdentifier` and registers them
+/// together under the name `language_identifier_construct`:
+///
+/// * `from_str` parses every entry of `STRINGS` through `str::parse`;
+/// * `from_bytes` calls `LanguageIdentifier::from_bytes` on the same entries
+///   viewed as byte slices;
+/// * `from_parts` calls `LanguageIdentifier::from_parts` with subtags that were
+///   split out up front via `into_parts`.
+///
+/// # Panics
+///
+/// Panics during setup if any entry of `STRINGS` fails to parse.
+fn language_identifier_construct_bench(c: &mut Criterion) {
+    // Pre-parsed identifiers; handed to `bench_functions` as the shared input.
+    let langids: Vec<LanguageIdentifier> = STRINGS
+        .iter()
+        .map(|s| -> LanguageIdentifier { s.parse().unwrap() })
+        .collect();
+
+    let funcs = vec![
+        Fun::new("from_str", |b, _| {
+            b.iter(|| {
+                for s in STRINGS {
+                    // Results are passed through `black_box` so the optimizer
+                    // cannot discard the construction that is being timed.
+                    let _ = black_box(black_box(s).parse::<LanguageIdentifier>());
+                }
+            })
+        }),
+        Fun::new("from_bytes", |b, _| {
+            // Converted once, outside the timed closure.
+            let slices: Vec<&[u8]> = STRINGS.iter().map(|s| s.as_bytes()).collect();
+            b.iter(|| {
+                for s in &slices {
+                    let _ = black_box(LanguageIdentifier::from_bytes(black_box(s)));
+                }
+            })
+        }),
+        Fun::new("from_parts", |b, langids: &Vec<LanguageIdentifier>| {
+            // Split every identifier into its parts before timing starts, so
+            // the timed loop only measures reassembly (plus the subtag clones).
+            let entries: Vec<(
+                subtags::Language,
+                Option<subtags::Script>,
+                Option<subtags::Region>,
+                Vec<subtags::Variant>,
+            )> = langids
+                .iter()
+                .cloned()
+                .map(|langid| langid.into_parts())
+                .collect();
+            b.iter(|| {
+                for (language, script, region, variants) in &entries {
+                    let _ = black_box(LanguageIdentifier::from_parts(
+                        language.clone(),
+                        script.clone(),
+                        region.clone(),
+                        variants,
+                    ));
+                }
+            })
+        }),
+    ];
+
+    c.bench_functions("language_identifier_construct", funcs, langids);
+}
+
+criterion_group!(benches, language_identifier_construct_bench,); // Group `benches` holds this file's only benchmark function.
+criterion_main!(benches); // Benchmark entry point, built from the `benches` group.
diff --git a/third_party/rust/unic-langid-impl/benches/likely_subtags.rs b/third_party/rust/unic-langid-impl/benches/likely_subtags.rs
new file mode 100644
index 0000000000..3b7f8746ee
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/likely_subtags.rs
@@ -0,0 +1,88 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use unic_langid_impl::subtags;
+use unic_langid_impl::LanguageIdentifier;
+
+static STRINGS: &[&str] = &[ // Input identifiers shared by both maximize benchmarks in this file.
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "it",
+ "zh-Hans-CN",
+ "de-AT",
+ "pl",
+ "fr-FR",
+ "de-AT",
+ "sr-Cyrl-SR",
+ "nb-NO",
+ "fr-FR",
+ "mk",
+ "uk",
+ "und-PL",
+ "und-Latn-AM",
+ "ug-Cyrl",
+ "sr-ME",
+ "mn-Mong",
+ "lif-Limb",
+ "gan",
+ "zh-Hant",
+ "yue-Hans",
+ "unr",
+ "unr-Deva",
+ "und-Thai-CN",
+ "ug-Cyrl",
+ "en-Latn-DE",
+ "pl-FR",
+ "de-CH",
+ "tuq",
+ "sr-ME",
+ "ng",
+ "klx",
+ "kk-Arab",
+ "en-Cyrl",
+ "und-Cyrl-UK",
+ "und-Arab",
+ "und-Arab-FO",
+];
+
+/// Benchmarks `LanguageIdentifier::maximize` over every entry of `STRINGS`,
+/// registered on `c` under the name `maximize`.
+///
+/// `maximize` is called on a mutable binding, so each timed iteration works on
+/// a fresh clone of the pre-parsed identifiers; that clone is part of the
+/// measured time.
+///
+/// # Panics
+///
+/// Panics during setup if any entry of `STRINGS` fails to parse.
+fn maximize_bench(c: &mut Criterion) {
+    let langids: Vec<LanguageIdentifier> = STRINGS
+        .iter()
+        .map(|s| -> LanguageIdentifier { s.parse().unwrap() })
+        .collect();
+    c.bench_function("maximize", move |b| {
+        b.iter(|| {
+            for mut langid in langids.clone() {
+                langid.maximize();
+                // Hand the mutated identifier to `black_box` so the work done
+                // by `maximize` is observed and cannot be optimized away.
+                let _ = black_box(langid);
+            }
+        })
+    });
+}
+
+/// Parses `s` as a `LanguageIdentifier` and returns its `language`, `script`
+/// and `region` fields as a tuple, in that order.
+///
+/// # Panics
+///
+/// Panics if `s` does not parse as a `LanguageIdentifier`.
+fn extract_input(
+    s: &str,
+) -> (
+    subtags::Language,
+    Option<subtags::Script>,
+    Option<subtags::Region>,
+) {
+    // Take the three subtags out of the parsed identifier and ignore the rest.
+    let LanguageIdentifier {
+        language,
+        script,
+        region,
+        ..
+    } = s.parse::<LanguageIdentifier>().unwrap();
+    (language, script, region)
+}
+
+/// Benchmarks the free function `unic_langid_impl::likelysubtags::maximize` on
+/// the `(language, script, region)` triples extracted from `STRINGS`,
+/// registered on `c` under the name `raw_maximize`.
+///
+/// Each timed iteration clones the prepared triples before consuming them; the
+/// clone is part of the measured time.
+///
+/// # Panics
+///
+/// Panics during setup if any entry of `STRINGS` fails to parse.
+fn raw_maximize_bench(c: &mut Criterion) {
+    let entries: Vec<_> = STRINGS.iter().map(|s| extract_input(s)).collect();
+
+    c.bench_function("raw_maximize", move |b| {
+        b.iter(|| {
+            for (lang, script, region) in entries.clone() {
+                // The result is passed through `black_box` so the optimizer
+                // cannot discard the lookup that is being timed.
+                let _ = black_box(unic_langid_impl::likelysubtags::maximize(
+                    lang, script, region,
+                ));
+            }
+        })
+    });
+}
+
+criterion_group!(benches, maximize_bench, raw_maximize_bench,); // Group `benches` holds both maximize benchmarks.
+criterion_main!(benches); // Benchmark entry point, built from the `benches` group.
diff --git a/third_party/rust/unic-langid-impl/benches/parser.rs b/third_party/rust/unic-langid-impl/benches/parser.rs
new file mode 100644
index 0000000000..97abe833fb
--- /dev/null
+++ b/third_party/rust/unic-langid-impl/benches/parser.rs
@@ -0,0 +1,82 @@
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use unic_langid_impl::parser::parse_language_identifier;
+
+/// Benchmarks `parse_language_identifier` on byte-slice inputs whose subtags
+/// are separated by `-`.
+///
+/// Registers a single benchmark named `language_identifier_parser` on `c`.
+fn language_identifier_parser_bench(c: &mut Criterion) {
+    let strings = &[
+        "en-US",
+        "en-GB",
+        "es-AR",
+        "it",
+        "zh-Hans-CN",
+        "de-AT",
+        "pl",
+        "fr-FR",
+        "de-AT",
+        "sr-Cyrl-SR",
+        "nb-NO",
+        "fr-FR",
+        "mk",
+        "uk",
+        "en-US",
+        "en-GB",
+        "es-AR",
+        "th",
+        "de",
+        "zh-Cyrl-HN",
+        "en-Latn-US",
+    ];
+
+    c.bench_function("language_identifier_parser", |b| {
+        // Converted once, outside the timed closure.
+        let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect();
+        b.iter(|| {
+            for s in &slices {
+                // The result goes through `black_box` too: an output that is
+                // never observed lets the optimizer drop the call being timed.
+                let _ = black_box(parse_language_identifier(black_box(s)));
+            }
+        })
+    });
+}
+
+/// Benchmarks `parse_language_identifier` on byte-slice inputs written with
+/// mixed casing and a mix of `-` and `_` separators.
+///
+/// Registers a single benchmark named `language_identifier_parser_casing` on `c`.
+fn language_identifier_parser_casing_bench(c: &mut Criterion) {
+    let strings = &[
+        "En_uS",
+        "EN-GB",
+        "ES-aR",
+        "iT",
+        "zH_HaNs_cN",
+        "dE-aT",
+        "Pl",
+        "FR-FR",
+        "de_AT",
+        "sR-CyrL_sr",
+        "NB-NO",
+        "fr_fr",
+        "Mk",
+        "uK",
+        "en-us",
+        "en_gb",
+        "ES-AR",
+        "tH",
+        "DE",
+        "ZH_cyrl_hN",
+        "eN-lAtN-uS",
+    ];
+    c.bench_function("language_identifier_parser_casing", |b| {
+        // Converted once, outside the timed closure.
+        let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect();
+        b.iter(|| {
+            for s in &slices {
+                // The result goes through `black_box` too: an output that is
+                // never observed lets the optimizer drop the call being timed.
+                let _ = black_box(parse_language_identifier(black_box(s)));
+            }
+        })
+    });
+}
+
+criterion_group!( // Group `benches` holds both parser benchmarks defined in this file.
+ benches,
+ language_identifier_parser_bench,
+ language_identifier_parser_casing_bench,
+);
+criterion_main!(benches); // Benchmark entry point, built from the `benches` group.