diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/icu_locid/examples | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1. (ref: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/icu_locid/examples')
-rw-r--r-- | third_party/rust/icu_locid/examples/filter_langids.rs | 69 | ||||
-rw-r--r-- | third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs | 54 |
2 files changed, 123 insertions, 0 deletions
```diff
diff --git a/third_party/rust/icu_locid/examples/filter_langids.rs b/third_party/rust/icu_locid/examples/filter_langids.rs
new file mode 100644
index 0000000000..67828a1181
--- /dev/null
+++ b/third_party/rust/icu_locid/examples/filter_langids.rs
@@ -0,0 +1,69 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma separated list of language identifiers,
+// filters out identifiers with language subtags different than `en` and serializes
+// the list back into a comma separated list in canonical syntax.
+//
+// Note: This is an example of the API use, and is not a good base for language matching.
+// For language matching, please consider algorithms such as Locale Matcher.
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::{subtags, LanguageIdentifier};
+use writeable::Writeable;
+
+const DEFAULT_INPUT: &str =
+    "de, en-us, zh-hant, sr-cyrl, fr-ca, es-cl, pl, en-latn-us, ca-valencia, und-arab";
+
+fn filter_input(input: &str) -> String {
+    // 1. Parse the input string into a list of language identifiers.
+    let langids = input.split(',').filter_map(|s| s.trim().parse().ok());
+
+    // 2. Filter for LanguageIdentifiers with Language subtag `en`.
+    let en_lang: subtags::Language = "en".parse().expect("Failed to parse language subtag.");
+
+    let en_langids = langids.filter(|langid: &LanguageIdentifier| langid.language == en_lang);
+
+    // 3. Serialize the output.
+    let en_strs: Vec<String> = en_langids
+        .map(|langid| langid.write_to_string().into_owned())
+        .collect();
+
+    en_strs.join(", ")
+}
+
+#[no_mangle]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+    icu_benchmark_macros::main_setup!();
+    let args: Vec<String> = env::args().collect();
+
+    let input = if let Some(input) = args.get(1) {
+        input.as_str()
+    } else {
+        DEFAULT_INPUT
+    };
+    let _output = filter_input(input);
+
+    #[cfg(debug_assertions)]
+    println!("\nInput: {input}\nOutput: {_output}");
+
+    0
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const DEFAULT_OUTPUT: &str = "en-US, en-Latn-US";
+
+    #[test]
+    fn ensure_default_output() {
+        assert_eq!(filter_input(DEFAULT_INPUT), DEFAULT_OUTPUT);
+    }
+}
diff --git a/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs b/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs
new file mode 100644
index 0000000000..1f967504eb
--- /dev/null
+++ b/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma separated list of locales,
+// makes them syntatically canonical and serializes the list back into a comma separated list.
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::Locale;
+
+const DEFAULT_INPUT: &str = "sr-cyrL-rS, es-mx, und-arab-u-ca-Buddhist";
+
+fn syntatically_canonicalize_locales(input: &str) -> String {
+    // Split input string and canonicalize each locale identifier.
+    let canonical_locales: Vec<String> = input
+        .split(',')
+        .filter_map(|s| Locale::canonicalize(s.trim()).ok())
+        .collect();
+
+    canonical_locales.join(", ")
+}
+
+fn main() {
+    icu_benchmark_macros::main_setup!();
+    let args: Vec<String> = env::args().collect();
+
+    let input = if let Some(input) = args.get(1) {
+        input.as_str()
+    } else {
+        DEFAULT_INPUT
+    };
+    let _output = syntatically_canonicalize_locales(input);
+
+    #[cfg(debug_assertions)]
+    println!("\nInput: {input}\nOutput: {_output}");
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const DEFAULT_OUTPUT: &str = "sr-Cyrl-RS, es-MX, und-Arab-u-ca-buddhist";
+
+    #[test]
+    fn ensure_default_output() {
+        assert_eq!(
+            syntatically_canonicalize_locales(DEFAULT_INPUT),
+            DEFAULT_OUTPUT
+        );
+    }
+}
```