summaryrefslogtreecommitdiffstats
path: root/third_party/rust/icu_locid/examples
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/icu_locid/examples
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/icu_locid/examples')
-rw-r--r--third_party/rust/icu_locid/examples/filter_langids.rs69
-rw-r--r--third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs54
2 files changed, 123 insertions, 0 deletions
diff --git a/third_party/rust/icu_locid/examples/filter_langids.rs b/third_party/rust/icu_locid/examples/filter_langids.rs
new file mode 100644
index 0000000000..67828a1181
--- /dev/null
+++ b/third_party/rust/icu_locid/examples/filter_langids.rs
@@ -0,0 +1,69 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma-separated list of language identifiers,
+// filters out identifiers whose language subtag is not `en`, and serializes
+// the remaining ones back into a comma-separated list in canonical syntax.
+//
+// Note: This is an example of the API use, and is not a good base for language matching.
+// For language matching, please consider algorithms such as Locale Matcher.
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::{subtags, LanguageIdentifier};
+use writeable::Writeable;
+
+const DEFAULT_INPUT: &str = // sample list; `main` uses it when no argument is passed
+ "de, en-us, zh-hant, sr-cyrl, fr-ca, es-cl, pl, en-latn-us, ca-valencia, und-arab";
+
+fn filter_input(input: &str) -> String { // returns the `en` identifiers in `input`, joined by ", "
+ // 1. Split on ',' and parse each trimmed piece (target type is fixed in step 2); failures are dropped.
+ let langids = input.split(',').filter_map(|s| s.trim().parse().ok());
+
+ // 2. Keep only the LanguageIdentifiers whose language subtag equals `en`.
+ let en_lang: subtags::Language = "en".parse().expect("Failed to parse language subtag.");
+
+ let en_langids = langids.filter(|langid: &LanguageIdentifier| langid.language == en_lang);
+
+ // 3. Serialize each remaining identifier to an owned String, then join them with ", ".
+ let en_strs: Vec<String> = en_langids
+ .map(|langid| langid.write_to_string().into_owned())
+ .collect();
+
+ en_strs.join(", ")
+}
+
+#[no_mangle] // this file is `#![no_main]`, so `main` is defined by hand with a C-style signature
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+ icu_benchmark_macros::main_setup!();
+ let args: Vec<String> = env::args().collect(); // `_argc`/`_argv` are ignored; arguments come from `env::args`
+
+ let input = if let Some(input) = args.get(1) { // first command-line argument, if present
+ input.as_str()
+ } else {
+ DEFAULT_INPUT // otherwise fall back to the built-in sample list
+ };
+ let _output = filter_input(input); // underscore: unused when the debug-only print below is compiled out
+
+ #[cfg(debug_assertions)] // the following `println!` exists only in builds with debug assertions
+ println!("\nInput: {input}\nOutput: {_output}");
+
+ 0
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const DEFAULT_OUTPUT: &str = "en-US, en-Latn-US"; // expected result of filtering DEFAULT_INPUT
+
+ #[test]
+ fn ensure_default_output() { // pins `filter_input`'s output for the built-in sample list
+ assert_eq!(filter_input(DEFAULT_INPUT), DEFAULT_OUTPUT);
+ }
+}
diff --git a/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs b/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs
new file mode 100644
index 0000000000..1f967504eb
--- /dev/null
+++ b/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma-separated list of locales,
+// makes them syntactically canonical, and serializes them back into a comma-separated list.
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::Locale;
+
+const DEFAULT_INPUT: &str = "sr-cyrL-rS, es-mx, und-arab-u-ca-Buddhist"; // mixed-case sample; `main` uses it when no argument is passed
+
+fn syntatically_canonicalize_locales(input: &str) -> String { // NOTE(review): "syntatically" is a misspelling of "syntactically"; it matches the file name
+ // Split on ',' and canonicalize each trimmed piece; pieces for which `Locale::canonicalize` errs are dropped.
+ let canonical_locales: Vec<String> = input
+ .split(',')
+ .filter_map(|s| Locale::canonicalize(s.trim()).ok())
+ .collect();
+
+ canonical_locales.join(", ") // re-join the surviving locales with ", "
+}
+
+fn main() { // canonicalizes the first CLI argument, or DEFAULT_INPUT when none is given
+ icu_benchmark_macros::main_setup!();
+ let args: Vec<String> = env::args().collect();
+
+ let input = if let Some(input) = args.get(1) { // first command-line argument, if present
+ input.as_str()
+ } else {
+ DEFAULT_INPUT // otherwise fall back to the built-in sample list
+ };
+ let _output = syntatically_canonicalize_locales(input); // underscore: unused when the debug-only print is compiled out
+
+ #[cfg(debug_assertions)] // the following `println!` exists only in builds with debug assertions
+ println!("\nInput: {input}\nOutput: {_output}");
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const DEFAULT_OUTPUT: &str = "sr-Cyrl-RS, es-MX, und-Arab-u-ca-buddhist"; // expected canonical form of DEFAULT_INPUT
+
+ #[test]
+ fn ensure_default_output() { // pins the canonicalized output for the built-in sample list
+ assert_eq!(
+ syntatically_canonicalize_locales(DEFAULT_INPUT),
+ DEFAULT_OUTPUT
+ );
+ }
+}