From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- .../unic-langid-impl/tests/canonicalize_test.rs | 13 ++ .../rust/unic-langid-impl/tests/fixtures.rs | 75 ++++++++ .../tests/language_identifier_test.rs | 192 +++++++++++++++++++++ .../rust/unic-langid-impl/tests/likelysubtags.rs | 113 ++++++++++++ 4 files changed, 393 insertions(+) create mode 100644 third_party/rust/unic-langid-impl/tests/canonicalize_test.rs create mode 100644 third_party/rust/unic-langid-impl/tests/fixtures.rs create mode 100644 third_party/rust/unic-langid-impl/tests/language_identifier_test.rs create mode 100644 third_party/rust/unic-langid-impl/tests/likelysubtags.rs (limited to 'third_party/rust/unic-langid-impl/tests') diff --git a/third_party/rust/unic-langid-impl/tests/canonicalize_test.rs b/third_party/rust/unic-langid-impl/tests/canonicalize_test.rs new file mode 100644 index 0000000000..b0195828b3 --- /dev/null +++ b/third_party/rust/unic-langid-impl/tests/canonicalize_test.rs @@ -0,0 +1,13 @@ +use unic_langid_impl::canonicalize; + +fn assert_canonicalize(input: &str, output: &str) { + assert_eq!(&canonicalize(input).unwrap(), output); +} + +#[test] +fn test_canonicalize() { + assert_canonicalize("Pl", "pl"); + assert_canonicalize("eN-uS", "en-US"); + assert_canonicalize("ZH_hans_hK", "zh-Hans-HK"); + assert_canonicalize("en-scouse-fonipa", "en-fonipa-scouse"); +} diff --git a/third_party/rust/unic-langid-impl/tests/fixtures.rs b/third_party/rust/unic-langid-impl/tests/fixtures.rs new file mode 100644 index 0000000000..27cd29b33a --- /dev/null +++ b/third_party/rust/unic-langid-impl/tests/fixtures.rs @@ -0,0 +1,75 @@ +use std::convert::TryInto; +use std::error::Error; +use std::fs::File; +use std::path::Path; + +use unic_langid_impl::LanguageIdentifier; + +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +struct LangIdTestInputData { + string: String, +} + +#[derive(Serialize, Deserialize, Debug)] +struct LangIdTestOutputObject { + language: Option, + script: Option, + region: Option, + #[serde(default)] + variants: Vec, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(untagged)] +enum LangIdTestOutput { + String(String), + Object(LangIdTestOutputObject), +} + +#[derive(Serialize, Deserialize)] +struct LangIdTestSet { + input: LangIdTestInputData, + output: LangIdTestOutput, +} + +fn read_langid_testsets>(path: P) -> Result, Box> { + let file = File::open(path)?; + let sets = serde_json::from_reader(file)?; + Ok(sets) +} + +fn test_langid_fixtures(path: &str) { + let tests = read_langid_testsets(path).unwrap(); + + for test in tests { + let s = test.input.string; + + let langid: LanguageIdentifier = s.parse().expect("Parsing failed."); + + match test.output { + LangIdTestOutput::Object(o) => { + let expected = LanguageIdentifier::from_parts( + o.language.try_into().unwrap(), + o.script.as_ref().map(|s| s.parse().unwrap()), + o.region.as_ref().map(|r| r.parse().unwrap()), + o.variants + .iter() + .map(|s| s.parse().unwrap()) + .collect::>() + .as_ref(), + ); + assert_eq!(langid, expected); + } + LangIdTestOutput::String(s) => { + assert_eq!(langid.to_string(), s); + } + } + } +} + +#[test] +fn parse() { + test_langid_fixtures("./tests/fixtures/parsing.json"); +} diff --git a/third_party/rust/unic-langid-impl/tests/language_identifier_test.rs b/third_party/rust/unic-langid-impl/tests/language_identifier_test.rs new file mode 100644 index 0000000000..79b81fb4fb --- /dev/null +++ b/third_party/rust/unic-langid-impl/tests/language_identifier_test.rs @@ -0,0 +1,192 @@ +use unic_langid_impl::parser::parse_language_identifier; +use unic_langid_impl::subtags; +use unic_langid_impl::CharacterDirection; +use unic_langid_impl::LanguageIdentifier; + +fn assert_language_identifier( + loc: &LanguageIdentifier, + language: Option<&str>, + script: Option<&str>, + region: Option<&str>, + variants: Option<&[&str]>, +) { + assert_eq!( + loc.language, + language.map_or(subtags::Language::default(), |l| { + subtags::Language::from_bytes(l.as_bytes()).unwrap() + }) + ); + assert_eq!(loc.script, script.map(|s| s.parse().unwrap())); + assert_eq!(loc.region, region.map(|r| r.parse().unwrap())); + let v = variants + .unwrap_or(&[]) + .iter() + .map(|v| -> subtags::Variant { v.parse().unwrap() }) + .collect::>(); + assert_eq!( + loc.variants().collect::>(), + v.iter().collect::>(), + ); +} + +fn assert_parsed_language_identifier( + input: &str, + language: Option<&str>, + script: Option<&str>, + region: Option<&str>, + variants: Option<&[&str]>, +) { + let langid = parse_language_identifier(input.as_bytes()).unwrap(); + assert_language_identifier(&langid, language, script, region, variants); +} + +#[test] +fn test_language_identifier_parser() { + assert_parsed_language_identifier("pl", Some("pl"), None, None, None); + assert_parsed_language_identifier("und", None, None, None, None); + assert_parsed_language_identifier("en-US", Some("en"), None, Some("US"), None); + assert_parsed_language_identifier("en-Latn-US", Some("en"), Some("Latn"), Some("US"), None); + assert_parsed_language_identifier("sl-nedis", Some("sl"), None, None, Some(&["nedis"])); +} + +#[test] +fn test_language_casing() { + assert_parsed_language_identifier("Pl", Some("pl"), None, None, None); + assert_parsed_language_identifier("En-uS", Some("en"), None, Some("US"), None); + assert_parsed_language_identifier("eN-lAtN-uS", Some("en"), Some("Latn"), Some("US"), None); + assert_parsed_language_identifier("ZH_cyrl_hN", Some("zh"), Some("Cyrl"), Some("HN"), None); +} + +#[test] +fn test_serialize_langid() { + let langid: LanguageIdentifier = "en-Latn-US".parse().unwrap(); + assert_eq!(&langid.to_string(), "en-Latn-US"); +} + +#[test] +fn test_sorted_variants() { + let langid: LanguageIdentifier = "en-nedis-macos".parse().unwrap(); + assert_eq!(&langid.to_string(), "en-macos-nedis"); + + let langid = LanguageIdentifier::from_parts( + "en".parse().unwrap(), + None, + None, + &["nedis".parse().unwrap(), "macos".parse().unwrap()], + ); + assert_eq!(&langid.to_string(), "en-macos-nedis"); +} + +#[test] +fn test_from_parts_unchecked() { + let langid: LanguageIdentifier = "en-nedis-macos".parse().unwrap(); + let (lang, script, region, variants) = langid.into_parts(); + let langid = LanguageIdentifier::from_raw_parts_unchecked( + lang, + script, + region, + Some(variants.into_boxed_slice()), + ); + assert_eq!(&langid.to_string(), "en-macos-nedis"); +} + +#[test] +fn test_matches() { + let langid_en: LanguageIdentifier = "en".parse().unwrap(); + let langid_en_us: LanguageIdentifier = "en-US".parse().unwrap(); + let langid_en_us2: LanguageIdentifier = "en-US".parse().unwrap(); + let langid_pl: LanguageIdentifier = "pl".parse().unwrap(); + assert_eq!(langid_en.matches(&langid_en_us, false, false), false); + assert_eq!(langid_en_us.matches(&langid_en_us2, false, false), true); + assert_eq!(langid_en.matches(&langid_pl, false, false), false); + assert_eq!(langid_en.matches(&langid_en_us, true, false), true); +} + +#[test] +fn test_set_fields() { + let mut langid = LanguageIdentifier::default(); + assert_eq!(&langid.to_string(), "und"); + + langid.language = "pl".parse().expect("Setting language failed"); + assert_eq!(&langid.to_string(), "pl"); + + langid.language = "de".parse().expect("Setting language failed"); + assert_eq!(&langid.to_string(), "de"); + langid.region = Some("AT".parse().expect("Setting region failed")); + assert_eq!(&langid.to_string(), "de-AT"); + langid.script = Some("Latn".parse().expect("Setting script failed")); + assert_eq!(&langid.to_string(), "de-Latn-AT"); + langid.set_variants(&["macos".parse().expect("Setting variants failed")]); + assert_eq!(&langid.to_string(), "de-Latn-AT-macos"); + + assert_eq!(langid.has_variant("macos".parse().unwrap()), true); + assert_eq!(langid.has_variant("windows".parse().unwrap()), false); + + langid.language.clear(); + assert_eq!(&langid.to_string(), "und-Latn-AT-macos"); + langid.region = None; + assert_eq!(&langid.to_string(), "und-Latn-macos"); + langid.script = None; + assert_eq!(&langid.to_string(), "und-macos"); + langid.clear_variants(); + assert_eq!(&langid.to_string(), "und"); + + assert_eq!(langid.has_variant("macos".parse().unwrap()), false); +} + +#[test] +fn test_matches_as_range() { + let langid: LanguageIdentifier = "en-US".parse().unwrap(); + let langid2: LanguageIdentifier = "en-US-windows".parse().unwrap(); + assert_eq!(langid.matches(&langid2, false, false), false); + assert_eq!(langid.matches(&langid2, true, false), true); + assert_eq!(langid.matches(&langid2, false, true), false); + assert_eq!(langid.matches(&langid2, true, true), true); +} + +#[test] +fn test_character_direction() { + let langid: LanguageIdentifier = "en-US".parse().unwrap(); + let langid2: LanguageIdentifier = "ar-AF".parse().unwrap(); + assert_eq!(langid.character_direction(), CharacterDirection::LTR); + assert_eq!(langid2.character_direction(), CharacterDirection::RTL); +} + +#[test] +fn test_langid_ord() { + let input = &[ + "en-US-macos-zarab", + "en-US-macos-nedis", + "en-US-macos", + "en-GB", + "en", + "en-US", + "ar", + "fr", + "de", + ]; + + let mut langids = input + .iter() + .map(|l| -> LanguageIdentifier { l.parse().unwrap() }) + .collect::>(); + + langids.sort(); + + let result = langids.iter().map(|l| l.to_string()).collect::>(); + + assert_eq!( + &result, + &[ + "ar", + "de", + "en", + "en-GB", + "en-US", + "en-US-macos", + "en-US-macos-nedis", + "en-US-macos-zarab", + "fr" + ] + ); +} diff --git a/third_party/rust/unic-langid-impl/tests/likelysubtags.rs b/third_party/rust/unic-langid-impl/tests/likelysubtags.rs new file mode 100644 index 0000000000..a378274f3c --- /dev/null +++ b/third_party/rust/unic-langid-impl/tests/likelysubtags.rs @@ -0,0 +1,113 @@ +use unic_langid_impl::likelysubtags::{maximize, minimize, CLDR_VERSION}; +use unic_langid_impl::subtags; + +static STRINGS: &[(&str, Option<&str>)] = &[ + ("en-US", Some("en-Latn-US")), + ("en-GB", Some("en-Latn-GB")), + ("es-AR", Some("es-Latn-AR")), + ("it", Some("it-Latn-IT")), + ("zh-Hans-CN", None), + ("de-AT", Some("de-Latn-AT")), + ("pl", Some("pl-Latn-PL")), + ("fr-FR", Some("fr-Latn-FR")), + ("de-AT", Some("de-Latn-AT")), + ("sr-Cyrl-SR", None), + ("nb-NO", Some("nb-Latn-NO")), + ("fr-FR", Some("fr-Latn-FR")), + ("mk", Some("mk-Cyrl-MK")), + ("uk", Some("uk-Cyrl-UA")), + ("und-PL", Some("pl-Latn-PL")), + ("und-Latn-AM", Some("ku-Latn-AM")), + ("ug-Cyrl", Some("ug-Cyrl-KZ")), + ("sr-ME", Some("sr-Latn-ME")), + ("mn-Mong", Some("mn-Mong-CN")), + ("lif-Limb", Some("lif-Limb-IN")), + ("gan", Some("gan-Hans-CN")), + ("zh-Hant", Some("zh-Hant-TW")), + ("yue-Hans", Some("yue-Hans-CN")), + ("unr", Some("unr-Beng-IN")), + ("unr-Deva", Some("unr-Deva-NP")), + ("und-Thai-CN", Some("lcp-Thai-CN")), + ("ug-Cyrl", Some("ug-Cyrl-KZ")), + ("en-Latn-DE", None), + ("pl-FR", Some("pl-Latn-FR")), + ("de-CH", Some("de-Latn-CH")), + ("tuq", Some("tuq-Latn")), + ("sr-ME", Some("sr-Latn-ME")), + ("ng", Some("ng-Latn-NA")), + ("klx", Some("klx-Latn")), + ("kk-Arab", Some("kk-Arab-CN")), + ("en-Cyrl", Some("en-Cyrl-US")), + ("und-Cyrl-UK", Some("ru-Cyrl-UK")), + ("und-Arab", Some("ar-Arab-EG")), + ("und-Arab-FO", Some("ar-Arab-FO")), + ("zh-TW", Some("zh-Hant-TW")), +]; + +fn extract_input( + s: &str, +) -> ( + subtags::Language, + Option, + Option, +) { + let chunks: Vec<&str> = s.split("-").collect(); + let lang: subtags::Language = chunks[0].parse().unwrap(); + let (script, region) = if let Some(s) = chunks.get(1) { + if let Ok(script) = s.parse() { + let region = chunks.get(2).map(|r| r.parse().unwrap()); + (Some(script), region) + } else { + let region = s.parse().unwrap(); + (None, Some(region)) + } + } else { + (None, None) + }; + (lang, script, region) +} + +fn extract_output( + s: Option<&str>, +) -> Option<( + subtags::Language, + Option, + Option, +)> { + s.map(|s| { + let chunks: Vec<&str> = s.split("-").collect(); + ( + chunks[0].parse().unwrap(), + chunks.get(1).map(|s| s.parse().unwrap()), + chunks.get(2).map(|s| s.parse().unwrap()), + ) + }) +} + +#[test] +fn maximize_test() { + for i in STRINGS { + let chunks = extract_input(i.0); + let result = maximize(chunks.0, chunks.1, chunks.2); + assert_eq!(extract_output(i.1), result); + } +} + +#[test] +fn version_works() { + assert_eq!(CLDR_VERSION, "36"); +} + +#[test] +fn minimize_test() { + let lang = "zh".parse().unwrap(); + let script = "Hant".parse().unwrap(); + let result = minimize(lang, Some(script), None); + assert_eq!(result, Some(extract_input("zh-TW"))); + + let lang = "en".parse().unwrap(); + let script = "Latn".parse().unwrap(); + let region = "US".parse().unwrap(); + let result = minimize(lang, Some(script), Some(region)); + assert_eq!(result, Some(extract_input("en"))); +} -- cgit v1.2.3