diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /intl/locale/rust | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/locale/rust')
-rw-r--r-- | intl/locale/rust/fluent-langneg-ffi/Cargo.toml | 13 | ||||
-rw-r--r-- | intl/locale/rust/fluent-langneg-ffi/cbindgen.toml | 17 | ||||
-rw-r--r-- | intl/locale/rust/fluent-langneg-ffi/src/lib.rs | 79 | ||||
-rw-r--r-- | intl/locale/rust/oxilangtag-ffi/Cargo.toml | 10 | ||||
-rw-r--r-- | intl/locale/rust/oxilangtag-ffi/cbindgen.toml | 15 | ||||
-rw-r--r-- | intl/locale/rust/oxilangtag-ffi/src/lib.rs | 126 | ||||
-rw-r--r-- | intl/locale/rust/unic-langid-ffi/Cargo.toml | 11 | ||||
-rw-r--r-- | intl/locale/rust/unic-langid-ffi/cbindgen.toml | 22 | ||||
-rw-r--r-- | intl/locale/rust/unic-langid-ffi/src/lib.rs | 168 |
9 files changed, 461 insertions, 0 deletions
diff --git a/intl/locale/rust/fluent-langneg-ffi/Cargo.toml b/intl/locale/rust/fluent-langneg-ffi/Cargo.toml new file mode 100644 index 0000000000..88d6bad4d4 --- /dev/null +++ b/intl/locale/rust/fluent-langneg-ffi/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "fluent-langneg-ffi" +version = "0.1.0" +license = "MPL-2.0" +authors = ["Zibi Braniecki <zibi@braniecki.net>"] +edition = "2018" + +[dependencies] +nsstring = { path = "../../../../xpcom/rust/nsstring" } +thin-vec = { version = "0.2.1", features = ["gecko-ffi"] } +fluent-langneg = { version = "0.13", features = ["cldr"] } +unic-langid = "0.9" +unic-langid-ffi = { path = "../unic-langid-ffi" } diff --git a/intl/locale/rust/fluent-langneg-ffi/cbindgen.toml b/intl/locale/rust/fluent-langneg-ffi/cbindgen.toml new file mode 100644 index 0000000000..98ec15d389 --- /dev/null +++ b/intl/locale/rust/fluent-langneg-ffi/cbindgen.toml @@ -0,0 +1,17 @@ +header = """/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */""" +autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */ +#ifndef mozilla_intl_locale_MozLocaleBindings_h +#error "Don't include this file directly, instead include MozLocaleBindings.h" +#endif +""" +include_version = true +braces = "SameLine" +line_length = 100 +tab_width = 2 +language = "C++" +namespaces = ["mozilla", "intl", "ffi"] + +[export.rename] +"ThinVec" = "nsTArray" diff --git a/intl/locale/rust/fluent-langneg-ffi/src/lib.rs b/intl/locale/rust/fluent-langneg-ffi/src/lib.rs new file mode 100644 index 0000000000..591e9ef861 --- /dev/null +++ b/intl/locale/rust/fluent-langneg-ffi/src/lib.rs @@ -0,0 +1,79 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use fluent_langneg::negotiate::NegotiationStrategy as LangNegNegotiationStrategy; +use fluent_langneg::negotiate_languages; +use nsstring::nsACString; +use nsstring::nsCString; +use thin_vec::ThinVec; +use unic_langid::{LanguageIdentifier, LanguageIdentifierError}; +use unic_langid_ffi::new_langid_for_mozilla; + +/// We want to return the exact strings that were passed to us out of the +/// available and default pool. Since for the negotiation we canonicalize them +/// in `LanguageIdentifier`, this struct will preserve the original, non-canonicalized +/// string, and then use it to populate return array. +#[derive(Debug, PartialEq)] +struct LangIdString<'l> { + pub source: &'l nsCString, + pub langid: LanguageIdentifier, +} + +impl<'l> LangIdString<'l> { + pub fn try_new(s: &'l nsCString) -> Result<Self, LanguageIdentifierError> { + new_langid_for_mozilla(s).map(|l| LangIdString { + source: s, + langid: l, + }) + } +} + +impl<'l> AsRef<LanguageIdentifier> for LangIdString<'l> { + fn as_ref(&self) -> &LanguageIdentifier { + &self.langid + } +} + +#[repr(C)] +pub enum NegotiationStrategy { + Filtering, + Matching, + Lookup, +} + +fn get_strategy(input: NegotiationStrategy) -> LangNegNegotiationStrategy { + match input { + NegotiationStrategy::Filtering => LangNegNegotiationStrategy::Filtering, + NegotiationStrategy::Matching => LangNegNegotiationStrategy::Matching, + NegotiationStrategy::Lookup => LangNegNegotiationStrategy::Lookup, + } +} + +#[no_mangle] +pub extern "C" fn fluent_langneg_negotiate_languages( + requested: &ThinVec<nsCString>, + available: &ThinVec<nsCString>, + default: &nsACString, + strategy: NegotiationStrategy, + result: &mut ThinVec<nsCString>, +) { + let requested = requested + .iter() + .filter_map(|s| new_langid_for_mozilla(s).ok()) + .collect::<Vec<_>>(); + + let available = available + .iter() + .filter_map(|s| LangIdString::try_new(s).ok()) + .collect::<Vec<_>>(); + + let d: nsCString = default.into(); + let default = LangIdString::try_new(&d).ok(); + + let strategy = get_strategy(strategy); + + for l in negotiate_languages(&requested, &available, default.as_ref(), strategy) { + result.push(l.source.clone()); + } +} diff --git a/intl/locale/rust/oxilangtag-ffi/Cargo.toml b/intl/locale/rust/oxilangtag-ffi/Cargo.toml new file mode 100644 index 0000000000..ee3b1cf5c8 --- /dev/null +++ b/intl/locale/rust/oxilangtag-ffi/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "oxilangtag-ffi" +version = "0.1.0" +license = "MPL-2.0" +authors = ["Jonathan Kew <jkew@mozilla.com>"] +edition = "2021" + +[dependencies] +nsstring = { path = "../../../../xpcom/rust/nsstring" } +oxilangtag = "0.1.3" diff --git a/intl/locale/rust/oxilangtag-ffi/cbindgen.toml b/intl/locale/rust/oxilangtag-ffi/cbindgen.toml new file mode 100644 index 0000000000..21d703000b --- /dev/null +++ b/intl/locale/rust/oxilangtag-ffi/cbindgen.toml @@ -0,0 +1,15 @@ +header = """/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */""" +autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */ +""" +include_version = true +braces = "SameLine" +line_length = 100 +tab_width = 2 +language = "C++" +namespaces = ["mozilla", "intl", "ffi"] + +[parse] +parse_deps = true +include = ["oxilangtag"] diff --git a/intl/locale/rust/oxilangtag-ffi/src/lib.rs b/intl/locale/rust/oxilangtag-ffi/src/lib.rs new file mode 100644 index 0000000000..5a30e9b77f --- /dev/null +++ b/intl/locale/rust/oxilangtag-ffi/src/lib.rs @@ -0,0 +1,126 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use nsstring::nsACString; +use oxilangtag::LanguageTag; + +pub struct LangTag; // Opaque type for ffi interface. + +/// Parse a string as a BCP47 language tag. Returns a `LangTag` object if the string is +/// successfully parsed; this must be freed with `lang_tag_destroy`. +/// +/// The string `tag` must outlive the `LangTag`. +/// +/// Returns null if `tag` is not a well-formed BCP47 tag (including if it is not +/// valid UTF-8). +#[no_mangle] +pub extern "C" fn lang_tag_new(tag: &nsACString) -> *mut LangTag { + if let Ok(tag_str) = core::str::from_utf8(tag.as_ref()) { + if let Ok(language_tag) = LanguageTag::parse(tag_str) { + return Box::into_raw(Box::new(language_tag)) as *mut LangTag; + } + } + std::ptr::null_mut() +} + +/// Free a `LangTag` instance. +#[no_mangle] +pub extern "C" fn lang_tag_destroy(lang: *mut LangTag) { + if lang.is_null() { + return; + } + let _ = unsafe { Box::from_raw(lang as *mut LanguageTag<&str>) }; +} + +/// Matches an HTML language attribute against a CSS :lang() selector using the +/// "extended filtering" algorithm. +/// The attribute is a BCP47 language tag that was successfully parsed by oxilangtag; +/// the selector is a string that is treated as a language range per RFC 4647. +#[no_mangle] +pub extern "C" fn lang_tag_matches(attribute: *const LangTag, selector: &nsACString) -> bool { + // This should only be called with a pointer that we got from lang_tag_new(). + let lang = unsafe { *(attribute as *const LanguageTag<&str>) }; + + // Our callers guarantee that the selector string is valid UTF-8. + let range_str = unsafe { selector.as_str_unchecked() }; + + if lang.is_empty() || range_str.is_empty() { + return false; + } + + // RFC 4647 Extended Filtering: + // https://datatracker.ietf.org/doc/html/rfc4647#section-3.3.2 + + // 1. Split both the extended language range and the language tag being + // compared into a list of subtags by dividing on the hyphen (%x2D) + // character. Two subtags match if either they are the same when + // compared case-insensitively or the language range's subtag is the + // wildcard '*'. + + let mut range_subtags = range_str.split('-'); + let mut lang_subtags = lang.as_str().split('-'); + + // 2. Begin with the first subtag in each list. If the first subtag in + // the range does not match the first subtag in the tag, the overall + // match fails. Otherwise, move to the next subtag in both the + // range and the tag. + + let mut range_subtag = range_subtags.next(); + let mut lang_subtag = lang_subtags.next(); + // Cannot be None, because we checked that both args were non-empty. + assert!(range_subtag.is_some() && lang_subtag.is_some()); + if !(range_subtag.unwrap() == "*" + || range_subtag + .unwrap() + .eq_ignore_ascii_case(lang_subtag.unwrap())) + { + return false; + } + + range_subtag = range_subtags.next(); + lang_subtag = lang_subtags.next(); + + // 3. While there are more subtags left in the language range's list: + loop { + // 4. When the language range's list has no more subtags, the match + // succeeds. + let Some(range_subtag_str) = range_subtag else { + return true; + }; + + // A. If the subtag currently being examined in the range is the + // wildcard ('*'), move to the next subtag in the range and + // continue with the loop. + if range_subtag_str == "*" { + range_subtag = range_subtags.next(); + continue; + } + + // B. Else, if there are no more subtags in the language tag's + // list, the match fails. + let Some(lang_subtag_str) = lang_subtag else { + return false; + }; + + // C. Else, if the current subtag in the range's list matches the + // current subtag in the language tag's list, move to the next + // subtag in both lists and continue with the loop. + if range_subtag_str.eq_ignore_ascii_case(lang_subtag_str) { + range_subtag = range_subtags.next(); + lang_subtag = lang_subtags.next(); + continue; + } + + // D. Else, if the language tag's subtag is a "singleton" (a single + // letter or digit, which includes the private-use subtag 'x') + // the match fails. + if lang_subtag_str.len() == 1 { + return false; + } + + // E. Else, move to the next subtag in the language tag's list and + // continue with the loop. + lang_subtag = lang_subtags.next(); + } +} diff --git a/intl/locale/rust/unic-langid-ffi/Cargo.toml b/intl/locale/rust/unic-langid-ffi/Cargo.toml new file mode 100644 index 0000000000..bd969437a6 --- /dev/null +++ b/intl/locale/rust/unic-langid-ffi/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "unic-langid-ffi" +version = "0.1.0" +license = "MPL-2.0" +authors = ["Zibi Braniecki <zibi@braniecki.net>"] +edition = "2018" + +[dependencies] +nsstring = { path = "../../../../xpcom/rust/nsstring" } +thin-vec = { version = "0.2.1", features = ["gecko-ffi"] } +unic-langid = { version = "0.9", features = ["likelysubtags"] } diff --git a/intl/locale/rust/unic-langid-ffi/cbindgen.toml b/intl/locale/rust/unic-langid-ffi/cbindgen.toml new file mode 100644 index 0000000000..3842e5183b --- /dev/null +++ b/intl/locale/rust/unic-langid-ffi/cbindgen.toml @@ -0,0 +1,22 @@ +header = """/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */""" +autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */ +#ifndef mozilla_intl_locale_MozLocaleBindings_h +#error "Don't include this file directly, instead include MozLocaleBindings.h" +#endif +""" +include_version = true +braces = "SameLine" +line_length = 100 +tab_width = 2 +language = "C++" +namespaces = ["mozilla", "intl", "ffi"] + +[parse] +parse_deps = true +include = ["unic-langid", "unic-langid-impl"] + +[export.rename] +"ThinVec" = "nsTArray" +"nsCStr" = "nsDependentCSubstring" diff --git a/intl/locale/rust/unic-langid-ffi/src/lib.rs b/intl/locale/rust/unic-langid-ffi/src/lib.rs new file mode 100644 index 0000000000..804a4341e1 --- /dev/null +++ b/intl/locale/rust/unic-langid-ffi/src/lib.rs @@ -0,0 +1,168 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use nsstring::{nsACString, nsCStr, nsCString}; +use thin_vec::ThinVec; +pub use unic_langid::{subtags, CharacterDirection, LanguageIdentifier, LanguageIdentifierError}; + +pub fn new_langid_for_mozilla( + name: &nsACString, +) -> Result<LanguageIdentifier, LanguageIdentifierError> { + if name.eq_ignore_ascii_case(b"ja-jp-mac") { + "ja-JP-macos".parse() + } else { + // Cut out any `.FOO` like `en-US.POSIX`. + let mut name: &[u8] = name.as_ref(); + if let Some(ptr) = name.iter().position(|b| b == &b'.') { + name = &name[..ptr]; + } + LanguageIdentifier::from_bytes(name) + } +} + +#[no_mangle] +pub extern "C" fn unic_langid_canonicalize(name: &mut nsACString) -> bool { + let langid = new_langid_for_mozilla(name); + + let result = langid.is_ok(); + + name.assign(&langid.unwrap_or_default().to_string()); + + result +} + +#[no_mangle] +pub extern "C" fn unic_langid_new( + name: &nsACString, + ret_val: &mut bool, +) -> *mut LanguageIdentifier { + let langid = new_langid_for_mozilla(name); + + *ret_val = langid.is_ok(); + Box::into_raw(Box::new(langid.unwrap_or_default())) +} + +#[no_mangle] +pub unsafe extern "C" fn unic_langid_destroy(langid: *mut LanguageIdentifier) { + let _ = Box::from_raw(langid); +} + +#[no_mangle] +pub extern "C" fn unic_langid_as_string(langid: &mut LanguageIdentifier, ret_val: &mut nsACString) { + ret_val.assign(&langid.to_string()); +} + +#[no_mangle] +pub extern "C" fn unic_langid_get_language<'a>( + langid: &'a LanguageIdentifier, + out: &mut nsCStr<'a>, +) { + *out = nsCStr::from(langid.language.as_str()); +} + +#[no_mangle] +pub extern "C" fn unic_langid_set_language( + langid: &mut LanguageIdentifier, + string: &nsACString, +) -> bool { + subtags::Language::from_bytes(string) + .map(|lang| langid.language = lang) + .is_ok() +} + +#[no_mangle] +pub extern "C" fn unic_langid_clear_language(langid: &mut LanguageIdentifier) { + langid.language.clear() +} + +#[no_mangle] +pub extern "C" fn unic_langid_get_script<'a>(langid: &'a LanguageIdentifier, out: &mut nsCStr<'a>) { + *out = nsCStr::from(langid.script.as_ref().map_or("", |s| s.as_str())); +} + +#[no_mangle] +pub extern "C" fn unic_langid_set_script( + langid: &mut LanguageIdentifier, + string: &nsACString, +) -> bool { + subtags::Script::from_bytes(string) + .map(|script| langid.script = Some(script)) + .is_ok() +} + +#[no_mangle] +pub extern "C" fn unic_langid_clear_script(langid: &mut LanguageIdentifier) { + langid.script = None; +} + +#[no_mangle] +pub extern "C" fn unic_langid_get_region<'a>(langid: &'a LanguageIdentifier, out: &mut nsCStr<'a>) { + *out = nsCStr::from(langid.region.as_ref().map_or("", |s| s.as_str())); +} + +#[no_mangle] +pub extern "C" fn unic_langid_set_region( + langid: &mut LanguageIdentifier, + string: &nsACString, +) -> bool { + subtags::Region::from_bytes(string) + .map(|region| langid.region = Some(region)) + .is_ok() +} + +#[no_mangle] +pub extern "C" fn unic_langid_clear_region(langid: &mut LanguageIdentifier) { + langid.region = None; +} + +#[no_mangle] +pub extern "C" fn unic_langid_get_variants( + langid: &LanguageIdentifier, + variants: &mut ThinVec<nsCString>, +) { + for v in langid.variants() { + variants.push(v.as_str().into()); + } +} + +#[no_mangle] +pub extern "C" fn unic_langid_set_variants( + langid: &mut LanguageIdentifier, + variants: &ThinVec<nsCString>, +) -> bool { + variants + .iter() + .map(|v| subtags::Variant::from_bytes(v)) + .collect::<Result<Vec<_>, _>>() + .map(|variants| langid.set_variants(&variants)) + .is_ok() +} + +#[no_mangle] +pub extern "C" fn unic_langid_clear_variants(langid: &mut LanguageIdentifier) { + langid.clear_variants() +} + +#[no_mangle] +pub extern "C" fn unic_langid_matches( + langid: &LanguageIdentifier, + other: &LanguageIdentifier, + self_as_range: bool, + other_as_range: bool, +) -> bool { + langid.matches(other, self_as_range, other_as_range) +} + +#[no_mangle] +pub extern "C" fn unic_langid_maximize(langid: &mut LanguageIdentifier) -> bool { + langid.maximize() +} + +#[no_mangle] +pub extern "C" fn unic_langid_is_rtl(name: &nsACString) -> bool { + match new_langid_for_mozilla(name) { + Ok(langid) => langid.character_direction() == CharacterDirection::RTL, + Err(_) => false, + } +} |