From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- intl/locale/rust/oxilangtag-ffi/src/lib.rs | 126 +++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 intl/locale/rust/oxilangtag-ffi/src/lib.rs (limited to 'intl/locale/rust/oxilangtag-ffi/src/lib.rs') diff --git a/intl/locale/rust/oxilangtag-ffi/src/lib.rs b/intl/locale/rust/oxilangtag-ffi/src/lib.rs new file mode 100644 index 0000000000..5a30e9b77f --- /dev/null +++ b/intl/locale/rust/oxilangtag-ffi/src/lib.rs @@ -0,0 +1,126 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use nsstring::nsACString; +use oxilangtag::LanguageTag; + +pub struct LangTag; // Opaque type for ffi interface. + +/// Parse a string as a BCP47 language tag. Returns a `LangTag` object if the string is +/// successfully parsed; this must be freed with `lang_tag_destroy`. +/// +/// The string `tag` must outlive the `LangTag`. +/// +/// Returns null if `tag` is not a well-formed BCP47 tag (including if it is not +/// valid UTF-8). +#[no_mangle] +pub extern "C" fn lang_tag_new(tag: &nsACString) -> *mut LangTag { + if let Ok(tag_str) = core::str::from_utf8(tag.as_ref()) { + if let Ok(language_tag) = LanguageTag::parse(tag_str) { + return Box::into_raw(Box::new(language_tag)) as *mut LangTag; + } + } + std::ptr::null_mut() +} + +/// Free a `LangTag` instance. +#[no_mangle] +pub extern "C" fn lang_tag_destroy(lang: *mut LangTag) { + if lang.is_null() { + return; + } + let _ = unsafe { Box::from_raw(lang as *mut LanguageTag<&str>) }; +} + +/// Matches an HTML language attribute against a CSS :lang() selector using the +/// "extended filtering" algorithm. +/// The attribute is a BCP47 language tag that was successfully parsed by oxilangtag; +/// the selector is a string that is treated as a language range per RFC 4647. +#[no_mangle] +pub extern "C" fn lang_tag_matches(attribute: *const LangTag, selector: &nsACString) -> bool { + // This should only be called with a pointer that we got from lang_tag_new(). + let lang = unsafe { *(attribute as *const LanguageTag<&str>) }; + + // Our callers guarantee that the selector string is valid UTF-8. + let range_str = unsafe { selector.as_str_unchecked() }; + + if lang.is_empty() || range_str.is_empty() { + return false; + } + + // RFC 4647 Extended Filtering: + // https://datatracker.ietf.org/doc/html/rfc4647#section-3.3.2 + + // 1. Split both the extended language range and the language tag being + // compared into a list of subtags by dividing on the hyphen (%x2D) + // character. Two subtags match if either they are the same when + // compared case-insensitively or the language range's subtag is the + // wildcard '*'. + + let mut range_subtags = range_str.split('-'); + let mut lang_subtags = lang.as_str().split('-'); + + // 2. Begin with the first subtag in each list. If the first subtag in + // the range does not match the first subtag in the tag, the overall + // match fails. Otherwise, move to the next subtag in both the + // range and the tag. + + let mut range_subtag = range_subtags.next(); + let mut lang_subtag = lang_subtags.next(); + // Cannot be None, because we checked that both args were non-empty. + assert!(range_subtag.is_some() && lang_subtag.is_some()); + if !(range_subtag.unwrap() == "*" + || range_subtag + .unwrap() + .eq_ignore_ascii_case(lang_subtag.unwrap())) + { + return false; + } + + range_subtag = range_subtags.next(); + lang_subtag = lang_subtags.next(); + + // 3. While there are more subtags left in the language range's list: + loop { + // 4. When the language range's list has no more subtags, the match + // succeeds. + let Some(range_subtag_str) = range_subtag else { + return true; + }; + + // A. If the subtag currently being examined in the range is the + // wildcard ('*'), move to the next subtag in the range and + // continue with the loop. + if range_subtag_str == "*" { + range_subtag = range_subtags.next(); + continue; + } + + // B. Else, if there are no more subtags in the language tag's + // list, the match fails. + let Some(lang_subtag_str) = lang_subtag else { + return false; + }; + + // C. Else, if the current subtag in the range's list matches the + // current subtag in the language tag's list, move to the next + // subtag in both lists and continue with the loop. + if range_subtag_str.eq_ignore_ascii_case(lang_subtag_str) { + range_subtag = range_subtags.next(); + lang_subtag = lang_subtags.next(); + continue; + } + + // D. Else, if the language tag's subtag is a "singleton" (a single + // letter or digit, which includes the private-use subtag 'x') + // the match fails. + if lang_subtag_str.len() == 1 { + return false; + } + + // E. Else, move to the next subtag in the language tag's list and + // continue with the loop. + lang_subtag = lang_subtags.next(); + } +} -- cgit v1.2.3