summaryrefslogtreecommitdiffstats
path: root/third_party/rust/fluent-langneg/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
commit43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/fluent-langneg/src
parentInitial commit. (diff)
downloadfirefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/fluent-langneg/src')
-rw-r--r--third_party/rust/fluent-langneg/src/accepted_languages.rs41
-rw-r--r--third_party/rust/fluent-langneg/src/lib.rs49
-rw-r--r--third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs39
-rw-r--r--third_party/rust/fluent-langneg/src/negotiate/mod.rs233
4 files changed, 362 insertions, 0 deletions
diff --git a/third_party/rust/fluent-langneg/src/accepted_languages.rs b/third_party/rust/fluent-langneg/src/accepted_languages.rs
new file mode 100644
index 0000000000..58cf277703
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/accepted_languages.rs
@@ -0,0 +1,41 @@
+//! This module parses an `Accept-Language` string into a list of language tags that
+//! can later be passed to the language negotiation functions.
+//!
+//! # Example:
+//!
+//! ```
+//! use fluent_langneg::negotiate_languages;
+//! use fluent_langneg::NegotiationStrategy;
+//! use fluent_langneg::parse_accepted_languages;
+//! use fluent_langneg::convert_vec_str_to_langids_lossy;
+//! use unic_langid::LanguageIdentifier;
+//!
+//! let requested = parse_accepted_languages("de-AT;q=0.9,de-DE;q=0.8,de;q=0.7,en-US;q=0.5");
+//! let available = convert_vec_str_to_langids_lossy(&["fr", "pl", "de", "en-US"]);
+//! let default: LanguageIdentifier = "en-US".parse().expect("Failed to parse a langid.");
+//!
+//! let supported = negotiate_languages(
+//! &requested,
+//! &available,
+//! Some(&default),
+//! NegotiationStrategy::Filtering
+//! );
+//!
+//! let expected = convert_vec_str_to_langids_lossy(&["de", "en-US"]);
+//! assert_eq!(supported,
+//! expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>());
+//! ```
+//!
+//! This function ignores the weights associated with the locales, since Fluent Locale
+//! language negotiation only uses the order of locales, not the weights.
+//!
+
+use unic_langid::LanguageIdentifier;
+
+/// Parses an `Accept-Language` string into a list of language identifiers.
+///
+/// The input is split on `,`. For each entry, everything from the first `;`
+/// onwards (the weight) is discarded and the remaining tag is trimmed of
+/// surrounding whitespace. Empty entries and entries that do not parse as a
+/// `LanguageIdentifier` are skipped; the order of the remaining entries is
+/// preserved.
+pub fn parse(s: &str) -> Vec<LanguageIdentifier> {
+    s.split(',')
+        // Cut the weight off first and trim afterwards, so that optional
+        // whitespace before the `;` (as in "en-US ;q=0.8") does not stay
+        // attached to the tag and make it unparsable.
+        .filter_map(|entry| entry.split(';').next())
+        .map(str::trim)
+        .filter(|tag| !tag.is_empty())
+        .filter_map(|tag| tag.parse().ok())
+        .collect()
+}
diff --git a/third_party/rust/fluent-langneg/src/lib.rs b/third_party/rust/fluent-langneg/src/lib.rs
new file mode 100644
index 0000000000..865bfc2758
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/lib.rs
@@ -0,0 +1,49 @@
+//! fluent-langneg is an API for operating on locales and language tags.
+//! It's part of Project Fluent, a localization framework designed to unleash
+//! the expressive power of the natural language.
+//!
+//! The primary use of fluent-langneg is to parse/modify/serialize language tags
+//! and to perform language negotiation.
+//!
+//! fluent-langneg operates on a subset of [BCP47](http://tools.ietf.org/html/bcp47).
+//! It can parse full BCP47 language tags, and will serialize them back,
+//! but currently only allows for operations on primary subtags and
+//! unicode extension keys.
+//!
+//! As a result, fluent-langneg is not suited to replace full implementations of
+//! BCP47 like [rust-language-tags](https://github.com/pyfisch/rust-language-tags),
+//! but is arguably a better option for use cases involving operations on
+//! language tags and for language negotiation.
+
+pub mod accepted_languages;
+pub mod negotiate;
+
+pub use accepted_languages::parse as parse_accepted_languages;
+pub use negotiate::negotiate_languages;
+pub use negotiate::NegotiationStrategy;
+
+use unic_langid::{LanguageIdentifier, LanguageIdentifierError};
+
+/// Converts every item of `input` into a `LanguageIdentifier`.
+///
+/// Items are parsed in order with `LanguageIdentifier::from_bytes`. The first
+/// item that fails to parse aborts the conversion and its error is returned;
+/// otherwise all parsed identifiers are returned in input order.
+pub fn convert_vec_str_to_langids<'a, I, J>(
+    input: I,
+) -> Result<Vec<LanguageIdentifier>, LanguageIdentifierError>
+where
+    I: IntoIterator<Item = J>,
+    J: AsRef<[u8]> + 'a,
+{
+    let mut langids = Vec::new();
+    for raw in input {
+        // `?` stops at the first invalid item, leaving the rest unconsumed.
+        langids.push(LanguageIdentifier::from_bytes(raw.as_ref())?);
+    }
+    Ok(langids)
+}
+
+/// Converts the items of `input` into `LanguageIdentifier`s, skipping the
+/// ones that fail to parse.
+///
+/// The relative order of the successfully parsed items is preserved.
+pub fn convert_vec_str_to_langids_lossy<'a, I, J>(input: I) -> Vec<LanguageIdentifier>
+where
+    I: IntoIterator<Item = J>,
+    J: AsRef<[u8]> + 'a,
+{
+    let mut langids = Vec::new();
+    for raw in input {
+        // Parse failures are dropped on purpose: this is the lossy variant.
+        if let Ok(langid) = LanguageIdentifier::from_bytes(raw.as_ref()) {
+            langids.push(langid);
+        }
+    }
+    langids
+}
diff --git a/third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs b/third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs
new file mode 100644
index 0000000000..60a7b7a525
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs
@@ -0,0 +1,39 @@
+use unic_langid::LanguageIdentifier;
+
+/// Language codes for which the mock `maximize` sets the region to the value
+/// obtained by parsing the language code itself as a region subtag.
+static REGION_MATCHING_KEYS: &[&str] = &[
+    "az", "bg", "cs", "de", "es", "fi", "fr", "hu", "it", "lt", "lv", "nl", "pl", "ro", "ru",
+];
+
+/// Small hard-coded stand-in for a likely-subtags lookup.
+pub trait MockLikelySubtags {
+    /// Expands `self` in place using the hard-coded data.
+    ///
+    /// Returns `true` when either a hard-coded entry or one of the
+    /// region-matching keys applied, and `false` when nothing matched.
+    fn maximize(&mut self) -> bool;
+}
+
+impl MockLikelySubtags for LanguageIdentifier {
+    fn maximize(&mut self) -> bool {
+        // Exact matches on the serialized identifier take precedence.
+        let serialized = self.to_string();
+        let hard_coded = match serialized.as_str() {
+            "en" => Some("en-Latn-US"),
+            "fr" => Some("fr-Latn-FR"),
+            "sr" => Some("sr-Cyrl-SR"),
+            "sr-RU" => Some("sr-Latn-SR"),
+            "az-IR" => Some("az-Arab-IR"),
+            "zh-GB" => Some("zh-Hant-GB"),
+            "zh-US" => Some("zh-Hant-US"),
+            _ => None,
+        };
+
+        if let Some(extended) = hard_coded {
+            let langid: LanguageIdentifier = extended.parse().expect("Failed to parse langid.");
+            self.language = langid.language;
+            self.script = langid.script;
+            self.region = langid.region;
+            return true;
+        }
+
+        // Fallback: only the language is inspected here, and the region is
+        // replaced even if one was already present.
+        let lang = self.language;
+        match REGION_MATCHING_KEYS.iter().find(|key| lang == **key) {
+            Some(key) => {
+                self.region = Some(key.parse().unwrap());
+                true
+            }
+            None => false,
+        }
+    }
+}
diff --git a/third_party/rust/fluent-langneg/src/negotiate/mod.rs b/third_party/rust/fluent-langneg/src/negotiate/mod.rs
new file mode 100644
index 0000000000..4b3587fd40
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/negotiate/mod.rs
@@ -0,0 +1,233 @@
+//! Language Negotiation is a process in which locales from different
+//! sources are filtered and sorted in an effort to produce the best
+//! possible selection of them.
+//!
+//! There are multiple language negotiation strategies, most popular is
+//! described in [RFC4647](https://www.ietf.org/rfc/rfc4647.txt).
+//!
+//! The algorithm is based on the RFC4647 3.3.2 Extended Filtering algorithm,
+//! with several modifications.
+//!
+//! # Example:
+//!
+//! ```
+//! use fluent_langneg::negotiate_languages;
+//! use fluent_langneg::NegotiationStrategy;
+//! use fluent_langneg::convert_vec_str_to_langids_lossy;
+//! use unic_langid::LanguageIdentifier;
+//!
+//! let requested = convert_vec_str_to_langids_lossy(&["pl", "fr", "en-US"]);
+//! let available = convert_vec_str_to_langids_lossy(&["it", "de", "fr", "en-GB", "en_US"]);
+//! let default: LanguageIdentifier = "en-US".parse().expect("Parsing langid failed.");
+//!
+//! let supported = negotiate_languages(
+//! &requested,
+//! &available,
+//! Some(&default),
+//! NegotiationStrategy::Filtering
+//! );
+//!
+//! let expected = convert_vec_str_to_langids_lossy(&["fr", "en-US", "en-GB"]);
+//! assert_eq!(supported,
+//! expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>());
+//! ```
+//!
+//! # The exact algorithm is custom, and consists of a 6-level strategy:
+//!
+//! ### 1) Attempt to find an exact match for each requested locale in available locales.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-US"] * ["en-US"] = ["en-US"]
+//! ```
+//!
+//! ### 2) Attempt to match a requested locale to an available locale treated as a locale range.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-US"] * ["en"] = ["en"]
+//! ^^
+//! |-- becomes "en-*-*-*"
+//! ```
+//!
+//! ### 3) Maximize the requested locale to find the best match in available locales.
+//!
+//! This part uses ICU's likelySubtags or similar database.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en"] * ["en-GB", "en-US"] = ["en-US"]
+//! ^^ ^^^^^ ^^^^^
+//! | | |
+//! | |----------- become "en-*-GB-*" and "en-*-US-*"
+//! |
+//! |-- ICU likelySubtags expands it to "en-Latn-US"
+//! ```
+//!
+//! ### 4) Attempt to look up a different variant of the same locale.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["ja-JP-win"] * ["ja-JP-mac"] = ["ja-JP-mac"]
+//! ^^^^^^^^^ ^^^^^^^^^
+//! | |-- become "ja-*-JP-mac"
+//! |
+//! |----------- replace variant with range: "ja-JP-*"
+//! ```
+//!
+//! ### 5) Look up a maximized version of the requested locale, stripped of the region code.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-CA"] * ["en-ZA", "en-US"] = ["en-US", "en-ZA"]
+//! ^^^^^
+//! | ^^^^^ ^^^^^
+//! | | |
+//! | |----------- become "en-*-ZA-*" and "en-*-US-*"
+//! |
+//! |----------- strip region produces "en", then lookup likelySubtag: "en-Latn-US"
+//! ```
+//!
+//!
+//! ### 6) Attempt to look up a different region of the same locale.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-GB"] * ["en-AU"] = ["en-AU"]
+//! ^^^^^ ^^^^^
+//! | |-- become "en-*-AU-*"
+//! |
+//! |----- replace region with range: "en-*"
+//! ```
+//!
+
+use unic_langid::LanguageIdentifier;
+
+#[cfg(not(feature = "cldr"))]
+mod likely_subtags;
+#[cfg(not(feature = "cldr"))]
+use likely_subtags::MockLikelySubtags;
+
+/// Selects how many matches language negotiation collects.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum NegotiationStrategy {
+    /// For each requested locale, collect every available locale that
+    /// matches, across all matching steps.
+    Filtering,
+    /// For each requested locale, collect only the first available locale
+    /// that matches, then move on to the next requested locale.
+    Matching,
+    /// Stop the whole negotiation at the first match found.
+    Lookup,
+}
+
+/// Selects the entries of `available` that satisfy the `requested` locales.
+///
+/// Requested locales are processed in order, each one going through up to six
+/// progressively looser matching steps. An available locale that matches is
+/// moved to the result and is no longer considered for later matches, so the
+/// result contains no entry of `available` twice.
+///
+/// `strategy` controls how much is collected: `Filtering` takes every match of
+/// every step, `Matching` takes the first match per requested locale, and
+/// `Lookup` stops at the first match overall.
+pub fn filter_matches<'a, R: 'a + AsRef<LanguageIdentifier>, A: 'a + AsRef<LanguageIdentifier>>(
+    requested: &[R],
+    available: &'a [A],
+    strategy: NegotiationStrategy,
+) -> Vec<&'a A> {
+    let mut supported_locales = Vec::new();
+
+    // Available locales not yet claimed by any requested locale.
+    let mut remaining: Vec<&A> = available.iter().collect();
+
+    // Only `Filtering` keeps collecting after the first match within a step.
+    let collect_all = strategy == NegotiationStrategy::Filtering;
+
+    for wanted in requested {
+        let mut req = wanted.as_ref().to_owned();
+
+        // Runs one matching step against `remaining`. This is a macro rather
+        // than a closure so that it can `continue`/`break` the outer loop.
+        macro_rules! test_strategy {
+            ($self_as_range:expr, $other_as_range:expr) => {{
+                let mut match_found = false;
+                remaining.retain(|candidate| {
+                    let step_finished = match_found && !collect_all;
+                    if step_finished
+                        || !candidate
+                            .as_ref()
+                            .matches(&req, $self_as_range, $other_as_range)
+                    {
+                        // Keep the candidate available for later steps.
+                        return true;
+                    }
+                    match_found = true;
+                    supported_locales.push(*candidate);
+                    false
+                });
+
+                match strategy {
+                    NegotiationStrategy::Matching if match_found => continue,
+                    NegotiationStrategy::Lookup if match_found => break,
+                    _ => {}
+                }
+            }};
+        }
+
+        // Step 1: exact match between the requested and an available locale.
+        test_strategy!(false, false);
+
+        // Step 2: match with the available locales treated as ranges.
+        test_strategy!(true, false);
+
+        // Per Unicode TR35, 4.4 Locale Matching, a requested locale with an
+        // empty language does not get likely subtags added, so the remaining
+        // steps are skipped for it.
+        if req.language.is_empty() {
+            continue;
+        }
+
+        // Step 3: match a maximized version of the requested locale.
+        if req.maximize() {
+            test_strategy!(true, false);
+        }
+
+        // Step 4: drop the variants and match with both sides as ranges.
+        req.clear_variants();
+        test_strategy!(true, true);
+
+        // Step 5: drop the region, maximize again and match.
+        req.region = None;
+        if req.maximize() {
+            test_strategy!(true, false);
+        }
+
+        // Step 6: drop the region once more and match with both sides as ranges.
+        req.region = None;
+        test_strategy!(true, true);
+    }
+
+    supported_locales
+}
+
+/// Negotiates `requested` against `available` and applies the `default`.
+///
+/// The matches come from `filter_matches`. When a `default` is given it is
+/// appended as follows: with `NegotiationStrategy::Lookup` only if nothing
+/// matched at all, and with the other strategies whenever it is not already
+/// part of the result.
+pub fn negotiate_languages<
+    'a,
+    R: 'a + AsRef<LanguageIdentifier>,
+    A: 'a + AsRef<LanguageIdentifier> + PartialEq,
+>(
+    requested: &[R],
+    available: &'a [A],
+    default: Option<&'a A>,
+    strategy: NegotiationStrategy,
+) -> Vec<&'a A> {
+    let mut supported = filter_matches(requested, available, strategy);
+
+    if let Some(fallback) = default {
+        let append_fallback = match strategy {
+            // Lookup yields a single locale, so the default only fills an empty result.
+            NegotiationStrategy::Lookup => supported.is_empty(),
+            NegotiationStrategy::Filtering | NegotiationStrategy::Matching => {
+                !supported.contains(&fallback)
+            }
+        };
+        if append_fallback {
+            supported.push(fallback);
+        }
+    }
+
+    supported
+}