8 files changed, 577 insertions, 0 deletions
diff --git a/third_party/rust/fluent-langneg/.cargo-checksum.json b/third_party/rust/fluent-langneg/.cargo-checksum.json
new file mode 100644
index 0000000000..bf0abede56
--- /dev/null
+++ b/third_party/rust/fluent-langneg/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"1b11d8d30fe978704012e27981f8d50a3462319594b54ed2e71eaf85284d61eb","README.md":"a4f17c795725dcb84cdf1e327a61306e82aaa2ca1908c9ea95c0fbe9d53216fd","benches/negotiate.rs":"f14c49d75413fb4b248f8f586c046340d61f0682eb0860db326f1f415e1bceb9","src/accepted_languages.rs":"74fe73bb8c3f36d3b8b85bfdc55731c234c20e92245b0f89eb1e8b68af47c17c","src/lib.rs":"529e3c9810688c3a5d216c977b968a775f83a85c2da90d669f2cfc5eb6c71361","src/negotiate/likely_subtags.rs":"44531e2bbf3a2155771f197f863dffdce403d3e8dd0e1d4f36f7178e52e5a3a3","src/negotiate/mod.rs":"e8aa5ecf08b866d83c957230586cb9c03880473406d7cca28cadf9e883310a15"},"package":"2c4ad0989667548f06ccd0e306ed56b61bd4d35458d54df5ec7587c0e8ed5e94"}
+\ No newline at end of file
diff --git a/third_party/rust/fluent-langneg/Cargo.toml b/third_party/rust/fluent-langneg/Cargo.toml
new file mode 100644
index 0000000000..58aae3c672
--- /dev/null
+++ b/third_party/rust/fluent-langneg/Cargo.toml
@@ -0,0 +1,61 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+edition = "2018"
+name = "fluent-langneg"
+version = "0.13.0"
+authors = ["Zibi Braniecki <gandalf@mozilla.com>"]
+include = ["src/**/*", "benches/*.rs", "Cargo.toml", "README.md"]
+description = "A library for language and locale negotiation.\n"
+homepage = "http://projectfluent.org/"
+readme = "README.md"
+categories = ["internationalization", "localization"]
+license = "Apache-2.0"
+repository = "https://github.com/projectfluent/fluent-langneg-rs"
+
+[[bench]]
+name = "negotiate"
+harness = false
+[dependencies.unic-langid]
+version = "0.9"
+[dev-dependencies.criterion]
+version = "0.3"
+
+[dev-dependencies.serde]
+version = "1.0"
+features = ["derive"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+
+[dev-dependencies.unic-langid]
+version = "0.9"
+features = ["macros"]
+
+[dev-dependencies.unic-locale]
+version = "0.9"
+features = ["macros"]
+
+[features]
+cldr = ["unic-langid/likelysubtags"]
+default = []
+[badges.coveralls]
+branch = "master"
+repository = "projectfluent/fluent-langneg-rs"
+service = "github"
+
+[badges.maintenance]
+status = "actively-developed"
+
+[badges.travis-ci]
+repository = "projectfluent/fluent-langneg-rs"
diff --git a/third_party/rust/fluent-langneg/README.md b/third_party/rust/fluent-langneg/README.md
new file mode 100644
index 0000000000..bdff7649f8
--- /dev/null
+++ b/third_party/rust/fluent-langneg/README.md
@@ -0,0 +1,113 @@
+# Fluent LangNeg
+
+**Fluent LangNeg is a library for language and locale identifier negotiation.**
+
+[![crates.io](http://meritbadge.herokuapp.com/fluent-langneg)](https://crates.io/crates/fluent-langneg)
+[![Build Status](https://travis-ci.org/projectfluent/fluent-langneg-rs.svg?branch=master)](https://travis-ci.org/projectfluent/fluent-langneg-rs)
+[![Coverage Status](https://coveralls.io/repos/github/projectfluent/fluent-langneg-rs/badge.svg?branch=master)](https://coveralls.io/github/projectfluent/fluent-langneg-rs?branch=master)
+
+Introduction
+------------
+
+This is a Rust implementation of the fluent-langneg library, which is a part of Project Fluent.
+
+The library uses [unic-langid](https://github.com/zbraniecki/unic-locale) and [unic-locale](https://github.com/zbraniecki/unic-locale) to retrieve and operate on Unicode Language and Locale Identifiers.
+The library provides an algorithm for negotiating between lists of locales.
+
+Usage
+-----
+
+```rust
+use fluent_langneg::negotiate_languages;
+use fluent_langneg::NegotiationStrategy;
+use fluent_langneg::convert_vec_str_to_langids_lossy;
+use unic_langid::LanguageIdentifier;
+
+// Since langid parsing from string is fallible, we'll use a helper
+// function which strips any langids that failed to parse.
+let requested = convert_vec_str_to_langids_lossy(&["de-DE", "fr-FR", "en-US"]);
+let available = convert_vec_str_to_langids_lossy(&["it", "fr", "de-AT", "fr-CA", "en-US"]);
+let default: LanguageIdentifier = "en-US".parse().expect("Parsing langid failed.");
+
+let supported = negotiate_languages(
+  &requested,
+  &available,
+  Some(&default),
+  NegotiationStrategy::Filtering
+);
+
+let expected = convert_vec_str_to_langids_lossy(&["de-AT", "fr", "fr-CA", "en-US"]);
+assert_eq!(supported,
+            expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>());
+```
+
+See [docs.rs][] for more examples.
+
+[docs.rs]: https://docs.rs/fluent-langneg/
+
+Status
+------
+
+The implementation is complete according to fluent-langneg
+corpus of tests, which means that it parses, serializes and negotiates as expected.
+
+The negotiation methods can operate on lists of `LanguageIdentifier` or `Locale`.
+
+The remaining work on the path to 1.0 is to gain in-field experience of using it,
+add more tests and ensure that bad input is correctly handled.
+
+Compatibility
+-------------
+
+The API is based on [UTS 35][] definition of [Unicode Locale Identifier][] and is aiming to
+parse and serialize all locale identifiers according to that definition.
+
+*Note*: Unicode Locale Identifier is similar to, but different from, what [BCP47][] specifies under
+the name Language Tag.
+For most locale management and negotiation needs, the Unicode Locale Identifier used in this crate is likely a better choice,
+but in some cases, like HTTP `Accept-Language` headers, you may need the complete BCP47 Language Tag implementation which
+this crate does not provide.
+
+Language negotiation algorithms are custom Project Fluent solutions,
+based on [RFC4647][].
+
+The language negotiation strategies aim to replicate the best-effort matches with
+the most limited amount of data. The algorithm returns reasonable
+results without any database, but the results can be improved with either limited
+or full [CLDR likely-subtags][] database.
+
+The result is a balance chosen for Project Fluent and may differ from other
+implementations of language negotiation algorithms which may choose different
+tradeoffs.
+
+[BCP47]: https://tools.ietf.org/html/bcp47
+[RFC6067]: https://www.ietf.org/rfc/rfc6067.txt
+[UTS 35]: http://www.unicode.org/reports/tr35/#Locale_Extension_Key_and_Type_Data
+[RFC4647]: https://tools.ietf.org/html/rfc4647
+[CLDR likely-subtags]: http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
+[Unicode Locale Identifier]: http://unicode.org/reports/tr35/#Identifiers
+
+Alternatives
+------------
+
+Although Fluent LangNeg aims to stay close to W3C Accepted Languages, it does not aim
+to implement the full behavior and some aspects of the language negotiation strategy
+recommended by W3C, such as weights, are not a target right now.
+
+For such purposes, [rust-language-tags][] crate seems to be a better choice.
+
+[rust-language-tags]: https://github.com/pyfisch/rust-language-tags
+
+Performance
+-----------
+
+The crate is considered to be fully optimized for production.
+
+
+Develop
+-------
+
+    cargo build
+    cargo test
+    cargo bench
+
diff --git a/third_party/rust/fluent-langneg/benches/negotiate.rs b/third_party/rust/fluent-langneg/benches/negotiate.rs
new file mode 100644
index 0000000000..2ca70d59ec
--- /dev/null
+++ b/third_party/rust/fluent-langneg/benches/negotiate.rs
@@ -0,0 +1,40 @@
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use fluent_langneg::convert_vec_str_to_langids_lossy;
+use fluent_langneg::negotiate_languages;
+
+use unic_langid::LanguageIdentifier;
+
+/// Thin wrapper around `negotiate_languages` with no default locale and `Filtering`.
+///
+/// NOTE(review): `#[no_mangle]` + `#[inline(never)]` presumably keep this call visible
+/// as a distinct symbol when profiling the benchmark — confirm.
+#[no_mangle]
+#[inline(never)]
+fn do_negotiate<'a>(
+    requested: &[LanguageIdentifier],
+    available: &'a [LanguageIdentifier],
+) -> Vec<&'a LanguageIdentifier> {
+    let strategy = fluent_langneg::NegotiationStrategy::Filtering;
+    negotiate_languages(requested, available, None, strategy)
+}
+
+/// Registers the `negotiate` benchmark: three requested locales against fifteen
+/// available ones, negotiated through `do_negotiate`.
+fn negotiate_bench(c: &mut Criterion) {
+    // Both lists are parsed up front, so the timed closure only runs the negotiation.
+    let requested = convert_vec_str_to_langids_lossy(&["de", "it", "ru"]);
+    let available = convert_vec_str_to_langids_lossy(&[
+        "en-US", "fr", "de", "en-GB", "it", "pl", "ru", "sr-Cyrl", "sr-Latn", "zh-Hant", "zh-Hans",
+        "ja-JP", "he-IL", "de-DE", "de-IT",
+    ]);
+
+    c.bench_function("negotiate", |b| {
+        b.iter(|| do_negotiate(&requested, &available))
+    });
+}
+
+criterion_group!(benches, negotiate_bench);
+criterion_main!(benches);
diff --git a/third_party/rust/fluent-langneg/src/accepted_languages.rs b/third_party/rust/fluent-langneg/src/accepted_languages.rs
new file mode 100644
index 0000000000..58cf277703
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/accepted_languages.rs
@@ -0,0 +1,41 @@
+//! The `parse` function in this module turns an `Accept-Language` string into a list of
+//! language identifiers that can later be passed to the language negotiation functions.
+//!
+//! # Example:
+//!
+//! ```
+//! use fluent_langneg::negotiate_languages;
+//! use fluent_langneg::NegotiationStrategy;
+//! use fluent_langneg::parse_accepted_languages;
+//! use fluent_langneg::convert_vec_str_to_langids_lossy;
+//! use unic_langid::LanguageIdentifier;
+//!
+//! let requested = parse_accepted_languages("de-AT;0.9,de-DE;0.8,de;0.7,en-US;0.5");
+//! let available = convert_vec_str_to_langids_lossy(&["fr", "pl", "de", "en-US"]);
+//! let default: LanguageIdentifier = "en-US".parse().expect("Failed to parse a langid.");
+//!
+//! let supported = negotiate_languages(
+//!   &requested,
+//!   &available,
+//!   Some(&default),
+//!   NegotiationStrategy::Filtering
+//! );
+//!
+//! let expected = convert_vec_str_to_langids_lossy(&["de", "en-US"]);
+//! assert_eq!(supported,
+//!            expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>());
+//! ```
+//!
+//! This function ignores the weights associated with the locales, since Fluent Locale
+//! language negotiation only uses the order of locales, not the weights.
+//!
+
+use unic_langid::LanguageIdentifier;
+
+/// Parses a comma-separated `Accept-Language` list, dropping weights and unparsable tags.
+/// Each entry is cut at `;` before it is trimmed, so `de ;q=0.9` still yields `de`.
+pub fn parse(s: &str) -> Vec<LanguageIdentifier> {
+    let tags = s.split(',').filter_map(|entry| entry.split(';').next());
+    let tags = tags.map(str::trim).filter(|tag| !tag.is_empty());
+    tags.filter_map(|tag| tag.parse().ok()).collect()
+}
diff --git a/third_party/rust/fluent-langneg/src/lib.rs b/third_party/rust/fluent-langneg/src/lib.rs
new file mode 100644
index 0000000000..865bfc2758
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/lib.rs
@@ -0,0 +1,49 @@
+//! fluent-langneg is an API for operating on locales and language tags.
+//! It's part of Project Fluent, a localization framework designed to unleash
+//! the expressive power of the natural language.
+//!
+//! The primary use of fluent-langneg is to parse/modify/serialize language tags
+//! and to perform language negotiation.
+//!
+//! fluent-langneg operates on a subset of [BCP47](http://tools.ietf.org/html/bcp47).
+//! It can parse full BCP47 language tags, and will serialize them back,
+//! but currently only allows for operations on primary subtags and
+//! unicode extension keys.
+//!
+//! As a result, fluent-langneg is not suited to replace full implementations of
+//! BCP47 like [rust-language-tags](https://github.com/pyfisch/rust-language-tags),
+//! but is arguably a better option for use cases involving operations on
+//! language tags and for language negotiation.
+
+pub mod accepted_languages;
+pub mod negotiate;
+
+pub use accepted_languages::parse as parse_accepted_languages;
+pub use negotiate::negotiate_languages;
+pub use negotiate::NegotiationStrategy;
+
+use unic_langid::{LanguageIdentifier, LanguageIdentifierError};
+
+/// Parses every item of `input` as a `LanguageIdentifier`, keeping the input order.
+/// The first item that fails to parse aborts the conversion and its error is returned.
+pub fn convert_vec_str_to_langids<'a, I, J>(
+    input: I,
+) -> Result<Vec<LanguageIdentifier>, LanguageIdentifierError>
+where
+    I: IntoIterator<Item = J>,
+    J: AsRef<[u8]> + 'a,
+{
+    let parse_one = |raw: J| LanguageIdentifier::from_bytes(raw.as_ref());
+    input.into_iter().map(parse_one).collect()
+}
+
+/// Parses the items of `input` as `LanguageIdentifier`s, keeping the input order.
+/// Items that fail to parse are skipped silently instead of producing an error.
+pub fn convert_vec_str_to_langids_lossy<'a, I, J>(input: I) -> Vec<LanguageIdentifier>
+where
+    I: IntoIterator<Item = J>,
+    J: AsRef<[u8]> + 'a,
+{
+    let parse_one = |raw: J| LanguageIdentifier::from_bytes(raw.as_ref()).ok();
+    input.into_iter().filter_map(parse_one).collect()
+}
diff --git a/third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs b/third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs
new file mode 100644
index 0000000000..60a7b7a525
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/negotiate/likely_subtags.rs
@@ -0,0 +1,39 @@
+use unic_langid::LanguageIdentifier;
+
+static REGION_MATCHING_KEYS: &[&str] = &[
+    "az", "bg", "cs", "de", "es", "fi", "fr", "hu", "it", "lt", "lv", "nl", "pl", "ro", "ru",
+]; // languages for which the mock `maximize` sets the region to the language code
+
+pub trait MockLikelySubtags {
+    fn maximize(&mut self) -> bool; // the impl in this file returns `true` iff a rule applied
+}
+
+impl MockLikelySubtags for LanguageIdentifier {
+    /// Mock maximization backed by a few hard-coded rules instead of likely-subtags data.
+    /// Returns `true` if a rule was applied to `self` and `false` if none matched.
+    fn maximize(&mut self) -> bool {
+        let extended = match self.to_string().as_str() {
+            "en" => "en-Latn-US",
+            "fr" => "fr-Latn-FR",
+            "sr" => "sr-Cyrl-SR",
+            "sr-RU" => "sr-Latn-SR",
+            "az-IR" => "az-Arab-IR",
+            "zh-GB" => "zh-Hant-GB",
+            "zh-US" => "zh-Hant-US",
+            _ => {
+                // No exact rule: listed languages reuse their own code as the region.
+                let lang = self.language;
+                let hit = REGION_MATCHING_KEYS.iter().find(|key| lang == **key);
+                if let Some(key) = hit {
+                    self.region = Some(key.parse().unwrap());
+                }
+                return hit.is_some();
+            }
+        };
+        let langid: LanguageIdentifier = extended.parse().expect("Failed to parse langid.");
+        self.language = langid.language;
+        self.script = langid.script;
+        self.region = langid.region;
+        true
+    }
+}
diff --git a/third_party/rust/fluent-langneg/src/negotiate/mod.rs b/third_party/rust/fluent-langneg/src/negotiate/mod.rs
new file mode 100644
index 0000000000..4b3587fd40
--- /dev/null
+++ b/third_party/rust/fluent-langneg/src/negotiate/mod.rs
@@ -0,0 +1,233 @@
+//! Language Negotiation is a process in which locales from different
+//! sources are filtered and sorted in an effort to produce the best
+//! possible selection of them.
+//!
+//! There are multiple language negotiation strategies; the most popular one is
+//! described in [RFC4647](https://www.ietf.org/rfc/rfc4647.txt).
+//!
+//! The algorithm is based on the RFC4647 3.3.2 Extended Filtering algorithm,
+//! with several modifications.
+//!
+//! # Example:
+//!
+//! ```
+//! use fluent_langneg::negotiate_languages;
+//! use fluent_langneg::NegotiationStrategy;
+//! use fluent_langneg::convert_vec_str_to_langids_lossy;
+//! use unic_langid::LanguageIdentifier;
+//!
+//! let requested = convert_vec_str_to_langids_lossy(&["pl", "fr", "en-US"]);
+//! let available = convert_vec_str_to_langids_lossy(&["it", "de", "fr", "en-GB", "en_US"]);
+//! let default: LanguageIdentifier = "en-US".parse().expect("Parsing langid failed.");
+//!
+//! let supported = negotiate_languages(
+//!   &requested,
+//!   &available,
+//!   Some(&default),
+//!   NegotiationStrategy::Filtering
+//! );
+//!
+//! let expected = convert_vec_str_to_langids_lossy(&["fr", "en-US", "en-GB"]);
+//! assert_eq!(supported,
+//!            expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>());
+//! ```
+//!
+//! # The exact algorithm is custom, and consists of a 6-level strategy:
+//!
+//! ### 1) Attempt to find an exact match for each requested locale in available locales.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-US"] * ["en-US"] = ["en-US"]
+//! ```
+//!
+//! ### 2) Attempt to match a requested locale to an available locale treated as a locale range.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-US"] * ["en"] = ["en"]
+//!               ^^
+//!                |-- becomes "en-*-*-*"
+//! ```
+//!
+//! ### 3) Maximize the requested locale to find the best match in available locales.
+//!
+//! This part uses ICU's likelySubtags or similar database.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en"] * ["en-GB", "en-US"] = ["en-US"]
+//!   ^^       ^^^^^    ^^^^^
+//!    |           |        |
+//!    |           |----------- become "en-*-GB-*" and "en-*-US-*"
+//!    |
+//!    |-- ICU likelySubtags expands it to "en-Latn-US"
+//! ```
+//!
+//! ### 4) Attempt to look up a different variant of the same locale.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["ja-JP-win"] * ["ja-JP-mac"] = ["ja-JP-mac"]
+//!   ^^^^^^^^^       ^^^^^^^^^
+//!           |               |-- become "ja-*-JP-mac"
+//!           |
+//!           |----------- replace variant with range: "ja-JP-*"
+//! ```
+//!
+//! ### 5) Look up a maximized version of the requested locale, stripped of the region code.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-CA"] * ["en-ZA", "en-US"] = ["en-US", "en-ZA"]
+//!   ^^^^^
+//!       |       ^^^^^    ^^^^^
+//!       |           |        |
+//!       |           |----------- become "en-*-ZA-*" and "en-*-US-*"
+//!       |
+//!       |----------- strip region produces "en", then lookup likelySubtag: "en-Latn-US"
+//! ```
+//!
+//!
+//! ### 6) Attempt to look up a different region of the same locale.
+//!
+//! Example:
+//!
+//! ```text
+//! // [requested] * [available] = [supported]
+//!
+//! ["en-GB"] * ["en-AU"] = ["en-AU"]
+//!   ^^^^^       ^^^^^
+//!       |           |-- become "en-*-AU-*"
+//!       |
+//!       |----- replace region with range: "en-*"
+//! ```
+//!
+
+use unic_langid::LanguageIdentifier;
+
+#[cfg(not(feature = "cldr"))]
+mod likely_subtags;
+#[cfg(not(feature = "cldr"))]
+use likely_subtags::MockLikelySubtags;
+
+#[derive(PartialEq, Debug, Clone, Copy)]
+pub enum NegotiationStrategy {
+    Filtering, // keep every available locale that matches a requested one
+    Matching, // keep at most one match per requested locale
+    Lookup, // stop at the first requested locale that produces a match
+}
+/// Collects the `available` locales matching `requested`, in the order the matches are found.
+pub fn filter_matches<'a, R: 'a + AsRef<LanguageIdentifier>, A: 'a + AsRef<LanguageIdentifier>>(
+    requested: &[R],
+    available: &'a [A],
+    strategy: NegotiationStrategy,
+) -> Vec<&'a A> {
+    let mut supported_locales = vec![];
+    // Pool of not-yet-matched candidates; a matched locale is removed so it is returned once.
+    let mut available_locales: Vec<&A> = available.iter().collect();
+
+    for req in requested {
+        let mut req = req.as_ref().to_owned();
+        macro_rules! test_strategy {
+            ($self_as_range:expr, $other_as_range:expr) => {{
+                let mut match_found = false;
+                available_locales.retain(|locale| {
+                    if strategy != NegotiationStrategy::Filtering && match_found {
+                        return true; // keep the rest: only the first match counts
+                    }
+                    // A matching locale moves from the pool into the result.
+                    if locale
+                        .as_ref()
+                        .matches(&req, $self_as_range, $other_as_range)
+                    {
+                        match_found = true;
+                        supported_locales.push(*locale);
+                        return false;
+                    }
+                    true
+                });
+                // `continue`/`break` below act on the enclosing `for req in requested` loop.
+                if match_found {
+                    match strategy {
+                        NegotiationStrategy::Filtering => {}
+                        NegotiationStrategy::Matching => continue,
+                        NegotiationStrategy::Lookup => break,
+                    }
+                }
+            }};
+        }
+
+        // 1) Try to find a simple (case-insensitive) string match for the request.
+        test_strategy!(false, false);
+
+        // 2) Try to match against the available locales treated as ranges.
+        test_strategy!(true, false);
+
+        // Per Unicode TR35, 4.4 Locale Matching, likely subtags are not added to a request
+        // whose language subtag is empty, so the remaining steps are skipped for it.
+        if req.language.is_empty() {
+            continue;
+        }
+
+        // 3) Try to match against a maximized version of the requested locale
+        if req.maximize() {
+            test_strategy!(true, false);
+        }
+
+        // 4) Try to match against a variant as a range
+        req.clear_variants();
+        test_strategy!(true, true);
+
+        // 5) Try to match against the likely subtag without region
+        req.region = None;
+        if req.maximize() {
+            test_strategy!(true, false);
+        }
+
+        // 6) Try to match against a region as a range
+        req.region = None;
+        test_strategy!(true, true);
+    }
+
+    supported_locales
+}
+
+/// Negotiates `requested` against `available` via `filter_matches`, then applies `default`.
+/// With `NegotiationStrategy::Lookup` the default is appended only when nothing matched;
+/// with the other strategies it is appended unless the result already contains it.
+pub fn negotiate_languages<
+    'a,
+    R: 'a + AsRef<LanguageIdentifier>,
+    A: 'a + AsRef<LanguageIdentifier> + PartialEq,
+>(
+    requested: &[R],
+    available: &'a [A],
+    default: Option<&'a A>,
+    strategy: NegotiationStrategy,
+) -> Vec<&'a A> {
+    let mut supported = filter_matches(requested, available, strategy);
+    let fallback = default.filter(|locale| match strategy {
+        NegotiationStrategy::Lookup => supported.is_empty(),
+        NegotiationStrategy::Filtering | NegotiationStrategy::Matching => {
+            !supported.contains(locale)
+        }
+    });
+    supported.extend(fallback);
+    supported
+}