4 files changed, 399 insertions, 0 deletions
diff --git a/netwerk/base/rust-helper/Cargo.toml b/netwerk/base/rust-helper/Cargo.toml
new file mode 100644
index 0000000000..e522dae38b
--- /dev/null
+++ b/netwerk/base/rust-helper/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "netwerk_helper"
+version = "0.0.1"
+authors = ["Jeff Hemphill <jthemphill@mozilla.com>"]
+
+[dependencies]
+nserror = { path = "../../../xpcom/rust/nserror" }
+nsstring = { path = "../../../xpcom/rust/nsstring" }
+thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
+\ No newline at end of file
diff --git a/netwerk/base/rust-helper/cbindgen.toml b/netwerk/base/rust-helper/cbindgen.toml
new file mode 100644
index 0000000000..1e5e235576
--- /dev/null
+++ b/netwerk/base/rust-helper/cbindgen.toml
@@ -0,0 +1,18 @@
+header = """/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
+autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. */"""
+include_guard = "mozilla_net_rustHelper_h"
+include_version = true
+braces = "SameLine"
+line_length = 100
+tab_width = 2
+language = "C++"
+namespaces = ["mozilla", "net"]
+includes = ["nsError.h", "nsString.h"]
+
+[export]
+item_types = ["globals", "enums", "structs", "unions", "typedefs", "opaque", "functions", "constants"]
+
+[export.rename]
+"ThinVec" = "nsTArray"
diff --git a/netwerk/base/rust-helper/moz.build b/netwerk/base/rust-helper/moz.build
new file mode 100644
index 0000000000..1f7512ecf9
--- /dev/null
+++ b/netwerk/base/rust-helper/moz.build
@@ -0,0 +1,12 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    CbindgenHeader("rust_helper.h", inputs=["/netwerk/base/rust-helper"])
+
+    EXPORTS.mozilla.net += [
+        "!rust_helper.h",
+    ]
diff --git a/netwerk/base/rust-helper/src/lib.rs b/netwerk/base/rust-helper/src/lib.rs
new file mode 100644
index 0000000000..df6e4af1e4
--- /dev/null
+++ b/netwerk/base/rust-helper/src/lib.rs
@@ -0,0 +1,360 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate nserror;
+use self::nserror::*;
+
+extern crate nsstring;
+use self::nsstring::{nsACString, nsCString};
+
+extern crate thin_vec;
+use self::thin_vec::ThinVec;
+
+use std::fs::File;
+use std::io::{self, BufRead};
+use std::net::Ipv4Addr;
+
+/// HTTP leading whitespace (space and tab), defined in
+/// netwerk/protocol/http/nsHttp.h
+static HTTP_LWS: &'static [u8] = &[b' ', b'\t'];
+
+/// Returns the part of `token` that follows any leading whitespace and
+/// ends before the first `;` (start of a quality-value) or whitespace.
+fn trim_token(token: &[u8]) -> &[u8] {
+    // Skip the leading whitespace.
+    let start = token
+        .iter()
+        .position(|byte| !HTTP_LWS.contains(byte))
+        .unwrap_or(token.len());
+    let rest = &token[start..];
+
+    // Cut at the first ';' (as in "; q=...") or whitespace byte, if any.
+    let end = rest
+        .iter()
+        .position(|byte| *byte == b';' || HTTP_LWS.contains(byte))
+        .unwrap_or(rest.len());
+    &rest[..end]
+}
+
+#[no_mangle]
+/// Appends to `o_accept_languages` an ISO 639 language list notated
+/// with HTTP "q" values, suitable for an HTTP Accept-Language header.
+/// Any q values in the input are stripped because the order of the
+/// langs implies the q value. The new q values step down evenly from
+/// 1.0, in increments of 1.0 divided by the number of languages.
+///
+/// Ex: passing: "en, ja"
+///     returns: "en,ja;q=0.5"
+///
+///     passing: "en, ja, fr-ca"
+///     returns: "en,ja;q=0.7,fr-CA;q=0.3"
+pub extern "C" fn rust_prepare_accept_languages<'a, 'b>(
+    i_accept_languages: &'a nsACString,
+    o_accept_languages: &'b mut nsACString,
+) -> nsresult {
+    // An empty input produces no output at all.
+    if i_accept_languages.is_empty() {
+        return NS_OK;
+    }
+
+    // Called twice: once to count the languages, once to emit them.
+    let languages = || {
+        i_accept_languages
+            .split(|byte| *byte == b',')
+            .map(|raw| trim_token(raw))
+            .filter(|lang| !lang.is_empty())
+    };
+
+    let total = languages().count();
+
+    for (index, lang) in languages().enumerate() {
+        // Every language after the first is preceded by a comma.
+        if index > 0 {
+            o_accept_languages.append(",");
+        }
+
+        // Append the tag as given, then canonicalize it in place.
+        let tag_start = o_accept_languages.len();
+        o_accept_languages.append(&lang as &[u8]);
+        canonicalize_language_tag(&mut o_accept_languages.to_mut()[tag_start..]);
+
+        // Divide the quality-values evenly among the languages.
+        let quality = 1.0 - index as f32 / total as f32;
+        let hundredths: u32 = ((quality + 0.005) * 100.0) as u32;
+
+        // A q-value of 1.00 is never written out.
+        if hundredths >= 100 {
+            continue;
+        }
+
+        let q_suffix = if total < 10 || hundredths % 10 == 0 {
+            // With fewer than ten languages one decimal place is enough
+            // to prevent duplicate q-values, and a trailing zero adds no
+            // information, so only tenths are written.
+            format!(";q=0.{}", (hundredths + 5) / 10)
+        } else {
+            // Two decimal places; values below 10 require zero padding.
+            format!(";q=0.{:02}", hundredths)
+        };
+        o_accept_languages.append(&q_suffix);
+    }
+
+    NS_OK
+}
+
+/// Rewrites a language tag in place so that its subtags use a consistent,
+/// conventional capitalization.
+///
+/// # Arguments
+/// * `token` - a narrow char slice describing a language.
+///
+/// Valid language tags are of the form
+/// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN"
+///
+/// For example, "EN-us" becomes "en-US", "az-arab" becomes "az-Arab" and
+/// "X-PIG-LATIN" becomes "x-pig-latin".
+///
+/// Language tags are defined in the
+/// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to
+/// the spec:
+///
+/// > At all times, language tags and their subtags, including private
+/// > use and extensions, are to be treated as case insensitive: there
+/// > exist conventions for the capitalization of some of the subtags,
+/// > but these MUST NOT be taken to carry meaning.
+///
+/// So why is this code even here? See bug 1108183, I guess.
+fn canonicalize_language_tag(token: &mut [u8]) {
+    // Start from an all-lowercase tag; the loop below raises selected
+    // subtags only.
+    token.make_ascii_lowercase();
+
+    // The first subtag is the primary language code (the "en" in
+    // "en-US") and stays lowercase, hence the `skip(1)`.
+    for sub_tag in token.split_mut(|byte| *byte == b'-').skip(1) {
+        match sub_tag.len() {
+            // Singleton tag, like "x" or "i". These signify a
+            // non-standard language, so nothing after one of them is
+            // capitalized.
+            1 => break,
+            // ISO 3166-1 country code, like "US": all uppercase.
+            2 => {
+                sub_tag.make_ascii_uppercase();
+            }
+            // ISO 15924 script code, like "Nkoo": initial capital only.
+            4 => {
+                sub_tag[0].make_ascii_uppercase();
+            }
+            // Any other length (e.g. "419") is left lowercase.
+            _ => {}
+        }
+    }
+}
+
+/// C-callable entry point that applies `is_valid_ipv4_addr` to the bytes
+/// of `addr`.
+#[no_mangle]
+pub extern "C" fn rust_net_is_valid_ipv4_addr<'a>(addr: &'a nsACString) -> bool {
+    is_valid_ipv4_addr(addr)
+}
+
+/// Appends the decimal digit `digit_to_apply` to `current_octet`, or
+/// returns `None` if the result does not fit in a `u8` (exceeds 255).
+#[inline]
+fn try_apply_digit(current_octet: u8, digit_to_apply: u8) -> Option<u8> {
+    current_octet.checked_mul(10)?.checked_add(digit_to_apply)
+}
+
+/// Returns true if `addr` consists of exactly four decimal octets (0-255,
+/// written without leading zeros) separated by single dots.
+///
+/// For example `192.168.0.1` is accepted, while `1.2.3`, `1.2.3.4.`,
+/// `01.2.3.4` and `256.1.1.1` are rejected.
+pub fn is_valid_ipv4_addr<'a>(addr: &'a [u8]) -> bool {
+    // Value of the octet being read; `None` until its first digit is seen.
+    let mut current_octet: Option<u8> = None;
+    let mut dots: u8 = 0;
+    for &byte in addr {
+        if byte == b'.' {
+            // A dot must follow at least one digit, and a fourth dot can
+            // never be valid. Rejecting it right away also keeps the `u8`
+            // counter from overflowing on inputs with more than 255 dots.
+            if current_octet.is_none() || dots == 3 {
+                return false;
+            }
+            dots += 1;
+            current_octet = None;
+            continue;
+        }
+
+        // Anything other than a dot has to be a decimal digit.
+        let digit = match (byte as char).to_digit(10) {
+            Some(value) => value as u8,
+            None => return false,
+        };
+
+        current_octet = match current_octet {
+            // First digit of the octet.
+            None => Some(digit),
+            // Leading 0 is not allowed
+            Some(0) => return false,
+            Some(octet) => match try_apply_digit(octet, digit) {
+                Some(applied) => Some(applied),
+                // Multiplication or addition overflowed: octet > 255
+                None => return false,
+            },
+        };
+    }
+
+    // Exactly three separators, and the last octet must not be empty.
+    dots == 3 && current_octet.is_some()
+}
+
+/// C-callable wrapper around `is_valid_ipv6_addr`.
+#[no_mangle]
+pub extern "C" fn rust_net_is_valid_ipv6_addr<'a>(addr: &'a nsACString) -> bool {
+    is_valid_ipv6_addr(addr)
+}
+
+/// Returns true if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
+#[inline(always)]
+fn fast_is_hex_digit(c: u8) -> bool {
+    c.is_ascii_hexdigit()
+}
+
+/// Returns true if `addr` is a textual IPv6 address: eight colon-separated
+/// groups of one to four hex digits, where a single `::` may stand in for
+/// omitted groups and a dotted IPv4 address may replace the last two groups.
+pub fn is_valid_ipv6_addr<'a>(addr: &'a [u8]) -> bool {
+    // The smallest ipv6 is unspecified (::), and an address must not
+    // start with a single colon.
+    if addr.len() < 2 || (addr[0] == b':' && addr[1] != b':') {
+        return false;
+    }
+
+    let mut seen_double_colon = false; // "::" has been seen already
+    let mut after_colon = false; // the previous byte was a colon
+    let mut group_len: usize = 0; // hex digits read in the current group
+    let mut groups: u8 = 0; // groups completed so far
+
+    for (pos, &byte) in addr.iter().enumerate() {
+        match byte {
+            b':' => {
+                // Too many colons, or too many groups
+                if (seen_double_colon && after_colon) || groups == 8 {
+                    return false;
+                }
+                if after_colon {
+                    seen_double_colon = true;
+                } else {
+                    if group_len != 0 {
+                        groups += 1;
+                    }
+                    group_len = 0;
+                    after_colon = true;
+                }
+            }
+            b'.' => {
+                // The rest of the input, starting at the current group,
+                // must be an IPv4 address. It occupies the space of two
+                // groups, which leaves room for six (fewer with "::").
+                if !is_valid_ipv4_addr(&addr[pos - group_len..]) {
+                    return false;
+                }
+                return if seen_double_colon { groups < 6 } else { groups == 6 };
+            }
+            _ if fast_is_hex_digit(byte) => {
+                // Too many digits in the group
+                if group_len == 4 {
+                    return false;
+                }
+                after_colon = false;
+                group_len += 1;
+            }
+            // Invalid character
+            _ => return false,
+        }
+    }
+
+    // The IP ends with a single colon
+    if after_colon && !seen_double_colon {
+        return false;
+    }
+    if group_len != 0 {
+        groups += 1;
+    }
+    if seen_double_colon { groups < 8 } else { groups == 8 }
+}
+
+/// C-callable wrapper around `is_valid_scheme_char`.
+#[no_mangle]
+pub extern "C" fn rust_net_is_valid_scheme_char(a_char: u8) -> bool {
+    is_valid_scheme_char(a_char)
+}
+
+/// Returns true if `scheme` is non-empty, starts with an ASCII letter
+/// and otherwise contains only valid scheme characters.
+#[no_mangle]
+pub extern "C" fn rust_net_is_valid_scheme<'a>(scheme: &'a nsACString) -> bool {
+    match scheme.split_first() {
+        // first char must be alpha
+        Some((first, rest)) => {
+            first.is_ascii_alphabetic() && rest.iter().all(|c| is_valid_scheme_char(*c))
+        }
+        None => false,
+    }
+}
+
+/// Returns true for the bytes allowed after the first one in a scheme:
+/// ASCII letters and digits plus `+`, `.` and `-`.
+fn is_valid_scheme_char(a_char: u8) -> bool {
+    a_char.is_ascii_alphanumeric() || [b'+', b'.', b'-'].contains(&a_char)
+}
+
+/// Receives a batch of parsed host names; returning false stops parsing.
+pub type ParsingCallback = extern "C" fn(&ThinVec<nsCString>) -> bool;
+
+/// Reads the hosts file at `path` and passes the host names it lists to
+/// `callback` in batches. An unopenable file yields no callback at all.
+#[no_mangle]
+pub extern "C" fn rust_parse_etc_hosts<'a>(path: &'a nsACString, callback: ParsingCallback) {
+    // Characters that disqualify a field from being a valid domain.
+    let forbidden = ['\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']'];
+    let reader = match File::open(&*path.to_utf8()) {
+        Ok(file) => io::BufReader::new(file),
+        Err(..) => return,
+    };
+
+    let mut hosts = ThinVec::new();
+    for line in reader.lines() {
+        let line = match line {
+            Ok(text) => text,
+            Err(..) => break,
+        };
+
+        // Drop the comment part, then skip the IP that leads the entry.
+        let entry = line.split('#').next().unwrap_or("");
+        hosts.extend(
+            entry
+                .split_whitespace()
+                .skip(1)
+                .filter(|n| n.parse::<Ipv4Addr>().is_err() && !n.contains(&forbidden[..]))
+                .map(nsCString::from),
+        );
+
+        // /etc/hosts files can be huge. To avoid blocking shutdown, hand the domains
+        // to the callback once there are more than 100 and let it stop the parsing.
+        if hosts.len() > 100 {
+            let keep_going = callback(&hosts);
+            hosts.clear();
+            if !keep_going {
+                break;
+            }
+        }
+    }
+
+    if !hosts.is_empty() {
+        callback(&hosts);
+    }
+}