summaryrefslogtreecommitdiffstats
path: root/netwerk/base/rust-helper/src/lib.rs
blob: df6e4af1e4c213c828d033d17aae0aed7fd16470 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

extern crate nserror;
use self::nserror::*;

extern crate nsstring;
use self::nsstring::{nsACString, nsCString};

extern crate thin_vec;
use self::thin_vec::ThinVec;

use std::fs::File;
use std::io::{self, BufRead};
use std::net::Ipv4Addr;

/// HTTP leading whitespace, defined in netwerk/protocol/http/nsHttp.h
static HTTP_LWS: &'static [u8] = &[' ' as u8, '\t' as u8];

/// Trim leading whitespace, trailing whitespace, and quality-value
/// from a token.
fn trim_token(token: &[u8]) -> &[u8] {
    // Trim left whitespace
    let ltrim = token
        .iter()
        .take_while(|c| HTTP_LWS.iter().any(|ws| &ws == c))
        .count();

    // Trim right whitespace
    // remove "; q=..." if present
    let rtrim = token[ltrim..]
        .iter()
        .take_while(|c| **c != (';' as u8) && HTTP_LWS.iter().all(|ws| ws != *c))
        .count();

    &token[ltrim..ltrim + rtrim]
}

#[no_mangle]
/// Allocates an nsACString that contains a ISO 639 language list
/// notated with HTTP "q" values for output with an HTTP Accept-Language
/// header. Previous q values will be stripped because the order of
/// the langs implies the q value. The q values are calculated by dividing
/// 1.0 amongst the number of languages present.
///
/// Ex: passing: "en, ja"
///     returns: "en,ja;q=0.5"
///
///     passing: "en, ja, fr_CA"
///     returns: "en,ja;q=0.7,fr_CA;q=0.3"
pub extern "C" fn rust_prepare_accept_languages<'a, 'b>(
    i_accept_languages: &'a nsACString,
    o_accept_languages: &'b mut nsACString,
) -> nsresult {
    if i_accept_languages.is_empty() {
        return NS_OK;
    }

    let make_tokens = || {
        i_accept_languages
            .split(|c| *c == (',' as u8))
            .map(|token| trim_token(token))
            .filter(|token| token.len() != 0)
    };

    let n = make_tokens().count();

    for (count_n, i_token) in make_tokens().enumerate() {
        // delimiter if not first item
        if count_n != 0 {
            o_accept_languages.append(",");
        }

        let token_pos = o_accept_languages.len();
        o_accept_languages.append(&i_token as &[u8]);

        {
            let o_token = o_accept_languages.to_mut();
            canonicalize_language_tag(&mut o_token[token_pos..]);
        }

        // Divide the quality-values evenly among the languages.
        let q = 1.0 - count_n as f32 / n as f32;

        let u: u32 = ((q + 0.005) * 100.0) as u32;
        // Only display q-value if less than 1.00.
        if u < 100 {
            // With a small number of languages, one decimal place is
            // enough to prevent duplicate q-values.
            // Also, trailing zeroes do not add any information, so
            // they can be removed.
            if n < 10 || u % 10 == 0 {
                let u = (u + 5) / 10;
                o_accept_languages.append(&format!(";q=0.{}", u));
            } else {
                // Values below 10 require zero padding.
                o_accept_languages.append(&format!(";q=0.{:02}", u));
            }
        }
    }

    NS_OK
}

/// Defines a consistent capitalization for a given language string.
///
/// # Arguments
/// * `token` - a narrow char slice describing a language.
///
/// Valid language tags are of the form
/// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN"
///
/// Language tags are defined in the
/// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to
/// the spec:
///
/// > At all times, language tags and their subtags, including private
/// > use and extensions, are to be treated as case insensitive: there
/// > exist conventions for the capitalization of some of the subtags,
/// > but these MUST NOT be taken to carry meaning.
///
/// So why is this code even here? See bug 1108183, I guess.
fn canonicalize_language_tag(token: &mut [u8]) {
    for c in token.iter_mut() {
        *c = c.to_ascii_lowercase();
    }

    let sub_tags = token.split_mut(|c| *c == ('-' as u8));
    for (i, sub_tag) in sub_tags.enumerate() {
        if i == 0 {
            // ISO 639-1 language code, like the "en" in "en-US"
            continue;
        }

        match sub_tag.len() {
            // Singleton tag, like "x" or "i". These signify a
            // non-standard language, so we stop capitalizing after
            // these.
            1 => break,
            // ISO 3166-1 Country code, like "US"
            2 => {
                sub_tag[0] = sub_tag[0].to_ascii_uppercase();
                sub_tag[1] = sub_tag[1].to_ascii_uppercase();
            }
            // ISO 15924 script code, like "Nkoo"
            4 => {
                sub_tag[0] = sub_tag[0].to_ascii_uppercase();
            }
            _ => {}
        };
    }
}

#[no_mangle]
pub extern "C" fn rust_net_is_valid_ipv4_addr<'a>(addr: &'a nsACString) -> bool {
    is_valid_ipv4_addr(addr)
}

#[inline]
fn try_apply_digit(current_octet: u8, digit_to_apply: u8) -> Option<u8> {
    current_octet.checked_mul(10)?.checked_add(digit_to_apply)
}

pub fn is_valid_ipv4_addr<'a>(addr: &'a [u8]) -> bool {
    let mut current_octet: Option<u8> = None;
    let mut dots: u8 = 0;
    for c in addr {
        let c = *c as char;
        match c {
            '.' => {
                match current_octet {
                    None => {
                        // starting an octet with a . is not allowed
                        return false;
                    }
                    Some(_) => {
                        dots = dots + 1;
                        current_octet = None;
                    }
                }
            }
            // The character is not a digit
            no_digit if no_digit.to_digit(10).is_none() => {
                return false;
            }
            digit => {
                match current_octet {
                    None => {
                        // Unwrap is sound because it has been checked in the previous arm
                        current_octet = Some(digit.to_digit(10).unwrap() as u8);
                    }
                    Some(octet) => {
                        if let Some(0) = current_octet {
                            // Leading 0 is not allowed
                            return false;
                        }
                        if let Some(applied) =
                            try_apply_digit(octet, digit.to_digit(10).unwrap() as u8)
                        {
                            current_octet = Some(applied);
                        } else {
                            // Multiplication or Addition overflowed
                            return false;
                        }
                    }
                }
            }
        }
    }
    dots == 3 && current_octet.is_some()
}

#[no_mangle]
pub extern "C" fn rust_net_is_valid_ipv6_addr<'a>(addr: &'a nsACString) -> bool {
    is_valid_ipv6_addr(addr)
}

#[inline(always)]
fn fast_is_hex_digit(c: u8) -> bool {
    match c {
        b'0'..=b'9' => true,
        b'a'..=b'f' => true,
        b'A'..=b'F' => true,
        _ => false,
    }
}

pub fn is_valid_ipv6_addr<'a>(addr: &'a [u8]) -> bool {
    let mut double_colon = false;
    let mut colon_before = false;
    let mut digits: u8 = 0;
    let mut blocks: u8 = 0;

    // The smallest ipv6 is unspecified (::)
    // The IP starts with a single colon
    if addr.len() < 2 || addr[0] == b':' && addr[1] != b':' {
        return false;
    }
    //Enumerate with an u8 for cache locality
    for (i, c) in (0u8..).zip(addr) {
        match c {
            maybe_digit if fast_is_hex_digit(*maybe_digit) => {
                // Too many digits in the block
                if digits == 4 {
                    return false;
                }
                colon_before = false;
                digits += 1;
            }
            b':' => {
                // Too many columns
                if double_colon && colon_before || blocks == 8 {
                    return false;
                }
                if !colon_before {
                    if digits != 0 {
                        blocks += 1;
                    }
                    digits = 0;
                    colon_before = true;
                } else if !double_colon {
                    double_colon = true;
                }
            }
            b'.' => {
                // IPv4 from the last block
                if is_valid_ipv4_addr(&addr[(i - digits) as usize..]) {
                    return double_colon && blocks < 6 || !double_colon && blocks == 6;
                }
                return false;
            }
            _ => {
                // Invalid character
                return false;
            }
        }
    }
    if colon_before && !double_colon {
        // The IP ends with a single colon
        return false;
    }
    if digits != 0 {
        blocks += 1;
    }

    double_colon && blocks < 8 || !double_colon && blocks == 8
}

#[no_mangle]
pub extern "C" fn rust_net_is_valid_scheme_char(a_char: u8) -> bool {
    is_valid_scheme_char(a_char)
}

#[no_mangle]
pub extern "C" fn rust_net_is_valid_scheme<'a>(scheme: &'a nsACString) -> bool {
    if scheme.is_empty() {
        return false;
    }

    // first char must be alpha
    if !scheme[0].is_ascii_alphabetic() {
        return false;
    }

    scheme[1..]
        .iter()
        .all(|a_char| is_valid_scheme_char(*a_char))
}

fn is_valid_scheme_char(a_char: u8) -> bool {
    a_char.is_ascii_alphanumeric() || a_char == b'+' || a_char == b'.' || a_char == b'-'
}

pub type ParsingCallback = extern "C" fn(&ThinVec<nsCString>) -> bool;

#[no_mangle]
pub extern "C" fn rust_parse_etc_hosts<'a>(path: &'a nsACString, callback: ParsingCallback) {
    let file = match File::open(&*path.to_utf8()) {
        Ok(file) => io::BufReader::new(file),
        Err(..) => return,
    };

    let mut array = ThinVec::new();
    for line in file.lines() {
        let line = match line {
            Ok(l) => l,
            Err(..) => break,
        };

        let mut iter = line.split('#').next().unwrap().split_whitespace();
        iter.next(); // skip the IP

        array.extend(
            iter.filter(|host| {
                // Make sure it's a valid domain
                let invalid = [
                    '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']',
                ];
                host.parse::<Ipv4Addr>().is_err() && !host.contains(&invalid[..])
            })
            .map(nsCString::from),
        );

        // /etc/hosts files can be huge. To make sure we don't block shutdown
        // for every 100 domains that we parse we call the callback passing the
        // domains and see if we should keep parsing.
        if array.len() > 100 {
            let keep_going = callback(&array);
            array.clear();
            if !keep_going {
                break;
            }
        }
    }

    if !array.is_empty() {
        callback(&array);
    }
}