1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
const { RemoteSettings } = ChromeUtils.import(
"resource://services-settings/remote-settings.js"
);
// Checks a URL's query parameters against a list of parameter rules.
// Every rule must be satisfied for the result to be true:
//   - the parameter named by rule.key has to be present;
//   - when rule.value is set, the parameter must equal it exactly;
//   - when rule.prefix is set, the parameter must start with it.
// `params` only needs a URLSearchParams-like get() that yields null for
// absent keys. An empty rule list is trivially satisfied.
function _hasParams(criteria, params) {
  for (const rule of criteria) {
    const actual = params.get(rule.key);

    // Parameter is not in the query string at all.
    if (actual === null) {
      return false;
    }

    // An exact value was requested and the parameter differs.
    if (rule.value && actual !== rule.value) {
      return false;
    }

    // A prefix was requested and the parameter does not begin with it.
    if (rule.prefix && !actual.startsWith(rule.prefix)) {
      return false;
    }
  }

  return true;
}
/**
 * classifySite
 * Classifies a given URL into a category based on classification data from RemoteSettings.
 * The data from remote settings can match a category by one of the following:
 *  - match the exact URL
 *  - match the hostname or second level domain (sld)
 *  - match query parameter(s), and optionally their values or prefixes
 *  - match both (hostname or sld) and query parameter(s)
 *
 * The data looks like:
 * [{
 *    "type": "hostname-and-params-match",
 *    "criteria": [
 *      {
 *        "url": "https://matchurl.com",
 *        "hostname": "matchhostname.com",
 *        "sld": "secondleveldomain",
 *        "params": [
 *          {
 *            "key": "matchparam",
 *            "value": "matchvalue",
 *            "prefix": "matchprefix",
 *          },
 *        ],
 *      },
 *    ],
 *    "weight": 300,
 *  },...]
 *
 * Records are tried from the highest "weight" to the lowest (a missing weight
 * counts as 0). Within a record, the first criteria entry whose specified
 * fields all match wins and the record's "type" is returned. Fields left out
 * of a criteria entry are not checked.
 *
 * @param {string} url
 *   The URL to classify. If it does not parse on its own, it is retried with
 *   "https://" prepended. "criteria.url" is compared against this raw string.
 * @param {Function} [RS=RemoteSettings]
 *   Called with the collection name; must return an object whose async get()
 *   resolves to the list of classification records. Injectable for tests.
 * @returns {Promise<string>}
 *   The "type" of the first matching record, or "other" when the URL cannot
 *   be parsed or nothing matches.
 */
async function classifySite(url, RS = RemoteSettings) {
  const fallbackCategory = "other";
  let parsedURL;

  // Try to parse the url, first as given and then with a scheme prepended.
  for (let _url of [url, `https://${url}`]) {
    try {
      parsedURL = new URL(_url);
      break;
    } catch (e) {
      // Intentionally ignored: an unparsable candidate just means we move on
      // to the next one, and end up with the fallback category if none parse.
    }
  }

  if (!parsedURL) {
    return fallbackCategory;
  }

  // If we parsed successfully, find a match.
  const hostname = parsedURL.hostname.replace(/^www\./i, "");
  // The "sld" criteria is compared against the first label of the hostname
  // (after a leading "www." has been removed).
  const firstLabel = hostname.split(".")[0];
  const params = parsedURL.searchParams;

  // NOTE: there will be an initial/default local copy of the data in m-c.
  // Therefore, this should never return an empty list [].
  const siteTypes = await RS("sites-classification").get();

  // Array.prototype.sort works in place, so sort a shallow copy rather than
  // reordering the array that the settings client handed to us.
  const sortedSiteTypes = [...siteTypes].sort(
    (x, y) => (y.weight || 0) - (x.weight || 0)
  );

  for (let type of sortedSiteTypes) {
    // A record without a criteria list can never match; skip it instead of
    // throwing and breaking classification for every other record.
    for (let criteria of type.criteria || []) {
      if (criteria.url && criteria.url !== url) {
        continue;
      }
      if (criteria.hostname && criteria.hostname !== hostname) {
        continue;
      }
      if (criteria.sld && criteria.sld !== firstLabel) {
        continue;
      }
      if (criteria.params && !_hasParams(criteria.params, params)) {
        continue;
      }
      return type.type;
    }
  }

  return fallbackCategory;
}
// Only classifySite is listed here; _hasParams is not, so it stays an
// internal helper. NOTE(review): presumably this list is what the JSM loader
// (ChromeUtils.import) exposes to importers - confirm against the loader docs.
const EXPORTED_SYMBOLS = ["classifySite"];
|