diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /browser/components/newtab/lib/SiteClassifier.sys.mjs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/newtab/lib/SiteClassifier.sys.mjs')
-rw-r--r-- | browser/components/newtab/lib/SiteClassifier.sys.mjs | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/SiteClassifier.sys.mjs b/browser/components/newtab/lib/SiteClassifier.sys.mjs new file mode 100644 index 0000000000..64c7309bf5 --- /dev/null +++ b/browser/components/newtab/lib/SiteClassifier.sys.mjs @@ -0,0 +1,103 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// We use importESModule here instead of static import so that +// the Karma test environment won't choke on this module. This +// is because the Karma test environment already stubs out +// RemoteSettings, and overrides importESModule to be a no-op (which +// can't be done for a static import statement). + +// eslint-disable-next-line mozilla/use-static-import +const { RemoteSettings } = ChromeUtils.importESModule( + "resource://services-settings/remote-settings.sys.mjs" +); + +// Returns whether the passed in params match the criteria. +// To match, they must contain all the params specified in criteria and the values +// must match if a value is provided in criteria. +function _hasParams(criteria, params) { + for (let param of criteria) { + const val = params.get(param.key); + if ( + val === null || + (param.value && param.value !== val) || + (param.prefix && !val.startsWith(param.prefix)) + ) { + return false; + } + } + return true; +} + +/** + * classifySite + * Classifies a given URL into a category based on classification data from RemoteSettings. + * The data from remote settings can match a category by one of the following: + * - match the exact URL + * - match the hostname or second level domain (sld) + * - match query parameter(s), and optionally their values or prefixes + * - match both (hostname or sld) and query parameter(s) + * + * The data looks like: + * [{ + * "type": "hostname-and-params-match", + * "criteria": [ + * { + * "url": "https://matchurl.com", + * "hostname": "matchhostname.com", + * "sld": "secondleveldomain", + * "params": [ + * { + * "key": "matchparam", + * "value": "matchvalue", + * "prefix": "matchpPrefix", + * }, + * ], + * }, + * ], + * "weight": 300, + * },...] + */ +export async function classifySite(url, RS = RemoteSettings) { + let category = "other"; + let parsedURL; + + // Try to parse the url. + for (let _url of [url, `https://${url}`]) { + try { + parsedURL = new URL(_url); + break; + } catch (e) {} + } + + if (parsedURL) { + // If we parsed successfully, find a match. + const hostname = parsedURL.hostname.replace(/^www\./i, ""); + const params = parsedURL.searchParams; + // NOTE: there will be an initial/default local copy of the data in m-c. + // Therefore, this should never return an empty list []. + const siteTypes = await RS("sites-classification").get(); + const sortedSiteTypes = siteTypes.sort( + (x, y) => (y.weight || 0) - (x.weight || 0) + ); + for (let type of sortedSiteTypes) { + for (let criteria of type.criteria) { + if (criteria.url && criteria.url !== url) { + continue; + } + if (criteria.hostname && criteria.hostname !== hostname) { + continue; + } + if (criteria.sld && criteria.sld !== hostname.split(".")[0]) { + continue; + } + if (criteria.params && !_hasParams(criteria.params, params)) { + continue; + } + return type.type; + } + } + } + return category; +} |