summaryrefslogtreecommitdiffstats
path: root/browser/components/newtab/lib/SiteClassifier.sys.mjs
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /browser/components/newtab/lib/SiteClassifier.sys.mjs
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1. (tag: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/newtab/lib/SiteClassifier.sys.mjs')
-rw-r--r-- browser/components/newtab/lib/SiteClassifier.sys.mjs 103
1 files changed, 103 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/SiteClassifier.sys.mjs b/browser/components/newtab/lib/SiteClassifier.sys.mjs
new file mode 100644
index 0000000000..64c7309bf5
--- /dev/null
+++ b/browser/components/newtab/lib/SiteClassifier.sys.mjs
@@ -0,0 +1,103 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// We use importESModule here instead of static import so that
+// the Karma test environment won't choke on this module. This
+// is because the Karma test environment already stubs out
+// RemoteSettings, and overrides importESModule to be a no-op (which
+// can't be done for a static import statement).
+
+// eslint-disable-next-line mozilla/use-static-import
+const { RemoteSettings } = ChromeUtils.importESModule(
+ "resource://services-settings/remote-settings.sys.mjs"
+);
+
+// Returns true when `params` satisfies every entry of `criteria` (a list of {key, value, prefix}).
+// Each entry's key must be present; a truthy `value` must equal the param's value exactly, and a
+// truthy `prefix` must be a prefix of it. An empty `criteria` list returns true.
+function _hasParams(criteria, params) {
+ for (let param of criteria) {
+ const val = params.get(param.key); // classifySite passes URL.searchParams, whose get() yields null for a missing key
+ if (
+ val === null || // required key is absent
+ (param.value && param.value !== val) || // an exact value was requested and differs
+ (param.prefix && !val.startsWith(param.prefix)) // a prefix was requested and does not match
+ ) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * classifySite
+ * Classifies a given URL into a category based on classification data from the
+ * "sites-classification" RemoteSettings collection. Entries are tried from highest to
+ * lowest "weight" (a missing weight counts as 0); the first matching entry's "type" wins.
+ * A criteria object matches only if every field it specifies matches:
+ * - "url": equals the url argument exactly as it was passed in
+ * - "hostname": equals the parsed hostname with a leading "www." removed
+ * - "sld": equals the first dot-separated label of that hostname
+ * - "params": query parameter(s) present, optionally with matching values or prefixes
+ *
+ * The data looks like:
+ * [{
+ *   "type": "hostname-and-params-match",
+ *   "criteria": [{
+ *     "url": "https://matchurl.com",
+ *     "hostname": "matchhostname.com",
+ *     "sld": "secondleveldomain",
+ *     "params": [
+ *       { "key": "matchparam", "value": "matchvalue", "prefix": "matchPrefix" },
+ *     ],
+ *   }],
+ *   "weight": 300,
+ * },...]
+ *
+ * @param {string} url - URL to classify; retried with an "https://" prefix if it fails to parse.
+ * @param {Function} [RS=RemoteSettings] - called as RS("sites-classification"); its get() result is awaited.
+ * @returns {Promise<string>} the first matching entry's "type", or "other" if nothing matches or url cannot be parsed.
+ */
+export async function classifySite(url, RS = RemoteSettings) {
+ let category = "other"; // fallback result; never reassigned below
+ let parsedURL;
+
+ // Try the url as given first, then with an "https://" prefix prepended.
+ for (let _url of [url, `https://${url}`]) {
+ try {
+ parsedURL = new URL(_url);
+ break;
+ } catch (e) {} // deliberately ignored: a parse failure just moves on to the next candidate
+ }
+
+ if (parsedURL) {
+ // If we parsed successfully, find a match.
+ const hostname = parsedURL.hostname.replace(/^www\./i, "");
+ const params = parsedURL.searchParams;
+ // NOTE: there will be an initial/default local copy of the data in m-c.
+ // Therefore, this should never return an empty list [].
+ const siteTypes = await RS("sites-classification").get();
+ const sortedSiteTypes = siteTypes.sort(
+ (x, y) => (y.weight || 0) - (x.weight || 0) // descending weight; sort() reorders siteTypes itself in place
+ );
+ for (let type of sortedSiteTypes) {
+ for (let criteria of type.criteria) {
+ if (criteria.url && criteria.url !== url) { // compared with the raw argument, not parsedURL
+ continue;
+ }
+ if (criteria.hostname && criteria.hostname !== hostname) {
+ continue;
+ }
+ if (criteria.sld && criteria.sld !== hostname.split(".")[0]) { // first label of the www-stripped hostname
+ continue;
+ }
+ if (criteria.params && !_hasParams(criteria.params, params)) {
+ continue;
+ }
+ return type.type; // every specified field matched; a criteria with no fields also reaches here
+ }
+ }
+ }
+ return category;
+}