summary refs log tree commit diff stats
path: root/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js')
-rw-r--r-- browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js 263
1 files changed, 263 insertions, 0 deletions
diff --git a/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js
new file mode 100644
index 0000000000..03ddb75481
--- /dev/null
+++ b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js
@@ -0,0 +1,263 @@
+/* Any copyright is dedicated to the Public Domain.
+ http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+/*
+ * This test ensures we are correctly extracting domains from a SERP.
+ */
+
+ChromeUtils.defineESModuleGetters(this, {
+ SearchUtils: "resource://gre/modules/SearchUtils.sys.mjs",
+});
+
+const TESTS = [ // Table of cases, run in array order by test_domain_extraction_heuristics().
+ { // Per case: title is logged with info(); extractorInfos is passed to domainExtractor.extractDomainsFromDocument().
+ title: "Extract domain from href (absolute URL) - one link.",
+ extractorInfos: [
+ {
+ selectors: // "#testN" presumably scopes the selector to one section of the fixture page — TODO confirm
+ '#test1 [data-layout="organic"] a[data-testid="result-title-a"]',
+ method: "href",
+ },
+ ],
+ expectedDomains: ["foobar.com"], // De-duplicated via Set, then deep-compared (in order) with the extractor's result.
+ },
+ {
+ title: "Extract domain from href (absolute URL) - multiple links.",
+ extractorInfos: [
+ {
+ selectors:
+ '#test2 [data-layout="organic"] a[data-testid="result-title-a"]',
+ method: "href",
+ },
+ ],
+ expectedDomains: ["foo.com", "bar.com", "baz.com", "qux.com"],
+ },
+ {
+ title: "Extract domain from href (relative URL).",
+ extractorInfos: [
+ {
+ selectors:
+ '#test3 [data-layout="organic"] a[data-testid="result-title-a"]',
+ method: "href",
+ },
+ ],
+ expectedDomains: ["example.org"], // presumably the host the fixture page is served from — TODO confirm
+ },
+ {
+ title: "Extract domain from data attribute - one link.",
+ extractorInfos: [
+ {
+ selectors: "#test4 [data-dtld]",
+ method: "data-attribute",
+ options: {
+ dataAttributeKey: "dtld", // Pairs with the [data-dtld] attribute selector above.
+ },
+ },
+ ],
+ expectedDomains: ["www.abc.com"], // Note: the expected value keeps its "www." prefix in the data-attribute cases.
+ },
+ {
+ title: "Extract domain from data attribute - multiple links.",
+ extractorInfos: [
+ {
+ selectors: "#test5 [data-dtld]",
+ method: "data-attribute",
+ options: {
+ dataAttributeKey: "dtld",
+ },
+ },
+ ],
+ expectedDomains: [
+ "www.foo.com",
+ "www.bar.com",
+ "www.baz.com",
+ "www.qux.com",
+ ],
+ },
+ {
+ title: "Extract domain from an href's query param value.",
+ extractorInfos: [
+ {
+ selectors:
+ '#test6 .js-carousel-item-title, #test6 [data-layout="ad"] [data-testid="result-title-a"]',
+ method: "href",
+ options: {
+ queryParamKey: "ad_domain",
+ },
+ },
+ ],
+ expectedDomains: ["def.com"],
+ },
+ {
+ title:
+ "Extract domain from an href's query param value containing an href.",
+ extractorInfos: [
+ {
+ selectors: "#test7 a",
+ method: "href",
+ options: {
+ queryParamKey: "ad_domain",
+ queryParamValueIsHref: true,
+ },
+ },
+ ],
+ expectedDomains: ["def.com"],
+ },
+ {
+ title:
+ "The param value contains an invalid href while queryParamValueIsHref enabled.",
+ extractorInfos: [
+ {
+ selectors: "#test8 a",
+ method: "href",
+ options: {
+ queryParamKey: "ad_domain",
+ queryParamValueIsHref: true,
+ },
+ },
+ ],
+ expectedDomains: [], // Failure case: no domain is expected.
+ },
+ {
+ title: "Param value is missing from the href.",
+ extractorInfos: [
+ {
+ selectors: "#test9 a",
+ method: "href",
+ options: {
+ queryParamKey: "ad_domain",
+ queryParamValueIsHref: true,
+ },
+ },
+ ],
+ expectedDomains: [],
+ },
+ {
+ title: "Extraction preserves order of domains within the page.",
+ extractorInfos: [ // Three extractors with different methods are combined in this case.
+ {
+ selectors:
+ '#test10 [data-layout="organic"] a[data-testid="result-title-a"]',
+ method: "href",
+ },
+ {
+ selectors: "#test10 [data-dtld]",
+ method: "data-attribute",
+ options: {
+ dataAttributeKey: "dtld",
+ },
+ },
+ { // NOTE(review): the selector below also references #test7 while the rest of this case uses #test10 — possible copy-paste leftover; confirm against the fixture.
+ selectors:
+ '#test10 .js-carousel-item-title, #test7 [data-layout="ad"] [data-testid="result-title-a"]',
+ method: "href",
+ options: {
+ queryParamKey: "ad_domain",
+ },
+ },
+ ],
+ expectedDomains: ["foobar.com", "www.abc.com", "def.com"],
+ },
+ {
+ title: "No elements match the selectors.",
+ extractorInfos: [
+ {
+ selectors:
+ '#test11 [data-layout="organic"] a[data-testid="result-title-a"]',
+ method: "href",
+ },
+ ],
+ expectedDomains: [],
+ },
+ {
+ title: "Data attribute is present, but value is missing.",
+ extractorInfos: [
+ {
+ selectors: "#test12 [data-dtld]",
+ method: "data-attribute",
+ options: {
+ dataAttributeKey: "dtld",
+ },
+ },
+ ],
+ expectedDomains: [],
+ },
+ {
+ title: "Query param is present, but value is missing.",
+ extractorInfos: [
+ {
+ selectors: '#test13 [data-layout="ad"] [data-testid="result-title-a"]',
+ method: "href",
+ options: {
+ queryParamKey: "ad_domain",
+ },
+ },
+ ],
+ expectedDomains: [],
+ },
+ {
+ title: "Non-standard URL scheme.",
+ extractorInfos: [
+ {
+ selectors:
+ '#test14 [data-layout="organic"] a[data-testid="result-title-a"]',
+ method: "href",
+ },
+ ],
+ expectedDomains: [], // A non-standard scheme is expected to yield no domain.
+ },
+];
+
+add_setup(async function () {
+ await SpecialPowers.pushPrefEnv({
+ set: [
+ ["browser.search.serpEventTelemetry.enabled", true],
+ ["browser.search.serpEventTelemetryCategorization.enabled", true],
+ ],
+ });
+
+ await SearchSERPTelemetry.init();
+
+ registerCleanupFunction(async () => {
+ resetTelemetry();
+ });
+});
+
+add_task(async function test_domain_extraction_heuristics() {
+ resetTelemetry();
+ let url = getSERPUrl("searchTelemetryDomainExtraction.html");
+ info(
+ "Load a sample SERP where domains need to be extracted in different ways."
+ );
+ let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, url);
+
+ for (let currentTest of TESTS) {
+ if (currentTest.title) {
+ info(currentTest.title);
+ }
+ let expectedDomains = new Set(currentTest.expectedDomains);
+ let actualDomains = await SpecialPowers.spawn(
+ gBrowser.selectedBrowser,
+ [currentTest.extractorInfos],
+ extractorInfos => {
+ const { domainExtractor } = ChromeUtils.importESModule(
+ "resource:///actors/SearchSERPTelemetryChild.sys.mjs"
+ );
+ return domainExtractor.extractDomainsFromDocument(
+ content.document,
+ extractorInfos
+ );
+ }
+ );
+
+ Assert.deepEqual(
+ Array.from(actualDomains),
+ Array.from(expectedDomains),
+ "Domains should have been extracted correctly."
+ );
+ }
+
+ BrowserTestUtils.removeTab(tab);
+});