diff options
Diffstat (limited to 'browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js')
-rw-r--r-- | browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js new file mode 100644 index 0000000000..03ddb75481 --- /dev/null +++ b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js @@ -0,0 +1,263 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +/* + * This test ensures we are correctly extracting domains from a SERP. + */ + +ChromeUtils.defineESModuleGetters(this, { + SearchUtils: "resource://gre/modules/SearchUtils.sys.mjs", +}); + +const TESTS = [ + { + title: "Extract domain from href (absolute URL) - one link.", + extractorInfos: [ + { + selectors: + '#test1 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: ["foobar.com"], + }, + { + title: "Extract domain from href (absolute URL) - multiple links.", + extractorInfos: [ + { + selectors: + '#test2 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: ["foo.com", "bar.com", "baz.com", "qux.com"], + }, + { + title: "Extract domain from href (relative URL).", + extractorInfos: [ + { + selectors: + '#test3 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: ["example.org"], + }, + { + title: "Extract domain from data attribute - one link.", + extractorInfos: [ + { + selectors: "#test4 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + ], + expectedDomains: ["www.abc.com"], + }, + { + title: "Extract domain from data attribute - multiple links.", + extractorInfos: [ + { + selectors: "#test5 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + ], + expectedDomains: [ + "www.foo.com", + "www.bar.com", + "www.baz.com", + "www.qux.com", + ], + }, + { + title: "Extract domain from an href's query param value.", + extractorInfos: [ + { + selectors: + '#test6 .js-carousel-item-title, #test6 [data-layout="ad"] [data-testid="result-title-a"]', + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + expectedDomains: ["def.com"], + }, + { + title: + "Extract domain from an href's query param value containing an href.", + extractorInfos: [ + { + selectors: "#test7 a", + method: "href", + options: { + queryParamKey: "ad_domain", + queryParamValueIsHref: true, + }, + }, + ], + expectedDomains: ["def.com"], + }, + { + title: + "The param value contains an invalid href while queryParamValueIsHref enabled.", + extractorInfos: [ + { + selectors: "#test8 a", + method: "href", + options: { + queryParamKey: "ad_domain", + queryParamValueIsHref: true, + }, + }, + ], + expectedDomains: [], + }, + { + title: "Param value is missing from the href.", + extractorInfos: [ + { + selectors: "#test9 a", + method: "href", + options: { + queryParamKey: "ad_domain", + queryParamValueIsHref: true, + }, + }, + ], + expectedDomains: [], + }, + { + title: "Extraction preserves order of domains within the page.", + extractorInfos: [ + { + selectors: + '#test10 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + { + selectors: "#test10 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + { + selectors: + '#test10 .js-carousel-item-title, #test7 [data-layout="ad"] [data-testid="result-title-a"]', + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + expectedDomains: ["foobar.com", "www.abc.com", "def.com"], + }, + { + title: "No elements match the selectors.", + extractorInfos: [ + { + selectors: + '#test11 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: [], + }, + { + title: "Data attribute is present, but value is missing.", + extractorInfos: [ + { + selectors: "#test12 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + ], + expectedDomains: [], + }, + { + title: "Query param is present, but value is missing.", + extractorInfos: [ + { + selectors: '#test13 [data-layout="ad"] [data-testid="result-title-a"]', + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + expectedDomains: [], + }, + { + title: "Non-standard URL scheme.", + extractorInfos: [ + { + selectors: + '#test14 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: [], + }, +]; + +add_setup(async function () { + await SpecialPowers.pushPrefEnv({ + set: [ + ["browser.search.serpEventTelemetry.enabled", true], + ["browser.search.serpEventTelemetryCategorization.enabled", true], + ], + }); + + await SearchSERPTelemetry.init(); + + registerCleanupFunction(async () => { + resetTelemetry(); + }); +}); + +add_task(async function test_domain_extraction_heuristics() { + resetTelemetry(); + let url = getSERPUrl("searchTelemetryDomainExtraction.html"); + info( + "Load a sample SERP where domains need to be extracted in different ways." + ); + let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, url); + + for (let currentTest of TESTS) { + if (currentTest.title) { + info(currentTest.title); + } + let expectedDomains = new Set(currentTest.expectedDomains); + let actualDomains = await SpecialPowers.spawn( + gBrowser.selectedBrowser, + [currentTest.extractorInfos], + extractorInfos => { + const { domainExtractor } = ChromeUtils.importESModule( + "resource:///actors/SearchSERPTelemetryChild.sys.mjs" + ); + return domainExtractor.extractDomainsFromDocument( + content.document, + extractorInfos + ); + } + ); + + Assert.deepEqual( + Array.from(actualDomains), + Array.from(expectedDomains), + "Domains should have been extracted correctly." + ); + } + + BrowserTestUtils.removeTab(tab); +}); |