From 40a355a42d4a9444dc753c04c6608dade2f06a23 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:13:27 +0200 Subject: Adding upstream version 125.0.1. Signed-off-by: Daniel Baumann --- browser/actors/SearchSERPTelemetryChild.sys.mjs | 484 ++++++++++++++++++------ 1 file changed, 362 insertions(+), 122 deletions(-) (limited to 'browser/actors/SearchSERPTelemetryChild.sys.mjs') diff --git a/browser/actors/SearchSERPTelemetryChild.sys.mjs b/browser/actors/SearchSERPTelemetryChild.sys.mjs index e6187e9e4b..c760f9a19e 100644 --- a/browser/actors/SearchSERPTelemetryChild.sys.mjs +++ b/browser/actors/SearchSERPTelemetryChild.sys.mjs @@ -25,6 +25,10 @@ XPCOMUtils.defineLazyPreferenceGetter( false ); +export const CATEGORIZATION_SETTINGS = { + MAX_DOMAINS_TO_CATEGORIZE: 10, +}; + // Duplicated from SearchSERPTelemetry to avoid loading the module on content // startup. const SEARCH_TELEMETRY_SHARED = { @@ -33,6 +37,22 @@ const SEARCH_TELEMETRY_SHARED = { SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout", }; +/** + * Standard events mapped to the telemetry action. + */ +const EVENT_TYPE_TO_ACTION = { + click: "clicked", +}; + +/** + * A map of object conditions mapped to the condition that should be run when + * an event is triggered. The condition name is referenced in Remote Settings + * under the optional `condition` string for an event listener. + */ +const CONDITIONS = { + keydownEnter: event => event.key == "Enter", +}; + /** * SearchProviders looks after keeping track of the search provider information * received from the main process. @@ -50,7 +70,8 @@ class SearchProviders { * Gets the search provider information for any provider with advert information. * If there is nothing in the cache, it will obtain it from shared data. * - * @returns {object} Returns the search provider information. @see SearchTelemetry.jsm + * @returns {object} Returns the search provider information. 
+ * @see SearchTelemetry.sys.mjs */ get info() { if (this._searchProviderInfo) { @@ -106,6 +127,129 @@ class SearchProviders { } } +/** + * @typedef {object} EventListenerParam + * @property {string} eventType + * The type of event the listener should listen for. If the event type is + * non-standard, it should correspond to a definition in + * CUSTOM_EVENT_TYPE_TO_DATA that will re-map it to a standard type. TODO + * @property {string} target + * The type of component that was the source of the event. + * @property {string | null} action + * The action that should be reported in telemetry. + */ + +/** + * Provides a way to add listeners to elements, as well as unload them. + */ +class ListenerHelper { + /** + * Adds each event listener in an array of event listeners to each element + * in an array of elements, and sets their unloading. + * + * @param {Array} elements + * DOM elements to add event listeners to. + * @param {Array} eventListenerParams + * The EventListenerParam definitions of the listeners to add. + * @param {string} target + */ + static addListeners(elements, eventListenerParams, target) { + if (!elements?.length || !eventListenerParams?.length) { + return; + } + + let document = elements[0].ownerGlobal.document; + let callback = documentToEventCallbackMap.get(document); + if (!callback) { + return; + } + + // The map might have entries from previous callers, so we must ensure + // we don't discard existing event listener callbacks. 
+ let removeListenerCallbacks = []; + if (documentToRemoveEventListenersMap.has(document)) { + removeListenerCallbacks = documentToRemoveEventListenersMap.get(document); + } + + for (let params of eventListenerParams) { + let removeListeners = ListenerHelper.addListener( + elements, + params, + target, + callback + ); + removeListenerCallbacks = removeListenerCallbacks.concat(removeListeners); + } + + documentToRemoveEventListenersMap.set(document, removeListenerCallbacks); + } + + /** + * Add an event listener to each element in an array of elements. + * + * @param {Array} elements + * DOM elements to add event listeners to. + * @param {EventListenerParam} eventListenerParam + * @param {string} target + * @param {Function} callback + * @returns {Array} Array of remove event listener functions. + */ + static addListener(elements, eventListenerParam, target, callback) { + let { action, eventType, target: customTarget } = eventListenerParam; + + if (customTarget) { + target = customTarget; + } + + if (!action) { + action = EVENT_TYPE_TO_ACTION[eventType]; + if (!action) { + return []; + } + } + + // Some events might have specific conditions we want to check before + // registering an engagement event. + let eventCallback; + if (eventListenerParam.condition) { + if (CONDITIONS[eventListenerParam.condition]) { + let condition = CONDITIONS[eventListenerParam.condition]; + eventCallback = async event => { + let start = Cu.now(); + if (condition(event)) { + callback({ action, target }); + } + ChromeUtils.addProfilerMarker( + "SearchSERPTelemetryChild._eventCallback", + start, + "Call cached function before callback." + ); + }; + } else { + // If a component included a condition, but it wasn't found it is + // due to the fact that it was added in a more recent Firefox version + // than what is provided via search-telemetry-v2. Since the version of + // Firefox the user is using doesn't include this condition, + // we shouldn't add the event. 
+ return []; + } + } else { + eventCallback = () => { + callback({ action, target }); + }; + } + + let removeListenerCallbacks = []; + for (let element of elements) { + element.addEventListener(eventType, eventCallback); + removeListenerCallbacks.push(() => { + element.removeEventListener(eventType, eventCallback); + }); + } + return removeListenerCallbacks; + } +} + /** * Scans SERPs for ad components. */ @@ -252,12 +396,24 @@ class SearchAdImpression { // - For others, map its component type and check visibility. for (let [element, data] of this.#elementToAdDataMap.entries()) { if (data.type == "incontent_searchbox") { + // Bug 1880413: Deprecate hard coding the incontent search box. // If searchbox has child elements, observe those, otherwise // fallback to its parent element. - this.#addEventListenerToElements( - data.childElements.length ? data.childElements : [element], - data.type, - false + let searchElements = data.childElements.length + ? data.childElements + : [element]; + ListenerHelper.addListeners( + searchElements, + [ + { eventType: "click", target: data.type }, + { + eventType: "keydown", + target: data.type, + action: "submitted", + condition: "keydownEnter", + }, + ], + data.type ); continue; } @@ -352,6 +508,12 @@ class SearchAdImpression { if (!href) { return ""; } + + // Avoid extracting or fixing up Javascript URLs. + if (href.startsWith("javascript")) { + return ""; + } + // Hrefs can be relative. if (!href.startsWith("https://") && !href.startsWith("http://")) { href = origin + href; @@ -399,7 +561,19 @@ class SearchAdImpression { }); } if (result.relatedElements?.length) { - this.#addEventListenerToElements(result.relatedElements, result.type); + // Bug 1880413: Deprecate related elements. + // Bottom-up approach with related elements are only used for + // non-link elements related to ads, like carousel arrows. 
+ ListenerHelper.addListeners( result.relatedElements, [ { action: "expanded", eventType: "click", }, ], result.type ); } } } @@ -428,25 +602,60 @@ class SearchAdImpression { component.included.parent.selector ); if (parents.length) { + let eventListeners = component.included.parent.eventListeners; + if (eventListeners?.length) { + ListenerHelper.addListeners(parents, eventListeners, component.type); + } for (let parent of parents) { + // Bug 1880413: Deprecate related elements. + // Top-down related elements are either used for auto-suggested + // elements of a searchbox, or elements on a page which we can't + // find through a bottom-up approach but want to add a listener to, + // like carousels with arrows. if (component.included.related?.selector) { - this.#addEventListenerToElements( - parent.querySelectorAll(component.included.related.selector), - component.type + let relatedElements = parent.querySelectorAll( + component.included.related.selector ); + if (relatedElements.length) { + // For the search box, related elements with event listeners are + // auto-suggested terms. For everything else (e.g. carousels) + // they are expanded. + ListenerHelper.addListeners( + relatedElements, + [ + { + action: + component.type == "incontent_searchbox" + ? "submitted" + : "expanded", + eventType: "click", + }, + ], + component.type + ); + } } if (component.included.children) { for (let child of component.included.children) { let childElements = parent.querySelectorAll(child.selector); if (childElements.length) { - this.#recordElementData(parent, { - type: component.type, - childElements: Array.from(childElements), - }); - break; + if (child.eventListeners) { + childElements = Array.from(childElements); + ListenerHelper.addListeners( + childElements, + child.eventListeners, + child.type ?? 
component.type + ); + } + if (!child.skipCount) { + this.#recordElementData(parent, { + type: component.type, + childElements: Array.from(childElements), + }); + } } } - } else { + } else if (!component.included.parent.skipCount) { this.#recordElementData(parent, { type: component.type, }); @@ -788,105 +997,6 @@ class SearchAdImpression { }); } } - - /** - * Adds a click listener to a specific element. - * - * @param {Array} elements - * DOM elements to add event listeners to. - * @param {string} type - * The component type of the element. - * @param {boolean} isRelated - * Whether the elements input are related to components or are actual - * components. - */ - #addEventListenerToElements(elements, type, isRelated = true) { - if (!elements?.length) { - return; - } - let clickAction = "clicked"; - let keydownEnterAction = "clicked"; - - switch (type) { - case "incontent_searchbox": - keydownEnterAction = "submitted"; - if (isRelated) { - // The related element to incontent_search are autosuggested elements - // which when clicked should cause different action than if the - // searchbox is clicked. 
- clickAction = "submitted"; - } - break; - case "ad_carousel": - case "refined_search_buttons": - if (isRelated) { - clickAction = "expanded"; - } - break; - } - - let document = elements[0].ownerGlobal.document; - let url = document.documentURI; - let callback = documentToEventCallbackMap.get(document); - - let removeListenerCallbacks = []; - - for (let element of elements) { - let clickCallback = () => { - if (clickAction == "submitted") { - documentToSubmitMap.set(document, true); - } - callback({ - type, - url, - action: clickAction, - }); - }; - element.addEventListener("click", clickCallback); - - let keydownCallback = event => { - if (event.key == "Enter") { - if (keydownEnterAction == "submitted") { - documentToSubmitMap.set(document, true); - } - callback({ - type, - url, - action: keydownEnterAction, - }); - } - }; - element.addEventListener("keydown", keydownCallback); - - removeListenerCallbacks.push(() => { - element.removeEventListener("click", clickCallback); - element.removeEventListener("keydown", keydownCallback); - }); - } - - document.ownerGlobal.addEventListener( - "pagehide", - () => { - let callbacks = documentToRemoveEventListenersMap.get(document); - if (callbacks) { - for (let removeEventListenerCallback of callbacks) { - removeEventListenerCallback(); - } - documentToRemoveEventListenersMap.delete(document); - } - }, - { once: true } - ); - - // The map might have entries from previous callers, so we must ensure - // we don't discard existing event listener callbacks. - if (documentToRemoveEventListenersMap.has(document)) { - let callbacks = documentToRemoveEventListenersMap.get(document); - removeListenerCallbacks = removeListenerCallbacks.concat(callbacks); - } - - documentToRemoveEventListenersMap.set(document, removeListenerCallbacks); - } } /** @@ -899,7 +1009,7 @@ class SearchAdImpression { * page that contain domains we want to extract. * @property {string} method * A string representing which domain extraction heuristic to use. 
- * One of: "href" or "data-attribute". + * One of: "href", "dataAttribute" or "textContent". * @property {object | null} options * Options related to the domain extraction heuristic used. * @property {string | null} options.dataAttributeKey @@ -922,10 +1032,12 @@ class DomainExtractor { * The document for the SERP we are extracting domains from. * @param {Array} extractorInfos * Information used to target the domains we need to extract. + * @param {string} providerName + * Name of the search provider. * @return {Set} * A set of the domains extracted from the page. */ - extractDomainsFromDocument(document, extractorInfos) { + extractDomainsFromDocument(document, extractorInfos, providerName) { let extractedDomains = new Set(); if (!extractorInfos?.length) { return extractedDomains; @@ -948,20 +1060,26 @@ class DomainExtractor { this.#fromElementsConvertHrefsIntoDomains( elements, origin, + providerName, extractedDomains, extractorInfo.options?.queryParamKey, extractorInfo.options?.queryParamValueIsHref ); break; } - case "data-attribute": { + case "dataAttribute": { this.#fromElementsRetrieveDataAttributeValues( elements, + providerName, extractorInfo.options?.dataAttributeKey, extractedDomains ); break; } + case "textContent": { + this.#fromElementsRetrieveTextContent(elements, extractedDomains); + break; + } } } @@ -979,6 +1097,8 @@ class DomainExtractor { * inspect. * @param {string} origin * Origin of the current page. + * @param {string} providerName + * The name of the search provider. * @param {Set} extractedDomains * The result set of domains extracted from the page. 
* @param {string | null} queryParam @@ -989,11 +1109,16 @@ class DomainExtractor { #fromElementsConvertHrefsIntoDomains( elements, origin, + providerName, extractedDomains, queryParam, queryParamValueIsHref ) { for (let element of elements) { + if (this.#exceedsThreshold(extractedDomains.size)) { + return; + } + let href = element.getAttribute("href"); let url; @@ -1016,12 +1141,16 @@ class DomainExtractor { } catch (e) { continue; } + paramValue = this.#processDomain(paramValue, providerName); } if (paramValue && !extractedDomains.has(paramValue)) { extractedDomains.add(paramValue); } - } else if (url.hostname && !extractedDomains.has(url.hostname)) { - extractedDomains.add(url.hostname); + } else if (url.hostname) { + let processedHostname = this.#processDomain(url.hostname, providerName); + if (processedHostname && !extractedDomains.has(processedHostname)) { + extractedDomains.add(processedHostname); + } } } } @@ -1034,6 +1163,8 @@ class DomainExtractor { * @param {NodeList} elements * A list of elements from the page whose data attributes we want to * inspect. + * @param {string} providerName + * The name of the search provider. * @param {string} attribute * The name of a data attribute to search for within an element. * @param {Set} extractedDomains @@ -1041,16 +1172,113 @@ class DomainExtractor { */ #fromElementsRetrieveDataAttributeValues( elements, + providerName, attribute, extractedDomains ) { for (let element of elements) { + if (this.#exceedsThreshold(extractedDomains.size)) { + return; + } let value = element.dataset[attribute]; + value = this.#processDomain(value, providerName); if (value && !extractedDomains.has(value)) { extractedDomains.add(value); } } } + + /* Given a list of elements, examine the text content for each element, which + * may be 1) a URL from which we can extract a domain or 2) text we can fix + * up to create a best guess as to a URL. If either condition is met, we add + * the domain to the result set. 
+ * + * @param {NodeList} elements + * A list of elements from the page whose text content we want to inspect. + * @param {Set} extractedDomains + * The result set of domains extracted from the page. + */ + #fromElementsRetrieveTextContent(elements, extractedDomains) { + for (let element of elements) { + if (this.#exceedsThreshold(extractedDomains.size)) { + return; + } + let textContent = element.textContent; + if (!textContent) { + continue; + } + + let domain; + try { + domain = new URL(textContent).hostname; + } catch (e) { + domain = textContent.toLowerCase().replaceAll(" ", ""); + // If the attempt to turn the text content into a URL object only fails + // because we're missing a protocol, ".com" may already be present. + if (!domain.endsWith(".com")) { + domain = domain.concat(".com"); + } + } + if (!extractedDomains.has(domain)) { + extractedDomains.add(domain); + } + } + } + + /** + * Processes a raw domain extracted from the SERP into its final form before + * categorization. + * + * @param {string} domain + * The domain extracted from the page. + * @param {string} providerName + * The provider associated with the page. + * @returns {string} + * The domain without any subdomains. + */ + #processDomain(domain, providerName) { + if ( + domain.startsWith(`${providerName}.`) || + domain.includes(`.${providerName}.`) + ) { + return ""; + } + return this.#stripDomainOfSubdomains(domain); + } + + /** + * Helper to strip domains of any subdomains. + * + * @param {string} domain + * The domain to strip of any subdomains. + * @returns {string} + * The given domain with any subdomains removed. + */ + #stripDomainOfSubdomains(domain) { + let tld; + // Can throw an exception if the input has too few domain levels. 
+ try { + tld = Services.eTLD.getKnownPublicSuffixFromHost(domain); + } catch (ex) { + return ""; + } + + let domainWithoutTLD = domain.substring(0, domain.length - tld.length); + let secondLevelDomain = domainWithoutTLD.split(".").at(-2); + + return secondLevelDomain ? `${secondLevelDomain}.${tld}` : ""; + } + + /** + * Per a request from Data Science, we need to limit the number of domains + * categorized to 10 non-ad domains and 10 ad domains. + * + * @param {number} nDomains The number of domains processed. + * @returns {boolean} Whether or not the threshold was exceeded. + */ + #exceedsThreshold(nDomains) { + return nDomains >= CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE; + } } export const domainExtractor = new DomainExtractor(); @@ -1149,8 +1377,11 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild { let timerId = Glean.serp.categorizationDuration.start(); let pageActionCallback = info => { + if (info.action == "submitted") { + documentToSubmitMap.set(doc, true); + } this.sendAsyncMessage("SearchTelemetry:Action", { - type: info.type, + target: info.target, url: info.url, action: info.action, }); @@ -1191,11 +1422,13 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild { let start = Cu.now(); let nonAdDomains = domainExtractor.extractDomainsFromDocument( doc, - providerInfo.domainExtraction.nonAds + providerInfo.domainExtraction.nonAds, + providerInfo.telemetryId ); let adDomains = domainExtractor.extractDomainsFromDocument( doc, - providerInfo.domainExtraction.ads + providerInfo.domainExtraction.ads, + providerInfo.telemetryId ); this.sendAsyncMessage("SearchTelemetry:Domains", { @@ -1287,6 +1520,13 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild { break; } case "pagehide": { + let callbacks = documentToRemoveEventListenersMap.get(this.document); + if (callbacks) { + for (let removeEventListenerCallback of callbacks) { + removeEventListenerCallback(); + } + 
documentToRemoveEventListenersMap.delete(this.document); + } this.#cancelCheck(); break; } -- cgit v1.2.3