summaryrefslogtreecommitdiffstats
path: root/browser/actors/SearchSERPTelemetryChild.sys.mjs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:14:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:14:29 +0000
commitfbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8 (patch)
tree4c1ccaf5486d4f2009f9a338a98a83e886e29c97 /browser/actors/SearchSERPTelemetryChild.sys.mjs
parentReleasing progress-linux version 124.0.1-1~progress7.99u1. (diff)
downloadfirefox-fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8.tar.xz
firefox-fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8.zip
Merging upstream version 125.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/actors/SearchSERPTelemetryChild.sys.mjs')
-rw-r--r--browser/actors/SearchSERPTelemetryChild.sys.mjs484
1 files changed, 362 insertions, 122 deletions
diff --git a/browser/actors/SearchSERPTelemetryChild.sys.mjs b/browser/actors/SearchSERPTelemetryChild.sys.mjs
index e6187e9e4b..c760f9a19e 100644
--- a/browser/actors/SearchSERPTelemetryChild.sys.mjs
+++ b/browser/actors/SearchSERPTelemetryChild.sys.mjs
@@ -25,6 +25,10 @@ XPCOMUtils.defineLazyPreferenceGetter(
false
);
+// Limits applied while categorizing a SERP. MAX_DOMAINS_TO_CATEGORIZE is the
+// size at which DomainExtractor stops adding domains to a result set.
+export const CATEGORIZATION_SETTINGS = {
+ MAX_DOMAINS_TO_CATEGORIZE: 10,
+};
+
// Duplicated from SearchSERPTelemetry to avoid loading the module on content
// startup.
const SEARCH_TELEMETRY_SHARED = {
@@ -34,6 +38,22 @@ const SEARCH_TELEMETRY_SHARED = {
};
/**
+ * Standard events mapped to the telemetry action.
+ */
+const EVENT_TYPE_TO_ACTION = {
+ // Used as the action when an event listener definition does not name one.
+ click: "clicked",
+};
+
+/**
+ * A map of condition names to the predicate that is run when the event is
+ * triggered; the action is only reported when the predicate returns true.
+ * The condition name is referenced in Remote Settings under the optional
+ * `condition` string for an event listener.
+ */
+const CONDITIONS = {
+ // True when the event's key is "Enter".
+ keydownEnter: event => event.key == "Enter",
+};
+
+/**
* SearchProviders looks after keeping track of the search provider information
* received from the main process.
*
@@ -50,7 +70,8 @@ class SearchProviders {
* Gets the search provider information for any provider with advert information.
* If there is nothing in the cache, it will obtain it from shared data.
*
- * @returns {object} Returns the search provider information. @see SearchTelemetry.jsm
+ * @returns {object} Returns the search provider information.
+ * @see SearchTelemetry.sys.mjs
*/
get info() {
if (this._searchProviderInfo) {
@@ -107,6 +128,129 @@ class SearchProviders {
}
/**
+ * @typedef {object} EventListenerParam
+ * @property {string} eventType
+ * The type of event the listener should listen for. If the event type is
+ * non-standard, it should correspond to a definition in
+ * CUSTOM_EVENT_TYPE_TO_DATA that will re-map it to a standard type. TODO
+ * @property {string} target
+ * The type of component that was the source of the event.
+ * @property {string | null} action
+ * The action that should be reported in telemetry.
+ * @property {string | null} condition
+ * Optional name of an entry in CONDITIONS that must pass before the action
+ * is reported.
+ */
+
+/**
+ * Provides a way to add listeners to elements, as well as unload them.
+ */
+class ListenerHelper {
+  /**
+   * Adds each event listener in an array of event listeners to each element
+   * in an array of elements, and stores the matching removal callbacks in
+   * documentToRemoveEventListenersMap so they can be unloaded later.
+   *
+   * Nothing is added when either list is empty, or when no callback has been
+   * registered in documentToEventCallbackMap for the elements' document.
+   *
+   * @param {Array<Element>} elements
+   *   DOM elements to add event listeners to. Any indexable, iterable
+   *   collection with a `length` works.
+   * @param {Array<EventListenerParam>} eventListenerParams
+   *   Descriptions of the listeners to add to every element.
+   * @param {string} target
+   *   The default component type reported with the action. A listener
+   *   description that carries its own `target` overrides it.
+   */
+  static addListeners(elements, eventListenerParams, target) {
+    if (!elements?.length || !eventListenerParams?.length) {
+      return;
+    }
+
+    let document = elements[0].ownerGlobal.document;
+    let callback = documentToEventCallbackMap.get(document);
+    if (!callback) {
+      return;
+    }
+
+    // The map might have entries from previous callers, so we must ensure
+    // we don't discard existing event listener callbacks.
+    let removeListenerCallbacks = [];
+    if (documentToRemoveEventListenersMap.has(document)) {
+      removeListenerCallbacks = documentToRemoveEventListenersMap.get(document);
+    }
+
+    for (let params of eventListenerParams) {
+      let removeListeners = ListenerHelper.addListener(
+        elements,
+        params,
+        target,
+        callback
+      );
+      removeListenerCallbacks = removeListenerCallbacks.concat(removeListeners);
+    }
+
+    documentToRemoveEventListenersMap.set(document, removeListenerCallbacks);
+  }
+
+  /**
+   * Add an event listener to each element in an array of elements.
+   *
+   * @param {Array<Element>} elements
+   *   DOM elements to add event listeners to.
+   * @param {EventListenerParam} eventListenerParam
+   *   Description of the listener: event type, plus optional action, target
+   *   and condition name.
+   * @param {string} target
+   *   The component type to report unless the description overrides it.
+   * @param {Function} callback
+   *   Called with `{ action, target }` each time the event should be reported.
+   * @returns {Array<function>}
+   *   Array of remove event listener functions. Empty when no action can be
+   *   determined or the named condition is unknown, in which case no listener
+   *   was added.
+   */
+  static addListener(elements, eventListenerParam, target, callback) {
+    let {
+      action,
+      condition: conditionName,
+      eventType,
+      target: customTarget,
+    } = eventListenerParam;
+
+    if (customTarget) {
+      target = customTarget;
+    }
+
+    if (!action) {
+      // Only consult own properties: the event type is configuration data, and
+      // a value such as "constructor" must not resolve to a member inherited
+      // from Object.prototype.
+      action = Object.hasOwn(EVENT_TYPE_TO_ACTION, eventType)
+        ? EVENT_TYPE_TO_ACTION[eventType]
+        : null;
+      if (!action) {
+        return [];
+      }
+    }
+
+    // Some events might have specific conditions we want to check before
+    // registering an engagement event.
+    let eventCallback;
+    if (conditionName) {
+      let condition = Object.hasOwn(CONDITIONS, conditionName)
+        ? CONDITIONS[conditionName]
+        : null;
+      if (!condition) {
+        // If a component included a condition, but it wasn't found it is
+        // due to the fact that it was added in a more recent Firefox version
+        // than what is provided via search-telemetry-v2. Since the version of
+        // Firefox the user is using doesn't include this condition,
+        // we shouldn't add the event.
+        return [];
+      }
+      // Deliberately synchronous: nothing is awaited, and an async wrapper
+      // would turn any exception into an unhandled promise rejection.
+      eventCallback = event => {
+        let start = Cu.now();
+        if (condition(event)) {
+          callback({ action, target });
+        }
+        ChromeUtils.addProfilerMarker(
+          "SearchSERPTelemetryChild._eventCallback",
+          start,
+          "Call cached function before callback."
+        );
+      };
+    } else {
+      eventCallback = () => {
+        callback({ action, target });
+      };
+    }
+
+    let removeListenerCallbacks = [];
+    for (let element of elements) {
+      element.addEventListener(eventType, eventCallback);
+      removeListenerCallbacks.push(() => {
+        element.removeEventListener(eventType, eventCallback);
+      });
+    }
+    return removeListenerCallbacks;
+  }
+}
+
+/**
* Scans SERPs for ad components.
*/
class SearchAdImpression {
@@ -252,12 +396,24 @@ class SearchAdImpression {
// - For others, map its component type and check visibility.
for (let [element, data] of this.#elementToAdDataMap.entries()) {
if (data.type == "incontent_searchbox") {
+ // Bug 1880413: Deprecate hard coding the incontent search box.
// If searchbox has child elements, observe those, otherwise
// fallback to its parent element.
- this.#addEventListenerToElements(
- data.childElements.length ? data.childElements : [element],
- data.type,
- false
+ let searchElements = data.childElements.length
+ ? data.childElements
+ : [element];
+ ListenerHelper.addListeners(
+ searchElements,
+ [
+ { eventType: "click", target: data.type },
+ {
+ eventType: "keydown",
+ target: data.type,
+ action: "submitted",
+ condition: "keydownEnter",
+ },
+ ],
+ data.type
);
continue;
}
@@ -352,6 +508,12 @@ class SearchAdImpression {
if (!href) {
return "";
}
+
+ // Avoid extracting or fixing up Javascript URLs.
+ if (href.startsWith("javascript")) {
+ return "";
+ }
+
// Hrefs can be relative.
if (!href.startsWith("https://") && !href.startsWith("http://")) {
href = origin + href;
@@ -399,7 +561,19 @@ class SearchAdImpression {
});
}
if (result.relatedElements?.length) {
- this.#addEventListenerToElements(result.relatedElements, result.type);
+ // Bug 1880413: Deprecate related elements.
+ // The bottom-up approach with related elements is only used for
+ // non-link elements related to ads, like carousel arrows.
+ ListenerHelper.addListeners(
+ result.relatedElements,
+ [
+ {
+ action: "expanded",
+ eventType: "click",
+ },
+ ],
+ result.type
+ );
}
}
}
@@ -428,25 +602,60 @@ class SearchAdImpression {
component.included.parent.selector
);
if (parents.length) {
+ let eventListeners = component.included.parent.eventListeners;
+ if (eventListeners?.length) {
+ ListenerHelper.addListeners(parents, eventListeners, component.type);
+ }
for (let parent of parents) {
+ // Bug 1880413: Deprecate related elements.
+ // Top-down related elements are either used for auto-suggested
+ // elements of a searchbox, or elements on a page which we can't
+ // find through a bottom-up approach but want to add a listener to,
+ // like carousels with arrows.
if (component.included.related?.selector) {
- this.#addEventListenerToElements(
- parent.querySelectorAll(component.included.related.selector),
- component.type
+ let relatedElements = parent.querySelectorAll(
+ component.included.related.selector
);
+ if (relatedElements.length) {
+ // For the search box, related elements with event listeners are
+ // auto-suggested terms. For everything else (e.g. carousels)
+ // they are expanded.
+ ListenerHelper.addListeners(
+ relatedElements,
+ [
+ {
+ action:
+ component.type == "incontent_searchbox"
+ ? "submitted"
+ : "expanded",
+ eventType: "click",
+ },
+ ],
+ component.type
+ );
+ }
}
if (component.included.children) {
for (let child of component.included.children) {
let childElements = parent.querySelectorAll(child.selector);
if (childElements.length) {
- this.#recordElementData(parent, {
- type: component.type,
- childElements: Array.from(childElements),
- });
- break;
+ if (child.eventListeners) {
+ childElements = Array.from(childElements);
+ ListenerHelper.addListeners(
+ childElements,
+ child.eventListeners,
+ child.type ?? component.type
+ );
+ }
+ if (!child.skipCount) {
+ this.#recordElementData(parent, {
+ type: component.type,
+ childElements: Array.from(childElements),
+ });
+ }
}
}
- } else {
+ } else if (!component.included.parent.skipCount) {
this.#recordElementData(parent, {
type: component.type,
});
@@ -788,105 +997,6 @@ class SearchAdImpression {
});
}
}
-
- /**
- * Adds a click listener to a specific element.
- *
- * @param {Array<Element>} elements
- * DOM elements to add event listeners to.
- * @param {string} type
- * The component type of the element.
- * @param {boolean} isRelated
- * Whether the elements input are related to components or are actual
- * components.
- */
- #addEventListenerToElements(elements, type, isRelated = true) {
- if (!elements?.length) {
- return;
- }
- let clickAction = "clicked";
- let keydownEnterAction = "clicked";
-
- switch (type) {
- case "incontent_searchbox":
- keydownEnterAction = "submitted";
- if (isRelated) {
- // The related element to incontent_search are autosuggested elements
- // which when clicked should cause different action than if the
- // searchbox is clicked.
- clickAction = "submitted";
- }
- break;
- case "ad_carousel":
- case "refined_search_buttons":
- if (isRelated) {
- clickAction = "expanded";
- }
- break;
- }
-
- let document = elements[0].ownerGlobal.document;
- let url = document.documentURI;
- let callback = documentToEventCallbackMap.get(document);
-
- let removeListenerCallbacks = [];
-
- for (let element of elements) {
- let clickCallback = () => {
- if (clickAction == "submitted") {
- documentToSubmitMap.set(document, true);
- }
- callback({
- type,
- url,
- action: clickAction,
- });
- };
- element.addEventListener("click", clickCallback);
-
- let keydownCallback = event => {
- if (event.key == "Enter") {
- if (keydownEnterAction == "submitted") {
- documentToSubmitMap.set(document, true);
- }
- callback({
- type,
- url,
- action: keydownEnterAction,
- });
- }
- };
- element.addEventListener("keydown", keydownCallback);
-
- removeListenerCallbacks.push(() => {
- element.removeEventListener("click", clickCallback);
- element.removeEventListener("keydown", keydownCallback);
- });
- }
-
- document.ownerGlobal.addEventListener(
- "pagehide",
- () => {
- let callbacks = documentToRemoveEventListenersMap.get(document);
- if (callbacks) {
- for (let removeEventListenerCallback of callbacks) {
- removeEventListenerCallback();
- }
- documentToRemoveEventListenersMap.delete(document);
- }
- },
- { once: true }
- );
-
- // The map might have entries from previous callers, so we must ensure
- // we don't discard existing event listener callbacks.
- if (documentToRemoveEventListenersMap.has(document)) {
- let callbacks = documentToRemoveEventListenersMap.get(document);
- removeListenerCallbacks = removeListenerCallbacks.concat(callbacks);
- }
-
- documentToRemoveEventListenersMap.set(document, removeListenerCallbacks);
- }
}
/**
@@ -899,7 +1009,7 @@ class SearchAdImpression {
* page that contain domains we want to extract.
* @property {string} method
* A string representing which domain extraction heuristic to use.
- * One of: "href" or "data-attribute".
+ * One of: "href", "dataAttribute" or "textContent".
* @property {object | null} options
* Options related to the domain extraction heuristic used.
* @property {string | null} options.dataAttributeKey
@@ -922,10 +1032,12 @@ class DomainExtractor {
* The document for the SERP we are extracting domains from.
* @param {Array<ExtractorInfo>} extractorInfos
* Information used to target the domains we need to extract.
+ * @param {string} providerName
+ * Name of the search provider.
* @return {Set<string>}
* A set of the domains extracted from the page.
*/
- extractDomainsFromDocument(document, extractorInfos) {
+ extractDomainsFromDocument(document, extractorInfos, providerName) {
let extractedDomains = new Set();
if (!extractorInfos?.length) {
return extractedDomains;
@@ -948,20 +1060,26 @@ class DomainExtractor {
this.#fromElementsConvertHrefsIntoDomains(
elements,
origin,
+ providerName,
extractedDomains,
extractorInfo.options?.queryParamKey,
extractorInfo.options?.queryParamValueIsHref
);
break;
}
- case "data-attribute": {
+ case "dataAttribute": {
this.#fromElementsRetrieveDataAttributeValues(
elements,
+ providerName,
extractorInfo.options?.dataAttributeKey,
extractedDomains
);
break;
}
+ case "textContent": {
+ this.#fromElementsRetrieveTextContent(elements, extractedDomains);
+ break;
+ }
}
}
@@ -979,6 +1097,8 @@ class DomainExtractor {
* inspect.
* @param {string} origin
* Origin of the current page.
+ * @param {string} providerName
+ * The name of the search provider.
* @param {Set<string>} extractedDomains
* The result set of domains extracted from the page.
* @param {string | null} queryParam
@@ -989,11 +1109,16 @@ class DomainExtractor {
#fromElementsConvertHrefsIntoDomains(
elements,
origin,
+ providerName,
extractedDomains,
queryParam,
queryParamValueIsHref
) {
for (let element of elements) {
+ if (this.#exceedsThreshold(extractedDomains.size)) {
+ return;
+ }
+
let href = element.getAttribute("href");
let url;
@@ -1016,12 +1141,16 @@ class DomainExtractor {
} catch (e) {
continue;
}
+ paramValue = this.#processDomain(paramValue, providerName);
}
if (paramValue && !extractedDomains.has(paramValue)) {
extractedDomains.add(paramValue);
}
- } else if (url.hostname && !extractedDomains.has(url.hostname)) {
- extractedDomains.add(url.hostname);
+ } else if (url.hostname) {
+ let processedHostname = this.#processDomain(url.hostname, providerName);
+ if (processedHostname && !extractedDomains.has(processedHostname)) {
+ extractedDomains.add(processedHostname);
+ }
}
}
}
@@ -1034,6 +1163,8 @@ class DomainExtractor {
* @param {NodeList<Element>} elements
* A list of elements from the page whose data attributes we want to
* inspect.
+ * @param {string} providerName
+ * The name of the search provider.
* @param {string} attribute
* The name of a data attribute to search for within an element.
* @param {Set<string>} extractedDomains
@@ -1041,16 +1172,113 @@ class DomainExtractor {
*/
#fromElementsRetrieveDataAttributeValues(
elements,
+ providerName,
attribute,
extractedDomains
) {
for (let element of elements) {
+ // Stop scanning once the result set has reached the domain cap.
+ if (this.#exceedsThreshold(extractedDomains.size)) {
+ return;
+ }
let value = element.dataset[attribute];
+ // NOTE(review): the dataset lookup is presumably undefined when the
+ // element lacks the attribute - confirm #processDomain tolerates that.
+ value = this.#processDomain(value, providerName);
if (value && !extractedDomains.has(value)) {
extractedDomains.add(value);
}
}
}
+
+  /**
+   * Given a list of elements, examine the text content for each element, which
+   * may be 1) a URL from which we can extract a domain or 2) text we can fix
+   * up to create a best guess as to a URL. If either condition is met, we add
+   * the domain to the result set. Elements whose text is empty or only
+   * whitespace, and URLs that have no hostname, are skipped.
+   *
+   * @param {NodeList<Element>} elements
+   *   A list of elements from the page whose text content we want to inspect.
+   * @param {Set<string>} extractedDomains
+   *   The result set of domains extracted from the page.
+   */
+  #fromElementsRetrieveTextContent(elements, extractedDomains) {
+    for (let element of elements) {
+      if (this.#exceedsThreshold(extractedDomains.size)) {
+        return;
+      }
+      // Text content can carry the markup's newlines and indentation; without
+      // trimming, whitespace-only text would be "fixed up" into ".com".
+      let textContent = element.textContent?.trim();
+      if (!textContent) {
+        continue;
+      }
+
+      let domain;
+      try {
+        domain = new URL(textContent).hostname;
+      } catch (e) {
+        // Not a parseable URL: build a best guess by lower-casing the text and
+        // dropping every kind of whitespace, not only literal spaces.
+        domain = textContent.toLowerCase().replace(/\s+/g, "");
+        // If the attempt to turn the text content into a URL object only fails
+        // because we're missing a protocol, ".com" may already be present.
+        if (!domain.endsWith(".com")) {
+          domain = domain.concat(".com");
+        }
+      }
+      // Text such as "label: value" parses as a URL with an empty hostname;
+      // never record an empty domain. Set.add already ignores duplicates.
+      if (domain) {
+        extractedDomains.add(domain);
+      }
+    }
+  }
+
+  /**
+   * Processes a raw domain extracted from the SERP into its final form before
+   * categorization.
+   *
+   * @param {string | null | undefined} domain
+   *   The domain extracted from the page. Callers pass values read straight
+   *   from the page, so a missing or non-string value is treated as no domain.
+   * @param {string} providerName
+   *   The provider associated with the page.
+   * @returns {string}
+   *   The domain without any subdomains. An empty string is returned when
+   *   there is no domain, or when the domain starts with `<providerName>.` or
+   *   contains `.<providerName>.`.
+   */
+  #processDomain(domain, providerName) {
+    // Guard before calling string methods: an absent value would otherwise
+    // throw a TypeError and abort extraction for the whole page.
+    if (typeof domain !== "string" || !domain) {
+      return "";
+    }
+    if (
+      domain.startsWith(`${providerName}.`) ||
+      domain.includes(`.${providerName}.`)
+    ) {
+      return "";
+    }
+    return this.#stripDomainOfSubdomains(domain);
+  }
+
+ /**
+ * Helper to strip domains of any subdomains.
+ *
+ * @param {string} domain
+ * The domain to strip of any subdomains.
+ * @returns {string}
+ * The label immediately preceding the known public suffix, joined to that
+ * suffix with a dot. An empty string is returned when the suffix lookup
+ * throws or when no label precedes the suffix.
+ */
+ #stripDomainOfSubdomains(domain) {
+ let tld;
+ // Can throw an exception if the input has too few domain levels.
+ try {
+ tld = Services.eTLD.getKnownPublicSuffixFromHost(domain);
+ } catch (ex) {
+ return "";
+ }
+
+ // Assuming the suffix is returned without a leading dot, the remainder ends
+ // with ".", so the final split entry is empty and the label before the
+ // suffix sits at index -2.
+ let domainWithoutTLD = domain.substring(0, domain.length - tld.length);
+ let secondLevelDomain = domainWithoutTLD.split(".").at(-2);
+
+ return secondLevelDomain ? `${secondLevelDomain}.${tld}` : "";
+ }
+
+ /**
+ * Per a request from Data Science, we need to limit the number of domains
+ * categorized to 10 non-ad domains and 10 ad domains.
+ *
+ * @param {number} nDomains The number of domains processed.
+ * @returns {boolean}
+ * True once nDomains is greater than or equal to
+ * CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE, i.e. as soon as the
+ * limit has been reached.
+ */
+ #exceedsThreshold(nDomains) {
+ return nDomains >= CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE;
+ }
}
export const domainExtractor = new DomainExtractor();
@@ -1149,8 +1377,11 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild {
let timerId = Glean.serp.categorizationDuration.start();
let pageActionCallback = info => {
+ if (info.action == "submitted") {
+ documentToSubmitMap.set(doc, true);
+ }
this.sendAsyncMessage("SearchTelemetry:Action", {
- type: info.type,
+ target: info.target,
url: info.url,
action: info.action,
});
@@ -1191,11 +1422,13 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild {
let start = Cu.now();
let nonAdDomains = domainExtractor.extractDomainsFromDocument(
doc,
- providerInfo.domainExtraction.nonAds
+ providerInfo.domainExtraction.nonAds,
+ providerInfo.telemetryId
);
let adDomains = domainExtractor.extractDomainsFromDocument(
doc,
- providerInfo.domainExtraction.ads
+ providerInfo.domainExtraction.ads,
+ providerInfo.telemetryId
);
this.sendAsyncMessage("SearchTelemetry:Domains", {
@@ -1287,6 +1520,13 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild {
break;
}
case "pagehide": {
+ let callbacks = documentToRemoveEventListenersMap.get(this.document);
+ if (callbacks) {
+ for (let removeEventListenerCallback of callbacks) {
+ removeEventListenerCallback();
+ }
+ documentToRemoveEventListenersMap.delete(this.document);
+ }
this.#cancelCheck();
break;
}