summaryrefslogtreecommitdiffstats
path: root/browser/actors/SearchSERPTelemetryChild.sys.mjs
diff options
context:
space:
mode:
Diffstat (limited to 'browser/actors/SearchSERPTelemetryChild.sys.mjs')
-rw-r--r--browser/actors/SearchSERPTelemetryChild.sys.mjs1357
1 files changed, 1357 insertions, 0 deletions
diff --git a/browser/actors/SearchSERPTelemetryChild.sys.mjs b/browser/actors/SearchSERPTelemetryChild.sys.mjs
new file mode 100644
index 0000000000..e6187e9e4b
--- /dev/null
+++ b/browser/actors/SearchSERPTelemetryChild.sys.mjs
@@ -0,0 +1,1357 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
+
+const lazy = {};
+
+ChromeUtils.defineESModuleGetters(lazy, {
+ clearTimeout: "resource://gre/modules/Timer.sys.mjs",
+ setTimeout: "resource://gre/modules/Timer.sys.mjs",
+});
+
+XPCOMUtils.defineLazyPreferenceGetter(
+ lazy,
+ "serpEventsEnabled",
+ "browser.search.serpEventTelemetry.enabled",
+ true
+);
+
+XPCOMUtils.defineLazyPreferenceGetter(
+ lazy,
+ "serpEventTelemetryCategorization",
+ "browser.search.serpEventTelemetryCategorization.enabled",
+ false
+);
+
+// Duplicated from SearchSERPTelemetry to avoid loading the module on content
+// startup.
+const SEARCH_TELEMETRY_SHARED = {
+ PROVIDER_INFO: "SearchTelemetry:ProviderInfo",
+ LOAD_TIMEOUT: "SearchTelemetry:LoadTimeout",
+ SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout",
+};
+
+/**
+ * SearchProviders looks after keeping track of the search provider information
+ * received from the main process.
+ *
+ * It is separate to SearchTelemetryChild so that it is not constructed for each
+ * tab, but once per process.
+ */
+class SearchProviders {
+ constructor() {
+ this._searchProviderInfo = null;
+ Services.cpmm.sharedData.addEventListener("change", this);
+ }
+
+ /**
+ * Gets the search provider information for any provider with advert information.
+ * If there is nothing in the cache, it will obtain it from shared data.
+ *
+ * @returns {object} Returns the search provider information. @see SearchTelemetry.jsm
+ */
+ get info() {
+ if (this._searchProviderInfo) {
+ return this._searchProviderInfo;
+ }
+
+ this._searchProviderInfo = Services.cpmm.sharedData.get(
+ SEARCH_TELEMETRY_SHARED.PROVIDER_INFO
+ );
+
+ if (!this._searchProviderInfo) {
+ return null;
+ }
+
+ this._searchProviderInfo = this._searchProviderInfo
+ // Filter-out non-ad providers so that we're not trying to match against
+ // those unnecessarily.
+ .filter(p => "extraAdServersRegexps" in p)
+ // Pre-build the regular expressions.
+ .map(p => {
+ p.adServerAttributes = p.adServerAttributes ?? [];
+ if (p.shoppingTab?.inspectRegexpInSERP) {
+ p.shoppingTab.regexp = new RegExp(p.shoppingTab.regexp);
+ }
+ return {
+ ...p,
+ searchPageRegexp: new RegExp(p.searchPageRegexp),
+ extraAdServersRegexps: p.extraAdServersRegexps.map(
+ r => new RegExp(r)
+ ),
+ };
+ });
+
+ return this._searchProviderInfo;
+ }
+
+ /**
+ * Handles events received from sharedData notifications.
+ *
+ * @param {object} event The event details.
+ */
+ handleEvent(event) {
+ switch (event.type) {
+ case "change": {
+ if (event.changedKeys.includes(SEARCH_TELEMETRY_SHARED.PROVIDER_INFO)) {
+ // Just null out the provider information for now, we'll fetch it next
+ // time we need it.
+ this._searchProviderInfo = null;
+ }
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * Scans SERPs for ad components.
+ */
+class SearchAdImpression {
+ /**
+ * A reference to ad component information that is used if an anchor
+ * element could not be categorized to a specific ad component.
+ *
+ * @type {object}
+ */
+ #defaultComponent = null;
+
+ /**
+ * Maps DOM elements to AdData.
+ *
+ * @type {Map<Element, AdData>}
+ *
+ * @typedef AdData
+ * @type {object}
+ * @property {string} type
+ * The type of ad component.
+ * @property {number} adsLoaded
+ * The number of ads counted as loaded for the component.
+ * @property {boolean} countChildren
+ * Whether all the children were counted for the component.
+ */
+ #elementToAdDataMap = new Map();
+
+ /**
+ * An array of components to do a top-down search.
+ */
+ #topDownComponents = [];
+
+ /**
+ * A reference the providerInfo for this SERP.
+ *
+ * @type {object}
+ */
+ #providerInfo = null;
+
+ set providerInfo(providerInfo) {
+ if (this.#providerInfo?.telemetryId == providerInfo.telemetryId) {
+ return;
+ }
+
+ this.#providerInfo = providerInfo;
+
+ // Reset values.
+ this.#topDownComponents = [];
+
+ for (let component of this.#providerInfo.components) {
+ if (component.default) {
+ this.#defaultComponent = component;
+ continue;
+ }
+ if (component.topDown) {
+ this.#topDownComponents.push(component);
+ }
+ }
+ }
+
+ /**
+ * Check if the page has a shopping tab.
+ *
+ * @param {Document} document
+ * @return {boolean}
+ * Whether the page has a shopping tab. Defaults to false.
+ */
+ hasShoppingTab(document) {
+ if (!this.#providerInfo?.shoppingTab) {
+ return false;
+ }
+
+ // If a provider has the inspectRegexpInSERP, we assume there must be an
+ // associated regexp that must be used on any hrefs matched by the elements
+ // found using the selector. If inspectRegexpInSERP is false, then check if
+ // the number of items found using the selector matches exactly one element
+ // to ensure we've used a fine-grained search.
+ let elements = document.querySelectorAll(
+ this.#providerInfo.shoppingTab.selector
+ );
+ if (this.#providerInfo.shoppingTab.inspectRegexpInSERP) {
+ let regexp = this.#providerInfo.shoppingTab.regexp;
+ for (let element of elements) {
+ let href = element.getAttribute("href");
+ if (href && regexp.test(href)) {
+ this.#recordElementData(element, {
+ type: "shopping_tab",
+ count: 1,
+ });
+ return true;
+ }
+ }
+ } else if (elements.length == 1) {
+ this.#recordElementData(elements[0], {
+ type: "shopping_tab",
+ count: 1,
+ });
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Examine the list of anchors and the document object and find components
+ * on the page.
+ *
+ * With the list of anchors, go through each and find the component it
+ * belongs to and save it in elementToAdDataMap.
+ *
+ * Then, with the document object find components and save the results to
+ * elementToAdDataMap.
+ *
+ * Lastly, combine the results together in a new Map that contains the number
+ * of loaded, visible, and blocked results for the component.
+ *
+ * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors
+ * @param {Document} document
+ *
+ * @returns {Map<string, object>}
+ * A map where the key is a string containing the type of ad component
+ * and the value is an object containing the number of adsLoaded,
+ * adsVisible, and adsHidden within the component.
+ */
+ categorize(anchors, document) {
+ // Used for various functions to make relative URLs absolute.
+ let origin = new URL(document.documentURI).origin;
+
+ // Bottom up approach.
+ this.#categorizeAnchors(anchors, origin);
+
+ // Top down approach.
+ this.#categorizeDocument(document);
+
+ let componentToVisibilityMap = new Map();
+ let hrefToComponentMap = new Map();
+
+ let innerWindowHeight = document.ownerGlobal.innerHeight;
+ let scrollY = document.ownerGlobal.scrollY;
+
+ // Iterate over the results:
+ // - If it's searchbox add event listeners.
+ // - If it is a non_ads_link, map its href to component type.
+ // - For others, map its component type and check visibility.
+ for (let [element, data] of this.#elementToAdDataMap.entries()) {
+ if (data.type == "incontent_searchbox") {
+ // If searchbox has child elements, observe those, otherwise
+ // fallback to its parent element.
+ this.#addEventListenerToElements(
+ data.childElements.length ? data.childElements : [element],
+ data.type,
+ false
+ );
+ continue;
+ }
+ if (data.childElements.length) {
+ for (let child of data.childElements) {
+ let href = this.#extractHref(child, origin);
+ if (href) {
+ hrefToComponentMap.set(href, data.type);
+ }
+ }
+ } else {
+ let href = this.#extractHref(element, origin);
+ if (href) {
+ hrefToComponentMap.set(href, data.type);
+ }
+ }
+
+ // If the component is a non_ads_link, skip visibility checks.
+ if (data.type == "non_ads_link") {
+ continue;
+ }
+
+ // If proxy children were found, check the visibility of all of them
+ // otherwise just check the visiblity of the first child.
+ let childElements;
+ if (data.proxyChildElements.length) {
+ childElements = data.proxyChildElements;
+ } else if (data.childElements.length) {
+ childElements = [data.childElements[0]];
+ }
+
+ let count = this.#countVisibleAndHiddenAds(
+ element,
+ data.adsLoaded,
+ childElements,
+ innerWindowHeight,
+ scrollY
+ );
+ if (componentToVisibilityMap.has(data.type)) {
+ let componentInfo = componentToVisibilityMap.get(data.type);
+ componentInfo.adsLoaded += data.adsLoaded;
+ componentInfo.adsVisible += count.adsVisible;
+ componentInfo.adsHidden += count.adsHidden;
+ } else {
+ componentToVisibilityMap.set(data.type, {
+ adsLoaded: data.adsLoaded,
+ adsVisible: count.adsVisible,
+ adsHidden: count.adsHidden,
+ });
+ }
+ }
+
+ // Release the DOM elements from the Map.
+ this.#elementToAdDataMap.clear();
+
+ return { componentToVisibilityMap, hrefToComponentMap };
+ }
+
+ /**
+ * Given an element, find the href that is most likely to make the request if
+ * the element is clicked. If the element contains a specific data attribute
+ * known to contain the url used to make the initial request, use it,
+ * otherwise use its href. Specific character conversions are done to mimic
+ * conversions likely to take place when urls are observed in network
+ * activity.
+ *
+ * @param {Element} element
+ * The element to inspect.
+ * @param {string} origin
+ * The origin for relative urls.
+ * @returns {string}
+ * The href of the element.
+ */
+ #extractHref(element, origin) {
+ let href;
+ // Prioritize the href from a known data attribute value instead of
+ // its href property, as the former is the initial url the page will
+ // navigate to before being re-directed to the href.
+ for (let name of this.#providerInfo.adServerAttributes) {
+ if (
+ element.dataset[name] &&
+ this.#providerInfo.extraAdServersRegexps.some(regexp =>
+ regexp.test(element.dataset[name])
+ )
+ ) {
+ href = element.dataset[name];
+ break;
+ }
+ }
+ // If a data attribute value was not found, fallback to the href.
+ href = href ?? element.getAttribute("href");
+ if (!href) {
+ return "";
+ }
+ // Hrefs can be relative.
+ if (!href.startsWith("https://") && !href.startsWith("http://")) {
+ href = origin + href;
+ }
+ // Per Bug 376844, apostrophes in query params are escaped, and thus, are
+ // percent-encoded by the time they are observed in the network. Even
+ // though it's more comprehensive, we avoid using newURI because its more
+ // expensive and conversions should be the exception.
+ // e.g. /path'?q=Mozilla's -> /path'?q=Mozilla%27s
+ let arr = href.split("?");
+ if (arr.length == 2 && arr[1].includes("'")) {
+ href = arr[0] + "?" + arr[1].replaceAll("'", "%27");
+ }
+ return href;
+ }
+
+ /**
+ * Given a list of anchor elements, group them into ad components.
+ *
+ * The first step in the process is to check if the anchor should be
+ * inspected. This is based on whether it contains an href or a
+ * data-attribute values that matches an ad link, or if it contains a
+ * pattern caught by a components included regular expression.
+ *
+ * Determine which component it belongs to and the number of matches for
+ * the component. The heuristic is described in findDataForAnchor.
+ * If there was a result and we haven't seen it before, save it in
+ * elementToAdDataMap.
+ *
+ * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors
+ * The list of anchors to inspect.
+ * @param {string} origin
+ * The origin of the document the anchors belong to.
+ */
+ #categorizeAnchors(anchors, origin) {
+ for (let anchor of anchors) {
+ if (this.#shouldInspectAnchor(anchor, origin)) {
+ let result = this.#findDataForAnchor(anchor);
+ if (result) {
+ this.#recordElementData(result.element, {
+ type: result.type,
+ count: result.count,
+ proxyChildElements: result.proxyChildElements,
+ childElements: result.childElements,
+ });
+ }
+ if (result.relatedElements?.length) {
+ this.#addEventListenerToElements(result.relatedElements, result.type);
+ }
+ }
+ }
+ }
+
+ /**
+ * Find components from the document object. This is mostly relevant for
+ * components that are non-ads and don't have an obvious regular expression
+ * that could match the pattern of the href.
+ *
+ * @param {Document} document
+ */
+ #categorizeDocument(document) {
+ // using the subset of components that are top down,
+ // go through each one.
+ for (let component of this.#topDownComponents) {
+ // Top-down searches must have the topDown attribute.
+ if (!component.topDown) {
+ continue;
+ }
+ // Top down searches must include a parent.
+ if (!component.included?.parent) {
+ continue;
+ }
+ let parents = document.querySelectorAll(
+ component.included.parent.selector
+ );
+ if (parents.length) {
+ for (let parent of parents) {
+ if (component.included.related?.selector) {
+ this.#addEventListenerToElements(
+ parent.querySelectorAll(component.included.related.selector),
+ component.type
+ );
+ }
+ if (component.included.children) {
+ for (let child of component.included.children) {
+ let childElements = parent.querySelectorAll(child.selector);
+ if (childElements.length) {
+ this.#recordElementData(parent, {
+ type: component.type,
+ childElements: Array.from(childElements),
+ });
+ break;
+ }
+ }
+ } else {
+ this.#recordElementData(parent, {
+ type: component.type,
+ });
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Evaluates whether an anchor should be inspected based on matching
+ * regular expressions on either its href or specified data-attribute values.
+ *
+ * @param {HTMLAnchorElement} anchor
+ * @param {string} origin
+ * @returns {boolean}
+ */
+ #shouldInspectAnchor(anchor, origin) {
+ let href = anchor.getAttribute("href");
+ if (!href) {
+ return false;
+ }
+
+ // Some hrefs might be relative.
+ if (!href.startsWith("https://") && !href.startsWith("http://")) {
+ href = origin + href;
+ }
+
+ let regexps = this.#providerInfo.extraAdServersRegexps;
+ // Anchors can contain ad links in a data-attribute.
+ for (let name of this.#providerInfo.adServerAttributes) {
+ let attributeValue = anchor.dataset[name];
+ if (
+ attributeValue &&
+ regexps.some(regexp => regexp.test(attributeValue))
+ ) {
+ return true;
+ }
+ }
+ // Anchors can contain ad links in a specific href.
+ if (regexps.some(regexp => regexp.test(href))) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Find the component data for an anchor.
+ *
+ * To categorize the anchor, we iterate over the list of possible components
+ * the anchor could be categorized. If the component is default, we skip
+ * checking because the fallback option for all anchor links is the default.
+ *
+ * First, get the "parent" of the anchor which best represents the DOM element
+ * that contains the anchor links for the component and no other component.
+ * This parent will be cached so that other anchors that share the same
+ * parent can be counted together.
+ *
+ * The check for a parent is a loop because we can define more than one best
+ * parent since on certain SERPs, it's possible for a "better" DOM element
+ * parent to appear occassionally.
+ *
+ * If no parent is found, skip this component.
+ *
+ * If a parent was found, check for specific child elements.
+ *
+ * Finding child DOM elements of a parent is optional. One reason to do so is
+ * to use child elements instead of anchor links to count the number of ads for
+ * a component via the `countChildren` property. This is provided because some ads
+ * (i.e. carousels) have multiple ad links in a single child element that go to the
+ * same location. In this scenario, all instances of the child are recorded as ads.
+ * Subsequent anchor elements that map to the same parent are ignored.
+ *
+ * Whether or not a child was found, return the information that was found,
+ * including whether or not all child elements were counted instead of anchors.
+ *
+ * If another anchor belonging to a parent that was previously recorded is the input
+ * for this function, we either increment the ad count by 1 or don't increment the ad
+ * count because the parent used `countChildren` completed the calculation in a
+ * previous step.
+ *
+ *
+ * @param {HTMLAnchorElement} anchor
+ * The anchor to be inspected.
+ * @returns {object}
+ * An object containing the element representing the root DOM element for
+ * the component, the type of component, how many ads were counted,
+ * and whether or not the count was of all the children.
+ */
+ #findDataForAnchor(anchor) {
+ for (let component of this.#providerInfo.components) {
+ // First, check various conditions for skipping a component.
+
+ // A component should always have at least one included statement.
+ if (!component.included) {
+ continue;
+ }
+
+ // Top down searches are done after the bottom up search.
+ if (component.topDown) {
+ continue;
+ }
+
+ // The default component doesn't need to be checked,
+ // as it will be the fallback option.
+ if (component.default) {
+ continue;
+ }
+
+ // The anchor shouldn't belong to an excluded parent component if one
+ // is provided.
+ if (
+ component.excluded?.parent?.selector &&
+ anchor.closest(component.excluded.parent.selector)
+ ) {
+ continue;
+ }
+
+ // All components with included should have a parent entry.
+ if (!component.included.parent) {
+ continue;
+ }
+
+ // Find the parent of the anchor.
+ let parent = anchor.closest(component.included.parent.selector);
+
+ if (!parent) {
+ continue;
+ }
+
+ // If we've already inspected the parent, add the child element to the
+ // list of anchors. Don't increment the ads loaded count, as we only care
+ // about grouping the anchor with the correct parent.
+ if (this.#elementToAdDataMap.has(parent)) {
+ return {
+ element: parent,
+ childElements: [anchor],
+ };
+ }
+
+ let relatedElements = [];
+ if (component.included.related?.selector) {
+ relatedElements = parent.querySelectorAll(
+ component.included.related.selector
+ );
+ }
+
+ // If the component has no defined children, return the parent element.
+ if (component.included.children) {
+ // Look for the first instance of a matching child selector.
+ for (let child of component.included.children) {
+ // If counting by child, get all of them at once.
+ if (child.countChildren) {
+ let proxyChildElements = parent.querySelectorAll(child.selector);
+ if (proxyChildElements.length) {
+ return {
+ element: parent,
+ type: child.type ?? component.type,
+ proxyChildElements: Array.from(proxyChildElements),
+ count: proxyChildElements.length,
+ childElements: [anchor],
+ relatedElements,
+ };
+ }
+ } else if (parent.querySelector(child.selector)) {
+ return {
+ element: parent,
+ type: child.type ?? component.type,
+ childElements: [anchor],
+ relatedElements,
+ };
+ }
+ }
+ }
+ // If no children were defined for this component, or none were found
+ // in the DOM, use the default definition.
+ return {
+ element: parent,
+ type: component.type,
+ childElements: [anchor],
+ relatedElements,
+ };
+ }
+ // If no component was found, use default values.
+ return {
+ element: anchor,
+ type: this.#defaultComponent.type,
+ };
+ }
+
+ /**
+ * Determines whether or not an ad was visible or hidden.
+ *
+ * An ad is considered visible if the parent element containing the
+ * component has non-zero dimensions, and all child element in the
+ * component have non-zero dimensions and fits within the window
+ * at the time when the impression was takent.
+ *
+ * For some components, like text ads, we don't send every child
+ * element for visibility, just the first text ad. For other components
+ * like carousels, we send all child elements because we do care about
+ * counting how many elements of the carousel were visible.
+ *
+ * @param {Element} element
+ * Element to be inspected
+ * @param {number} adsLoaded
+ * Number of ads initially determined to be loaded for this element.
+ * @param {Array<Element>} childElements
+ * List of children belonging to element.
+ * @param {number} innerWindowHeight
+ * Current height of the window containing the elements.
+ * @param {number} scrollY
+ * Current distance the window has been scrolled.
+ * @returns {object}
+ * Contains adsVisible which is the number of ads shown for the element
+ * and adsHidden, the number of ads not visible to the user.
+ */
+ #countVisibleAndHiddenAds(
+ element,
+ adsLoaded,
+ childElements,
+ innerWindowHeight,
+ scrollY
+ ) {
+ let elementRect =
+ element.ownerGlobal.windowUtils.getBoundsWithoutFlushing(element);
+
+ // If the element lacks a dimension, assume all ads that
+ // were contained within it are hidden.
+ if (elementRect.width == 0 || elementRect.height == 0) {
+ return {
+ adsVisible: 0,
+ adsHidden: adsLoaded,
+ };
+ }
+
+ // If an ad is far above the possible visible area of a window, an
+ // adblocker might be doing it as a workaround for blocking the ad.
+ if (
+ elementRect.bottom < 0 &&
+ innerWindowHeight + scrollY + elementRect.bottom < 0
+ ) {
+ return {
+ adsVisible: 0,
+ adsHidden: adsLoaded,
+ };
+ }
+
+ // Since the parent element has dimensions but no child elements we want
+ // to inspect, check the parent itself is within the viewable area.
+ if (!childElements || !childElements.length) {
+ if (innerWindowHeight < elementRect.y + elementRect.height) {
+ return {
+ adsVisible: 0,
+ adsHidden: 0,
+ };
+ }
+ return {
+ adsVisible: 1,
+ adsHidden: 0,
+ };
+ }
+
+ let adsVisible = 0;
+ let adsHidden = 0;
+ for (let child of childElements) {
+ let itemRect =
+ child.ownerGlobal.windowUtils.getBoundsWithoutFlushing(child);
+
+ // If the child element we're inspecting has no dimension, it is hidden.
+ if (itemRect.height == 0 || itemRect.width == 0) {
+ adsHidden += 1;
+ continue;
+ }
+
+ // If the child element is to the left of the containing element, or to
+ // the right of the containing element, skip it.
+ if (
+ itemRect.x < elementRect.x ||
+ itemRect.x + itemRect.width > elementRect.x + elementRect.width
+ ) {
+ continue;
+ }
+
+ // If the child element is too far down, skip it.
+ if (innerWindowHeight < itemRect.y + itemRect.height) {
+ continue;
+ }
+ ++adsVisible;
+ }
+
+ return {
+ adsVisible,
+ adsHidden,
+ };
+ }
+
+ /**
+ * Caches ad data for a DOM element. The key of the map is by Element rather
+ * than Component for fast lookup on whether an Element has been already been
+ * categorized as a component. Subsequent calls to this passing the same
+ * element will update the list of child elements.
+ *
+ * @param {Element} element
+ * The element considered to be the root for the component.
+ * @param {object} params
+ * Various parameters that can be recorded. Whether the input values exist
+ * or not depends on which component was found, which heuristic should be used
+ * to determine whether an ad was visible, and whether we've already seen this
+ * element.
+ * @param {string | null} params.type
+ * The type of component.
+ * @param {number} params.count
+ * The number of ads found for a component. The number represents either
+ * the number of elements that match an ad expression or the number of DOM
+ * elements containing an ad link.
+ * @param {Array<Element>} params.proxyChildElements
+ * An array of DOM elements that should be inspected for visibility instead
+ * of the actual child elements, possibly because they are grouped.
+ * @param {Array<Element>} params.childElements
+ * An array of DOM elements to inspect.
+ */
+ #recordElementData(
+ element,
+ { type, count = 1, proxyChildElements = [], childElements = [] } = {}
+ ) {
+ if (this.#elementToAdDataMap.has(element)) {
+ let recordedValues = this.#elementToAdDataMap.get(element);
+ if (childElements.length) {
+ recordedValues.childElements =
+ recordedValues.childElements.concat(childElements);
+ }
+ } else {
+ this.#elementToAdDataMap.set(element, {
+ type,
+ adsLoaded: count,
+ proxyChildElements,
+ childElements,
+ });
+ }
+ }
+
+ /**
+ * Adds a click listener to a specific element.
+ *
+ * @param {Array<Element>} elements
+ * DOM elements to add event listeners to.
+ * @param {string} type
+ * The component type of the element.
+ * @param {boolean} isRelated
+ * Whether the elements input are related to components or are actual
+ * components.
+ */
+ #addEventListenerToElements(elements, type, isRelated = true) {
+ if (!elements?.length) {
+ return;
+ }
+ let clickAction = "clicked";
+ let keydownEnterAction = "clicked";
+
+ switch (type) {
+ case "incontent_searchbox":
+ keydownEnterAction = "submitted";
+ if (isRelated) {
+ // The related element to incontent_search are autosuggested elements
+ // which when clicked should cause different action than if the
+ // searchbox is clicked.
+ clickAction = "submitted";
+ }
+ break;
+ case "ad_carousel":
+ case "refined_search_buttons":
+ if (isRelated) {
+ clickAction = "expanded";
+ }
+ break;
+ }
+
+ let document = elements[0].ownerGlobal.document;
+ let url = document.documentURI;
+ let callback = documentToEventCallbackMap.get(document);
+
+ let removeListenerCallbacks = [];
+
+ for (let element of elements) {
+ let clickCallback = () => {
+ if (clickAction == "submitted") {
+ documentToSubmitMap.set(document, true);
+ }
+ callback({
+ type,
+ url,
+ action: clickAction,
+ });
+ };
+ element.addEventListener("click", clickCallback);
+
+ let keydownCallback = event => {
+ if (event.key == "Enter") {
+ if (keydownEnterAction == "submitted") {
+ documentToSubmitMap.set(document, true);
+ }
+ callback({
+ type,
+ url,
+ action: keydownEnterAction,
+ });
+ }
+ };
+ element.addEventListener("keydown", keydownCallback);
+
+ removeListenerCallbacks.push(() => {
+ element.removeEventListener("click", clickCallback);
+ element.removeEventListener("keydown", keydownCallback);
+ });
+ }
+
+ document.ownerGlobal.addEventListener(
+ "pagehide",
+ () => {
+ let callbacks = documentToRemoveEventListenersMap.get(document);
+ if (callbacks) {
+ for (let removeEventListenerCallback of callbacks) {
+ removeEventListenerCallback();
+ }
+ documentToRemoveEventListenersMap.delete(document);
+ }
+ },
+ { once: true }
+ );
+
+ // The map might have entries from previous callers, so we must ensure
+ // we don't discard existing event listener callbacks.
+ if (documentToRemoveEventListenersMap.has(document)) {
+ let callbacks = documentToRemoveEventListenersMap.get(document);
+ removeListenerCallbacks = removeListenerCallbacks.concat(callbacks);
+ }
+
+ documentToRemoveEventListenersMap.set(document, removeListenerCallbacks);
+ }
+}
+
+/**
+ * An object indicating which elements to examine for domains to extract and
+ * which heuristic technique to use to extract that element's domain.
+ *
+ * @typedef {object} ExtractorInfo
+ * @property {string} selectors
+ * A string representing the CSS selector that targets the elements on the
+ * page that contain domains we want to extract.
+ * @property {string} method
+ * A string representing which domain extraction heuristic to use.
+ * One of: "href" or "data-attribute".
+ * @property {object | null} options
+ * Options related to the domain extraction heuristic used.
+ * @property {string | null} options.dataAttributeKey
+ * The key name of the data attribute to lookup.
+ * @property {string | null} options.queryParamKey
+ * The key name of the query param value to lookup.
+ * @property {boolean | null} options.queryParamValueIsHref
+ * Whether the query param value is expected to contain an href.
+ */
+
+/**
+ * DomainExtractor examines elements on a page to retrieve the domains.
+ */
+class DomainExtractor {
+ /**
+ * Extract domains from the page using an array of information pertaining to
+ * the SERP.
+ *
+ * @param {Document} document
+ * The document for the SERP we are extracting domains from.
+ * @param {Array<ExtractorInfo>} extractorInfos
+ * Information used to target the domains we need to extract.
+ * @return {Set<string>}
+ * A set of the domains extracted from the page.
+ */
+ extractDomainsFromDocument(document, extractorInfos) {
+ let extractedDomains = new Set();
+ if (!extractorInfos?.length) {
+ return extractedDomains;
+ }
+
+ for (let extractorInfo of extractorInfos) {
+ if (!extractorInfo.selectors) {
+ continue;
+ }
+
+ let elements = document.querySelectorAll(extractorInfo.selectors);
+ if (!elements) {
+ continue;
+ }
+
+ switch (extractorInfo.method) {
+ case "href": {
+ // Origin is used in case a URL needs to be made absolute.
+ let origin = new URL(document.documentURI).origin;
+ this.#fromElementsConvertHrefsIntoDomains(
+ elements,
+ origin,
+ extractedDomains,
+ extractorInfo.options?.queryParamKey,
+ extractorInfo.options?.queryParamValueIsHref
+ );
+ break;
+ }
+ case "data-attribute": {
+ this.#fromElementsRetrieveDataAttributeValues(
+ elements,
+ extractorInfo.options?.dataAttributeKey,
+ extractedDomains
+ );
+ break;
+ }
+ }
+ }
+
+ return extractedDomains;
+ }
+
+ /**
+ * Given a list of elements, extract domains using href attributes. If the
+ * URL in the href includes the specified query param, the domain will be
+ * that query param's value. Otherwise it will be the hostname of the href
+ * attribute's URL.
+ *
+ * @param {NodeList<Element>} elements
+ * A list of elements from the page whose href attributes we want to
+ * inspect.
+ * @param {string} origin
+ * Origin of the current page.
+ * @param {Set<string>} extractedDomains
+ * The result set of domains extracted from the page.
+ * @param {string | null} queryParam
+ * An optional query param to search for in an element's href attribute.
+ * @param {boolean | null} queryParamValueIsHref
+ * Whether the query param value is expected to contain an href.
+ */
+ #fromElementsConvertHrefsIntoDomains(
+ elements,
+ origin,
+ extractedDomains,
+ queryParam,
+ queryParamValueIsHref
+ ) {
+ for (let element of elements) {
+ let href = element.getAttribute("href");
+
+ let url;
+ try {
+ url = new URL(href, origin);
+ } catch (ex) {
+ continue;
+ }
+
+ // Ignore non-standard protocols.
+ if (url.protocol != "https:" && url.protocol != "http:") {
+ continue;
+ }
+
+ if (queryParam) {
+ let paramValue = url.searchParams.get(queryParam);
+ if (queryParamValueIsHref) {
+ try {
+ paramValue = new URL(paramValue).hostname;
+ } catch (e) {
+ continue;
+ }
+ }
+ if (paramValue && !extractedDomains.has(paramValue)) {
+ extractedDomains.add(paramValue);
+ }
+ } else if (url.hostname && !extractedDomains.has(url.hostname)) {
+ extractedDomains.add(url.hostname);
+ }
+ }
+ }
+
+ /**
+ * Given a list of elements, examine each for the specified data attribute.
+ * If found, add that data attribute's value to the result set of extracted
+ * domains as is.
+ *
+ * @param {NodeList<Element>} elements
+ * A list of elements from the page whose data attributes we want to
+ * inspect.
+ * @param {string} attribute
+ * The name of a data attribute to search for within an element.
+ * @param {Set<string>} extractedDomains
+ * The result set of domains extracted from the page.
+ */
+ #fromElementsRetrieveDataAttributeValues(
+ elements,
+ attribute,
+ extractedDomains
+ ) {
+ for (let element of elements) {
+ let value = element.dataset[attribute];
+ if (value && !extractedDomains.has(value)) {
+ extractedDomains.add(value);
+ }
+ }
+ }
+}
+
+export const domainExtractor = new DomainExtractor();
+const searchProviders = new SearchProviders();
+const searchAdImpression = new SearchAdImpression();
+
+const documentToEventCallbackMap = new WeakMap();
+const documentToRemoveEventListenersMap = new WeakMap();
+const documentToSubmitMap = new WeakMap();
+
+/**
+ * SearchTelemetryChild monitors for pages that are partner searches, and
+ * looks through them to find links which looks like adverts and sends back
+ * a notification to SearchTelemetry for possible telemetry reporting.
+ *
+ * Only the partner details and the fact that at least one ad was found on the
+ * page are returned to SearchTelemetry. If no ads are found, no notification is
+ * given.
+ */
+export class SearchSERPTelemetryChild extends JSWindowActorChild {
+ /**
+ * Amount of time to wait after a page event before examining the page
+ * for ads.
+ *
+ * @type {number | null}
+ */
+ #adTimeout;
+ /**
+ * Determines if there is a provider that matches the supplied URL and returns
+ * the information associated with that provider.
+ *
+ * @param {string} url The url to check
+ * @returns {array|null} Returns null if there's no match, otherwise an array
+ * of provider name and the provider information.
+ */
+ _getProviderInfoForUrl(url) {
+ return searchProviders.info?.find(info => info.searchPageRegexp.test(url));
+ }
+
+ /**
+ * Checks to see if the page is a partner and has an ad link within it. If so,
+ * it will notify SearchTelemetry.
+ */
+ _checkForAdLink(eventType) {
+ try {
+ if (!this.contentWindow) {
+ return;
+ }
+ } catch (ex) {
+ // unload occurred before the timer expired
+ return;
+ }
+
+ let doc = this.document;
+ let url = doc.documentURI;
+ let providerInfo = this._getProviderInfoForUrl(url);
+ if (!providerInfo) {
+ return;
+ }
+
+ let regexps = providerInfo.extraAdServersRegexps;
+ let anchors = doc.getElementsByTagName("a");
+ let hasAds = false;
+ for (let anchor of anchors) {
+ if (!anchor.href) {
+ continue;
+ }
+ for (let name of providerInfo.adServerAttributes) {
+ hasAds = regexps.some(regexp => regexp.test(anchor.dataset[name]));
+ if (hasAds) {
+ break;
+ }
+ }
+ if (!hasAds) {
+ hasAds = regexps.some(regexp => regexp.test(anchor.href));
+ }
+ if (hasAds) {
+ break;
+ }
+ }
+
+ if (hasAds) {
+ this.sendAsyncMessage("SearchTelemetry:PageInfo", {
+ hasAds,
+ url,
+ });
+ }
+
+ if (
+ lazy.serpEventsEnabled &&
+ providerInfo.components?.length &&
+ (eventType == "load" || eventType == "pageshow")
+ ) {
+ // Start performance measurements.
+ let start = Cu.now();
+ let timerId = Glean.serp.categorizationDuration.start();
+
+ let pageActionCallback = info => {
+ this.sendAsyncMessage("SearchTelemetry:Action", {
+ type: info.type,
+ url: info.url,
+ action: info.action,
+ });
+ };
+ documentToEventCallbackMap.set(this.document, pageActionCallback);
+
+ let componentToVisibilityMap, hrefToComponentMap;
+ try {
+ let result = searchAdImpression.categorize(anchors, doc);
+ componentToVisibilityMap = result.componentToVisibilityMap;
+ hrefToComponentMap = result.hrefToComponentMap;
+ } catch (e) {
+ // Cancel the timer if an error encountered.
+ Glean.serp.categorizationDuration.cancel(timerId);
+ }
+
+ if (componentToVisibilityMap && hrefToComponentMap) {
+ // End measurements.
+ ChromeUtils.addProfilerMarker(
+ "SearchSERPTelemetryChild._checkForAdLink",
+ start,
+ "Checked anchors for visibility"
+ );
+ Glean.serp.categorizationDuration.stopAndAccumulate(timerId);
+ this.sendAsyncMessage("SearchTelemetry:AdImpressions", {
+ adImpressions: componentToVisibilityMap,
+ hrefToComponentMap,
+ url,
+ });
+ }
+ }
+
+ if (
+ lazy.serpEventTelemetryCategorization &&
+ providerInfo.domainExtraction &&
+ (eventType == "load" || eventType == "pageshow")
+ ) {
+ let start = Cu.now();
+ let nonAdDomains = domainExtractor.extractDomainsFromDocument(
+ doc,
+ providerInfo.domainExtraction.nonAds
+ );
+ let adDomains = domainExtractor.extractDomainsFromDocument(
+ doc,
+ providerInfo.domainExtraction.ads
+ );
+
+ this.sendAsyncMessage("SearchTelemetry:Domains", {
+ url,
+ nonAdDomains,
+ adDomains,
+ });
+
+ ChromeUtils.addProfilerMarker(
+ "SearchSERPTelemetryChild._checkForAdLink",
+ start,
+ "Extract domains from elements"
+ );
+ }
+ }
+
+ /**
+ * Checks for the presence of certain components on the page that are
+ * required for recording the page impression.
+ */
+ #checkForPageImpressionComponents() {
+ let url = this.document.documentURI;
+ let providerInfo = this._getProviderInfoForUrl(url);
+ if (providerInfo.components?.length) {
+ searchAdImpression.providerInfo = providerInfo;
+ let start = Cu.now();
+ let shoppingTabDisplayed = searchAdImpression.hasShoppingTab(
+ this.document
+ );
+ ChromeUtils.addProfilerMarker(
+ "SearchSERPTelemetryChild.#recordImpression",
+ start,
+ "Checked for shopping tab"
+ );
+ this.sendAsyncMessage("SearchTelemetry:PageImpression", {
+ url,
+ shoppingTabDisplayed,
+ });
+ }
+ }
+
+ #removeEventListeners() {
+ let callbacks = documentToRemoveEventListenersMap.get(this.document);
+ if (callbacks) {
+ for (let callback of callbacks) {
+ callback();
+ }
+ documentToRemoveEventListenersMap.delete(this.document);
+ }
+ }
+
+ /**
+ * Handles events received from the actor child notifications.
+ *
+ * @param {object} event The event details.
+ */
+ handleEvent(event) {
+ if (!this.#urlIsSERP(this.document.documentURI)) {
+ return;
+ }
+ switch (event.type) {
+ case "pageshow": {
+ // If a page is loaded from the bfcache, we won't get a "DOMContentLoaded"
+ // event, so we need to rely on "pageshow" in this case. Note: we do this
+ // so that we remain consistent with the *.in-content:sap* count for the
+ // SEARCH_COUNTS histogram.
+ if (event.persisted) {
+ this.#check(event.type);
+ if (lazy.serpEventsEnabled) {
+ this.#checkForPageImpressionComponents();
+ }
+ }
+ break;
+ }
+ case "DOMContentLoaded": {
+ if (lazy.serpEventsEnabled) {
+ this.#checkForPageImpressionComponents();
+ }
+ this.#check(event.type);
+ break;
+ }
+ case "load": {
+ // We check both DOMContentLoaded and load in case the page has
+ // taken a long time to load and the ad is only detected on load.
+ // We still check at DOMContentLoaded because if the page hasn't
+ // finished loading and the user navigates away, we still want to know
+ // if there were ads on the page or not at that time.
+ this.#check(event.type);
+ break;
+ }
+ case "pagehide": {
+ this.#cancelCheck();
+ break;
+ }
+ }
+ }
+
+ async receiveMessage(message) {
+ switch (message.name) {
+ case "SearchSERPTelemetry:WaitForSPAPageLoad":
+ lazy.setTimeout(() => {
+ this.#checkForPageImpressionComponents();
+ this._checkForAdLink("load");
+ }, Services.cpmm.sharedData.get(SEARCH_TELEMETRY_SHARED.SPA_LOAD_TIMEOUT));
+ break;
+ case "SearchSERPTelemetry:StopTrackingDocument":
+ this.#removeDocumentFromSubmitMap();
+ this.#removeEventListeners();
+ break;
+ case "SearchSERPTelemetry:DidSubmit":
+ return this.#didSubmit();
+ }
+ return null;
+ }
+
+ #didSubmit() {
+ return documentToSubmitMap.get(this.document);
+ }
+
+ #removeDocumentFromSubmitMap() {
+ documentToSubmitMap.delete(this.document);
+ }
+
+ #urlIsSERP(url) {
+ let provider = this._getProviderInfoForUrl(this.document.documentURI);
+ if (provider) {
+ // Some URLs can match provider info but also be the provider's homepage
+ // instead of a SERP.
+ // e.g. https://example.com/ vs. https://example.com/?foo=bar
+ // To check this, we look for the presence of the query parameter
+ // that contains a search term.
+ let queries = new URLSearchParams(url.split("#")[0].split("?")[1]);
+ for (let queryParamName of provider.queryParamNames) {
+ if (queries.get(queryParamName)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ #cancelCheck() {
+ if (this._waitForContentTimeout) {
+ lazy.clearTimeout(this._waitForContentTimeout);
+ }
+ }
+
+ #check(eventType) {
+ if (!this.#adTimeout) {
+ this.#adTimeout = Services.cpmm.sharedData.get(
+ SEARCH_TELEMETRY_SHARED.LOAD_TIMEOUT
+ );
+ }
+ this.#cancelCheck();
+ this._waitForContentTimeout = lazy.setTimeout(() => {
+ this._checkForAdLink(eventType);
+ }, this.#adTimeout);
+ }
+}