/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { BrowserSearchTelemetry: "moz-src:///browser/components/search/BrowserSearchTelemetry.sys.mjs", BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.sys.mjs", Region: "resource://gre/modules/Region.sys.mjs", RemoteSettings: "resource://services-settings/remote-settings.sys.mjs", SearchUtils: "moz-src:///toolkit/components/search/SearchUtils.sys.mjs", SERPCategorization: "moz-src:///browser/components/search/SERPCategorization.sys.mjs", SERPCategorizationRecorder: "moz-src:///browser/components/search/SERPCategorization.sys.mjs", SERPCategorizationEventScheduler: "moz-src:///browser/components/search/SERPCategorization.sys.mjs", }); // Exported for tests. export const ADLINK_CHECK_TIMEOUT_MS = 1000; // Unlike the standard adlink check, the timeout for single page apps is not // based on a content event within the page, like DOMContentLoaded or load. // Thus, we aim for a longer timeout to account for when the server might be // slow to update the content on the page. export const SPA_ADLINK_CHECK_TIMEOUT_MS = 2500; export const TELEMETRY_SETTINGS_KEY = "search-telemetry-v2"; export const SEARCH_TELEMETRY_SHARED = { PROVIDER_INFO: "SearchTelemetry:ProviderInfo", LOAD_TIMEOUT: "SearchTelemetry:LoadTimeout", SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout", }; const impressionIdsWithoutEngagementsSet = new Set(); ChromeUtils.defineLazyGetter(lazy, "logConsole", () => { return console.createInstance({ prefix: "SearchTelemetry", maxLogLevel: lazy.SearchUtils.loggingEnabled ? "Debug" : "Warn", }); }); export const SearchSERPTelemetryUtils = { ACTIONS: { CLICKED: "clicked", // specific to cookie banner CLICKED_ACCEPT: "clicked_accept", CLICKED_REJECT: "clicked_reject", CLICKED_MORE_OPTIONS: "clicked_more_options", EXPANDED: "expanded", SUBMITTED: "submitted", }, COMPONENTS: { AD_CAROUSEL: "ad_carousel", AD_IMAGE_ROW: "ad_image_row", AD_LINK: "ad_link", AD_SIDEBAR: "ad_sidebar", AD_SITELINK: "ad_sitelink", AD_UNCATEGORIZED: "ad_uncategorized", COOKIE_BANNER: "cookie_banner", INCONTENT_SEARCHBOX: "incontent_searchbox", NON_ADS_LINK: "non_ads_link", REFINED_SEARCH_BUTTONS: "refined_search_buttons", SHOPPING_TAB: "shopping_tab", }, ABANDONMENTS: { NAVIGATION: "navigation", TAB_CLOSE: "tab_close", WINDOW_CLOSE: "window_close", }, INCONTENT_SOURCES: { OPENED_IN_NEW_TAB: "opened_in_new_tab", REFINE_ON_SERP: "follow_on_from_refine_on_SERP", SEARCHBOX: "follow_on_from_refine_on_incontent_search", }, }; const AD_COMPONENTS = [ SearchSERPTelemetryUtils.COMPONENTS.AD_CAROUSEL, SearchSERPTelemetryUtils.COMPONENTS.AD_IMAGE_ROW, SearchSERPTelemetryUtils.COMPONENTS.AD_LINK, SearchSERPTelemetryUtils.COMPONENTS.AD_SIDEBAR, SearchSERPTelemetryUtils.COMPONENTS.AD_SITELINK, SearchSERPTelemetryUtils.COMPONENTS.AD_UNCATEGORIZED, ]; /** * @typedef {object} FollowOnCookies * * @property {string} codeParamName * The parameter name within the cookie. * @property {string} extraCodeParamName * The query parameter name in the URL that indicates this might be a * follow-on search. * @property {string[]} extraCodePrefixes * Possible values for the query parameter in the URL that indicates this * might be a follow-on search. * @property {string} host * The hostname on which the cookie is stored. * @property {string} name * The name of the cookie to check. */ /** * @typedef {object} SignedInCookies * * @property {string} host * The host associated with a given cookie. * @property {string} name * The name associated with a given cookie. */ /** * @typedef {object} ShoppingTab * * @property {boolean} inspectRegexpInSERP * Whether the regexp should be used against hrefs the selector matches * against. * @property {RegExp} regexp * The regular expression to match against a possible shopping tab. Must be * provided if using this feature. * @property {string} selector * The elements on the page to inspect for the shopping tab. Should be anchor * elements. */ /** * @typedef {object} ProviderInfo * * @property {string} codeParamName * The name of the query parameter for the partner code. * @property {object[]} components * An array of components that could be on the SERP. * @property {{key:string, value: string}} defaultPageQueryParam * Default page query parameter. * @property {string[]} expectedOrganicCodes * An array of partner codes to match against the parameters in the url. * Matching these codes will report the SERP as organic:none which means the * user has done a search through the search engine's website rather than * through a SAP. * @property {RegExp[]} extraAdServersRegexps * An array of regular expressions that match URLs of potential ad servers. * @property {FollowOnCookies[]} followOnCookies * An array of cookie details that are used to identify follow-on searches. * @property {string[]} followOnParamNames * An array of query parameter names that are used when a follow-on search * occurs. * @property {boolean} isSPA * Whether the provider is a single page app. * @property {string[]} organicCodes * An array of partner codes to match against the parameters in the url. * Matching these codes will report the SERP as organic:, which * means the search was performed organically rather than through a SAP. * @property {string[]} queryParamNames * An array of query parameters that may be used for the user's search string. * @property {SignedInCookies[]} signedInCookies * An array of cookie details that are used to determine whether a client is * signed in to a provider's account. * @property {ShoppingTab} shoppingTab * Shopping page parameter. * @property {string[]} taggedCodes * An array of partner codes to match against the parameters in the url. * Matching one of these codes will report the SERP as tagged. * @property {string} telemetryId * The telemetry identifier for the provider. */ /** * TelemetryHandler is the main class handling Search Engine Result Page (SERP) * telemetry. It primarily deals with tracking of what pages are loaded into tabs. * * It handles the *in-content:sap* keys of the SEARCH_COUNTS histogram. */ class TelemetryHandler { // Whether or not this class is initialised. _initialized = false; // An instance of ContentHandler. _contentHandler; // The original provider information, mainly used for tests. _originalProviderInfo = null; // The current search provider info. _searchProviderInfo = null; // An instance of remote settings that is used to access the provider info. _telemetrySettings; // Callback used when syncing telemetry settings. #telemetrySettingsSync; // _browserInfoByURL is a map of tracked search urls to objects containing: // * {object} info // the search provider information associated with the url. // * {WeakMap} browserTelemetryStateMap // a weak map of browsers that have the url loaded, their ad report state, // and their impression id. // * {integer} count // a manual count of browsers logged. // We keep a weak map of browsers, in case we miss something on our counts // and cause a memory leak - worst case our map is slightly bigger than it // needs to be. // The manual count is because WeakMap doesn't give us size/length // information, but we want to know when we can clean up our associated // entry. _browserInfoByURL = new Map(); // Browser objects mapped to the info in _browserInfoByURL. #browserToItemMap = new WeakMap(); // An array of regular expressions that match urls that could be subframes // on SERPs. #subframeRegexps = []; // _browserSourceMap is a map of the latest search source for a particular // browser - one of the KNOWN_SEARCH_SOURCES in BrowserSearchTelemetry. _browserSourceMap = new WeakMap(); /** * A WeakMap whose key is a browser with value of a source type found in * INCONTENT_SOURCES. Kept separate to avoid overlapping with legacy * search sources. These sources are specific to the content of a search * provider page rather than something from within the browser itself. */ #browserContentSourceMap = new WeakMap(); /** * Sets the source of a SERP visit from something that occured in content * rather than from the browser. * * @param {browser} browser * The browser object associated with the page that should be a SERP. * @param {string} source * The source that started the load. One of * SearchSERPTelemetryUtils.COMPONENTS.INCONTENT_SEARCHBOX, * SearchSERPTelemetryUtils.INCONTENT_SOURCES.OPENED_IN_NEW_TAB or * SearchSERPTelemetryUtils.INCONTENT_SOURCES.REFINE_ON_SERP. */ setBrowserContentSource(browser, source) { this.#browserContentSourceMap.set(browser, source); } // _browserNewtabSessionMap is a map of the newtab session id for particular // browsers. _browserNewtabSessionMap = new WeakMap(); constructor() { this._contentHandler = new ContentHandler({ browserInfoByURL: this._browserInfoByURL, findBrowserItemForURL: this._findBrowserItemForURL.bind(this), checkURLForSerpMatch: this._checkURLForSerpMatch.bind(this), findItemForBrowser: this.findItemForBrowser.bind(this), urlIsKnownSERPSubframe: this.urlIsKnownSERPSubframe.bind(this), }); } /** * Initializes the TelemetryHandler and its ContentHandler. It will add * appropriate listeners to the window so that window opening and closing * can be tracked. */ async init() { if (this._initialized) { return; } this._telemetrySettings = lazy.RemoteSettings(TELEMETRY_SETTINGS_KEY); let rawProviderInfo = []; try { rawProviderInfo = await this._telemetrySettings.get(); } catch (ex) { lazy.logConsole.error("Could not get settings:", ex); } this.#telemetrySettingsSync = event => this.#onSettingsSync(event); this._telemetrySettings.on("sync", this.#telemetrySettingsSync); // Send the provider info to the child handler. this._contentHandler.init(rawProviderInfo); this._originalProviderInfo = rawProviderInfo; // Now convert the regexps into this._setSearchProviderInfo(rawProviderInfo); for (let win of Services.wm.getEnumerator("navigator:browser")) { this._registerWindow(win); } Services.wm.addListener(this); this._initialized = true; } async #onSettingsSync(event) { let current = event.data?.current; if (current) { lazy.logConsole.debug( "Update provider info due to Remote Settings sync." ); this._originalProviderInfo = current; this._setSearchProviderInfo(current); Services.ppmm.sharedData.set( SEARCH_TELEMETRY_SHARED.PROVIDER_INFO, current ); Services.ppmm.sharedData.flush(); } else { lazy.logConsole.debug( "Ignoring Remote Settings sync data due to missing records." ); } Services.obs.notifyObservers(null, "search-telemetry-v2-synced"); } /** * Uninitializes the TelemetryHandler and its ContentHandler. */ uninit() { if (!this._initialized) { return; } this._contentHandler.uninit(); for (let win of Services.wm.getEnumerator("navigator:browser")) { this._unregisterWindow(win); } Services.wm.removeListener(this); try { this._telemetrySettings.off("sync", this.#telemetrySettingsSync); } catch (ex) { lazy.logConsole.error( "Failed to shutdown SearchSERPTelemetry Remote Settings.", ex ); } this._telemetrySettings = null; this.#telemetrySettingsSync = null; this._initialized = false; } /** * Records the search source for particular browsers, in case it needs * to be associated with a SERP. * * @param {browser} browser * The browser where the search originated. * @param {string} source * Where the search originated from. */ recordBrowserSource(browser, source) { this._browserSourceMap.set(browser, source); } /** * Records the newtab source for particular browsers, in case it needs * to be associated with a SERP. * * @param {browser} browser * The browser where the search originated. * @param {string} newtabSessionId * The sessionId of the newtab session the search originated from. */ recordBrowserNewtabSession(browser, newtabSessionId) { this._browserNewtabSessionMap.set(browser, newtabSessionId); } /** * Helper function for recording the reason for a Glean abandonment event. * * @param {string} impressionId * The impression id for the abandonment event about to be recorded. * @param {string} reason * The reason the SERP is deemed abandoned. * One of SearchSERPTelemetryUtils.ABANDONMENTS. */ recordAbandonmentTelemetry(impressionId, reason) { impressionIdsWithoutEngagementsSet.delete(impressionId); lazy.logConsole.debug( `Recording an abandonment event for impression id ${impressionId} with reason: ${reason}` ); Glean.serp.abandonment.record({ impression_id: impressionId, reason, }); } /** * Handles the TabClose event received from the listeners. * * @param {object} event * The event object provided by the listener. */ handleEvent(event) { if (event.type != "TabClose") { console.error("Received unexpected event type", event.type); return; } this._browserNewtabSessionMap.delete(event.target.linkedBrowser); this.stopTrackingBrowser( event.target.linkedBrowser, SearchSERPTelemetryUtils.ABANDONMENTS.TAB_CLOSE ); } /** * Test-only function, used to override the provider information, so that * unit tests can set it to easy to test values. * * @param {Array} providerInfo * See {@link https://searchfox.org/mozilla-central/search?q=search-telemetry-v2-schema.json} * for type information. */ overrideSearchTelemetryForTests(providerInfo) { let info = providerInfo ? providerInfo : this._originalProviderInfo; this._contentHandler.overrideSearchTelemetryForTests(info); this._setSearchProviderInfo(info); } /** * Used to set the local version of the search provider information. * This automatically maps the regexps to RegExp objects so that * we don't have to create a new instance each time. * * @param {Array} providerInfo * A raw array of provider information to set. */ _setSearchProviderInfo(providerInfo) { this.#subframeRegexps = []; this._searchProviderInfo = providerInfo.map(provider => { let newProvider = { ...provider, searchPageRegexp: new RegExp(provider.searchPageRegexp), }; if (provider.extraAdServersRegexps) { newProvider.extraAdServersRegexps = provider.extraAdServersRegexps.map( r => new RegExp(r) ); } newProvider.ignoreLinkRegexps = provider.ignoreLinkRegexps?.length ? provider.ignoreLinkRegexps.map(r => new RegExp(r)) : []; newProvider.nonAdsLinkRegexps = provider.nonAdsLinkRegexps?.length ? provider.nonAdsLinkRegexps.map(r => new RegExp(r)) : []; if (provider.shoppingTab?.regexp) { newProvider.shoppingTab = { selector: provider.shoppingTab.selector, regexp: new RegExp(provider.shoppingTab.regexp), }; } newProvider.nonAdsLinkQueryParamNames = provider.nonAdsLinkQueryParamNames ?? []; newProvider.subframes = provider.subframes?.map(obj => { let regexp = new RegExp(obj.regexp); // Also add the Regexp to the list of urls to observe. this.#subframeRegexps.push(regexp); return { ...obj, regexp }; }) ?? []; return newProvider; }); this._contentHandler._searchProviderInfo = this._searchProviderInfo; } reportPageAction(info, browser) { this._contentHandler._reportPageAction(info, browser); } reportPageWithAds(info, browser) { this._contentHandler._reportPageWithAds(info, browser); } reportPageWithAdImpressions(info, browser) { this._contentHandler._reportPageWithAdImpressions(info, browser); } async reportPageDomains(info, browser) { await this._contentHandler._reportPageDomains(info, browser); } reportPageImpression(info, browser) { this._contentHandler._reportPageImpression(info, browser); } /** * This may start tracking a tab based on the URL. If the URL matches a search * partner, and it has a code, then we'll start tracking it. This will aid * determining if it is a page we should be tracking for adverts. * * @param {object} browser * The browser associated with the page. * @param {string} url * The url that was loaded in the browser. * @param {nsIDocShell.LoadCommand} loadType * The load type associated with the page load. */ updateTrackingStatus(browser, url, loadType) { if ( !lazy.BrowserSearchTelemetry.shouldRecordSearchCount( browser.getTabBrowser() ) ) { return; } let info = this._checkURLForSerpMatch(url); if (!info) { this._browserNewtabSessionMap.delete(browser); this.stopTrackingBrowser(browser); return; } let source = "unknown"; if (loadType & Ci.nsIDocShell.LOAD_CMD_RELOAD) { source = "reload"; } else if (loadType & Ci.nsIDocShell.LOAD_CMD_HISTORY) { source = "tabhistory"; } else if (this._browserSourceMap.has(browser)) { source = this._browserSourceMap.get(browser); this._browserSourceMap.delete(browser); } let newtabSessionId; if (this._browserNewtabSessionMap.has(browser)) { newtabSessionId = this._browserNewtabSessionMap.get(browser); // We leave the newtabSessionId in the map for this browser // until we stop loading SERP pages or the tab is closed. } // Generate metadata for the SERP impression. let { impressionId, impressionInfo } = this._generateImpressionInfo( browser, url, info, source ); this._reportSerpPage(info, source, url); // For single page apps, we store the page by its original URI so the // network observers can recover the browser in a context when they only // have access to the originURL. let urlKey = info.isSPA && browser.originalURI?.spec ? browser.originalURI.spec : url; let item = this._browserInfoByURL.get(urlKey); if (item) { item.browserTelemetryStateMap.set(browser, { adsReported: false, adImpressionsReported: false, impressionId, urlToComponentMap: null, impressionInfo, searchBoxSubmitted: false, categorizationInfo: null, adsClicked: 0, adsHidden: 0, adsLoaded: 0, adsVisible: 0, searchQuery: info.searchQuery, }); item.count++; item.source = source; item.newtabSessionId = newtabSessionId; } else { item = { browserTelemetryStateMap: new WeakMap().set(browser, { adsReported: false, adImpressionsReported: false, impressionId, urlToComponentMap: null, impressionInfo, searchBoxSubmitted: false, categorizationInfo: null, adsClicked: 0, adsHidden: 0, adsLoaded: 0, adsVisible: 0, searchQuery: info.searchQuery, }), info, count: 1, source, newtabSessionId, majorVersion: parseInt(Services.appinfo.version), channel: lazy.SearchUtils.MODIFIED_APP_CHANNEL, region: lazy.Region.home, isSPA: info.isSPA, }; // For single page apps, we store the page by its original URI so that // network observers can recover the browser in a context when they only // have the originURL to work with. this._browserInfoByURL.set(urlKey, item); } this.#browserToItemMap.set(browser, item); } /** * Determines whether or not a browser should be untracked or tracked for * SERPs who have single page app behaviour. * * The over-arching logic: * 1. Only inspect the browser if the url matches a SERP that is a SPA. * 2. Recording an engagement if we're tracking the browser and we're going * to another page. * 3. Untrack the browser if we're tracking it and switching pages. * 4. Track the browser if we're now on a default search page. * * @param {BrowserElement} browser * The browser element related to the request. * @param {string} url * The url of the request. * @param {number} loadType * The loadtype of a the request. */ async updateTrackingSinglePageApp(browser, url, loadType) { let providerInfo = this._getProviderInfoForURL(url); if (!providerInfo?.isSPA) { return; } let item = this.findItemForBrowser(browser); let telemetryState = item?.browserTelemetryStateMap.get(browser); let previousSearchTerm = telemetryState?.searchQuery ?? ""; let searchTerm = this.urlSearchTerms(url, providerInfo); let searchTermChanged = previousSearchTerm !== searchTerm; let isSerp = !!this._checkURLForSerpMatch(url); let browserIsTracked = !!telemetryState; let isTabHistory = loadType & Ci.nsIDocShell.LOAD_CMD_HISTORY; // Step 2: Maybe record engagement. if (browserIsTracked && !isTabHistory && (searchTermChanged || !isSerp)) { // If we've established we've changed to another SERP, the cause could be // from a submission event inside the content process. The event is // sent to the parent and stored as `telemetryState.searchBoxSubmitted` // but if we check now, it may be too early. Instead, we check with the // content process directly to see if it recorded a submit event. let actor = browser.browsingContext.currentWindowGlobal.getActor( "SearchSERPTelemetry" ); let didSubmit = await actor.sendQuery("SearchSERPTelemetry:DidSubmit"); if (telemetryState && !telemetryState.searchBoxSubmitted && !didSubmit) { impressionIdsWithoutEngagementsSet.delete(telemetryState.impressionId); Glean.serp.engagement.record({ impression_id: telemetryState.impressionId, action: SearchSERPTelemetryUtils.ACTIONS.CLICKED, target: SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK, }); lazy.logConsole.debug("Counting click:", { impressionId: telemetryState.impressionId, type: SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK, URL: url, }); } } // Step 3: Maybe untrack the browser. if (browserIsTracked && (searchTermChanged || !isSerp)) { let reason = ""; // If we have to untrack it, it might be due to the user using the // back/forward button. if (isTabHistory) { reason = SearchSERPTelemetryUtils.ABANDONMENTS.NAVIGATION; } let actor = browser.browsingContext.currentWindowGlobal.getActor( "SearchSERPTelemetry" ); actor.sendAsyncMessage("SearchSERPTelemetry:StopTrackingDocument"); this.stopTrackingBrowser(browser, reason); browserIsTracked = false; } // Step 4: Maybe track the browser. if (isSerp && !browserIsTracked) { this.updateTrackingStatus(browser, url, loadType); let actor = browser.browsingContext.currentWindowGlobal.getActor( "SearchSERPTelemetry" ); actor.sendAsyncMessage("SearchSERPTelemetry:WaitForSPAPageLoad"); } } /** * Stops tracking of a tab, for example the tab has loaded a different URL. * Also records a Glean abandonment event if appropriate. * * @param {object} browser The browser associated with the tab to stop being * tracked. * @param {string} [abandonmentReason] * An optional parameter that specifies why the browser is deemed abandoned. * The reason will be recorded as part of Glean abandonment telemetry. * One of SearchSERPTelemetryUtils.ABANDONMENTS. */ stopTrackingBrowser(browser, abandonmentReason) { for (let [url, item] of this._browserInfoByURL) { if (item.browserTelemetryStateMap.has(browser)) { let telemetryState = item.browserTelemetryStateMap.get(browser); let impressionId = telemetryState.impressionId; if (impressionIdsWithoutEngagementsSet.has(impressionId)) { this.recordAbandonmentTelemetry(impressionId, abandonmentReason); } if ( lazy.SERPCategorization.enabled && telemetryState.categorizationInfo ) { lazy.SERPCategorizationEventScheduler.sendCallback(browser); } item.browserTelemetryStateMap.delete(browser); item.count--; } if (!item.count) { this._browserInfoByURL.delete(url); } } this.#browserToItemMap.delete(browser); } /** * Calculate how close two urls are in equality. * * The scoring system: * - If the URLs look exactly the same, including the ordering of query * parameters, the score is Infinity. * - If the origin is the same, the score is increased by 1. Otherwise the * score is 0. * - If the path is the same, the score is increased by 1. * - For each query parameter, if the key exists the score is increased by 1. * Likewise if the query parameter values match. * - If the hash is the same, the score is increased by 1. This includes if * the hash is missing in both URLs. * * @param {URL} url1 * Url to compare. * @param {URL} url2 * Other url to compare. Ordering shouldn't matter. * @param {object} [matchOptions] * Options for checking equality. * @param {boolean} [matchOptions.path] * Whether the path must match. Default to false. * @param {boolean} [matchOptions.paramValues] * Whether the values of the query parameters must match if the query * parameter key exists in the other. Defaults to false. * @returns {number} * A score of how closely the two URLs match. Returns 0 if there is no * match or the equality check failed for an enabled match option. */ compareUrls(url1, url2, matchOptions = {}) { // In case of an exact match, well, that's an obvious winner. if (url1.href == url2.href) { return Infinity; } // Each step we get closer to the two URLs being the same, we increase the // score. The consumer of this method will use these scores to see which // of the URLs is the best match. let score = 0; if (url1.origin == url2.origin) { ++score; if (url1.pathname == url2.pathname) { ++score; for (let [key1, value1] of url1.searchParams) { // Let's not fuss about the ordering of search params, since the // score effect will solve that. if (url2.searchParams.has(key1)) { ++score; if (url2.searchParams.get(key1) == value1) { ++score; } else if (matchOptions.paramValues) { return 0; } } } if (url1.hash == url2.hash) { ++score; } } else if (matchOptions.path) { return 0; } } return score; } /** * Extracts the search terms from the URL based on the provider info. * * @param {string} url * The URL to inspect. * @param {object} providerInfo * The providerInfo associated with the URL. * @returns {string} * The search term or if none is found, a blank string. */ urlSearchTerms(url, providerInfo) { if (providerInfo?.queryParamNames?.length) { let { searchParams } = new URL(url); for (let queryParamName of providerInfo.queryParamNames) { let value = searchParams.get(queryParamName); if (value) { return value; } } } return ""; } /** * Finds any SERP data associated with the given browser. * * @param {object} browser * @returns {object} */ findItemForBrowser(browser) { return this.#browserToItemMap.get(browser); } /** * Parts of the URL, like search params and hashes, may be mutated by scripts * on a page we're tracking. Since we don't want to keep track of that * ourselves in order to keep the list of browser objects a weak-referenced * set, we do optional fuzzy matching of URLs to fetch the most relevant item * that contains tracking information. * * @param {string} urlString URL to fetch the tracking data for. * @returns {object} Map containing the following members: * - {WeakMap} browsers * Map of browser elements that belong to `url` and their ad report state. * - {object} info * Info dictionary as returned by `_checkURLForSerpMatch`. * - {number} count * The number of browser element we can most accurately tell we're * tracking, since they're inside a WeakMap. */ _findBrowserItemForURL(urlString) { let url = URL.parse(urlString); if (!url) { return null; } let item; let currentBestMatch = 0; for (let [trackingURL, candidateItem] of this._browserInfoByURL) { if (currentBestMatch === Infinity) { break; } // Make sure to cache the parsed URL object, since there's no reason to // do it twice. trackingURL = candidateItem._trackingURL || (candidateItem._trackingURL = URL.parse(trackingURL)); if (!trackingURL) { continue; } let score = this.compareUrls(url, trackingURL); if (score > currentBestMatch) { item = candidateItem; currentBestMatch = score; } } return item; } // nsIWindowMediatorListener /** * This is called when a new window is opened, and handles registration of * that window if it is a browser window. * * @param {nsIAppWindow} appWin The xul window that was opened. */ onOpenWindow(appWin) { let win = appWin.docShell.domWindow; win.addEventListener( "load", () => { if ( win.document.documentElement.getAttribute("windowtype") != "navigator:browser" ) { return; } this._registerWindow(win); }, { once: true } ); } /** * Listener that is called when a window is closed, and handles deregistration of * that window if it is a browser window. * * @param {nsIAppWindow} appWin The xul window that was closed. */ onCloseWindow(appWin) { let win = appWin.docShell.domWindow; if ( win.document.documentElement.getAttribute("windowtype") != "navigator:browser" ) { return; } this._unregisterWindow(win); } /** * Determines if a URL to be in this SERP's subframes. * * @param {string} url */ urlIsKnownSERPSubframe(url) { if (url) { for (let regexp of this.#subframeRegexps) { if (regexp.test(url)) { return true; } } } return false; } /** * Adds event listeners for the window and registers it with the content handler. * * @param {object} win The window to register. */ _registerWindow(win) { win.gBrowser.tabContainer.addEventListener("TabClose", this); } /** * Removes event listeners for the window and unregisters it with the content * handler. * * @param {object} win The window to unregister. */ _unregisterWindow(win) { for (let tab of win.gBrowser.tabs) { this.stopTrackingBrowser( tab.linkedBrowser, SearchSERPTelemetryUtils.ABANDONMENTS.WINDOW_CLOSE ); } win.gBrowser.tabContainer.removeEventListener("TabClose", this); } /** * Searches for provider information for a given url. * * @param {string} url * The url to match for a provider. * @returns {?ProviderInfo} * Returns a provider or undefined if no provider was found for the url. */ _getProviderInfoForURL(url) { return this._searchProviderInfo.find(info => info.searchPageRegexp.test(url) ); } /** * Checks to see if a url is a search partner location, and determines the * provider and codes used. * * @param {string} url The url to match. * @returns {null|object} Returns null if there is no match found. Otherwise, * returns an object of strings for provider, code, type, whether it's a * single page app, and the search query used. */ _checkURLForSerpMatch(url) { let searchProviderInfo = this._getProviderInfoForURL(url); if (!searchProviderInfo) { return null; } let queries = new URL(url).searchParams; queries.forEach((v, k) => { queries.set(k.toLowerCase(), v); }); let isSPA = !!searchProviderInfo.isSPA; if (isSPA) { // A URL may have a specific query parameter denoting a search page. // If the key was expected but doesn't currently exist, it could be due to // the initial url containing it until after a page load. // In that case, ignore this check since most SERPs missing the query // param will go to the default search page. let { key, value } = searchProviderInfo.defaultPageQueryParam; if (key && queries.has(key) && queries.get(key) != value) { return null; } } // Some URLs can match provider info but also be the provider's homepage // instead of a SERP. // e.g. https://example.com/ vs. https://example.com/?foo=bar // Look for the presence of the query parameter that contains a search term. let hasQuery = false; let searchQuery = ""; for (let queryParamName of searchProviderInfo.queryParamNames) { searchQuery = queries.get(queryParamName); if (searchQuery) { hasQuery = true; break; } } if (!hasQuery) { return null; } // Default to organic to simplify things. // We override type in the sap cases. let type = "organic"; let code; if (searchProviderInfo.codeParamName) { code = queries.get(searchProviderInfo.codeParamName.toLowerCase()); if (code) { // The code is only included if it matches one of the specific ones. if (searchProviderInfo.taggedCodes.includes(code)) { type = "tagged"; if ( searchProviderInfo.followOnParamNames && searchProviderInfo.followOnParamNames.some(p => queries.has(p)) ) { type += "-follow-on"; } } else if (searchProviderInfo.organicCodes.includes(code)) { type = "organic"; } else if (searchProviderInfo.expectedOrganicCodes?.includes(code)) { code = "none"; } else { code = "other"; } } else if (searchProviderInfo.followOnCookies) { // Especially Bing requires lots of extra work related to cookies. for (let followOnCookie of searchProviderInfo.followOnCookies) { if (followOnCookie.extraCodeParamName) { let eCode = queries.get( followOnCookie.extraCodeParamName.toLowerCase() ); if ( !eCode || !followOnCookie.extraCodePrefixes.some(p => eCode.startsWith(p)) ) { continue; } } // If this cookie is present, it's probably an SAP follow-on. // This might be an organic follow-on in the same session, but there // is no way to tell the difference. for (let cookie of Services.cookies.getCookiesFromHost( followOnCookie.host, {} )) { if (cookie.name != followOnCookie.name) { continue; } // Cookie values may take the form of "foo=bar&baz=1". let cookieItems = cookie.value ?.split("&") .map(p => p.split("=")) .filter(p => p[0] == followOnCookie.codeParamName); if (cookieItems.length == 1) { let cookieValue = cookieItems[0][1]; if (searchProviderInfo.taggedCodes.includes(cookieValue)) { type = "tagged-follow-on"; code = cookieValue; break; } } } } } } return { provider: searchProviderInfo.telemetryId, type, code, searchQuery, isSPA, }; } /** * Logs telemetry for a search provider visit. * * @param {object} info The search provider information. * @param {string} info.provider The name of the provider. * @param {string} info.type The type of search. * @param {string} [info.code] The code for the provider. * @param {string} source Where the search originated from. * @param {string} url The url that was matched (for debug logging only). */ _reportSerpPage(info, source, url) { let payload = `${info.provider}:${info.type}:${info.code || "none"}`; let name = source.replace(/_([a-z])/g, (m, p) => p.toUpperCase()); Glean.browserSearchContent[name][payload].add(1); lazy.logConsole.debug("Impression:", payload, url); } /** * @typedef {object} ImpressionInfo * @property {string} provider The name of the provider for the impression. * @property {boolean} tagged Whether the search has partner tags. * @property {string} source The search access point. * @property {boolean} isShoppingPage Whether the page is shopping. * @property {boolean} isPrivate Whether the SERP is in a private tab. * @property {boolean} isSignedIn Whether the user is signed on to the SERP. */ /** * @typedef {object} ImpressionInfoResult * @property {string | null} impressionId The unique id of the impression. * @property {ImpressionInfo | null} impressionInfo General impresison info. */ /** * If applicable for a tracked SERP provider, generates a unique id and * caches information that shouldn't be changed during the lifetime of the * impression. * * @param {browser} browser * The browser associated with the SERP. * @param {string} url * The URL of the SERP. * @param {object} info * General information about the tracked SERP. * @param {string} source * The originator of the SERP load. * @returns {ImpressionInfoResult} The result when attempting to generate * impression info. */ _generateImpressionInfo(browser, url, info, source) { let searchProviderInfo = this._getProviderInfoForURL(url); let data = { impressionId: null, impressionInfo: null, }; if (!searchProviderInfo?.components?.length) { return data; } // The UUID generated by Services.uuid contains leading and trailing braces. // Need to trim them first. data.impressionId = Services.uuid.generateUUID().toString().slice(1, -1); impressionIdsWithoutEngagementsSet.add(data.impressionId); // If it's a SERP but doesn't have a browser source, the source might be // from something that happened in content. if (this.#browserContentSourceMap.has(browser)) { source = this.#browserContentSourceMap.get(browser); this.#browserContentSourceMap.delete(browser); } let partnerCode = ""; if (info.code != "none" && info.code != null) { partnerCode = info.code; } let isShoppingPage = false; if (searchProviderInfo.shoppingTab?.regexp) { isShoppingPage = searchProviderInfo.shoppingTab.regexp.test(url); } let isPrivate = browser.contentPrincipal.originAttributes.privateBrowsingId > 0; let isSignedIn = false; // Signed-in status should not be recorded when the client is in a private // window. if (!isPrivate && searchProviderInfo.signedInCookies) { isSignedIn = searchProviderInfo.signedInCookies.some(cookieObj => { return Services.cookies .getCookiesFromHost( cookieObj.host, browser.contentPrincipal.originAttributes ) .some(c => c.name == cookieObj.name); }); } data.impressionInfo = { provider: info.provider, tagged: info.type.startsWith("tagged"), partnerCode, source, isShoppingPage, isPrivate, isSignedIn, }; return data; } } /** * ContentHandler deals with handling telemetry of the content within a tab - * when ads detected and when they are selected. */ class ContentHandler { /** @type {ProviderInfo[]} */ _searchProviderInfo = null; /** * Constructor. * * @param {object} options * The options for the handler. * @param {Map} options.browserInfoByURL * The map of urls from TelemetryHandler. * @param {(urlString: string) => object} options.findBrowserItemForURL * The function for finding a browser item for the URL. * @param {(url: string) => null|object} options.checkURLForSerpMatch * The function for checking a URL for a SERP match. * @param {(browser: object) => object} options.findItemForBrowser * The function for finding an item for the browser. * @param {(url: string) => boolean} options.urlIsKnownSERPSubframe * The function for determining if a URL is a known SERP sub frame. */ constructor(options) { this._browserInfoByURL = options.browserInfoByURL; this._findBrowserItemForURL = options.findBrowserItemForURL; this._checkURLForSerpMatch = options.checkURLForSerpMatch; this._findItemForBrowser = options.findItemForBrowser; this._urlIsKnownSERPSubframe = options.urlIsKnownSERPSubframe; } /** * Initializes the content handler. This will also set up the shared data that is * shared with the SearchTelemetryChild actor. * * @param {Array} providerInfo * The provider information for the search telemetry to record. */ init(providerInfo) { Services.ppmm.sharedData.set( SEARCH_TELEMETRY_SHARED.PROVIDER_INFO, providerInfo ); Services.ppmm.sharedData.set( SEARCH_TELEMETRY_SHARED.LOAD_TIMEOUT, ADLINK_CHECK_TIMEOUT_MS ); Services.ppmm.sharedData.set( SEARCH_TELEMETRY_SHARED.SPA_LOAD_TIMEOUT, SPA_ADLINK_CHECK_TIMEOUT_MS ); Services.obs.addObserver(this, "http-on-examine-response"); Services.obs.addObserver(this, "http-on-examine-cached-response"); } /** * Uninitializes the content handler. */ uninit() { Services.obs.removeObserver(this, "http-on-examine-response"); Services.obs.removeObserver(this, "http-on-examine-cached-response"); } /** * Test-only function to override the search provider information for use * with tests. Passes it to the SearchTelemetryChild actor. * * @param {object} providerInfo @see SEARCH_PROVIDER_INFO for type information. */ overrideSearchTelemetryForTests(providerInfo) { Services.ppmm.sharedData.set("SearchTelemetry:ProviderInfo", providerInfo); } observe(aSubject, aTopic) { switch (aTopic) { case "http-on-examine-response": case "http-on-examine-cached-response": this.observeActivity(aSubject); break; } } /** * Listener that observes network activity, so that we can determine if a link * from a search provider page was followed, and if then if that link was an * ad click or not. * * @param {nsIChannel} channel The channel that generated the activity. */ observeActivity(channel) { if (!(channel instanceof Ci.nsIChannel)) { return; } let wrappedChannel = ChannelWrapper.get(channel); // The channel we're observing might be a redirect of a channel we've // observed before. if (wrappedChannel._adClickRecorded) { lazy.logConsole.debug("Ad click already recorded"); return; } Services.tm.dispatchToMainThread(() => { // We suspect that No Content (204) responses are used to transfer or // update beacons. They used to lead to double-counting ad-clicks, so let's // ignore them. if (wrappedChannel.statusCode == 204) { lazy.logConsole.debug("Ignoring activity from ambiguous responses"); return; } // The wrapper is consistent across redirects, so we can use it to track state. let originURL = wrappedChannel.originURI && wrappedChannel.originURI.spec; if (!originURL) { return; } let eligibleSubframeUrl = this.#getSerpUrlFromPossibleSubframeUrl( originURL, wrappedChannel ); let item = this._findBrowserItemForURL(eligibleSubframeUrl || originURL); if (!item) { return; } let url = wrappedChannel.finalURL; let providerInfo = item.info.provider; let info = this._searchProviderInfo.find(provider => { return provider.telemetryId == providerInfo; }); // If an error occurs with Glean SERP telemetry logic, avoid // disrupting legacy telemetry. try { this.#maybeRecordSERPTelemetry(wrappedChannel, item, info); } catch (ex) { lazy.logConsole.error(ex); } if (!info.extraAdServersRegexps?.some(regex => regex.test(url))) { return; } try { let name = item.source.replace(/_([a-z])/g, (m, p) => p.toUpperCase()); Glean.browserSearchAdclicks[name][ `${info.telemetryId}:${item.info.type}` ].add(1); wrappedChannel._adClickRecorded = true; if (item.newtabSessionId) { Glean.newtabSearchAd.click.record({ newtab_visit_id: item.newtabSessionId, search_access_point: item.source, is_follow_on: item.info.type.endsWith("follow-on"), is_tagged: item.info.type.startsWith("tagged"), telemetry_id: item.info.provider, }); } lazy.logConsole.debug("Counting ad click in page for:", { source: item.source, originURL, URL: url, }); } catch (e) { console.error(e); } }); } /** * Checks if a request should record an ad click if it can be traced to a * browser containing an observed SERP. * * @param {ChannelWrapper} wrappedChannel * The wrapped channel. * @param {object} item * The browser item associated with the origin URL of the request. * @param {object} info * The search provider info associated with the item. */ #maybeRecordSERPTelemetry(wrappedChannel, item, info) { if (wrappedChannel._recordedClick) { lazy.logConsole.debug("Click already recorded."); return; } let originURL = wrappedChannel.originURI?.spec; let url = wrappedChannel.finalURL; if (info.ignoreLinkRegexps.some(r => r.test(url))) { lazy.logConsole.debug("Ignore url."); return; } // Some channels re-direct by loading pages that return 200. The result // is the channel will have an originURL that changes from the SERP to // either a nonAdsRegexp or an extraAdServersRegexps. This is typical // for loading a page in a new tab. The channel will have changed so any // properties attached to them to record state (e.g. _recordedClick) // won't be present. if ( info.nonAdsLinkRegexps.some(r => r.test(originURL)) || info.extraAdServersRegexps.some(r => r.test(originURL)) ) { lazy.logConsole.debug("Expecting redirect."); return; } // A click event is recorded if a user loads a resource from an // originURL that is a SERP. // // Typically, we only want top level loads containing documents to avoid // recording any event on an in-page resource a SERP might load // (e.g. CSS files). // // The exception to this is if a subframe loads a resource that matches // a non ad link. Some SERPs encode non ad search results with a URL // that gets loaded into an iframe, which then tells the container of // the iframe to change the location of the page. if ( wrappedChannel.channel.isDocument && (wrappedChannel.channel.loadInfo.isTopLevelLoad || info.nonAdsLinkRegexps.some(r => r.test(url))) ) { let browser = wrappedChannel.browserElement; // If the load is from history, don't record an event. if ( browser?.browsingContext.webProgress?.loadType & Ci.nsIDocShell.LOAD_CMD_HISTORY ) { lazy.logConsole.debug("Ignoring load from history"); return; } // Step 1: Check if the browser associated with the request was a // tracked SERP. let start = Cu.now(); let telemetryState; let isFromNewtab = false; if (item.browserTelemetryStateMap.has(browser)) { // If the map contains the browser, then it means that the request is // the SERP is going from one page to another. We know this because // previous conditions prevent non-top level loads from occuring here. telemetryState = item.browserTelemetryStateMap.get(browser); } else if (browser) { // Alternatively, it could be the case that the request is occuring in // a new tab but was triggered by one of the browsers in the state map. // If only one browser exists in the state map, it must be that one. if (item.count === 1) { let sourceBrowsers = ChromeUtils.nondeterministicGetWeakMapKeys( item.browserTelemetryStateMap ); if (sourceBrowsers?.length) { telemetryState = item.browserTelemetryStateMap.get( sourceBrowsers[0] ); } } else if (item.count > 1) { // If the count is more than 1, then multiple open SERPs contain the // same search term, so try to find the specific browser that opened // the request. let tabBrowser = browser.getTabBrowser(); let tab = tabBrowser.getTabForBrowser(browser).openerTab; // A tab will not always have an openerTab, as first tabs in new // windows don't have an openerTab. // Bug 1867582: We should also handle the case where multiple tabs // contain the same search term. if (tab) { telemetryState = item.browserTelemetryStateMap.get( tab.linkedBrowser ); } } if (telemetryState) { isFromNewtab = true; } } lazy.logConsole.debug("Telemetry state:", telemetryState); // Step 2: If we have telemetryState, the browser object must be // associated with another browser that is tracked. Try to find the // component type on the SERP responsible for the request. // Exceptions: // - If a searchbox was used to initiate the load, don't record another // engagement because the event was logged elsewhere. // - If the ad impression hasn't been recorded yet, we have no way of // knowing precisely what kind of component was selected. let isSerp = false; if ( telemetryState && telemetryState.adImpressionsReported && !telemetryState.searchBoxSubmitted ) { if (info.searchPageRegexp?.test(originURL)) { isSerp = true; } let startFindComponent = Cu.now(); let parsedUrl = new URL(url); // Organic links may contain query param values mapped to links shown // on the SERP at page load. If a stored component depends on that // value, we need to be able to recover it or else we'll always consider // it a non_ads_link. if ( info.nonAdsLinkQueryParamNames.length && info.nonAdsLinkRegexps.some(r => r.test(url)) ) { for (let key of info.nonAdsLinkQueryParamNames) { let paramValue = parsedUrl.searchParams.get(key); if (paramValue) { let newParsedUrl = /^https?:\/\//.test(paramValue) ? URL.parse(paramValue) : URL.parse(paramValue, parsedUrl.origin); if (newParsedUrl) { parsedUrl = newParsedUrl; break; } } } } // Determine the component type of the link. let type; for (let [ storedUrl, componentType, ] of telemetryState.urlToComponentMap.entries()) { // The URL we're navigating to may have more query parameters if // the provider adds query parameters when the user clicks on a link. // On the other hand, the URL we are navigating to may have have // fewer query parameters because of query param stripping. // Thus, if a query parameter is missing, a match can still be made // provided keys that exist in both URLs contain equal values. let score = SearchSERPTelemetry.compareUrls(storedUrl, parsedUrl, { paramValues: true, path: true, }); if (score) { type = componentType; break; } } ChromeUtils.addProfilerMarker( "SearchSERPTelemetry._observeActivity", startFindComponent, "Find component for URL" ); // If no component was found, it's possible the link was added after // components were categorized. if (!type) { let isAd = info.extraAdServersRegexps?.some(regex => regex.test(url)); type = isAd ? SearchSERPTelemetryUtils.COMPONENTS.AD_UNCATEGORIZED : SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK; } if ( type == SearchSERPTelemetryUtils.COMPONENTS.REFINED_SEARCH_BUTTONS ) { SearchSERPTelemetry.setBrowserContentSource( browser, SearchSERPTelemetryUtils.INCONTENT_SOURCES.REFINE_ON_SERP ); } else if (isSerp && isFromNewtab) { SearchSERPTelemetry.setBrowserContentSource( browser, SearchSERPTelemetryUtils.INCONTENT_SOURCES.OPENED_IN_NEW_TAB ); } // Step 3: Record the engagement. impressionIdsWithoutEngagementsSet.delete(telemetryState.impressionId); if (AD_COMPONENTS.includes(type)) { telemetryState.adsClicked += 1; } Glean.serp.engagement.record({ impression_id: telemetryState.impressionId, action: SearchSERPTelemetryUtils.ACTIONS.CLICKED, target: type, }); lazy.logConsole.debug("Counting click:", { impressionId: telemetryState.impressionId, type, URL: url, }); // Prevent re-directed channels from being examined more than once. wrappedChannel._recordedClick = true; } ChromeUtils.addProfilerMarker( "SearchSERPTelemetry._observeActivity", start, "Maybe record user engagement." ); } } /** * Checks if the url associated with a request is actually coming from a * subframe within a SERP. If so, try to find the best url associated with * the frame. * * @param {string} originURL * The url associated with the request. * @param {ChannelWrapper} wrappedChannel * The wrapped channel. * @returns {string?} * The url associated with the subframe. */ #getSerpUrlFromPossibleSubframeUrl(originURL, wrappedChannel) { if (!this._urlIsKnownSERPSubframe(originURL)) { return null; } // The sponsored link could be opened in a new tab, in which case the // browser URI may not match a SERP. Thus, try to find a tab that contains // a URI matching a SERP. let browser = wrappedChannel.browserElement; if (browser?.currentURI.spec == "about:blank") { let tabBrowser = browser.getTabBrowser(); let tab = tabBrowser.getTabForBrowser(browser).openerTab; if (tab) { return tab.linkedBrowser.currentURI.spec; } // If no opener tab was found, we're likely looking at the first tab of // a new window. As a last resort, check if the window below the newly // opened window contains a tab with a matching SERP. let windows = lazy.BrowserWindowTracker.orderedWindows; let win = windows.at(1); if (win) { let url = win.gBrowser.selectedBrowser.originalURI?.spec; if (url) { return url; } } // If we couldn't find a matching tab or window, then return null to // indicate to the caller we weren't able to find an appropriate SERP. return null; } return browser?.currentURI.spec; } /** * Logs telemetry for a page with adverts, if it is one of the partner search * provider pages that we're tracking. * * @param {object} info * The search provider information for the page. * @param {boolean} info.hasAds * Whether or not the page has adverts. * @param {string} info.url * The url of the page. * @param {object} browser * The browser associated with the page. */ _reportPageWithAds(info, browser) { let item = this._findItemForBrowser(browser); if (!item) { lazy.logConsole.warn( "Expected to report URI for", info.url, "with ads but couldn't find the information" ); return; } let telemetryState = item.browserTelemetryStateMap.get(browser); if (telemetryState.adsReported) { lazy.logConsole.debug( "Ad was previously reported for browser with URI", info.url ); return; } lazy.logConsole.debug( "Counting ads in page for", item.info.provider, item.info.type, item.source, info.url ); let name = item.source.replace(/_([a-z])/g, (m, p) => p.toUpperCase()); Glean.browserSearchWithads[name][ `${item.info.provider}:${item.info.type}` ].add(1); Services.obs.notifyObservers(null, "reported-page-with-ads"); telemetryState.adsReported = true; if (item.newtabSessionId) { Glean.newtabSearchAd.impression.record({ newtab_visit_id: item.newtabSessionId, search_access_point: item.source, is_follow_on: item.info.type.endsWith("follow-on"), is_tagged: item.info.type.startsWith("tagged"), telemetry_id: item.info.provider, }); } } /** * Logs ad impression telemetry for a page with adverts, if it is * one of the partner search provider pages that we're tracking. * * @param {object} info * The search provider information for the page. * @param {string} info.url * The url of the page. * @param {Map} info.adImpressions * A map of ad impressions found for the page, where the key * is the type of ad component and the value is an object * containing the number of ads that were loaded, visible, * and hidden. * @param {Map} info.hrefToComponentMap * A map of hrefs to their component type. Contains both ads * and non-ads. * @param {object} browser * The browser associated with the page. */ _reportPageWithAdImpressions(info, browser) { let item = this._findItemForBrowser(browser); if (!item) { return; } let telemetryState = item.browserTelemetryStateMap.get(browser); if ( info.adImpressions && telemetryState && !telemetryState.adImpressionsReported ) { for (let [componentType, data] of info.adImpressions.entries()) { // Not all ad impressions are sponsored. if (AD_COMPONENTS.includes(componentType)) { telemetryState.adsHidden += data.adsHidden; telemetryState.adsLoaded += data.adsLoaded; telemetryState.adsVisible += data.adsVisible; } lazy.logConsole.debug("Counting ad:", { type: componentType, ...data }); Glean.serp.adImpression.record({ impression_id: telemetryState.impressionId, component: componentType, ads_loaded: data.adsLoaded, ads_visible: data.adsVisible, ads_hidden: data.adsHidden, }); } // Convert hrefToComponentMap to a urlToComponentMap in order to cache // the query parameters of the href. let urlToComponentMap = new Map(); for (let [href, adType] of info.hrefToComponentMap) { urlToComponentMap.set(new URL(href), adType); } telemetryState.urlToComponentMap = urlToComponentMap; telemetryState.adImpressionsReported = true; Services.obs.notifyObservers(null, "reported-page-with-ad-impressions"); } } /** * Records a page action from a SERP page. Normally, actions are tracked in * parent process by observing network events but some actions are not * possible to detect outside of subscribing to the child process. * * @param {object} info * The search provider infomation for the page. * @param {string} info.target * The target component that was interacted with. * @param {string} info.action * The action taken on the page. * @param {object} browser * The browser associated with the page. */ _reportPageAction(info, browser) { let item = this._findItemForBrowser(browser); if (!item) { return; } let telemetryState = item.browserTelemetryStateMap.get(browser); let impressionId = telemetryState?.impressionId; if (info.target && impressionId) { lazy.logConsole.debug(`Recorded page action:`, { impressionId: telemetryState.impressionId, target: info.target, action: info.action, }); Glean.serp.engagement.record({ impression_id: impressionId, action: info.action, target: info.target, }); impressionIdsWithoutEngagementsSet.delete(impressionId); // In-content searches are not be categorized with a type, so they will // not be picked up in the network processes. if ( info.target == SearchSERPTelemetryUtils.COMPONENTS.INCONTENT_SEARCHBOX && info.action == SearchSERPTelemetryUtils.ACTIONS.SUBMITTED ) { telemetryState.searchBoxSubmitted = true; SearchSERPTelemetry.setBrowserContentSource( browser, SearchSERPTelemetryUtils.INCONTENT_SOURCES.SEARCHBOX ); } Services.obs.notifyObservers(null, "reported-page-with-action"); } else { lazy.logConsole.warn( "Expected to report a", info.action, "engagement but couldn't find an impression id." ); } } _reportPageImpression(info, browser) { let item = this._findItemForBrowser(browser); let telemetryState = item.browserTelemetryStateMap.get(browser); if (!telemetryState?.impressionInfo) { lazy.logConsole.debug( "Could not find telemetry state or impression info." ); return; } let impressionId = telemetryState.impressionId; if (impressionId) { let impressionInfo = telemetryState.impressionInfo; Glean.serp.impression.record({ impression_id: impressionId, provider: impressionInfo.provider, tagged: impressionInfo.tagged, partner_code: impressionInfo.partnerCode, source: impressionInfo.source, shopping_tab_displayed: info.shoppingTabDisplayed, is_shopping_page: impressionInfo.isShoppingPage, is_private: impressionInfo.isPrivate, is_signed_in: impressionInfo.isSignedIn, }); lazy.logConsole.debug(`Reported Impression:`, { impressionId, ...impressionInfo, shoppingTabDisplayed: info.shoppingTabDisplayed, }); Services.obs.notifyObservers(null, "reported-page-with-impression"); } else { lazy.logConsole.debug("Could not find an impression id."); } } /** * Initiates the categorization and reporting of domains extracted from * SERPs. * * @param {object} info * The search provider infomation for the page. * @param {Set} info.nonAdDomains The non-ad domains extracted from the page. * @param {Set} info.adDomains The ad domains extracted from the page. * @param {object} browser * The browser associated with the page. */ async _reportPageDomains(info, browser) { let item = this._findItemForBrowser(browser); let telemetryState = item?.browserTelemetryStateMap.get(browser); if (lazy.SERPCategorization.enabled && telemetryState) { lazy.logConsole.debug("Ad domains:", Array.from(info.adDomains)); lazy.logConsole.debug("Non ad domains:", Array.from(info.nonAdDomains)); let result = await lazy.SERPCategorization.maybeCategorizeSERP( info.nonAdDomains, info.adDomains ); if (result) { telemetryState.categorizationInfo = result; let callback = () => { let impressionInfo = telemetryState.impressionInfo; lazy.SERPCategorizationRecorder.recordCategorizationTelemetry({ ...telemetryState.categorizationInfo, app_version: item.majorVersion, channel: item.channel, region: item.region, partner_code: impressionInfo.partnerCode, provider: impressionInfo.provider, tagged: impressionInfo.tagged, is_shopping_page: impressionInfo.isShoppingPage, num_ads_clicked: telemetryState.adsClicked, num_ads_hidden: telemetryState.adsHidden, num_ads_loaded: telemetryState.adsLoaded, num_ads_visible: telemetryState.adsVisible, }); }; lazy.SERPCategorizationEventScheduler.addCallback(browser, callback); } } Services.obs.notifyObservers( null, "reported-page-with-categorized-domains" ); } } export var SearchSERPTelemetry = new TelemetryHandler();