diff options
Diffstat (limited to 'browser/components/pagedata/PageDataService.sys.mjs')
-rw-r--r-- | browser/components/pagedata/PageDataService.sys.mjs | 680 |
1 files changed, 680 insertions, 0 deletions
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";

import { EventEmitter } from "resource://gre/modules/EventEmitter.sys.mjs";

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  E10SUtils: "resource://gre/modules/E10SUtils.sys.mjs",
  HiddenFrame: "resource://gre/modules/HiddenFrame.sys.mjs",
  PromiseUtils: "resource://gre/modules/PromiseUtils.sys.mjs",
});

XPCOMUtils.defineLazyModuleGetters(lazy, {
  BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.jsm",
});

XPCOMUtils.defineLazyGetter(lazy, "logConsole", function() {
  return console.createInstance({
    prefix: "PageData",
    maxLogLevel: Services.prefs.getBoolPref("browser.pagedata.log", false)
      ? "Debug"
      : "Warn",
  });
});

XPCOMUtils.defineLazyServiceGetters(lazy, {
  idleService: ["@mozilla.org/widget/useridleservice;1", "nsIUserIdleService"],
});

XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "fetchIdleTime",
  "browser.pagedata.fetchIdleTime",
  300
);

// Only pages loaded from these schemes are considered for data collection.
const ALLOWED_SCHEMES = ["http", "https", "data", "blob"];

// The size, in pixels, given to hidden browsers used for background fetches.
const BACKGROUND_WIDTH = 1024;
const BACKGROUND_HEIGHT = 768;

/**
 * Shifts the first element out of the set.
 *
 * @param {Set<T>} set
 *   The set containing elements.
 * @returns {T | undefined} The first element in the set or undefined if
 *   there is nothing in the set.
 */
function shift(set) {
  let iter = set.values();
  let { value, done } = iter.next();

  if (done) {
    return undefined;
  }

  set.delete(value);
  return value;
}

/**
 * A manager for hidden browsers. Responsible for creating and destroying a
 * hidden frame to hold them.
 */
class HiddenBrowserManager {
  /**
   * The hidden frame if one has been created.
   *
   * @type {HiddenFrame | null}
   */
  #frame = null;
  /**
   * The number of hidden browser elements currently in use.
   *
   * @type {number}
   */
  #browsers = 0;

  /**
   * Creates and returns a new hidden browser.
   *
   * @returns {Promise<Browser>}
   *   Resolves to the new browser element once the hidden frame is available.
   */
  async #acquireBrowser() {
    this.#browsers++;
    if (!this.#frame) {
      this.#frame = new lazy.HiddenFrame();
    }

    try {
      let frame = await this.#frame.get();
      let doc = frame.document;
      let browser = doc.createXULElement("browser");
      browser.setAttribute("remote", "true");
      browser.setAttribute("type", "content");
      browser.setAttribute(
        "style",
        `
        width: ${BACKGROUND_WIDTH}px;
        min-width: ${BACKGROUND_WIDTH}px;
        height: ${BACKGROUND_HEIGHT}px;
        min-height: ${BACKGROUND_HEIGHT}px;
      `
      );
      browser.setAttribute("maychangeremoteness", "true");
      doc.documentElement.appendChild(browser);

      return browser;
    } catch (e) {
      // No browser was handed out, so `#releaseBrowser` will never be called
      // for this acquisition. Give the reference back here, otherwise the
      // counter never returns to zero and the frame is never destroyed.
      this.#releaseFrameReference();
      throw e;
    }
  }

  /**
   * Drops one reference to the hidden frame, destroying the frame when no
   * hidden browsers remain in use.
   */
  #releaseFrameReference() {
    this.#browsers--;
    if (this.#browsers == 0) {
      this.#frame?.destroy();
      this.#frame = null;
    }
  }

  /**
   * Releases the given hidden browser.
   *
   * @param {Browser} browser
   *   The hidden browser element.
   */
  #releaseBrowser(browser) {
    browser.remove();

    this.#releaseFrameReference();
  }

  /**
   * Calls a callback function with a new hidden browser.
   * This function will return whatever the callback function returns.
   *
   * @param {Callback} callback
   *   The callback function will be called with the browser element and may
   *   be asynchronous.
   * @returns {Promise<T>}
   *   Resolves to the callback's result. The browser is released whether the
   *   callback succeeds or throws.
   */
  async withHiddenBrowser(callback) {
    let browser = await this.#acquireBrowser();
    try {
      return await callback(browser);
    } finally {
      this.#releaseBrowser(browser);
    }
  }
}

/**
 * @typedef {object} CacheEntry
 *   An entry in the page data cache.
 * @property {PageData | null} pageData
 *   The data or null if there is no known data.
 * @property {Set} actors
 *   The actors that maintain an interest in keeping the entry cached.
 */

/**
 * A cache of page data kept in memory. By default any discovered data from
 * browsers is kept in memory until the browser element is destroyed but other
 * actors may register an interest in keeping an entry alive beyond that.
 */
class PageDataCache {
  /**
   * The contents of the cache. Keyed on page url.
   *
   * @type {Map<string, CacheEntry>}
   */
  #cache = new Map();

  /**
   * Creates or updates an entry in the cache. If no actor has registered any
   * interest in keeping this page's data in memory then this will do nothing.
   *
   * @param {string} url
   *   The url of the page.
   * @param {PageData|null} pageData
   *   The current page data for the page.
   */
  set(url, pageData) {
    let entry = this.#cache.get(url);

    // Entries are only created by `lockData`; unlocked urls are not cached.
    if (entry) {
      entry.pageData = pageData;
    }
  }

  /**
   * Gets any cached data for the url.
   *
   * @param {string} url
   *   The url of the page.
   * @returns {PageData | null}
   *   The page data if some is known.
   */
  get(url) {
    let entry = this.#cache.get(url);
    return entry?.pageData ?? null;
  }

  /**
   * Adds a lock to an entry. This can be called before we have discovered the
   * data for the url.
   *
   * @param {object} actor
   *   Ensures the entry stays in memory until unlocked by this actor.
   * @param {string} url
   *   The url of the page.
   */
  lockData(actor, url) {
    let entry = this.#cache.get(url);
    if (entry) {
      entry.actors.add(actor);
    } else {
      this.#cache.set(url, {
        // No data is known yet; null matches the CacheEntry typedef.
        pageData: null,
        actors: new Set([actor]),
      });
    }
  }

  /**
   * Removes a lock from an entry.
   *
   * @param {object} actor
   *   The lock to remove.
   * @param {string | undefined} [url]
   *   The url of the page or undefined to unlock all urls locked by this actor.
   */
  unlockData(actor, url) {
    let entries = [];
    if (url) {
      let entry = this.#cache.get(url);
      if (!entry) {
        return;
      }

      entries.push([url, entry]);
    } else {
      entries = [...this.#cache];
    }

    for (let [entryUrl, entry] of entries) {
      if (entry.actors.delete(actor)) {
        // Drop the entry once the last interested actor has gone.
        if (entry.actors.size == 0) {
          this.#cache.delete(entryUrl);
        }
      }
    }
  }
}

/**
 * @typedef {object} PageData
 *   A set of data discovered from a page. Other than the `data` property this
 *   is the schema at `browser/components/pagedata/schemas/general.schema.json`.
 * @property {string} url
 *   The page's url.
 * @property {number} date
 *   The epoch based timestamp for when the data was discovered.
 * @property {string} siteName
 *   The page's friendly site name.
 * @property {string} image
 *   The page's image.
 * @property {object} data
 *   The map of data found which may be empty if no data was found. The keys in
 *   the map are from the `PageDataSchema.DATA_TYPE` enumeration. The values are
 *   in the format defined by the schemas at
 *   `browser/components/pagedata/schemas`.
 */

export const PageDataService = new (class PageDataService extends EventEmitter {
  /**
   * Caches page data discovered from browsers.
   *
   * @type {PageDataCache}
   */
  #pageDataCache = new PageDataCache();

  /**
   * The number of currently running background fetches.
   *
   * @type {number}
   */
  #backgroundFetches = 0;

  /**
   * The list of urls waiting to be loaded in the background.
   *
   * @type {Set<string>}
   */
  #backgroundQueue = new Set();

  /**
   * Tracks whether the user is currently idle.
   *
   * @type {boolean}
   */
  #userIsIdle = false;

  /**
   * A manager for hidden browsers.
   *
   * @type {HiddenBrowserManager}
   */
  #browserManager = new HiddenBrowserManager();

  /**
   * A map of hidden browsers to a resolve function that should be passed the
   * actor that was created for the browser.
   *
   * @type {WeakMap<Browser, function(PageDataParent): void>}
   */
  #backgroundBrowsers = new WeakMap();

  /**
   * Tracks windows that have browsers with entries in the cache.
   *
   * @type {Map<Window, Set<Browser>>}
   */
  #trackedWindows = new Map();

  /**
   * Constructs the service.
   */
  constructor() {
    super();

    // Limits the number of background fetches that will run at once. Set to 0 to
    // effectively allow an infinite number.
    XPCOMUtils.defineLazyPreferenceGetter(
      this,
      "MAX_BACKGROUND_FETCHES",
      "browser.pagedata.maxBackgroundFetches",
      5,
      () => this.#startBackgroundWorkers()
    );
  }

  /**
   * Initializes a new instance of the service, not called externally.
   */
  init() {
    if (!Services.prefs.getBoolPref("browser.pagedata.enabled", false)) {
      return;
    }

    ChromeUtils.registerWindowActor("PageData", {
      parent: {
        esModuleURI: "resource:///actors/PageDataParent.sys.mjs",
      },
      child: {
        esModuleURI: "resource:///actors/PageDataChild.sys.mjs",
        events: {
          DOMContentLoaded: {},
          pageshow: {},
        },
      },
    });

    lazy.logConsole.debug("Service started");

    for (let win of lazy.BrowserWindowTracker.orderedWindows) {
      if (!win.closed) {
        // Ask any existing tabs to report
        for (let tab of win.gBrowser.tabs) {
          // Skip tabs that have no browsing context, window global or actor
          // rather than throwing, so the remaining tabs are still asked and
          // the idle observer below is still registered.
          let parent = tab.linkedBrowser.browsingContext?.currentWindowGlobal?.getActor(
            "PageData"
          );

          parent?.sendAsyncMessage("PageData:CheckLoaded");
        }
      }
    }

    lazy.idleService.addIdleObserver(this, lazy.fetchIdleTime);
  }

  /**
   * Called when the service is destroyed. This is generally on shutdown so we
   * don't really need to do much cleanup.
   */
  uninit() {
    lazy.logConsole.debug("Service stopped");
  }

  /**
   * Starts tracking for when a browser is destroyed.
   *
   * @param {Browser} browser
   *   The browser to track.
   */
  #trackBrowser(browser) {
    let window = browser.ownerGlobal;

    let browsers = this.#trackedWindows.get(window);
    if (browsers) {
      browsers.add(browser);

      // This window is already being tracked, no need to add listeners.
      return;
    }

    browsers = new Set([browser]);
    this.#trackedWindows.set(window, browsers);

    window.addEventListener("unload", () => {
      // The window is going away, release everything its browsers locked.
      for (let closedBrowser of browsers) {
        this.unlockEntry(closedBrowser);
      }

      this.#trackedWindows.delete(window);
    });

    window.addEventListener("TabClose", ({ target: tab }) => {
      // Unlock any entries locked by this browser.
      let closedBrowser = tab.linkedBrowser;
      this.unlockEntry(closedBrowser);
      browsers.delete(closedBrowser);
    });
  }

  /**
   * Requests that any page data for this url is retained in memory until
   * unlocked. By calling this you are committing to later call `unlockEntry`
   * with the same `actor` and `url` parameters.
   *
   * @param {object} actor
   *   The actor requesting the lock.
   * @param {string} url
   *   The url of the page to lock.
   */
  lockEntry(actor, url) {
    this.#pageDataCache.lockData(actor, url);
  }

  /**
   * Notifies that an actor is no longer interested in a url.
   *
   * @param {object} actor
   *   The actor that requested the lock.
   * @param {string | undefined} [url]
   *   The url of the page or undefined to unlock all urls locked by this actor.
   */
  unlockEntry(actor, url) {
    this.#pageDataCache.unlockData(actor, url);
  }

  /**
   * Called when the content process signals that a page is ready for data
   * collection.
   *
   * @param {PageDataParent} actor
   *   The parent actor for the page.
   * @param {string} url
   *   The url of the page.
   */
  async pageLoaded(actor, url) {
    let uri = Services.io.newURI(url);
    if (!ALLOWED_SCHEMES.includes(uri.scheme)) {
      return;
    }

    let browser = actor.browsingContext?.embedderElement;

    // If we don't have a browser then it went away before we could record,
    // so we don't know where the data came from.
    if (!browser) {
      return;
    }

    // Is this a load in a background browser?
    let backgroundResolve = this.#backgroundBrowsers.get(browser);
    if (backgroundResolve) {
      // `fetchPageData` is waiting on this actor and collects the data itself.
      backgroundResolve(actor);
      return;
    }

    // Otherwise we only care about pages loaded in the tab browser.
    if (!this.#isATabBrowser(browser)) {
      return;
    }

    try {
      let data = await actor.collectPageData();
      if (data) {
        // Keep this data alive until the browser is destroyed.
        this.#trackBrowser(browser);
        this.lockEntry(browser, data.url);

        this.pageDataDiscovered(data);
      }
    } catch (e) {
      lazy.logConsole.error(e);
    }
  }

  /**
   * Adds data for a url. This should generally only be called by other components of the
   * page data service or tests for simulating page data collection.
   *
   * @param {PageData} pageData
   *   The set of data discovered.
   */
  pageDataDiscovered(pageData) {
    lazy.logConsole.debug("Discovered page data", pageData);

    // The cached copy always carries a `data` object, even when none was found.
    this.#pageDataCache.set(pageData.url, {
      ...pageData,
      data: pageData.data ?? {},
    });

    // Send out a notification.
    this.emit("page-data", pageData);
  }

  /**
   * Retrieves any cached page data. Returns null if there is no information in the cache, this will
   * happen either if the page has not been browsed recently or if data collection failed for some
   * reason.
   *
   * @param {string} url
   *   The url to retrieve data for.
   * @returns {PageData|null}
   *   A `PageData` if one is cached (it may not actually contain any items of data) or null if this
   *   page has not been successfully checked for data recently.
   */
  getCached(url) {
    return this.#pageDataCache.get(url);
  }

  /**
   * Determines whether the url parses to a scheme that `pageLoaded` ignores.
   *
   * @param {string} url
   *   The url to check.
   * @returns {boolean}
   *   True if the url parses and its scheme is not in `ALLOWED_SCHEMES`. False
   *   if the scheme is allowed or the url cannot be parsed here, in which case
   *   the decision is left to the load itself.
   */
  #hasDisallowedScheme(url) {
    let uri;
    try {
      uri = Services.io.newURI(url);
    } catch (e) {
      return false;
    }

    return !ALLOWED_SCHEMES.includes(uri.scheme);
  }

  /**
   * Fetches page data from the given URL using a hidden window. Note that this does not populate
   * the page data cache or emit the `page-data` event.
   *
   * @param {string} url
   *   The url to retrieve data for.
   * @returns {Promise<PageData|null>}
   *   Resolves to the found pagedata, or null if the url's scheme is not one that is collected
   *   from. Errors raised while loading the page or collecting its data are propagated.
   */
  async fetchPageData(url) {
    // `pageLoaded` returns for disallowed schemes before it looks for a waiting
    // background browser, so the promise awaited below would never resolve and
    // the hidden browser would never be released.
    if (this.#hasDisallowedScheme(url)) {
      lazy.logConsole.debug("Not fetching page data for disallowed url", url);
      return null;
    }

    return this.#browserManager.withHiddenBrowser(async browser => {
      try {
        // Resolved by `pageLoaded` with the actor for the page once it loads.
        let { promise, resolve } = lazy.PromiseUtils.defer();
        this.#backgroundBrowsers.set(browser, resolve);

        let principal = Services.scriptSecurityManager.getSystemPrincipal();
        let oa = lazy.E10SUtils.predictOriginAttributes({
          browser,
        });
        let loadURIOptions = {
          triggeringPrincipal: principal,
          remoteType: lazy.E10SUtils.getRemoteTypeForURI(
            url,
            true,
            false,
            lazy.E10SUtils.DEFAULT_REMOTE_TYPE,
            null,
            oa
          ),
        };
        browser.loadURI(url, loadURIOptions);

        let actor = await promise;
        return await actor.collectPageData();
      } finally {
        this.#backgroundBrowsers.delete(browser);
      }
    });
  }

  /**
   * Handles notifications from the idle service.
   *
   * @param {nsISupports} subject
   *   The notification's subject.
   * @param {string} topic
   *   The notification topic.
   * @param {string} data
   *   The data associated with the notification.
   */
  observe(subject, topic, data) {
    switch (topic) {
      case "idle":
        lazy.logConsole.debug("User went idle");
        this.#userIsIdle = true;
        this.#startBackgroundWorkers();
        break;
      case "active":
        lazy.logConsole.debug("User became active");
        // Running workers notice this flag and stop after their current url.
        this.#userIsIdle = false;
        break;
    }
  }

  /**
   * Starts as many background workers as are allowed to process the background
   * queue.
   */
  #startBackgroundWorkers() {
    if (!this.#userIsIdle) {
      return;
    }

    let queued = this.#backgroundQueue.size;
    let toStart;

    if (this.MAX_BACKGROUND_FETCHES) {
      // Never start more workers than there are urls waiting; the extras would
      // find an empty queue and exit immediately.
      toStart = Math.min(
        this.MAX_BACKGROUND_FETCHES - this.#backgroundFetches,
        queued
      );
    } else {
      // A limit of 0 means unlimited, start a worker for every queued url.
      toStart = queued;
    }

    for (let i = 0; i < toStart; i++) {
      this.#backgroundFetch();
    }
  }

  /**
   * Starts a background fetch worker which will pull urls from the queue and
   * load them until the queue is empty.
   */
  async #backgroundFetch() {
    this.#backgroundFetches++;

    let url = shift(this.#backgroundQueue);
    while (url) {
      try {
        let pageData = await this.fetchPageData(url);

        if (pageData) {
          this.#pageDataCache.set(url, pageData);
          this.emit("page-data", pageData);
        }
      } catch (e) {
        // A failed fetch must not stop the worker from draining the queue.
        lazy.logConsole.error(e);
      }

      // Check whether the user became active or the worker limit changed
      // dynamically.
      if (
        !this.#userIsIdle ||
        (this.MAX_BACKGROUND_FETCHES > 0 &&
          this.#backgroundFetches > this.MAX_BACKGROUND_FETCHES)
      ) {
        break;
      }

      url = shift(this.#backgroundQueue);
    }

    this.#backgroundFetches--;
  }

  /**
   * Queues page data retrieval for a url. The page-data notification will be
   * generated if data becomes available.
   *
   * Check `getCached` first to ensure that data is not already in the cache.
   *
   * @param {string} url
   *   The url to retrieve data for.
   */
  queueFetch(url) {
    this.#backgroundQueue.add(url);

    this.#startBackgroundWorkers();
  }

  /**
   * Determines if the given browser is contained within a tab.
   *
   * @param {DOMElement} browser
   *   The browser element to check.
   * @returns {boolean}
   *   True if the browser element is contained within a tab.
   */
  #isATabBrowser(browser) {
    // Coerce the tab (or undefined) to the documented boolean.
    return Boolean(browser.ownerGlobal.gBrowser?.getTabForBrowser(browser));
  }
})();