/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { JsonSchemaValidator: "resource://gre/modules/components-utils/JsonSchemaValidator.sys.mjs", OpenGraphPageData: "resource:///modules/pagedata/OpenGraphPageData.sys.mjs", SchemaOrgPageData: "resource:///modules/pagedata/SchemaOrgPageData.sys.mjs", TwitterPageData: "resource:///modules/pagedata/TwitterPageData.sys.mjs", }); XPCOMUtils.defineLazyGetter(lazy, "logConsole", function () { return console.createInstance({ prefix: "PageData", maxLogLevel: Services.prefs.getBoolPref("browser.pagedata.log", false) ? "Debug" : "Warn", }); }); /** * The list of page data collectors. These should be sorted in order of * specificity, if the same piece of data is provided by two collectors then the * earlier wins. * * Collectors must provide a `collect` function which will be passed the * document object and should return the PageData structure. The function may be * asynchronous if needed. * * The data returned need not be valid, collectors should return whatever they * can and then we drop anything that is invalid once all data is joined. */ XPCOMUtils.defineLazyGetter(lazy, "DATA_COLLECTORS", function () { return [lazy.SchemaOrgPageData, lazy.OpenGraphPageData, lazy.TwitterPageData]; }); let SCHEMAS = new Map(); /** * Loads the schema for the given name. * * @param {string} schemaName * The name of the schema to load. */ async function loadSchema(schemaName) { if (SCHEMAS.has(schemaName)) { return SCHEMAS.get(schemaName); } let url = `chrome://browser/content/pagedata/schemas/${schemaName.toLocaleLowerCase()}.schema.json`; let response = await fetch(url); if (!response.ok) { throw new Error(`Failed to load schema: ${response.statusText}`); } let schema = await response.json(); SCHEMAS.set(schemaName, schema); return schema; } /** * Validates the data using the schema with the given name. * * @param {string} schemaName * The name of the schema to validate against. * @param {object} data * The data to validate. */ async function validateData(schemaName, data) { let schema = await loadSchema(schemaName.toLocaleLowerCase()); let result = lazy.JsonSchemaValidator.validate(data, schema, { allowExplicitUndefinedProperties: true, // Allowed for future expansion of the schema. allowExtraProperties: true, }); if (!result.valid) { throw result.error; } } /** * A shared API that can be used in parent or child processes */ export const PageDataSchema = { // Enumeration of data types. The keys must match the schema name. DATA_TYPE: Object.freeze({ // Note that 1 and 2 were used as types in earlier versions and should not be used here. PRODUCT: 3, DOCUMENT: 4, ARTICLE: 5, AUDIO: 6, VIDEO: 7, }), /** * Gets the data type name. * * @param {DATA_TYPE} type * The data type from the DATA_TYPE enumeration * * @returns {string | null} The name for the type or null if not found. */ nameForType(type) { for (let [name, value] of Object.entries(this.DATA_TYPE)) { if (value == type) { return name; } } return null; }, /** * Asynchronously validates some page data against the expected schema. Throws * an exception if validation fails. * * @param {DATA_TYPE} type * The data type from the DATA_TYPE enumeration * @param {object} data * The page data */ async validateData(type, data) { let name = this.nameForType(type); if (!name) { throw new Error(`Unknown data type ${type}`); } return validateData(name, data); }, /** * Asynchronously validates an entire PageData structure. Any invalid or * unknown data types are dropped. * * @param {PageData} pageData * The page data * * @returns {PageData} The validated page data structure */ async validatePageData(pageData) { let { data: dataMap = {}, ...general } = pageData; await validateData("general", general); let validData = {}; for (let [type, data] of Object.entries(dataMap)) { let name = this.nameForType(type); // Ignore unknown types here. if (!name) { continue; } try { await validateData(name, data); validData[type] = data; } catch (e) { // Invalid data is dropped. } } return { ...general, data: validData, }; }, /** * Adds new page data into an existing data set. Any existing data is not * overwritten. * * @param {PageData} existingPageData * The existing page data * @param {PageData} newPageData * The new page data * * @returns {PageData} The joined data. */ coalescePageData(existingPageData, newPageData) { // Split out the general data from the map of specific data. let { data: existingMap = {}, ...existingGeneral } = existingPageData; let { data: newMap = {}, ...newGeneral } = newPageData; Object.assign(newGeneral, existingGeneral); let dataMap = {}; for (let [type, data] of Object.entries(existingMap)) { if (type in newMap) { dataMap[type] = Object.assign({}, newMap[type], data); } else { dataMap[type] = data; } } for (let [type, data] of Object.entries(newMap)) { if (!(type in dataMap)) { dataMap[type] = data; } } return { ...newGeneral, data: dataMap, }; }, /** * Collects page data from a DOM document. * * @param {Document} document * The DOM document to collect data from * * @returns {Promise} The data collected or null in case of * error. */ async collectPageData(document) { lazy.logConsole.debug("Starting collection", document.documentURI); let pending = lazy.DATA_COLLECTORS.map(async collector => { try { return await collector.collect(document); } catch (e) { lazy.logConsole.error("Error collecting page data", e); return null; } }); let pageDataList = await Promise.all(pending); let pageData = pageDataList.reduce(PageDataSchema.coalescePageData, { date: Date.now(), url: document.documentURI, }); try { return this.validatePageData(pageData); } catch (e) { lazy.logConsole.error("Failed to collect valid page data", e); return null; } }, };