/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
const lazy = {};

// Register getters on `lazy` for the ES modules this file uses.
ChromeUtils.defineESModuleGetters(lazy, {
  JsonSchemaValidator:
    "resource://gre/modules/components-utils/JsonSchemaValidator.sys.mjs",
  OpenGraphPageData: "resource:///modules/pagedata/OpenGraphPageData.sys.mjs",
  SchemaOrgPageData: "resource:///modules/pagedata/SchemaOrgPageData.sys.mjs",
  TwitterPageData: "resource:///modules/pagedata/TwitterPageData.sys.mjs",
});

// Console instance prefixed with "PageData". The boolean pref
// "browser.pagedata.log" (default false) selects the "Debug" level instead of
// "Warn".
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
  const verbose = Services.prefs.getBoolPref("browser.pagedata.log", false);

  return console.createInstance({
    prefix: "PageData",
    maxLogLevel: verbose ? "Debug" : "Warn",
  });
});
|
|
|
|
/**
 * The page data collectors, listed from most to least specific. When two
 * collectors provide the same piece of data the one listed earlier wins.
 *
 * Each collector must expose a `collect` function. It receives the document
 * object and returns a PageData structure; it may be asynchronous.
 *
 * Collectors do not have to return valid data. They return whatever they can
 * find and anything invalid is dropped after all the data has been joined.
 */
ChromeUtils.defineLazyGetter(lazy, "DATA_COLLECTORS", () => [
  lazy.SchemaOrgPageData,
  lazy.OpenGraphPageData,
  lazy.TwitterPageData,
]);
|
|
|
|
// Cache of loaded schemas, keyed by the lowercased schema name.
const SCHEMAS = new Map();

/**
 * Loads the schema for the given name. Schemas are fetched once and then
 * served from the in-memory cache; a failed load is not cached.
 *
 * @param {string} schemaName
 *   The name of the schema to load. Matching is case-insensitive.
 * @returns {Promise<object>}
 *   The loaded schema.
 * @throws {Error}
 *   If the schema resource cannot be fetched.
 */
async function loadSchema(schemaName) {
  // The name becomes part of a resource URL, so the case mapping must not
  // depend on the user's locale (a Turkish locale lowercases "I" to a dotless
  // "ı"). The same normalized name is used as the cache key so that "Article"
  // and "article" share one entry.
  const key = schemaName.toLowerCase();

  if (SCHEMAS.has(key)) {
    return SCHEMAS.get(key);
  }

  const url = `chrome://browser/content/pagedata/schemas/${key}.schema.json`;
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load schema: ${response.statusText}`);
  }

  const schema = await response.json();
  SCHEMAS.set(key, schema);
  return schema;
}
|
|
|
|
/**
 * Validates the data using the schema with the given name.
 *
 * @param {string} schemaName
 *   The name of the schema to validate against. Matching is case-insensitive.
 * @param {object} data
 *   The data to validate.
 * @returns {Promise<void>}
 *   Resolves when the data is valid.
 * @throws
 *   The error reported by the validator when the data is invalid, or the
 *   error from loading the schema.
 */
async function validateData(schemaName, data) {
  // Schema names are identifiers used to locate the schema, so lowercase them
  // without applying locale-specific case mappings.
  const schema = await loadSchema(schemaName.toLowerCase());

  const result = lazy.JsonSchemaValidator.validate(data, schema, {
    allowExplicitUndefinedProperties: true,
    // Allowed for future expansion of the schema.
    allowAdditionalProperties: true,
  });

  if (!result.valid) {
    throw result.error;
  }
}
|
|
|
|
/**
 * A shared API that can be used in parent or child processes
 */
export const PageDataSchema = {
  // Enumeration of data types. The keys must match the schema name.
  DATA_TYPE: Object.freeze({
    // Note that 1 and 2 were used as types in earlier versions and should not be used here.
    PRODUCT: 3,
    DOCUMENT: 4,
    ARTICLE: 5,
    AUDIO: 6,
    VIDEO: 7,
  }),

  /**
   * Gets the data type name.
   *
   * @param {DATA_TYPE} type
   *   The data type from the DATA_TYPE enumeration
   *
   * @returns {string | null} The name for the type or null if not found.
   */
  nameForType(type) {
    for (const [name, value] of Object.entries(this.DATA_TYPE)) {
      // Loose equality is intentional: validatePageData passes object keys,
      // which are strings ("3"), while the enumeration holds numbers.
      if (value == type) {
        return name;
      }
    }

    return null;
  },

  /**
   * Asynchronously validates some page data against the expected schema. Throws
   * an exception if validation fails.
   *
   * @param {DATA_TYPE} type
   *   The data type from the DATA_TYPE enumeration
   * @param {object} data
   *   The page data
   * @throws {Error}
   *   If the type is unknown or the data does not match its schema.
   */
  async validateData(type, data) {
    const name = this.nameForType(type);

    if (!name) {
      throw new Error(`Unknown data type ${type}`);
    }

    // Calls the module-level validateData helper, not this method.
    await validateData(name, data);
  },

  /**
   * Asynchronously validates an entire PageData structure. Any invalid or
   * unknown data types are dropped.
   *
   * @param {PageData} pageData
   *   The page data
   *
   * @returns {Promise<PageData>} The validated page data structure
   * @throws
   *   If the general (non type-specific) part of the data is invalid.
   */
  async validatePageData(pageData) {
    // Split out the general data from the map of type-specific data.
    const { data: dataMap = {}, ...general } = pageData;

    await validateData("general", general);

    const validData = {};

    for (const [type, data] of Object.entries(dataMap)) {
      const name = this.nameForType(type);
      // Ignore unknown types here.
      if (!name) {
        continue;
      }

      try {
        await validateData(name, data);
        validData[type] = data;
      } catch (e) {
        // Invalid data is dropped, but leave a trace for debugging.
        lazy.logConsole.debug(`Dropping invalid ${name} page data`, e);
      }
    }

    return {
      ...general,
      data: validData,
    };
  },

  /**
   * Adds new page data into an existing data set. Any existing data is not
   * overwritten. Neither argument is modified.
   *
   * @param {PageData} existingPageData
   *   The existing page data
   * @param {PageData} newPageData
   *   The new page data
   *
   * @returns {PageData} The joined data.
   */
  coalescePageData(existingPageData, newPageData) {
    // Split out the general data from the map of specific data.
    const { data: existingMap = {}, ...existingGeneral } = existingPageData;
    const { data: newMap = {}, ...newGeneral } = newPageData;

    // Existing values are spread last so they win over new ones.
    const general = { ...newGeneral, ...existingGeneral };

    const dataMap = {};
    for (const [type, data] of Object.entries(existingMap)) {
      // Own-property checks avoid matching inherited keys such as "toString".
      dataMap[type] = Object.hasOwn(newMap, type)
        ? { ...newMap[type], ...data }
        : data;
    }

    for (const [type, data] of Object.entries(newMap)) {
      if (!Object.hasOwn(dataMap, type)) {
        dataMap[type] = data;
      }
    }

    return {
      ...general,
      data: dataMap,
    };
  },

  /**
   * Collects page data from a DOM document.
   *
   * @param {Document} document
   *   The DOM document to collect data from
   *
   * @returns {Promise<PageData | null>} The data collected or null in case of
   *   error.
   */
  async collectPageData(document) {
    lazy.logConsole.debug("Starting collection", document.documentURI);

    const pending = lazy.DATA_COLLECTORS.map(async collector => {
      try {
        return await collector.collect(document);
      } catch (e) {
        lazy.logConsole.error("Error collecting page data", e);
        return null;
      }
    });

    const collected = await Promise.all(pending);

    // Failed collectors yield null; skip them rather than letting the merge
    // throw while destructuring. Earlier collectors win on conflicts.
    const pageData = collected
      .filter(result => result != null)
      .reduce((merged, next) => this.coalescePageData(merged, next), {
        date: Date.now(),
        url: document.documentURI,
      });

    try {
      // Await here so that a validation rejection is handled by the catch
      // below instead of escaping to the caller.
      return await this.validatePageData(pageData);
    } catch (e) {
      lazy.logConsole.error("Failed to collect valid page data", e);
      return null;
    }
  },
};
|