summaryrefslogtreecommitdiffstats
path: root/toolkit/actors/ContentMetaChild.sys.mjs
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/actors/ContentMetaChild.sys.mjs')
-rw-r--r--toolkit/actors/ContentMetaChild.sys.mjs199
1 files changed, 199 insertions, 0 deletions
diff --git a/toolkit/actors/ContentMetaChild.sys.mjs b/toolkit/actors/ContentMetaChild.sys.mjs
new file mode 100644
index 0000000000..929d92db47
--- /dev/null
+++ b/toolkit/actors/ContentMetaChild.sys.mjs
@@ -0,0 +1,199 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Debounce time in milliseconds - this should be long enough to account for
+// sync script tags that could appear between desired meta tags. The same
+// value is used when a timer is first created and when it is re-applied to a
+// timer that is already pending.
+const TIMEOUT_DELAY = 1000;
+
+// Protocols a preview image URL must use to be accepted. Entries are compared
+// against URL.protocol, which is why each one ends with a colon.
+const ACCEPTED_PROTOCOLS = ["http:", "https:"];
+
+// Possible description tags, listed in order from least favourable to most
+// favourable. A tag's index in this list is used as its score, and tags are
+// looked up by exact string match.
+const DESCRIPTION_RULES = [
+ "twitter:description",
+ "description",
+ "og:description",
+];
+
+// Possible image tags, listed in order from least favourable to most
+// favourable. A tag's index in this list is used as its score, and tags are
+// looked up by exact string match.
+const PREVIEW_IMAGE_RULES = [
+ "thumbnail",
+ "twitter:image",
+ "og:image",
+ "og:image:url",
+ "og:image:secure_url",
+];
+
+/*
+ * Checks if the incoming meta tag has a greater score than the current best
+ * score by checking the index of the meta tag in the list of rules provided.
+ *
+ * @param {Array} aRules
+ * The list of rules for a given type of meta tag
+ * @param {String} aTag
+ * The name or property of the incoming meta tag
+ * @param {String} aEntry
+ * The current best entry for the given meta tag
+ *
+ * @returns {Boolean} true if the incoming meta tag is better than the current
+ * best meta tag of that same kind, false otherwise
+ */
+function shouldExtractMetadata(aRules, aTag, aEntry) {
+ return aRules.indexOf(aTag) > aEntry.currMaxScore;
+}
+
+/*
+ * Ensure that the preview image URL is safe and valid before storing
+ *
+ * @param {URL} aURL
+ * A URL object that needs to be checked for valid principal and protocol
+ *
+ * @returns {Boolean} true if the preview URL is safe and can be stored, false otherwise
+ */
+function checkLoadURIStr(aURL) {
+ if (!ACCEPTED_PROTOCOLS.includes(aURL.protocol)) {
+ return false;
+ }
+ try {
+ let ssm = Services.scriptSecurityManager;
+ let principal = ssm.createNullPrincipal({});
+ ssm.checkLoadURIStrWithPrincipal(
+ principal,
+ aURL.href,
+ ssm.DISALLOW_INHERIT_PRINCIPAL
+ );
+ } catch (e) {
+ return false;
+ }
+ return true;
+}
+
+/*
+ * This listens to DOMMetaAdded events and collects relevant metadata about the
+ * meta tag received. Then, it sends the metadata gathered from the meta tags
+ * and the url of the page as it's payload to be inserted into moz_places.
+ */
+export class ContentMetaChild extends JSWindowActorChild {
+ constructor() {
+ super();
+
+ // Store a mapping of the best description and preview
+ // image collected so far for a given URL.
+ this.metaTags = new Map();
+ }
+
+ didDestroy() {
+ for (let entry of this.metaTags.values()) {
+ entry.timeout.cancel();
+ }
+ }
+
+ handleEvent(event) {
+ switch (event.type) {
+ case "DOMContentLoaded":
+ const metaTags = this.contentWindow.document.querySelectorAll("meta");
+ for (let metaTag of metaTags) {
+ this.onMetaTag(metaTag);
+ }
+ break;
+ case "DOMMetaAdded":
+ this.onMetaTag(event.originalTarget);
+ break;
+ default:
+ }
+ }
+
+ onMetaTag(metaTag) {
+ const window = metaTag.ownerGlobal;
+
+ // If there's no meta tag, ignore this. Also verify that the window
+ // matches just to be safe.
+ if (!metaTag || !metaTag.ownerDocument || window != this.contentWindow) {
+ return;
+ }
+
+ const url = metaTag.ownerDocument.documentURI;
+
+ let name = metaTag.name;
+ let prop = metaTag.getAttributeNS(null, "property");
+ if (!name && !prop) {
+ return;
+ }
+
+ let tag = name || prop;
+
+ const entry = this.metaTags.get(url) || {
+ description: { value: null, currMaxScore: -1 },
+ image: { value: null, currMaxScore: -1 },
+ timeout: null,
+ };
+
+ // Malformed meta tag - do not store it
+ const content = metaTag.getAttributeNS(null, "content");
+ if (!content) {
+ return;
+ }
+
+ if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
+ // Extract the description
+ entry.description.value = content;
+ entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
+ } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
+ // Extract the preview image
+ let value;
+ try {
+ value = new URL(content, url);
+ } catch (e) {
+ return;
+ }
+ if (value && checkLoadURIStr(value)) {
+ entry.image.value = value.href;
+ entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
+ }
+ } else {
+ // We don't care about other meta tags
+ return;
+ }
+
+ if (!this.metaTags.has(url)) {
+ this.metaTags.set(url, entry);
+ }
+
+ if (entry.timeout) {
+ entry.timeout.delay = TIMEOUT_DELAY;
+ } else {
+ // We want to debounce incoming meta tags until we're certain we have the
+ // best one for description and preview image, and only store that one
+ entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
+ entry.timeout.initWithCallback(
+ () => {
+ entry.timeout = null;
+ this.metaTags.delete(url);
+ // We try to cancel the timers when we get destroyed, but if
+ // there's a race, catch it:
+ if (!this.manager || this.manager.isClosed) {
+ return;
+ }
+
+ // Save description and preview image to moz_places
+ this.sendAsyncMessage("Meta:SetPageInfo", {
+ url,
+ description: entry.description.value,
+ previewImageURL: entry.image.value,
+ });
+
+ // Telemetry for recording the size of page metadata
+ let metadataSize = entry.description.value
+ ? entry.description.value.length
+ : 0;
+ metadataSize += entry.image.value ? entry.image.value.length : 0;
+ Services.telemetry
+ .getHistogramById("PAGE_METADATA_SIZE")
+ .add(metadataSize);
+ },
+ TIMEOUT_DELAY,
+ Ci.nsITimer.TYPE_ONE_SHOT
+ );
+ }
+ }
+}