1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// How long, in milliseconds, to wait after the most recent relevant meta tag
// before the collected metadata is sent off. This has to be long enough to
// cover sync script tags that may sit between the meta tags we care about.
const TIMEOUT_DELAY = 1000;

// URL protocols a preview image is allowed to use.
const ACCEPTED_PROTOCOLS = [
  "http:",
  "https:",
];

// Candidate description tags, ordered from least to most favourable. A tag's
// index in this list is used as its score.
const DESCRIPTION_RULES = ["twitter:description", "description", "og:description"];

// Candidate preview image tags, ordered from least to most favourable. A
// tag's index in this list is used as its score.
const PREVIEW_IMAGE_RULES = [
  "thumbnail",
  "twitter:image",
  "og:image",
  "og:image:url",
  "og:image:secure_url",
];
/*
 * Decides whether an incoming meta tag outranks the best one recorded so far.
 * A tag's score is its position in the rule list, so later rules win; a tag
 * that is not in the list scores -1.
 *
 * @param {Array} aRules
 *        The ordered list of rules for one kind of meta tag
 * @param {String} aTag
 *        The name or property of the incoming meta tag
 * @param {Object} aEntry
 *        The current best entry for that kind of meta tag; only its
 *        `currMaxScore` property is read
 *
 * @returns {Boolean} true if the incoming tag scores strictly higher than
 *          `aEntry.currMaxScore`, false otherwise
 */
function shouldExtractMetadata(aRules, aTag, aEntry) {
  const incomingScore = aRules.indexOf(aTag);
  const bestScoreSoFar = aEntry.currMaxScore;
  return incomingScore > bestScoreSoFar;
}
/*
 * Tells whether a preview image URL may be stored: its protocol must be one
 * of ACCEPTED_PROTOCOLS and it must pass the script security manager's
 * load check against a freshly created null principal.
 *
 * @param {URL} aURL
 *        The URL object to vet; its `protocol` and `href` are read
 *
 * @returns {Boolean} true if the URL passed both checks, false otherwise
 */
function checkLoadURIStr(aURL) {
  const protocolIsAccepted = ACCEPTED_PROTOCOLS.includes(aURL.protocol);
  if (protocolIsAccepted) {
    try {
      const securityManager = Services.scriptSecurityManager;
      const nullPrincipal = securityManager.createNullPrincipal({});
      securityManager.checkLoadURIStrWithPrincipal(
        nullPrincipal,
        aURL.href,
        securityManager.DISALLOW_INHERIT_PRINCIPAL
      );
      return true;
    } catch (e) {
      // Anything thrown while running the check counts as a rejection; fall
      // through to the shared "not safe" result below.
    }
  }
  return false;
}
/*
 * This collects relevant metadata from meta tags, either all at once on
 * DOMContentLoaded or one by one on DOMMetaAdded. For each page URL it keeps
 * the best description and preview image seen so far and, once no relevant
 * tag has arrived for TIMEOUT_DELAY milliseconds, sends them together with the
 * url of the page as its payload to be inserted into moz_places.
 */
export class ContentMetaChild extends JSWindowActorChild {
  constructor() {
    super();

    // Store a mapping of the best description and preview
    // image collected so far for a given URL, together with the pending
    // debounce timer for that URL.
    this.metaTags = new Map();
  }

  /*
   * Cancels every debounce timer that is still pending.
   */
  didDestroy() {
    for (let entry of this.metaTags.values()) {
      // Entries start out with `timeout: null`, so do not assume a timer
      // exists for every entry in the map.
      if (entry.timeout) {
        entry.timeout.cancel();
      }
    }
  }

  /*
   * Dispatches DOM events to onMetaTag.
   *
   * @param {Event} event
   *        "DOMContentLoaded" processes every <meta> element of the content
   *        document; "DOMMetaAdded" processes `event.originalTarget`. Any
   *        other event type is ignored.
   */
  handleEvent(event) {
    switch (event.type) {
      // Braces keep the lexical declaration scoped to this case only.
      case "DOMContentLoaded": {
        const metaTags = this.contentWindow.document.querySelectorAll("meta");
        for (let metaTag of metaTags) {
          this.onMetaTag(metaTag);
        }
        break;
      }
      case "DOMMetaAdded":
        this.onMetaTag(event.originalTarget);
        break;
      default:
    }
  }

  /*
   * Records a single meta tag if it is a better description or preview image
   * than what is already known for its document, and (re)starts the debounce
   * timer that eventually sends "Meta:SetPageInfo".
   *
   * @param {Element} metaTag
   *        The meta element to inspect; null/undefined is ignored
   */
  onMetaTag(metaTag) {
    // If there's no meta tag, ignore this. This has to be checked before
    // anything is read off the tag, otherwise a missing tag would throw.
    if (!metaTag || !metaTag.ownerDocument) {
      return;
    }

    // Also verify that the window matches just to be safe.
    if (metaTag.ownerGlobal != this.contentWindow) {
      return;
    }

    const url = metaTag.ownerDocument.documentURI;

    const name = metaTag.name;
    const prop = metaTag.getAttributeNS(null, "property");
    if (!name && !prop) {
      return;
    }

    // The name attribute takes precedence over the property attribute.
    const tag = name || prop;

    const entry = this.metaTags.get(url) || {
      description: { value: null, currMaxScore: -1 },
      image: { value: null, currMaxScore: -1 },
      timeout: null,
    };

    // Malformed meta tag - do not store it
    const content = metaTag.getAttributeNS(null, "content");
    if (!content) {
      return;
    }

    if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
      // Extract the description
      entry.description.value = content;
      entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
    } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
      // Extract the preview image, resolving it against the page URL
      let value;
      try {
        value = new URL(content, url);
      } catch (e) {
        return;
      }
      // NOTE(review): a URL that parses but fails the safety check does not
      // return here, so it still (re)arms the debounce timer below, unlike a
      // URL that fails to parse. Confirm whether that is intended.
      if (value && checkLoadURIStr(value)) {
        entry.image.value = value.href;
        entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
      }
    } else {
      // We don't care about other meta tags
      return;
    }

    if (!this.metaTags.has(url)) {
      this.metaTags.set(url, entry);
    }

    if (entry.timeout) {
      // A timer is already pending for this URL: push it back.
      entry.timeout.delay = TIMEOUT_DELAY;
    } else {
      // We want to debounce incoming meta tags until we're certain we have the
      // best one for description and preview image, and only store that one
      entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
      entry.timeout.initWithCallback(
        () => {
          entry.timeout = null;
          this.metaTags.delete(url);
          // We try to cancel the timers when we get destroyed, but if
          // there's a race, catch it:
          if (!this.manager || this.manager.isClosed) {
            return;
          }

          // Save description and preview image to moz_places
          this.sendAsyncMessage("Meta:SetPageInfo", {
            url,
            description: entry.description.value,
            previewImageURL: entry.image.value,
          });

          // Telemetry for recording the size of page metadata
          let metadataSize = entry.description.value
            ? entry.description.value.length
            : 0;
          metadataSize += entry.image.value ? entry.image.value.length : 0;
          Services.telemetry
            .getHistogramById("PAGE_METADATA_SIZE")
            .add(metadataSize);
        },
        TIMEOUT_DELAY,
        Ci.nsITimer.TYPE_ONE_SHOT
      );
    }
  }
}
|