summaryrefslogtreecommitdiffstats
path: root/browser/components/pagedata/tests/unit
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--browser/components/pagedata/tests/unit/head.js103
-rw-r--r--browser/components/pagedata/tests/unit/test_opengraph.js67
-rw-r--r--browser/components/pagedata/tests/unit/test_pagedata_basic.js100
-rw-r--r--browser/components/pagedata/tests/unit/test_pagedata_schema.js210
-rw-r--r--browser/components/pagedata/tests/unit/test_queue.js527
-rw-r--r--browser/components/pagedata/tests/unit/test_schemaorg.js213
-rw-r--r--browser/components/pagedata/tests/unit/test_schemaorg_parse.js193
-rw-r--r--browser/components/pagedata/tests/unit/test_twitter.js34
-rw-r--r--browser/components/pagedata/tests/unit/xpcshell.ini14
9 files changed, 1461 insertions, 0 deletions
diff --git a/browser/components/pagedata/tests/unit/head.js b/browser/components/pagedata/tests/unit/head.js
new file mode 100644
index 0000000000..eda154b0bc
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/head.js
@@ -0,0 +1,103 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+const { XPCOMUtils } = ChromeUtils.importESModule(
+ "resource://gre/modules/XPCOMUtils.sys.mjs"
+);
+
+ChromeUtils.defineESModuleGetters(this, {
+ PageDataSchema: "resource:///modules/pagedata/PageDataSchema.sys.mjs",
+});
+
+const { HttpServer } = ChromeUtils.import("resource://testing-common/httpd.js");
+
+// Local HTTP server used to serve the HTML documents that the tests parse.
+const server = new HttpServer();
+// NOTE(review): -1 presumably lets the server pick any free port; the port
+// actually in use is read back from the server identity below - confirm.
+server.start(-1);
+
+const SERVER_PORT = server.identity.primaryPort;
+// Base of every url served by the test server.
+const BASE_URL = "http://localhost:" + SERVER_PORT;
+// Path used by parseDocument and verifyPageData when the caller gives none.
+const DEFAULT_PATH = "/document.html";
+const TEST_URL = BASE_URL + DEFAULT_PATH;
+
+// Stop the server as part of test cleanup.
+registerCleanupFunction(() => {
+ server.stop();
+});
+
+// NOTE(review): presumably sets up the profile directory for the test -
+// confirm against the xpcshell harness documentation.
+do_get_profile();
+// Enable the page data logging pref while the tests run.
+Services.prefs.setBoolPref("browser.pagedata.log", true);
+
+/**
+ * Serves the given string as a UTF-8 encoded HTML document from the test
+ * server and loads it back as a DOM Document object.
+ *
+ * @param {string} str
+ *   The string to parse.
+ * @param {string} path
+ *   The path for the document on the server, defaults to "/document.html"
+ * @returns {Promise<Document>} the HTML DOM Document object. The promise is
+ *   rejected with an Error if the request fails, is aborted or does not
+ *   produce a document.
+ */
+function parseDocument(str, path = DEFAULT_PATH) {
+  // Make the server answer requests for `path` with `str`.
+  server.registerPathHandler(path, (request, response) => {
+    response.setHeader("Content-Type", "text/html;charset=utf-8");
+
+    let converter = Cc[
+      "@mozilla.org/intl/converter-output-stream;1"
+    ].createInstance(Ci.nsIConverterOutputStream);
+    converter.init(response.bodyOutputStream, "utf-8");
+    converter.writeString(str);
+  });
+
+  let url = BASE_URL + path;
+
+  return new Promise((resolve, reject) => {
+    let request = new XMLHttpRequest();
+    request.responseType = "document";
+    request.open("GET", url, true);
+
+    // Reject with a real Error so that a failing test reports what went wrong
+    // instead of an opaque event object.
+    for (let type of ["error", "abort"]) {
+      request.addEventListener(type, () => {
+        reject(new Error(`Request for ${url} failed with "${type}".`));
+      });
+    }
+
+    request.addEventListener("load", () => {
+      let doc = request.responseXML;
+      if (!doc) {
+        // Without this callers would receive null and fail later with a much
+        // less obvious error.
+        reject(new Error(`Response for ${url} did not produce a document.`));
+        return;
+      }
+
+      resolve(doc);
+    });
+
+    request.send();
+  });
+}
+
+/**
+ * Loads the HTML string as a document and collects the page data from it.
+ *
+ * @param {string} str
+ *   The HTML string to parse.
+ * @param {string} path
+ *   The path for the document on the server, defaults to "/document.html"
+ * @returns {Promise<PageData>} A promise that resolves to the page data found.
+ */
+async function parsePageData(str, path) {
+  return parseDocument(str, path).then(parsed =>
+    PageDataSchema.collectPageData(parsed)
+  );
+}
+
+/**
+ * Verifies that the HTML string given parses to the expected page data.
+ *
+ * The `date` property of the parsed page data is discarded without being
+ * checked and the `url` property is checked against the url the document was
+ * served from, so neither should be present in `expected`.
+ *
+ * @param {string} str
+ * The HTML string to parse.
+ * @param {PageData} expected
+ * The expected pagedata excluding the date and url properties.
+ * @param {string} path
+ * The path for the document on the server, defaults to "/document.html"
+ * @returns {Promise<void>} A promise that resolves once the page data has been
+ * verified.
+ */
+async function verifyPageData(str, expected, path = DEFAULT_PATH) {
+ let pageData = await parsePageData(str, path);
+
+ // The date is not part of the expected data, drop it before comparing.
+ delete pageData.date;
+
+ // The url is checked on its own against where the document was served from
+ // and then dropped so that `expected` does not need to include it.
+ Assert.equal(pageData.url, BASE_URL + path);
+ delete pageData.url;
+
+ Assert.deepEqual(
+ pageData,
+ expected,
+ "Should have seen the expected page data."
+ );
+}
diff --git a/browser/components/pagedata/tests/unit/test_opengraph.js b/browser/components/pagedata/tests/unit/test_opengraph.js
new file mode 100644
index 0000000000..e5accaf675
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_opengraph.js
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Tests that the page data service can parse Open Graph metadata.
+ */
+
+// Parses a page whose Open Graph type is "website". The og:site_name,
+// og:description and og:image values are expected as siteName, description and
+// image. The <title>, og:title, og:url and og:locale values have no
+// counterpart in the expected data.
+add_task(async function test_type_website() {
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Internet for people, not profit — Mozilla</title>
+ <meta property="og:type" content="website">
+ <meta property="og:site_name" content="Mozilla">
+ <meta property="og:url" content="https://www.mozilla.org/">
+ <meta property="og:image" content="https://example.com/preview-image">
+ <meta property="og:title" content="Internet for people, not profit">
+ <!-- We expect the test will ignore tags the parser does not recognize. -->
+ <meta property="og:locale" content="en_CA">
+ <meta property="og:description" content="Mozilla is the not-for-profit behind the lightning fast Firefox browser. We put people over profit to give everyone more power online.">
+ </head>
+ <body>
+ <p>Test page</p>
+ </body>
+ </html>
+ `,
+ // verifyPageData checks the url and strips the date, so only the remaining
+ // properties are listed here.
+ {
+ siteName: "Mozilla",
+ description:
+ "Mozilla is the not-for-profit behind the lightning fast Firefox browser. We put people over profit to give everyone more power online.",
+ image: "https://example.com/preview-image",
+ data: {},
+ }
+ );
+});
+
+// Parses a page whose Open Graph type is "video.movie" and which has no
+// og:site_name. Only image and description are expected; the og:image:height
+// and og:image:width values have no counterpart in the expected data.
+add_task(async function test_type_movie() {
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Code Rush (TV Movie 2000)</title>
+ <meta property="og:url" content="https://www.imdb.com/title/tt0499004/"/>
+ <!-- Omitting og:site_name to test that the parser doesn't break on missing tags. -->
+ <meta property="og:title" content="Code Rush (TV Movie 2000) - IMDb"/>
+ <meta property="og:description" content="This is the description of the movie."/>
+ <meta property="og:type" content="video.movie"/>
+ <meta property="og:image" content="https://example.com/preview-code-rush"/>
+ <meta property="og:image:height" content="750"/>
+ <meta property="og:image:width" content="1000"/>
+ </head>
+ <body>
+ <p>Test page</p>
+ </body>
+ </html>
+ `,
+ // No siteName is expected as the page does not provide og:site_name.
+ {
+ image: "https://example.com/preview-code-rush",
+ description: "This is the description of the movie.",
+ data: {},
+ }
+ );
+});
diff --git a/browser/components/pagedata/tests/unit/test_pagedata_basic.js b/browser/components/pagedata/tests/unit/test_pagedata_basic.js
new file mode 100644
index 0000000000..5d31645a4c
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_pagedata_basic.js
@@ -0,0 +1,100 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+/*
+ * Tests that the "page-data" notification is dispatched when new page data is
+ * discovered and that the data is only cached while an actor has locked the
+ * entry for its url.
+ */
+
+ChromeUtils.defineESModuleGetters(this, {
+ PageDataService: "resource:///modules/pagedata/PageDataService.sys.mjs",
+});
+
+add_task(async function test_pageDataDiscovered_notifies() {
+  let pageUrl = "https://www.mozilla.org/";
+
+  // Builds a fresh copy of the page data used throughout this test so that
+  // the object handed to the service and the object it is compared against
+  // are never the same instance.
+  let buildPageData = () => ({
+    url: pageUrl,
+    date: 32453456,
+    data: {
+      [PageDataSchema.DATA_TYPE.PRODUCT]: {
+        name: "Bolts",
+        price: { value: 276 },
+      },
+    },
+  });
+
+  Assert.equal(
+    PageDataService.getCached(pageUrl),
+    null,
+    "Should be no cached data."
+  );
+
+  // Start listening before announcing the data so the event is not missed.
+  let notified = PageDataService.once("page-data");
+  PageDataService.pageDataDiscovered(buildPageData());
+
+  let received = await notified;
+  Assert.equal(
+    received.url,
+    pageUrl,
+    "Should have notified data for the expected url"
+  );
+  Assert.deepEqual(
+    received,
+    buildPageData(),
+    "Should have returned the correct product data"
+  );
+
+  Assert.equal(
+    PageDataService.getCached(pageUrl),
+    null,
+    "Should not have cached the data as there was no actor locking."
+  );
+
+  // With an actor holding a lock on the url the same data should be cached.
+  let lockHolder = {};
+  PageDataService.lockEntry(lockHolder, pageUrl);
+  PageDataService.pageDataDiscovered(buildPageData());
+
+  Assert.deepEqual(
+    PageDataService.getCached(pageUrl),
+    buildPageData(),
+    "Should have cached the data"
+  );
+
+  // Releasing the lock should evict the entry again.
+  PageDataService.unlockEntry(lockHolder, pageUrl);
+
+  Assert.equal(
+    PageDataService.getCached(pageUrl),
+    null,
+    "Should have dropped the data from the cache."
+  );
+});
diff --git a/browser/components/pagedata/tests/unit/test_pagedata_schema.js b/browser/components/pagedata/tests/unit/test_pagedata_schema.js
new file mode 100644
index 0000000000..fcd9c4b297
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_pagedata_schema.js
@@ -0,0 +1,210 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+/*
+ * Tests schema validation.
+ */
+
+add_task(async function testBasic() {
+  // Old data types, should not be recognised.
+  for (let legacyType of [1, 2]) {
+    Assert.equal(PageDataSchema.nameForType(legacyType), null);
+  }
+
+  // Current data types map back to the name of their DATA_TYPE constant.
+  for (let typeName of ["VIDEO", "PRODUCT"]) {
+    Assert.equal(
+      PageDataSchema.nameForType(PageDataSchema.DATA_TYPE[typeName]),
+      typeName
+    );
+  }
+});
+
+add_task(async function testProduct() {
+  // Validates `candidate` as PRODUCT data and returns the validation promise.
+  let validateProduct = candidate =>
+    PageDataSchema.validateData(PageDataSchema.DATA_TYPE.PRODUCT, candidate);
+
+  // Products must have a name
+  await Assert.rejects(
+    validateProduct({}),
+    /missing required property 'name'/
+  );
+
+  // A name alone is enough, a price may be given with or without a currency.
+  await validateProduct({ name: "Bolts" });
+  await validateProduct({ name: "Bolts", price: { value: 5 } });
+  await validateProduct({
+    name: "Bolts",
+    price: { value: 5, currency: "USD" },
+  });
+
+  // A price must have a value.
+  await Assert.rejects(
+    validateProduct({ name: "Bolts", price: { currency: "USD" } }),
+    /missing required property 'value'/
+  );
+
+  // The same rules apply to the shipping cost.
+  await validateProduct({
+    name: "Bolts",
+    shippingCost: { value: 5, currency: "USD" },
+  });
+
+  await Assert.rejects(
+    validateProduct({ name: "Bolts", shippingCost: { currency: "USD" } }),
+    /missing required property 'value'/
+  );
+});
+
+add_task(async function testCoalesce() {
+  let { PRODUCT, VIDEO, AUDIO } = PageDataSchema.DATA_TYPE;
+
+  // Coalescing two empty objects still yields an (empty) data member.
+  Assert.deepEqual(PageDataSchema.coalescePageData({}, {}), { data: {} });
+
+  let first = {
+    url: "https://www.google.com/",
+    data: {
+      [PRODUCT]: { name: "bolts" },
+      [VIDEO]: { name: "My video", duration: 500 },
+    },
+  };
+
+  let second = {
+    url: "https://www.mozilla.com/",
+    date: 27,
+    siteName: "Mozilla",
+    data: {
+      [PRODUCT]: { name: "newname", price: { value: 55 } },
+      [AUDIO]: { name: "My song" },
+    },
+  };
+
+  // Where both sides supply a value the first one is kept (url, product
+  // name); everything only the second side has is added.
+  let expected = {
+    url: "https://www.google.com/",
+    date: 27,
+    siteName: "Mozilla",
+    data: {
+      [PRODUCT]: { name: "bolts", price: { value: 55 } },
+      [VIDEO]: { name: "My video", duration: 500 },
+      [AUDIO]: { name: "My song" },
+    },
+  };
+
+  Assert.deepEqual(PageDataSchema.coalescePageData(first, second), expected);
+});
+
+add_task(async function testPageData() {
+  let { PRODUCT } = PageDataSchema.DATA_TYPE;
+  let pageUrl = "https://www.google.com";
+
+  // Full page data needs a url and a date
+  let incompleteCases = [
+    [{}, /missing required property 'url'/],
+    [{ url: pageUrl }, /missing required property 'date'/],
+    [{ date: 55 }, /missing required property 'url'/],
+  ];
+  for (let [candidate, expectedError] of incompleteCases) {
+    await Assert.rejects(
+      PageDataSchema.validatePageData(candidate),
+      expectedError
+    );
+  }
+
+  // A missing data member is filled in with an empty object.
+  let minimal = await PageDataSchema.validatePageData({
+    url: pageUrl,
+    date: 55,
+  });
+  Assert.deepEqual(minimal, { url: pageUrl, date: 55, data: {} });
+
+  // Data stored under an unknown type (0) is dropped, known types are kept.
+  let withUnknownType = await PageDataSchema.validatePageData({
+    url: pageUrl,
+    date: 55,
+    data: {
+      0: { name: "unknown" },
+      [PRODUCT]: { name: "Bolts", price: { value: 55 } },
+    },
+  });
+  Assert.deepEqual(withUnknownType, {
+    url: pageUrl,
+    date: 55,
+    data: {
+      [PRODUCT]: { name: "Bolts", price: { value: 55 } },
+    },
+  });
+
+  // Should drop invalid inner data.
+  let withInvalidProduct = await PageDataSchema.validatePageData({
+    url: pageUrl,
+    date: 55,
+    data: {
+      [PRODUCT]: { name: "Bolts", price: { currency: "USD" } },
+    },
+  });
+  Assert.deepEqual(withInvalidProduct, { url: pageUrl, date: 55, data: {} });
+});
diff --git a/browser/components/pagedata/tests/unit/test_queue.js b/browser/components/pagedata/tests/unit/test_queue.js
new file mode 100644
index 0000000000..3d180edd13
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_queue.js
@@ -0,0 +1,527 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+ChromeUtils.defineESModuleGetters(this, {
+ PageDataService: "resource:///modules/pagedata/PageDataService.sys.mjs",
+ PromiseUtils: "resource://gre/modules/PromiseUtils.sys.mjs",
+});
+
+// Test that urls are retrieved in the expected order.
+add_task(async function test_queueOrder() {
+ // A limit of 0 is treated as "no limit" (see test_queueLimit) so every
+ // queued url is fetched straight away.
+ Services.prefs.setIntPref("browser.pagedata.maxBackgroundFetches", 0);
+ // Pretend we are idle.
+ PageDataService.observe(null, "idle", null);
+
+ // The page data that the fake fetch below hands back, keyed by url.
+ let pageDataResults = [
+ {
+ date: Date.now(),
+ url: "http://www.mozilla.org/1",
+ siteName: "Mozilla",
+ data: {},
+ },
+ {
+ date: Date.now() - 3600,
+ url: "http://www.google.com/2",
+ siteName: "Google",
+ data: {},
+ },
+ {
+ date: Date.now() + 3600,
+ url: "http://www.example.com/3",
+ image: "http://www.example.com/banner.jpg",
+ data: {},
+ },
+ {
+ date: Date.now() / 2,
+ url: "http://www.wikipedia.org/4",
+ data: {},
+ },
+ {
+ date: Date.now() / 3,
+ url: "http://www.microsoft.com/5",
+ data: {
+ [PageDataSchema.DATA_TYPE.PRODUCT]: {
+ name: "Windows 11",
+ },
+ },
+ },
+ ];
+
+ // Replace the real fetch with one that records the requested url and
+ // resolves immediately with the matching entry from pageDataResults.
+ let requests = [];
+ PageDataService.fetchPageData = url => {
+ requests.push(url);
+
+ for (let pageData of pageDataResults) {
+ if (pageData.url == url) {
+ return Promise.resolve(pageData);
+ }
+ }
+
+ return Promise.reject(new Error("Unknown url"));
+ };
+
+ let { promise: completePromise, resolve } = PromiseUtils.defer();
+
+ // Collect every notified page data and finish once all have been seen.
+ let results = [];
+ let listener = (_, pageData) => {
+ results.push(pageData);
+ if (results.length == pageDataResults.length) {
+ resolve();
+ }
+ };
+
+ PageDataService.on("page-data", listener);
+
+ for (let pageData of pageDataResults) {
+ PageDataService.queueFetch(pageData.url);
+ }
+
+ await completePromise;
+ PageDataService.off("page-data", listener);
+
+ // The urls must have been requested in the order they were queued.
+ Assert.deepEqual(
+ requests,
+ pageDataResults.map(pd => pd.url)
+ );
+
+ // Because our fetch implementation is essentially synchronous the results
+ // will be in a known order. This isn't guaranteed by the API though.
+ Assert.deepEqual(results, pageDataResults);
+
+ // Remove the fake fetch installed above.
+ delete PageDataService.fetchPageData;
+});
+
+// Tests that limiting the number of fetches works.
+add_task(async function test_queueLimit() {
+ Services.prefs.setIntPref("browser.pagedata.maxBackgroundFetches", 3);
+ // Pretend we are idle.
+ PageDataService.observe(null, "idle", null);
+
+ // Replace the real fetch with one that records each request and leaves it
+ // pending until the test resolves or rejects it. The index of a request in
+ // this array is one less than the number at the end of its url.
+ let requests = [];
+ PageDataService.fetchPageData = url => {
+ let { promise, resolve, reject } = PromiseUtils.defer();
+ requests.push({ url, resolve, reject });
+
+ return promise;
+ };
+
+ // Records the url of every page data notification in the order received.
+ let results = [];
+ let listener = (_, pageData) => {
+ results.push(pageData?.url);
+ };
+
+ PageDataService.on("page-data", listener);
+
+ PageDataService.queueFetch("https://www.mozilla.org/1");
+ PageDataService.queueFetch("https://www.mozilla.org/2");
+ PageDataService.queueFetch("https://www.mozilla.org/3");
+ PageDataService.queueFetch("https://www.mozilla.org/4");
+ PageDataService.queueFetch("https://www.mozilla.org/5");
+ PageDataService.queueFetch("https://www.mozilla.org/6");
+ PageDataService.queueFetch("https://www.mozilla.org/7");
+ PageDataService.queueFetch("https://www.mozilla.org/8");
+ PageDataService.queueFetch("https://www.mozilla.org/9");
+ PageDataService.queueFetch("https://www.mozilla.org/10");
+ PageDataService.queueFetch("https://www.mozilla.org/11");
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ // Only as many requests as the limit allows have started.
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ ]
+ );
+
+ // Completing or rejecting a request should start new ones.
+
+ requests[1].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/2",
+ siteName: "Test 2",
+ data: {},
+ });
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ ]
+ );
+
+ requests[3].reject(new Error("Fail"));
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ ]
+ );
+
+ // Increasing the limit should start more requests.
+ // Requests 1, 3 and 5 are still pending, so two more can start.
+ Services.prefs.setIntPref("browser.pagedata.maxBackgroundFetches", 5);
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ ]
+ );
+
+ // Dropping the limit shouldn't start anything new.
+ Services.prefs.setIntPref("browser.pagedata.maxBackgroundFetches", 3);
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ ]
+ );
+
+ // But resolving should also not start new requests.
+ // Five requests are pending against a limit of three; completing two of
+ // them only brings the number pending back down to the limit.
+ requests[5].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/6",
+ siteName: "Test 6",
+ data: {},
+ });
+
+ requests[0].resolve({
+ date: 343446434,
+ url: "https://www.mozilla.org/1",
+ siteName: "Test 1",
+ data: {},
+ });
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ ]
+ );
+
+ // Until a previous request completes.
+ // This fetch resolves with no page data at all.
+ requests[4].resolve(null);
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ "https://www.mozilla.org/8",
+ ]
+ );
+
+ // Infinite queue should work.
+ // A limit of 0 starts everything that is still queued.
+ Services.prefs.setIntPref("browser.pagedata.maxBackgroundFetches", 0);
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ "https://www.mozilla.org/8",
+ "https://www.mozilla.org/9",
+ "https://www.mozilla.org/10",
+ "https://www.mozilla.org/11",
+ ]
+ );
+
+ // Complete everything that is still pending, deliberately out of order.
+ requests[10].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/11",
+ data: {},
+ });
+ requests[2].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/3",
+ data: {},
+ });
+ requests[7].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/8",
+ data: {},
+ });
+ requests[6].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/7",
+ data: {},
+ });
+ requests[8].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/9",
+ data: {},
+ });
+ requests[9].resolve({
+ date: 345334,
+ url: "https://www.mozilla.org/10",
+ data: {},
+ });
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ // Nothing was left in the queue so no further requests were made.
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ "https://www.mozilla.org/8",
+ "https://www.mozilla.org/9",
+ "https://www.mozilla.org/10",
+ "https://www.mozilla.org/11",
+ ]
+ );
+
+ PageDataService.off("page-data", listener);
+
+ // Remove the fake fetch installed above.
+ delete PageDataService.fetchPageData;
+
+ // Notifications arrive in completion order. Request 4 was rejected and
+ // request 5 resolved with null, so neither is expected here.
+ Assert.deepEqual(results, [
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/11",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/8",
+ "https://www.mozilla.org/7",
+ "https://www.mozilla.org/9",
+ "https://www.mozilla.org/10",
+ ]);
+});
+
+// Tests that the user idle state stops and starts fetches.
+add_task(async function test_idle() {
+ Services.prefs.setIntPref("browser.pagedata.maxBackgroundFetches", 3);
+ // Pretend we are active.
+ PageDataService.observe(null, "active", null);
+
+ // Replace the real fetch with one that records each request and leaves it
+ // pending until the test resolves it. The index of a request in this array
+ // is one less than the number at the end of its url.
+ let requests = [];
+ PageDataService.fetchPageData = url => {
+ let { promise, resolve, reject } = PromiseUtils.defer();
+ requests.push({ url, resolve, reject });
+
+ return promise;
+ };
+
+ // Records the url of every page data notification in the order received.
+ let results = [];
+ let listener = (_, pageData) => {
+ results.push(pageData?.url);
+ };
+
+ PageDataService.on("page-data", listener);
+
+ PageDataService.queueFetch("https://www.mozilla.org/1");
+ PageDataService.queueFetch("https://www.mozilla.org/2");
+ PageDataService.queueFetch("https://www.mozilla.org/3");
+ PageDataService.queueFetch("https://www.mozilla.org/4");
+ PageDataService.queueFetch("https://www.mozilla.org/5");
+ PageDataService.queueFetch("https://www.mozilla.org/6");
+ PageDataService.queueFetch("https://www.mozilla.org/7");
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ // Nothing will start when active.
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ []
+ );
+
+ // Pretend we are idle.
+ PageDataService.observe(null, "idle", null);
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ // Going idle starts as many requests as the limit allows.
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ ]
+ );
+
+ // Completing or rejecting a request should start new ones.
+
+ requests[1].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/2",
+ data: {},
+ });
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ ]
+ );
+
+ // But not when active
+ PageDataService.observe(null, "active", null);
+
+ // Requests that were already started can still complete while active.
+ requests[3].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/4",
+ data: {},
+ });
+ requests[0].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/1",
+ data: {},
+ });
+ requests[2].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/3",
+ data: {},
+ });
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ // Nothing is pending any more but no new request has been started.
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ ]
+ );
+
+ // Going idle should start more workers
+ PageDataService.observe(null, "idle", null);
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ ]
+ );
+
+ requests[4].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/5",
+ data: {},
+ });
+ requests[5].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/6",
+ data: {},
+ });
+ requests[6].resolve({
+ date: 2345,
+ url: "https://www.mozilla.org/7",
+ data: {},
+ });
+
+ // Let a tick pass.
+ await Promise.resolve();
+
+ // Nothing was left in the queue so no further requests were made.
+ Assert.deepEqual(
+ requests.map(r => r.url),
+ [
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ ]
+ );
+
+ PageDataService.off("page-data", listener);
+
+ // Remove the fake fetch installed above.
+ delete PageDataService.fetchPageData;
+
+ // Notifications arrive in the order the requests were completed.
+ Assert.deepEqual(results, [
+ "https://www.mozilla.org/2",
+ "https://www.mozilla.org/4",
+ "https://www.mozilla.org/1",
+ "https://www.mozilla.org/3",
+ "https://www.mozilla.org/5",
+ "https://www.mozilla.org/6",
+ "https://www.mozilla.org/7",
+ ]);
+});
diff --git a/browser/components/pagedata/tests/unit/test_schemaorg.js b/browser/components/pagedata/tests/unit/test_schemaorg.js
new file mode 100644
index 0000000000..5470410e4f
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_schemaorg.js
@@ -0,0 +1,213 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Tests that the page data service can parse schema.org metadata into PageData.
+ */
+
+// Microdata with an Organization item (containing a nested Person) followed by
+// a single Product item. The Organization's own name, not the nested Person's,
+// is expected as siteName. The Product supplies the description, the image
+// (resolved against the test server) and the PRODUCT name and price. The
+// product's url and gtin have no counterpart in the expected data.
+add_task(async function test_single_product_microdata() {
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Product Info 1</title>
+ </head>
+ <body>
+ <div itemscope itemtype="https://schema.org/Organization">
+ <div itemprop="employee" itemscope itemtype="https://schema.org/Person">
+ <span itemprop="name">Mr. Nested Name</span>
+ </div>
+
+ <span itemprop="name">Mozilla</span>
+ </div>
+
+ <div itemscope itemtype="https://schema.org/Product">
+ <img itemprop="image" src="bon-echo-microwave-17in.jpg" />
+ <a href="microwave.html" itemprop="url">
+ <span itemprop="name">Bon Echo Microwave</span>
+ </a>
+
+ <div itemprop="offers" itemscope itemtype="https://schema.org/Offer">
+ <span itemprop="price" content="3.50">£3.50</span>
+ <span itemprop="priceCurrency" content="GBP"></span>
+ </div>
+
+ <span itemprop="gtin" content="13572468"></span>
+
+ <span itemprop="description">The most amazing microwave in the world</span>
+ </div>
+ </body>
+ </html>
+ `,
+ // The price is expected as a number although the markup holds "3.50".
+ {
+ siteName: "Mozilla",
+ description: "The most amazing microwave in the world",
+ image: BASE_URL + "/bon-echo-microwave-17in.jpg",
+ data: {
+ [PageDataSchema.DATA_TYPE.PRODUCT]: {
+ name: "Bon Echo Microwave",
+ price: {
+ value: 3.5,
+ currency: "GBP",
+ },
+ },
+ },
+ }
+ );
+});
+
+// The same Organization and Product as test_single_product_microdata but
+// expressed as two JSON-LD scripts. The Organization uses the http:// schema.org
+// context and the Product the https:// one. The expected page data is the same
+// as for the microdata version.
+add_task(async function test_single_product_json_ld() {
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <script type="application/ld+json">
+ {
+ "@context": "http://schema.org",
+ "@type": "Organization",
+ "employee": {
+ "@type": "Person",
+ "name": "Mr. Nested Name"
+ },
+ "name": "Mozilla"
+ }
+ </script>
+ <script type="application/ld+json">
+ {
+ "@context": "https://schema.org",
+ "@type": "Product",
+ "image": "bon-echo-microwave-17in.jpg",
+ "url": "microwave.html",
+ "name": "Bon Echo Microwave",
+ "offers": {
+ "@type": "Offer",
+ "price": "3.50",
+ "priceCurrency": "GBP"
+ },
+ "gtin": "13572468",
+ "description": "The most amazing microwave in the world"
+ }
+ </script>
+ </head>
+ <body>
+ </body>
+ </html>
+ `,
+ // The price is expected as a number although the JSON holds the string
+ // "3.50", and the relative image is resolved against the test server.
+ {
+ siteName: "Mozilla",
+ description: "The most amazing microwave in the world",
+ image: BASE_URL + "/bon-echo-microwave-17in.jpg",
+ data: {
+ [PageDataSchema.DATA_TYPE.PRODUCT]: {
+ name: "Bon Echo Microwave",
+ price: {
+ value: 3.5,
+ currency: "GBP",
+ },
+ },
+ },
+ }
+ );
+});
+
+// Mixes both formats in one document: the Product is described with JSON-LD in
+// the head and the Organization with microdata in the body. The expected page
+// data combines both and matches the single-format tests.
+add_task(async function test_single_product_combined() {
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <script type="application/ld+json">
+ {
+ "@context": "https://schema.org",
+ "@type": "Product",
+ "image": "bon-echo-microwave-17in.jpg",
+ "url": "microwave.html",
+ "name": "Bon Echo Microwave",
+ "offers": {
+ "@type": "Offer",
+ "price": "3.50",
+ "priceCurrency": "GBP"
+ },
+ "gtin": "13572468",
+ "description": "The most amazing microwave in the world"
+ }
+ </script>
+ </head>
+ <body>
+ <div itemscope itemtype="https://schema.org/Organization">
+ <div itemprop="employee" itemscope itemtype="https://schema.org/Person">
+ <span itemprop="name">Mr. Nested Name</span>
+ </div>
+
+ <span itemprop="name">Mozilla</span>
+ </div>
+ </body>
+ </html>
+ `,
+ // siteName comes from the microdata, everything else from the JSON-LD.
+ {
+ siteName: "Mozilla",
+ description: "The most amazing microwave in the world",
+ image: BASE_URL + "/bon-echo-microwave-17in.jpg",
+ data: {
+ [PageDataSchema.DATA_TYPE.PRODUCT]: {
+ name: "Bon Echo Microwave",
+ price: {
+ value: 3.5,
+ currency: "GBP",
+ },
+ },
+ },
+ }
+ );
+});
+
+// Microdata with two Product items, the second of which uses the http://
+// schema.org itemtype. Only the first product is expected in the page data.
+// The document has no Organization or description so neither siteName nor
+// description is expected.
+add_task(async function test_single_multiple_microdata() {
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Product Info 2</title>
+ </head>
+ <body>
+ <div itemscope itemtype="https://schema.org/Product">
+ <img itemprop="image" src="bon-echo-microwave-17in.jpg" />
+ <a href="microwave.html" itemprop="url">
+ <span itemprop="name">Bon Echo Microwave</span>
+ </a>
+
+ <div itemprop="offers" itemscope itemtype="https://schema.org/Offer">
+ <span itemprop="price" content="3.28">£3.28</span>
+ <span itemprop="priceCurrency" content="GBP"></span>
+ </div>
+
+ <span itemprop="gtin" content="13572468"></span>
+ </div>
+ <div itemscope itemtype="http://schema.org/Product">
+ <img itemprop="image" src="gran-paradiso-toaster-17in.jpg" />
+ <a href="toaster.html" itemprop="url">
+ <span itemprop="name">Gran Paradiso Toaster</span>
+ </a>
+
+ <span itemprop="gtin" content="15263748"></span>
+ </div>
+ </body>
+ </html>
+ `,
+ // Both the image and the product data are those of the first product.
+ {
+ image: BASE_URL + "/bon-echo-microwave-17in.jpg",
+ data: {
+ [PageDataSchema.DATA_TYPE.PRODUCT]: {
+ name: "Bon Echo Microwave",
+ price: {
+ value: 3.28,
+ currency: "GBP",
+ },
+ },
+ },
+ }
+ );
+});
diff --git a/browser/components/pagedata/tests/unit/test_schemaorg_parse.js b/browser/components/pagedata/tests/unit/test_schemaorg_parse.js
new file mode 100644
index 0000000000..e002598af2
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_schemaorg_parse.js
@@ -0,0 +1,193 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Tests that the page data service can parse schema.org metadata into Item
+ * structures.
+ */
+
+const { SchemaOrgPageData } = ChromeUtils.importESModule(
+ "resource:///modules/pagedata/SchemaOrgPageData.sys.mjs"
+);
+
+/**
+ * Collects the schema.org items from the given html string.
+ *
+ * @param {string} docStr
+ * The html to parse.
+ * @returns {Promise<Item[]>}
+ */
+async function collectItems(docStr) {
+ let doc = await parseDocument(docStr);
+ return SchemaOrgPageData.collectItems(doc);
+}
+
+/**
+ * Verifies that the items parsed from the html match the expected JSON-LD
+ * format.
+ *
+ * @param {string} docStr
+ * The html to parse.
+ * @param {object[]} expected
+ * The JSON-LD objects to match to.
+ */
+async function verifyItems(docStr, expected) {
+ let items = await collectItems(docStr);
+ let jsonLD = items.map(item => item.toJsonLD());
+ Assert.deepEqual(jsonLD, expected);
+}
+
+add_task(async function test_microdata_parse() { // Microdata items, including nested ones, are expected as JSON-LD style objects.
+ await verifyItems(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Product Info 1</title>
+ </head>
+ <body itemprop="badprop">
+ <div itemscope itemtype="https://schema.org/Organization">
+ <div itemprop="employee" itemscope itemtype="https://schema.org/Person">
+ <span itemprop="name">Mr. Nested Name</span>
+ </div>
+
+ <span itemprop="name">Mozilla</span>
+ </div>
+
+ <div itemscope itemtype="https://schema.org/Product">
+ <img itemprop="image" src="bon-echo-microwave-17in.jpg" />
+ <a href="microwave.html" itemprop="url">
+ <span itemprop="name">Bon Echo Microwave</span>
+ </a>
+
+ <div itemprop="offers" itemscope itemtype="https://schema.org/Offer">
+ <span itemprop="price" content="3.50">£3.50</span>
+ <span itemprop="priceCurrency" content="GBP"></span>
+ </div>
+
+ <span itemprop="gtin" content="13572468"></span>
+
+ <span itemprop="description">The most amazing microwave in the world</span>
+ </div>
+ </body>
+ </html>
+ `,
+ [ // One entry per top-level item; nothing is expected for the "badprop" itemprop on <body>.
+ {
+ "@type": "Organization",
+ employee: {
+ "@type": "Person",
+ name: "Mr. Nested Name",
+ },
+ name: "Mozilla",
+ },
+ {
+ "@type": "Product",
+ image: BASE_URL + "/bon-echo-microwave-17in.jpg", // Relative src/href values are expected as absolute URLs under BASE_URL.
+ url: BASE_URL + "/microwave.html",
+ name: "Bon Echo Microwave",
+ offers: {
+ "@type": "Offer",
+ price: "3.50", // Expected as the content attribute string, not the "£3.50" element text.
+ priceCurrency: "GBP",
+ },
+ gtin: "13572468",
+ description: "The most amazing microwave in the world",
+ },
+ ]
+ );
+});
+
+add_task(async function test_json_ld_parse() { // Two application/ld+json scripts are expected to yield one item each.
+ await verifyItems(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <script type="application/ld+json">
+ {
+ "@context": "http://schema.org",
+ "@type": "Organization",
+ "employee": {
+ "@type": "Person",
+ "name": "Mr. Nested Name"
+ },
+ "name": "Mozilla"
+ }
+ </script>
+ <script type="application/ld+json">
+ {
+ "@context": "https://schema.org",
+ "@type": "Product",
+ "image": "bon-echo-microwave-17in.jpg",
+ "url": "microwave.html",
+ "name": "Bon Echo Microwave",
+ "offers": {
+ "@type": "Offer",
+ "price": "3.50",
+ "priceCurrency": "GBP"
+ },
+ "gtin": "13572468",
+ "description": "The most amazing microwave in the world"
+ }
+ </script>
+ </head>
+ <body>
+ </body>
+ </html>
+ `,
+ [ // "@context" is not expected in the output for either script.
+ {
+ "@type": "Organization",
+ employee: {
+ "@type": "Person",
+ name: "Mr. Nested Name",
+ },
+ name: "Mozilla",
+ },
+ {
+ "@type": "Product",
+ image: "bon-echo-microwave-17in.jpg", // Expected verbatim from the JSON, i.e. not prefixed with BASE_URL.
+ url: "microwave.html",
+ name: "Bon Echo Microwave",
+ offers: {
+ "@type": "Offer",
+ price: "3.50",
+ priceCurrency: "GBP",
+ },
+ gtin: "13572468",
+ description: "The most amazing microwave in the world",
+ },
+ ]
+ );
+});
+
+add_task(async function test_microdata_lazy_image() { // Product image given as an <img> with both src and data-src attributes.
+ await verifyItems(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Product Info 1</title>
+ </head>
+ <body itemprop="badprop">
+ <div itemscope itemtype="https://schema.org/Product">
+ <img itemprop="image" src="lazy-load.gif" data-src="bon-echo-microwave-17in.jpg" />
+ <a href="microwave.html" itemprop="url">
+ <span itemprop="name">Bon Echo Microwave</span>
+ </a>
+ </div>
+ </body>
+ </html>
+ `,
+ [
+ {
+ "@type": "Product",
+ image: BASE_URL + "/bon-echo-microwave-17in.jpg", // The data-src value is expected, not the src placeholder (lazy-load.gif).
+ url: BASE_URL + "/microwave.html",
+ name: "Bon Echo Microwave",
+ },
+ ]
+ );
+});
diff --git a/browser/components/pagedata/tests/unit/test_twitter.js b/browser/components/pagedata/tests/unit/test_twitter.js
new file mode 100644
index 0000000000..a49491f5c6
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/test_twitter.js
@@ -0,0 +1,34 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Basic tests for Twitter cards.
+ */
+
+add_task(async function test_twitter_card() { // Twitter card <meta> tags are expected to fill siteName, description and image.
+ await verifyPageData(
+ `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="twitter:card" content="summary_large_image">
+ <meta name="twitter:site" content="@nytimes">
+ <meta name="twitter:creator" content="@SarahMaslinNir">
+ <meta name="twitter:title" content="Parade of Fans for Houston’s Funeral">
+ <meta name="twitter:description" content="NEWARK - The guest list and parade of limousines">
+ <meta name="twitter:image" content="http://graphics8.nytimes.com/images/2012/02/19/us/19whitney-span/19whitney-span-articleLarge.jpg">
+ </head>
+ <body>
+ </body>
+ </html>
+ `,
+ {
+ siteName: "@nytimes", // Matches the twitter:site content; card, creator and title have no expected counterpart here.
+ description: "NEWARK - The guest list and parade of limousines",
+ image:
+ "http://graphics8.nytimes.com/images/2012/02/19/us/19whitney-span/19whitney-span-articleLarge.jpg",
+ data: {}, // No typed data entries are expected from the card.
+ }
+ );
+});
diff --git a/browser/components/pagedata/tests/unit/xpcshell.ini b/browser/components/pagedata/tests/unit/xpcshell.ini
new file mode 100644
index 0000000000..3104e61a86
--- /dev/null
+++ b/browser/components/pagedata/tests/unit/xpcshell.ini
@@ -0,0 +1,14 @@
+[DEFAULT]
+firefox-appdir = browser
+skip-if = toolkit == 'android' # bug 1730213
+support-files =
+ head.js
+head = head.js # Shared setup (HTTP server, BASE_URL, HTML parsing helper); presumably loaded before each test below - confirm.
+
+[test_pagedata_basic.js]
+[test_pagedata_schema.js]
+[test_opengraph.js]
+[test_queue.js]
+[test_schemaorg.js]
+[test_schemaorg_parse.js]
+[test_twitter.js]