// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

// Constants for telemetry.

// Buckets recorded to Glean.readermode.downloadResult.
const DOWNLOAD_SUCCESS = 0;
const DOWNLOAD_ERROR_XHR = 1;
const DOWNLOAD_ERROR_NO_DOC = 2;

// Buckets recorded to Glean.readermode.parseResult.
const PARSE_SUCCESS = 0;
const PARSE_ERROR_TOO_MANY_ELEMENTS = 1;
const PARSE_ERROR_WORKER = 2;
const PARSE_ERROR_NO_ARTICLE = 3;

// Class names to preserve in the readerized output. We preserve these class
// names so that rules in aboutReader.css can match them.
const CLASSES_TO_PRESERVE = [
  "caption",
  "emoji",
  "hidden",
  "invisible",
  "sr-only",
  "visually-hidden",
  "visuallyhidden",
  "wp-caption",
  "wp-caption-text",
  "wp-smiley",
];

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";

// Holder for modules registered through defineESModuleGetters below; they are
// reached as lazy.<Name> throughout this file.
const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  LanguageDetector:
    "resource://gre/modules/translations/LanguageDetector.sys.mjs",
  ReaderWorker: "moz-src:///toolkit/components/reader/ReaderWorker.sys.mjs",
  Readerable: "resource://gre/modules/Readerable.sys.mjs",
});

// True when the running application's ID matches the GUID below (Firefox
// desktop, per this constant's name).
const gIsFirefoxDesktop =
  Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}";

/**
 * Helpers for entering and leaving reader mode, and for downloading and
 * parsing documents into article objects for about:reader.
 */
export var ReaderMode = {
  // When truthy, log() writes its messages via dump().
  DEBUG: 0,

  // For time spent telemetry
  enterTime: undefined,
  leaveTime: undefined,

  /**
   * Enter the reader mode by going forward one step in history if applicable,
   * if not, append the about:reader page in the history instead.
   *
   * @param docShell The docShell to navigate; queried for nsIWebNavigation.
   * @param win The content window whose document is being readerized.
   */
  enterReaderMode(docShell, win) {
    this.enterTime = Date.now();

    Glean.readermode.viewOn.record({
      subcategory: "feature",
    });

    let url = win.document.location.href;
    let readerURL = "about:reader?url=" + encodeURIComponent(url);

    if (!Services.appinfo.sessionHistoryInParent) {
      let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
      let sh = webNav.sessionHistory;
      if (webNav.canGoForward) {
        let forwardEntry = sh.legacySHistory.getEntryAtIndex(sh.index + 1);
        let forwardURL = forwardEntry.URI.spec;
        // readerURL always carries the "about:reader?url=" prefix and so is
        // never empty; a plain equality check is all that is needed.
        if (forwardURL === readerURL) {
          webNav.goForward();
          return;
        }
      }
    }

    // This could possibly move to the parent. See bug 1664982.
    win.document.location = readerURL;
  },

  /**
   * Exit the reader mode by going back one step in history if applicable,
   * if not, append the original page in the history instead.
   *
   * Also records the time spent in reader mode and the scroll position to
   * telemetry.
   *
   * @param docShell The docShell to navigate; queried for nsIWebNavigation.
   * @param win The about:reader content window being left.
   */
  leaveReaderMode(docShell, win) {
    this.leaveTime = Date.now();

    // Measured in seconds (whole number)
    let timeSpentInReaderMode = Math.floor(
      (this.leaveTime - this.enterTime) / 1000
    );

    // Measured as percentage (whole number)
    let scrollPosition = Math.floor(
      ((win.scrollY + win.innerHeight) / win.document.body.clientHeight) * 100
    );

    Glean.readermode.viewOff.record({
      subcategory: "feature",
      reader_time: `${timeSpentInReaderMode}`,
      scroll_position: `${scrollPosition}`,
    });

    let url = win.document.location.href;
    let originalURL = this.getOriginalUrl(url);
    let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);

    if (!Services.appinfo.sessionHistoryInParent) {
      let sh = webNav.sessionHistory;
      if (webNav.canGoBack) {
        let prevEntry = sh.legacySHistory.getEntryAtIndex(sh.index - 1);
        let prevURL = prevEntry.URI.spec;
        // When the original URL is unknown, going back is the best we can do.
        if (prevURL && (prevURL == originalURL || !originalURL)) {
          webNav.goBack();
          return;
        }
      }
    }

    // Without an original URL there is nothing meaningful to load; bail out
    // rather than handing null to fixupAndLoadURIString.
    if (!originalURL) {
      console.error(
        new Error(`Couldn't determine the original URL from ${url}`)
      );
      return;
    }

    let referrerURI, principal;
    try {
      referrerURI = Services.io.newURI(url);
      principal = Services.scriptSecurityManager.createContentPrincipal(
        referrerURI,
        win.document.nodePrincipal.originAttributes
      );
    } catch (e) {
      console.error(e);
      return;
    }
    let loadFlags = webNav.LOAD_FLAGS_DISALLOW_INHERIT_PRINCIPAL;
    let ReferrerInfo = Components.Constructor(
      "@mozilla.org/referrer-info;1",
      "nsIReferrerInfo",
      "init"
    );
    let loadURIOptions = {
      triggeringPrincipal: principal,
      loadFlags,
      referrerInfo: new ReferrerInfo(
        Ci.nsIReferrerInfo.EMPTY,
        true,
        referrerURI
      ),
    };
    // This could possibly move to the parent. See bug 1664982.
    webNav.fixupAndLoadURIString(originalURL, loadURIOptions);
  },

  /**
   * Returns original URL from an about:reader URL.
   *
   * Any fragment ("#ref") on the about:reader URL itself is re-applied to the
   * returned URL.
   *
   * @param url An about:reader URL.
   * @return The original URL for the article, or null if we did not find
   *         a properly formatted about:reader URL.
   */
  getOriginalUrl(url) {
    if (!url.startsWith("about:reader?")) {
      return null;
    }

    let outerHash = "";
    try {
      let uriObj = Services.io.newURI(url);
      url = uriObj.specIgnoringRef;
      outerHash = uriObj.ref;
    } catch (ex) {
      /* ignore, use the raw string */
    }

    let searchParams = new URLSearchParams(
      url.substring("about:reader?".length)
    );
    if (!searchParams.has("url")) {
      return null;
    }
    let originalUrl = searchParams.get("url");
    if (outerHash) {
      try {
        let uriObj = Services.io.newURI(originalUrl);
        uriObj = Services.io.newURI("#" + outerHash, null, uriObj);
        originalUrl = uriObj.spec;
      } catch (ex) {
        /* Best effort: fall back to the URL without the outer fragment. */
      }
    }
    return originalUrl;
  },

  /**
   * Returns the original URL of an about:reader URL as an exposable URI
   * object, after running it through URI fixup.
   *
   * @param url An about:reader URL.
   * @return The result of Services.io.createExposableURI for the original
   *         URL, or null if the original URL cannot be determined, fixed up
   *         or converted.
   */
  getOriginalUrlObjectForDisplay(url) {
    let originalUrl = this.getOriginalUrl(url);
    if (originalUrl) {
      let uriObj;
      try {
        uriObj = Services.uriFixup.getFixupURIInfo(originalUrl).preferredURI;
      } catch (ex) {
        return null;
      }
      try {
        return Services.io.createExposableURI(uriObj);
      } catch (ex) {
        return null;
      }
    }
    return null;
  },

  /**
   * Gets an article from a loaded browser's document. This method will not attempt
   * to parse certain URIs (e.g. about: URIs).
   *
   * @param doc A document to parse.
   * @return {Promise} or null (returned synchronously) when
   *         Readerable.shouldCheckUri rejects the document or base URI.
   * @resolves JS object representing the article, or null if no article is found.
   */
  parseDocument(doc) {
    if (
      !lazy.Readerable.shouldCheckUri(doc.documentURIObject) ||
      !lazy.Readerable.shouldCheckUri(doc.baseURIObject, true)
    ) {
      this.log("Reader mode disabled for URI");
      return null;
    }

    return this._readerParse(doc);
  },

  /**
   * Downloads and parses a document from a URL.
   *
   * @param url URL to download and parse.
   * @param attrs OriginAttributes to use for the request.
   * @param docContentType Content type of the document; "text/plain" makes
   *        the download use a text response, anything else a document one.
   * @return {Promise}
   * @resolves JS object representing the article, or null if no article is found.
   * @rejects with a plain `{ newURL, article }` object when the response URL
   *          differs from the requested one, so callers can update the URL.
   */
  async downloadAndParseDocument(url, attrs = {}, docContentType = "document") {
    let result = await this._downloadDocument(url, attrs, docContentType);
    if (!result?.doc) {
      return null;
    }
    let { doc, newURL } = result;
    if (
      !lazy.Readerable.shouldCheckUri(doc.documentURIObject) ||
      !lazy.Readerable.shouldCheckUri(doc.baseURIObject, true)
    ) {
      this.log("Reader mode disabled for URI");
      return null;
    }

    let article = await this._readerParse(doc);
    // If we have to redirect, reject to the caller with the parsed article,
    // so we can update the URL before displaying it.
    if (newURL) {
      return Promise.reject({ newURL, article });
    }
    // Otherwise, we can just continue with the article.
    return article;
  },

  /**
   * Downloads a document with XMLHttpRequest.
   *
   * @param url URL to download.
   * @param attrs OriginAttributes to use for the request. The object is
   *        copied, not modified; the copy gets `firstPartyDomain` set from
   *        the URL when that can be computed.
   * @param docContentType "text/plain" requests a text response, which is
   *        then wrapped in a `<pre>` inside a new HTML document; any other
   *        value requests a document response.
   * @return null (synchronously) if the URL is invalid or not eligible for
   *         reader mode, otherwise a {Promise}.
   * @resolves `{ doc }`, plus `newURL` when the response URL differs from the
   *           requested URL (ignoring fragments).
   * @rejects on network error, a status other than 200, or an empty response.
   */
  _downloadDocument(url, attrs = {}, docContentType = "document") {
    let uri;
    try {
      uri = Services.io.newURI(url);
      if (!lazy.Readerable.shouldCheckUri(uri)) {
        return null;
      }
    } catch (ex) {
      console.error(
        new Error(`Couldn't create URI from ${url} to download: ${ex}`)
      );
      return null;
    }
    // Work on a copy so the caller's OriginAttributes object is left untouched.
    let originAttributes = { ...attrs };
    try {
      originAttributes.firstPartyDomain = Services.eTLD.getSchemelessSite(uri);
    } catch (e) {
      console.error("Failed to get first party domain for about:reader", e);
    }
    return new Promise((resolve, reject) => {
      let xhr = new XMLHttpRequest({ mozAnon: false });
      xhr.open("GET", url, true);
      xhr.setOriginAttributes(originAttributes);
      xhr.onerror = evt => reject(evt.error);
      xhr.responseType = docContentType === "text/plain" ? "text" : "document";
      xhr.onload = () => {
        if (xhr.status !== 200) {
          reject("Reader mode XHR failed with status: " + xhr.status);
          Glean.readermode.downloadResult.accumulateSingleSample(
            DOWNLOAD_ERROR_XHR
          );
          return;
        }

        let doc =
          xhr.responseType === "text" ? xhr.responseText : xhr.responseXML;
        if (!doc) {
          reject("Reader mode XHR didn't return a document");
          Glean.readermode.downloadResult.accumulateSingleSample(
            DOWNLOAD_ERROR_NO_DOC
          );
          return;
        }

        let responseURL = xhr.responseURL;
        let givenURL = url;
        // Convert these to real URIs to make sure the escaping (or lack
        // thereof) is identical:
        try {
          responseURL = Services.io.newURI(responseURL).specIgnoringRef;
        } catch (ex) {
          /* Ignore errors - we'll use what we had before */
        }
        try {
          givenURL = Services.io.newURI(givenURL).specIgnoringRef;
        } catch (ex) {
          /* Ignore errors - we'll use what we had before */
        }

        // A text response is wrapped in a <pre> inside a fresh HTML document
        // so the rest of the pipeline always receives a document.
        if (xhr.responseType != "document") {
          let initialText = doc;
          let parser = new DOMParser();
          doc = parser.parseFromString(`<pre></pre>`, "text/html");
          doc.querySelector("pre").textContent = initialText;
        }

        // We treat redirects as download successes here:
        Glean.readermode.downloadResult.accumulateSingleSample(
          DOWNLOAD_SUCCESS
        );

        let result = { doc };
        if (responseURL != givenURL) {
          result.newURL = xhr.responseURL;
        }

        resolve(result);
      };
      xhr.send();
    });
  },

  /**
   * Writes a debug message via dump() when DEBUG is truthy.
   *
   * @param msg The message to log.
   */
  log(msg) {
    if (this.DEBUG) {
      // dump() does not append a line break, so add one to keep messages
      // from running together.
      dump("Reader: " + msg + "\n");
    }
  },

  /**
   * Attempts to parse a document into an article. Heavy lifting happens
   * in the ReaderWorker.
   *
   * @param doc The document to parse.
   * @return {Promise}
   * @resolves JS object representing the article, or null if no article is found.
   */
  async _readerParse(doc) {
    // The "reader.parse-node-limit" pref is exposed as `maxElemsToParse` by
    // the lazy preference getter at the bottom of this file. `parseNodeLimit`
    // is still honoured as a fallback for anything that sets it directly.
    const nodeLimit = this.maxElemsToParse || this.parseNodeLimit;
    if (nodeLimit) {
      let numTags = doc.getElementsByTagName("*").length;
      if (numTags > nodeLimit) {
        this.log(
          "Aborting parse for " +
            doc.baseURIObject.spec +
            "; " +
            numTags +
            " elements found"
        );
        Glean.readermode.parseResult.accumulateSingleSample(
          PARSE_ERROR_TOO_MANY_ELEMENTS
        );
        return null;
      }
    }

    // Fetch this here before we send `doc` off to the worker thread, as later on the
    // document might be nuked but we will still want the URI.
    let { documentURI } = doc;

    let uriParam = {
      spec: doc.baseURIObject.spec,
      prePath: doc.baseURIObject.prePath,
      scheme: doc.baseURIObject.scheme,

      // Fallback
      host: documentURI,
      pathBase: documentURI,
    };

    // nsIURI.host throws an exception if a host doesn't exist.
    try {
      uriParam.host = doc.baseURIObject.host;
      uriParam.pathBase = Services.io.newURI(".", null, doc.baseURIObject).spec;
    } catch (ex) {
      // Fall back to the initial values we assigned.
      console.warn("Error accessing host name: ", ex);
    }

    // convert text/plain document, if any, to XHTML format
    if (this._isDocumentPlainText(doc)) {
      doc = this._convertPlainTextDocument(doc);
    }

    let serializer = new XMLSerializer();
    let serializedDoc = serializer.serializeToString(doc);
    // Explicitly null out doc to make it clear it might not be available from this
    // point on.
    doc = null;

    let options = {
      classesToPreserve: CLASSES_TO_PRESERVE,
      debug: Services.prefs.getBoolPref("reader.debug", false),
    };

    let article = null;
    try {
      article = await lazy.ReaderWorker.post("parseDocument", [
        uriParam,
        serializedDoc,
        options,
      ]);
    } catch (e) {
      console.error("Error in ReaderWorker: ", e);
      Glean.readermode.parseResult.accumulateSingleSample(PARSE_ERROR_WORKER);
    }

    if (!article) {
      this.log("Worker did not return an article");
      Glean.readermode.parseResult.accumulateSingleSample(
        PARSE_ERROR_NO_ARTICLE
      );
      return null;
    }

    // Readability returns a URI object based on the baseURI, but we only care
    // about the original document's URL from now on. This also avoids spoofing
    // attempts where the baseURI doesn't match the domain of the documentURI
    article.url = documentURI;
    delete article.uri;

    let flags =
      Ci.nsIDocumentEncoder.OutputSelectionOnly |
      Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
    article.title = Cc["@mozilla.org/parserutils;1"]
      .getService(Ci.nsIParserUtils)
      .convertToPlainText(article.title, flags, 0);
    if (gIsFirefoxDesktop) {
      await this._assignLanguage(article);
      this._maybeAssignTextDirection(article);
    }

    this._assignReadTime(article);

    Glean.readermode.parseResult.accumulateSingleSample(PARSE_SUCCESS);
    return article;
  },

  /**
   * Sets `article.detectedLanguage` to the detected language of the article
   * text if the detection result is confident, or to null otherwise.
   *
   * @param article The article object to annotate.
   * @return Promise
   * @resolves when the language is detected
   */
  _assignLanguage(article) {
    return lazy.LanguageDetector.detectLanguage(article.textContent).then(
      result => {
        article.detectedLanguage = result.confident ? result.language : null;
      }
    );
  },

  /**
   * Sets `article.dir` from the detected language's script direction, but
   * only when the article has no direction yet and a language was detected.
   *
   * @param article The article object to annotate.
   */
  _maybeAssignTextDirection(article) {
    // Assign `article.dir` a value if not set and if we have a valid detected language.
    if (
      !article.dir &&
      typeof article.detectedLanguage === "string" &&
      article.detectedLanguage
    ) {
      article.dir = Services.intl.getScriptDirection(article.detectedLanguage);
    }
  },

  /**
   * Assigns the estimated reading time range of the article to the article object.
   *
   * Sets `readingTimeMinsSlow` and `readingTimeMinsFast`, both rounded up to
   * whole minutes, from `article.length` and the reading speed for
   * `article.detectedLanguage` (falling back to "en").
   *
   * @param article the article object to assign the reading time estimate to.
   */
  _assignReadTime(article) {
    let lang = article.detectedLanguage || "en";
    const readingSpeed = this._getReadingSpeedForLanguage(lang);
    const charactersPerMinuteLow = readingSpeed.cpm - readingSpeed.variance;
    const charactersPerMinuteHigh = readingSpeed.cpm + readingSpeed.variance;
    const length = article.length;

    article.readingTimeMinsSlow = Math.ceil(length / charactersPerMinuteLow);
    article.readingTimeMinsFast = Math.ceil(length / charactersPerMinuteHigh);
  },

  /**
   * Returns the reading speed of a selection of languages with likely variance.
   *
   * Reading speed estimated from a study done on reading speeds in various languages.
   * study can be found here: http://iovs.arvojournals.org/article.aspx?articleid=2166061
   *
   * @param lang Language code to look up (e.g. "en").
   * @return object with characters per minute and variance. Defaults to English
   *         if no suitable language is found in the collection.
   */
  _getReadingSpeedForLanguage(lang) {
    const readingSpeed = new Map([
      ["en", { cpm: 987, variance: 118 }],
      ["ar", { cpm: 612, variance: 88 }],
      ["de", { cpm: 920, variance: 86 }],
      ["es", { cpm: 1025, variance: 127 }],
      ["fi", { cpm: 1078, variance: 121 }],
      ["fr", { cpm: 998, variance: 126 }],
      ["he", { cpm: 833, variance: 130 }],
      ["it", { cpm: 950, variance: 140 }],
      ["ja", { cpm: 357, variance: 56 }],
      ["nl", { cpm: 978, variance: 143 }],
      ["pl", { cpm: 916, variance: 126 }],
      ["pt", { cpm: 913, variance: 145 }],
      ["ru", { cpm: 986, variance: 175 }],
      ["sl", { cpm: 885, variance: 145 }],
      ["sv", { cpm: 917, variance: 156 }],
      ["tr", { cpm: 1054, variance: 156 }],
      ["zh", { cpm: 255, variance: 29 }],
    ]);

    return readingSpeed.get(lang) || readingSpeed.get("en");
  },

  /**
   * Check if the document to be parsed is text document.
   *
   * @param doc the doc object to be parsed.
   * @return boolean true when the document's contentType is "text/plain".
   */
  _isDocumentPlainText(doc) {
    return doc.contentType == "text/plain";
  },

  /**
   * The document to be parsed is text document and is converted to HTML format.
   *
   * The text of the document's first `<pre>` is split into paragraphs on
   * blank lines; each paragraph becomes a `<p>` whose lines are each followed
   * by a `<br>`.
   *
   * @param doc the doc object to be parsed.
   * @return a deep clone of the document's root element in which the `<pre>`
   *         has been replaced by the generated paragraphs.
   */
  _convertPlainTextDocument(doc) {
    let preTag = doc.querySelector("pre");
    let docFrag = doc.createDocumentFragment();
    let content = preTag.textContent;
    let paragraphs = content.split(/\r?\n\r?\n/);
    for (let para of paragraphs) {
      let pElem = doc.createElement("p");
      let lines = para.split(/\n/);
      for (let line of lines) {
        pElem.append(line);
        let brElem = doc.createElement("br");
        pElem.append(brElem);
      }
      docFrag.append(pElem);
    }
    // Clone the document to avoid the original document being affected
    // (which shows up when exiting reader mode again).
    let clone = doc.documentElement.cloneNode(true);
    clone.querySelector("pre").replaceWith(docFrag);
    return clone;
  },
};

// Expose the "reader.parse-node-limit" pref (default 0) as
// ReaderMode.maxElemsToParse.
XPCOMUtils.defineLazyPreferenceGetter(
  ReaderMode,
  "maxElemsToParse",
  "reader.parse-node-limit",
  0
);