diff options
Diffstat (limited to 'toolkit/components/reader')
42 files changed, 7880 insertions, 0 deletions
diff --git a/toolkit/components/reader/.eslintrc.js b/toolkit/components/reader/.eslintrc.js new file mode 100644 index 0000000000..4df95dd5f8 --- /dev/null +++ b/toolkit/components/reader/.eslintrc.js @@ -0,0 +1,13 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +module.exports = { + rules: { + "no-inner-declarations": "error", + "no-shadow": "error", + "no-unused-vars": ["error", { vars: "all", args: "none" }], + }, +}; diff --git a/toolkit/components/reader/AboutReader.sys.mjs b/toolkit/components/reader/AboutReader.sys.mjs new file mode 100644 index 0000000000..e9bc0bb729 --- /dev/null +++ b/toolkit/components/reader/AboutReader.sys.mjs @@ -0,0 +1,1548 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +import { ReaderMode } from "resource://gre/modules/ReaderMode.sys.mjs"; + +import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs"; +import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + AsyncPrefs: "resource://gre/modules/AsyncPrefs.sys.mjs", + NarrateControls: "resource://gre/modules/narrate/NarrateControls.sys.mjs", + NimbusFeatures: "resource://nimbus/ExperimentAPI.sys.mjs", +}); + +XPCOMUtils.defineLazyGetter( + lazy, + "numberFormat", + () => new Services.intl.NumberFormat(undefined) +); +XPCOMUtils.defineLazyGetter( + lazy, + "pluralRules", + () => new Services.intl.PluralRules(undefined) +); + +const COLORSCHEME_L10N_IDS = { + light: "about-reader-color-scheme-light", + dark: "about-reader-color-scheme-dark", + sepia: "about-reader-color-scheme-sepia", + auto: "about-reader-color-scheme-auto", +}; + +Services.telemetry.setEventRecordingEnabled("readermode", true); + +const zoomOnCtrl = + Services.prefs.getIntPref("mousewheel.with_control.action", 3) == 3; +const zoomOnMeta = + Services.prefs.getIntPref("mousewheel.with_meta.action", 1) == 3; +const isAppLocaleRTL = Services.locale.isAppLocaleRTL; + +export var AboutReader = function ( + actor, + articlePromise, + docContentType = "document", + docTitle = "" +) { + let win = actor.contentWindow; + let url = this._getOriginalUrl(win); + if ( + !( + url.startsWith("http://") || + url.startsWith("https://") || + url.startsWith("file://") + ) + ) { + let errorMsg = + "Only http://, https:// and file:// URLs can be loaded in about:reader."; + if (Services.prefs.getBoolPref("reader.errors.includeURLs")) { + errorMsg += " Tried to load: " + url + "."; + } + console.error(errorMsg); + win.location.href = "about:blank"; + return; + } + + let doc = win.document; + if (isAppLocaleRTL) { + doc.dir = "rtl"; + } + doc.documentElement.setAttribute("platform", AppConstants.platform); + + doc.title = docTitle; + + this._actor = actor; + this._isLoggedInPocketUser = undefined; + + this._docRef = Cu.getWeakReference(doc); + this._winRef = Cu.getWeakReference(win); + this._innerWindowId = win.windowGlobalChild.innerWindowId; + + this._article = null; + this._languagePromise = new Promise(resolve => { + this._foundLanguage = resolve; + }); + + if (articlePromise) { + this._articlePromise = articlePromise; + } + + this._headerElementRef = Cu.getWeakReference( + doc.querySelector(".reader-header") + ); + this._domainElementRef = Cu.getWeakReference( + doc.querySelector(".reader-domain") + ); + this._titleElementRef = Cu.getWeakReference( + doc.querySelector(".reader-title") + ); + this._readTimeElementRef = Cu.getWeakReference( + doc.querySelector(".reader-estimated-time") + ); + this._creditsElementRef = Cu.getWeakReference( + doc.querySelector(".reader-credits") + ); + this._contentElementRef = Cu.getWeakReference( + doc.querySelector(".moz-reader-content") + ); + this._toolbarContainerElementRef = Cu.getWeakReference( + doc.querySelector(".toolbar-container") + ); + this._toolbarElementRef = Cu.getWeakReference( + doc.querySelector(".reader-controls") + ); + this._messageElementRef = Cu.getWeakReference( + doc.querySelector(".reader-message") + ); + this._containerElementRef = Cu.getWeakReference( + doc.querySelector(".container") + ); + + doc.addEventListener("mousedown", this); + doc.addEventListener("click", this); + doc.addEventListener("touchstart", this); + + win.addEventListener("pagehide", this); + win.addEventListener("resize", this); + win.addEventListener("wheel", this, { passive: false }); + + this.colorSchemeMediaList = win.matchMedia("(prefers-color-scheme: dark)"); + this.colorSchemeMediaList.addEventListener("change", this); + + this.forcedColorsMediaList = win.matchMedia("(forced-colors)"); + this.forcedColorsMediaList.addEventListener("change", this); + + this._topScrollChange = this._topScrollChange.bind(this); + this._intersectionObs = new win.IntersectionObserver(this._topScrollChange, { + root: null, + threshold: [0, 1], + }); + this._intersectionObs.observe(doc.querySelector(".top-anchor")); + + this._ctaIntersectionObserver = new win.IntersectionObserver( + this._pocketCTAObserved.bind(this), + { + threshold: 0.5, + } + ); + + Services.obs.addObserver(this, "inner-window-destroyed"); + + this._setupButton("close-button", this._onReaderClose.bind(this)); + + // we're ready for any external setup, send a signal for that. + this._actor.sendAsyncMessage("Reader:OnSetup"); + + let colorSchemeValues = JSON.parse( + Services.prefs.getCharPref("reader.color_scheme.values") + ); + let colorSchemeOptions = colorSchemeValues.map(value => ({ + l10nId: COLORSCHEME_L10N_IDS[value], + groupName: "color-scheme", + value, + itemClass: value + "-button", + })); + let colorScheme = Services.prefs.getCharPref("reader.color_scheme"); + + this._setupSegmentedButton( + "color-scheme-buttons", + colorSchemeOptions, + colorScheme, + this._setColorSchemePref.bind(this) + ); + this._setColorSchemePref(colorScheme); + + let fontTypeOptions = [ + { + l10nId: "about-reader-font-type-sans-serif", + groupName: "font-type", + value: "sans-serif", + itemClass: "sans-serif-button", + }, + { + l10nId: "about-reader-font-type-serif", + groupName: "font-type", + value: "serif", + itemClass: "serif-button", + }, + ]; + + let fontType = Services.prefs.getCharPref("reader.font_type"); + this._setupSegmentedButton( + "font-type-buttons", + fontTypeOptions, + fontType, + this._setFontType.bind(this) + ); + this._setFontType(fontType); + + this._setupFontSizeButtons(); + + this._setupContentWidthButtons(); + + this._setupLineHeightButtons(); + + if (win.speechSynthesis && Services.prefs.getBoolPref("narrate.enabled")) { + new lazy.NarrateControls(win, this._languagePromise); + } + + this._loadArticle(docContentType); +}; + +AboutReader.prototype = { + _BLOCK_IMAGES_SELECTOR: + ".content p > img:only-child, " + + ".content p > a:only-child > img:only-child, " + + ".content .wp-caption img, " + + ".content figure img", + + _TABLES_SELECTOR: ".content table", + + FONT_SIZE_MIN: 1, + + FONT_SIZE_LEGACY_MAX: 9, + + FONT_SIZE_MAX: 15, + + FONT_SIZE_EXTENDED_VALUES: [32, 40, 56, 72, 96, 128], + + get _doc() { + return this._docRef.get(); + }, + + get _win() { + return this._winRef.get(); + }, + + get _headerElement() { + return this._headerElementRef.get(); + }, + + get _domainElement() { + return this._domainElementRef.get(); + }, + + get _titleElement() { + return this._titleElementRef.get(); + }, + + get _readTimeElement() { + return this._readTimeElementRef.get(); + }, + + get _creditsElement() { + return this._creditsElementRef.get(); + }, + + get _contentElement() { + return this._contentElementRef.get(); + }, + + get _toolbarElement() { + return this._toolbarElementRef.get(); + }, + + get _toolbarContainerElement() { + return this._toolbarContainerElementRef.get(); + }, + + get _messageElement() { + return this._messageElementRef.get(); + }, + + get _containerElement() { + return this._containerElementRef.get(); + }, + + get _isToolbarVertical() { + if (this._toolbarVertical !== undefined) { + return this._toolbarVertical; + } + return (this._toolbarVertical = Services.prefs.getBoolPref( + "reader.toolbar.vertical" + )); + }, + + receiveMessage({ data, name }) { + const doc = this._doc; + switch (name) { + case "Reader:AddButton": { + if (data.id && data.image && !doc.getElementsByClassName(data.id)[0]) { + let btn = doc.createElement("button"); + btn.dataset.buttonid = data.id; + btn.dataset.telemetryId = `reader-${data.telemetryId}`; + btn.className = "toolbar-button " + data.id; + btn.setAttribute("aria-labelledby", "label-" + data.id); + let tip = doc.createElement("span"); + tip.className = "hover-label"; + tip.id = "label-" + data.id; + doc.l10n.setAttributes(tip, data.l10nId); + btn.append(tip); + btn.style.backgroundImage = "url('" + data.image + "')"; + if (data.width && data.height) { + btn.style.backgroundSize = `${data.width}px ${data.height}px`; + } + let tb = this._toolbarElement; + tb.appendChild(btn); + this._setupButton(data.id, button => { + this._actor.sendAsyncMessage( + "Reader:Clicked-" + button.dataset.buttonid, + { article: this._article } + ); + }); + } + break; + } + case "Reader:RemoveButton": { + if (data.id) { + let btn = doc.getElementsByClassName(data.id)[0]; + if (btn) { + btn.remove(); + } + } + break; + } + case "Reader:ZoomIn": { + this._changeFontSize(+1); + break; + } + case "Reader:ZoomOut": { + this._changeFontSize(-1); + break; + } + case "Reader:ResetZoom": { + this._resetFontSize(); + break; + } + } + }, + + handleEvent(aEvent) { + if (!aEvent.isTrusted) { + return; + } + + let target = aEvent.target; + switch (aEvent.type) { + case "touchstart": + /* fall through */ + case "mousedown": + if ( + !target.closest(".dropdown-popup") && + // Skip handling the toggle button here becase + // the dropdown will get toggled with the 'click' event. + !target.classList.contains("dropdown-toggle") + ) { + this._closeDropdowns(); + } + break; + case "click": + const buttonLabel = + target.attributes.getNamedItem(`data-telemetry-id`)?.value; + + if (buttonLabel) { + Services.telemetry.recordEvent( + "readermode", + "button", + "click", + null, + { + label: buttonLabel, + } + ); + } + + if (target.classList.contains("dropdown-toggle")) { + this._toggleDropdownClicked(aEvent); + } + break; + case "scroll": + let lastHeight = this._lastHeight; + let { windowUtils } = this._win; + this._lastHeight = windowUtils.getBoundsWithoutFlushing( + this._doc.body + ).height; + // Only close dropdowns if the scroll events are not a result of line + // height / font-size changes that caused a page height change. + if (lastHeight == this._lastHeight) { + this._closeDropdowns(true); + } + + break; + case "resize": + this._updateImageMargins(); + this._scheduleToolbarOverlapHandler(); + break; + + case "wheel": + let doZoom = + (aEvent.ctrlKey && zoomOnCtrl) || (aEvent.metaKey && zoomOnMeta); + if (!doZoom) { + return; + } + aEvent.preventDefault(); + + // Throttle events to once per 150ms. This avoids excessively fast zooming. + if (aEvent.timeStamp <= this._zoomBackoffTime) { + return; + } + this._zoomBackoffTime = aEvent.timeStamp + 150; + + // Determine the direction of the delta (we don't care about its size); + // This code is adapted from normalizeWheelEventDelta in + // toolkt/components/pdfjs/content/web/viewer.js + let delta = Math.abs(aEvent.deltaX) + Math.abs(aEvent.deltaY); + let angle = Math.atan2(aEvent.deltaY, aEvent.deltaX); + if (-0.25 * Math.PI < angle && angle < 0.75 * Math.PI) { + delta = -delta; + } + + if (delta > 0) { + this._changeFontSize(+1); + } else if (delta < 0) { + this._changeFontSize(-1); + } + break; + + case "pagehide": + this._closeDropdowns(); + + this._actor.readerModeHidden(); + this.clearActor(); + + // Disconnect and delete IntersectionObservers to prevent memory leaks: + + this._intersectionObs.unobserve(this._doc.querySelector(".top-anchor")); + this._ctaIntersectionObserver.disconnect(); + + delete this._intersectionObs; + delete this._ctaIntersectionObserver; + + break; + + case "change": + let colorScheme; + if (this.forcedColorsMediaList.matches) { + colorScheme = "hcm"; + } else { + colorScheme = Services.prefs.getCharPref("reader.color_scheme"); + // We should be changing the color scheme in relation to a preference change + // if the user has the color scheme preference set to "Auto". + if (colorScheme == "auto") { + colorScheme = this.colorSchemeMediaList.matches ? "dark" : "light"; + } + } + this._setColorScheme(colorScheme); + + break; + } + }, + + clearActor() { + this._actor = null; + }, + + _onReaderClose() { + if (this._actor) { + this._actor.closeReaderMode(); + } + }, + + async _resetFontSize() { + await lazy.AsyncPrefs.reset("reader.font_size"); + let currentSize = Services.prefs.getIntPref("reader.font_size"); + this._setFontSize(currentSize); + }, + + _setFontSize(newFontSize) { + this._fontSize = Math.min( + this.FONT_SIZE_MAX, + Math.max(this.FONT_SIZE_MIN, newFontSize) + ); + let size; + if (this._fontSize > this.FONT_SIZE_LEGACY_MAX) { + // -1 because we're indexing into a 0-indexed array, so the first value + // over the legacy max should be 0, the next 1, etc. + let index = this._fontSize - this.FONT_SIZE_LEGACY_MAX - 1; + size = this.FONT_SIZE_EXTENDED_VALUES[index]; + } else { + size = 10 + 2 * this._fontSize; + } + + let readerBody = this._doc.body; + readerBody.style.setProperty("--font-size", size + "px"); + return lazy.AsyncPrefs.set("reader.font_size", this._fontSize); + }, + + _setupFontSizeButtons() { + let plusButton = this._doc.querySelector(".plus-button"); + let minusButton = this._doc.querySelector(".minus-button"); + + let currentSize = Services.prefs.getIntPref("reader.font_size"); + this._setFontSize(currentSize); + this._updateFontSizeButtonControls(); + + plusButton.addEventListener( + "click", + event => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + this._changeFontSize(+1); + }, + true + ); + + minusButton.addEventListener( + "click", + event => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + this._changeFontSize(-1); + }, + true + ); + }, + + _updateFontSizeButtonControls() { + let plusButton = this._doc.querySelector(".plus-button"); + let minusButton = this._doc.querySelector(".minus-button"); + + let currentSize = this._fontSize; + let fontValue = this._doc.querySelector(".font-size-value"); + fontValue.textContent = currentSize; + + if (currentSize === this.FONT_SIZE_MIN) { + minusButton.setAttribute("disabled", true); + } else { + minusButton.removeAttribute("disabled"); + } + if (currentSize === this.FONT_SIZE_MAX) { + plusButton.setAttribute("disabled", true); + } else { + plusButton.removeAttribute("disabled"); + } + }, + + _changeFontSize(changeAmount) { + let currentSize = + Services.prefs.getIntPref("reader.font_size") + changeAmount; + this._setFontSize(currentSize); + this._updateFontSizeButtonControls(); + this._scheduleToolbarOverlapHandler(); + }, + + _setContentWidth(newContentWidth) { + this._contentWidth = newContentWidth; + this._displayContentWidth(newContentWidth); + let width = 20 + 5 * (this._contentWidth - 1) + "em"; + this._doc.body.style.setProperty("--content-width", width); + this._scheduleToolbarOverlapHandler(); + return lazy.AsyncPrefs.set("reader.content_width", this._contentWidth); + }, + + _displayContentWidth(currentContentWidth) { + let contentWidthValue = this._doc.querySelector(".content-width-value"); + contentWidthValue.textContent = currentContentWidth; + }, + + _setupContentWidthButtons() { + const CONTENT_WIDTH_MIN = 1; + const CONTENT_WIDTH_MAX = 9; + + let currentContentWidth = Services.prefs.getIntPref("reader.content_width"); + currentContentWidth = Math.max( + CONTENT_WIDTH_MIN, + Math.min(CONTENT_WIDTH_MAX, currentContentWidth) + ); + + this._displayContentWidth(currentContentWidth); + + let plusButton = this._doc.querySelector(".content-width-plus-button"); + let minusButton = this._doc.querySelector(".content-width-minus-button"); + + function updateControls() { + if (currentContentWidth === CONTENT_WIDTH_MIN) { + minusButton.setAttribute("disabled", true); + } else { + minusButton.removeAttribute("disabled"); + } + if (currentContentWidth === CONTENT_WIDTH_MAX) { + plusButton.setAttribute("disabled", true); + } else { + plusButton.removeAttribute("disabled"); + } + } + + updateControls(); + this._setContentWidth(currentContentWidth); + + plusButton.addEventListener( + "click", + event => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentContentWidth >= CONTENT_WIDTH_MAX) { + return; + } + + currentContentWidth++; + updateControls(); + this._setContentWidth(currentContentWidth); + }, + true + ); + + minusButton.addEventListener( + "click", + event => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentContentWidth <= CONTENT_WIDTH_MIN) { + return; + } + + currentContentWidth--; + updateControls(); + this._setContentWidth(currentContentWidth); + }, + true + ); + }, + + _setLineHeight(newLineHeight) { + this._displayLineHeight(newLineHeight); + let height = 1 + 0.2 * (newLineHeight - 1) + "em"; + this._containerElement.style.setProperty("--line-height", height); + return lazy.AsyncPrefs.set("reader.line_height", newLineHeight); + }, + + _displayLineHeight(currentLineHeight) { + let lineHeightValue = this._doc.querySelector(".line-height-value"); + lineHeightValue.textContent = currentLineHeight; + }, + + _setupLineHeightButtons() { + const LINE_HEIGHT_MIN = 1; + const LINE_HEIGHT_MAX = 9; + + let currentLineHeight = Services.prefs.getIntPref("reader.line_height"); + currentLineHeight = Math.max( + LINE_HEIGHT_MIN, + Math.min(LINE_HEIGHT_MAX, currentLineHeight) + ); + + this._displayLineHeight(currentLineHeight); + + let plusButton = this._doc.querySelector(".line-height-plus-button"); + let minusButton = this._doc.querySelector(".line-height-minus-button"); + + function updateControls() { + if (currentLineHeight === LINE_HEIGHT_MIN) { + minusButton.setAttribute("disabled", true); + } else { + minusButton.removeAttribute("disabled"); + } + if (currentLineHeight === LINE_HEIGHT_MAX) { + plusButton.setAttribute("disabled", true); + } else { + plusButton.removeAttribute("disabled"); + } + } + + updateControls(); + this._setLineHeight(currentLineHeight); + + plusButton.addEventListener( + "click", + event => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentLineHeight >= LINE_HEIGHT_MAX) { + return; + } + + currentLineHeight++; + updateControls(); + this._setLineHeight(currentLineHeight); + }, + true + ); + + minusButton.addEventListener( + "click", + event => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentLineHeight <= LINE_HEIGHT_MIN) { + return; + } + + currentLineHeight--; + updateControls(); + this._setLineHeight(currentLineHeight); + }, + true + ); + }, + + _setColorScheme(newColorScheme) { + // There's nothing to change if the new color scheme is the same as our current scheme. + if (this._colorScheme === newColorScheme) { + return; + } + + let bodyClasses = this._doc.body.classList; + + if (this._colorScheme) { + bodyClasses.remove(this._colorScheme); + } + + if (!this._win.matchMedia("(forced-colors)").matches) { + if (newColorScheme === "auto") { + this._colorScheme = this.colorSchemeMediaList.matches + ? "dark" + : "light"; + } else { + this._colorScheme = newColorScheme; + } + } else { + this._colorScheme = "hcm"; + } + + bodyClasses.add(this._colorScheme); + }, + + // Pref values include "dark", "light", "sepia", and "auto" + _setColorSchemePref(colorSchemePref) { + this._setColorScheme(colorSchemePref); + + lazy.AsyncPrefs.set("reader.color_scheme", colorSchemePref); + }, + + _setFontType(newFontType) { + if (this._fontType === newFontType) { + return; + } + + let bodyClasses = this._doc.body.classList; + + if (this._fontType) { + bodyClasses.remove(this._fontType); + } + + this._fontType = newFontType; + bodyClasses.add(this._fontType); + + lazy.AsyncPrefs.set("reader.font_type", this._fontType); + }, + + async _loadArticle(docContentType = "document") { + let url = this._getOriginalUrl(); + this._showProgressDelayed(); + + let article; + if (this._articlePromise) { + article = await this._articlePromise; + } + + if (!article) { + try { + article = await ReaderMode.downloadAndParseDocument( + url, + docContentType + ); + } catch (e) { + if (e?.newURL && this._actor) { + await this._actor.sendQuery("RedirectTo", { + newURL: e.newURL, + article: e.article, + }); + + let readerURL = "about:reader?url=" + encodeURIComponent(e.newURL); + this._win.location.replace(readerURL); + return; + } + } + } + + if (!this._actor) { + return; + } + + // Replace the loading message with an error message if there's a failure. + // Users are supposed to navigate away by themselves (because we cannot + // remove ourselves from session history.) + if (!article) { + this._showError(); + return; + } + + this._showContent(article); + }, + + async _requestPocketLoginStatus() { + let isLoggedIn = await this._actor.sendQuery( + "Reader:PocketLoginStatusRequest" + ); + + return isLoggedIn; + }, + + async _requestPocketArticleInfo(url) { + let articleInfo = await this._actor.sendQuery( + "Reader:PocketGetArticleInfo", + { + url, + } + ); + + return articleInfo?.item_preview?.item_id; + }, + + async _requestPocketArticleRecs(itemID) { + let recs = await this._actor.sendQuery("Reader:PocketGetArticleRecs", { + itemID, + }); + + return recs; + }, + + async _savePocketArticle(url) { + let result = await this._actor.sendQuery("Reader:PocketSaveArticle", { + url, + }); + + return result; + }, + + async _requestFavicon() { + let iconDetails = await this._actor.sendQuery("Reader:FaviconRequest", { + url: this._article.url, + preferredWidth: 16 * this._win.devicePixelRatio, + }); + + if (iconDetails) { + this._loadFavicon(iconDetails.url, iconDetails.faviconUrl); + } + }, + + _loadFavicon(url, faviconUrl) { + if (this._article.url !== url) { + return; + } + + let doc = this._doc; + + let link = doc.createElement("link"); + link.rel = "shortcut icon"; + link.href = faviconUrl; + + doc.getElementsByTagName("head")[0].appendChild(link); + }, + + _updateImageMargins() { + let windowWidth = this._win.innerWidth; + let bodyWidth = this._doc.body.clientWidth; + + let setImageMargins = function (img) { + img.classList.add("moz-reader-block-img"); + + // If the image is at least as wide as the window, make it fill edge-to-edge on mobile. + if (img.naturalWidth >= windowWidth) { + img.setAttribute("moz-reader-full-width", true); + } else { + img.removeAttribute("moz-reader-full-width"); + } + + // If the image is at least half as wide as the body, center it on desktop. + if (img.naturalWidth >= bodyWidth / 2) { + img.setAttribute("moz-reader-center", true); + } else { + img.removeAttribute("moz-reader-center"); + } + }; + + let imgs = this._doc.querySelectorAll(this._BLOCK_IMAGES_SELECTOR); + for (let i = imgs.length; --i >= 0; ) { + let img = imgs[i]; + + if (img.naturalWidth > 0) { + setImageMargins(img); + } else { + img.onload = function () { + setImageMargins(img); + }; + } + } + }, + + _updateWideTables() { + let windowWidth = this._win.innerWidth; + + // Avoid horizontal overflow in the document by making tables that are wider than half browser window's size + // by making it scrollable. + let tables = this._doc.querySelectorAll(this._TABLES_SELECTOR); + for (let i = tables.length; --i >= 0; ) { + let table = tables[i]; + let rect = table.getBoundingClientRect(); + let tableWidth = rect.width; + + if (windowWidth / 2 <= tableWidth) { + table.classList.add("moz-reader-wide-table"); + } + } + }, + + _maybeSetTextDirection: function Read_maybeSetTextDirection(article) { + // Set the article's "dir" on the contents. + // If no direction is specified, the contents should automatically be LTR + // regardless of the UI direction to avoid inheriting the parent's direction + // if the UI is RTL. + this._containerElement.dir = article.dir || "ltr"; + + // The native locale could be set differently than the article's text direction. + this._readTimeElement.dir = isAppLocaleRTL ? "rtl" : "ltr"; + + // This is used to mirror the line height buttons in the toolbar, when relevant. + this._toolbarElement.setAttribute("articledir", article.dir || "ltr"); + }, + + _showError() { + this._headerElement.classList.remove("reader-show-element"); + this._contentElement.classList.remove("reader-show-element"); + + this._doc.l10n.setAttributes( + this._messageElement, + "about-reader-load-error" + ); + this._doc.l10n.setAttributes( + this._doc.getElementById("reader-title"), + "about-reader-load-error" + ); + this._messageElement.style.display = "block"; + + this._doc.documentElement.dataset.isError = true; + + this._error = true; + + this._doc.dispatchEvent( + new this._win.CustomEvent("AboutReaderContentError", { + bubbles: true, + cancelable: false, + }) + ); + }, + + // This function is the JS version of Java's StringUtils.stripCommonSubdomains. + _stripHost(host) { + if (!host) { + return host; + } + + let start = 0; + + if (host.startsWith("www.")) { + start = 4; + } else if (host.startsWith("m.")) { + start = 2; + } else if (host.startsWith("mobile.")) { + start = 7; + } + + return host.substring(start); + }, + + _showContent(article) { + this._messageElement.classList.remove("reader-show-element"); + + this._article = article; + + this._domainElement.href = article.url; + let articleUri = Services.io.newURI(article.url); + + try { + this._domainElement.textContent = this._stripHost(articleUri.host); + } catch (ex) { + let url = this._actor.document.URL; + url = url.substring(url.indexOf("%2F") + 6); + url = url.substring(0, url.indexOf("%2F")); + + this._domainElement.textContent = url; + } + + this._creditsElement.textContent = article.byline; + + this._titleElement.textContent = article.title; + + // TODO: Once formatRange() and selectRange() are available outside Nightly, + // use them here. https://bugzilla.mozilla.org/show_bug.cgi?id=1795317 + const slow = article.readingTimeMinsSlow; + const fast = article.readingTimeMinsFast; + const fastStr = lazy.numberFormat.format(fast); + const slowStr = lazy.numberFormat.format(slow); + this._doc.l10n.setAttributes( + this._readTimeElement, + "about-reader-estimated-read-time", + { + range: fastStr === slowStr ? `~${fastStr}` : `${fastStr}–${slowStr}`, + rangePlural: lazy.pluralRules.select(slow), + } + ); + + // If a document title was not provided in the constructor, we'll fall back + // to using the article title. + if (!this._doc.title) { + this._doc.title = article.title; + } + + this._containerElement.setAttribute("lang", article.lang); + + this._headerElement.classList.add("reader-show-element"); + + let parserUtils = Cc["@mozilla.org/parserutils;1"].getService( + Ci.nsIParserUtils + ); + let contentFragment = parserUtils.parseFragment( + article.content, + Ci.nsIParserUtils.SanitizerDropForms | + Ci.nsIParserUtils.SanitizerAllowStyle, + false, + articleUri, + this._contentElement + ); + this._contentElement.innerHTML = ""; + this._contentElement.appendChild(contentFragment); + this._maybeSetTextDirection(article); + this._foundLanguage(article.language); + + this._contentElement.classList.add("reader-show-element"); + this._updateImageMargins(); + this._updateWideTables(); + + this._requestFavicon(); + this._doc.body.classList.add("loaded"); + + this._goToReference(articleUri.ref); + + Services.obs.notifyObservers(this._win, "AboutReader:Ready"); + + this._doc.dispatchEvent( + new this._win.CustomEvent("AboutReaderContentReady", { + bubbles: true, + cancelable: false, + }) + ); + + // Show Pocket CTA block after article has loaded to prevent it flashing in prematurely + this._setupPocketCTA(); + }, + + _hideContent() { + this._headerElement.classList.remove("reader-show-element"); + this._contentElement.classList.remove("reader-show-element"); + }, + + _showProgressDelayed() { + this._win.setTimeout(() => { + // No need to show progress if the article has been loaded, + // if the window has been unloaded, or if there was an error + // trying to load the article. + if (this._article || !this._actor || this._error) { + return; + } + + this._headerElement.classList.remove("reader-show-element"); + this._contentElement.classList.remove("reader-show-element"); + + this._doc.l10n.setAttributes( + this._messageElement, + "about-reader-loading" + ); + this._messageElement.classList.add("reader-show-element"); + }, 300); + }, + + /** + * Returns the original article URL for this about:reader view. + */ + _getOriginalUrl(win) { + let url = win ? win.location.href : this._win.location.href; + return ReaderMode.getOriginalUrl(url) || url; + }, + + _setupSegmentedButton(id, options, initialValue, callback) { + let doc = this._doc; + let segmentedButton = doc.getElementsByClassName(id)[0]; + + for (let option of options) { + let radioButton = doc.createElement("input"); + radioButton.id = "radio-item" + option.itemClass; + radioButton.type = "radio"; + radioButton.classList.add("radio-button"); + radioButton.name = option.groupName; + segmentedButton.appendChild(radioButton); + + let item = doc.createElement("label"); + item.htmlFor = radioButton.id; + item.classList.add(option.itemClass); + doc.l10n.setAttributes(item, option.l10nId); + + segmentedButton.appendChild(item); + + radioButton.addEventListener( + "input", + function (aEvent) { + if (!aEvent.isTrusted) { + return; + } + + let labels = segmentedButton.children; + for (let label of labels) { + label.removeAttribute("checked"); + } + + aEvent.target.nextElementSibling.setAttribute("checked", "true"); + callback(option.value); + }, + true + ); + + if (option.value === initialValue) { + radioButton.checked = true; + item.setAttribute("checked", "true"); + } + } + }, + + _setupButton(id, callback) { + let button = this._doc.querySelector("." + id); + button.removeAttribute("hidden"); + button.addEventListener( + "click", + function (aEvent) { + if (!aEvent.isTrusted) { + return; + } + + let btn = aEvent.target; + callback(btn); + }, + true + ); + }, + + _toggleDropdownClicked(event) { + let dropdown = event.target.closest(".dropdown"); + + if (!dropdown) { + return; + } + + event.stopPropagation(); + + if (dropdown.classList.contains("open")) { + this._closeDropdowns(); + } else { + this._openDropdown(dropdown); + } + }, + + /* + * If the ReaderView banner font-dropdown is closed, open it. + */ + _openDropdown(dropdown, window) { + if (dropdown.classList.contains("open")) { + return; + } + + this._closeDropdowns(); + + // Get the height of the doc and start handling scrolling: + let { windowUtils } = this._win; + this._lastHeight = windowUtils.getBoundsWithoutFlushing( + this._doc.body + ).height; + this._doc.addEventListener("scroll", this); + + dropdown.classList.add("open"); + this._toolbarElement.classList.add("dropdown-open"); + + this._toolbarContainerElement.classList.add("dropdown-open"); + this._toggleToolbarFixedPosition(true); + }, + + /* + * If the ReaderView has open dropdowns, close them. If we are closing the + * dropdowns because the page is scrolling, allow popups to stay open with + * the keep-open class. + */ + _closeDropdowns(scrolling) { + let selector = ".dropdown.open"; + if (scrolling) { + selector += ":not(.keep-open)"; + } + + let openDropdowns = this._doc.querySelectorAll(selector); + let haveOpenDropdowns = openDropdowns.length; + for (let dropdown of openDropdowns) { + dropdown.classList.remove("open"); + } + this._toolbarElement.classList.remove("dropdown-open"); + + if (haveOpenDropdowns) { + this._toolbarContainerElement.classList.remove("dropdown-open"); + this._toggleToolbarFixedPosition(false); + } + + // Stop handling scrolling: + this._doc.removeEventListener("scroll", this); + }, + + _toggleToolbarFixedPosition(shouldBeFixed) { + let el = this._toolbarContainerElement; + let fontSize = this._doc.body.style.getPropertyValue("--font-size"); + let contentWidth = this._doc.body.style.getPropertyValue("--content-width"); + if (shouldBeFixed) { + el.style.setProperty("--font-size", fontSize); + el.style.setProperty("--content-width", contentWidth); + el.classList.add("transition-location"); + } else { + let expectTransition = + el.style.getPropertyValue("--font-size") != fontSize || + el.style.getPropertyValue("--content-width") != contentWidth; + if (expectTransition) { + el.addEventListener( + "transitionend", + () => el.classList.remove("transition-location"), + { once: true } + ); + } else { + el.classList.remove("transition-location"); + } + el.style.removeProperty("--font-size"); + el.style.removeProperty("--content-width"); + el.classList.remove("overlaps"); + } + }, + + _scheduleToolbarOverlapHandler() { + if (this._enqueuedToolbarOverlapHandler) { + return; + } + this._enqueuedToolbarOverlapHandler = this._win.requestAnimationFrame( + () => { + this._win.setTimeout(() => this._toolbarOverlapHandler(), 0); + } + ); + }, + + _toolbarOverlapHandler() { + delete this._enqueuedToolbarOverlapHandler; + // Ensure the dropdown is still open to avoid racing with that changing. + if (this._toolbarContainerElement.classList.contains("dropdown-open")) { + let { windowUtils } = this._win; + let toolbarBounds = windowUtils.getBoundsWithoutFlushing( + this._toolbarElement.parentNode + ); + let textBounds = windowUtils.getBoundsWithoutFlushing( + this._containerElement + ); + let overlaps = false; + if (isAppLocaleRTL) { + overlaps = textBounds.right > toolbarBounds.left; + } else { + overlaps = textBounds.left < toolbarBounds.right; + } + this._toolbarContainerElement.classList.toggle("overlaps", overlaps); + } + }, + + _topScrollChange(entries) { + if (!entries.length) { + return; + } + // If we don't intersect the item at the top of the document, we're + // scrolled down: + let scrolled = !entries[entries.length - 1].isIntersecting; + let tbc = this._toolbarContainerElement; + tbc.classList.toggle("scrolled", scrolled); + }, + + /* + * Scroll reader view to a reference + */ + _goToReference(ref) { + if (ref) { + if (this._doc.readyState == "complete") { + this._win.location.hash = ref; + } else { + this._win.addEventListener( + "load", + () => { + this._win.location.hash = ref; + }, + { once: true } + ); + } + } + }, + + _enableDismissCTA() { + let elDismissCta = this._doc.querySelector(`.pocket-dismiss-cta`); + + elDismissCta?.addEventListener(`click`, e => { + this._doc.querySelector("#pocket-cta-container").hidden = true; + + Services.telemetry.recordEvent( + "readermode", + "pocket_cta", + "close_cta", + null, + {} + ); + }); + }, + + _enableRecShowHide() { + let elPocketRecs = this._doc.querySelector(`.pocket-recs`); + let elCollapseRecs = this._doc.querySelector(`.pocket-collapse-recs`); + let elSignUp = this._doc.querySelector(`div.pocket-sign-up-wrapper`); + + let toggleRecsVisibility = () => { + let isClosed = elPocketRecs.classList.contains(`closed`); + + isClosed = !isClosed; // Toggle + + if (isClosed) { + elPocketRecs.classList.add(`closed`); + elCollapseRecs.classList.add(`closed`); + elSignUp.setAttribute(`hidden`, true); + + Services.telemetry.recordEvent( + "readermode", + "pocket_cta", + "minimize_recs_click", + null, + {} + ); + } else { + elPocketRecs.classList.remove(`closed`); + elCollapseRecs.classList.remove(`closed`); + elSignUp.removeAttribute(`hidden`); + } + }; + + elCollapseRecs?.addEventListener(`click`, e => { + toggleRecsVisibility(); + }); + }, + + _buildPocketRec(title, url, publisher, thumb, time) { + let fragment = this._doc.createDocumentFragment(); + + let elContainer = this._doc.createElement(`div`); + let elTitle = this._doc.createElement(`header`); + let elMetadata = this._doc.createElement(`p`); + let elThumb = this._doc.createElement(`img`); + let elSideWrap = this._doc.createElement(`div`); + let elTop = this._doc.createElement(`a`); + let elBottom = this._doc.createElement(`div`); + let elAdd = this._doc.createElement(`button`); + + elAdd.classList.add(`pocket-btn-add`); + elBottom.classList.add(`pocket-rec-bottom`); + elTop.classList.add(`pocket-rec-top`); + elSideWrap.classList.add(`pocket-rec-side`); + elContainer.classList.add(`pocket-rec`); + elTitle.classList.add(`pocket-rec-title`); + elMetadata.classList.add(`pocket-rec-meta`); + + elTop.setAttribute(`href`, url); + + elTop.addEventListener(`click`, e => { + Services.telemetry.recordEvent( + "readermode", + "pocket_cta", + "rec_click", + null, + {} + ); + }); + + elThumb.classList.add(`pocket-rec-thumb`); + elThumb.setAttribute(`loading`, `lazy`); + elThumb.addEventListener(`load`, () => { + elThumb.classList.add(`pocket-rec-thumb-loaded`); + }); + elThumb.setAttribute( + `src`, + `https://img-getpocket.cdn.mozilla.net/132x132/filters:format(jpeg):quality(60):no_upscale():strip_exif()/${thumb}` + ); + + elAdd.textContent = `Save`; + elTitle.textContent = title; + + if (publisher && time) { + elMetadata.textContent = `${publisher} · ${time} min`; + } else if (publisher) { + elMetadata.textContent = `${publisher}`; + } else if (time) { + elMetadata.textContent = `${time} min`; + } + + elSideWrap.appendChild(elTitle); + elSideWrap.appendChild(elMetadata); + elTop.appendChild(elSideWrap); + elTop.appendChild(elThumb); + elBottom.appendChild(elAdd); + elContainer.appendChild(elTop); + elContainer.appendChild(elBottom); + fragment.appendChild(elContainer); + + elAdd.addEventListener(`click`, e => { + this._savePocketArticle(url); + elAdd.textContent = `Saved`; + elAdd.classList.add(`saved`); + + Services.telemetry.recordEvent( + "readermode", + "pocket_cta", + "rec_saved", + null, + {} + ); + }); + + return fragment; + }, + + async _getAndBuildPocketRecs() { + let elTarget = this._doc.querySelector(`.pocket-recs`); + let url = this._getOriginalUrl(); + let itemID = await this._requestPocketArticleInfo(url); + let articleRecs = await this._requestPocketArticleRecs(itemID); + + articleRecs.recommendations.forEach(rec => { + // Parse a domain from the article URL in case the Publisher name isn't available + let parsedDomain = new URL(rec.item?.normal_url)?.hostname; + + // Calculate read time from word count in case it's not available + let calculatedReadTime = Math.ceil(rec.item?.word_count / 220); + + let elRec = this._buildPocketRec( + rec.item?.title, + rec.item?.normal_url, + rec.item?.domain_metadata?.name || parsedDomain, + rec.item?.top_image_url, + rec.item?.time_to_read || calculatedReadTime + ); + + elTarget.appendChild(elRec); + }); + }, + + _pocketCTAObserved(entries) { + if (entries && entries[0]?.isIntersecting) { + this._ctaIntersectionObserver.disconnect(); + + Services.telemetry.recordEvent( + "readermode", + "pocket_cta", + "cta_seen", + null, + { + logged_in: `${this._isLoggedInPocketUser}`, + } + ); + } + }, + + async _setupPocketCTA() { + let ctaVersion = + lazy.NimbusFeatures.readerMode.getAllVariables()?.pocketCTAVersion; + this._isLoggedInPocketUser = await this._requestPocketLoginStatus(); + let elPocketCTAWrapper = this._doc.querySelector("#pocket-cta-container"); + + // Show the Pocket CTA container if the pref is set and valid + if (ctaVersion === `cta-and-recs` || ctaVersion === `cta-only`) { + if (ctaVersion === `cta-and-recs` && this._isLoggedInPocketUser) { + this._getAndBuildPocketRecs(); + this._enableRecShowHide(); + } else if (ctaVersion === `cta-and-recs` && !this._isLoggedInPocketUser) { + // Fall back to cta only for logged out users: + ctaVersion = `cta-only`; + } + + if (ctaVersion == `cta-only`) { + this._enableDismissCTA(); + } + + elPocketCTAWrapper.hidden = false; + elPocketCTAWrapper.classList.add(`pocket-cta-container-${ctaVersion}`); + elPocketCTAWrapper.classList.add( + `pocket-cta-container-${ + this._isLoggedInPocketUser ? `logged-in` : `logged-out` + }` + ); + + // Set up tracking for sign up buttons + this._doc + .querySelectorAll(`.pocket-sign-up, .pocket-discover-more`) + .forEach(el => { + el.addEventListener(`click`, e => { + Services.telemetry.recordEvent( + "readermode", + "pocket_cta", + "sign_up_click", + null, + {} + ); + }); + }); + + // Set up tracking for user seeing CTA + this._ctaIntersectionObserver.observe( + this._doc.querySelector(`#pocket-cta-container`) + ); + } + }, +}; diff --git a/toolkit/components/reader/JSDOMParser.js b/toolkit/components/reader/JSDOMParser.js new file mode 100644 index 0000000000..a01e71e5c7 --- /dev/null +++ b/toolkit/components/reader/JSDOMParser.js @@ -0,0 +1,1203 @@ +/*eslint-env es6:false*/ +/* + * DO NOT MODIFY THIS FILE DIRECTLY! + * + * This is a shared library that is maintained in an external repo: + * https://github.com/mozilla/readability + */ + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * This is a relatively lightweight DOMParser that is safe to use in a web + * worker. This is far from a complete DOM implementation; however, it should + * contain the minimal set of functionality necessary for Readability.js. + * + * Aside from not implementing the full DOM API, there are other quirks to be + * aware of when using the JSDOMParser: + * + * 1) Properly formed HTML/XML must be used. This means you should be extra + * careful when using this parser on anything received directly from an + * XMLHttpRequest. Providing a serialized string from an XMLSerializer, + * however, should be safe (since the browser's XMLSerializer should + * generate valid HTML/XML). Therefore, if parsing a document from an XHR, + * the recommended approach is to do the XHR in the main thread, use + * XMLSerializer.serializeToString() on the responseXML, and pass the + * resulting string to the worker. + * + * 2) Live NodeLists are not supported. DOM methods and properties such as + * getElementsByTagName() and childNodes return standard arrays. If you + * want these lists to be updated when nodes are removed or added to the + * document, you must take care to manually update them yourself. + */ +(function (global) { + + // XML only defines these and the numeric ones: + + var entityTable = { + "lt": "<", + "gt": ">", + "amp": "&", + "quot": '"', + "apos": "'", + }; + + var reverseEntityTable = { + "<": "<", + ">": ">", + "&": "&", + '"': """, + "'": "'", + }; + + function encodeTextContentHTML(s) { + return s.replace(/[&<>]/g, function(x) { + return reverseEntityTable[x]; + }); + } + + function encodeHTML(s) { + return s.replace(/[&<>'"]/g, function(x) { + return reverseEntityTable[x]; + }); + } + + function decodeHTML(str) { + return str.replace(/&(quot|amp|apos|lt|gt);/g, function(match, tag) { + return entityTable[tag]; + }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(match, hex, numStr) { + var num = parseInt(hex || numStr, hex ? 16 : 10); // read num + return String.fromCharCode(num); + }); + } + + // When a style is set in JS, map it to the corresponding CSS attribute + var styleMap = { + "alignmentBaseline": "alignment-baseline", + "background": "background", + "backgroundAttachment": "background-attachment", + "backgroundClip": "background-clip", + "backgroundColor": "background-color", + "backgroundImage": "background-image", + "backgroundOrigin": "background-origin", + "backgroundPosition": "background-position", + "backgroundPositionX": "background-position-x", + "backgroundPositionY": "background-position-y", + "backgroundRepeat": "background-repeat", + "backgroundRepeatX": "background-repeat-x", + "backgroundRepeatY": "background-repeat-y", + "backgroundSize": "background-size", + "baselineShift": "baseline-shift", + "border": "border", + "borderBottom": "border-bottom", + "borderBottomColor": "border-bottom-color", + "borderBottomLeftRadius": "border-bottom-left-radius", + "borderBottomRightRadius": "border-bottom-right-radius", + "borderBottomStyle": "border-bottom-style", + "borderBottomWidth": "border-bottom-width", + "borderCollapse": "border-collapse", + "borderColor": "border-color", + "borderImage": "border-image", + "borderImageOutset": "border-image-outset", + "borderImageRepeat": "border-image-repeat", + "borderImageSlice": "border-image-slice", + "borderImageSource": "border-image-source", + "borderImageWidth": "border-image-width", + "borderLeft": "border-left", + "borderLeftColor": "border-left-color", + "borderLeftStyle": "border-left-style", + "borderLeftWidth": "border-left-width", + "borderRadius": "border-radius", + "borderRight": "border-right", + "borderRightColor": "border-right-color", + "borderRightStyle": "border-right-style", + "borderRightWidth": "border-right-width", + "borderSpacing": "border-spacing", + "borderStyle": "border-style", + "borderTop": "border-top", + "borderTopColor": "border-top-color", + "borderTopLeftRadius": "border-top-left-radius", + "borderTopRightRadius": "border-top-right-radius", + "borderTopStyle": "border-top-style", + "borderTopWidth": "border-top-width", + "borderWidth": "border-width", + "bottom": "bottom", + "boxShadow": "box-shadow", + "boxSizing": "box-sizing", + "captionSide": "caption-side", + "clear": "clear", + "clip": "clip", + "clipPath": "clip-path", + "clipRule": "clip-rule", + "color": "color", + "colorInterpolation": "color-interpolation", + "colorInterpolationFilters": "color-interpolation-filters", + "colorProfile": "color-profile", + "colorRendering": "color-rendering", + "content": "content", + "counterIncrement": "counter-increment", + "counterReset": "counter-reset", + "cursor": "cursor", + "direction": "direction", + "display": "display", + "dominantBaseline": "dominant-baseline", + "emptyCells": "empty-cells", + "enableBackground": "enable-background", + "fill": "fill", + "fillOpacity": "fill-opacity", + "fillRule": "fill-rule", + "filter": "filter", + "cssFloat": "float", + "floodColor": "flood-color", + "floodOpacity": "flood-opacity", + "font": "font", + "fontFamily": "font-family", + "fontSize": "font-size", + "fontStretch": "font-stretch", + "fontStyle": "font-style", + "fontVariant": "font-variant", + "fontWeight": "font-weight", + "glyphOrientationHorizontal": "glyph-orientation-horizontal", + "glyphOrientationVertical": "glyph-orientation-vertical", + "height": "height", + "imageRendering": "image-rendering", + "kerning": "kerning", + "left": "left", + "letterSpacing": "letter-spacing", + "lightingColor": "lighting-color", + "lineHeight": "line-height", + "listStyle": "list-style", + "listStyleImage": "list-style-image", + "listStylePosition": "list-style-position", + "listStyleType": "list-style-type", + "margin": "margin", + "marginBottom": "margin-bottom", + "marginLeft": "margin-left", + "marginRight": "margin-right", + "marginTop": "margin-top", + "marker": "marker", + "markerEnd": "marker-end", + "markerMid": "marker-mid", + "markerStart": "marker-start", + "mask": "mask", + "maxHeight": "max-height", + "maxWidth": "max-width", + "minHeight": "min-height", + "minWidth": "min-width", + "opacity": "opacity", + "orphans": "orphans", + "outline": "outline", + "outlineColor": "outline-color", + "outlineOffset": "outline-offset", + "outlineStyle": "outline-style", + "outlineWidth": "outline-width", + "overflow": "overflow", + "overflowX": "overflow-x", + "overflowY": "overflow-y", + "padding": "padding", + "paddingBottom": "padding-bottom", + "paddingLeft": "padding-left", + "paddingRight": "padding-right", + "paddingTop": "padding-top", + "page": "page", + "pageBreakAfter": "page-break-after", + "pageBreakBefore": "page-break-before", + "pageBreakInside": "page-break-inside", + "pointerEvents": "pointer-events", + "position": "position", + "quotes": "quotes", + "resize": "resize", + "right": "right", + "shapeRendering": "shape-rendering", + "size": "size", + "speak": "speak", + "src": "src", + "stopColor": "stop-color", + "stopOpacity": "stop-opacity", + "stroke": "stroke", + "strokeDasharray": "stroke-dasharray", + "strokeDashoffset": "stroke-dashoffset", + "strokeLinecap": "stroke-linecap", + "strokeLinejoin": "stroke-linejoin", + "strokeMiterlimit": "stroke-miterlimit", + "strokeOpacity": "stroke-opacity", + "strokeWidth": "stroke-width", + "tableLayout": "table-layout", + "textAlign": "text-align", + "textAnchor": "text-anchor", + "textDecoration": "text-decoration", + "textIndent": "text-indent", + "textLineThrough": "text-line-through", + "textLineThroughColor": "text-line-through-color", + "textLineThroughMode": "text-line-through-mode", + "textLineThroughStyle": "text-line-through-style", + "textLineThroughWidth": "text-line-through-width", + "textOverflow": "text-overflow", + "textOverline": "text-overline", + "textOverlineColor": "text-overline-color", + "textOverlineMode": "text-overline-mode", + "textOverlineStyle": "text-overline-style", + "textOverlineWidth": "text-overline-width", + "textRendering": "text-rendering", + "textShadow": "text-shadow", + "textTransform": "text-transform", + "textUnderline": "text-underline", + "textUnderlineColor": "text-underline-color", + "textUnderlineMode": "text-underline-mode", + "textUnderlineStyle": "text-underline-style", + "textUnderlineWidth": "text-underline-width", + "top": "top", + "unicodeBidi": "unicode-bidi", + "unicodeRange": "unicode-range", + "vectorEffect": "vector-effect", + "verticalAlign": "vertical-align", + "visibility": "visibility", + "whiteSpace": "white-space", + "widows": "widows", + "width": "width", + "wordBreak": "word-break", + "wordSpacing": "word-spacing", + "wordWrap": "word-wrap", + "writingMode": "writing-mode", + "zIndex": "z-index", + "zoom": "zoom", + }; + + // Elements that can be self-closing + var voidElems = { + "area": true, + "base": true, + "br": true, + "col": true, + "command": true, + "embed": true, + "hr": true, + "img": true, + "input": true, + "link": true, + "meta": true, + "param": true, + "source": true, + "wbr": true + }; + + var whitespace = [" ", "\t", "\n", "\r"]; + + // See https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType + var nodeTypes = { + ELEMENT_NODE: 1, + ATTRIBUTE_NODE: 2, + TEXT_NODE: 3, + CDATA_SECTION_NODE: 4, + ENTITY_REFERENCE_NODE: 5, + ENTITY_NODE: 6, + PROCESSING_INSTRUCTION_NODE: 7, + COMMENT_NODE: 8, + DOCUMENT_NODE: 9, + DOCUMENT_TYPE_NODE: 10, + DOCUMENT_FRAGMENT_NODE: 11, + NOTATION_NODE: 12 + }; + + function getElementsByTagName(tag) { + tag = tag.toUpperCase(); + var elems = []; + var allTags = (tag === "*"); + function getElems(node) { + var length = node.children.length; + for (var i = 0; i < length; i++) { + var child = node.children[i]; + if (allTags || (child.tagName === tag)) + elems.push(child); + getElems(child); + } + } + getElems(this); + elems._isLiveNodeList = true; + return elems; + } + + var Node = function () {}; + + Node.prototype = { + attributes: null, + childNodes: null, + localName: null, + nodeName: null, + parentNode: null, + textContent: null, + nextSibling: null, + previousSibling: null, + + get firstChild() { + return this.childNodes[0] || null; + }, + + get firstElementChild() { + return this.children[0] || null; + }, + + get lastChild() { + return this.childNodes[this.childNodes.length - 1] || null; + }, + + get lastElementChild() { + return this.children[this.children.length - 1] || null; + }, + + appendChild: function (child) { + if (child.parentNode) { + child.parentNode.removeChild(child); + } + + var last = this.lastChild; + if (last) + last.nextSibling = child; + child.previousSibling = last; + + if (child.nodeType === Node.ELEMENT_NODE) { + child.previousElementSibling = this.children[this.children.length - 1] || null; + this.children.push(child); + child.previousElementSibling && (child.previousElementSibling.nextElementSibling = child); + } + this.childNodes.push(child); + child.parentNode = this; + }, + + removeChild: function (child) { + var childNodes = this.childNodes; + var childIndex = childNodes.indexOf(child); + if (childIndex === -1) { + throw "removeChild: node not found"; + } else { + child.parentNode = null; + var prev = child.previousSibling; + var next = child.nextSibling; + if (prev) + prev.nextSibling = next; + if (next) + next.previousSibling = prev; + + if (child.nodeType === Node.ELEMENT_NODE) { + prev = child.previousElementSibling; + next = child.nextElementSibling; + if (prev) + prev.nextElementSibling = next; + if (next) + next.previousElementSibling = prev; + this.children.splice(this.children.indexOf(child), 1); + } + + child.previousSibling = child.nextSibling = null; + child.previousElementSibling = child.nextElementSibling = null; + + return childNodes.splice(childIndex, 1)[0]; + } + }, + + replaceChild: function (newNode, oldNode) { + var childNodes = this.childNodes; + var childIndex = childNodes.indexOf(oldNode); + if (childIndex === -1) { + throw "replaceChild: node not found"; + } else { + // This will take care of updating the new node if it was somewhere else before: + if (newNode.parentNode) + newNode.parentNode.removeChild(newNode); + + childNodes[childIndex] = newNode; + + // update the new node's sibling properties, and its new siblings' sibling properties + newNode.nextSibling = oldNode.nextSibling; + newNode.previousSibling = oldNode.previousSibling; + if (newNode.nextSibling) + newNode.nextSibling.previousSibling = newNode; + if (newNode.previousSibling) + newNode.previousSibling.nextSibling = newNode; + + newNode.parentNode = this; + + // Now deal with elements before we clear out those values for the old node, + // because it can help us take shortcuts here: + if (newNode.nodeType === Node.ELEMENT_NODE) { + if (oldNode.nodeType === Node.ELEMENT_NODE) { + // Both were elements, which makes this easier, we just swap things out: + newNode.previousElementSibling = oldNode.previousElementSibling; + newNode.nextElementSibling = oldNode.nextElementSibling; + if (newNode.previousElementSibling) + newNode.previousElementSibling.nextElementSibling = newNode; + if (newNode.nextElementSibling) + newNode.nextElementSibling.previousElementSibling = newNode; + this.children[this.children.indexOf(oldNode)] = newNode; + } else { + // Hard way: + newNode.previousElementSibling = (function() { + for (var i = childIndex - 1; i >= 0; i--) { + if (childNodes[i].nodeType === Node.ELEMENT_NODE) + return childNodes[i]; + } + return null; + })(); + if (newNode.previousElementSibling) { + newNode.nextElementSibling = newNode.previousElementSibling.nextElementSibling; + } else { + newNode.nextElementSibling = (function() { + for (var i = childIndex + 1; i < childNodes.length; i++) { + if (childNodes[i].nodeType === Node.ELEMENT_NODE) + return childNodes[i]; + } + return null; + })(); + } + if (newNode.previousElementSibling) + newNode.previousElementSibling.nextElementSibling = newNode; + if (newNode.nextElementSibling) + newNode.nextElementSibling.previousElementSibling = newNode; + + if (newNode.nextElementSibling) + this.children.splice(this.children.indexOf(newNode.nextElementSibling), 0, newNode); + else + this.children.push(newNode); + } + } else if (oldNode.nodeType === Node.ELEMENT_NODE) { + // new node is not an element node. + // if the old one was, update its element siblings: + if (oldNode.previousElementSibling) + oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling; + if (oldNode.nextElementSibling) + oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling; + this.children.splice(this.children.indexOf(oldNode), 1); + + // If the old node wasn't an element, neither the new nor the old node was an element, + // and the children array and its members shouldn't need any updating. + } + + + oldNode.parentNode = null; + oldNode.previousSibling = null; + oldNode.nextSibling = null; + if (oldNode.nodeType === Node.ELEMENT_NODE) { + oldNode.previousElementSibling = null; + oldNode.nextElementSibling = null; + } + return oldNode; + } + }, + + __JSDOMParser__: true, + }; + + for (var nodeType in nodeTypes) { + Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType]; + } + + var Attribute = function (name, value) { + this.name = name; + this._value = value; + }; + + Attribute.prototype = { + get value() { + return this._value; + }, + setValue: function(newValue) { + this._value = newValue; + }, + getEncodedValue: function() { + return encodeHTML(this._value); + }, + }; + + var Comment = function () { + this.childNodes = []; + }; + + Comment.prototype = { + __proto__: Node.prototype, + + nodeName: "#comment", + nodeType: Node.COMMENT_NODE + }; + + var Text = function () { + this.childNodes = []; + }; + + Text.prototype = { + __proto__: Node.prototype, + + nodeName: "#text", + nodeType: Node.TEXT_NODE, + get textContent() { + if (typeof this._textContent === "undefined") { + this._textContent = decodeHTML(this._innerHTML || ""); + } + return this._textContent; + }, + get innerHTML() { + if (typeof this._innerHTML === "undefined") { + this._innerHTML = encodeTextContentHTML(this._textContent || ""); + } + return this._innerHTML; + }, + + set innerHTML(newHTML) { + this._innerHTML = newHTML; + delete this._textContent; + }, + set textContent(newText) { + this._textContent = newText; + delete this._innerHTML; + }, + }; + + var Document = function (url) { + this.documentURI = url; + this.styleSheets = []; + this.childNodes = []; + this.children = []; + }; + + Document.prototype = { + __proto__: Node.prototype, + + nodeName: "#document", + nodeType: Node.DOCUMENT_NODE, + title: "", + + getElementsByTagName: getElementsByTagName, + + getElementById: function (id) { + function getElem(node) { + var length = node.children.length; + if (node.id === id) + return node; + for (var i = 0; i < length; i++) { + var el = getElem(node.children[i]); + if (el) + return el; + } + return null; + } + return getElem(this); + }, + + createElement: function (tag) { + var node = new Element(tag); + return node; + }, + + createTextNode: function (text) { + var node = new Text(); + node.textContent = text; + return node; + }, + + get baseURI() { + if (!this.hasOwnProperty("_baseURI")) { + this._baseURI = this.documentURI; + var baseElements = this.getElementsByTagName("base"); + var href = baseElements[0] && baseElements[0].getAttribute("href"); + if (href) { + try { + this._baseURI = (new URL(href, this._baseURI)).href; + } catch (ex) {/* Just fall back to documentURI */} + } + } + return this._baseURI; + }, + }; + + var Element = function (tag) { + // We use this to find the closing tag. + this._matchingTag = tag; + // We're explicitly a non-namespace aware parser, we just pretend it's all HTML. + var lastColonIndex = tag.lastIndexOf(":"); + if (lastColonIndex != -1) { + tag = tag.substring(lastColonIndex + 1); + } + this.attributes = []; + this.childNodes = []; + this.children = []; + this.nextElementSibling = this.previousElementSibling = null; + this.localName = tag.toLowerCase(); + this.tagName = tag.toUpperCase(); + this.style = new Style(this); + }; + + Element.prototype = { + __proto__: Node.prototype, + + nodeType: Node.ELEMENT_NODE, + + getElementsByTagName: getElementsByTagName, + + get className() { + return this.getAttribute("class") || ""; + }, + + set className(str) { + this.setAttribute("class", str); + }, + + get id() { + return this.getAttribute("id") || ""; + }, + + set id(str) { + this.setAttribute("id", str); + }, + + get href() { + return this.getAttribute("href") || ""; + }, + + set href(str) { + this.setAttribute("href", str); + }, + + get src() { + return this.getAttribute("src") || ""; + }, + + set src(str) { + this.setAttribute("src", str); + }, + + get srcset() { + return this.getAttribute("srcset") || ""; + }, + + set srcset(str) { + this.setAttribute("srcset", str); + }, + + get nodeName() { + return this.tagName; + }, + + get innerHTML() { + function getHTML(node) { + var i = 0; + for (i = 0; i < node.childNodes.length; i++) { + var child = node.childNodes[i]; + if (child.localName) { + arr.push("<" + child.localName); + + // serialize attribute list + for (var j = 0; j < child.attributes.length; j++) { + var attr = child.attributes[j]; + // the attribute value will be HTML escaped. + var val = attr.getEncodedValue(); + var quote = (val.indexOf('"') === -1 ? '"' : "'"); + arr.push(" " + attr.name + "=" + quote + val + quote); + } + + if (child.localName in voidElems && !child.childNodes.length) { + // if this is a self-closing element, end it here + arr.push("/>"); + } else { + // otherwise, add its children + arr.push(">"); + getHTML(child); + arr.push("</" + child.localName + ">"); + } + } else { + // This is a text node, so asking for innerHTML won't recurse. + arr.push(child.innerHTML); + } + } + } + + // Using Array.join() avoids the overhead from lazy string concatenation. + var arr = []; + getHTML(this); + return arr.join(""); + }, + + set innerHTML(html) { + var parser = new JSDOMParser(); + var node = parser.parse(html); + var i; + for (i = this.childNodes.length; --i >= 0;) { + this.childNodes[i].parentNode = null; + } + this.childNodes = node.childNodes; + this.children = node.children; + for (i = this.childNodes.length; --i >= 0;) { + this.childNodes[i].parentNode = this; + } + }, + + set textContent(text) { + // clear parentNodes for existing children + for (var i = this.childNodes.length; --i >= 0;) { + this.childNodes[i].parentNode = null; + } + + var node = new Text(); + this.childNodes = [ node ]; + this.children = []; + node.textContent = text; + node.parentNode = this; + }, + + get textContent() { + function getText(node) { + var nodes = node.childNodes; + for (var i = 0; i < nodes.length; i++) { + var child = nodes[i]; + if (child.nodeType === 3) { + text.push(child.textContent); + } else { + getText(child); + } + } + } + + // Using Array.join() avoids the overhead from lazy string concatenation. + // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes + var text = []; + getText(this); + return text.join(""); + }, + + getAttribute: function (name) { + for (var i = this.attributes.length; --i >= 0;) { + var attr = this.attributes[i]; + if (attr.name === name) { + return attr.value; + } + } + return undefined; + }, + + setAttribute: function (name, value) { + for (var i = this.attributes.length; --i >= 0;) { + var attr = this.attributes[i]; + if (attr.name === name) { + attr.setValue(value); + return; + } + } + this.attributes.push(new Attribute(name, value)); + }, + + removeAttribute: function (name) { + for (var i = this.attributes.length; --i >= 0;) { + var attr = this.attributes[i]; + if (attr.name === name) { + this.attributes.splice(i, 1); + break; + } + } + }, + + hasAttribute: function (name) { + return this.attributes.some(function (attr) { + return attr.name == name; + }); + }, + }; + + var Style = function (node) { + this.node = node; + }; + + // getStyle() and setStyle() use the style attribute string directly. This + // won't be very efficient if there are a lot of style manipulations, but + // it's the easiest way to make sure the style attribute string and the JS + // style property stay in sync. Readability.js doesn't do many style + // manipulations, so this should be okay. + Style.prototype = { + getStyle: function (styleName) { + var attr = this.node.getAttribute("style"); + if (!attr) + return undefined; + + var styles = attr.split(";"); + for (var i = 0; i < styles.length; i++) { + var style = styles[i].split(":"); + var name = style[0].trim(); + if (name === styleName) + return style[1].trim(); + } + + return undefined; + }, + + setStyle: function (styleName, styleValue) { + var value = this.node.getAttribute("style") || ""; + var index = 0; + do { + var next = value.indexOf(";", index) + 1; + var length = next - index - 1; + var style = (length > 0 ? value.substr(index, length) : value.substr(index)); + if (style.substr(0, style.indexOf(":")).trim() === styleName) { + value = value.substr(0, index).trim() + (next ? " " + value.substr(next).trim() : ""); + break; + } + index = next; + } while (index); + + value += " " + styleName + ": " + styleValue + ";"; + this.node.setAttribute("style", value.trim()); + } + }; + + // For each item in styleMap, define a getter and setter on the style + // property. + for (var jsName in styleMap) { + (function (cssName) { + Style.prototype.__defineGetter__(jsName, function () { + return this.getStyle(cssName); + }); + Style.prototype.__defineSetter__(jsName, function (value) { + this.setStyle(cssName, value); + }); + })(styleMap[jsName]); + } + + var JSDOMParser = function () { + this.currentChar = 0; + + // In makeElementNode() we build up many strings one char at a time. Using + // += for this results in lots of short-lived intermediate strings. It's + // better to build an array of single-char strings and then join() them + // together at the end. And reusing a single array (i.e. |this.strBuf|) + // over and over for this purpose uses less memory than using a new array + // for each string. + this.strBuf = []; + + // Similarly, we reuse this array to return the two arguments from + // makeElementNode(), which saves us from having to allocate a new array + // every time. + this.retPair = []; + + this.errorState = ""; + }; + + JSDOMParser.prototype = { + error: function(m) { + if (typeof dump !== "undefined") { + dump("JSDOMParser error: " + m + "\n"); + } else if (typeof console !== "undefined") { + console.log("JSDOMParser error: " + m + "\n"); + } + this.errorState += m + "\n"; + }, + + /** + * Look at the next character without advancing the index. + */ + peekNext: function () { + return this.html[this.currentChar]; + }, + + /** + * Get the next character and advance the index. + */ + nextChar: function () { + return this.html[this.currentChar++]; + }, + + /** + * Called after a quote character is read. This finds the next quote + * character and returns the text string in between. + */ + readString: function (quote) { + var str; + var n = this.html.indexOf(quote, this.currentChar); + if (n === -1) { + this.currentChar = this.html.length; + str = null; + } else { + str = this.html.substring(this.currentChar, n); + this.currentChar = n + 1; + } + + return str; + }, + + /** + * Called when parsing a node. This finds the next name/value attribute + * pair and adds the result to the attributes list. + */ + readAttribute: function (node) { + var name = ""; + + var n = this.html.indexOf("=", this.currentChar); + if (n === -1) { + this.currentChar = this.html.length; + } else { + // Read until a '=' character is hit; this will be the attribute key + name = this.html.substring(this.currentChar, n); + this.currentChar = n + 1; + } + + if (!name) + return; + + // After a '=', we should see a '"' for the attribute value + var c = this.nextChar(); + if (c !== '"' && c !== "'") { + this.error("Error reading attribute " + name + ", expecting '\"'"); + return; + } + + // Read the attribute value (and consume the matching quote) + var value = this.readString(c); + + node.attributes.push(new Attribute(name, decodeHTML(value))); + + return; + }, + + /** + * Parses and returns an Element node. This is called after a '<' has been + * read. + * + * @returns an array; the first index of the array is the parsed node; + * the second index is a boolean indicating whether this is a void + * Element + */ + makeElementNode: function (retPair) { + var c = this.nextChar(); + + // Read the Element tag name + var strBuf = this.strBuf; + strBuf.length = 0; + while (whitespace.indexOf(c) == -1 && c !== ">" && c !== "/") { + if (c === undefined) + return false; + strBuf.push(c); + c = this.nextChar(); + } + var tag = strBuf.join(""); + + if (!tag) + return false; + + var node = new Element(tag); + + // Read Element attributes + while (c !== "/" && c !== ">") { + if (c === undefined) + return false; + while (whitespace.indexOf(this.html[this.currentChar++]) != -1) { + // Advance cursor to first non-whitespace char. + } + this.currentChar--; + c = this.nextChar(); + if (c !== "/" && c !== ">") { + --this.currentChar; + this.readAttribute(node); + } + } + + // If this is a self-closing tag, read '/>' + var closed = false; + if (c === "/") { + closed = true; + c = this.nextChar(); + if (c !== ">") { + this.error("expected '>' to close " + tag); + return false; + } + } + + retPair[0] = node; + retPair[1] = closed; + return true; + }, + + /** + * If the current input matches this string, advance the input index; + * otherwise, do nothing. + * + * @returns whether input matched string + */ + match: function (str) { + var strlen = str.length; + if (this.html.substr(this.currentChar, strlen).toLowerCase() === str.toLowerCase()) { + this.currentChar += strlen; + return true; + } + return false; + }, + + /** + * Searches the input until a string is found and discards all input up to + * and including the matched string. + */ + discardTo: function (str) { + var index = this.html.indexOf(str, this.currentChar) + str.length; + if (index === -1) + this.currentChar = this.html.length; + this.currentChar = index; + }, + + /** + * Reads child nodes for the given node. + */ + readChildren: function (node) { + var child; + while ((child = this.readNode())) { + // Don't keep Comment nodes + if (child.nodeType !== 8) { + node.appendChild(child); + } + } + }, + + discardNextComment: function() { + if (this.match("--")) { + this.discardTo("-->"); + } else { + var c = this.nextChar(); + while (c !== ">") { + if (c === undefined) + return null; + if (c === '"' || c === "'") + this.readString(c); + c = this.nextChar(); + } + } + return new Comment(); + }, + + + /** + * Reads the next child node from the input. If we're reading a closing + * tag, or if we've reached the end of input, return null. + * + * @returns the node + */ + readNode: function () { + var c = this.nextChar(); + + if (c === undefined) + return null; + + // Read any text as Text node + var textNode; + if (c !== "<") { + --this.currentChar; + textNode = new Text(); + var n = this.html.indexOf("<", this.currentChar); + if (n === -1) { + textNode.innerHTML = this.html.substring(this.currentChar, this.html.length); + this.currentChar = this.html.length; + } else { + textNode.innerHTML = this.html.substring(this.currentChar, n); + this.currentChar = n; + } + return textNode; + } + + if (this.match("![CDATA[")) { + var endChar = this.html.indexOf("]]>", this.currentChar); + if (endChar === -1) { + this.error("unclosed CDATA section"); + return null; + } + textNode = new Text(); + textNode.textContent = this.html.substring(this.currentChar, endChar); + this.currentChar = endChar + ("]]>").length; + return textNode; + } + + c = this.peekNext(); + + // Read Comment node. Normally, Comment nodes know their inner + // textContent, but we don't really care about Comment nodes (we throw + // them away in readChildren()). So just returning an empty Comment node + // here is sufficient. + if (c === "!" || c === "?") { + // We're still before the ! or ? that is starting this comment: + this.currentChar++; + return this.discardNextComment(); + } + + // If we're reading a closing tag, return null. This means we've reached + // the end of this set of child nodes. + if (c === "/") { + --this.currentChar; + return null; + } + + // Otherwise, we're looking at an Element node + var result = this.makeElementNode(this.retPair); + if (!result) + return null; + + var node = this.retPair[0]; + var closed = this.retPair[1]; + var localName = node.localName; + + // If this isn't a void Element, read its child nodes + if (!closed) { + this.readChildren(node); + var closingTag = "</" + node._matchingTag + ">"; + if (!this.match(closingTag)) { + this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length)); + return null; + } + } + + // Only use the first title, because SVG might have other + // title elements which we don't care about (medium.com + // does this, at least). + if (localName === "title" && !this.doc.title) { + this.doc.title = node.textContent.trim(); + } else if (localName === "head") { + this.doc.head = node; + } else if (localName === "body") { + this.doc.body = node; + } else if (localName === "html") { + this.doc.documentElement = node; + } + + return node; + }, + + /** + * Parses an HTML string and returns a JS implementation of the Document. + */ + parse: function (html, url) { + this.html = html; + var doc = this.doc = new Document(url); + this.readChildren(doc); + + // If this is an HTML document, remove root-level children except for the + // <html> node + if (doc.documentElement) { + for (var i = doc.childNodes.length; --i >= 0;) { + var child = doc.childNodes[i]; + if (child !== doc.documentElement) { + doc.removeChild(child); + } + } + } + + return doc; + } + }; + + // Attach the standard DOM types to the global scope + global.Node = Node; + global.Comment = Comment; + global.Document = Document; + global.Element = Element; + global.Text = Text; + + // Attach JSDOMParser to the global scope + global.JSDOMParser = JSDOMParser; + +})(this); + +if (typeof module === "object") { + module.exports = this.JSDOMParser; +} diff --git a/toolkit/components/reader/Readability-readerable.js b/toolkit/components/reader/Readability-readerable.js new file mode 100644 index 0000000000..fe9c89d789 --- /dev/null +++ b/toolkit/components/reader/Readability-readerable.js @@ -0,0 +1,116 @@ +/* eslint-env es6:false */ +/* globals exports */ +/* + * DO NOT MODIFY THIS FILE DIRECTLY! + * + * This is a shared library that is maintained in an external repo: + * https://github.com/mozilla/readability + */ + +/* + * Copyright (c) 2010 Arc90 Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This code is heavily based on Arc90's readability.js (1.7.1) script + * available at: http://code.google.com/p/arc90labs-readability + */ + +var REGEXPS = { + // NOTE: These two regular expressions are duplicated in + // Readability.js. Please keep both copies in sync. + unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, + okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i, +}; + +function isNodeVisible(node) { + // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. + return (!node.style || node.style.display != "none") + && !node.hasAttribute("hidden") + //check for "fallback-image" so that wikimedia math images are displayed + && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); +} + +/** + * Decides whether or not the document is reader-able without parsing the whole thing. + * @param {Object} options Configuration object. + * @param {number} [options.minContentLength=140] The minimum node content length used to decide if the document is readerable. + * @param {number} [options.minScore=20] The minumum cumulated 'score' used to determine if the document is readerable. + * @param {Function} [options.visibilityChecker=isNodeVisible] The function used to determine if a node is visible. + * @return {boolean} Whether or not we suspect Readability.parse() will suceeed at returning an article object. + */ +function isProbablyReaderable(doc, options = {}) { + // For backward compatibility reasons 'options' can either be a configuration object or the function used + // to determine if a node is visible. + if (typeof options == "function") { + options = { visibilityChecker: options }; + } + + var defaultOptions = { minScore: 20, minContentLength: 140, visibilityChecker: isNodeVisible }; + options = Object.assign(defaultOptions, options); + + var nodes = doc.querySelectorAll("p, pre, article"); + + // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable. + // Some articles' DOM structures might look like + // <div> + // Sentences<br> + // <br> + // Sentences<br> + // </div> + var brNodes = doc.querySelectorAll("div > br"); + if (brNodes.length) { + var set = new Set(nodes); + [].forEach.call(brNodes, function (node) { + set.add(node.parentNode); + }); + nodes = Array.from(set); + } + + var score = 0; + // This is a little cheeky, we use the accumulator 'score' to decide what to return from + // this callback: + return [].some.call(nodes, function (node) { + if (!options.visibilityChecker(node)) { + return false; + } + + var matchString = node.className + " " + node.id; + if (REGEXPS.unlikelyCandidates.test(matchString) && + !REGEXPS.okMaybeItsACandidate.test(matchString)) { + return false; + } + + if (node.matches("li p")) { + return false; + } + + var textContentLength = node.textContent.trim().length; + if (textContentLength < options.minContentLength) { + return false; + } + + score += Math.sqrt(textContentLength - options.minContentLength); + + if (score > options.minScore) { + return true; + } + return false; + }); +} + +if (typeof module === "object") { + module.exports = isProbablyReaderable; +} diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js new file mode 100644 index 0000000000..44604970c1 --- /dev/null +++ b/toolkit/components/reader/Readability.js @@ -0,0 +1,2290 @@ +/*eslint-env es6:false*/ +/* + * DO NOT MODIFY THIS FILE DIRECTLY! + * + * This is a shared library that is maintained in an external repo: + * https://github.com/mozilla/readability + */ + +/* + * Copyright (c) 2010 Arc90 Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This code is heavily based on Arc90's readability.js (1.7.1) script + * available at: http://code.google.com/p/arc90labs-readability + */ + +/** + * Public constructor. + * @param {HTMLDocument} doc The document to parse. + * @param {Object} options The options object. + */ +function Readability(doc, options) { + // In some older versions, people passed a URI as the first argument. Cope: + if (options && options.documentElement) { + doc = options; + options = arguments[2]; + } else if (!doc || !doc.documentElement) { + throw new Error("First argument to Readability constructor should be a document object."); + } + options = options || {}; + + this._doc = doc; + this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__; + this._articleTitle = null; + this._articleByline = null; + this._articleDir = null; + this._articleSiteName = null; + this._attempts = []; + + // Configurable options + this._debug = !!options.debug; + this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE; + this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES; + this._charThreshold = options.charThreshold || this.DEFAULT_CHAR_THRESHOLD; + this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []); + this._keepClasses = !!options.keepClasses; + this._serializer = options.serializer || function(el) { + return el.innerHTML; + }; + this._disableJSONLD = !!options.disableJSONLD; + + // Start with all flags set + this._flags = this.FLAG_STRIP_UNLIKELYS | + this.FLAG_WEIGHT_CLASSES | + this.FLAG_CLEAN_CONDITIONALLY; + + + // Control whether log messages are sent to the console + if (this._debug) { + let logNode = function(node) { + if (node.nodeType == node.TEXT_NODE) { + return `${node.nodeName} ("${node.textContent}")`; + } + let attrPairs = Array.from(node.attributes || [], function(attr) { + return `${attr.name}="${attr.value}"`; + }).join(" "); + return `<${node.localName} ${attrPairs}>`; + }; + this.log = function () { + if (typeof dump !== "undefined") { + var msg = Array.prototype.map.call(arguments, function(x) { + return (x && x.nodeName) ? logNode(x) : x; + }).join(" "); + dump("Reader: (Readability) " + msg + "\n"); + } else if (typeof console !== "undefined") { + let args = Array.from(arguments, arg => { + if (arg && arg.nodeType == this.ELEMENT_NODE) { + return logNode(arg); + } + return arg; + }); + args.unshift("Reader: (Readability)"); + console.log.apply(console, args); + } + }; + } else { + this.log = function () {}; + } +} + +Readability.prototype = { + FLAG_STRIP_UNLIKELYS: 0x1, + FLAG_WEIGHT_CLASSES: 0x2, + FLAG_CLEAN_CONDITIONALLY: 0x4, + + // https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType + ELEMENT_NODE: 1, + TEXT_NODE: 3, + + // Max number of nodes supported by this parser. Default: 0 (no limit) + DEFAULT_MAX_ELEMS_TO_PARSE: 0, + + // The number of top candidates to consider when analysing how + // tight the competition is among candidates. + DEFAULT_N_TOP_CANDIDATES: 5, + + // Element tags to score by default. + DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","), + + // The default number of chars an article must have in order to return a result + DEFAULT_CHAR_THRESHOLD: 500, + + // All of the regular expressions in use within readability. + // Defined up here so we don't instantiate them repeatedly in loops. + REGEXPS: { + // NOTE: These two regular expressions are duplicated in + // Readability-readerable.js. Please keep both copies in sync. + unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, + okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i, + + positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i, + negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, + extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i, + byline: /byline|author|dateline|writtenby|p-author/i, + replaceFonts: /<(\/?)font[^>]*>/gi, + normalize: /\s{2,}/g, + videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i, + shareElements: /(\b|_)(share|sharedaddy)(\b|_)/i, + nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, + prevLink: /(prev|earl|old|new|<|«)/i, + tokenize: /\W+/g, + whitespace: /^\s*$/, + hasContent: /\S$/, + hashUrl: /^#.+/, + srcsetUrl: /(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/g, + b64DataUrl: /^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i, + // See: https://schema.org/Article + jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/ + }, + + UNLIKELY_ROLES: [ "menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog" ], + + DIV_TO_P_ELEMS: new Set([ "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL" ]), + + ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"], + + PRESENTATIONAL_ATTRIBUTES: [ "align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace" ], + + DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [ "TABLE", "TH", "TD", "HR", "PRE" ], + + // The commented out elements qualify as phrasing content but tend to be + // removed by readability when put into paragraphs, so we ignore them here. + PHRASING_ELEMS: [ + // "CANVAS", "IFRAME", "SVG", "VIDEO", + "ABBR", "AUDIO", "B", "BDO", "BR", "BUTTON", "CITE", "CODE", "DATA", + "DATALIST", "DFN", "EM", "EMBED", "I", "IMG", "INPUT", "KBD", "LABEL", + "MARK", "MATH", "METER", "NOSCRIPT", "OBJECT", "OUTPUT", "PROGRESS", "Q", + "RUBY", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "SUB", + "SUP", "TEXTAREA", "TIME", "VAR", "WBR" + ], + + // These are the classes that readability sets itself. + CLASSES_TO_PRESERVE: [ "page" ], + + // These are the list of HTML entities that need to be escaped. + HTML_ESCAPE_MAP: { + "lt": "<", + "gt": ">", + "amp": "&", + "quot": '"', + "apos": "'", + }, + + /** + * Run any post-process modifications to article content as necessary. + * + * @param Element + * @return void + **/ + _postProcessContent: function(articleContent) { + // Readability cannot open relative uris so we convert them to absolute uris. + this._fixRelativeUris(articleContent); + + this._simplifyNestedElements(articleContent); + + if (!this._keepClasses) { + // Remove classes. + this._cleanClasses(articleContent); + } + }, + + /** + * Iterates over a NodeList, calls `filterFn` for each node and removes node + * if function returned `true`. + * + * If function is not passed, removes all the nodes in node list. + * + * @param NodeList nodeList The nodes to operate on + * @param Function filterFn the function to use as a filter + * @return void + */ + _removeNodes: function(nodeList, filterFn) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _removeNodes"); + } + for (var i = nodeList.length - 1; i >= 0; i--) { + var node = nodeList[i]; + var parentNode = node.parentNode; + if (parentNode) { + if (!filterFn || filterFn.call(this, node, i, nodeList)) { + parentNode.removeChild(node); + } + } + } + }, + + /** + * Iterates over a NodeList, and calls _setNodeTag for each node. + * + * @param NodeList nodeList The nodes to operate on + * @param String newTagName the new tag name to use + * @return void + */ + _replaceNodeTags: function(nodeList, newTagName) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _replaceNodeTags"); + } + for (const node of nodeList) { + this._setNodeTag(node, newTagName); + } + }, + + /** + * Iterate over a NodeList, which doesn't natively fully implement the Array + * interface. + * + * For convenience, the current object context is applied to the provided + * iterate function. + * + * @param NodeList nodeList The NodeList. + * @param Function fn The iterate function. + * @return void + */ + _forEachNode: function(nodeList, fn) { + Array.prototype.forEach.call(nodeList, fn, this); + }, + + /** + * Iterate over a NodeList, and return the first node that passes + * the supplied test function + * + * For convenience, the current object context is applied to the provided + * test function. + * + * @param NodeList nodeList The NodeList. + * @param Function fn The test function. + * @return void + */ + _findNode: function(nodeList, fn) { + return Array.prototype.find.call(nodeList, fn, this); + }, + + /** + * Iterate over a NodeList, return true if any of the provided iterate + * function calls returns true, false otherwise. + * + * For convenience, the current object context is applied to the + * provided iterate function. + * + * @param NodeList nodeList The NodeList. + * @param Function fn The iterate function. + * @return Boolean + */ + _someNode: function(nodeList, fn) { + return Array.prototype.some.call(nodeList, fn, this); + }, + + /** + * Iterate over a NodeList, return true if all of the provided iterate + * function calls return true, false otherwise. + * + * For convenience, the current object context is applied to the + * provided iterate function. + * + * @param NodeList nodeList The NodeList. + * @param Function fn The iterate function. + * @return Boolean + */ + _everyNode: function(nodeList, fn) { + return Array.prototype.every.call(nodeList, fn, this); + }, + + /** + * Concat all nodelists passed as arguments. + * + * @return ...NodeList + * @return Array + */ + _concatNodeLists: function() { + var slice = Array.prototype.slice; + var args = slice.call(arguments); + var nodeLists = args.map(function(list) { + return slice.call(list); + }); + return Array.prototype.concat.apply([], nodeLists); + }, + + _getAllNodesWithTag: function(node, tagNames) { + if (node.querySelectorAll) { + return node.querySelectorAll(tagNames.join(",")); + } + return [].concat.apply([], tagNames.map(function(tag) { + var collection = node.getElementsByTagName(tag); + return Array.isArray(collection) ? collection : Array.from(collection); + })); + }, + + /** + * Removes the class="" attribute from every element in the given + * subtree, except those that match CLASSES_TO_PRESERVE and + * the classesToPreserve array from the options object. + * + * @param Element + * @return void + */ + _cleanClasses: function(node) { + var classesToPreserve = this._classesToPreserve; + var className = (node.getAttribute("class") || "") + .split(/\s+/) + .filter(function(cls) { + return classesToPreserve.indexOf(cls) != -1; + }) + .join(" "); + + if (className) { + node.setAttribute("class", className); + } else { + node.removeAttribute("class"); + } + + for (node = node.firstElementChild; node; node = node.nextElementSibling) { + this._cleanClasses(node); + } + }, + + /** + * Converts each <a> and <img> uri in the given element to an absolute URI, + * ignoring #ref URIs. + * + * @param Element + * @return void + */ + _fixRelativeUris: function(articleContent) { + var baseURI = this._doc.baseURI; + var documentURI = this._doc.documentURI; + function toAbsoluteURI(uri) { + // Leave hash links alone if the base URI matches the document URI: + if (baseURI == documentURI && uri.charAt(0) == "#") { + return uri; + } + + // Otherwise, resolve against base URI: + try { + return new URL(uri, baseURI).href; + } catch (ex) { + // Something went wrong, just return the original: + } + return uri; + } + + var links = this._getAllNodesWithTag(articleContent, ["a"]); + this._forEachNode(links, function(link) { + var href = link.getAttribute("href"); + if (href) { + // Remove links with javascript: URIs, since + // they won't work after scripts have been removed from the page. + if (href.indexOf("javascript:") === 0) { + // if the link only contains simple text content, it can be converted to a text node + if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) { + var text = this._doc.createTextNode(link.textContent); + link.parentNode.replaceChild(text, link); + } else { + // if the link has multiple children, they should all be preserved + var container = this._doc.createElement("span"); + while (link.firstChild) { + container.appendChild(link.firstChild); + } + link.parentNode.replaceChild(container, link); + } + } else { + link.setAttribute("href", toAbsoluteURI(href)); + } + } + }); + + var medias = this._getAllNodesWithTag(articleContent, [ + "img", "picture", "figure", "video", "audio", "source" + ]); + + this._forEachNode(medias, function(media) { + var src = media.getAttribute("src"); + var poster = media.getAttribute("poster"); + var srcset = media.getAttribute("srcset"); + + if (src) { + media.setAttribute("src", toAbsoluteURI(src)); + } + + if (poster) { + media.setAttribute("poster", toAbsoluteURI(poster)); + } + + if (srcset) { + var newSrcset = srcset.replace(this.REGEXPS.srcsetUrl, function(_, p1, p2, p3) { + return toAbsoluteURI(p1) + (p2 || "") + p3; + }); + + media.setAttribute("srcset", newSrcset); + } + }); + }, + + _simplifyNestedElements: function(articleContent) { + var node = articleContent; + + while (node) { + if (node.parentNode && ["DIV", "SECTION"].includes(node.tagName) && !(node.id && node.id.startsWith("readability"))) { + if (this._isElementWithoutContent(node)) { + node = this._removeAndGetNext(node); + continue; + } else if (this._hasSingleTagInsideElement(node, "DIV") || this._hasSingleTagInsideElement(node, "SECTION")) { + var child = node.children[0]; + for (var i = 0; i < node.attributes.length; i++) { + child.setAttribute(node.attributes[i].name, node.attributes[i].value); + } + node.parentNode.replaceChild(child, node); + node = child; + continue; + } + } + + node = this._getNextNode(node); + } + }, + + /** + * Get the article title as an H1. + * + * @return string + **/ + _getArticleTitle: function() { + var doc = this._doc; + var curTitle = ""; + var origTitle = ""; + + try { + curTitle = origTitle = doc.title.trim(); + + // If they had an element with id "title" in their HTML + if (typeof curTitle !== "string") + curTitle = origTitle = this._getInnerText(doc.getElementsByTagName("title")[0]); + } catch (e) {/* ignore exceptions setting the title. */} + + var titleHadHierarchicalSeparators = false; + function wordCount(str) { + return str.split(/\s+/).length; + } + + // If there's a separator in the title, first remove the final part + if ((/ [\|\-\\\/>»] /).test(curTitle)) { + titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle); + curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, "$1"); + + // If the resulting title is too short (3 words or fewer), remove + // the first part instead: + if (wordCount(curTitle) < 3) + curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, "$1"); + } else if (curTitle.indexOf(": ") !== -1) { + // Check if we have an heading containing this exact string, so we + // could assume it's the full title. + var headings = this._concatNodeLists( + doc.getElementsByTagName("h1"), + doc.getElementsByTagName("h2") + ); + var trimmedTitle = curTitle.trim(); + var match = this._someNode(headings, function(heading) { + return heading.textContent.trim() === trimmedTitle; + }); + + // If we don't, let's extract the title out of the original title string. + if (!match) { + curTitle = origTitle.substring(origTitle.lastIndexOf(":") + 1); + + // If the title is now too short, try the first colon instead: + if (wordCount(curTitle) < 3) { + curTitle = origTitle.substring(origTitle.indexOf(":") + 1); + // But if we have too many words before the colon there's something weird + // with the titles and the H tags so let's just use the original title instead + } else if (wordCount(origTitle.substr(0, origTitle.indexOf(":"))) > 5) { + curTitle = origTitle; + } + } + } else if (curTitle.length > 150 || curTitle.length < 15) { + var hOnes = doc.getElementsByTagName("h1"); + + if (hOnes.length === 1) + curTitle = this._getInnerText(hOnes[0]); + } + + curTitle = curTitle.trim().replace(this.REGEXPS.normalize, " "); + // If we now have 4 words or fewer as our title, and either no + // 'hierarchical' separators (\, /, > or ») were found in the original + // title or we decreased the number of words by more than 1 word, use + // the original title. + var curTitleWordCount = wordCount(curTitle); + if (curTitleWordCount <= 4 && + (!titleHadHierarchicalSeparators || + curTitleWordCount != wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1)) { + curTitle = origTitle; + } + + return curTitle; + }, + + /** + * Prepare the HTML document for readability to scrape it. + * This includes things like stripping javascript, CSS, and handling terrible markup. + * + * @return void + **/ + _prepDocument: function() { + var doc = this._doc; + + // Remove all style tags in head + this._removeNodes(this._getAllNodesWithTag(doc, ["style"])); + + if (doc.body) { + this._replaceBrs(doc.body); + } + + this._replaceNodeTags(this._getAllNodesWithTag(doc, ["font"]), "SPAN"); + }, + + /** + * Finds the next node, starting from the given node, and ignoring + * whitespace in between. If the given node is an element, the same node is + * returned. + */ + _nextNode: function (node) { + var next = node; + while (next + && (next.nodeType != this.ELEMENT_NODE) + && this.REGEXPS.whitespace.test(next.textContent)) { + next = next.nextSibling; + } + return next; + }, + + /** + * Replaces 2 or more successive <br> elements with a single <p>. + * Whitespace between <br> elements are ignored. For example: + * <div>foo<br>bar<br> <br><br>abc</div> + * will become: + * <div>foo<br>bar<p>abc</p></div> + */ + _replaceBrs: function (elem) { + this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function(br) { + var next = br.nextSibling; + + // Whether 2 or more <br> elements have been found and replaced with a + // <p> block. + var replaced = false; + + // If we find a <br> chain, remove the <br>s until we hit another node + // or non-whitespace. This leaves behind the first <br> in the chain + // (which will be replaced with a <p> later). + while ((next = this._nextNode(next)) && (next.tagName == "BR")) { + replaced = true; + var brSibling = next.nextSibling; + next.parentNode.removeChild(next); + next = brSibling; + } + + // If we removed a <br> chain, replace the remaining <br> with a <p>. Add + // all sibling nodes as children of the <p> until we hit another <br> + // chain. + if (replaced) { + var p = this._doc.createElement("p"); + br.parentNode.replaceChild(p, br); + + next = p.nextSibling; + while (next) { + // If we've hit another <br><br>, we're done adding children to this <p>. + if (next.tagName == "BR") { + var nextElem = this._nextNode(next.nextSibling); + if (nextElem && nextElem.tagName == "BR") + break; + } + + if (!this._isPhrasingContent(next)) + break; + + // Otherwise, make this node a child of the new <p>. + var sibling = next.nextSibling; + p.appendChild(next); + next = sibling; + } + + while (p.lastChild && this._isWhitespace(p.lastChild)) { + p.removeChild(p.lastChild); + } + + if (p.parentNode.tagName === "P") + this._setNodeTag(p.parentNode, "DIV"); + } + }); + }, + + _setNodeTag: function (node, tag) { + this.log("_setNodeTag", node, tag); + if (this._docJSDOMParser) { + node.localName = tag.toLowerCase(); + node.tagName = tag.toUpperCase(); + return node; + } + + var replacement = node.ownerDocument.createElement(tag); + while (node.firstChild) { + replacement.appendChild(node.firstChild); + } + node.parentNode.replaceChild(replacement, node); + if (node.readability) + replacement.readability = node.readability; + + for (var i = 0; i < node.attributes.length; i++) { + try { + replacement.setAttribute(node.attributes[i].name, node.attributes[i].value); + } catch (ex) { + /* it's possible for setAttribute() to throw if the attribute name + * isn't a valid XML Name. Such attributes can however be parsed from + * source in HTML docs, see https://github.com/whatwg/html/issues/4275, + * so we can hit them here and then throw. We don't care about such + * attributes so we ignore them. + */ + } + } + return replacement; + }, + + /** + * Prepare the article node for display. Clean out any inline styles, + * iframes, forms, strip extraneous <p> tags, etc. + * + * @param Element + * @return void + **/ + _prepArticle: function(articleContent) { + this._cleanStyles(articleContent); + + // Check for data tables before we continue, to avoid removing items in + // those tables, which will often be isolated even though they're + // visually linked to other content-ful elements (text, images, etc.). + this._markDataTables(articleContent); + + this._fixLazyImages(articleContent); + + // Clean out junk from the article content + this._cleanConditionally(articleContent, "form"); + this._cleanConditionally(articleContent, "fieldset"); + this._clean(articleContent, "object"); + this._clean(articleContent, "embed"); + this._clean(articleContent, "footer"); + this._clean(articleContent, "link"); + this._clean(articleContent, "aside"); + + // Clean out elements with little content that have "share" in their id/class combinations from final top candidates, + // which means we don't remove the top candidates even they have "share". + + var shareElementThreshold = this.DEFAULT_CHAR_THRESHOLD; + + this._forEachNode(articleContent.children, function (topCandidate) { + this._cleanMatchedNodes(topCandidate, function (node, matchString) { + return this.REGEXPS.shareElements.test(matchString) && node.textContent.length < shareElementThreshold; + }); + }); + + this._clean(articleContent, "iframe"); + this._clean(articleContent, "input"); + this._clean(articleContent, "textarea"); + this._clean(articleContent, "select"); + this._clean(articleContent, "button"); + this._cleanHeaders(articleContent); + + // Do these last as the previous stuff may have removed junk + // that will affect these + this._cleanConditionally(articleContent, "table"); + this._cleanConditionally(articleContent, "ul"); + this._cleanConditionally(articleContent, "div"); + + // replace H1 with H2 as H1 should be only title that is displayed separately + this._replaceNodeTags(this._getAllNodesWithTag(articleContent, ["h1"]), "h2"); + + // Remove extra paragraphs + this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function (paragraph) { + var imgCount = paragraph.getElementsByTagName("img").length; + var embedCount = paragraph.getElementsByTagName("embed").length; + var objectCount = paragraph.getElementsByTagName("object").length; + // At this point, nasty iframes have been removed, only remain embedded video ones. + var iframeCount = paragraph.getElementsByTagName("iframe").length; + var totalCount = imgCount + embedCount + objectCount + iframeCount; + + return totalCount === 0 && !this._getInnerText(paragraph, false); + }); + + this._forEachNode(this._getAllNodesWithTag(articleContent, ["br"]), function(br) { + var next = this._nextNode(br.nextSibling); + if (next && next.tagName == "P") + br.parentNode.removeChild(br); + }); + + // Remove single-cell tables + this._forEachNode(this._getAllNodesWithTag(articleContent, ["table"]), function(table) { + var tbody = this._hasSingleTagInsideElement(table, "TBODY") ? table.firstElementChild : table; + if (this._hasSingleTagInsideElement(tbody, "TR")) { + var row = tbody.firstElementChild; + if (this._hasSingleTagInsideElement(row, "TD")) { + var cell = row.firstElementChild; + cell = this._setNodeTag(cell, this._everyNode(cell.childNodes, this._isPhrasingContent) ? "P" : "DIV"); + table.parentNode.replaceChild(cell, table); + } + } + }); + }, + + /** + * Initialize a node with the readability object. Also checks the + * className/id for special names to add to its score. + * + * @param Element + * @return void + **/ + _initializeNode: function(node) { + node.readability = {"contentScore": 0}; + + switch (node.tagName) { + case "DIV": + node.readability.contentScore += 5; + break; + + case "PRE": + case "TD": + case "BLOCKQUOTE": + node.readability.contentScore += 3; + break; + + case "ADDRESS": + case "OL": + case "UL": + case "DL": + case "DD": + case "DT": + case "LI": + case "FORM": + node.readability.contentScore -= 3; + break; + + case "H1": + case "H2": + case "H3": + case "H4": + case "H5": + case "H6": + case "TH": + node.readability.contentScore -= 5; + break; + } + + node.readability.contentScore += this._getClassWeight(node); + }, + + _removeAndGetNext: function(node) { + var nextNode = this._getNextNode(node, true); + node.parentNode.removeChild(node); + return nextNode; + }, + + /** + * Traverse the DOM from node to node, starting at the node passed in. + * Pass true for the second parameter to indicate this node itself + * (and its kids) are going away, and we want the next node over. + * + * Calling this in a loop will traverse the DOM depth-first. + */ + _getNextNode: function(node, ignoreSelfAndKids) { + // First check for kids if those aren't being ignored + if (!ignoreSelfAndKids && node.firstElementChild) { + return node.firstElementChild; + } + // Then for siblings... + if (node.nextElementSibling) { + return node.nextElementSibling; + } + // And finally, move up the parent chain *and* find a sibling + // (because this is depth-first traversal, we will have already + // seen the parent nodes themselves). + do { + node = node.parentNode; + } while (node && !node.nextElementSibling); + return node && node.nextElementSibling; + }, + + // compares second text to first one + // 1 = same text, 0 = completely different text + // works the way that it splits both texts into words and then finds words that are unique in second text + // the result is given by the lower length of unique parts + _textSimilarity: function(textA, textB) { + var tokensA = textA.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean); + var tokensB = textB.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean); + if (!tokensA.length || !tokensB.length) { + return 0; + } + var uniqTokensB = tokensB.filter(token => !tokensA.includes(token)); + var distanceB = uniqTokensB.join(" ").length / tokensB.join(" ").length; + return 1 - distanceB; + }, + + _checkByline: function(node, matchString) { + if (this._articleByline) { + return false; + } + + if (node.getAttribute !== undefined) { + var rel = node.getAttribute("rel"); + var itemprop = node.getAttribute("itemprop"); + } + + if ((rel === "author" || (itemprop && itemprop.indexOf("author") !== -1) || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) { + this._articleByline = node.textContent.trim(); + return true; + } + + return false; + }, + + _getNodeAncestors: function(node, maxDepth) { + maxDepth = maxDepth || 0; + var i = 0, ancestors = []; + while (node.parentNode) { + ancestors.push(node.parentNode); + if (maxDepth && ++i === maxDepth) + break; + node = node.parentNode; + } + return ancestors; + }, + + /*** + * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is + * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. + * + * @param page a document to run upon. Needs to be a full document, complete with body. + * @return Element + **/ + _grabArticle: function (page) { + this.log("**** grabArticle ****"); + var doc = this._doc; + var isPaging = page !== null; + page = page ? page : this._doc.body; + + // We can't grab an article if we don't have a page! + if (!page) { + this.log("No body found in document. Abort."); + return null; + } + + var pageCacheHtml = page.innerHTML; + + while (true) { + this.log("Starting grabArticle loop"); + var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS); + + // First, node prepping. Trash nodes that look cruddy (like ones with the + // class name "comment", etc), and turn divs into P tags where they have been + // used inappropriately (as in, where they contain no other block level elements.) + var elementsToScore = []; + var node = this._doc.documentElement; + + let shouldRemoveTitleHeader = true; + + while (node) { + + if (node.tagName === "HTML") { + this._articleLang = node.getAttribute("lang"); + } + + var matchString = node.className + " " + node.id; + + if (!this._isProbablyVisible(node)) { + this.log("Removing hidden node - " + matchString); + node = this._removeAndGetNext(node); + continue; + } + + // Check to see if this node is a byline, and remove it if it is. + if (this._checkByline(node, matchString)) { + node = this._removeAndGetNext(node); + continue; + } + + if (shouldRemoveTitleHeader && this._headerDuplicatesTitle(node)) { + this.log("Removing header: ", node.textContent.trim(), this._articleTitle.trim()); + shouldRemoveTitleHeader = false; + node = this._removeAndGetNext(node); + continue; + } + + // Remove unlikely candidates + if (stripUnlikelyCandidates) { + if (this.REGEXPS.unlikelyCandidates.test(matchString) && + !this.REGEXPS.okMaybeItsACandidate.test(matchString) && + !this._hasAncestorTag(node, "table") && + !this._hasAncestorTag(node, "code") && + node.tagName !== "BODY" && + node.tagName !== "A") { + this.log("Removing unlikely candidate - " + matchString); + node = this._removeAndGetNext(node); + continue; + } + + if (this.UNLIKELY_ROLES.includes(node.getAttribute("role"))) { + this.log("Removing content with role " + node.getAttribute("role") + " - " + matchString); + node = this._removeAndGetNext(node); + continue; + } + } + + // Remove DIV, SECTION, and HEADER nodes without any content(e.g. text, image, video, or iframe). + if ((node.tagName === "DIV" || node.tagName === "SECTION" || node.tagName === "HEADER" || + node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" || + node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") && + this._isElementWithoutContent(node)) { + node = this._removeAndGetNext(node); + continue; + } + + if (this.DEFAULT_TAGS_TO_SCORE.indexOf(node.tagName) !== -1) { + elementsToScore.push(node); + } + + // Turn all divs that don't have children block level elements into p's + if (node.tagName === "DIV") { + // Put phrasing content into paragraphs. + var p = null; + var childNode = node.firstChild; + while (childNode) { + var nextSibling = childNode.nextSibling; + if (this._isPhrasingContent(childNode)) { + if (p !== null) { + p.appendChild(childNode); + } else if (!this._isWhitespace(childNode)) { + p = doc.createElement("p"); + node.replaceChild(p, childNode); + p.appendChild(childNode); + } + } else if (p !== null) { + while (p.lastChild && this._isWhitespace(p.lastChild)) { + p.removeChild(p.lastChild); + } + p = null; + } + childNode = nextSibling; + } + + // Sites like http://mobile.slate.com encloses each paragraph with a DIV + // element. DIVs with only a P element inside and no text content can be + // safely converted into plain P elements to avoid confusing the scoring + // algorithm with DIVs with are, in practice, paragraphs. + if (this._hasSingleTagInsideElement(node, "P") && this._getLinkDensity(node) < 0.25) { + var newNode = node.children[0]; + node.parentNode.replaceChild(newNode, node); + node = newNode; + elementsToScore.push(node); + } else if (!this._hasChildBlockElement(node)) { + node = this._setNodeTag(node, "P"); + elementsToScore.push(node); + } + } + node = this._getNextNode(node); + } + + /** + * Loop through all paragraphs, and assign a score to them based on how content-y they look. + * Then add their score to their parent node. + * + * A score is determined by things like number of commas, class names, etc. Maybe eventually link density. + **/ + var candidates = []; + this._forEachNode(elementsToScore, function(elementToScore) { + if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === "undefined") + return; + + // If this paragraph is less than 25 characters, don't even count it. + var innerText = this._getInnerText(elementToScore); + if (innerText.length < 25) + return; + + // Exclude nodes with no ancestor. + var ancestors = this._getNodeAncestors(elementToScore, 5); + if (ancestors.length === 0) + return; + + var contentScore = 0; + + // Add a point for the paragraph itself as a base. + contentScore += 1; + + // Add points for any commas within this paragraph. + contentScore += innerText.split(",").length; + + // For every 100 characters in this paragraph, add another point. Up to 3 points. + contentScore += Math.min(Math.floor(innerText.length / 100), 3); + + // Initialize and score ancestors. + this._forEachNode(ancestors, function(ancestor, level) { + if (!ancestor.tagName || !ancestor.parentNode || typeof(ancestor.parentNode.tagName) === "undefined") + return; + + if (typeof(ancestor.readability) === "undefined") { + this._initializeNode(ancestor); + candidates.push(ancestor); + } + + // Node score divider: + // - parent: 1 (no division) + // - grandparent: 2 + // - great grandparent+: ancestor level * 3 + if (level === 0) + var scoreDivider = 1; + else if (level === 1) + scoreDivider = 2; + else + scoreDivider = level * 3; + ancestor.readability.contentScore += contentScore / scoreDivider; + }); + }); + + // After we've calculated scores, loop through all of the possible + // candidate nodes we found and find the one with the highest score. + var topCandidates = []; + for (var c = 0, cl = candidates.length; c < cl; c += 1) { + var candidate = candidates[c]; + + // Scale the final candidates score based on link density. Good content + // should have a relatively small link density (5% or less) and be mostly + // unaffected by this operation. + var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate)); + candidate.readability.contentScore = candidateScore; + + this.log("Candidate:", candidate, "with score " + candidateScore); + + for (var t = 0; t < this._nbTopCandidates; t++) { + var aTopCandidate = topCandidates[t]; + + if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) { + topCandidates.splice(t, 0, candidate); + if (topCandidates.length > this._nbTopCandidates) + topCandidates.pop(); + break; + } + } + } + + var topCandidate = topCandidates[0] || null; + var neededToCreateTopCandidate = false; + var parentOfTopCandidate; + + // If we still have no top candidate, just use the body as a last resort. + // We also have to copy the body node so it is something we can modify. + if (topCandidate === null || topCandidate.tagName === "BODY") { + // Move all of the page's children into topCandidate + topCandidate = doc.createElement("DIV"); + neededToCreateTopCandidate = true; + // Move everything (not just elements, also text nodes etc.) into the container + // so we even include text directly in the body: + while (page.firstChild) { + this.log("Moving child out:", page.firstChild); + topCandidate.appendChild(page.firstChild); + } + + page.appendChild(topCandidate); + + this._initializeNode(topCandidate); + } else if (topCandidate) { + // Find a better top candidate node if it contains (at least three) nodes which belong to `topCandidates` array + // and whose scores are quite closed with current `topCandidate` node. + var alternativeCandidateAncestors = []; + for (var i = 1; i < topCandidates.length; i++) { + if (topCandidates[i].readability.contentScore / topCandidate.readability.contentScore >= 0.75) { + alternativeCandidateAncestors.push(this._getNodeAncestors(topCandidates[i])); + } + } + var MINIMUM_TOPCANDIDATES = 3; + if (alternativeCandidateAncestors.length >= MINIMUM_TOPCANDIDATES) { + parentOfTopCandidate = topCandidate.parentNode; + while (parentOfTopCandidate.tagName !== "BODY") { + var listsContainingThisAncestor = 0; + for (var ancestorIndex = 0; ancestorIndex < alternativeCandidateAncestors.length && listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ancestorIndex++) { + listsContainingThisAncestor += Number(alternativeCandidateAncestors[ancestorIndex].includes(parentOfTopCandidate)); + } + if (listsContainingThisAncestor >= MINIMUM_TOPCANDIDATES) { + topCandidate = parentOfTopCandidate; + break; + } + parentOfTopCandidate = parentOfTopCandidate.parentNode; + } + } + if (!topCandidate.readability) { + this._initializeNode(topCandidate); + } + + // Because of our bonus system, parents of candidates might have scores + // themselves. They get half of the node. There won't be nodes with higher + // scores than our topCandidate, but if we see the score going *up* in the first + // few steps up the tree, that's a decent sign that there might be more content + // lurking in other places that we want to unify in. The sibling stuff + // below does some of that - but only if we've looked high enough up the DOM + // tree. + parentOfTopCandidate = topCandidate.parentNode; + var lastScore = topCandidate.readability.contentScore; + // The scores shouldn't get too low. + var scoreThreshold = lastScore / 3; + while (parentOfTopCandidate.tagName !== "BODY") { + if (!parentOfTopCandidate.readability) { + parentOfTopCandidate = parentOfTopCandidate.parentNode; + continue; + } + var parentScore = parentOfTopCandidate.readability.contentScore; + if (parentScore < scoreThreshold) + break; + if (parentScore > lastScore) { + // Alright! We found a better parent to use. + topCandidate = parentOfTopCandidate; + break; + } + lastScore = parentOfTopCandidate.readability.contentScore; + parentOfTopCandidate = parentOfTopCandidate.parentNode; + } + + // If the top candidate is the only child, use parent instead. This will help sibling + // joining logic when adjacent content is actually located in parent's sibling node. + parentOfTopCandidate = topCandidate.parentNode; + while (parentOfTopCandidate.tagName != "BODY" && parentOfTopCandidate.children.length == 1) { + topCandidate = parentOfTopCandidate; + parentOfTopCandidate = topCandidate.parentNode; + } + if (!topCandidate.readability) { + this._initializeNode(topCandidate); + } + } + + // Now that we have the top candidate, look through its siblings for content + // that might also be related. Things like preambles, content split by ads + // that we removed, etc. + var articleContent = doc.createElement("DIV"); + if (isPaging) + articleContent.id = "readability-content"; + + var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); + // Keep potential top candidate's parent node to try to get text direction of it later. + parentOfTopCandidate = topCandidate.parentNode; + var siblings = parentOfTopCandidate.children; + + for (var s = 0, sl = siblings.length; s < sl; s++) { + var sibling = siblings[s]; + var append = false; + + this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : ""); + this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : "Unknown"); + + if (sibling === topCandidate) { + append = true; + } else { + var contentBonus = 0; + + // Give a bonus if sibling nodes and top candidates have the example same classname + if (sibling.className === topCandidate.className && topCandidate.className !== "") + contentBonus += topCandidate.readability.contentScore * 0.2; + + if (sibling.readability && + ((sibling.readability.contentScore + contentBonus) >= siblingScoreThreshold)) { + append = true; + } else if (sibling.nodeName === "P") { + var linkDensity = this._getLinkDensity(sibling); + var nodeContent = this._getInnerText(sibling); + var nodeLength = nodeContent.length; + + if (nodeLength > 80 && linkDensity < 0.25) { + append = true; + } else if (nodeLength < 80 && nodeLength > 0 && linkDensity === 0 && + nodeContent.search(/\.( |$)/) !== -1) { + append = true; + } + } + } + + if (append) { + this.log("Appending node:", sibling); + + if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) { + // We have a node that isn't a common block level element, like a form or td tag. + // Turn it into a div so it doesn't get filtered out later by accident. + this.log("Altering sibling:", sibling, "to div."); + + sibling = this._setNodeTag(sibling, "DIV"); + } + + articleContent.appendChild(sibling); + // Fetch children again to make it compatible + // with DOM parsers without live collection support. + siblings = parentOfTopCandidate.children; + // siblings is a reference to the children array, and + // sibling is removed from the array when we call appendChild(). + // As a result, we must revisit this index since the nodes + // have been shifted. + s -= 1; + sl -= 1; + } + } + + if (this._debug) + this.log("Article content pre-prep: " + articleContent.innerHTML); + // So we have all of the content that we need. Now we clean it up for presentation. + this._prepArticle(articleContent); + if (this._debug) + this.log("Article content post-prep: " + articleContent.innerHTML); + + if (neededToCreateTopCandidate) { + // We already created a fake div thing, and there wouldn't have been any siblings left + // for the previous loop, so there's no point trying to create a new div, and then + // move all the children over. Just assign IDs and class names here. No need to append + // because that already happened anyway. + topCandidate.id = "readability-page-1"; + topCandidate.className = "page"; + } else { + var div = doc.createElement("DIV"); + div.id = "readability-page-1"; + div.className = "page"; + while (articleContent.firstChild) { + div.appendChild(articleContent.firstChild); + } + articleContent.appendChild(div); + } + + if (this._debug) + this.log("Article content after paging: " + articleContent.innerHTML); + + var parseSuccessful = true; + + // Now that we've gone through the full algorithm, check to see if + // we got any meaningful content. If we didn't, we may need to re-run + // grabArticle with different flags set. This gives us a higher likelihood of + // finding the content, and the sieve approach gives us a higher likelihood of + // finding the -right- content. + var textLength = this._getInnerText(articleContent, true).length; + if (textLength < this._charThreshold) { + parseSuccessful = false; + page.innerHTML = pageCacheHtml; + + if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) { + this._removeFlag(this.FLAG_STRIP_UNLIKELYS); + this._attempts.push({articleContent: articleContent, textLength: textLength}); + } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) { + this._removeFlag(this.FLAG_WEIGHT_CLASSES); + this._attempts.push({articleContent: articleContent, textLength: textLength}); + } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) { + this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY); + this._attempts.push({articleContent: articleContent, textLength: textLength}); + } else { + this._attempts.push({articleContent: articleContent, textLength: textLength}); + // No luck after removing flags, just return the longest text we found during the different loops + this._attempts.sort(function (a, b) { + return b.textLength - a.textLength; + }); + + // But first check if we actually have something + if (!this._attempts[0].textLength) { + return null; + } + + articleContent = this._attempts[0].articleContent; + parseSuccessful = true; + } + } + + if (parseSuccessful) { + // Find out text direction from ancestors of final top candidate. + var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate)); + this._someNode(ancestors, function(ancestor) { + if (!ancestor.tagName) + return false; + var articleDir = ancestor.getAttribute("dir"); + if (articleDir) { + this._articleDir = articleDir; + return true; + } + return false; + }); + return articleContent; + } + } + }, + + /** + * Check whether the input string could be a byline. + * This verifies that the input is a string, and that the length + * is less than 100 chars. + * + * @param possibleByline {string} - a string to check whether its a byline. + * @return Boolean - whether the input string is a byline. + */ + _isValidByline: function(byline) { + if (typeof byline == "string" || byline instanceof String) { + byline = byline.trim(); + return (byline.length > 0) && (byline.length < 100); + } + return false; + }, + + /** + * Converts some of the common HTML entities in string to their corresponding characters. + * + * @param str {string} - a string to unescape. + * @return string without HTML entity. + */ + _unescapeHtmlEntities: function(str) { + if (!str) { + return str; + } + + var htmlEscapeMap = this.HTML_ESCAPE_MAP; + return str.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag) { + return htmlEscapeMap[tag]; + }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(_, hex, numStr) { + var num = parseInt(hex || numStr, hex ? 16 : 10); + return String.fromCharCode(num); + }); + }, + + /** + * Try to extract metadata from JSON-LD object. + * For now, only Schema.org objects of type Article or its subtypes are supported. + * @return Object with any metadata that could be extracted (possibly none) + */ + _getJSONLD: function (doc) { + var scripts = this._getAllNodesWithTag(doc, ["script"]); + + var metadata; + + this._forEachNode(scripts, function(jsonLdElement) { + if (!metadata && jsonLdElement.getAttribute("type") === "application/ld+json") { + try { + // Strip CDATA markers if present + var content = jsonLdElement.textContent.replace(/^\s*<!\[CDATA\[|\]\]>\s*$/g, ""); + var parsed = JSON.parse(content); + if ( + !parsed["@context"] || + !parsed["@context"].match(/^https?\:\/\/schema\.org$/) + ) { + return; + } + + if (!parsed["@type"] && Array.isArray(parsed["@graph"])) { + parsed = parsed["@graph"].find(function(it) { + return (it["@type"] || "").match( + this.REGEXPS.jsonLdArticleTypes + ); + }); + } + + if ( + !parsed || + !parsed["@type"] || + !parsed["@type"].match(this.REGEXPS.jsonLdArticleTypes) + ) { + return; + } + + metadata = {}; + + if (typeof parsed.name === "string" && typeof parsed.headline === "string" && parsed.name !== parsed.headline) { + // we have both name and headline element in the JSON-LD. They should both be the same but some websites like aktualne.cz + // put their own name into "name" and the article title to "headline" which confuses Readability. So we try to check if either + // "name" or "headline" closely matches the html title, and if so, use that one. If not, then we use "name" by default. + + var title = this._getArticleTitle(); + var nameMatches = this._textSimilarity(parsed.name, title) > 0.75; + var headlineMatches = this._textSimilarity(parsed.headline, title) > 0.75; + + if (headlineMatches && !nameMatches) { + metadata.title = parsed.headline; + } else { + metadata.title = parsed.name; + } + } else if (typeof parsed.name === "string") { + metadata.title = parsed.name.trim(); + } else if (typeof parsed.headline === "string") { + metadata.title = parsed.headline.trim(); + } + if (parsed.author) { + if (typeof parsed.author.name === "string") { + metadata.byline = parsed.author.name.trim(); + } else if (Array.isArray(parsed.author) && parsed.author[0] && typeof parsed.author[0].name === "string") { + metadata.byline = parsed.author + .filter(function(author) { + return author && typeof author.name === "string"; + }) + .map(function(author) { + return author.name.trim(); + }) + .join(", "); + } + } + if (typeof parsed.description === "string") { + metadata.excerpt = parsed.description.trim(); + } + if ( + parsed.publisher && + typeof parsed.publisher.name === "string" + ) { + metadata.siteName = parsed.publisher.name.trim(); + } + return; + } catch (err) { + this.log(err.message); + } + } + }); + return metadata ? metadata : {}; + }, + + /** + * Attempts to get excerpt and byline metadata for the article. + * + * @param {Object} jsonld — object containing any metadata that + * could be extracted from JSON-LD object. + * + * @return Object with optional "excerpt" and "byline" properties + */ + _getArticleMetadata: function(jsonld) { + var metadata = {}; + var values = {}; + var metaElements = this._doc.getElementsByTagName("meta"); + + // property is a space-separated list of values + var propertyPattern = /\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|site_name)\s*/gi; + + // name is a single value + var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i; + + // Find description tags. + this._forEachNode(metaElements, function(element) { + var elementName = element.getAttribute("name"); + var elementProperty = element.getAttribute("property"); + var content = element.getAttribute("content"); + if (!content) { + return; + } + var matches = null; + var name = null; + + if (elementProperty) { + matches = elementProperty.match(propertyPattern); + if (matches) { + // Convert to lowercase, and remove any whitespace + // so we can match below. + name = matches[0].toLowerCase().replace(/\s/g, ""); + // multiple authors + values[name] = content.trim(); + } + } + if (!matches && elementName && namePattern.test(elementName)) { + name = elementName; + if (content) { + // Convert to lowercase, remove any whitespace, and convert dots + // to colons so we can match below. + name = name.toLowerCase().replace(/\s/g, "").replace(/\./g, ":"); + values[name] = content.trim(); + } + } + }); + + // get title + metadata.title = jsonld.title || + values["dc:title"] || + values["dcterm:title"] || + values["og:title"] || + values["weibo:article:title"] || + values["weibo:webpage:title"] || + values["title"] || + values["twitter:title"]; + + if (!metadata.title) { + metadata.title = this._getArticleTitle(); + } + + // get author + metadata.byline = jsonld.byline || + values["dc:creator"] || + values["dcterm:creator"] || + values["author"]; + + // get description + metadata.excerpt = jsonld.excerpt || + values["dc:description"] || + values["dcterm:description"] || + values["og:description"] || + values["weibo:article:description"] || + values["weibo:webpage:description"] || + values["description"] || + values["twitter:description"]; + + // get site name + metadata.siteName = jsonld.siteName || + values["og:site_name"]; + + // in many sites the meta value is escaped with HTML entities, + // so here we need to unescape it + metadata.title = this._unescapeHtmlEntities(metadata.title); + metadata.byline = this._unescapeHtmlEntities(metadata.byline); + metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt); + metadata.siteName = this._unescapeHtmlEntities(metadata.siteName); + + return metadata; + }, + + /** + * Check if node is image, or if node contains exactly only one image + * whether as a direct child or as its descendants. + * + * @param Element + **/ + _isSingleImage: function(node) { + if (node.tagName === "IMG") { + return true; + } + + if (node.children.length !== 1 || node.textContent.trim() !== "") { + return false; + } + + return this._isSingleImage(node.children[0]); + }, + + /** + * Find all <noscript> that are located after <img> nodes, and which contain only one + * <img> element. Replace the first image with the image from inside the <noscript> tag, + * and remove the <noscript> tag. This improves the quality of the images we use on + * some sites (e.g. Medium). + * + * @param Element + **/ + _unwrapNoscriptImages: function(doc) { + // Find img without source or attributes that might contains image, and remove it. + // This is done to prevent a placeholder img is replaced by img from noscript in next step. + var imgs = Array.from(doc.getElementsByTagName("img")); + this._forEachNode(imgs, function(img) { + for (var i = 0; i < img.attributes.length; i++) { + var attr = img.attributes[i]; + switch (attr.name) { + case "src": + case "srcset": + case "data-src": + case "data-srcset": + return; + } + + if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { + return; + } + } + + img.parentNode.removeChild(img); + }); + + // Next find noscript and try to extract its image + var noscripts = Array.from(doc.getElementsByTagName("noscript")); + this._forEachNode(noscripts, function(noscript) { + // Parse content of noscript and make sure it only contains image + var tmp = doc.createElement("div"); + tmp.innerHTML = noscript.innerHTML; + if (!this._isSingleImage(tmp)) { + return; + } + + // If noscript has previous sibling and it only contains image, + // replace it with noscript content. However we also keep old + // attributes that might contains image. + var prevElement = noscript.previousElementSibling; + if (prevElement && this._isSingleImage(prevElement)) { + var prevImg = prevElement; + if (prevImg.tagName !== "IMG") { + prevImg = prevElement.getElementsByTagName("img")[0]; + } + + var newImg = tmp.getElementsByTagName("img")[0]; + for (var i = 0; i < prevImg.attributes.length; i++) { + var attr = prevImg.attributes[i]; + if (attr.value === "") { + continue; + } + + if (attr.name === "src" || attr.name === "srcset" || /\.(jpg|jpeg|png|webp)/i.test(attr.value)) { + if (newImg.getAttribute(attr.name) === attr.value) { + continue; + } + + var attrName = attr.name; + if (newImg.hasAttribute(attrName)) { + attrName = "data-old-" + attrName; + } + + newImg.setAttribute(attrName, attr.value); + } + } + + noscript.parentNode.replaceChild(tmp.firstElementChild, prevElement); + } + }); + }, + + /** + * Removes script tags from the document. + * + * @param Element + **/ + _removeScripts: function(doc) { + this._removeNodes(this._getAllNodesWithTag(doc, ["script"]), function(scriptNode) { + scriptNode.nodeValue = ""; + scriptNode.removeAttribute("src"); + return true; + }); + this._removeNodes(this._getAllNodesWithTag(doc, ["noscript"])); + }, + + /** + * Check if this node has only whitespace and a single element with given tag + * Returns false if the DIV node contains non-empty text nodes + * or if it contains no element with given tag or more than 1 element. + * + * @param Element + * @param string tag of child element + **/ + _hasSingleTagInsideElement: function(element, tag) { + // There should be exactly 1 element child with given tag + if (element.children.length != 1 || element.children[0].tagName !== tag) { + return false; + } + + // And there should be no text nodes with real content + return !this._someNode(element.childNodes, function(node) { + return node.nodeType === this.TEXT_NODE && + this.REGEXPS.hasContent.test(node.textContent); + }); + }, + + _isElementWithoutContent: function(node) { + return node.nodeType === this.ELEMENT_NODE && + node.textContent.trim().length == 0 && + (node.children.length == 0 || + node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length); + }, + + /** + * Determine whether element has any children block level elements. + * + * @param Element + */ + _hasChildBlockElement: function (element) { + return this._someNode(element.childNodes, function(node) { + return this.DIV_TO_P_ELEMS.has(node.tagName) || + this._hasChildBlockElement(node); + }); + }, + + /*** + * Determine if a node qualifies as phrasing content. + * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content + **/ + _isPhrasingContent: function(node) { + return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.indexOf(node.tagName) !== -1 || + ((node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") && + this._everyNode(node.childNodes, this._isPhrasingContent)); + }, + + _isWhitespace: function(node) { + return (node.nodeType === this.TEXT_NODE && node.textContent.trim().length === 0) || + (node.nodeType === this.ELEMENT_NODE && node.tagName === "BR"); + }, + + /** + * Get the inner text of a node - cross browser compatibly. + * This also strips out any excess whitespace to be found. + * + * @param Element + * @param Boolean normalizeSpaces (default: true) + * @return string + **/ + _getInnerText: function(e, normalizeSpaces) { + normalizeSpaces = (typeof normalizeSpaces === "undefined") ? true : normalizeSpaces; + var textContent = e.textContent.trim(); + + if (normalizeSpaces) { + return textContent.replace(this.REGEXPS.normalize, " "); + } + return textContent; + }, + + /** + * Get the number of times a string s appears in the node e. + * + * @param Element + * @param string - what to split on. Default is "," + * @return number (integer) + **/ + _getCharCount: function(e, s) { + s = s || ","; + return this._getInnerText(e).split(s).length - 1; + }, + + /** + * Remove the style attribute on every e and under. + * TODO: Test if getElementsByTagName(*) is faster. + * + * @param Element + * @return void + **/ + _cleanStyles: function(e) { + if (!e || e.tagName.toLowerCase() === "svg") + return; + + // Remove `style` and deprecated presentational attributes + for (var i = 0; i < this.PRESENTATIONAL_ATTRIBUTES.length; i++) { + e.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[i]); + } + + if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(e.tagName) !== -1) { + e.removeAttribute("width"); + e.removeAttribute("height"); + } + + var cur = e.firstElementChild; + while (cur !== null) { + this._cleanStyles(cur); + cur = cur.nextElementSibling; + } + }, + + /** + * Get the density of links as a percentage of the content + * This is the amount of text that is inside a link divided by the total text in the node. + * + * @param Element + * @return number (float) + **/ + _getLinkDensity: function(element) { + var textLength = this._getInnerText(element).length; + if (textLength === 0) + return 0; + + var linkLength = 0; + + // XXX implement _reduceNodeList? + this._forEachNode(element.getElementsByTagName("a"), function(linkNode) { + var href = linkNode.getAttribute("href"); + var coefficient = href && this.REGEXPS.hashUrl.test(href) ? 0.3 : 1; + linkLength += this._getInnerText(linkNode).length * coefficient; + }); + + return linkLength / textLength; + }, + + /** + * Get an elements class/id weight. Uses regular expressions to tell if this + * element looks good or bad. + * + * @param Element + * @return number (Integer) + **/ + _getClassWeight: function(e) { + if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) + return 0; + + var weight = 0; + + // Look for a special classname + if (typeof(e.className) === "string" && e.className !== "") { + if (this.REGEXPS.negative.test(e.className)) + weight -= 25; + + if (this.REGEXPS.positive.test(e.className)) + weight += 25; + } + + // Look for a special ID + if (typeof(e.id) === "string" && e.id !== "") { + if (this.REGEXPS.negative.test(e.id)) + weight -= 25; + + if (this.REGEXPS.positive.test(e.id)) + weight += 25; + } + + return weight; + }, + + /** + * Clean a node of all elements of type "tag". + * (Unless it's a youtube/vimeo video. People love movies.) + * + * @param Element + * @param string tag to clean + * @return void + **/ + _clean: function(e, tag) { + var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1; + + this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(element) { + // Allow youtube and vimeo videos through as people usually want to see those. + if (isEmbed) { + // First, check the elements attributes to see if any of them contain youtube or vimeo + for (var i = 0; i < element.attributes.length; i++) { + if (this.REGEXPS.videos.test(element.attributes[i].value)) { + return false; + } + } + + // For embed with <object> tag, check inner HTML as well. + if (element.tagName === "object" && this.REGEXPS.videos.test(element.innerHTML)) { + return false; + } + } + + return true; + }); + }, + + /** + * Check if a given node has one of its ancestor tag name matching the + * provided one. + * @param HTMLElement node + * @param String tagName + * @param Number maxDepth + * @param Function filterFn a filter to invoke to determine whether this node 'counts' + * @return Boolean + */ + _hasAncestorTag: function(node, tagName, maxDepth, filterFn) { + maxDepth = maxDepth || 3; + tagName = tagName.toUpperCase(); + var depth = 0; + while (node.parentNode) { + if (maxDepth > 0 && depth > maxDepth) + return false; + if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode))) + return true; + node = node.parentNode; + depth++; + } + return false; + }, + + /** + * Return an object indicating how many rows and columns this table has. + */ + _getRowAndColumnCount: function(table) { + var rows = 0; + var columns = 0; + var trs = table.getElementsByTagName("tr"); + for (var i = 0; i < trs.length; i++) { + var rowspan = trs[i].getAttribute("rowspan") || 0; + if (rowspan) { + rowspan = parseInt(rowspan, 10); + } + rows += (rowspan || 1); + + // Now look for column-related info + var columnsInThisRow = 0; + var cells = trs[i].getElementsByTagName("td"); + for (var j = 0; j < cells.length; j++) { + var colspan = cells[j].getAttribute("colspan") || 0; + if (colspan) { + colspan = parseInt(colspan, 10); + } + columnsInThisRow += (colspan || 1); + } + columns = Math.max(columns, columnsInThisRow); + } + return {rows: rows, columns: columns}; + }, + + /** + * Look for 'data' (as opposed to 'layout') tables, for which we use + * similar checks as + * https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19 + */ + _markDataTables: function(root) { + var tables = root.getElementsByTagName("table"); + for (var i = 0; i < tables.length; i++) { + var table = tables[i]; + var role = table.getAttribute("role"); + if (role == "presentation") { + table._readabilityDataTable = false; + continue; + } + var datatable = table.getAttribute("datatable"); + if (datatable == "0") { + table._readabilityDataTable = false; + continue; + } + var summary = table.getAttribute("summary"); + if (summary) { + table._readabilityDataTable = true; + continue; + } + + var caption = table.getElementsByTagName("caption")[0]; + if (caption && caption.childNodes.length > 0) { + table._readabilityDataTable = true; + continue; + } + + // If the table has a descendant with any of these tags, consider a data table: + var dataTableDescendants = ["col", "colgroup", "tfoot", "thead", "th"]; + var descendantExists = function(tag) { + return !!table.getElementsByTagName(tag)[0]; + }; + if (dataTableDescendants.some(descendantExists)) { + this.log("Data table because found data-y descendant"); + table._readabilityDataTable = true; + continue; + } + + // Nested tables indicate a layout table: + if (table.getElementsByTagName("table")[0]) { + table._readabilityDataTable = false; + continue; + } + + var sizeInfo = this._getRowAndColumnCount(table); + if (sizeInfo.rows >= 10 || sizeInfo.columns > 4) { + table._readabilityDataTable = true; + continue; + } + // Now just go by size entirely: + table._readabilityDataTable = sizeInfo.rows * sizeInfo.columns > 10; + } + }, + + /* convert images and figures that have properties like data-src into images that can be loaded without JS */ + _fixLazyImages: function (root) { + this._forEachNode(this._getAllNodesWithTag(root, ["img", "picture", "figure"]), function (elem) { + // In some sites (e.g. Kotaku), they put 1px square image as base64 data uri in the src attribute. + // So, here we check if the data uri is too short, just might as well remove it. + if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) { + // Make sure it's not SVG, because SVG can have a meaningful image in under 133 bytes. + var parts = this.REGEXPS.b64DataUrl.exec(elem.src); + if (parts[1] === "image/svg+xml") { + return; + } + + // Make sure this element has other attributes which contains image. + // If it doesn't, then this src is important and shouldn't be removed. + var srcCouldBeRemoved = false; + for (var i = 0; i < elem.attributes.length; i++) { + var attr = elem.attributes[i]; + if (attr.name === "src") { + continue; + } + + if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { + srcCouldBeRemoved = true; + break; + } + } + + // Here we assume if image is less than 100 bytes (or 133B after encoded to base64) + // it will be too small, therefore it might be placeholder image. + if (srcCouldBeRemoved) { + var b64starts = elem.src.search(/base64\s*/i) + 7; + var b64length = elem.src.length - b64starts; + if (b64length < 133) { + elem.removeAttribute("src"); + } + } + } + + // also check for "null" to work around https://github.com/jsdom/jsdom/issues/2580 + if ((elem.src || (elem.srcset && elem.srcset != "null")) && elem.className.toLowerCase().indexOf("lazy") === -1) { + return; + } + + for (var j = 0; j < elem.attributes.length; j++) { + attr = elem.attributes[j]; + if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") { + continue; + } + var copyTo = null; + if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) { + copyTo = "srcset"; + } else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) { + copyTo = "src"; + } + if (copyTo) { + //if this is an img or picture, set the attribute directly + if (elem.tagName === "IMG" || elem.tagName === "PICTURE") { + elem.setAttribute(copyTo, attr.value); + } else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) { + //if the item is a <figure> that does not contain an image or picture, create one and place it inside the figure + //see the nytimes-3 testcase for an example + var img = this._doc.createElement("img"); + img.setAttribute(copyTo, attr.value); + elem.appendChild(img); + } + } + } + }); + }, + + _getTextDensity: function(e, tags) { + var textLength = this._getInnerText(e, true).length; + if (textLength === 0) { + return 0; + } + var childrenLength = 0; + var children = this._getAllNodesWithTag(e, tags); + this._forEachNode(children, (child) => childrenLength += this._getInnerText(child, true).length); + return childrenLength / textLength; + }, + + /** + * Clean an element of all tags of type "tag" if they look fishy. + * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc. + * + * @return void + **/ + _cleanConditionally: function(e, tag) { + if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) + return; + + // Gather counts for other typical elements embedded within. + // Traverse backwards so we can remove nodes at the same time + // without effecting the traversal. + // + // TODO: Consider taking into account original contentScore here. + this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(node) { + // First check if this node IS data table, in which case don't remove it. + var isDataTable = function(t) { + return t._readabilityDataTable; + }; + + var isList = tag === "ul" || tag === "ol"; + if (!isList) { + var listLength = 0; + var listNodes = this._getAllNodesWithTag(node, ["ul", "ol"]); + this._forEachNode(listNodes, (list) => listLength += this._getInnerText(list).length); + isList = listLength / this._getInnerText(node).length > 0.9; + } + + if (tag === "table" && isDataTable(node)) { + return false; + } + + // Next check if we're inside a data table, in which case don't remove it as well. + if (this._hasAncestorTag(node, "table", -1, isDataTable)) { + return false; + } + + if (this._hasAncestorTag(node, "code")) { + return false; + } + + var weight = this._getClassWeight(node); + + this.log("Cleaning Conditionally", node); + + var contentScore = 0; + + if (weight + contentScore < 0) { + return true; + } + + if (this._getCharCount(node, ",") < 10) { + // If there are not very many commas, and the number of + // non-paragraph elements is more than paragraphs or other + // ominous signs, remove the element. + var p = node.getElementsByTagName("p").length; + var img = node.getElementsByTagName("img").length; + var li = node.getElementsByTagName("li").length - 100; + var input = node.getElementsByTagName("input").length; + var headingDensity = this._getTextDensity(node, ["h1", "h2", "h3", "h4", "h5", "h6"]); + + var embedCount = 0; + var embeds = this._getAllNodesWithTag(node, ["object", "embed", "iframe"]); + + for (var i = 0; i < embeds.length; i++) { + // If this embed has attribute that matches video regex, don't delete it. + for (var j = 0; j < embeds[i].attributes.length; j++) { + if (this.REGEXPS.videos.test(embeds[i].attributes[j].value)) { + return false; + } + } + + // For embed with <object> tag, check inner HTML as well. + if (embeds[i].tagName === "object" && this.REGEXPS.videos.test(embeds[i].innerHTML)) { + return false; + } + + embedCount++; + } + + var linkDensity = this._getLinkDensity(node); + var contentLength = this._getInnerText(node).length; + + var haveToRemove = + (img > 1 && p / img < 0.5 && !this._hasAncestorTag(node, "figure")) || + (!isList && li > p) || + (input > Math.floor(p/3)) || + (!isList && headingDensity < 0.9 && contentLength < 25 && (img === 0 || img > 2) && !this._hasAncestorTag(node, "figure")) || + (!isList && weight < 25 && linkDensity > 0.2) || + (weight >= 25 && linkDensity > 0.5) || + ((embedCount === 1 && contentLength < 75) || embedCount > 1); + return haveToRemove; + } + return false; + }); + }, + + /** + * Clean out elements that match the specified conditions + * + * @param Element + * @param Function determines whether a node should be removed + * @return void + **/ + _cleanMatchedNodes: function(e, filter) { + var endOfSearchMarkerNode = this._getNextNode(e, true); + var next = this._getNextNode(e); + while (next && next != endOfSearchMarkerNode) { + if (filter.call(this, next, next.className + " " + next.id)) { + next = this._removeAndGetNext(next); + } else { + next = this._getNextNode(next); + } + } + }, + + /** + * Clean out spurious headers from an Element. + * + * @param Element + * @return void + **/ + _cleanHeaders: function(e) { + let headingNodes = this._getAllNodesWithTag(e, ["h1", "h2"]); + this._removeNodes(headingNodes, function(node) { + let shouldRemove = this._getClassWeight(node) < 0; + if (shouldRemove) { + this.log("Removing header with low class weight:", node); + } + return shouldRemove; + }); + }, + + /** + * Check if this node is an H1 or H2 element whose content is mostly + * the same as the article title. + * + * @param Element the node to check. + * @return boolean indicating whether this is a title-like header. + */ + _headerDuplicatesTitle: function(node) { + if (node.tagName != "H1" && node.tagName != "H2") { + return false; + } + var heading = this._getInnerText(node, false); + this.log("Evaluating similarity of header:", heading, this._articleTitle); + return this._textSimilarity(this._articleTitle, heading) > 0.75; + }, + + _flagIsActive: function(flag) { + return (this._flags & flag) > 0; + }, + + _removeFlag: function(flag) { + this._flags = this._flags & ~flag; + }, + + _isProbablyVisible: function(node) { + // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. + return (!node.style || node.style.display != "none") + && !node.hasAttribute("hidden") + //check for "fallback-image" so that wikimedia math images are displayed + && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); + }, + + /** + * Runs readability. + * + * Workflow: + * 1. Prep the document by removing script tags, css, etc. + * 2. Build readability's DOM tree. + * 3. Grab the article content from the current dom tree. + * 4. Replace the current DOM tree with the new one. + * 5. Read peacefully. + * + * @return void + **/ + parse: function () { + // Avoid parsing too large documents, as per configuration option + if (this._maxElemsToParse > 0) { + var numTags = this._doc.getElementsByTagName("*").length; + if (numTags > this._maxElemsToParse) { + throw new Error("Aborting parsing document; " + numTags + " elements found"); + } + } + + // Unwrap image from noscript + this._unwrapNoscriptImages(this._doc); + + // Extract JSON-LD metadata before removing scripts + var jsonLd = this._disableJSONLD ? {} : this._getJSONLD(this._doc); + + // Remove script tags from the document. + this._removeScripts(this._doc); + + this._prepDocument(); + + var metadata = this._getArticleMetadata(jsonLd); + this._articleTitle = metadata.title; + + var articleContent = this._grabArticle(); + if (!articleContent) + return null; + + this.log("Grabbed: " + articleContent.innerHTML); + + this._postProcessContent(articleContent); + + // If we haven't found an excerpt in the article's metadata, use the article's + // first paragraph as the excerpt. This is used for displaying a preview of + // the article's content. + if (!metadata.excerpt) { + var paragraphs = articleContent.getElementsByTagName("p"); + if (paragraphs.length > 0) { + metadata.excerpt = paragraphs[0].textContent.trim(); + } + } + + var textContent = articleContent.textContent; + return { + title: this._articleTitle, + byline: metadata.byline || this._articleByline, + dir: this._articleDir, + lang: this._articleLang, + content: this._serializer(articleContent), + textContent: textContent, + length: textContent.length, + excerpt: metadata.excerpt, + siteName: metadata.siteName || this._articleSiteName + }; + } +}; + +if (typeof module === "object") { + module.exports = Readability; +} diff --git a/toolkit/components/reader/ReaderMode.sys.mjs b/toolkit/components/reader/ReaderMode.sys.mjs new file mode 100644 index 0000000000..92fbcce367 --- /dev/null +++ b/toolkit/components/reader/ReaderMode.sys.mjs @@ -0,0 +1,561 @@ +// -*- indent-tabs-mode: nil; js-indent-level: 2 -*- +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Constants for telemetry. +const DOWNLOAD_SUCCESS = 0; +const DOWNLOAD_ERROR_XHR = 1; +const DOWNLOAD_ERROR_NO_DOC = 2; + +const PARSE_SUCCESS = 0; +const PARSE_ERROR_TOO_MANY_ELEMENTS = 1; +const PARSE_ERROR_WORKER = 2; +const PARSE_ERROR_NO_ARTICLE = 3; + +// Class names to preserve in the readerized output. We preserve these class +// names so that rules in aboutReader.css can match them. +const CLASSES_TO_PRESERVE = [ + "caption", + "emoji", + "hidden", + "invisible", + "sr-only", + "visually-hidden", + "visuallyhidden", + "wp-caption", + "wp-caption-text", + "wp-smiley", +]; + +import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + LanguageDetector: + "resource://gre/modules/translation/LanguageDetector.sys.mjs", + ReaderWorker: "resource://gre/modules/reader/ReaderWorker.sys.mjs", + Readerable: "resource://gre/modules/Readerable.sys.mjs", +}); + +const gIsFirefoxDesktop = + Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"; + +Services.telemetry.setEventRecordingEnabled("readermode", true); + +export var ReaderMode = { + DEBUG: 0, + + // For time spent telemetry + enterTime: undefined, + leaveTime: undefined, + + /** + * Enter the reader mode by going forward one step in history if applicable, + * if not, append the about:reader page in the history instead. + */ + enterReaderMode(docShell, win) { + this.enterTime = Date.now(); + + Services.telemetry.recordEvent("readermode", "view", "on", null, { + subcategory: "feature", + }); + + let url = win.document.location.href; + let readerURL = "about:reader?url=" + encodeURIComponent(url); + + if (!Services.appinfo.sessionHistoryInParent) { + let webNav = docShell.QueryInterface(Ci.nsIWebNavigation); + let sh = webNav.sessionHistory; + if (webNav.canGoForward) { + let forwardEntry = sh.legacySHistory.getEntryAtIndex(sh.index + 1); + let forwardURL = forwardEntry.URI.spec; + if (forwardURL && (forwardURL == readerURL || !readerURL)) { + webNav.goForward(); + return; + } + } + } + + // This could possibly move to the parent. See bug 1664982. + win.document.location = readerURL; + }, + + /** + * Exit the reader mode by going back one step in history if applicable, + * if not, append the original page in the history instead. + */ + leaveReaderMode(docShell, win) { + this.leaveTime = Date.now(); + + // Measured in seconds (whole number) + let timeSpentInReaderMode = Math.floor( + (this.leaveTime - this.enterTime) / 1000 + ); + + // Measured as percentage (whole number) + let scrollPosition = Math.floor( + ((win.scrollY + win.innerHeight) / win.document.body.clientHeight) * 100 + ); + + Services.telemetry.recordEvent("readermode", "view", "off", null, { + subcategory: "feature", + reader_time: `${timeSpentInReaderMode}`, + scroll_position: `${scrollPosition}`, + }); + + let url = win.document.location.href; + let originalURL = this.getOriginalUrl(url); + let webNav = docShell.QueryInterface(Ci.nsIWebNavigation); + + if (!Services.appinfo.sessionHistoryInParent) { + let sh = webNav.sessionHistory; + if (webNav.canGoBack) { + let prevEntry = sh.legacySHistory.getEntryAtIndex(sh.index - 1); + let prevURL = prevEntry.URI.spec; + if (prevURL && (prevURL == originalURL || !originalURL)) { + webNav.goBack(); + return; + } + } + } + + let referrerURI, principal; + try { + referrerURI = Services.io.newURI(url); + principal = Services.scriptSecurityManager.createContentPrincipal( + referrerURI, + win.document.nodePrincipal.originAttributes + ); + } catch (e) { + console.error(e); + return; + } + let loadFlags = webNav.LOAD_FLAGS_DISALLOW_INHERIT_PRINCIPAL; + let ReferrerInfo = Components.Constructor( + "@mozilla.org/referrer-info;1", + "nsIReferrerInfo", + "init" + ); + let loadURIOptions = { + triggeringPrincipal: principal, + loadFlags, + referrerInfo: new ReferrerInfo( + Ci.nsIReferrerInfo.EMPTY, + true, + referrerURI + ), + }; + // This could possibly move to the parent. See bug 1664982. + webNav.fixupAndLoadURIString(originalURL, loadURIOptions); + }, + + /** + * Returns original URL from an about:reader URL. + * + * @param url An about:reader URL. + * @return The original URL for the article, or null if we did not find + * a properly formatted about:reader URL. + */ + getOriginalUrl(url) { + if (!url.startsWith("about:reader?")) { + return null; + } + + let outerHash = ""; + try { + let uriObj = Services.io.newURI(url); + url = uriObj.specIgnoringRef; + outerHash = uriObj.ref; + } catch (ex) { + /* ignore, use the raw string */ + } + + let searchParams = new URLSearchParams( + url.substring("about:reader?".length) + ); + if (!searchParams.has("url")) { + return null; + } + let originalUrl = searchParams.get("url"); + if (outerHash) { + try { + let uriObj = Services.io.newURI(originalUrl); + uriObj = Services.io.newURI("#" + outerHash, null, uriObj); + originalUrl = uriObj.spec; + } catch (ex) {} + } + return originalUrl; + }, + + getOriginalUrlObjectForDisplay(url) { + let originalUrl = this.getOriginalUrl(url); + if (originalUrl) { + let uriObj; + try { + uriObj = Services.uriFixup.getFixupURIInfo(originalUrl).preferredURI; + } catch (ex) { + return null; + } + try { + return Services.io.createExposableURI(uriObj); + } catch (ex) { + return null; + } + } + return null; + }, + + /** + * Gets an article from a loaded browser's document. This method will not attempt + * to parse certain URIs (e.g. about: URIs). + * + * @param doc A document to parse. + * @return {Promise} + * @resolves JS object representing the article, or null if no article is found. + */ + parseDocument(doc) { + if ( + !lazy.Readerable.shouldCheckUri(doc.documentURIObject) || + !lazy.Readerable.shouldCheckUri(doc.baseURIObject, true) + ) { + this.log("Reader mode disabled for URI"); + return null; + } + + return this._readerParse(doc); + }, + + /** + * Downloads and parses a document from a URL. + * + * @param url URL to download and parse. + * @return {Promise} + * @resolves JS object representing the article, or null if no article is found. + */ + async downloadAndParseDocument(url, docContentType = "document") { + let result = await this._downloadDocument(url, docContentType); + if (!result?.doc) { + return null; + } + let { doc, newURL } = result; + if ( + !lazy.Readerable.shouldCheckUri(doc.documentURIObject) || + !lazy.Readerable.shouldCheckUri(doc.baseURIObject, true) + ) { + this.log("Reader mode disabled for URI"); + return null; + } + + let article = await this._readerParse(doc); + // If we have to redirect, reject to the caller with the parsed article, + // so we can update the URL before displaying it. + if (newURL) { + return Promise.reject({ newURL, article }); + } + // Otherwise, we can just continue with the article. + return article; + }, + + _downloadDocument(url, docContentType = "document") { + try { + if (!lazy.Readerable.shouldCheckUri(Services.io.newURI(url))) { + return null; + } + } catch (ex) { + console.error( + new Error(`Couldn't create URI from ${url} to download: ${ex}`) + ); + return null; + } + let histogram = Services.telemetry.getHistogramById( + "READER_MODE_DOWNLOAD_RESULT" + ); + return new Promise((resolve, reject) => { + let xhr = new XMLHttpRequest(); + xhr.open("GET", url, true); + xhr.onerror = evt => reject(evt.error); + xhr.responseType = docContentType === "text/plain" ? "text" : "document"; + xhr.onload = evt => { + if (xhr.status !== 200) { + reject("Reader mode XHR failed with status: " + xhr.status); + histogram.add(DOWNLOAD_ERROR_XHR); + return; + } + + let doc = + xhr.responseType === "text" ? xhr.responseText : xhr.responseXML; + if (!doc) { + reject("Reader mode XHR didn't return a document"); + histogram.add(DOWNLOAD_ERROR_NO_DOC); + return; + } + + let responseURL = xhr.responseURL; + let givenURL = url; + // Convert these to real URIs to make sure the escaping (or lack + // thereof) is identical: + try { + responseURL = Services.io.newURI(responseURL).specIgnoringRef; + } catch (ex) { + /* Ignore errors - we'll use what we had before */ + } + try { + givenURL = Services.io.newURI(givenURL).specIgnoringRef; + } catch (ex) { + /* Ignore errors - we'll use what we had before */ + } + + if (xhr.responseType != "document") { + let initialText = doc; + let parser = new DOMParser(); + doc = parser.parseFromString(`<pre></pre>`, "text/html"); + doc.querySelector("pre").textContent = initialText; + } + + // We treat redirects as download successes here: + histogram.add(DOWNLOAD_SUCCESS); + + let result = { doc }; + if (responseURL != givenURL) { + result.newURL = xhr.responseURL; + } + + resolve(result); + }; + xhr.send(); + }); + }, + + log(msg) { + if (this.DEBUG) { + dump("Reader: " + msg); + } + }, + + /** + * Attempts to parse a document into an article. Heavy lifting happens + * in readerWorker.js. + * + * @param doc The document to parse. + * @return {Promise} + * @resolves JS object representing the article, or null if no article is found. + */ + async _readerParse(doc) { + let histogram = Services.telemetry.getHistogramById( + "READER_MODE_PARSE_RESULT" + ); + if (this.parseNodeLimit) { + let numTags = doc.getElementsByTagName("*").length; + if (numTags > this.parseNodeLimit) { + this.log( + "Aborting parse for " + + doc.baseURIObject.spec + + "; " + + numTags + + " elements found" + ); + histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS); + return null; + } + } + + // Fetch this here before we send `doc` off to the worker thread, as later on the + // document might be nuked but we will still want the URI. + let { documentURI } = doc; + + let uriParam; + uriParam = { + spec: doc.baseURIObject.spec, + prePath: doc.baseURIObject.prePath, + scheme: doc.baseURIObject.scheme, + + // Fallback + host: documentURI, + pathBase: documentURI, + }; + + // nsIURI.host throws an exception if a host doesn't exist. + try { + uriParam.host = doc.baseURIObject.host; + uriParam.pathBase = Services.io.newURI(".", null, doc.baseURIObject).spec; + } catch (ex) { + // Fall back to the initial values we assigned. + console.warn("Error accessing host name: ", ex); + } + + // convert text/plain document, if any, to XHTML format + if (this._isDocumentPlainText(doc)) { + doc = this._convertPlainTextDocument(doc); + } + + let serializer = new XMLSerializer(); + let serializedDoc = serializer.serializeToString(doc); + // Explicitly null out doc to make it clear it might not be available from this + // point on. + doc = null; + + let options = { + classesToPreserve: CLASSES_TO_PRESERVE, + }; + + let article = null; + try { + article = await lazy.ReaderWorker.post("parseDocument", [ + uriParam, + serializedDoc, + options, + ]); + } catch (e) { + console.error("Error in ReaderWorker: ", e); + histogram.add(PARSE_ERROR_WORKER); + } + + if (!article) { + this.log("Worker did not return an article"); + histogram.add(PARSE_ERROR_NO_ARTICLE); + return null; + } + + // Readability returns a URI object based on the baseURI, but we only care + // about the original document's URL from now on. This also avoids spoofing + // attempts where the baseURI doesn't match the domain of the documentURI + article.url = documentURI; + delete article.uri; + + let flags = + Ci.nsIDocumentEncoder.OutputSelectionOnly | + Ci.nsIDocumentEncoder.OutputAbsoluteLinks; + article.title = Cc["@mozilla.org/parserutils;1"] + .getService(Ci.nsIParserUtils) + .convertToPlainText(article.title, flags, 0); + if (gIsFirefoxDesktop) { + await this._assignLanguage(article); + this._maybeAssignTextDirection(article); + } + + this._assignReadTime(article); + + histogram.add(PARSE_SUCCESS); + return article; + }, + + /** + * Sets a global language string value if the result is confident + * + * @return Promise + * @resolves when the language is detected + */ + _assignLanguage(article) { + return lazy.LanguageDetector.detectLanguage(article.textContent).then( + result => { + article.language = result.confident ? result.language : null; + } + ); + }, + + _maybeAssignTextDirection(article) { + // TODO: Remove the hardcoded language codes below once bug 1320265 is resolved. + if ( + !article.dir && + ["ar", "fa", "he", "ug", "ur"].includes(article.language) + ) { + article.dir = "rtl"; + } + }, + + /** + * Assigns the estimated reading time range of the article to the article object. + * + * @param article the article object to assign the reading time estimate to. + */ + _assignReadTime(article) { + let lang = article.language || "en"; + const readingSpeed = this._getReadingSpeedForLanguage(lang); + const charactersPerMinuteLow = readingSpeed.cpm - readingSpeed.variance; + const charactersPerMinuteHigh = readingSpeed.cpm + readingSpeed.variance; + const length = article.length; + + article.readingTimeMinsSlow = Math.ceil(length / charactersPerMinuteLow); + article.readingTimeMinsFast = Math.ceil(length / charactersPerMinuteHigh); + }, + + /** + * Returns the reading speed of a selection of languages with likely variance. + * + * Reading speed estimated from a study done on reading speeds in various languages. + * study can be found here: http://iovs.arvojournals.org/article.aspx?articleid=2166061 + * + * @return object with characters per minute and variance. Defaults to English + * if no suitable language is found in the collection. + */ + _getReadingSpeedForLanguage(lang) { + const readingSpeed = new Map([ + ["en", { cpm: 987, variance: 118 }], + ["ar", { cpm: 612, variance: 88 }], + ["de", { cpm: 920, variance: 86 }], + ["es", { cpm: 1025, variance: 127 }], + ["fi", { cpm: 1078, variance: 121 }], + ["fr", { cpm: 998, variance: 126 }], + ["he", { cpm: 833, variance: 130 }], + ["it", { cpm: 950, variance: 140 }], + ["jw", { cpm: 357, variance: 56 }], + ["nl", { cpm: 978, variance: 143 }], + ["pl", { cpm: 916, variance: 126 }], + ["pt", { cpm: 913, variance: 145 }], + ["ru", { cpm: 986, variance: 175 }], + ["sk", { cpm: 885, variance: 145 }], + ["sv", { cpm: 917, variance: 156 }], + ["tr", { cpm: 1054, variance: 156 }], + ["zh", { cpm: 255, variance: 29 }], + ]); + + return readingSpeed.get(lang) || readingSpeed.get("en"); + }, + /** + * + * Check if the document to be parsed is text document. + * @param doc the doc object to be parsed. + * @return boolean + * + */ + _isDocumentPlainText(doc) { + return doc.contentType == "text/plain"; + }, + /** + * + * The document to be parsed is text document and is converted to HTML format. + * @param doc the doc object to be parsed. + * @return doc + * + */ + _convertPlainTextDocument(doc) { + let preTag = doc.querySelector("pre"); + let docFrag = doc.createDocumentFragment(); + let content = preTag.textContent; + let paragraphs = content.split(/\r?\n\r?\n/); + for (let para of paragraphs) { + let pElem = doc.createElement("p"); + let lines = para.split(/\n/); + for (let line of lines) { + pElem.append(line); + let brElem = doc.createElement("br"); + pElem.append(brElem); + } + docFrag.append(pElem); + } + // Clone the document to avoid the original document being affected + // (which shows up when exiting reader mode again). + let clone = doc.documentElement.cloneNode(true); + clone.querySelector("pre").replaceWith(docFrag); + return clone; + }, +}; + +XPCOMUtils.defineLazyPreferenceGetter( + ReaderMode, + "maxElemsToParse", + "reader.parse-node-limit", + 0 +); diff --git a/toolkit/components/reader/ReaderWorker.js b/toolkit/components/reader/ReaderWorker.js new file mode 100644 index 0000000000..f604707ba9 --- /dev/null +++ b/toolkit/components/reader/ReaderWorker.js @@ -0,0 +1,61 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* eslint-env mozilla/chrome-worker */ + +"use strict"; + +/** + * A worker dedicated to handle parsing documents for reader view. + */ + +/* import-globals-from /toolkit/components/workerloader/require.js */ +/* global JSDOMParser */ +/* import-globals-from /toolkit/components/reader/Readability.js */ +importScripts( + "resource://gre/modules/workers/require.js", + "resource://gre/modules/reader/JSDOMParser.js", + "resource://gre/modules/reader/Readability.js" +); + +var PromiseWorker = require("resource://gre/modules/workers/PromiseWorker.js"); + +const DEBUG = false; + +var worker = new PromiseWorker.AbstractWorker(); +worker.dispatch = function (method, args = []) { + return Agent[method](...args); +}; +worker.postMessage = function (result, ...transfers) { + self.postMessage(result, ...transfers); +}; +worker.close = function () { + self.close(); +}; +worker.log = function (...args) { + if (DEBUG) { + dump("ReaderWorker: " + args.join(" ") + "\n"); + } +}; + +self.addEventListener("message", msg => worker.handleMessage(msg)); +self.addEventListener("unhandledrejection", function (error) { + throw error.reason; +}); + +var Agent = { + /** + * Parses structured article data from a document. + * + * @param {object} uri URI data for the document. + * @param {string} serializedDoc The serialized document. + * @param {object} options Options object to pass to Readability. + * + * @return {object} Article object returned from Readability. + */ + parseDocument(uri, serializedDoc, options) { + let doc = new JSDOMParser().parse(serializedDoc, uri.spec); + return new Readability(doc, options).parse(); + }, +}; diff --git a/toolkit/components/reader/ReaderWorker.sys.mjs b/toolkit/components/reader/ReaderWorker.sys.mjs new file mode 100644 index 0000000000..dbfd207ac5 --- /dev/null +++ b/toolkit/components/reader/ReaderWorker.sys.mjs @@ -0,0 +1,13 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Interface to a dedicated thread handling readability parsing. + */ + +import { BasePromiseWorker } from "resource://gre/modules/PromiseWorker.sys.mjs"; + +export var ReaderWorker = new BasePromiseWorker( + "resource://gre/modules/reader/ReaderWorker.js" +); diff --git a/toolkit/components/reader/Readerable.js b/toolkit/components/reader/Readerable.js new file mode 100644 index 0000000000..e558642fe3 --- /dev/null +++ b/toolkit/components/reader/Readerable.js @@ -0,0 +1,93 @@ +// -*- indent-tabs-mode: nil; js-indent-level: 2 -*- +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +"use strict"; + +// This file and Readability-readerable.js are merged together into +// Readerable.sys.mjs. + +/* exported Readerable */ +/* import-globals-from Readability-readerable.js */ + +const { XPCOMUtils } = ChromeUtils.importESModule( + "resource://gre/modules/XPCOMUtils.sys.mjs" +); + +var Readerable = { + get isEnabledForParseOnLoad() { + return this.isEnabled; + }, + + /** + * Decides whether or not a document is reader-able without parsing the whole thing. + * + * @param doc A document to parse. + * @return boolean Whether or not we should show the reader mode button. + */ + isProbablyReaderable(doc) { + // Only care about 'real' HTML documents: + if ( + doc.mozSyntheticDocument || + !doc.defaultView.HTMLDocument.isInstance(doc) + ) { + return false; + } + + let uri = Services.io.newURI(doc.location.href); + if (!this.shouldCheckUri(uri)) { + return false; + } + + return isProbablyReaderable(doc, this._isNodeVisible); + }, + + _isNodeVisible(node) { + return node.clientHeight > 0 && node.clientWidth > 0; + }, + + _blockedHosts: [ + "amazon.com", + "github.com", + "mail.google.com", + "pinterest.com", + "reddit.com", + "twitter.com", + "youtube.com", + ], + + shouldCheckUri(uri, isBaseUri = false) { + if (!["http", "https", "file", "moz-nullprincipal"].includes(uri.scheme)) { + return false; + } + + if (!isBaseUri && uri.scheme.startsWith("http")) { + // Sadly, some high-profile pages have false positives, so bail early for those: + let { host } = uri; + if (this._blockedHosts.some(blockedHost => host.endsWith(blockedHost))) { + // Allow github on non-project pages + if ( + host == "github.com" && + !uri.filePath.includes("/projects") && + !uri.filePath.includes("/issues") + ) { + return true; + } + return false; + } + + if (uri.filePath == "/") { + return false; + } + } + + return true; + }, +}; + +XPCOMUtils.defineLazyPreferenceGetter( + Readerable, + "isEnabled", + "reader.parse-on-load.enabled", + true +); diff --git a/toolkit/components/reader/Readerable.sys.mjs b/toolkit/components/reader/Readerable.sys.mjs new file mode 100644 index 0000000000..5412171ae3 --- /dev/null +++ b/toolkit/components/reader/Readerable.sys.mjs @@ -0,0 +1,9 @@ +// -*- indent-tabs-mode: nil; js-indent-level: 2 -*- +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include Readability-readerable.js +#include Readerable.js + +export { Readerable }; diff --git a/toolkit/components/reader/content/aboutReader.html b/toolkit/components/reader/content/aboutReader.html new file mode 100644 index 0000000000..86ff62f852 --- /dev/null +++ b/toolkit/components/reader/content/aboutReader.html @@ -0,0 +1,182 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this file, + - You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!DOCTYPE html> +<html> + <head> + <title id="reader-title"></title> + <meta + http-equiv="Content-Security-Policy" + content="default-src chrome:; img-src data: *; media-src *; object-src 'none'" + /> + <meta content="text/html; charset=UTF-8" http-equiv="content-type" /> + <meta name="viewport" content="width=device-width; user-scalable=0" /> + <link + rel="stylesheet" + href="chrome://global/skin/aboutReader.css" + type="text/css" + /> + <link + rel="stylesheet" + href="chrome://global/skin/aboutReaderPocket.css" + type="text/css" + /> + <link rel="localization" href="toolkit/about/aboutReader.ftl" /> + <link rel="localization" href="toolkit/branding/brandings.ftl" /> + </head> + + <body> + <div class="top-anchor"></div> + + <div id="toolbar" class="toolbar-container"> + <div class="toolbar reader-toolbar"> + <div class="reader-controls"> + <button + class="close-button toolbar-button" + aria-labelledby="toolbar-close" + data-telemetry-id="reader-close" + > + <span + class="hover-label" + id="toolbar-close" + data-l10n-id="about-reader-toolbar-close" + ></span> + </button> + <ul class="dropdown style-dropdown"> + <li> + <button + class="dropdown-toggle toolbar-button style-button" + aria-labelledby="toolbar-type-controls" + data-telemetry-id="reader-type-controls" + > + <span + class="hover-label" + id="toolbar-type-controls" + data-l10n-id="about-reader-toolbar-type-controls" + ></span> + </button> + </li> + <li class="dropdown-popup"> + <div class="dropdown-arrow"></div> + <div class="font-type-buttons radiorow"></div> + <div class="font-size-buttons buttonrow"> + <button + class="minus-button" + data-l10n-id="about-reader-toolbar-minus" + ></button> + <span class="font-size-value"></span> + <button + class="plus-button" + data-l10n-id="about-reader-toolbar-plus" + ></button> + </div> + <div class="content-width-buttons buttonrow"> + <button + class="content-width-minus-button" + data-l10n-id="about-reader-toolbar-contentwidthminus" + ></button> + <span class="content-width-value"></span> + <button + class="content-width-plus-button" + data-l10n-id="about-reader-toolbar-contentwidthplus" + ></button> + </div> + <div class="line-height-buttons buttonrow"> + <button + class="line-height-minus-button" + data-l10n-id="about-reader-toolbar-lineheightminus" + ></button> + <span class="line-height-value"></span> + <button + class="line-height-plus-button" + data-l10n-id="about-reader-toolbar-lineheightplus" + ></button> + </div> + <div class="color-scheme-buttons radiorow"></div> + </li> + </ul> + </div> + </div> + </div> + + <div class="container"> + <div class="header reader-header"> + <a class="domain reader-domain"></a> + <div class="domain-border"></div> + <h1 class="reader-title"></h1> + <div class="credits reader-credits"></div> + <div class="meta-data"> + <div class="reader-estimated-time"></div> + </div> + </div> + + <hr /> + + <div class="content"> + <div class="moz-reader-content"></div> + </div> + + <div> + <div class="reader-message"></div> + </div> + <div aria-owns="toolbar"></div> + </div> + + <div id="pocket-cta-container" hidden> + <div class="pocket-cta-inner" id="pocket-cta-only"> + <div class="pocket-cta"> + <header class="pocket-cta-header"> + Save anything from across the web in Pocket, your personal library. + </header> + <p class="pocket-cta-subhead"> + As part of the Firefox family, Pocket provides a quiet, calm space + that’s perfect for reading. It strips away all the distractions of + the internet so you can really focus. + </p> + <a + href="https://getpocket.com/signup?utm_source=firefox_reader&utm_medium=variant_cta_only" + class="pocket-btn pocket-sign-up" + ><strong>Sign up</strong> - it’s free</a + > + <a + href="https://getpocket.com/explore?utm_source=firefox_reader&utm_medium=variant_cta_only" + class="pocket-btn pocket-discover-more" + ><strong>Discover more</strong></a + > + </div> + <button class="pocket-dismiss-cta" title="Dismiss"></button> + </div> + + <div class="pocket-cta-inner" id="pocket-cta-and-recs"> + <div class="pocket-recs-top"> + <div class="col"> + <header class="pocket-cta-header"> + Discover the most thought-provoking stories out there, curated by + Pocket. + </header> + <p class="pocket-cta-subhead"> + As part of the Firefox family, Pocket surfaces the best articles + out there—new perspectives, intriguing deep-dives, timeless + classics—and we do this with the same dedication to privacy you’ve + come to expect from Firefox and Mozilla. + </p> + </div> + <div class="col"> + <button class="pocket-collapse-recs"></button> + </div> + </div> + + <div class="pocket-recs"></div> + + <div class="pocket-sign-up-wrapper"> + <a + href="https://getpocket.com/explore?utm_source=firefox_reader&utm_medium=variant_cta_plus_recs" + class="pocket-btn pocket-sign-up" + ><strong>Discover more</strong></a + > + </div> + </div> + </div> + </body> +</html> diff --git a/toolkit/components/reader/jar.mn b/toolkit/components/reader/jar.mn new file mode 100644 index 0000000000..4b72136192 --- /dev/null +++ b/toolkit/components/reader/jar.mn @@ -0,0 +1,6 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +toolkit.jar: + content/global/reader/aboutReader.html (content/aboutReader.html) diff --git a/toolkit/components/reader/moz.build b/toolkit/components/reader/moz.build new file mode 100644 index 0000000000..e0283a28c5 --- /dev/null +++ b/toolkit/components/reader/moz.build @@ -0,0 +1,28 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +JAR_MANIFESTS += ["jar.mn"] + +EXTRA_JS_MODULES += [ + "AboutReader.sys.mjs", + "ReaderMode.sys.mjs", +] + +EXTRA_PP_JS_MODULES += [ + "Readerable.sys.mjs", +] + +EXTRA_JS_MODULES.reader = [ + "JSDOMParser.js", + "Readability.js", + "ReaderWorker.js", + "ReaderWorker.sys.mjs", +] + +BROWSER_CHROME_MANIFESTS += ["test/browser.ini"] + +with Files("**"): + BUG_COMPONENT = ("Toolkit", "Reader Mode") diff --git a/toolkit/components/reader/test/browser.ini b/toolkit/components/reader/test/browser.ini new file mode 100644 index 0000000000..ee8c6d2ad8 --- /dev/null +++ b/toolkit/components/reader/test/browser.ini @@ -0,0 +1,66 @@ +[DEFAULT] +support-files = head.js +[browser_localfile_readerMode.js] +support-files = + readerModeArticle.html +[browser_readerMode.js] +support-files = + readerModeNonArticle.html + readerModeArticle.html + readerModeArticleHiddenNodes.html +skip-if = + os == "win" && os_version == "6.1" # Skip on Azure - frequent failure +[browser_readerMode_bc_reuse.js] +support-files = + readerModeArticle.html +[browser_readerMode_cached.js] +support-files = + readerModeRandom.sjs +[browser_readerMode_colorSchemePref.js] +support-files = + readerModeArticle.html +[browser_readerMode_hidden_nodes.js] +support-files = + readerModeArticleHiddenNodes.html +[browser_readerMode_menu.js] +support-files = + readerModeArticleShort.html +[browser_readerMode_pocket.js] +support-files = + readerModeArticleShort.html + readerModeArticleMedium.html +[browser_readerMode_refresh.js] +support-files = + readerModeArticleShort.html + readerModeArticleTextPlain.txt +skip-if = + os == "win" && os_version == "6.1" # Skip on Azure - frequent failure +[browser_readerMode_samesite_cookie_redirect.js] +support-files = + getCookies.sjs + setSameSiteCookie.html + setSameSiteCookie.html^headers^ +[browser_readerMode_with_anchor.js] +support-files = + readerModeArticle.html +[browser_bug1124271_readerModePinnedTab.js] +support-files = + readerModeArticle.html +[browser_bug1453818_samesite_cookie.js] +support-files = + getCookies.sjs + linkToGetCookies.html + setSameSiteCookie.html + setSameSiteCookie.html^headers^ +[browser_readerMode_readingTime.js] +support-files = + readerModeArticle.html + readerModeArticleShort.html + readerModeArticleMedium.html +[browser_readerMode_remoteType.js] +support-files = + readerModeArticleShort.html +[browser_drag_url_readerMode.js] +support-files = + readerModeArticle.html + diff --git a/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js b/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js new file mode 100644 index 0000000000..346d503675 --- /dev/null +++ b/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js @@ -0,0 +1,53 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Test that the reader mode button won't open in a new tab when clicked from a pinned tab + +const PREF = "reader.parse-on-load.enabled"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "https://example.com" +); + +var readerButton = document.getElementById("reader-mode-button"); + +add_task(async function () { + registerCleanupFunction(function () { + Services.prefs.clearUserPref(PREF); + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + // Enable the reader mode button. + Services.prefs.setBoolPref(PREF, true); + + let tab = (gBrowser.selectedTab = BrowserTestUtils.addTab(gBrowser)); + gBrowser.pinTab(tab); + + let initialTabsCount = gBrowser.tabs.length; + + // Point tab to a test page that is reader-able. + let url = TEST_PATH + "readerModeArticle.html"; + await promiseTabLoadEvent(tab, url); + await TestUtils.waitForCondition(() => !readerButton.hidden); + + readerButton.click(); + await promiseTabLoadEvent(tab); + + // Ensure no new tabs are opened when exiting reader mode in a pinned tab + is(gBrowser.tabs.length, initialTabsCount, "No additional tabs were opened."); + + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + tab.linkedBrowser, + "pageshow" + ); + readerButton.click(); + await pageShownPromise; + // Ensure no new tabs are opened when exiting reader mode in a pinned tab + is(gBrowser.tabs.length, initialTabsCount, "No additional tabs were opened."); + + gBrowser.removeCurrentTab(); +}); diff --git a/toolkit/components/reader/test/browser_bug1453818_samesite_cookie.js b/toolkit/components/reader/test/browser_bug1453818_samesite_cookie.js new file mode 100644 index 0000000000..1fbfdeabfb --- /dev/null +++ b/toolkit/components/reader/test/browser_bug1453818_samesite_cookie.js @@ -0,0 +1,132 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const TEST_ORIGIN1 = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); +const TEST_ORIGIN2 = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.org" +); + +async function clickLink(browser) { + info("Waiting for the page to load after clicking the link..."); + let pageLoaded = BrowserTestUtils.waitForContentEvent( + browser, + "DOMContentLoaded" + ); + await SpecialPowers.spawn(browser, [], async function () { + let link = content.document.getElementById("link"); + ok(link, "The link element was found."); + link.click(); + }); + await pageLoaded; +} + +async function checkCookiePresent(browser) { + await SpecialPowers.spawn(browser, [], async function () { + let cookieSpan = content.document.getElementById("cookieSpan"); + ok(cookieSpan, "cookieSpan element should be in document"); + is( + cookieSpan.textContent, + "foo=bar", + "The SameSite cookie was sent correctly." + ); + }); +} + +async function checkCookie(browser) { + info("Check that the SameSite cookie was not sent."); + await SpecialPowers.spawn(browser, [], async function () { + let cookieSpan = content.document.getElementById("cookieSpan"); + ok(cookieSpan, "cookieSpan element should be in document"); + is( + cookieSpan.textContent, + "", + "The SameSite cookie was blocked correctly." + ); + }); +} + +async function runTest() { + await SpecialPowers.pushPrefEnv({ + set: [["reader.parse-on-load.enabled", true]], + }); + + info("Set a SameSite=strict cookie."); + await BrowserTestUtils.withNewTab( + TEST_ORIGIN1 + "setSameSiteCookie.html", + () => {} + ); + + info("Check that the cookie has been correctly set."); + await BrowserTestUtils.withNewTab( + TEST_ORIGIN1 + "getCookies.sjs", + async function (browser) { + await checkCookiePresent(browser); + } + ); + + info( + "Open a cross-origin page with a link to the domain that set the cookie." + ); + { + let browser; + let pageLoaded; + let tab = await BrowserTestUtils.openNewForegroundTab( + gBrowser, + () => { + let t = BrowserTestUtils.addTab( + gBrowser, + TEST_ORIGIN2 + "linkToGetCookies.html" + ); + gBrowser.selectedTab = t; + browser = gBrowser.selectedBrowser; + pageLoaded = BrowserTestUtils.waitForContentEvent( + browser, + "DOMContentLoaded" + ); + return t; + }, + false + ); + + info("Waiting for the page to load in normal mode..."); + await pageLoaded; + + await clickLink(browser); + await checkCookie(browser); + await BrowserTestUtils.removeTab(tab); + } + + info("Open the cross-origin page again."); + await BrowserTestUtils.withNewTab( + TEST_ORIGIN2 + "linkToGetCookies.html", + async function (browser) { + let pageShown = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + ok(readerButton, "readerButton should be available"); + readerButton.click(); + + info("Waiting for the page to be displayed in reader mode..."); + await pageShown; + + await clickLink(browser); + await checkCookie(browser); + } + ); +} + +add_task(async function () { + await runTest(true); +}); + +add_task(async function () { + await runTest(false); +}); diff --git a/toolkit/components/reader/test/browser_drag_url_readerMode.js b/toolkit/components/reader/test/browser_drag_url_readerMode.js new file mode 100644 index 0000000000..e8a157c527 --- /dev/null +++ b/toolkit/components/reader/test/browser_drag_url_readerMode.js @@ -0,0 +1,54 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +add_task(async function test_readerModeURLDrag() { + await BrowserTestUtils.withNewTab( + { + gBrowser, + url: TEST_PATH + "readerModeArticle.html", + }, + + async browser => { + let readerButton = document.getElementById("reader-mode-button"); + await TestUtils.waitForCondition( + () => !readerButton.hidden, + "Reader mode button should become visible" + ); + + is_element_visible( + readerButton, + "Reader mode button is present on a reader-able page" + ); + + // Switch page into reader mode. + let promiseTabLoad = BrowserTestUtils.browserLoaded(browser); + readerButton.click(); + await promiseTabLoad; + let urlbar = document.getElementById("urlbar-input"); + let readerUrl = gBrowser.selectedBrowser.currentURI.spec; + ok( + readerUrl.startsWith("about:reader"), + "about:reader loaded after clicking reader mode button" + ); + + let dataTran = new DataTransfer(); + let urlEvent = new DragEvent("dragstart", { dataTransfer: dataTran }); + let oldUrl = TEST_PATH + "readerModeArticle.html"; + let urlBarContainer = document.getElementById("urlbar-input-container"); + urlBarContainer.click(); + urlbar.dispatchEvent(urlEvent); + + let newUrl = urlEvent.dataTransfer.getData("text/plain"); + ok(!newUrl.includes("about:reader"), "URL does not contain about:reader"); + + Assert.equal(newUrl, oldUrl, "URL is the same"); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_localfile_readerMode.js b/toolkit/components/reader/test/browser_localfile_readerMode.js new file mode 100644 index 0000000000..118e4bb23f --- /dev/null +++ b/toolkit/components/reader/test/browser_localfile_readerMode.js @@ -0,0 +1,54 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const TEST_BASE_URI = getResolvedURI(getRootDirectory(gTestPath)).spec; + +let readerButton = document.getElementById("reader-mode-button"); + +/** + * Reader mode should work on local files. + */ +add_task(async function test_readermode_available_for_local_files() { + await BrowserTestUtils.withNewTab( + TEST_BASE_URI + "readerModeArticle.html", + async function (browser) { + await TestUtils.waitForCondition( + () => !readerButton.hidden, + "Reader mode button should become visible" + ); + + is_element_visible( + readerButton, + "Reader mode button is present on a reader-able page" + ); + + // Switch page into reader mode. + let promiseTabLoad = BrowserTestUtils.browserLoaded(browser); + readerButton.click(); + await promiseTabLoad; + + let readerUrl = gBrowser.selectedBrowser.currentURI.spec; + ok( + readerUrl.startsWith("about:reader"), + "about:reader loaded after clicking reader mode button" + ); + is_element_visible( + readerButton, + "Reader mode button is present on about:reader" + ); + + is( + gURLBar.untrimmedValue, + TEST_BASE_URI + "readerModeArticle.html", + "gURLBar value is about:reader URL" + ); + is( + gURLBar.value, + gURLBar.untrimmedValue, + "gURLBar is displaying original article URL" + ); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode.js b/toolkit/components/reader/test/browser_readerMode.js new file mode 100644 index 0000000000..9a1c11329b --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode.js @@ -0,0 +1,394 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that the reader mode button appears and works properly on + * reader-able content. + */ +const TEST_PREFS = [["reader.parse-on-load.enabled", true]]; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "https://example.com" +); + +var readerButton = document.getElementById("reader-mode-button"); + +ChromeUtils.defineESModuleGetters(this, { + PlacesTestUtils: "resource://testing-common/PlacesTestUtils.sys.mjs", +}); + +add_task(async function test_reader_button() { + registerCleanupFunction(function () { + // Reset test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.clearUserPref(name); + }); + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + // Set required test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.setBoolPref(name, value); + }); + + let tab = (gBrowser.selectedTab = BrowserTestUtils.addTab(gBrowser)); + is_element_hidden( + readerButton, + "Reader mode button is not present on a new tab" + ); + ok( + !UITour.isInfoOnTarget(window, "readerMode-urlBar"), + "Info panel shouldn't appear without the reader mode button" + ); + + // Point tab to a test page that is reader-able. + let url = TEST_PATH + "readerModeArticle.html"; + // Set up favicon for testing. + let favicon = + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAA" + + "AAAA6fptVAAAACklEQVQI12NgAAAAAgAB4iG8MwAAAABJRU5ErkJggg=="; + info("Adding visit so we can add favicon"); + await PlacesTestUtils.addVisits(new URL(url)); + info("Adding favicon"); + await PlacesTestUtils.addFavicons(new Map([[url, favicon]])); + info("Opening tab and waiting for reader mode button to show up"); + + await promiseTabLoadEvent(tab, url); + await TestUtils.waitForCondition(() => !readerButton.hidden); + + is_element_visible( + readerButton, + "Reader mode button is present on a reader-able page" + ); + + // Switch page into reader mode. + let promiseTabLoad = promiseTabLoadEvent(tab); + readerButton.click(); + await promiseTabLoad; + + let readerUrl = gBrowser.selectedBrowser.currentURI.spec; + ok( + readerUrl.startsWith("about:reader"), + "about:reader loaded after clicking reader mode button" + ); + is_element_visible( + readerButton, + "Reader mode button is present on about:reader" + ); + let iconEl = tab.iconImage; + await TestUtils.waitForCondition( + () => iconEl.getBoundingClientRect().width != 0 + ); + is_element_visible(iconEl, "Favicon should be visible"); + is(iconEl.src, favicon, "Correct favicon should be loaded"); + + is(gURLBar.untrimmedValue, url, "gURLBar value is about:reader URL"); + is(gURLBar.value, url, "gURLBar is displaying original article URL"); + + // Check selected value for URL bar + await new Promise((resolve, reject) => { + waitForClipboard( + url, + function () { + gURLBar.focus(); + gURLBar.select(); + goDoCommand("cmd_copy"); + }, + resolve, + reject + ); + }); + + info("Got correct URL when copying"); + + // Switch page back out of reader mode. + let promisePageShow = BrowserTestUtils.waitForContentEvent( + tab.linkedBrowser, + "pageshow", + false, + e => e.target.location.href != "about:blank" + ); + readerButton.click(); + await promisePageShow; + is( + gBrowser.selectedBrowser.currentURI.spec, + url, + "Back to the original page after clicking active reader mode button" + ); + ok( + gBrowser.selectedBrowser.canGoForward, + "Moved one step back in the session history." + ); + + let nonReadableUrl = TEST_PATH + "readerModeNonArticle.html"; + + // Load a new tab that is NOT reader-able. + let newTab = (gBrowser.selectedTab = BrowserTestUtils.addTab(gBrowser)); + await promiseTabLoadEvent(newTab, nonReadableUrl); + await TestUtils.waitForCondition(() => readerButton.hidden); + is_element_hidden( + readerButton, + "Reader mode button is not present on a non-reader-able page" + ); + + // Switch back to the original tab to make sure reader mode button is still visible. + gBrowser.removeCurrentTab(); + await TestUtils.waitForCondition(() => !readerButton.hidden); + is_element_visible( + readerButton, + "Reader mode button is present on a reader-able page" + ); + + // Load a new tab in reader mode that is NOT reader-able in the reader mode. + newTab = gBrowser.selectedTab = BrowserTestUtils.addTab(gBrowser); + let promiseAboutReaderError = BrowserTestUtils.waitForContentEvent( + newTab.linkedBrowser, + "AboutReaderContentError" + ); + await promiseTabLoadEvent(newTab, "about:reader?url=" + nonReadableUrl); + await promiseAboutReaderError; + await TestUtils.waitForCondition(() => !readerButton.hidden); + is_element_visible( + readerButton, + "Reader mode button is present on about:reader even in error state" + ); + + // Switch page back out of reader mode. + promisePageShow = BrowserTestUtils.waitForContentEvent( + newTab.linkedBrowser, + "pageshow", + false, + e => e.target.location.href != "about:blank" + ); + readerButton.click(); + await promisePageShow; + is( + gBrowser.selectedBrowser.currentURI.spec, + nonReadableUrl, + "Back to the original non-reader-able page after clicking active reader mode button" + ); + await TestUtils.waitForCondition(() => readerButton.hidden); + is_element_hidden( + readerButton, + "Reader mode button is not present on a non-reader-able page" + ); +}); + +add_task(async function test_getOriginalUrl() { + let { ReaderMode } = ChromeUtils.importESModule( + "resource://gre/modules/ReaderMode.sys.mjs" + ); + let url = "https://foo.com/article.html"; + + is( + ReaderMode.getOriginalUrl("about:reader?url=" + encodeURIComponent(url)), + url, + "Found original URL from encoded URL" + ); + is( + ReaderMode.getOriginalUrl("about:reader?foobar"), + null, + "Did not find original URL from malformed reader URL" + ); + is( + ReaderMode.getOriginalUrl(url), + null, + "Did not find original URL from non-reader URL" + ); + + let badUrl = "https://foo.com/?;$%^^"; + is( + ReaderMode.getOriginalUrl("about:reader?url=" + encodeURIComponent(badUrl)), + badUrl, + "Found original URL from encoded malformed URL" + ); + is( + ReaderMode.getOriginalUrl("about:reader?url=" + badUrl), + badUrl, + "Found original URL from non-encoded malformed URL" + ); +}); + +add_task(async function test_reader_view_element_attribute_transform() { + registerCleanupFunction(function () { + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + function observeAttribute(element, attributes, triggerFn, checkFn) { + return new Promise(resolve => { + let observer = new MutationObserver(mutations => { + for (let mu of mutations) { + if (element.getAttribute(mu.attributeName) !== mu.oldValue) { + if (checkFn()) { + resolve(); + observer.disconnect(); + return; + } + } + } + }); + + observer.observe(element, { + attributes: true, + attributeOldValue: true, + attributeFilter: Array.isArray(attributes) ? attributes : [attributes], + }); + + triggerFn(); + }); + } + + let menuitem = document.getElementById("menu_readerModeItem"); + let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser); + is( + menuitem.hidden, + true, + "menuitem element should have the hidden attribute" + ); + + info("Navigate a reader-able page"); + function waitForNonBlankPage() { + return BrowserTestUtils.waitForContentEvent( + tab.linkedBrowser, + "pageshow", + false, + e => e.target.location.href != "about:blank" + ); + } + let waitForPageshow = waitForNonBlankPage(); + await observeAttribute( + menuitem, + "hidden", + () => { + let url = TEST_PATH + "readerModeArticle.html"; + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + }, + () => !menuitem.hidden + ); + is( + menuitem.hidden, + false, + "menuitem's hidden attribute should be false on a reader-able page" + ); + await waitForPageshow; + + info("Navigate a non-reader-able page"); + waitForPageshow = waitForNonBlankPage(); + await observeAttribute( + menuitem, + "hidden", + () => { + let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + }, + () => menuitem.hidden + ); + is( + menuitem.hidden, + true, + "menuitem's hidden attribute should be true on a non-reader-able page" + ); + await waitForPageshow; + + info("Navigate a reader-able page"); + waitForPageshow = waitForNonBlankPage(); + await observeAttribute( + menuitem, + "hidden", + () => { + let url = TEST_PATH + "readerModeArticle.html"; + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + }, + () => !menuitem.hidden + ); + is( + menuitem.hidden, + false, + "menuitem's hidden attribute should be false on a reader-able page" + ); + await waitForPageshow; + + info("Enter Reader Mode"); + waitForPageshow = waitForNonBlankPage(); + await observeAttribute( + readerButton, + "readeractive", + () => { + readerButton.click(); + }, + () => readerButton.getAttribute("readeractive") == "true" + ); + is( + readerButton.getAttribute("readeractive"), + "true", + "readerButton's readeractive attribute should be true when entering reader mode" + ); + await waitForPageshow; + + info("Exit Reader Mode"); + waitForPageshow = waitForNonBlankPage(); + await observeAttribute( + readerButton, + ["readeractive", "hidden"], + () => { + readerButton.click(); + }, + () => { + info( + `active: ${readerButton.getAttribute("readeractive")}; hidden: ${ + menuitem.hidden + }` + ); + return !readerButton.getAttribute("readeractive") && !menuitem.hidden; + } + ); + ok( + !readerButton.getAttribute("readeractive"), + "readerButton's readeractive attribute should be empty when reader mode is exited" + ); + ok(!menuitem.hidden, "menuitem should not be hidden."); + await waitForPageshow; + + info("Navigate a non-reader-able page"); + waitForPageshow = waitForNonBlankPage(); + await observeAttribute( + menuitem, + "hidden", + () => { + let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + }, + () => menuitem.hidden + ); + is( + menuitem.hidden, + true, + "menuitem's hidden attribute should be true on a non-reader-able page" + ); + await waitForPageshow; +}); + +add_task(async function test_reader_mode_lang() { + let url = TEST_PATH + "readerModeArticle.html"; + let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser); + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + + await promiseTabLoadEvent(tab, url); + await TestUtils.waitForCondition(() => !readerButton.hidden); + + // Switch page into reader mode. + let promiseTabLoad = promiseTabLoadEvent(tab); + readerButton.click(); + await promiseTabLoad; + + await SpecialPowers.spawn(gBrowser.selectedBrowser, [], () => { + let container = content.document.querySelector(".container"); + is(container.lang, "en"); + }); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_bc_reuse.js b/toolkit/components/reader/test/browser_readerMode_bc_reuse.js new file mode 100644 index 0000000000..9ac0e367ca --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_bc_reuse.js @@ -0,0 +1,44 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +const TEST_URL = TEST_PATH + "readerModeArticle.html"; + +add_task(async function test_TODO() { + await BrowserTestUtils.withNewTab( + "data:text/html,<p>Opener", + async browser => { + let newTabPromise = BrowserTestUtils.waitForNewTab( + gBrowser, + TEST_URL, + true + ); + await SpecialPowers.spawn(browser, [TEST_URL], url => { + content.eval(`window.x = open("${url}", "_blank");`); + }); + let newTab = await newTabPromise; + + let readerButton = document.getElementById("reader-mode-button"); + await BrowserTestUtils.waitForMutationCondition( + readerButton, + { attributes: true }, + () => !readerButton.hidden + ); + let tabLoaded = promiseTabLoadEvent(newTab); + readerButton.click(); + await tabLoaded; + isnot( + newTab.linkedBrowser.browsingContext.group.id, + browser.browsingContext.group.id, + "BC should be in a different group now." + ); + BrowserTestUtils.removeTab(newTab); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_cached.js b/toolkit/components/reader/test/browser_readerMode_cached.js new file mode 100644 index 0000000000..7f36a15dbb --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_cached.js @@ -0,0 +1,32 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ +"use strict"; + +// This test verifies that the article is properly using the cached data +// when switching into reader mode. The article produces a random number +// contained within it, so if the article gets reloaded instead of using +// the cached version, it would have a different value in it. +const URL = + "http://mochi.test:8888/browser/toolkit/components/reader/test/readerModeRandom.sjs"; + +add_task(async function () { + let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, URL); + + let randomNumber = await SpecialPowers.spawn(tab.linkedBrowser, [], () => { + return content.document.getElementById("rnd").textContent; + }); + + let promiseTabLoad = promiseTabLoadEvent(tab); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await promiseTabLoad; + await TestUtils.waitForCondition(() => !readerButton.hidden); + + let newRandomNumber = await SpecialPowers.spawn(tab.linkedBrowser, [], () => { + return content.document.getElementById("rnd").textContent; + }); + + is(randomNumber, newRandomNumber, "used the same value"); + + BrowserTestUtils.removeTab(tab); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_colorSchemePref.js b/toolkit/components/reader/test/browser_readerMode_colorSchemePref.js new file mode 100644 index 0000000000..0bee6eaf05 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_colorSchemePref.js @@ -0,0 +1,67 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +async function testColorScheme(aPref, aExpectation) { + // Set the browser content theme to light or dark. + Services.prefs.setIntPref("browser.theme.content-theme", aPref); + + // Reader Mode Color Scheme Preference must be manually set by the user, will + // default to "auto" initially. + Services.prefs.setCharPref("reader.color_scheme", aExpectation); + + let aBodyExpectation = aExpectation; + if (aBodyExpectation === "auto") { + aBodyExpectation = aPref === 1 ? "light" : "dark"; + } + + // Open a browser tab, enter reader mode, and test if we have the valid + // reader mode color scheme preference pre-selected. + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticle.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + + let colorScheme = Services.prefs.getCharPref("reader.color_scheme"); + + Assert.equal(colorScheme, aExpectation); + + await SpecialPowers.spawn(browser, [aBodyExpectation], expectation => { + let bodyClass = content.document.body.className; + ok( + bodyClass.includes(expectation), + "The body of the test document has the correct color scheme." + ); + }); + } + ); +} + +/** + * Test that opening reader mode maintains the correct color scheme preference + * until the user manually sets a different color scheme. + */ +add_task(async function () { + await testColorScheme(0, "auto"); + await testColorScheme(1, "auto"); + await testColorScheme(0, "light"); + await testColorScheme(1, "light"); + await testColorScheme(0, "dark"); + await testColorScheme(1, "dark"); + await testColorScheme(0, "sepia"); + await testColorScheme(1, "sepia"); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js b/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js new file mode 100644 index 0000000000..38a5955f61 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js @@ -0,0 +1,56 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that the reader mode button appears and works properly on + * reader-able content. + */ +const TEST_PREFS = [["reader.parse-on-load.enabled", true]]; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +var readerButton = document.getElementById("reader-mode-button"); + +add_task(async function test_reader_button() { + registerCleanupFunction(function () { + // Reset test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.clearUserPref(name); + }); + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + // Set required test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.setBoolPref(name, value); + }); + + let tab = (gBrowser.selectedTab = BrowserTestUtils.addTab(gBrowser)); + is_element_hidden( + readerButton, + "Reader mode button is not present on a new tab" + ); + // Point tab to a test page that is not reader-able due to hidden nodes. + let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; + let paintPromise = BrowserTestUtils.waitForContentEvent( + tab.linkedBrowser, + "MozAfterPaint", + false, + e => + e.originalTarget.location.href.endsWith("HiddenNodes.html") && + e.originalTarget.document.readyState == "complete" + ); + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + await paintPromise; + + is_element_hidden( + readerButton, + "Reader mode button is still not present on tab with unreadable content." + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_menu.js b/toolkit/components/reader/test/browser_readerMode_menu.js new file mode 100644 index 0000000000..304fefa5d0 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_menu.js @@ -0,0 +1,69 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +/** + * Test that the reader mode correctly calculates and displays the + * estimated reading time for a short article + */ +add_task(async function () { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticleShort.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + await SpecialPowers.spawn(browser, [], async function () { + function dispatchMouseEvent(win, target, eventName) { + let mouseEvent = new win.MouseEvent(eventName, { + view: win, + bubbles: true, + cancelable: true, + composed: true, + }); + target.dispatchEvent(mouseEvent); + } + + function simulateClick(target) { + dispatchMouseEvent(win, target, "mousedown"); + dispatchMouseEvent(win, target, "mouseup"); + dispatchMouseEvent(win, target, "click"); + } + + let doc = content.document; + let win = content.window; + let styleButton = doc.querySelector(".style-button"); + + let styleDropdown = doc.querySelector(".style-dropdown"); + ok(!styleDropdown.classList.contains("open"), "dropdown is closed"); + + simulateClick(styleButton); + ok(styleDropdown.classList.contains("open"), "dropdown is open"); + + // simulate clicking on the article title to close the dropdown + let title = doc.querySelector("h1"); + simulateClick(title); + ok(!styleDropdown.classList.contains("open"), "dropdown is closed"); + + // reopen the dropdown + simulateClick(styleButton); + ok(styleDropdown.classList.contains("open"), "dropdown is open"); + + // now click on the button again to close it + simulateClick(styleButton); + ok(!styleDropdown.classList.contains("open"), "dropdown is closed"); + }); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_pocket.js b/toolkit/components/reader/test/browser_readerMode_pocket.js new file mode 100644 index 0000000000..e68aa7f9c0 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_pocket.js @@ -0,0 +1,136 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ +"use strict"; + +// This test verifies that the Save To Pocket button appears in reader mode, +// and is toggled hidden and visible when pocket is disabled and enabled. + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +async function getPocketButtonsCount(browser) { + return SpecialPowers.spawn(browser, [], () => { + return content.document.getElementsByClassName("pocket-button").length; + }); +} + +add_task(async function () { + // set the pocket preference before beginning. + await SpecialPowers.pushPrefEnv({ + set: [["extensions.pocket.enabled", true]], + }); + + var readerButton = document.getElementById("reader-mode-button"); + + let tab1 = await BrowserTestUtils.openNewForegroundTab( + gBrowser, + TEST_PATH + "readerModeArticleShort.html" + ); + + let promiseTabLoad = promiseTabLoadEvent(tab1); + readerButton.click(); + await promiseTabLoad; + + let tab2 = await BrowserTestUtils.openNewForegroundTab( + gBrowser, + TEST_PATH + "readerModeArticleMedium.html" + ); + + promiseTabLoad = promiseTabLoadEvent(tab2); + readerButton.click(); + await promiseTabLoad; + + is( + await getPocketButtonsCount(tab1.linkedBrowser), + 1, + "tab 1 has a pocket button" + ); + is( + await getPocketButtonsCount(tab1.linkedBrowser), + 1, + "tab 2 has a pocket button" + ); + + // Turn off the pocket preference. The Save To Pocket buttons should disappear. + await SpecialPowers.pushPrefEnv({ + set: [["extensions.pocket.enabled", false]], + }); + + is( + await getPocketButtonsCount(tab1.linkedBrowser), + 0, + "tab 1 has no pocket button" + ); + is( + await getPocketButtonsCount(tab1.linkedBrowser), + 0, + "tab 2 has no pocket button" + ); + + // Turn on the pocket preference. The Save To Pocket buttons should reappear again. + await SpecialPowers.pushPrefEnv({ + set: [["extensions.pocket.enabled", true]], + }); + + is( + await getPocketButtonsCount(tab1.linkedBrowser), + 1, + "tab 1 has a pocket button again" + ); + is( + await getPocketButtonsCount(tab1.linkedBrowser), + 1, + "tab 2 has a pocket button again" + ); + + BrowserTestUtils.removeTab(tab1); + BrowserTestUtils.removeTab(tab2); +}); + +/** + * Test that the pocket button toggles the pocket popup successfully + */ +add_task(async function () { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticleShort.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + + await SpecialPowers.spawn(browser, [], async function () { + content.document.querySelector(".pocket-button").click(); + }); + let panel = gBrowser.selectedBrowser.ownerDocument.querySelector( + "#customizationui-widget-panel" + ); + await BrowserTestUtils.waitForMutationCondition( + panel, + { attributes: true }, + () => { + return BrowserTestUtils.is_visible(panel); + } + ); + ok(BrowserTestUtils.is_visible(panel), "Panel buttons are visible"); + + await SpecialPowers.spawn(browser, [], async function () { + content.document.querySelector(".pocket-button").click(); + }); + await BrowserTestUtils.waitForMutationCondition( + panel, + { attributes: true }, + () => { + return BrowserTestUtils.is_hidden(panel); + } + ); + + ok(BrowserTestUtils.is_hidden(panel), "Panel buttons are hidden"); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_readingTime.js b/toolkit/components/reader/test/browser_readerMode_readingTime.js new file mode 100644 index 0000000000..91631b6234 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_readingTime.js @@ -0,0 +1,101 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +/** + * Test that the reader mode correctly calculates and displays the + * estimated reading time for a normal length article + */ +add_task(async function () { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticle.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + await SpecialPowers.spawn(browser, [], async function () { + // make sure there is a reading time on the page and that it displays the correct information + let readingTimeElement = content.document.querySelector( + ".reader-estimated-time" + ); + ok(readingTimeElement, "Reading time element should be in document"); + const args = JSON.parse(readingTimeElement.dataset.l10nArgs); + is(args.rangePlural, "other", "Reading time should be '9-12 minutes'"); + ok( + /\b9\b.*\b12\b/.test(args.range), + "Reading time should be '9-12 minutes'" + ); + }); + } + ); +}); + +/** + * Test that the reader mode correctly calculates and displays the + * estimated reading time for a short article + */ +add_task(async function () { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticleShort.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + await SpecialPowers.spawn(browser, [], async function () { + // make sure there is a reading time on the page and that it displays the correct information + let readingTimeElement = content.document.querySelector( + ".reader-estimated-time" + ); + ok(readingTimeElement, "Reading time element should be in document"); + const args = JSON.parse(readingTimeElement.dataset.l10nArgs); + is(args.rangePlural, "one", "Reading time should be '~1 minute'"); + ok(/\b1\b/.test(args.range), "Reading time should be '~1 minute'"); + }); + } + ); +}); + +/** + * Test that the reader mode correctly calculates and displays the + * estimated reading time for a medium article where a single number + * is displayed. + */ +add_task(async function () { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticleMedium.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + await SpecialPowers.spawn(browser, [], async function () { + // make sure there is a reading time on the page and that it displays the correct information + let readingTimeElement = content.document.querySelector( + ".reader-estimated-time" + ); + ok(readingTimeElement, "Reading time element should be in document"); + const args = JSON.parse(readingTimeElement.dataset.l10nArgs); + is(args.rangePlural, "other", "Reading time should be '~3 minutes'"); + ok(/\b3\b/.test(args.range), "Reading time should be '~3 minutes'"); + }); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_refresh.js b/toolkit/components/reader/test/browser_readerMode_refresh.js new file mode 100644 index 0000000000..00b4557f70 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_refresh.js @@ -0,0 +1,49 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +async function testRefresh(url) { + // Open an article in a browser tab + await BrowserTestUtils.withNewTab(url, async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + + let readerButton = document.getElementById("reader-mode-button"); + let refreshButton = document.getElementById("reload-button"); + + // Enter Reader Mode + readerButton.click(); + await pageShownPromise; + + // Refresh the page + pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + refreshButton.click(); + await pageShownPromise; + await SpecialPowers.spawn(browser, [], () => { + ok( + !content.document.documentElement.hasAttribute("data-is-error"), + "The data-is-error attribute is present when Reader Mode failed to load an article." + ); + }); + }); +} + +add_task(async function () { + // Testing a non-text/plain document + await testRefresh(TEST_PATH + "readerModeArticle.html"); + + // Testing a test/plain document + await testRefresh(TEST_PATH + "readerModeArticleTextPlain.txt"); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_remoteType.js b/toolkit/components/reader/test/browser_readerMode_remoteType.js new file mode 100644 index 0000000000..690cb74338 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_remoteType.js @@ -0,0 +1,87 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +const CROSS_SITE_TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.org" +); + +/** + * Test that switching an article into readermode doesn't change its' remoteType. + * Test that the reader mode correctly calculates and displays the + * estimated reading time for a short article + */ +add_task(async function () { + info("opening readermode normally to ensure process doesn't change"); + let articleRemoteType; + let aboutReaderURL; + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticleShort.html", + async function (browser) { + articleRemoteType = browser.remoteType; + + // Click on the readermode button to switch into reader mode, and get the + // URL for that reader mode. + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + + aboutReaderURL = browser.documentURI.spec; + ok( + aboutReaderURL.startsWith("about:reader"), + "about:reader should have been opened" + ); + is( + browser.remoteType, + articleRemoteType, + "remote type should not have changed" + ); + } + ); + + info( + "opening new tab directly with about reader URL into correct remote type" + ); + await BrowserTestUtils.withNewTab(aboutReaderURL, async function (browser) { + is( + browser.remoteType, + articleRemoteType, + "Should have performed about:reader load in the correct remote type" + ); + }); + + info("navigating process into correct remote type"); + await BrowserTestUtils.withNewTab( + CROSS_SITE_TEST_PATH + "readerModeArticleShort.html", + async function (browser) { + if (SpecialPowers.useRemoteSubframes) { + isnot( + browser.remoteType, + articleRemoteType, + "Cross-site article should have different remote type with fission" + ); + } + + BrowserTestUtils.loadURIString(browser, aboutReaderURL); + await BrowserTestUtils.browserLoaded(browser); + + is( + browser.remoteType, + articleRemoteType, + "Should have switched into the correct remote type" + ); + } + ); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_samesite_cookie_redirect.js b/toolkit/components/reader/test/browser_readerMode_samesite_cookie_redirect.js new file mode 100644 index 0000000000..caa152360c --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_samesite_cookie_redirect.js @@ -0,0 +1,50 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const TEST_ORIGIN = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +const { HttpServer } = ChromeUtils.import("resource://testing-common/httpd.js"); + +add_task(async function test_ss_cookie_redirect() { + // Set the samesite cookie + let tab = await BrowserTestUtils.openNewForegroundTab( + gBrowser, + TEST_ORIGIN + "setSameSiteCookie.html" + ); + BrowserTestUtils.removeTab(tab); + + let server = new HttpServer(); + server.start(-1); + server.registerPathHandler("/foo", (request, response) => { + response.setStatusLine(request.httpVersion, 302, "Found"); + response.setHeader("Location", TEST_ORIGIN + "getCookies.sjs"); + }); + registerCleanupFunction(() => server.stop()); + const { primaryPort, primaryHost } = server.identity; + const serverURL = `http://${primaryHost}:${primaryPort}/foo`; + + // Now open `getCookies.sjs` but via a redirect: + await BrowserTestUtils.withNewTab("about:blank", async browser => { + let loaded = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + await BrowserTestUtils.loadURIString( + browser, + "about:reader?url=" + encodeURIComponent(serverURL) + ); + await loaded; + await SpecialPowers.spawn(browser, [], () => { + is( + content.document.getElementById("cookieSpan").textContent, + "", + "Shouldn't get cookies." + ); + }); + }); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_with_anchor.js b/toolkit/components/reader/test/browser_readerMode_with_anchor.js new file mode 100644 index 0000000000..229daaed9d --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_with_anchor.js @@ -0,0 +1,89 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace( + "chrome://mochitests/content", + "http://example.com" +); + +add_task(async function test_loading_withHash() { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticle.html#foo", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await pageShownPromise; + await SpecialPowers.spawn(browser, [], async function () { + let foo = content.document.getElementById("foo"); + ok(foo, "foo element should be in document"); + let { scrollTop } = content.document.documentElement; + if (scrollTop == 0) { + await ContentTaskUtils.waitForEvent(content.document, "scroll"); + ({ scrollTop } = content.document.documentElement); + } + let { offsetTop } = foo; + Assert.lessOrEqual( + Math.abs(scrollTop - offsetTop), + 1, + `scrollTop (${scrollTop}) should be within 1 CSS pixel of offsetTop (${offsetTop})` + ); + }); + } + ); +}); + +add_task(async function test_loading_withoutHash() { + await BrowserTestUtils.withNewTab( + TEST_PATH + "readerModeArticle.html", + async function (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent( + browser, + "AboutReaderContentReady" + ); + let pageLoadedPromise = BrowserTestUtils.waitForContentEvent( + browser, + "load", + true + ); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + await Promise.all([pageShownPromise, pageLoadedPromise]); + await SpecialPowers.spawn(gBrowser.selectedBrowser, [], async () => { + Assert.equal( + content.document.documentElement.scrollTop, + 0, + "scrollTop should be 0" + ); + }); + let scrollEventPromise = BrowserTestUtils.waitForContentEvent( + browser, + "scroll", + true + ); + await BrowserTestUtils.synthesizeMouseAtCenter( + "#foo-anchor", + {}, + browser + ); + await scrollEventPromise; + await SpecialPowers.spawn(browser, [], async function () { + let foo = content.document.getElementById("foo"); + ok(foo, "foo element should be in document"); + let { scrollTop } = content.document.documentElement; + let { offsetTop } = foo; + Assert.lessOrEqual( + Math.abs(scrollTop - offsetTop), + 1, + `scrollTop (${scrollTop}) should be within 1 CSS pixel of offsetTop (${offsetTop})` + ); + }); + } + ); +}); diff --git a/toolkit/components/reader/test/getCookies.sjs b/toolkit/components/reader/test/getCookies.sjs new file mode 100644 index 0000000000..02e29fd877 --- /dev/null +++ b/toolkit/components/reader/test/getCookies.sjs @@ -0,0 +1,16 @@ +function handleRequest(request, response) { + const cookies = request.hasHeader("Cookie") + ? request.getHeader("Cookie") + : ""; + response.write(` + <!DOCTYPE html> + <html> + <head> + <meta charset="utf-8"/> + </head> + <body> + <p>Cookie: <span id="cookieSpan">${cookies}</span></p> + </body> + </html> + `); +} diff --git a/toolkit/components/reader/test/head.js b/toolkit/components/reader/test/head.js new file mode 100644 index 0000000000..6a8e09ecb2 --- /dev/null +++ b/toolkit/components/reader/test/head.js @@ -0,0 +1,63 @@ +ChromeUtils.defineESModuleGetters(this, { + PromiseUtils: "resource://gre/modules/PromiseUtils.sys.mjs", +}); + +/* exported promiseTabLoadEvent, is_element_visible, is_element_hidden */ + +/** + * Waits for a load (or custom) event to finish in a given tab. If provided + * load an uri into the tab. + * + * @param tab + * The tab to load into. + * @param [optional] url + * The url to load, or the current url. + * @return {Promise} resolved when the event is handled. + * @resolves to the received event + * @rejects if a valid load event is not received within a meaningful interval + */ +function promiseTabLoadEvent(tab, url) { + let deferred = PromiseUtils.defer(); + info("Wait tab event: load"); + + function handle(loadedUrl) { + if (loadedUrl === "about:blank" || (url && loadedUrl !== url)) { + info(`Skipping spurious load event for ${loadedUrl}`); + return false; + } + + info("Tab event received: load"); + return true; + } + + // Create two promises: one resolved from the content process when the page + // loads and one that is rejected if we take too long to load the url. + let loaded = BrowserTestUtils.browserLoaded(tab.linkedBrowser, false, handle); + + let timeout = setTimeout(() => { + deferred.reject(new Error("Timed out while waiting for a 'load' event")); + }, 30000); + + loaded.then(() => { + clearTimeout(timeout); + deferred.resolve(); + }); + + if (url) { + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + } + + // Promise.all rejects if either promise rejects (i.e. if we time out) and + // if our loaded promise resolves before the timeout, then we resolve the + // timeout promise as well, causing the all promise to resolve. + return Promise.all([deferred.promise, loaded]); +} + +function is_element_visible(element, msg) { + isnot(element, null, "Element should not be null, when checking visibility"); + ok(BrowserTestUtils.is_visible(element), msg || "Element should be visible"); +} +function is_element_hidden(element, msg) { + isnot(element, null, "Element should not be null, when checking visibility"); + ok(BrowserTestUtils.is_hidden(element), msg || "Element should be hidden"); +} diff --git a/toolkit/components/reader/test/linkToGetCookies.html b/toolkit/components/reader/test/linkToGetCookies.html new file mode 100644 index 0000000000..341046a21d --- /dev/null +++ b/toolkit/components/reader/test/linkToGetCookies.html @@ -0,0 +1,13 @@ +<!DOCTYPE html> +<html> + <head> + <meta charset="utf-8"> + </head> + <body> + <article> + <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> + + <p><a href="http://example.com/browser/toolkit/components/reader/test/getCookies.sjs" id="link">Cross-origin link to getCookies.html</a></p> + </article> + </body> +</html> diff --git a/toolkit/components/reader/test/readerModeArticle.html b/toolkit/components/reader/test/readerModeArticle.html new file mode 100644 index 0000000000..a0f1c64da0 --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticle.html @@ -0,0 +1,28 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<header>Site header</header> +<div> +<h1>Article title</h1> +<ul> + <li><a id="foo-anchor" href="#foo">by John Doe</a></li> +</ul> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p id="foo">by John Doe</p> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +</div> +</body> +</html> diff --git a/toolkit/components/reader/test/readerModeArticleHiddenNodes.html b/toolkit/components/reader/test/readerModeArticleHiddenNodes.html new file mode 100644 index 0000000000..92441b7978 --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticleHiddenNodes.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<style> +p { display: none } +</style> +<header>Site header</header> +<div> +<h1>Article title</h1> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +</div> +</body> +</html> diff --git a/toolkit/components/reader/test/readerModeArticleMedium.html b/toolkit/components/reader/test/readerModeArticleMedium.html new file mode 100644 index 0000000000..70b172cf63 --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticleMedium.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<header>Site header</header> +<div> +<h1>Article title</h1> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetu</p> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetu</p> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetu</p> +</body> +</html> diff --git a/toolkit/components/reader/test/readerModeArticleShort.html b/toolkit/components/reader/test/readerModeArticleShort.html new file mode 100644 index 0000000000..692471f27f --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticleShort.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<header>Site header</header> +<div> +<h1>Article title</h1> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetu</p> +</body> +</html> diff --git a/toolkit/components/reader/test/readerModeArticleTextPlain.txt b/toolkit/components/reader/test/readerModeArticleTextPlain.txt new file mode 100644 index 0000000000..c5b7861b73 --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticleTextPlain.txt @@ -0,0 +1,10 @@ + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Tortor id aliquet lectus proin nibh nisl condimentum. Eget magna fermentum iaculis eu non diam phasellus. Sed viverra tellus in hac habitasse platea dictumst. Quis commodo odio aenean sed. Diam vulputate ut pharetra sit amet aliquam id diam. Felis imperdiet proin fermentum leo vel orci. Diam vel quam elementum pulvinar. Vestibulum lectus mauris ultrices eros in cursus turpis massa. Sagittis vitae et leo duis ut diam. Quam elementum pulvinar etiam non quam lacus suspendisse faucibus interdum. At augue eget arcu dictum varius duis at consectetur. Bibendum enim facilisis gravida neque convallis a cras semper auctor. Suspendisse interdum consectetur libero id faucibus. Neque ornare aenean euismod elementum nisi. + +Lacus sed turpis tincidunt id aliquet. Euismod nisi porta lorem mollis. Sollicitudin aliquam ultrices sagittis orci. A diam sollicitudin tempor id eu nisl nunc. Molestie a iaculis at erat pellentesque adipiscing commodo elit. Tellus mauris a diam maecenas. Dolor morbi non arcu risus quis. Dictum non consectetur a erat nam at lectus. Convallis posuere morbi leo urna molestie. Blandit turpis cursus in hac habitasse platea dictumst quisque sagittis. Sed ullamcorper morbi tincidunt ornare massa eget egestas. Sit amet risus nullam eget felis eget nunc. Turpis in eu mi bibendum neque egestas congue. Accumsan in nisl nisi scelerisque eu ultrices vitae. Vel quam elementum pulvinar etiam non quam lacus. + +Erat velit scelerisque in dictum non consectetur a. Vulputate sapien nec sagittis aliquam malesuada bibendum. Odio facilisis mauris sit amet massa vitae tortor condimentum lacinia. Tempor nec feugiat nisl pretium. At urna condimentum mattis pellentesque id nibh tortor. Viverra tellus in hac habitasse platea dictumst. Turpis massa tincidunt dui ut ornare. Nunc id cursus metus aliquam eleifend mi. Etiam dignissim diam quis enim lobortis scelerisque fermentum. Aenean sed adipiscing diam donec adipiscing tristique risus nec feugiat. Vitae aliquet nec ullamcorper sit amet risus nullam eget felis. Quis hendrerit dolor magna eget est lorem ipsum dolor. Ultrices vitae auctor eu augue ut lectus. Curabitur gravida arcu ac tortor dignissim convallis. Justo laoreet sit amet cursus sit. Lorem ipsum dolor sit amet. Sed sed risus pretium quam vulputate dignissim suspendisse in. + +Egestas erat imperdiet sed euismod nisi porta lorem mollis. Pharetra magna ac placerat vestibulum lectus mauris ultrices eros in. Est ante in nibh mauris cursus mattis. Habitasse platea dictumst vestibulum rhoncus est pellentesque elit ullamcorper dignissim. Nunc aliquet bibendum enim facilisis gravida neque. Massa sapien faucibus et molestie. Sapien eget mi proin sed libero enim sed faucibus. Mauris a diam maecenas sed enim ut sem. Consectetur adipiscing elit duis tristique sollicitudin nibh sit. Sed arcu non odio euismod lacinia at. + +Ultricies mi quis hendrerit dolor. A erat nam at lectus urna duis convallis convallis tellus. Est sit amet facilisis magna etiam tempor orci. Porttitor massa id neque aliquam vestibulum. Lobortis feugiat vivamus at augue eget arcu dictum varius duis. Diam sit amet nisl suscipit adipiscing. Leo in vitae turpis massa. Netus et malesuada fames ac. Ac turpis egestas sed tempus urna et pharetra. Ut eu sem integer vitae justo. At erat pellentesque adipiscing commodo elit at. Consectetur purus ut faucibus pulvinar elementum integer enim. Cursus eget nunc scelerisque viverra mauris in aliquam sem. Aenean et tortor at risus viverra adipiscing at in. Platea dictumst vestibulum rhoncus est pellentesque elit ullamcorper dignissim cras. Tincidunt id aliquet risus feugiat in ante. Amet consectetur adipiscing elit pellentesque. Dignissim enim sit amet venenatis urna cursus eget nunc. Sit amet porttitor eget dolor morbi non. diff --git a/toolkit/components/reader/test/readerModeNonArticle.html b/toolkit/components/reader/test/readerModeNonArticle.html new file mode 100644 index 0000000000..e216af3c1f --- /dev/null +++ b/toolkit/components/reader/test/readerModeNonArticle.html @@ -0,0 +1,9 @@ +<!DOCTYPE html> +<html> +<head> +<title>Non article title</title> +<meta name="description" content="This is the non-article description." /> +</head> +<body> +</body> +</html> diff --git a/toolkit/components/reader/test/readerModeRandom.sjs b/toolkit/components/reader/test/readerModeRandom.sjs new file mode 100644 index 0000000000..f6bb15c06a --- /dev/null +++ b/toolkit/components/reader/test/readerModeRandom.sjs @@ -0,0 +1,23 @@ +// Generate a article ending in a piece of text with some random values in it. + +let readerModeText = `<!DOCTYPE html> +<html> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<header>Site header</header> +<div> +<h1>Article title</h1> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetu</p> +`; + +function handleRequest(aRequest, aResponse) { + aResponse.setStatusLine(aRequest.httpVersion, 200); + aResponse.setHeader("Content-Type", "text/html", false); + aResponse.write( + readerModeText + "<p id='rnd'>" + Date.now() + "," + Math.random() + "</p>" + ); +} diff --git a/toolkit/components/reader/test/setSameSiteCookie.html b/toolkit/components/reader/test/setSameSiteCookie.html new file mode 100644 index 0000000000..67bb714922 --- /dev/null +++ b/toolkit/components/reader/test/setSameSiteCookie.html @@ -0,0 +1,9 @@ +<!DOCTYPE html> +<html> + <head> + <meta charset="utf-8"> + </head> + <body> + <p>This page just set a cookie with the <code>SameSite</code> attribute.</p> + </body> +</html> diff --git a/toolkit/components/reader/test/setSameSiteCookie.html^headers^ b/toolkit/components/reader/test/setSameSiteCookie.html^headers^ new file mode 100644 index 0000000000..c0229c93b6 --- /dev/null +++ b/toolkit/components/reader/test/setSameSiteCookie.html^headers^ @@ -0,0 +1 @@ +Set-Cookie: foo=bar; Path='/' ; SameSite=strict |