diff options
Diffstat (limited to 'toolkit/components/translations/content')
7 files changed, 4478 insertions, 0 deletions
diff --git a/toolkit/components/translations/content/translations-document.sys.mjs b/toolkit/components/translations/content/translations-document.sys.mjs new file mode 100644 index 0000000000..7f436575d8 --- /dev/null +++ b/toolkit/components/translations/content/translations-document.sys.mjs @@ -0,0 +1,2140 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + setTimeout: "resource://gre/modules/Timer.sys.mjs", + clearTimeout: "resource://gre/modules/Timer.sys.mjs", +}); + +ChromeUtils.defineLazyGetter(lazy, "console", () => { + return console.createInstance({ + maxLogLevelPref: "browser.translations.logLevel", + prefix: "Translations", + }); +}); + +/** + * Map the NodeFilter enums that are used by the TreeWalker into enums that make + * sense for determining the status of the nodes for the TranslationsDocument process. + * This aligns the meanings of the filtering for the translations process. + */ +const NodeStatus = { + // This node is ready to translate as is. + READY_TO_TRANSLATE: NodeFilter.FILTER_ACCEPT, + + // This node is a shadow host and needs to be subdivided further. + SHADOW_HOST: NodeFilter.FILTER_ACCEPT, + + // This node contains too many block elements and needs to be subdivided further. + SUBDIVIDE_FURTHER: NodeFilter.FILTER_SKIP, + + // This node should not be considered for translation. + NOT_TRANSLATABLE: NodeFilter.FILTER_REJECT, +}; + +/** + * @typedef {import("../translations").NodeVisibility} NodeVisibility + * @typedef {(message: string) => Promise<string>} TranslationFunction + */ + +/** + * Create a translation cache with a limit. It implements a "least recently used" strategy + * to remove old translations. After `#cacheExpirationMS` the cache will be emptied. 
+ * This cache is owned statically by the TranslationsChild. This means that it will be + * re-used on page reloads if the origin of the site does not change. + */ +export class LRUCache { + /** @type {Map<string, string>} */ + #htmlCache = new Map(); + /** @type {Map<string, string>} */ + #textCache = new Map(); + /** @type {string} */ + #fromLanguage; + /** @type {string} */ + #toLanguage; + + /** + * This limit is used twice, once for Text translations, and once for HTML translations. + */ + #cacheLimit = 5_000; + + /** + * This cache will self-destruct after 10 minutes. + */ + #cacheExpirationMS = 10 * 60_000; + + /** + * @param {string} fromLanguage + * @param {string} toLanguage + */ + constructor(fromLanguage, toLanguage) { + this.#fromLanguage = fromLanguage; + this.#toLanguage = toLanguage; + } + + /** + * @param {boolean} isHTML + * @returns {boolean} + */ + #getCache(isHTML) { + return isHTML ? this.#htmlCache : this.#textCache; + } + + /** + * Get a translation if it exists from the cache, and move it to the end of the cache + * to keep it alive longer. + * + * @param {string} sourceString + * @param {boolean} isHTML + * @returns {string} + */ + get(sourceString, isHTML) { + const cache = this.#getCache(isHTML); + const targetString = cache.get(sourceString); + + if (targetString === undefined) { + return undefined; + } + + // Maps are ordered, move this item to the end of the list so it will stay + // alive longer. + cache.delete(sourceString); + cache.set(sourceString, targetString); + + this.keepAlive(); + + return targetString; + } + + /** + * @param {string} sourceString + * @param {string} targetString + * @param {boolean} isHTML + */ + set(sourceString, targetString, isHTML) { + const cache = this.#getCache(isHTML); + if (cache.size === this.#cacheLimit) { + // If the cache is at the limit, get the least recently used translation and + // remove it. This works since Maps have keys ordered by insertion order. 
+ const key = cache.keys().next().value; + cache.delete(key); + } + cache.set(sourceString, targetString); + this.keepAlive(); + } + + /** + * @param {string} fromLanguage + * @param {string} toLanguage + */ + matches(fromLanguage, toLanguage) { + return ( + this.#fromLanguage === fromLanguage && this.#toLanguage === toLanguage + ); + } + + /** + * @type {number} + */ + #timeoutId = 0; + + #pendingKeepAlive = false; + + /** + * Clear out the cache on a timer. + */ + keepAlive() { + if (this.#timeoutId) { + lazy.clearTimeout(this.#timeoutId); + } + if (!this.#pendingKeepAlive) { + // Rather than continuously creating new functions in a tight loop, only schedule + // one keepAlive timeout on the next tick. + this.#pendingKeepAlive = true; + + lazy.setTimeout(() => { + this.#pendingKeepAlive = false; + this.#timeoutId = lazy.setTimeout(() => { + this.#htmlCache = new Map(); + this.#textCache = new Map(); + }, this.#cacheExpirationMS); + }, 0); + } + } +} + +/** + * How often the DOM is updated with translations, in milliseconds. + */ +const DOM_UPDATE_INTERVAL_MS = 50; + +/** + * These tags are excluded from translation. + */ +const EXCLUDED_TAGS = new Set([ + // The following are elements that semantically should not be translated. + "CODE", + "KBD", + "SAMP", + "VAR", + "ACRONYM", + + // The following are deprecated tags. + "DIR", + "APPLET", + + // The following are embedded elements, and are not supported (yet). + "MATH", + "EMBED", + "OBJECT", + "IFRAME", + + // This is an SVG tag that can contain arbitrary XML, ignore it. + "METADATA", + + // These are elements that are treated as opaque by Firefox which causes their + // innerHTML property to be just the raw text node behind it. Any text that is sent as + // HTML must be valid, and there is no guarantee that the innerHTML is valid. + "NOSCRIPT", + "NOEMBED", + "NOFRAMES", + + // The title is handled separately, and a HEAD tag should not be considered. + "HEAD", + + // These are not user-visible tags. 
+ "STYLE", + "SCRIPT", + "TEMPLATE", + + // Textarea elements contain user content, which should not be translated. + "TEXTAREA", +]); + +/** + * Attributes to be translated + */ +const TRANSLATABLE_ATTRIBUTES = ["title", "placeholder"]; + +/** + * Selector to get all the attributes + * ["[attribute1]", "[attribute2]", ...]; + */ +const TRANSLATABLE_ATTRIBUTES_SELECTOR = TRANSLATABLE_ATTRIBUTES.map( + attribute => "[" + attribute + "]" +); + +/** + * Options used by the mutation observer + */ +const MUTATION_OBSERVER_OPTIONS = { + characterData: true, + childList: true, + subtree: true, + attributes: true, + attributeFilter: TRANSLATABLE_ATTRIBUTES, +}; + +/** + * This class manages the process of translating the DOM from one language to another. + * A translateHTML and a translateText function are injected into the constructor. This + * class is responsible for subdividing a Node into small enough pieces to where it + * contains a reasonable amount of text and inline elements for the translations engine + * to translate. Once a node has been identified as a small enough chunk, its innerHTML + * is read, and sent for translation. The async translation result comes back as an HTML + * string. The DOM node is updated with the new text and potentially changed DOM ordering. + * + * This class also handles mutations of the DOM and will translate nodes as they are added + * to the page, or the when the node's text is changed by content scripts. + */ +export class TranslationsDocument { + /** + * The BCP 47 language tag that is used on the page. + * + * @type {string} */ + documentLanguage; + + /** + * The timeout between the first translation received and the call to update the DOM + * with translations. + */ + #updateTimeout = null; + #attributeUpdateTimeout = null; + + /** + * The nodes that need translations. They are queued when the document tree is walked, + * and then they are dispatched for translation based on their visibility. 
The viewport + * nodes are given the highest priority. + * + * @type {Map<Node, NodeVisibility>} + */ + #queuedNodes = new Map(); + + /** + * The nodes that need Attribute translations. They are queued when the document tree is walked, + * and then they are dispatched for translation based on their visibility. The viewport + * nodes are given the highest priority. + * + * @type {Map<Node, { attributeList: string[], visibility: NodeVisibility }>} + */ + #queuedAttributeNodes = new Map(); + + /** + * The count of how many pending translations have been sent to the translations + * engine. + * + * @type {number} + */ + #pendingTranslationsCount = 0; + + /** + * The list of nodes that need updating with the translated HTML. These are batched + * into an update. + * + * @type {Set<{ node: Node, translatedHTML: string }>} + */ + #nodesWithTranslatedHTML = new Set(); + + /** + * The list of nodes whose attributes need updating with a translation. These are batched + * into an update. + * + * @type {Set<{ node: Node, translation: string, attribute: string }>} + */ + #nodesWithTranslatedAttributes = new Set(); + + /** + * The set of nodes that have been subdivided and processed for translation. They + * should not be submitted again unless their contents have been changed. + * + * @type {WeakSet<Node>} + */ + #processedNodes = new WeakSet(); + + /** + * All root nodes we're trying to translate. This is the `document.body` and the + * `title` element, plus any shadow roots registered through `observeNewRoot`. + * + * @type {Set<Node>} + */ + #rootNodes = new Set(); + + /** + * This promise gets resolved when the initial viewport translations are done. + * This is a key user-visible performance metric. It represents what the user + * actually sees. + * + * @type {Promise<void> | null} + */ + viewportTranslated = null; + + /** + * Set to true once `destroy()` has been called. + * + * @type {boolean} + */ + isDestroyed = false; + + /** + * Construct a new TranslationsDocument. It is tied to a specific Document and cannot + * be re-used. 
The translation functions are injected since this class shouldn't + * manage the life cycle of the translations engines. + * + * @param {Document} document + * @param {string} documentLanguage - The BCP 47 tag of the source language. + * @param {string} toLanguage - The BCP 47 tag of the destination language. + * @param {number} innerWindowId - This is used for better profiler marker reporting. + * @param {MessagePort} port - The port to the translations engine. + * @param {() => void} requestNewPort - Used when an engine times out and a new + * translation request comes in. + * @param {number} translationsStart + * @param {() => number} now + * @param {LRUCache} translationsCache + */ + constructor( + document, + documentLanguage, + toLanguage, + innerWindowId, + port, + requestNewPort, + translationsStart, + now, + translationsCache + ) { + /** + * The language of the document. If elements are found that do not match this language, + * then they are skipped. + * + * @type {string} + */ + this.documentLanguage = documentLanguage; + if (documentLanguage.length !== 2) { + throw new Error( + "Expected the document language to be a valid 2 letter BCP 47 language tag: " + + documentLanguage + ); + } + if (toLanguage.length !== 2) { + throw new Error( + "Expected the destination language to be a valid 2 letter BCP 47 language tag: " + + toLanguage + ); + } + + /** @type {QueuedTranslator} */ + this.translator = new QueuedTranslator(port, requestNewPort); + + /** @type {number} */ + this.innerWindowId = innerWindowId; + + /** @type {DOMParser} */ + this.domParser = new document.ownerGlobal.DOMParser(); + + /** @type {Document} */ + this.document = document; + + /** @type {LRUCache} */ + this.translationsCache = translationsCache; + + /** + * This selector runs to find child nodes that should be excluded. It should be + * basically the same implementation of `isExcludedNode`, but as a selector. 
+ * + * @type {string} + */ + this.excludedNodeSelector = [ + // Use: [lang|=value] to match language codes. + // + // Per: https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors + // + // The elements with an attribute name of attr whose value can be exactly + // value or can begin with value immediately followed by a hyphen, - (U+002D). + // It is often used for language subcode matches. + `[lang]:not([lang|="${this.documentLanguage}"])`, + `[translate=no]`, + `.notranslate`, + `[contenteditable="true"]`, + `[contenteditable=""]`, + [...EXCLUDED_TAGS].join(","), + ].join(","); + + this.observer = new document.ownerGlobal.MutationObserver(mutationsList => { + for (const mutation of mutationsList) { + switch (mutation.type) { + case "childList": + for (const node of mutation.addedNodes) { + this.#processedNodes.delete(node); + this.subdivideNodeForTranslations(node); + if (node.nodeType === Node.ELEMENT_NODE) { + this.translateAttributes(node); + } + } + break; + case "characterData": + this.#processedNodes.delete(mutation); + this.subdivideNodeForTranslations(mutation.target); + break; + case "attributes": + this.queueAttributeNodeForTranslation(mutation.target, [ + mutation.attributeName, + ]); + this.dispatchQueuedAttributeTranslations(); + break; + default: + break; + } + } + }); + + this.document.addEventListener( + "visibilitychange", + this.handleVisibilityChange + ); + + this.addRootElement(document.querySelector("title")); + this.addRootElement(document.body, true /* reportWordsInViewport */); + + this.viewportTranslated?.then(() => { + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId, startTime: now() }, + "Viewport translations" + ); + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId, startTime: translationsStart }, + "Time to first translation" + ); + }); + + document.documentElement.lang = toLanguage; + + lazy.console.log( + "Beginning to translate.", + // The defaultView may not be there on 
tests. + document.defaultView?.location.href + ); + } + + /** + * Queue a node for translation of attributes. + * @param {Node} node + * @param {Array<String>} + */ + queueAttributeNodeForTranslation(node, attributeList) { + /** @type {NodeVisibility} */ + let visibility = "out-of-viewport"; + if (isNodeHidden(node)) { + visibility = "hidden"; + } else if (isNodeInViewport(node)) { + visibility = "in-viewport"; + } + this.#queuedAttributeNodes.set(node, { attributeList, visibility }); + } + + /** + * Start and stop the translator as the page is shown. For instance, this will + * transition into "hidden" when the user tabs away from a document. + */ + handleVisibilityChange = () => { + if (this.document.visibilityState === "visible") { + this.translator.showPage(); + } else { + ChromeUtils.addProfilerMarker( + "Translations", + { innerWindowId: this.innerWindowId }, + "Pausing translations and discarding the port" + ); + this.translator.hidePage(); + } + }; + + /** + * Remove any dangling event handlers. + */ + destroy() { + this.isDestroyed = true; + this.translator.destroy(); + this.stopMutationObserver(); + this.document.removeEventListener( + "visibilitychange", + this.handleVisibilityChange + ); + } + + /** + * Helper function for adding a new root to the mutation + * observer. + * @param {Node} root + */ + observeNewRoot(root) { + this.#rootNodes.add(root); + this.observer.observe(root, MUTATION_OBSERVER_OPTIONS); + } + + /** + * This function finds all sub shadow trees of node and + * add the ShadowRoot of those subtrees to the mutation + * observer. + */ + addShadowRootsToObserver(node) { + const nodeIterator = node.ownerDocument.createTreeWalker( + node, + NodeFilter.SHOW_ELEMENT, + function (node) { + return node.openOrClosedShadowRoot + ? 
NodeFilter.FILTER_ACCEPT + : NodeFilter.FILTER_SKIP; + } + ); + let currentNode; + while ((currentNode = nodeIterator.nextNode())) { + // Only shadow hosts are accepted nodes + const shadowRoot = currentNode.openOrClosedShadowRoot; + this.observeNewRoot(shadowRoot); + this.addShadowRootsToObserver(shadowRoot); + } + } + + /** + * Add a new element to start translating. This root is tracked for mutations and + * kept up to date with translations. This will be the body element and title tag + * for the document. + * + * @param {Element} [node] + */ + addRootElement(node) { + if (!node) { + return; + } + + if (node.nodeType !== Node.ELEMENT_NODE) { + // This node is not an element, do not add it. + return; + } + + if (this.#rootNodes.has(node)) { + // Exclude nodes that are already targetted. + return; + } + + this.#rootNodes.add(node); + + let viewportNodeTranslations = this.subdivideNodeForTranslations(node); + let viewportAttributeTranslations = this.translateAttributes(node); + + if (!this.viewportTranslated) { + this.viewportTranslated = Promise.allSettled([ + ...(viewportNodeTranslations ?? []), + ...(viewportAttributeTranslations ?? []), + ]); + } + + this.observer.observe(node, MUTATION_OBSERVER_OPTIONS); + this.addShadowRootsToObserver(node); + } + + /** + * Add qualified nodes to queueNodeForTranslation by recursively walk + * through the DOM tree of node, including elements in Shadow DOM. + * + * @param {Element} [node] + */ + processSubdivide(node) { + const nodeIterator = node.ownerDocument.createTreeWalker( + node, + NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT, + this.determineTranslationStatusForUnprocessedNodes + ); + + // This iterator will contain each node that has been subdivided enough to + // be translated. 
+ let currentNode; + while ((currentNode = nodeIterator.nextNode())) { + const shadowRoot = currentNode.openOrClosedShadowRoot; + if (shadowRoot) { + this.processSubdivide(shadowRoot); + } else { + this.queueNodeForTranslation(currentNode); + } + } + } + + /** + * Start walking down through a node's subtree and decide which nodes to queue for + * translation. This first node could be the root nodes of the DOM, such as the + * document body, or the title element, or it could be a mutation target. + * + * The nodes go through a process of subdivision until an appropriate sized chunk + * of inline text can be found. + * + * @param {Node} node + */ + subdivideNodeForTranslations(node) { + if (!this.#rootNodes.has(node)) { + // This is a non-root node, which means it came from a mutation observer. + // This new node could be a host element for shadow tree + const shadowRoot = node.openOrClosedShadowRoot; + if (shadowRoot && !this.#rootNodes.has(shadowRoot)) { + this.observeNewRoot(shadowRoot); + } else { + // Ensure that it is a valid node to translate by checking all of its ancestors. + for (let parent of getAncestorsIterator(node)) { + // Parent is ShadowRoot. We can stop here since this is + // the top ancestor of the shadow tree. + if (parent.containingShadowRoot == parent) { + break; + } + if ( + this.determineTranslationStatus(parent) === + NodeStatus.NOT_TRANSLATABLE + ) { + return; + } + } + } + } + + switch (this.determineTranslationStatusForUnprocessedNodes(node)) { + case NodeStatus.NOT_TRANSLATABLE: + // This node is rejected as it shouldn't be translated. + return; + + // SHADOW_HOST and READY_TO_TRANSLATE both map to FILTER_ACCEPT + case NodeStatus.SHADOW_HOST: + case NodeStatus.READY_TO_TRANSLATE: + const shadowRoot = node.openOrClosedShadowRoot; + if (shadowRoot) { + this.processSubdivide(shadowRoot); + } else { + // This node is ready for translating, and doesn't need to be subdivided. 
There + // is no reason to run the TreeWalker, it can be directly submitted for + // translation. + this.queueNodeForTranslation(node); + } + break; + + case NodeStatus.SUBDIVIDE_FURTHER: + // This node may be translatable, but it needs to be subdivided into smaller + // pieces. Create a TreeWalker to walk the subtree, and find the subtrees/nodes + // that contain enough inline elements to send to be translated. + this.processSubdivide(node); + break; + } + + if (node.nodeName === "BODY") { + this.reportWordsInViewport(); + } + this.dispatchQueuedTranslations(); + } + + /** + * Get all the nodes which have selected attributes + * from the node/document and queue them. + * Call the translate function on these nodes + * @param {Node} node + * @returns {Array<Promise<void>> | null} + */ + translateAttributes(node) { + const attributeList = getTranslatableAttributes(node); + if (attributeList.length) { + // Queue the root node if it has any attributes + // Because querySelectorAll searches only child nodes. + this.queueAttributeNodeForTranslation(node, attributeList); + } + // Get all attributes in child nodes at once + const nodesWithTranslatableAttributes = node.querySelectorAll( + TRANSLATABLE_ATTRIBUTES_SELECTOR + ); + for (const node of nodesWithTranslatableAttributes) { + const attributeList = getTranslatableAttributes(node); + this.queueAttributeNodeForTranslation(node, attributeList); + } + return this.dispatchQueuedAttributeTranslations(); + } + + /** + * Test whether this is an element we do not want to translate. These are things like + * <code> elements, elements with a different "lang" attribute, and elements that + * have a `translate=no` attribute. + * + * @param {Node} node + */ + isExcludedNode(node) { + // Property access be expensive, so destructure required properties so they are + // not accessed multiple times. + const { nodeType } = node; + + if (nodeType === Node.TEXT_NODE) { + // Text nodes are never excluded. 
+ return false; + } + if (nodeType !== Node.ELEMENT_NODE) { + // Only elements and and text nodes should be considered. + return true; + } + + const { nodeName } = node; + + if ( + EXCLUDED_TAGS.has( + // SVG tags can be lowercased, so ensure everything is uppercased. + nodeName.toUpperCase() + ) + ) { + // This is an excluded tag. + return true; + } + + if (!this.matchesDocumentLanguage(node)) { + // Exclude nodes that don't match the fromLanguage. + return true; + } + + if (node.getAttribute("translate") === "no") { + // This element has a translate="no" attribute. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/translate + return true; + } + + if (node.classList.contains("notranslate")) { + // Google Translate skips translations if the classList contains "notranslate" + // https://cloud.google.com/translate/troubleshooting + return true; + } + + if (node.isContentEditable) { + // This field is editable, and so exclude it similar to the way that form input + // fields are excluded. + return true; + } + + return false; + } + + /** + * Runs `determineTranslationStatus`, but only on unprocessed nodes. + * + * @param {Node} node + * @return {number} - One of the NodeStatus values. + */ + determineTranslationStatusForUnprocessedNodes = node => { + if (this.#processedNodes.has(node)) { + // Skip nodes that have already been processed. + return NodeStatus.NOT_TRANSLATABLE; + } + + return this.determineTranslationStatus(node); + }; + + /** + * Determines if a node should be submitted for translation, not translatable, or if + * it should be subdivided further. It doesn't check if the node has already been + * processed. + * + * The return result works as a TreeWalker NodeFilter as well. + * + * @param {Node} node + * @returns {number} - One of the `NodeStatus` values. See that object + * for documentation. These values match the filters for the TreeWalker. + * These values also work as a `NodeFilter` value. 
+ */ + determineTranslationStatus(node) { + if (node.openOrClosedShadowRoot) { + return NodeStatus.SHADOW_HOST; + } + + if (isNodeQueued(node, this.#queuedNodes)) { + // This node or its parent was already queued, reject it. + return NodeStatus.NOT_TRANSLATABLE; + } + + if (this.isExcludedNode(node)) { + // This is an explicitly excluded node. + return NodeStatus.NOT_TRANSLATABLE; + } + + if (node.textContent.trim().length === 0) { + // Do not use subtrees that are empty of text. This textContent call is fairly + // expensive. + return !node.hasChildNodes() + ? NodeStatus.NOT_TRANSLATABLE + : NodeStatus.SUBDIVIDE_FURTHER; + } + + if (nodeNeedsSubdividing(node)) { + // Skip this node, and dig deeper into its tree to cut off smaller pieces + // to translate. It is presumed to be a wrapper of block elements. + return NodeStatus.SUBDIVIDE_FURTHER; + } + + if ( + containsExcludedNode(node, this.excludedNodeSelector) && + !hasTextNodes(node) + ) { + // Skip this node, and dig deeper into its tree to cut off smaller pieces + // to translate. + return NodeStatus.SUBDIVIDE_FURTHER; + } + + // This node can be treated as entire block to submit for translation. + return NodeStatus.READY_TO_TRANSLATE; + } + + /** + * Queue a node for translation. + * @param {Node} node + */ + queueNodeForTranslation(node) { + /** @type {NodeVisibility} */ + let visibility = "out-of-viewport"; + if (isNodeHidden(node)) { + visibility = "hidden"; + } else if (isNodeInViewport(node)) { + visibility = "in-viewport"; + } + + this.#queuedNodes.set(node, visibility); + } + + /** + * Submit the translations giving priority to nodes in the viewport. 
+ * @returns {Array<Promise<void>> | null} + */ + dispatchQueuedTranslations() { + let inViewportCounts = 0; + let outOfViewportCounts = 0; + let hiddenCounts = 0; + + let inViewportTranslations = null; + if (!this.viewportTranslated) { + inViewportTranslations = []; + } + + for (const [node, visibility] of this.#queuedNodes) { + if (visibility === "in-viewport") { + inViewportCounts++; + const promise = this.submitTranslation(node); + if (inViewportTranslations) { + inViewportTranslations.push(promise); + } + } + } + for (const [node, visibility] of this.#queuedNodes) { + if (visibility === "out-of-viewport") { + outOfViewportCounts++; + this.submitTranslation(node); + } + } + for (const [node, visibility] of this.#queuedNodes) { + if (visibility === "hidden") { + hiddenCounts++; + this.submitTranslation(node); + } + } + + ChromeUtils.addProfilerMarker( + "Translations", + { innerWindowId: this.innerWindowId }, + `Translate ${this.#queuedNodes.size} nodes.\n\n` + + `In viewport: ${inViewportCounts}\n` + + `Out of viewport: ${outOfViewportCounts}\n` + + `Hidden: ${hiddenCounts}\n` + ); + + this.#queuedNodes.clear(); + return inViewportTranslations; + } + + /** + * Submit the Attribute translations giving priority to nodes in the viewport. + * @returns {Array<Promise<void>> | null} + */ + dispatchQueuedAttributeTranslations() { + let inViewportCounts = 0; + let outOfViewportCounts = 0; + let hiddenCounts = 0; + + let inViewportTranslations = null; + if (!this.viewportTranslated) { + inViewportTranslations = []; + } + // Submit the nodes with attrbutes to be translated. 
+ for (const [node, { attributeList, visibility }] of this + .#queuedAttributeNodes) { + if (visibility === "in-viewport") { + inViewportCounts++; + const promise = this.submitAttributeTranslation(node, attributeList); + if (inViewportTranslations) { + inViewportTranslations.push(promise); + } + } + } + for (const [node, { attributeList, visibility }] of this + .#queuedAttributeNodes) { + if (visibility === "out-of-viewport") { + outOfViewportCounts++; + this.submitAttributeTranslation(node, attributeList); + } + } + for (const [node, { attributeList, visibility }] of this + .#queuedAttributeNodes) { + if (visibility === "hidden") { + hiddenCounts++; + this.submitAttributeTranslation(node, attributeList); + } + } + + ChromeUtils.addProfilerMarker( + "Attribute Translations", + { innerWindowId: this.innerWindowId }, + `Attribute Translate ${this.#queuedAttributeNodes.size} nodes.\n\n` + + `In viewport: ${inViewportCounts}\n` + + `Out of viewport: ${outOfViewportCounts}\n` + + `Hidden: ${hiddenCounts}\n` + ); + + this.#queuedAttributeNodes.clear(); + + return inViewportTranslations; + } + + /** + * Submit a node for Attribute translation to the translations engine. + * + * @param {Node} node + * @returns {Promise<void>} + */ + async submitAttributeTranslation(node, attributeList) { + if (node.nodeType === Node.ELEMENT_NODE) { + for (const attribute of attributeList) { + const text = node.getAttribute(attribute); + + if (text.trim().length === 0) { + continue; + } + const translation = await this.maybeTranslate( + node, + text, + false /*isHTML*/ + ); + if (translation != null) { + this.scheduleNodeUpdateWithTranslationAttribute( + node, + translation, + attribute + ); + } + } + } + } + + /** + * Schedule a node to be updated with a translation. + * + * @param {Node} node + * @param {string} translation + */ + scheduleNodeUpdateWithTranslationAttribute(node, translation, attribute) { + // Add the nodes to be populated with the next translation update. 
+ this.#nodesWithTranslatedAttributes.add({ + node, + translation, + attribute, + }); + + if (this.#pendingTranslationsCount === 0) { + // No translations are pending, update the node. + this.updateNodesWithTranslationsAttributes(); + } else if (!this.#attributeUpdateTimeout) { + // Schedule an update. + this.#attributeUpdateTimeout = lazy.setTimeout( + this.updateNodesWithTranslationsAttributes.bind(this), + DOM_UPDATE_INTERVAL_MS + ); + } else { + // An update has been previously scheduled, do nothing here. + } + } + + /** + * This is called every `DOM_UPDATE_INTERVAL_MS` ms with translations + * for attributes in the nodes. + * + * This function is called asynchronously, so nodes may already be dead. Before + * accessing a node make sure and run `Cu.isDeadWrapper` to check that it is alive. + */ + updateNodesWithTranslationsAttributes() { + // Stop the mutations so that the updates won't trigger observations. + + this.pauseMutationObserverAndRun(() => { + for (const { node, translation, attribute } of this + .#nodesWithTranslatedAttributes) { + if (Cu.isDeadWrapper(node)) { + // The node is no longer alive. + ChromeUtils.addProfilerMarker( + "Translations", + { innerWindowId: this.innerWindowId }, + "Node is no long alive." + ); + continue; + } + // Update the attribute of the node with translated attribute + if (attribute) { + node.setAttribute(attribute, translation); + } + } + this.#nodesWithTranslatedAttributes.clear(); + this.#attributeUpdateTimeout = null; + }); + } + + /** + * Record how many words were in the viewport, as this is the most important + * user-visible translation content. + */ + reportWordsInViewport() { + if ( + // This promise gets created for the first dispatchQueuedTranslations + this.viewportTranslated || + this.#queuedNodes.size === 0 + ) { + return; + } + + // TODO(Bug 1814195) - Add telemetry. + // TODO(Bug 1820618) - This whitespace regex will not work in CJK-like languages. 
+ // This requires a segmenter for a proper implementation. + + const whitespace = /\s+/; + let wordCount = 0; + for (const [node, visibility] of this.#queuedNodes) { + if (visibility === "in-viewport") { + wordCount += node.textContent.trim().split(whitespace).length; + } + } + + const message = wordCount + " words are in the viewport."; + lazy.console.log(message); + ChromeUtils.addProfilerMarker( + "Translations", + { innerWindowId: this.innerWindowId }, + message + ); + } + + /** + * Submit a node for translation to the translations engine. Elements are sent as + * their innerHTML, any other node is sent as its textContent. Nodes without any + * non-whitespace text are ignored. + * + * @param {Node} node + * @returns {Promise<void>} + */ + async submitTranslation(node) { + // Give each element an id that gets passed through the translation so it can be + // reunited later on. + if (node.nodeType === Node.ELEMENT_NODE) { + node.querySelectorAll("*").forEach((el, i) => { + el.dataset.mozTranslationsId = i; + }); + } + + /** @type {string} */ + let text; + /** @type {boolean} */ + let isHTML; + + if (node.nodeType === Node.ELEMENT_NODE) { + text = node.innerHTML; + isHTML = true; + } else { + text = node.textContent; + isHTML = false; + } + + if (text.trim().length === 0) { + return; + } + + // Mark this node as not to be translated again unless the contents are changed + // (which the observer will pick up on) + this.#processedNodes.add(node); + const translatedHTML = await this.maybeTranslate(node, text, isHTML); + if (translatedHTML != null) { + this.scheduleNodeUpdateWithTranslation(node, translatedHTML); + } + } + + /** + * A single function to update pendingTranslationsCount while + * calling the translate function. The translations cache is consulted first, and + * the translator is only called on a cache miss. + * + * @param {Node} node + * @param {string} text + * @param {boolean} isHTML + * @returns {Promise<string | null>} The translation, or null if translating failed. + */ + async maybeTranslate(node, text, isHTML) { + this.#pendingTranslationsCount++; + try { + let translation = this.translationsCache.get(text, isHTML); + if (translation === undefined) { + translation = await this.translator.translate(node, text, isHTML); + 
this.translationsCache.set(text, translation, isHTML); + } + + return translation; + } catch (error) { + lazy.console.log("Translation failed", error); + } finally { + this.#pendingTranslationsCount--; + } + return null; + } + + /** + * Start the mutation observer, for instance after applying the translations to the DOM. + */ + startMutationObserver() { + if (Cu.isDeadWrapper(this.observer)) { + // This observer is no longer alive. + return; + } + for (const node of this.#rootNodes) { + if (Cu.isDeadWrapper(node)) { + // This node is no longer alive. + continue; + } + this.observer.observe(node, MUTATION_OBSERVER_OPTIONS); + } + } + + /** + * Stop the mutation observer, for instance to apply the translations to the DOM. + */ + stopMutationObserver() { + // Was the window already destroyed? + if (!Cu.isDeadWrapper(this.observer)) { + this.observer.disconnect(); + } + } + + /** + * This is called every `DOM_UPDATE_INTERVAL_MS` ms with translations for nodes. + * + * This function is called asynchronously, so nodes may already be dead. Before + * accessing a node make sure and run `Cu.isDeadWrapper` to check that it is alive. + */ + updateNodesWithTranslations() { + // Stop the mutations so that the updates won't trigger observations. + this.pauseMutationObserverAndRun(() => { + for (const { node, translatedHTML } of this.#nodesWithTranslatedHTML) { + if (Cu.isDeadWrapper(node)) { + // The node is no longer alive. + ChromeUtils.addProfilerMarker( + "Translations", + { innerWindowId: this.innerWindowId }, + "Node is no long alive." + ); + continue; + } + switch (node.nodeType) { + case Node.TEXT_NODE: { + if (translatedHTML.trim().length !== 0) { + // Only update the node if there is new text. + node.textContent = translatedHTML; + } + break; + } + case Node.ELEMENT_NODE: { + // TODO (Bug 1820625) - This is slow compared to the original implementation + // in the addon which set the innerHTML directly. 
We can't set the innerHTML + // here, but perhaps there is another way to get back some of the performance. + const translationsDocument = this.domParser.parseFromString( + `<!DOCTYPE html><div>${translatedHTML}</div>`, + "text/html" + ); + updateElement(translationsDocument, node); + break; + } + } + } + + this.#nodesWithTranslatedHTML.clear(); + this.#updateTimeout = null; + }); + } + + /** + * Stop the mutations so that the updates of the translations + * in the nodes won't trigger observations. + * @param {Function} run The function to update translations + */ + pauseMutationObserverAndRun(run) { + this.stopMutationObserver(); + run(); + this.startMutationObserver(); + } + + /** + * Schedule a node to be updated with a translation. + * + * @param {Node} node + * @param {string} translatedHTML + */ + scheduleNodeUpdateWithTranslation(node, translatedHTML) { + // Add the nodes to be populated with the next translation update. + this.#nodesWithTranslatedHTML.add({ node, translatedHTML }); + + if (this.#pendingTranslationsCount === 0) { + // No translations are pending, update the node. + this.updateNodesWithTranslations(); + } else if (!this.#updateTimeout) { + // Schedule an update. + this.#updateTimeout = lazy.setTimeout( + this.updateNodesWithTranslations.bind(this), + DOM_UPDATE_INTERVAL_MS + ); + } else { + // An update has been previously scheduled, do nothing here. + } + } + + /** + * Check to see if a language matches the document language. + * + * @param {Node} node + */ + matchesDocumentLanguage(node) { + if (!node.lang) { + // No `lang` was present, so assume it matches the language. + return true; + } + + // First, cheaply check if language tags match, without canonicalizing. + if (langTagsMatch(this.documentLanguage, node.lang)) { + return true; + } + + try { + // Make sure the local is in the canonical form, and check again. This function + // throws, so don't trust that the language tags are formatting correctly. 
/**
 * Cheap comparison of two language tags, without canonicalizing them.
 *
 * The tags match when they are identical, or when `otherLanguage` is the two-letter
 * `knownLanguage` followed by a "-" and a subtag, e.g. "en" and "en-US".
 *
 * @param {string} knownLanguage - Must be two characters long unless it is
 *   identical to `otherLanguage`.
 * @param {string} otherLanguage
 * @returns {boolean}
 * @throws {Error} When the tags differ and `knownLanguage` is not of length 2.
 */
function langTagsMatch(knownLanguage, otherLanguage) {
  const isDirectMatch = knownLanguage === otherLanguage;
  if (isDirectMatch) {
    return true;
  }

  const hasExpectedLength = knownLanguage.length === 2;
  if (!hasExpectedLength) {
    throw new Error("Expected the knownLanguage to be of length 2.");
  }

  // e.g. "en-US" starts with "en-".
  return otherLanguage.startsWith(`${knownLanguage}-`);
}
/**
 * Pick the node whose computed style should be consulted for `node`.
 *
 * Anything that is not a text node is returned as is. For a text node the
 * `parentElement` is preferred; when there is none (for instance when the parent is
 * a ShadowRoot) the `flattenedTreeParentNode` is used instead.
 *
 * @param {Node} node
 * @returns {HTMLElement | null} `null` when a text node has neither a parent
 *   element nor a flattened tree parent.
 */
function getElementForStyle(node) {
  if (node.nodeType == Node.TEXT_NODE) {
    return node.parentElement || node.flattenedTreeParentNode || null;
  }
  return node;
}
+ * + * @param {Document} translationsDocument + * @param {Element} element + * @returns {void} + */ +function updateElement(translationsDocument, element) { + // This text should have the same layout as the target, but it's not completely + // guaranteed since the content page could change at any time, and the translation process is async. + // + // The document has the following structure: + // + // <html> + // <head> + // <body>{translated content}</body> + // </html> + + const originalHTML = element.innerHTML; + + /** + * The Set of translation IDs for nodes that have been cloned. + * @type {Set<number>} + */ + const clonedNodes = new Set(); + + merge(element, translationsDocument.body.firstChild); + + /** + * Merge the live tree with the translated tree by re-using elements from the live tree. + * + * @param {Node} liveTree + * @param {Node} translatedTree + */ + function merge(liveTree, translatedTree) { + /** @type {Map<number, Element>} */ + const liveElementsById = new Map(); + + /** @type {Array<Text>} */ + const liveTextNodes = []; + + // Remove all the nodes from the liveTree, and categorize them by Text node or + // Element node. + let node; + while ((node = liveTree.firstChild)) { + node.remove(); + + if (node.nodeType === Node.ELEMENT_NODE) { + liveElementsById.set(node.dataset.mozTranslationsId, node); + } else if (node.nodeType === Node.TEXT_NODE) { + liveTextNodes.push(node); + } + } + + // The translated tree dictates the order. + const translatedNodes = [...translatedTree.childNodes]; + for ( + let translatedIndex = 0; + translatedIndex < translatedNodes.length; + translatedIndex++ + ) { + const translatedNode = translatedNodes[translatedIndex]; + + if (translatedNode.nodeType === Node.TEXT_NODE) { + // Copy the translated text to the original Text node and re-append it. 
+ let liveTextNode = liveTextNodes.shift(); + + if (liveTextNode) { + liveTextNode.data = translatedNode.data; + } else { + liveTextNode = translatedNode; + } + + liveTree.appendChild(liveTextNode); + } else if (translatedNode.nodeType === Node.ELEMENT_NODE) { + const translationsId = translatedNode.dataset.mozTranslationsId; + // Element nodes try to use the already existing DOM nodes. + + // Find the element in the live tree that matches the one in the translated tree. + let liveElement = liveElementsById.get(translationsId); + + if (!liveElement) { + lazy.console.warn("Could not find a corresponding live element", { + path: createNodePath(translatedNode, translationsDocument.body), + translationsId, + liveElementsById, + translatedNode, + }); + continue; + } + + // Has this element already been added to the list? Then duplicate it and re-add + // it as a clone. The Translations Engine can sometimes duplicate HTML. + if (liveElement.parentNode) { + liveElement = liveElement.cloneNode(true /* deep clone */); + clonedNodes.add(translationsId); + lazy.console.warn( + "Cloning a node because it was already inserted earlier", + { + path: createNodePath(translatedNode, translationsDocument.body), + translatedNode, + liveElement, + } + ); + } + + if (isNodeTextEmpty(translatedNode)) { + // The original node had text, but the one that came out of translation + // didn't have any text. This scenario might be caused by one of two causes: + // + // 1) The element was duplicated by translation but then not given text + // content. This happens on Wikipedia articles for example. + // + // 2) The translator messed up and could not translate the text. This + // happens on YouTube in the language selector. In that case, having the + // original text is much better than no text at all. + // + // To make sure it is case 1 and not case 2 check whether this is the only occurrence. 
/**
 * For debug purposes, compute a string path to a node, starting below `root`.
 *
 * e.g. "/DIV/DIV#header/P.bold.string/A"
 *
 * @param {Node} node
 * @param {Node | null} root - The ancestor at which the path stops. When it is not
 *   provided, the body of the node's owner document is used.
 * @returns {string}
 */
function createNodePath(node, root) {
  if (root == null && node.ownerDocument) {
    root = node.ownerDocument.body;
  }
  const { parentNode } = node;
  // Pass `root` along, otherwise every recursive step forgets where to stop and the
  // path runs all the way up to the document.
  let path =
    parentNode && parentNode !== root ? createNodePath(parentNode, root) : "";
  path += `/${node.nodeName}`;
  if (node.id) {
    path += `#${node.id}`;
  } else if (node.className) {
    for (const className of node.classList) {
      path += "." + className;
    }
  }
  return path;
}

/**
 * Check whether a node has no text besides whitespace. Nodes exposing `innerText`
 * are judged by it, text nodes by their `nodeValue`. Everything else, including a
 * text node with an empty value, counts as empty.
 *
 * @param {Node} node
 * @returns {boolean}
 */
function isNodeTextEmpty(node) {
  if ("innerText" in node) {
    return node.innerText.trim().length === 0;
  }
  if (node.nodeType === Node.TEXT_NODE && node.nodeValue) {
    return node.nodeValue.trim().length === 0;
  }
  return true;
}

/**
 * Remove every text node from the subtree of `node`, keeping the elements.
 *
 * @param {Node} node
 */
function removeTextNodes(node) {
  // Iterate over a snapshot. `childNodes` is live, so removing a child while
  // iterating it directly shifts the remaining children and skips the next sibling.
  for (const child of Array.from(node.childNodes)) {
    switch (child.nodeType) {
      case Node.TEXT_NODE:
        node.removeChild(child);
        break;
      case Node.ELEMENT_NODE:
        removeTextNodes(child);
        break;
      default:
        break;
    }
  }
}
/**
 * Like `isExcludedNode` but looks at the full subtree. Used to see whether
 * we can submit a subtree, or whether we should split it into smaller
 * branches first to try to exclude more of the non-translatable content.
 *
 * @param {Node} node
 * @param {string} excludedNodeSelector
 * @returns {boolean} `true` only for an element with a descendant matching the
 *   selector. Always a real boolean, never the matched element.
 */
function containsExcludedNode(node, excludedNodeSelector) {
  return (
    node.nodeType === Node.ELEMENT_NODE &&
    node.querySelector(excludedNodeSelector) !== null
  );
}

/**
 * Check if this node has already been queued to be translated. This can be because
 * the node itself is queued, or one of its ancestors is queued.
 *
 * @param {Node} node
 * @param {Map<Node, any>} queuedNodes
 * @returns {boolean} Always a real boolean, never the queued ancestor.
 */
function isNodeQueued(node, queuedNodes) {
  if (queuedNodes.has(node)) {
    return true;
  }

  // If the immediate parent is the body, it is allowed.
  if (node.parentNode === node.ownerDocument.body) {
    return false;
  }

  // Accessing the parentNode is expensive here according to performance profiling.
  // This is due to XrayWrappers. Minimize reading attributes by storing a reference
  // to the `parentNode` in a named variable, rather than re-accessing it.
  let parentNode;
  let lastNode = node;
  while ((parentNode = lastNode.parentNode)) {
    if (queuedNodes.has(parentNode)) {
      return true;
    }
    lastNode = parentNode;
  }

  return false;
}
/**
 * When a page is hidden, mutations may occur in the DOM. It doesn't make sense to
 * translate those elements while the page is hidden, especially as it may bring
 * a translations engine back to life, which can be quite expensive. Queue those
 * messages here.
 */
class QueuedTranslator {
  /**
   * The port to the translations engine, or null while no port is held.
   *
   * @type {MessagePort | null}
   */
  #port = null;

  /**
   * Asks the actor for a new port. The port arrives through `acquirePort`.
   *
   * @type {() => void}
   */
  #actorRequestNewPort;

  /**
   * An id for each message sent. This is used to match up the request and response.
   */
  #nextMessageId = 0;

  /**
   * Tie together a message id to a resolved response.
   *
   * @type {Map<number, TranslationRequest>}
   */
  #requests = new Map();

  /**
   * If the translations are paused, they are queued here. This Map is ordered
   * from oldest to newest requests with stale requests being removed.
   *
   * @type {Map<Node, TranslationRequest>}
   */
  #queue = new Map();

  /**
   * @type {"uninitialized" | "ready" | "error" | "closed"}
   */
  engineStatus = "uninitialized";

  /**
   * When an engine gets closed while still in use, a new one will need to be
   * requested. This is kept while a request is outstanding so that the port is not
   * requested twice.
   *
   * @type {{ promise: Promise<void>, resolve: Function, reject: Function } | null}
   */
  #portRequest = null;

  /**
   * Keep track if the page is shown or hidden. When the page is hidden, no translations
   * will be posted to the translations engine.
   */
  #isPageShown = true;

  /**
   * @param {MessagePort} port
   * @param {() => void} actorRequestNewPort
   */
  constructor(port, actorRequestNewPort) {
    this.#actorRequestNewPort = actorRequestNewPort;

    this.acquirePort(port);
  }

  /**
   * Mark the page as shown, and request a new port when translations were queued
   * while the page was hidden.
   *
   * @throws {Error} When a port is already held.
   */
  showPage() {
    this.#isPageShown = true;
    if (this.#port) {
      throw new Error(
        "Attempting to show the page when there is already port available"
      );
    }
    if (this.#queue.size) {
      // There are queued translations, request a new port. After the port is retrieved
      // the pending queue will be processed.
      this.#requestNewPort();
    }
  }

  /**
   * Hide the page, and move any outstanding translation requests to a queue.
   */
  hidePage() {
    this.#isPageShown = false;

    // Check before discarding the port: `discardPort` moves every outstanding
    // request onto the queue, after which `#requests` is always empty.
    if (this.#requests.size) {
      lazy.console.log(
        "Pausing translations with pending translation requests."
      );
    }
    this.discardPort();
  }

  /**
   * Request a new port. The port will come in via `acquirePort`, and then resolved
   * through the `this.#portRequest.resolve`.
   *
   * @returns {Promise<void>}
   */
  #requestNewPort() {
    if (this.#portRequest) {
      // A port was already requested.
      return this.#portRequest.promise;
    }

    const portRequest = { promise: null, resolve: null, reject: null };
    portRequest.promise = new Promise((resolve, reject) => {
      portRequest.resolve = resolve;
      portRequest.reject = reject;
    });

    this.#portRequest = portRequest;

    // Send a request through the actor for a new port. The request response will
    // trigger the method `QueuedTranslator.prototype.acquirePort`
    this.#actorRequestNewPort();

    this.#portRequest.promise
      .then(
        () => {
          this.#portRequest = null;

          // Resume the queued translations.
          if (this.#queue.size) {
            lazy.console.log(
              `Resuming ${
                this.#queue.size
              } translations from the pending translation queue.`
            );

            const oldQueue = this.#queue;
            this.#queue = new Map();
            this.#repostTranslations(oldQueue);
          }
        },
        error => {
          lazy.console.error(error);
        }
      )
      .finally(() => {
        this.#portRequest = null;
      });

    return portRequest.promise;
  }

  /**
   * Send a request to translate text to the Translations Engine. If it returns `null`
   * then the request is stale. A rejection means there was an error in the translation.
   * This request may be queued.
   *
   * @param {Node} node
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @returns {Promise<string | null>}
   */
  async translate(node, sourceText, isHTML) {
    if (this.#isPageShown && !this.#port) {
      try {
        await this.#requestNewPort();
      } catch {}
    }

    // At this point we don't know if the page is still shown, or if the attempt
    // to get a port was successful so check again.

    if (!this.#isPageShown || !this.#port) {
      // Queue the request while the page isn't shown.
      return new Promise((resolve, reject) => {
        const previousRequest = this.#queue.get(node);
        if (previousRequest) {
          // Previous requests get resolved as null, as this new one will replace it.
          previousRequest.resolve(null);
          // Delete the entry so that the order of the queue is maintained. The
          // new request will be put on the end.
          this.#queue.delete(node);
        }

        // This Promises's resolve and reject will be chained after the translation
        // request. For now add it to the queue along with the other arguments.
        this.#queue.set(node, { node, sourceText, isHTML, resolve, reject });
      });
    }

    return this.#postTranslationRequest(node, sourceText, isHTML);
  }

  /**
   * Posts the translation to the translations engine through the MessagePort.
   *
   * @param {Node} node
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @returns {Promise<string | null>} Settled once the response with the matching
   *   message id is handled in the port's `onmessage` listener.
   */
  #postTranslationRequest(node, sourceText, isHTML) {
    return new Promise((resolve, reject) => {
      const messageId = this.#nextMessageId++;
      // Store the "resolve" for the promise. It will be matched back up with the
      // `messageId` in the port's message handler.
      this.#requests.set(messageId, {
        node,
        sourceText,
        isHTML,
        resolve,
        reject,
      });
      this.#port.postMessage({
        type: "TranslationsPort:TranslationRequest",
        messageId,
        sourceText,
        isHTML,
      });
    });
  }

  /**
   * Close the port and move any pending translations onto a queue.
   */
  discardPort() {
    if (this.#port) {
      this.#port.postMessage({ type: "TranslationsPort:DiscardTranslations" });
      this.#port.close();
      this.#port = null;
    }
    this.#moveRequestsToQueue();
    this.engineStatus = "uninitialized";
  }

  /**
   * Move any unfulfilled requests to the queue so they can be sent again when
   * the page is active again.
   */
  #moveRequestsToQueue() {
    if (this.#requests.size) {
      for (const request of this.#requests.values()) {
        this.#queue.set(request.node, request);
      }
      this.#requests = new Map();
    }
  }

  /**
   * Acquires a port, checks on the engine status, and then starts or resumes
   * translations.
   *
   * @param {MessagePort} port
   */
  acquirePort(port) {
    if (this.#port) {
      if (this.engineStatus === "ready") {
        lazy.console.error(
          "Received a new translation port while one already existed."
        );
      }
      this.discardPort();
    }

    this.#port = port;

    // The port request that is outstanding right now is the one settled by the
    // status response of this port.
    const portRequest = this.#portRequest;

    // Match up a response on the port to message that was sent.
    port.onmessage = ({ data }) => {
      switch (data.type) {
        case "TranslationsPort:TranslationResponse": {
          const { targetText, messageId } = data;
          // A request may not match a messageId if there is a race during the pausing
          // and discarding of the queue.
          this.#requests.get(messageId)?.resolve(targetText);
          this.#requests.delete(messageId);
          break;
        }
        case "TranslationsPort:GetEngineStatusResponse": {
          if (portRequest) {
            const { resolve, reject } = portRequest;
            if (data.status === "ready") {
              resolve();
            } else {
              reject(new Error("The engine failed to load."));
            }
          }
          this.engineStatus = data.status;
          break;
        }
        case "TranslationsPort:EngineTerminated": {
          // The engine was terminated, and if a translation is needed a new port
          // will need to be requested.
          this.engineStatus = "closed";
          this.discardPort();
          if (this.#queue.size && this.#isPageShown) {
            this.#requestNewPort();
          }
          break;
        }
        default:
          lazy.console.error("Unknown translations port message: " + data.type);
          break;
      }
    };

    port.postMessage({ type: "TranslationsPort:GetEngineStatusRequest" });
  }

  /**
   * Re-send a list of translation requests.
   *
   * @param {Map<any, TranslationRequest>} mappedRequests
   *  This is either the this.#queue or this.#requests.
   */
  #repostTranslations(mappedRequests) {
    for (const value of mappedRequests.values()) {
      const { node, sourceText, isHTML, resolve, reject } = value;
      if (Cu.isDeadWrapper(node)) {
        // If the node is dead, resolve without any text. Do not reject as that
        // will be treated as an error.
        resolve(null);
      } else {
        this.#postTranslationRequest(node, sourceText, isHTML).then(
          resolve,
          reject
        );
      }
    }
  }

  /**
   * Close the port and remove any pending or queued requests.
   */
  destroy() {
    // The port is null after `discardPort` (e.g. while the page is hidden), in
    // which case there is nothing left to close.
    this.#port?.close();
    this.#requests = new Map();
    this.#queue = new Map();
  }
}
*/ + +/* eslint-env browser */ +/* globals TE_addProfilerMarker, TE_getLogLevel, TE_log, TE_logError, TE_getLogLevel, + TE_destroyEngineProcess, TE_requestEnginePayload, TE_reportEngineStatus, + TE_resolveForceShutdown */ + +/** + * This file lives in the translation engine's process and is in charge of managing the + * lifecycle of the translations engines. This process is a singleton Web Content + * process that can be created and destroyed as needed. + * + * The goal of the code in this file is to be as unprivileged as possible, which should + * unlock Bug 1813789, which will make this file fully unprivileged. + * + * Each translation needs an engine for that specific translation pair. This engine is + * kept around for CACHE_TIMEOUT_MS; if no keepAlive event happens within that time, + * the engine is destroyed. An engine may be destroyed even when a page is + * still open and may need translations in the future. This is handled gracefully by + * creating new engines and MessagePorts on the fly. + * + * The engine communicates directly with the content page via a MessagePort. Each end + * of the port is transferred from the parent process to the content process, and this + * engine process. This port is transitory, and may be closed at any time. Only when a + * translation has been requested once (which is initiated by the parent process) can + * the content process re-request translation ports. This ensures a rogue content process + * only has the capabilities to perform tasks that the parent process has given it. + * + * The messaging flow can get a little convoluted to handle all of the correctness cases, + * but ideally communication passes through the message port as much as possible.
There + * are many scenarios such as: + * + * - Translation pages becoming idle + * - Tab changing causing "pageshow" and "pagehide" visibility changes + * - Translation actor destruction (this can happen long after the page has been + * navigated away from, but is still alive in the + * page history) + * - Error states + * - Engine Process being graceful shut down (no engines left) + * - Engine Process being killed by the OS. + * + * The following is a diagram that attempts to illustrate the structure of the processes + * and the communication channels that exist between them. + * + * ┌─────────────────────────────────────────────────────────────┐ + * │ PARENT PROCESS │ + * │ │ + * │ [TranslationsParent] ←────→ [TranslationsEngineParent] │ + * │ ↑ ↑ │ + * └──────────────────│────────────────────────────────────│─────┘ + * │ JSWindowActor IPC calls │ JSWindowActor IPC calls + * │ │ + * ┌──────────────────│────────┐ ┌─────│─────────────────────────────┐ + * │ CONTENT PROCESS │ │ │ │ ENGINE PROCESS │ + * │ │ │ │ ↓ │ + * │ [french.html] │ │ │ [TranslationsEngineChild] │ + * │ ↕ ↓ │ │ ↕ │ + * │ [TranslationsChild] │ │ [translations-engine.html] │ + * │ └──TranslationsDocument │ │ ├── "fr to en" engine │ + * │ └──port1 « ═══════════ MessageChannel ════ » │ └── port2 │ + * │ │ │ └── "de to en" engine (idle) │ + * └───────────────────────────┘ └───────────────────────────────────┘ + */ + +// How long the cache remains alive between uses, in milliseconds. In automation the +// engine is manually created and destroyed to avoid timing issues. +const CACHE_TIMEOUT_MS = 15_000; + +/** + * @typedef {import("./translations-document.sys.mjs").TranslationsDocument} TranslationsDocument + * @typedef {import("../translations.js").TranslationsEnginePayload} TranslationsEnginePayload + */ + +/** + * The TranslationsEngine encapsulates the logic for translating messages. It can + * only be set up for a single language translation pair. 
/**
 * The TranslationsEngine encapsulates the logic for translating messages. It can
 * only be set up for a single language translation pair. In order to change languages
 * a new engine should be constructed.
 *
 * The actual work for the translations happens in a worker. This class manages
 * instantiating and messaging the worker.
 *
 * Keep unused engines around in the TranslationsEngine.#cachedEngines cache in case
 * page navigation happens and we can re-use previous engines. The engines are very
 * heavy-weight, so get rid of them after a timeout. Once all are destroyed the
 * engine process is asked to destroy itself (see #removeEngineFromCache).
 */
export class TranslationsEngine {
  /**
   * Maps a language pair key to a cached engine. Engines are kept around for a timeout
   * before they are removed so that they can be re-used during navigation.
   *
   * @type {Map<string, Promise<TranslationsEngine>>}
   */
  static #cachedEngines = new Map();

  /** @type {TimeoutID | null} */
  #keepAliveTimeout = null;

  /** @type {Worker} */
  #worker;

  /**
   * Multiple messages can be sent before a response is received. This ID is used to keep
   * track of the messages. It is incremented on every use.
   */
  #messageId = 0;

  /**
   * Returns the cached engine promise for the language pair, creating (and caching)
   * a new engine on the first call. If the engine fails to initialize, the promise
   * rejects and the entry is removed from the cache.
   *
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @param {number} innerWindowId
   * @returns {Promise<TranslationsEngine>}
   */
  static getOrCreate(fromLanguage, toLanguage, innerWindowId) {
    const languagePairKey = getLanguagePairKey(fromLanguage, toLanguage);
    let enginePromise = TranslationsEngine.#cachedEngines.get(languagePairKey);

    if (enginePromise) {
      return enginePromise;
    }

    TE_log(`Creating a new engine for "${fromLanguage}" to "${toLanguage}".`);

    // A new engine needs to be created.
    enginePromise = TranslationsEngine.create(
      fromLanguage,
      toLanguage,
      innerWindowId
    );

    // Cache the promise (not the engine) so concurrent callers share one creation.
    TranslationsEngine.#cachedEngines.set(languagePairKey, enginePromise);

    enginePromise.catch(error => {
      TE_logError(
        `The engine failed to load for translating "${fromLanguage}" to "${toLanguage}". Removing it from the cache.`,
        error
      );
      // Remove the engine if it fails to initialize.
      TranslationsEngine.#removeEngineFromCache(languagePairKey);
    });

    return enginePromise;
  }

  /**
   * Removes the engine, and if it's the last, call the process to destroy itself.
   *
   * @param {string} languagePairKey
   * @param {boolean} force - On forced shutdowns, it's not necessary to notify the
   *   parent process. When omitted it is treated as false.
   */
  static #removeEngineFromCache(languagePairKey, force) {
    TranslationsEngine.#cachedEngines.delete(languagePairKey);
    if (TranslationsEngine.#cachedEngines.size === 0 && !force) {
      TE_log("The last engine was removed, destroying this process.");
      TE_destroyEngineProcess();
    }
  }

  /**
   * Create a TranslationsEngine and bypass the cache. Resolves once the worker has
   * reported a successful initialization; rejects with the worker's error otherwise.
   *
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @param {number} innerWindowId
   * @returns {Promise<TranslationsEngine>}
   */
  static async create(fromLanguage, toLanguage, innerWindowId) {
    const startTime = performance.now();

    const engine = new TranslationsEngine(
      fromLanguage,
      toLanguage,
      await TE_requestEnginePayload(fromLanguage, toLanguage)
    );

    await engine.isReady;

    TE_addProfilerMarker({
      startTime,
      message: `Translations engine loaded for "${fromLanguage}" to "${toLanguage}"`,
      innerWindowId,
    });

    return engine;
  }

  /**
   * Signal to the engines that they are being forced to shutdown. Engines whose
   * creation promise rejects are skipped, since `allSettled` absorbs the rejection.
   *
   * @returns {Promise<PromiseSettledResult<void>[]>}
   */
  static forceShutdown() {
    return Promise.allSettled(
      [...TranslationsEngine.#cachedEngines].map(
        async ([langPair, enginePromise]) => {
          TE_log(`Force shutdown of the engine "${langPair}"`);
          const engine = await enginePromise;
          engine.terminate(true /* force */);
        }
      )
    );
  }

  /**
   * Terminates the engine and its worker immediately. This is also the callback that
   * `keepAlive` schedules, which is why it is an arrow function bound to `this`.
   * Every open port for this language pair is told that the engine terminated and
   * is then closed.
   *
   * @param {boolean} force
   */
  terminate = (force = false) => {
    const message = `Terminating translations engine "${this.languagePairKey}".`;
    TE_addProfilerMarker({ message });
    TE_log(message);
    this.#worker.terminate();
    this.#worker = null;
    if (this.#keepAliveTimeout) {
      clearTimeout(this.#keepAliveTimeout);
    }
    for (const [innerWindowId, data] of ports) {
      const { fromLanguage, toLanguage, port } = data;
      if (
        fromLanguage === this.fromLanguage &&
        toLanguage === this.toLanguage
      ) {
        // This port is still active but being closed.
        ports.delete(innerWindowId);
        port.postMessage({ type: "TranslationsPort:EngineTerminated" });
        port.close();
      }
    }
    TranslationsEngine.#removeEngineFromCache(this.languagePairKey, force);
  };

  /**
   * The worker needs to be shutdown after some amount of time of not being used.
   * Each call restarts the CACHE_TIMEOUT_MS countdown.
   */
  keepAlive() {
    if (this.#keepAliveTimeout) {
      // Clear any previous timeout.
      clearTimeout(this.#keepAliveTimeout);
    }
    // In automated tests, the engine is manually destroyed.
    if (!Cu.isInAutomation) {
      this.#keepAliveTimeout = setTimeout(this.terminate, CACHE_TIMEOUT_MS);
    }
  }

  /**
   * Construct and initialize the worker.
   *
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @param {TranslationsEnginePayload} enginePayload - If there is no engine payload
   *   then the engine will be mocked. This allows this class to be used in tests.
   */
  constructor(fromLanguage, toLanguage, enginePayload) {
    /** @type {string} */
    this.fromLanguage = fromLanguage;
    /** @type {string} */
    this.toLanguage = toLanguage;
    this.languagePairKey = getLanguagePairKey(fromLanguage, toLanguage);
    this.#worker = new Worker(
      "chrome://global/content/translations/translations-engine.worker.js"
    );

    /**
     * Settles on the first message received from the worker: resolved for
     * "initialization-success", rejected for "initialization-error".
     *
     * @type {Promise<void>}
     */
    this.isReady = new Promise((resolve, reject) => {
      const onMessage = ({ data }) => {
        TE_log("Received initialization message", data);
        if (data.type === "initialization-success") {
          resolve();
        } else if (data.type === "initialization-error") {
          reject(data.error);
        }
        // The listener is removed after the first message, whatever its type.
        this.#worker.removeEventListener("message", onMessage);
      };
      this.#worker.addEventListener("message", onMessage);

      // Schedule the first timeout for keeping the engine alive.
      this.keepAlive();
    });

    // Make sure the ArrayBuffers are transferred, not cloned.
    // https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Transferable_objects
    const transferables = [];
    if (enginePayload) {
      transferables.push(enginePayload.bergamotWasmArrayBuffer);
      for (const files of enginePayload.languageModelFiles) {
        for (const { buffer } of Object.values(files)) {
          transferables.push(buffer);
        }
      }
    }

    this.#worker.postMessage(
      {
        type: "initialize",
        fromLanguage,
        toLanguage,
        enginePayload,
        messageId: this.#messageId++,
        logLevel: TE_getLogLevel(),
      },
      transferables
    );
  }

  /**
   * The implementation for translation. Posts a "translation-request" to the worker
   * and settles when the response carrying the same `messageId` arrives.
   *
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @param {number} innerWindowId
   * @returns {Promise<string[]>} Resolves with the worker's `targetText` as-is.
   *   NOTE(review): the declared element type is not established here; confirm
   *   against the worker's response shape.
   */
  translate(sourceText, isHTML, innerWindowId) {
    this.keepAlive();

    const messageId = this.#messageId++;

    return new Promise((resolve, reject) => {
      const onMessage = ({ data }) => {
        // NOTE(review): this check relies on the worker including `innerWindowId`
        // in its "translations-discarded" message; verify the worker sends it.
        if (
          data.type === "translations-discarded" &&
          data.innerWindowId === innerWindowId
        ) {
          // The page was unloaded, and we no longer need to listen for a response.
          this.#worker.removeEventListener("message", onMessage);
          return;
        }

        if (data.messageId !== messageId) {
          // Multiple translation requests can be sent before a response is received.
          // Ensure that the response received here is the correct one.
          return;
        }

        if (data.type === "translation-response") {
          // Also keep the translation alive after getting a result, as many translations
          // can queue up at once, and then it can take minutes to resolve them all.
          this.keepAlive();
          resolve(data.targetText);
        }
        if (data.type === "translation-error") {
          reject(data.error);
        }
        this.#worker.removeEventListener("message", onMessage);
      };

      this.#worker.addEventListener("message", onMessage);

      this.#worker.postMessage({
        type: "translation-request",
        isHTML,
        sourceText,
        messageId,
        innerWindowId,
      });
    });
  }

  /**
   * Applies a function only if a cached engine exists. Errors from the engine
   * promise or from `fn` are deliberately swallowed.
   *
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @param {(engine: TranslationsEngine) => void} fn
   */
  static withCachedEngine(fromLanguage, toLanguage, fn) {
    const engine = TranslationsEngine.#cachedEngines.get(
      getLanguagePairKey(fromLanguage, toLanguage)
    );

    if (engine) {
      engine.then(fn).catch(() => {});
    }
  }

  /**
   * Stop processing the translation queue. All in-progress messages will be discarded.
   *
   * @param {number} innerWindowId
   */
  discardTranslationQueue(innerWindowId) {
    this.#worker.postMessage({
      type: "discard-translation-queue",
      innerWindowId,
    });
  }

  /**
   * Pause or resume the translations from a cached engine.
   *
   * NOTE(review): no instance `pause` method is defined in this class, so as written
   * the callback would throw a TypeError that `withCachedEngine` silently swallows.
   * Confirm whether an instance method is missing.
   *
   * @param {boolean} pause
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @param {number} innerWindowId
   */
  static pause(pause, fromLanguage, toLanguage, innerWindowId) {
    TranslationsEngine.withCachedEngine(fromLanguage, toLanguage, engine => {
      engine.pause(pause, innerWindowId);
    });
  }
}

/**
 * Creates a lookup key that is unique to each fromLanguage-toLanguage pair.
 *
 * @param {string} fromLanguage
 * @param {string} toLanguage
 * @returns {string}
 */
function getLanguagePairKey(fromLanguage, toLanguage) {
  return `${fromLanguage},${toLanguage}`;
}

/**
 * Maps the innerWindowId to the port.
 *
 * @type {Map<number, { fromLanguage: string, toLanguage: string, port: MessagePort }>}
 */
const ports = new Map();

/**
 * Listen to the port to the content process for incoming messages, and pass
 * them to the TranslationsEngine manager. The other end of the port is held
 * in the content process by the TranslationsDocument.
 *
 * @param {string} fromLanguage
 * @param {string} toLanguage
 * @param {number} innerWindowId
 * @param {MessagePort} port
 */
function listenForPortMessages(fromLanguage, toLanguage, innerWindowId, port) {
  async function handleMessage({ data }) {
    switch (data.type) {
      case "TranslationsPort:GetEngineStatusRequest": {
        // This message gets sent first before the translation queue is processed.
        // The engine is most likely to fail on the initial invocation. Any failure
        // past the first one is not reported to the UI.
        TranslationsEngine.getOrCreate(
          fromLanguage,
          toLanguage,
          innerWindowId
        ).then(
          () => {
            TE_log("The engine is ready for translations.", {
              innerWindowId,
            });
            TE_reportEngineStatus(innerWindowId, "ready");
            port.postMessage({
              type: "TranslationsPort:GetEngineStatusResponse",
              status: "ready",
            });
          },
          () => {
            TE_reportEngineStatus(innerWindowId, "error");
            port.postMessage({
              type: "TranslationsPort:GetEngineStatusResponse",
              status: "error",
            });
            // After an error no more translation requests will be sent. Go ahead
            // and close the port.
            port.close();
            ports.delete(innerWindowId);
          }
        );
        break;
      }
      case "TranslationsPort:TranslationRequest": {
        const { sourceText, isHTML, messageId } = data;
        const engine = await TranslationsEngine.getOrCreate(
          fromLanguage,
          toLanguage,
          innerWindowId
        );
        const targetText = await engine.translate(
          sourceText,
          isHTML,
          innerWindowId
        );
        // Echo the caller's messageId so the content side can match the response.
        port.postMessage({
          type: "TranslationsPort:TranslationResponse",
          messageId,
          targetText,
        });
        break;
      }
      case "TranslationsPort:DiscardTranslations": {
        discardTranslations(innerWindowId);
        break;
      }
      default:
        TE_logError("Unknown translations port message: " + data.type);
        break;
    }
  }

  if (port.onmessage) {
    TE_logError(
      new Error("The MessagePort onmessage handler was already present.")
    );
  }

  // Rejections from the async handler are logged rather than left unhandled.
  port.onmessage = event => {
    handleMessage(event).catch(error => TE_logError(error));
  };
}
/**
 * Discards the queue and removes the port. Does nothing beyond logging when no port
 * is registered for the window.
 *
 * @param {number} innerWindowId
 */
function discardTranslations(innerWindowId) {
  TE_log("Discarding translations, innerWindowId:", innerWindowId);

  const entry = ports.get(innerWindowId);
  if (!entry) {
    return;
  }

  entry.port.close();
  ports.delete(innerWindowId);

  // Only an already-cached engine needs to be told; never create one just to discard.
  TranslationsEngine.withCachedEngine(
    entry.fromLanguage,
    entry.toLanguage,
    cachedEngine => {
      cachedEngine.discardTranslationQueue(innerWindowId);
    }
  );
}

/**
 * Listen for events coming from the TranslationsEngine actor.
 */
window.addEventListener("message", event => {
  const message = event.data;

  switch (message.type) {
    case "StartTranslation": {
      const { fromLanguage, toLanguage, innerWindowId, port } = message;
      TE_log("Starting translation", innerWindowId);
      listenForPortMessages(fromLanguage, toLanguage, innerWindowId, port);
      // Remember the port so it can be closed on discard or engine termination.
      ports.set(innerWindowId, { port, fromLanguage, toLanguage });
      break;
    }
    case "DiscardTranslations": {
      discardTranslations(message.innerWindowId);
      break;
    }
    case "ForceShutdown": {
      TranslationsEngine.forceShutdown().then(() => {
        TE_resolveForceShutdown();
      });
      break;
    }
    default:
      throw new Error("Unknown TranslationsEngineChromeToContent event.");
  }
});
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

/**
 * @typedef {import("../translations").Bergamot} Bergamot
 * @typedef {import("../translations").LanguageTranslationModelFiles} LanguageTranslationModelFiles
 */

/* global loadBergamot */
importScripts("chrome://global/content/translations/bergamot-translator.js");

// Respect the preference "browser.translations.logLevel".
let _loggingLevel = "Error";

/**
 * Log unless the level is "Error" or "Warn".
 *
 * @param {...any} args
 */
function log(...args) {
  if (_loggingLevel !== "Error" && _loggingLevel !== "Warn") {
    console.log("Translations:", ...args);
  }
}

/**
 * Log only at the most verbose levels, "Trace" or "All".
 *
 * @param {...any} args
 */
function trace(...args) {
  if (_loggingLevel === "Trace" || _loggingLevel === "All") {
    console.log("Translations:", ...args);
  }
}

// Throw Promise rejection errors so that they are visible in the console.
self.addEventListener("unhandledrejection", event => {
  throw event.reason;
});

/**
 * The alignment for each file type, file type strings should be same as in the
 * model registry.
 */
const MODEL_FILE_ALIGNMENTS = {
  model: 256,
  lex: 64,
  vocab: 64,
  qualityModel: 64,
  srcvocab: 64,
  trgvocab: 64,
};

/**
 * Initialize the engine, and get it ready to handle translation requests.
 * The "initialize" message must be received before any other message handling
 * requests will be processed.
 */
addEventListener("message", handleInitializationMessage);

/**
 * Handles the "initialize" message: builds the real or the mocked engine, installs
 * the regular message handler, and reports success or failure to the main thread.
 * Any other message type is reported as an error and the worker keeps waiting for
 * initialization.
 *
 * @param {MessageEvent} event
 */
async function handleInitializationMessage({ data }) {
  const startTime = performance.now();
  if (data.type !== "initialize") {
    console.error(
      "The TranslationEngine worker received a message before it was initialized."
    );
    return;
  }

  try {
    const { fromLanguage, toLanguage, enginePayload, logLevel, innerWindowId } =
      data;

    if (!fromLanguage) {
      throw new Error('Worker initialization missing "fromLanguage"');
    }
    if (!toLanguage) {
      throw new Error('Worker initialization missing "toLanguage"');
    }

    if (logLevel) {
      // Respect the "browser.translations.logLevel" preference.
      _loggingLevel = logLevel;
    }

    let engine;
    if (enginePayload.isMocked) {
      // The engine is testing mode, and no Bergamot wasm is available.
      engine = new MockedEngine(fromLanguage, toLanguage);
    } else {
      const { bergamotWasmArrayBuffer, languageModelFiles } = enginePayload;
      const bergamot = await BergamotUtils.initializeWasm(
        bergamotWasmArrayBuffer
      );
      engine = new Engine(
        fromLanguage,
        toLanguage,
        bergamot,
        languageModelFiles
      );
    }

    ChromeUtils.addProfilerMarker(
      "TranslationsWorker",
      { startTime, innerWindowId },
      "Translations engine loaded."
    );

    handleMessages(engine);
    postMessage({ type: "initialization-success" });
  } catch (error) {
    console.error(error);
    postMessage({ type: "initialization-error", error: error?.message });
  }

  // Initialization is attempted once; afterwards this listener is no longer needed.
  removeEventListener("message", handleInitializationMessage);
}

/**
 * Sets up the message handling for the worker.
 *
 * @param {Engine | MockedEngine} engine
 */
function handleMessages(engine) {
  // Set while a discard is in flight so that new requests wait for it to finish.
  let discardPromise;
  addEventListener("message", async ({ data }) => {
    try {
      if (data.type === "initialize") {
        throw new Error("The Translations engine must not be re-initialized.");
      }
      if (data.type === "translation-request") {
        // Only show these messages when "All" logging is on, since there are so many
        // of them.
        trace("Received message", data);
      } else {
        log("Received message", data);
      }

      switch (data.type) {
        case "translation-request": {
          const { sourceText, messageId, isHTML, innerWindowId } = data;
          if (discardPromise) {
            // Wait for messages to be discarded if there are any.
            await discardPromise;
          }
          try {
            // Add a translation to the work queue, and when it returns, post the message
            // back. The translation may never return if the translations are discarded
            // before it have time to be run. In this case this await is just never
            // resolved, and the postMessage is never run.
            const targetText = await engine.translate(
              sourceText,
              isHTML,
              innerWindowId
            );

            // This logging level can be very verbose and slow, so only do it under the
            // "Trace" level, which is the most verbose. Set the logging level to "Info" to avoid
            // these, and get all of the other logs.
            trace("Translation complete", {
              sourceText,
              targetText,
              isHTML,
              innerWindowId,
            });

            postMessage({
              type: "translation-response",
              targetText,
              messageId,
            });
          } catch (error) {
            console.error(error);
            let message = "An error occurred in the engine worker.";
            if (typeof error?.message === "string") {
              message = error.message;
            }
            let stack = "(no stack)";
            if (typeof error?.stack === "string") {
              stack = error.stack;
            }
            postMessage({
              type: "translation-error",
              error: { message, stack },
              messageId,
              innerWindowId,
            });
          }
          break;
        }
        case "discard-translation-queue": {
          const { innerWindowId } = data;
          ChromeUtils.addProfilerMarker(
            "TranslationsWorker",
            { innerWindowId },
            "Translations discard requested"
          );

          discardPromise = engine.discardTranslations(innerWindowId);
          await discardPromise;
          discardPromise = null;

          // Signal to the "message" listeners in the main thread to stop listening.
          // The main thread matches on `innerWindowId`, so it must be included or
          // the pending listeners for that window are never removed.
          postMessage({
            type: "translations-discarded",
            innerWindowId,
          });
          break;
        }
        default:
          console.warn("Unknown message type:", data.type);
      }
    } catch (error) {
      // Ensure the unexpected errors are surfaced in the console.
      console.error(error);
    }
  });
}
+ */ +class Engine { + /** + * @param {string} fromLanguage + * @param {string} toLanguage + * @param {Bergamot} bergamot + * @param {Array<LanguageTranslationModelFiles>} languageTranslationModelFiles + */ + constructor( + fromLanguage, + toLanguage, + bergamot, + languageTranslationModelFiles + ) { + /** @type {string} */ + this.fromLanguage = fromLanguage; + /** @type {string} */ + this.toLanguage = toLanguage; + /** @type {Bergamot} */ + this.bergamot = bergamot; + /** @type {Bergamot["TranslationModel"][]} */ + this.languageTranslationModels = languageTranslationModelFiles.map( + languageTranslationModelFiles => + BergamotUtils.constructSingleTranslationModel( + bergamot, + languageTranslationModelFiles + ) + ); + + /** @type {Bergamot["BlockingService"]} */ + this.translationService = new bergamot.BlockingService({ + // Caching is disabled (see https://github.com/mozilla/firefox-translations/issues/288) + cacheSize: 0, + }); + } + + /** + * Run the translation models to perform a batch of message translations. The + * promise is rejected when the sync version of this function throws an error. + * This function creates an async interface over the synchronous translation + * mechanism. This allows other microtasks such as message handling to still work + * even though the translations are CPU-intensive. + * + * @param {string} sourceText + * @param {boolean} isHTML + * @param {number} innerWindowId - This is required + * + * @returns {Promise<string>}sourceText + */ + translate(sourceText, isHTML, innerWindowId) { + return this.#getWorkQueue(innerWindowId).runTask(() => + this.#syncTranslate(sourceText, isHTML, innerWindowId) + ); + } + + /** + * Map each innerWindowId to its own WorkQueue. This makes it easy to shut down + * an entire queue of work when the page is unloaded. + * + * @type {Map<number, WorkQueue>} + */ + #workQueues = new Map(); + + /** + * Get or create a `WorkQueue` that is unique to an `innerWindowId`. 
+ * + * @param {number} innerWindowId + * @returns {WorkQueue} + */ + #getWorkQueue(innerWindowId) { + let workQueue = this.#workQueues.get(innerWindowId); + if (workQueue) { + return workQueue; + } + workQueue = new WorkQueue(innerWindowId); + this.#workQueues.set(innerWindowId, workQueue); + return workQueue; + } + + /** + * Cancels any in-progress translations by removing the work queue. + * + * @param {number} innerWindowId + */ + discardTranslations(innerWindowId) { + let workQueue = this.#workQueues.get(innerWindowId); + if (workQueue) { + workQueue.cancelWork(); + this.#workQueues.delete(innerWindowId); + } + } + + /** + * Run the translation models to perform a translation. This + * blocks the worker thread until it is completed. + * + * @param {string} sourceText + * @param {boolean} isHTML + * @param {number} innerWindowId + * @returns {string} + */ + #syncTranslate(sourceText, isHTML, innerWindowId) { + const startTime = performance.now(); + let response; + sourceText = sourceText.trim(); + const { messages, options } = BergamotUtils.getTranslationArgs( + this.bergamot, + sourceText, + isHTML + ); + try { + if (messages.size() === 0) { + return []; + } + + /** @type {Bergamot["VectorResponse"]} */ + let responses; + + if (this.languageTranslationModels.length === 1) { + responses = this.translationService.translate( + this.languageTranslationModels[0], + messages, + options + ); + } else if (this.languageTranslationModels.length === 2) { + responses = this.translationService.translateViaPivoting( + this.languageTranslationModels[0], + this.languageTranslationModels[1], + messages, + options + ); + } else { + throw new Error( + "Too many models were provided to the translation worker." + ); + } + + // Report on the time it took to do this translation. 
+ ChromeUtils.addProfilerMarker( + "TranslationsWorker", + { startTime, innerWindowId }, + `Translated ${sourceText.length} code units.` + ); + + const targetText = responses.get(0).getTranslatedText(); + return targetText; + } finally { + // Free up any memory that was allocated. This will always run. + messages?.delete(); + options?.delete(); + response?.delete(); + } + } +} + +/** + * Static utilities to help work with the Bergamot wasm module. + */ +class BergamotUtils { + /** + * Construct a single translation model. + * + * @param {Bergamot} bergamot + * @param {LanguageTranslationModelFiles} languageTranslationModelFiles + * @returns {Bergamot["TranslationModel"]} + */ + static constructSingleTranslationModel( + bergamot, + languageTranslationModelFiles + ) { + log(`Constructing translation model.`); + + const { model, lex, vocab, qualityModel, srcvocab, trgvocab } = + BergamotUtils.allocateModelMemory( + bergamot, + languageTranslationModelFiles + ); + + // Transform the bytes to mb, like "10.2mb" + const getMemory = memory => `${Math.floor(memory.size() / 100_000) / 10}mb`; + + let memoryLog = `Model memory sizes in wasm heap:`; + memoryLog += `\n Model: ${getMemory(model)}`; + memoryLog += `\n Shortlist: ${getMemory(lex)}`; + + // Set up the vocab list, which could either be a single "vocab" model, or a + // "srcvocab" and "trgvocab" pair. 
+ const vocabList = new bergamot.AlignedMemoryList(); + + if (vocab) { + vocabList.push_back(vocab); + memoryLog += `\n Vocab: ${getMemory(vocab)}`; + } else if (srcvocab && trgvocab) { + vocabList.push_back(srcvocab); + vocabList.push_back(trgvocab); + memoryLog += `\n Src Vocab: ${getMemory(srcvocab)}`; + memoryLog += `\n Trg Vocab: ${getMemory(trgvocab)}`; + } else { + throw new Error("Vocabulary key is not found."); + } + + if (qualityModel) { + memoryLog += `\n QualityModel: ${getMemory(qualityModel)}\n`; + } + + const config = BergamotUtils.generateTextConfig({ + "beam-size": "1", + normalize: "1.0", + "word-penalty": "0", + "max-length-break": "128", + "mini-batch-words": "1024", + workspace: "128", + "max-length-factor": "2.0", + "skip-cost": (!qualityModel).toString(), + "cpu-threads": "0", + quiet: "true", + "quiet-translation": "true", + "gemm-precision": + languageTranslationModelFiles.model.record.name.endsWith("intgemm8.bin") + ? "int8shiftAll" + : "int8shiftAlphaAll", + alignment: "soft", + }); + + log(`Bergamot translation model config: ${config}`); + log(memoryLog); + + return new bergamot.TranslationModel( + config, + model, + lex, + vocabList, + qualityModel ?? null + ); + } + + /** + * The models must be placed in aligned memory that the Bergamot wasm module has access + * to. This function copies over the model blobs into this memory space. 
+ * + * @param {Bergamot} bergamot + * @param {LanguageTranslationModelFiles} languageTranslationModelFiles + * @returns {LanguageTranslationModelFilesAligned} + */ + static allocateModelMemory(bergamot, languageTranslationModelFiles) { + /** @type {LanguageTranslationModelFilesAligned} */ + const results = {}; + + for (const [fileType, file] of Object.entries( + languageTranslationModelFiles + )) { + const alignment = MODEL_FILE_ALIGNMENTS[fileType]; + if (!alignment) { + throw new Error(`Unknown file type: "${fileType}"`); + } + + const alignedMemory = new bergamot.AlignedMemory( + file.buffer.byteLength, + alignment + ); + + alignedMemory.getByteArrayView().set(new Uint8Array(file.buffer)); + + results[fileType] = alignedMemory; + } + + return results; + } + + /** + * Initialize the Bergamot translation engine. It is a wasm compiled version of the + * Marian translation software. The wasm is delivered remotely to cut down on binary size. + * + * https://github.com/mozilla/bergamot-translator/ + * + * @param {ArrayBuffer} wasmBinary + * @returns {Promise<Bergamot>} + */ + static initializeWasm(wasmBinary) { + return new Promise((resolve, reject) => { + /** @type {number} */ + let start = performance.now(); + + /** @type {Bergamot} */ + const bergamot = loadBergamot({ + // This is the amount of memory that a simple run of Bergamot uses, in bytes. + INITIAL_MEMORY: 234_291_200, + print: log, + onAbort() { + reject(new Error("Error loading Bergamot wasm module.")); + }, + onRuntimeInitialized: async () => { + const duration = performance.now() - start; + log( + `Bergamot wasm runtime initialized in ${duration / 1000} seconds.` + ); + // Await at least one microtask so that the captured `bergamot` variable is + // fully initialized. 
+ await Promise.resolve(); + resolve(bergamot); + }, + wasmBinary, + }); + }); + } + + /** + * Maps the Bergamot Vector to a JS array + * + * @param {Bergamot["Vector"]} vector + * @param {Function} fn + * @returns {Array} + */ + static mapVector(vector, fn) { + const result = []; + for (let index = 0; index < vector.size(); index++) { + result.push(fn(vector.get(index), index)); + } + return result; + } + + /** + * Generate a config for the Marian translation service. It requires specific whitespace. + * + * https://marian-nmt.github.io/docs/cmd/marian-decoder/ + * + * @param {Record<string, string>} config + * @returns {string} + */ + static generateTextConfig(config) { + const indent = " "; + let result = "\n"; + + for (const [key, value] of Object.entries(config)) { + result += `${indent}${key}: ${value}\n`; + } + + return result + indent; + } + + /** + * JS objects need to be translated into wasm objects to configure the translation engine. + * + * @param {Bergamot} bergamot + * @param {string[]} sourceText + * @returns {{ messages: Bergamot["VectorString"], options: Bergamot["VectorResponseOptions"] }} + */ + static getTranslationArgs(bergamot, sourceText, isHTML) { + const messages = new bergamot.VectorString(); + const options = new bergamot.VectorResponseOptions(); + + sourceText = sourceText.trim(); + // Empty paragraphs break the translation. + if (sourceText) { + messages.push_back(sourceText); + options.push_back({ + qualityScores: false, + alignment: true, + html: isHTML, + }); + } + + return { messages, options }; + } +} + +/** + * For testing purposes, provide a fully mocked engine. This allows for easy integration + * testing of the UI, without having to rely on downloading remote models and remote + * wasm binaries. 
+ */ +class MockedEngine { + /** + * @param {string} fromLanguage + * @param {string} toLanguage + */ + constructor(fromLanguage, toLanguage) { + /** @type {string} */ + this.fromLanguage = fromLanguage; + /** @type {string} */ + this.toLanguage = toLanguage; + } + + /** + * Create a fake translation of the text. + * + * @param {string} sourceText + * @param {bool} isHTML + * @returns {string} + */ + translate(sourceText, isHTML) { + // Note when an HTML translations is requested. + let html = isHTML ? ", html" : ""; + const targetText = sourceText.toUpperCase(); + return `${targetText} [${this.fromLanguage} to ${this.toLanguage}${html}]`; + } + + discardTranslations() {} +} + +/** + * This class takes tasks that may block the thread's event loop, and has them yield + * after a time budget via setTimeout calls to allow other code to execute. + */ +class WorkQueue { + #TIME_BUDGET = 100; // ms + #RUN_IMMEDIATELY_COUNT = 20; + + /** @type {Array<{task: Function, resolve: Function}>} */ + #tasks = []; + #isRunning = false; + #isWorkCancelled = false; + #runImmediately = this.#RUN_IMMEDIATELY_COUNT; + + /** + * @param {number} innerWindowId + */ + constructor(innerWindowId) { + this.innerWindowId = innerWindowId; + } + + /** + * Run the task and return the result. + * + * @template {T} + * @param {() => T} task + * @returns {Promise<T>} + */ + runTask(task) { + if (this.#runImmediately > 0) { + // Run the first N translations immediately, most likely these are the user-visible + // translations on the page, as they are sent in first. The setTimeout of 0 can + // still delay the translations noticeably. + this.#runImmediately--; + return Promise.resolve(task()); + } + return new Promise((resolve, reject) => { + this.#tasks.push({ task, resolve, reject }); + this.#run().catch(error => console.error(error)); + }); + } + + /** + * The internal run function. + */ + async #run() { + if (this.#isRunning) { + // The work queue is already running. 
+ return; + } + + this.#isRunning = true; + + // Measure the timeout + let lastTimeout = null; + + let tasksInBatch = 0; + const addProfilerMarker = () => { + ChromeUtils.addProfilerMarker( + "TranslationsWorker WorkQueue", + { startTime: lastTimeout, innerWindowId: this.innerWindowId }, + `WorkQueue processed ${tasksInBatch} tasks` + ); + }; + + while (this.#tasks.length !== 0) { + if (this.#isWorkCancelled) { + // The work was already cancelled. + break; + } + const now = performance.now(); + + if (lastTimeout === null) { + lastTimeout = now; + // Allow other work to get on the queue. + await new Promise(resolve => setTimeout(resolve, 0)); + } else if (now - lastTimeout > this.#TIME_BUDGET) { + // Perform a timeout with no effective wait. This clears the current + // promise queue from the event loop. + await new Promise(resolve => setTimeout(resolve, 0)); + addProfilerMarker(); + lastTimeout = performance.now(); + } + + // Check this between every `await`. + if (this.#isWorkCancelled || !this.#tasks.length) { + break; + } + + tasksInBatch++; + const { task, resolve, reject } = this.#tasks.shift(); + try { + const result = await task(); + + // Check this between every `await`. + if (this.#isWorkCancelled) { + break; + } + // The work is done, resolve the original task. + resolve(result); + } catch (error) { + reject(error); + } + } + addProfilerMarker(); + this.#isRunning = false; + } + + async cancelWork() { + this.#isWorkCancelled = true; + this.#tasks = []; + await new Promise(resolve => setTimeout(resolve, 0)); + this.#isWorkCancelled = false; + } +} diff --git a/toolkit/components/translations/content/translations.css b/toolkit/components/translations/content/translations.css new file mode 100644 index 0000000000..ee3c0ba8ec --- /dev/null +++ b/toolkit/components/translations/content/translations.css @@ -0,0 +1,169 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +:root { + /* Provide defaults for when this page is viewed in "toolkit". */ + background-color: var(--in-content-page-background, #fff); + color: var(--in-content-page-color, #15141a); + + /* Provide backup values for some of the variables used in "browser" so that the styles + look nice by default in "toolkit". */ + --AT-box-background: var(--in-content-box-background, #fff); + --AT-box-border-color: var(--in-content-box-border-color, #9e9ea0); + --AT-box-info-background: var(--in-content-box-info-background, #f0f0f4); + + /* Variables used in the page layout */ + --AT-page-margin: 20px; + --AT-input-padding: 20px; + /* This is somewhat arbitrary, but works well for the current design. If the computed + header height changes, this will need to be adjusted. */ + --AT-header-height: 156px; + --AT-input-height: calc(min(400px, calc(100vh - var(--AT-header-height)))); + --AT-select-arrow-inset: 5px; +} + +body { + display: flex; + justify-content: center; + align-items: center; + inset: 0; + position: absolute; + visibility: hidden; + flex-direction: column; +} + +.about-translations-header { + display: flex; +} + +.about-translations-header > * { + flex: 1; + display: flex; + max-width: 50%; +} + +.about-translations-header-start { + justify-content: start; +} + +.about-translations-header-end { + justify-content: end; +} + +/* Increase the selector specificity to override the base `select` styles. 
*/ +select.about-translations-select { + position: relative; + padding-inline: 10px 20px; + padding-block: 0px; + min-width: 50%; + margin: 5px; + background-position: right var(--AT-select-arrow-inset) center; +} + +select.about-translations-select:dir(rtl) { + background-position-x: left var(--AT-select-arrow-inset); +} + +.about-translations-contents { + display: flex; + flex-direction: column; + box-sizing: border-box; + width: calc(100% - var(--AT-page-margin) * 2); + max-width: 1200px; + background-color: var(--AT-box-background); + border: 1px solid var(--AT-box-border-color); + border-radius: 4px; +} + +.about-translations-input { + display: flex; + width: 100%; + border-top: 1px solid var(--AT-box-border-color); +} + +.about-translations-input-start { + border-inline-end: 1px solid var(--AT-box-border-color); +} + +.about-translations-input > * { + position: relative; + width: 50%; +} + +.about-translations-input-textarea { + /* Override user's dragging of the textarea width. */ + width: 100% !important; + height: var(--AT-input-height); + box-sizing: border-box; + margin: 0; + padding: var(--AT-input-padding); + border: 0; +} + +.about-translations-input-results-blank { + opacity: 0.7; +} + +.about-translations-input-results { + position: absolute; + inset: 0; + padding: var(--AT-input-padding); + box-sizing: border-box; + overflow-y: scroll; +} + +.about-translations-info { + display: none; + padding: 10px; + background-color: var(--AT-box-info-background); + border-radius: 4px; + margin-bottom: var(--AT-input-padding); +} + +.about-translations-info-message { + flex: 1; + align-self: center; +} + +.about-translations-info-icon { + width: 16px; + height: 16px; + margin: 10px; + background-image: url('chrome://global/skin/icons/info.svg'); + -moz-context-properties: fill; + fill: currentColor; +} + +@media (max-width: 700px) { + :root { + --AT-page-margin: 10px; + } + h1 { + margin-top: 15px; + } + body { + padding-bottom: var(--AT-page-margin); + } + 
.about-translations-input { + flex-direction: column; + flex: 1; + } + .about-translations-input-textarea, + .about-translations-input { + font-size: 16px; + } + .about-translations-input > * { + width: 100%; + flex: 1; + } + .about-translations-input-end { + border-top: 1px solid var(--AT-box-border-color); + } + .about-translations-input-textarea { + height: 100%; + } + .about-translations-contents { + flex: 1; + } +} diff --git a/toolkit/components/translations/content/translations.html b/toolkit/components/translations/content/translations.html new file mode 100644 index 0000000000..bd2c114a0a --- /dev/null +++ b/toolkit/components/translations/content/translations.html @@ -0,0 +1,70 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!DOCTYPE html> +<html> + <head> + <meta charset="utf-8"> + <meta http-equiv="Content-Security-Policy" content="default-src chrome:; object-src 'none'"> + <meta name="color-scheme" content="light dark"> + <meta name="viewport" content="width=device-width" /> + <title data-l10n-id="about-translations-title"></title> + <link rel="stylesheet" href="chrome://global/skin/global.css"> + <link rel="stylesheet" href="chrome://global/skin/in-content/common.css"> + <link rel="stylesheet" href="chrome://global/content/translations/translations.css"> + <link rel="localization" href="toolkit/branding/brandings.ftl"/> + <link rel="localization" href="locales-preview/aboutTranslations.ftl"/> + <script type="module" src="chrome://global/content/translations/translations.mjs"></script> + </head> + <body> + <h1 data-l10n-id="about-translations-header"></h1> + <main class="about-translations-contents"> + + <header class="about-translations-header"> + <div class="about-translations-header-start"> + <select + class="about-translations-select" + id="language-from" + disabled> + <option 
+ data-l10n-id="about-translations-detect" value="detect"></option> + </select> + </div> + <div class="about-translations-header-end"> + <select + class="about-translations-select" + id="language-to" + disabled> + <option data-l10n-id="about-translations-select" value=""></option> + </select> + </div> + </header> + + <main class="about-translations-input"> + <div class="about-translations-input-start"> + <textarea + class="about-translations-input-textarea" + data-l10n-id="about-translations-textarea" + id="translation-from" + ></textarea> + </div> + <div class="about-translations-input-end"> + <div + class="about-translations-input-results about-translations-input-results-blank" + id="translation-to-blank"> + <div class="about-translations-info" id="translation-info"> + <div class="about-translations-info-icon"></div> + <div class="about-translations-info-message" id="translation-info-message"></div> + </div> + <div data-l10n-id="about-translations-results-placeholder"></div> + </div> + <div + class="about-translations-input-results" + id="translation-to"> + </div> + </div> + </main> + + </main> + </body> +</html> diff --git a/toolkit/components/translations/content/translations.mjs b/toolkit/components/translations/content/translations.mjs new file mode 100644 index 0000000000..0ec8b2d475 --- /dev/null +++ b/toolkit/components/translations/content/translations.mjs @@ -0,0 +1,788 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// The following globals are injected via the AboutTranslationsChild actor. +// translations.mjs is running in an unprivileged context, and these injected functions +// allow for the page to get access to additional privileged features. 
+ +/* global AT_getSupportedLanguages, AT_log, AT_getScriptDirection, + AT_logError, AT_createTranslationsPort, AT_isHtmlTranslation, + AT_isTranslationEngineSupported, AT_identifyLanguage */ + +// Allow tests to override this value so that they can run faster. +// This is the delay in milliseconds. +window.DEBOUNCE_DELAY = 200; +// Allow tests to test the debounce behavior by counting debounce runs. +window.DEBOUNCE_RUN_COUNT = 0; + +/** + * @typedef {import("../translations").SupportedLanguages} SupportedLanguages + */ + +/** + * The model and controller for initializing about:translations. + */ +class TranslationsState { + /** + * This class is responsible for all UI updates. + * + * @type {TranslationsUI} + */ + ui; + + /** + * The language to translate from, in the form of a BCP 47 language tag, + * e.g. "en" or "fr". + * + * @type {string} + */ + fromLanguage = ""; + + /** + * The language to translate to, in the form of a BCP 47 language tag, + * e.g. "en" or "fr". + * + * @type {string} + */ + toLanguage = ""; + + /** + * The message to translate, cached so that it can be determined if the text + * needs to be re-translated. + * + * @type {string} + */ + messageToTranslate = ""; + + /** + * Only send one translation in at a time to the worker. + * @type {Promise<string[]>} + */ + translationRequest = Promise.resolve([]); + + /** + * The translator is only valid for a single language pair, and needs + * to be recreated if the language pair changes. + * + * @type {null | Promise<Translator>} + */ + translator = null; + + /** + * @param {boolean} isSupported + */ + constructor(isSupported) { + /** + * Is the engine supported by the device? + * @type {boolean} + */ + this.isTranslationEngineSupported = isSupported; + + /** + * @type {Promise<SupportedLanguages>} + */ + this.supportedLanguages = isSupported + ? 
AT_getSupportedLanguages() + : Promise.resolve([]); + + this.ui = new TranslationsUI(this); + this.ui.setup(); + + // Set the UI as ready after all of the state promises have settled. + this.supportedLanguages + .then(() => { + this.ui.setAsReady(); + }) + .catch(error => { + AT_logError("Failed to load the supported languages", error); + }); + } + + /** + * Identifies the human language in which the message is written and returns + * the BCP 47 language tag of the language it is determined to be. + * + * e.g. "en" for English. + * + * @param {string} message + */ + async identifyLanguage(message) { + const start = performance.now(); + const { langTag, confidence } = await AT_identifyLanguage(message); + const duration = performance.now() - start; + AT_log( + `[ ${langTag}(${(confidence * 100).toFixed(2)}%) ]`, + `Source language identified in ${duration / 1000} seconds` + ); + return langTag; + } + + /** + * Only request a translation when it's ready. + */ + maybeRequestTranslation = debounce({ + /** + * Debounce the translation requests so that the worker doesn't fire for every + * single keyboard input, but instead the keyboard events are ignored until + * there is a short break, or enough events have happened that it's worth sending + * in a new translation request. + */ + onDebounce: async () => { + // The contents of "this" can change between async steps, store a local variable + // binding of these values. + const { + fromLanguage, + toLanguage, + messageToTranslate, + translator: translatorPromise, + } = this; + + if (!this.isTranslationEngineSupported) { + // Never translate when the engine isn't supported. + return; + } + + if ( + !fromLanguage || + !toLanguage || + !messageToTranslate || + !translatorPromise + ) { + // Not everything is set for translation. + this.ui.updateTranslation(""); + return; + } + + const [translator] = await Promise.all([ + // Ensure the engine is ready to go. 
+ translatorPromise, + // Ensure the previous translation has finished so that only the latest + // translation goes through. + this.translationRequest, + ]); + + if ( + // Check if the current configuration has changed and if this is stale. If so + // then skip this request, as there is already a newer request with more up to + // date information. + this.translator !== translatorPromise || + this.fromLanguage !== fromLanguage || + this.toLanguage !== toLanguage || + this.messageToTranslate !== messageToTranslate + ) { + return; + } + + const start = performance.now(); + + this.translationRequest = translator.translate(messageToTranslate); + const translation = await this.translationRequest; + + // The measure events will show up in the Firefox Profiler. + performance.measure( + `Translations: Translate "${this.fromLanguage}" to "${this.toLanguage}" with ${messageToTranslate.length} characters.`, + { + start, + end: performance.now(), + } + ); + + this.ui.updateTranslation(translation); + const duration = performance.now() - start; + AT_log(`Translation done in ${duration / 1000} seconds`); + }, + + // Mark the events so that they show up in the Firefox Profiler. This makes it handy + // to visualize the debouncing behavior. + doEveryTime: () => { + performance.mark( + `Translations: input changed to ${this.messageToTranslate.length} characters` + ); + }, + }); + + /** + * Any time a language pair is changed, a new Translator needs to be created. + */ + async maybeCreateNewTranslator() { + // If we may need to rebuild the worker, the old translation is no longer valid. + this.ui.updateTranslation(""); + + // These are cases in which it wouldn't make sense or be possible to load any translations models. + if ( + // If fromLanguage or toLanguage are unpopulated we cannot load anything. + !this.fromLanguage || + !this.toLanguage || + // If fromLanguage's value is "detect", rather than a BCP 47 language tag, then no language + // has been detected yet. 
+ this.fromLanguage === "detect" || + // If fromLanguage and toLanguage are the same, this means that the detected language + // is the same as the toLanguage, and we do not want to translate from one language to itself. + this.fromLanguage === this.toLanguage + ) { + if (this.translator) { + // The engine is no longer needed. + this.translator.then(translator => translator.destroy()); + this.translator = null; + } + return; + } + + const start = performance.now(); + AT_log( + `Creating a new translator for "${this.fromLanguage}" to "${this.toLanguage}"` + ); + + this.translator = Translator.create(this.fromLanguage, this.toLanguage); + this.maybeRequestTranslation(); + + try { + await this.translator; + const duration = performance.now() - start; + // Signal to tests that the translator was created so they can exit. + window.postMessage("translator-ready"); + AT_log(`Created a new Translator in ${duration / 1000} seconds`); + } catch (error) { + this.ui.showInfo("about-translations-engine-error"); + AT_logError("Failed to get the Translations worker", error); + } + } + + /** + * Updates the fromLanguage to match the detected language only if the + * about-translations-detect option is selected in the language-from dropdown. + * + * If the new fromLanguage is different than the previous fromLanguage this + * may update the UI to display the new language and may rebuild the translations + * worker if there is a valid selected target language. + */ + async maybeUpdateDetectedLanguage() { + if (!this.ui.detectOptionIsSelected() || this.messageToTranslate === "") { + // If we are not detecting languages or if the message has been cleared + // we should ensure that the UI is not displaying a detected language + // and there is no need to run any language detection. 
+ this.ui.setDetectOptionTextContent(""); + return; + } + + const [langTag, supportedLanguages] = await Promise.all([ + this.identifyLanguage(this.messageToTranslate), + this.supportedLanguages, + ]); + + // Only update the language if the detected language matches + // one of our supported languages. + const entry = supportedLanguages.fromLanguages.find( + ({ langTag: existingTag }) => existingTag === langTag + ); + if (entry) { + const { displayName } = entry; + await this.setFromLanguage(langTag); + this.ui.setDetectOptionTextContent(displayName); + } + } + + /** + * @param {string} lang + */ + async setFromLanguage(lang) { + if (lang !== this.fromLanguage) { + this.fromLanguage = lang; + await this.maybeCreateNewTranslator(); + } + } + + /** + * @param {string} lang + */ + setToLanguage(lang) { + if (lang !== this.toLanguage) { + this.toLanguage = lang; + this.maybeCreateNewTranslator(); + } + } + + /** + * @param {string} message + */ + async setMessageToTranslate(message) { + if (message !== this.messageToTranslate) { + this.messageToTranslate = message; + await this.maybeUpdateDetectedLanguage(); + this.maybeRequestTranslation(); + } + } +} + +/** + * + */ +class TranslationsUI { + /** @type {HTMLSelectElement} */ + languageFrom = document.getElementById("language-from"); + /** @type {HTMLSelectElement} */ + languageTo = document.getElementById("language-to"); + /** @type {HTMLTextAreaElement} */ + translationFrom = document.getElementById("translation-from"); + /** @type {HTMLDivElement} */ + translationTo = document.getElementById("translation-to"); + /** @type {HTMLDivElement} */ + translationToBlank = document.getElementById("translation-to-blank"); + /** @type {HTMLDivElement} */ + translationInfo = document.getElementById("translation-info"); + /** @type {HTMLDivElement} */ + translationInfoMessage = document.getElementById("translation-info-message"); + /** @type {TranslationsState} */ + state; + + /** + * The detect-language option element. 
We want to maintain a handle to this so that + * we can dynamically update its display text to include the detected language. + * + * @type {HTMLOptionElement} + */ + #detectOption; + + /** + * @param {TranslationsState} state + */ + constructor(state) { + this.state = state; + this.translationTo.style.visibility = "visible"; + this.#detectOption = document.querySelector('option[value="detect"]'); + } + + /** + * Do the initial setup. + */ + setup() { + if (!this.state.isTranslationEngineSupported) { + this.showInfo("about-translations-no-support"); + this.disableUI(); + return; + } + this.setupDropdowns(); + this.setupTextarea(); + } + + /** + * Signals that the UI is ready, for tests. + */ + setAsReady() { + document.body.setAttribute("ready", ""); + } + + /** + * Once the models have been synced from remote settings, populate them with the display + * names of the languages. + */ + async setupDropdowns() { + const supportedLanguages = await this.state.supportedLanguages; + + // Update the DOM elements with the display names. + for (const { langTag, displayName } of supportedLanguages.toLanguages) { + const option = document.createElement("option"); + option.value = langTag; + option.text = displayName; + this.languageTo.add(option); + } + + for (const { langTag, displayName } of supportedLanguages.fromLanguages) { + const option = document.createElement("option"); + option.value = langTag; + option.text = displayName; + this.languageFrom.add(option); + } + + // Enable the controls. + this.languageFrom.disabled = false; + this.languageTo.disabled = false; + + // Focus the language dropdowns if they are empty. 
+ if (this.languageFrom.value == "") { + this.languageFrom.focus(); + } else if (this.languageTo.value == "") { + this.languageTo.focus(); + } + + this.state.setFromLanguage(this.languageFrom.value); + this.state.setToLanguage(this.languageTo.value); + this.updateOnLanguageChange(); + + this.languageFrom.addEventListener("input", () => { + this.state.setFromLanguage(this.languageFrom.value); + this.updateOnLanguageChange(); + }); + + this.languageTo.addEventListener("input", () => { + this.state.setToLanguage(this.languageTo.value); + this.updateOnLanguageChange(); + this.translationTo.setAttribute("lang", this.languageTo.value); + }); + } + + /** + * Show an info message to the user. + * + * @param {string} l10nId + */ + showInfo(l10nId) { + document.l10n.setAttributes(this.translationInfoMessage, l10nId); + this.translationInfo.style.display = "flex"; + } + + /** + * Hides the info UI. + */ + hideInfo() { + this.translationInfo.style.display = "none"; + } + + /** + * Returns true if about-translations-detect is the currently + * selected option in the language-from dropdown, otherwise false. + * + * @returns {boolean} + */ + detectOptionIsSelected() { + return this.languageFrom.value === "detect"; + } + + /** + * Sets the textContent of the about-translations-detect option in the + * language-from dropdown to include the detected language's display name. + * + * @param {string} displayName + */ + setDetectOptionTextContent(displayName) { + // Set the text to the fluent value that takes an arg to display the language name. + if (displayName) { + document.l10n.setAttributes( + this.#detectOption, + "about-translations-detect-lang", + { language: displayName } + ); + } else { + // Reset the text to the fluent value that does not display any language name. + document.l10n.setAttributes( + this.#detectOption, + "about-translations-detect" + ); + } + } + + /** + * React to language changes. 
+ */ + updateOnLanguageChange() { + this.#updateDropdownLanguages(); + this.#updateMessageDirections(); + } + + /** + * You can't translate from one language to the same language. Hide the matching + * option in the opposite dropdown if this is the case. + */ + #updateDropdownLanguages() { + for (const option of this.languageFrom.options) { + option.hidden = false; + } + for (const option of this.languageTo.options) { + option.hidden = false; + } + if (this.state.toLanguage) { + const option = this.languageFrom.querySelector( + `[value=${this.state.toLanguage}]` + ); + if (option) { + option.hidden = true; + } + } + if (this.state.fromLanguage) { + const option = this.languageTo.querySelector( + `[value=${this.state.fromLanguage}]` + ); + if (option) { + option.hidden = true; + } + } + this.state.maybeUpdateDetectedLanguage(); + } + + /** + * Define the direction of the language message text, otherwise it might not display + * correctly. For instance English in an RTL UI would display incorrectly like so: + * + * LTR text in LTR UI: + * + * ┌──────────────────────────────────────────────┐ + * │ This is in English. │ + * └──────────────────────────────────────────────┘ + * + * LTR text in RTL UI: + * ┌──────────────────────────────────────────────┐ + * │ .This is in English │ + * └──────────────────────────────────────────────┘ + * + * LTR text in RTL UI, but in an LTR container: + * ┌──────────────────────────────────────────────┐ + * │ This is in English. │ + * └──────────────────────────────────────────────┘ + * + * The effects are similar, but reversed for RTL text in an LTR UI. 
+ */ + #updateMessageDirections() { + if (this.state.toLanguage) { + this.translationTo.setAttribute( + "dir", + AT_getScriptDirection(this.state.toLanguage) + ); + } else { + this.translationTo.removeAttribute("dir"); + } + if (this.state.fromLanguage) { + this.translationFrom.setAttribute( + "dir", + AT_getScriptDirection(this.state.fromLanguage) + ); + } else { + this.translationFrom.removeAttribute("dir"); + } + } + + setupTextarea() { + this.state.setMessageToTranslate(this.translationFrom.value); + this.translationFrom.addEventListener("input", () => { + this.state.setMessageToTranslate(this.translationFrom.value); + }); + } + + disableUI() { + this.translationFrom.disabled = true; + this.languageFrom.disabled = true; + this.languageTo.disabled = true; + } + + /** + * @param {string} message + */ + updateTranslation(message) { + this.translationTo.innerText = message; + if (message) { + this.translationTo.style.visibility = "visible"; + this.translationToBlank.style.visibility = "hidden"; + this.hideInfo(); + } else { + this.translationTo.style.visibility = "hidden"; + this.translationToBlank.style.visibility = "visible"; + } + } +} + +/** + * Listen for events coming from the AboutTranslations actor. + */ +window.addEventListener("AboutTranslationsChromeToContent", ({ detail }) => { + switch (detail.type) { + case "enable": { + // While the feature is in development, hide the feature behind a pref. See the + // "browser.translations.enable" pref in modules/libpref/init/all.js and Bug 971044 + // for the status of enabling this project. 
+ if (window.translationsState) { + throw new Error("about:translations was already initialized."); + } + AT_isTranslationEngineSupported().then(isSupported => { + window.translationsState = new TranslationsState(isSupported); + }); + document.body.style.visibility = "visible"; + break; + } + default: + throw new Error("Unknown AboutTranslationsChromeToContent event."); + } +}); + +/** + * Debounce a function so that a burst of calls results in a single run, which happens + * `window.DEBOUNCE_DELAY` ms after the first call of the burst. This is good for grouping text entry via keyboard. + * + * @param {Object} settings + * @param {Function} settings.onDebounce + * @param {Function} settings.doEveryTime + * @returns {Function} + */ +function debounce({ onDebounce, doEveryTime }) { + /** @type {number | null} */ + let timeoutId = null; + let lastDispatch = null; + + return (...args) => { + doEveryTime(...args); + + const now = Date.now(); + if (lastDispatch === null) { + // This is the first call since the last dispatch, start measuring the delay from now. + lastDispatch = now; + } + + const timeLeft = lastDispatch + window.DEBOUNCE_DELAY - now; + + // Always discard the old timeout, either the function will run, or a new + // timer will be scheduled. + clearTimeout(timeoutId); + + if (timeLeft <= 0) { + // It's been long enough to go ahead and call the function. + timeoutId = null; + lastDispatch = null; + window.DEBOUNCE_RUN_COUNT += 1; + onDebounce(...args); + return; + } + + // Re-set the timeout with the current time left. + clearTimeout(timeoutId); + + timeoutId = setTimeout(() => { + // Timeout ended, call the function. + timeoutId = null; + lastDispatch = null; + window.DEBOUNCE_RUN_COUNT += 1; + onDebounce(...args); + }, timeLeft); + }; +} + +/** + * Perform translations over a `MessagePort`. This class manages the communications to + * the translations engine. 
+ */ +class Translator { + /** + * @type {MessagePort} + */ + #port; + + /** + * An id for each message sent. This is used to match up the request and response. 
+ */ + #nextMessageId = 0; + + /** + * Tie together a message id to a resolved response. + * @type {Map<number, TranslationRequest>} + */ + #requests = new Map(); + + engineStatus = "initializing"; + + /** + * @param {MessagePort} port + */ + constructor(port) { + this.#port = port; + + // Create a promise that will be resolved when the engine is ready. + let engineLoaded; + let engineFailed; + this.ready = new Promise((resolve, reject) => { + engineLoaded = resolve; + engineFailed = reject; + }); + + // Match up a response on the port to the message that was sent. + port.onmessage = ({ data }) => { + switch (data.type) { + case "TranslationsPort:TranslationResponse": { + const { targetText, messageId } = data; + // A request may not match a messageId if there is a race during the pausing + // and discarding of the queue. + this.#requests.get(messageId)?.resolve(targetText); + break; + } + case "TranslationsPort:GetEngineStatusResponse": { + if (data.status === "ready") { + engineLoaded(); + } else { + engineFailed(); + } + break; + } + default: + AT_logError("Unknown translations port message: " + data.type); + break; + } + }; + + port.postMessage({ type: "TranslationsPort:GetEngineStatusRequest" }); + } + + /** + * Opens up a port and creates a new translator. + * + * @param {string} fromLanguage + * @param {string} toLanguage + * @returns {Promise<Translator>} + */ + static create(fromLanguage, toLanguage) { + return new Promise((resolve, reject) => { + AT_createTranslationsPort(fromLanguage, toLanguage); + + function getResponse({ data }) { + if ( + data.type == "GetTranslationsPort" && + fromLanguage === data.fromLanguage && + toLanguage === data.toLanguage + ) { + // The response matches, resolve the port. + const translator = new Translator(data.port); + + // Resolve the translator once it is ready, or propagate the rejection + // if it failed. 
+ translator.ready.then(() => resolve(translator), reject); + window.removeEventListener("message", getResponse); + } + } + + // Listen for a response for the message port. + window.addEventListener("message", getResponse); + }); + } + + /** + * Send a request to translate text to the Translations Engine. If it returns `null` + * then the request is stale. A rejection means there was an error in the translation. + * This request may be queued. + * + * @param {string} sourceText + * @returns {Promise<string>} + */ + translate(sourceText) { + return new Promise((resolve, reject) => { + const messageId = this.#nextMessageId++; + // Store the "resolve" for the promise. It will be matched back up with the + // `messageId` in the `port.onmessage` handler set up in the constructor. + const isHTML = AT_isHtmlTranslation(); + this.#requests.set(messageId, { + sourceText, + isHTML, + resolve, + reject, + }); + this.#port.postMessage({ + type: "TranslationsPort:TranslationRequest", + messageId, + sourceText, + isHTML, + }); + }); + } + + /** + * Close the port and remove any pending or queued requests. + */ + destroy() { + this.#port.close(); + } +} |