diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /toolkit/components/translations/actors | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr. (upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/components/translations/actors')
5 files changed, 3394 insertions, 0 deletions
diff --git a/toolkit/components/translations/actors/AboutTranslationsChild.sys.mjs b/toolkit/components/translations/actors/AboutTranslationsChild.sys.mjs new file mode 100644 index 0000000000..112ac3c444 --- /dev/null +++ b/toolkit/components/translations/actors/AboutTranslationsChild.sys.mjs @@ -0,0 +1,312 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; + +const lazy = {}; + +XPCOMUtils.defineLazyGetter(lazy, "console", () => { + return console.createInstance({ + maxLogLevelPref: "browser.translations.logLevel", + prefix: "Translations", + }); +}); + +/** + * @typedef {import("./TranslationsChild.sys.mjs").LanguageIdEngine} LanguageIdEngine + * @typedef {import("./TranslationsChild.sys.mjs").TranslationsEngine} TranslationsEngine + * @typedef {import("./TranslationsChild.sys.mjs").SupportedLanguages} SupportedLanguages + */ + +/** + * The AboutTranslationsChild is responsible for coordinating what privileged APIs + * are exposed to the un-privileged scope of the about:translations page. + */ +export class AboutTranslationsChild extends JSWindowActorChild { + /** @type {LanguageIdEngine | null} */ + languageIdEngine = null; + + /** @type {TranslationsEngine | null} */ + translationsEngine = null; + + /** + * The translations engine uses text translations by default in about:translations, + * but it can be changed to translate HTML by setting this pref to true. This is + * useful for manually testing HTML translation behavior, but is not useful to surface + * as a user-facing feature. 
+ * + * @type {bool} + */ + #isHtmlTranslation = Services.prefs.getBoolPref( + "browser.translations.useHTML" + ); + + handleEvent(event) { + if (event.type === "DOMDocElementInserted") { + this.#exportFunctions(); + } + + if ( + event.type === "DOMContentLoaded" && + Services.prefs.getBoolPref("browser.translations.enable") + ) { + this.#sendEventToContent({ type: "enable" }); + } + } + + /** + * @param {object} detail + */ + #sendEventToContent(detail) { + this.contentWindow.dispatchEvent( + new this.contentWindow.CustomEvent("AboutTranslationsChromeToContent", { + detail: Cu.cloneInto(detail, this.contentWindow), + }) + ); + } + + /** + * @returns {TranslationsChild} + */ + #getTranslationsChild() { + const child = this.contentWindow.windowGlobalChild.getActor("Translations"); + if (!child) { + throw new Error("Unable to find the TranslationsChild"); + } + return child; + } + + /** + * A privileged promise can't be used in the content page, so convert a privileged + * promise into a content one. + * + * @param {Promise<any>} promise + * @returns {Promise<any>} + */ + #convertToContentPromise(promise) { + return new this.contentWindow.Promise((resolve, reject) => + promise.then(resolve, error => { + let contentWindow; + try { + contentWindow = this.contentWindow; + } catch (error) { + // The content window is no longer available. + reject(); + return; + } + // Create an error in the content window, if the content window is still around. + let message = "An error occured in the AboutTranslations actor."; + if (typeof error === "string") { + message = error; + } + if (typeof error?.message === "string") { + message = error.message; + } + if (typeof error?.stack === "string") { + message += `\n\nOriginal stack:\n\n${error.stack}\n`; + } + + reject(new contentWindow.Error(message)); + }) + ); + } + + /** + * Export any of the child functions that start with "AT_" to the unprivileged content + * page. This restricts the security capabilities of the the content page. 
+ */ + #exportFunctions() { + const window = this.contentWindow; + + const fns = [ + "AT_log", + "AT_logError", + "AT_getAppLocale", + "AT_getSupportedLanguages", + "AT_isTranslationEngineSupported", + "AT_createLanguageIdEngine", + "AT_createTranslationsEngine", + "AT_identifyLanguage", + "AT_translate", + "AT_destroyTranslationsEngine", + "AT_getScriptDirection", + ]; + for (const name of fns) { + Cu.exportFunction(this[name].bind(this), window, { defineAs: name }); + } + } + + /** + * Log messages if "browser.translations.logLevel" is set to "All". + * + * @param {...any} args + */ + AT_log(...args) { + lazy.console.log(...args); + } + + /** + * Report an error to the console. + * + * @param {...any} args + */ + AT_logError(...args) { + lazy.console.error(...args); + } + + /** + * Returns the app's locale. + * + * @returns {Intl.Locale} + */ + AT_getAppLocale() { + return Services.locale.appLocaleAsBCP47; + } + + /** + * Wire this function to the TranslationsChild. + * + * @returns {Promise<SupportedLanguages>} + */ + AT_getSupportedLanguages() { + return this.#convertToContentPromise( + this.#getTranslationsChild() + .getSupportedLanguages() + .then(data => Cu.cloneInto(data, this.contentWindow)) + ); + } + + /** + * Does this device support the translation engine? + * @returns {Promise<boolean>} + */ + AT_isTranslationEngineSupported() { + return this.#convertToContentPromise( + this.#getTranslationsChild().isTranslationsEngineSupported + ); + } + + /** + * Creates the LanguageIdEngine which attempts to identify in which + * human language a string is written. + * + * Unlike TranslationsEngine, which handles only a single language pair + * and must be rebuilt to handle a new language pair, the LanguageIdEngine + * is a one-to-many engine that can recognize all of its supported languages. + * + * Subsequent calls to this function after the engine is initialized will do nothing + * instead of rebuilding the engine. 
+ * + * @returns {Promise<void>} + */ + AT_createLanguageIdEngine() { + if (this.languageIdEngine) { + return this.#convertToContentPromise(Promise.resolve()); + } + return this.#convertToContentPromise( + this.#getTranslationsChild() + .createLanguageIdEngine() + .then(engine => { + this.languageIdEngine = engine; + }) + ); + } + + /** + * Creates the TranslationsEngine which is responsible for translating + * from one language to the other. + * + * The instantiated TranslationsEngine is unique to its language pair. + * In order to translate a different language pair, a new engine must be + * created for that pair. + * + * Subsequent calls to this function will destroy the existing engine and + * rebuild a new engine for the new language pair. + * + * @param {string} fromLanguage + * @param {string} toLanguage + * @returns {Promise<void>} + */ + AT_createTranslationsEngine(fromLanguage, toLanguage) { + if (this.translationsEngine) { + this.translationsEngine.terminate(); + this.translationsEngine = null; + } + return this.#convertToContentPromise( + this.#getTranslationsChild() + .createTranslationsEngine(fromLanguage, toLanguage) + .then(engine => { + this.translationsEngine = engine; + }) + ); + } + + /** + * Attempts to identify the human language in which the message is written. + * @see LanguageIdEngine#identifyLanguage for more detailed documentation. 
+ * + * @param {string} message + * @returns {Promise<{ langTag: string, confidence: number }>} + */ + AT_identifyLanguage(message) { + if (!this.languageIdEngine) { + const { Promise, Error } = this.contentWindow; + return Promise.reject( + new Error("The language identification was not created.") + ); + } + + return this.#convertToContentPromise( + this.languageIdEngine + .identifyLanguage(message) + .then(data => Cu.cloneInto(data, this.contentWindow)) + ); + } + + /** + * @param {string[]} messageBatch + * @param {number} innerWindowId + * @returns {Promise<string[]>} + */ + AT_translate(messageBatch, innerWindowId) { + if (!this.translationsEngine) { + throw new this.contentWindow.Error( + "The translations engine was not created." + ); + } + const promise = this.#isHtmlTranslation + ? this.translationsEngine.translateHTML(messageBatch, innerWindowId) + : this.translationsEngine.translateText(messageBatch, innerWindowId); + + return this.#convertToContentPromise( + promise.then(translations => + Cu.cloneInto(translations, this.contentWindow) + ) + ); + } + + /** + * This is not strictly necessary, but could free up resources quicker. + */ + AT_destroyTranslationsEngine() { + if (this.translationsEngine) { + this.translationsEngine.terminate(); + this.translationsEngine = null; + } + } + + /** + * TODO - Remove this when Intl.Locale.prototype.textInfo is available to + * content scripts. 
+ * + * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/textInfo + * https://bugzilla.mozilla.org/show_bug.cgi?id=1693576 + * + * @param {string} locale + * @returns {string} + */ + AT_getScriptDirection(locale) { + return Services.intl.getScriptDirection(locale); + } +} diff --git a/toolkit/components/translations/actors/AboutTranslationsParent.sys.mjs b/toolkit/components/translations/actors/AboutTranslationsParent.sys.mjs new file mode 100644 index 0000000000..097a0a1d92 --- /dev/null +++ b/toolkit/components/translations/actors/AboutTranslationsParent.sys.mjs @@ -0,0 +1,13 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * This parent is blank because the Translations actor handles most of the features + * needed in AboutTranslations. + */ +export class AboutTranslationsParent extends JSWindowActorParent { + constructor() { + super(); + } +} diff --git a/toolkit/components/translations/actors/TranslationsChild.sys.mjs b/toolkit/components/translations/actors/TranslationsChild.sys.mjs new file mode 100644 index 0000000000..4a5388ec88 --- /dev/null +++ b/toolkit/components/translations/actors/TranslationsChild.sys.mjs @@ -0,0 +1,1106 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/** + * @typedef {import("../content/translations-document.sys.mjs").TranslationsDocument} TranslationsDocument + * @typedef {import("../translations").LanguageIdEnginePayload} LanguageIdEnginePayload + * @typedef {import("../translations").LanguageTranslationModelFiles} LanguageTranslationModelFiles + * @typedef {import("../translations").TranslationsEnginePayload} TranslationsEnginePayload + * @typedef {import("../translations").LanguagePair} LanguagePair + * @typedef {import("../translations").SupportedLanguages} SupportedLanguages + * @typedef {import("../translations").LangTags} LangTags + */ + +/** + * @type {{ + * TranslationsDocument: typeof TranslationsDocument + * console: typeof console + * }} + */ +const lazy = {}; + +/** + * The threshold that the language-identification confidence + * value must be greater than in order to provide the detected language + * tag for translations. + * + * This value should ideally be one that does not allow false positives + * while also not being too restrictive. + * + * At this time, this value is not driven by statistical data or analysis. + */ +const DOC_LANGUAGE_DETECTION_THRESHOLD = 0.65; + +/** + * The length of the substring to pull from the document's text for language + * identification. + * + * This value should ideally be one that is large enough to yield a confident + * identification result without being too large or expensive to extract. + * + * At this time, this value is not driven by statistical data or analysis. 
+ */ +const DOC_TEXT_TO_IDENTIFY_LENGTH = 1024; + +const PIVOT_LANGUAGE = "en"; + +import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; + +ChromeUtils.defineESModuleGetters(lazy, { + setTimeout: "resource://gre/modules/Timer.sys.mjs", + clearTimeout: "resource://gre/modules/Timer.sys.mjs", + TranslationsDocument: + "chrome://global/content/translations/translations-document.sys.mjs", + TranslationsTelemetry: + "chrome://global/content/translations/TranslationsTelemetry.sys.mjs", +}); + +XPCOMUtils.defineLazyGetter(lazy, "console", () => { + return console.createInstance({ + maxLogLevelPref: "browser.translations.logLevel", + prefix: "Translations", + }); +}); + +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "logLevel", + "browser.translations.logLevel" +); + +export class LanguageIdEngine { + /** @type {Worker} */ + #languageIdWorker; + // Multiple messages can be sent before a response is received. This ID is used to keep + // track of the messages. It is incremented on every use. + #messageId = 0; + + /** + * Construct and initialize the language-id worker. + * + * @param {Object} data + * @param {string} data.type - The message type, expects "initialize". + * @param {ArrayBuffer} data.wasmBuffer - The buffer containing the wasm binary. + * @param {ArrayBuffer} data.modelBuffer - The buffer containing the language-id model binary. + * @param {null | string} data.mockedLangTag - The mocked language tag value (only present when mocking). + * @param {null | number} data.mockedConfidence - The mocked confidence value (only present when mocking). 
+ * @param {boolean} data.isLoggingEnabled + */ + constructor(data) { + this.#languageIdWorker = new Worker( + "chrome://global/content/translations/language-id-engine-worker.js" + ); + + this.isReady = new Promise((resolve, reject) => { + const onMessage = ({ data }) => { + if (data.type === "initialization-success") { + resolve(); + } else if (data.type === "initialization-error") { + reject(data.error); + } + this.#languageIdWorker.removeEventListener("message", onMessage); + }; + this.#languageIdWorker.addEventListener("message", onMessage); + }); + + const transferables = []; + // Make sure the ArrayBuffers are transferred, not cloned. + // https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Transferable_objects + transferables.push(data.wasmBuffer, data.modelBuffer); + + this.#languageIdWorker.postMessage(data, transferables); + } + + /** + * Attempts to identify the human language in which the message is written. + * Generally, the longer a message is, the higher the likelihood that the + * identified language will be correct. Shorter messages increase the chance + * of false identification. + * + * The returned confidence is a number between 0.0 and 1.0 of how confident + * the language identification model was that it identified the correct language. + * + * @param {string} message + * @returns {Promise<{ langTag: string, confidence: number }>} + */ + identifyLanguage(message) { + const messageId = this.#messageId++; + return new Promise((resolve, reject) => { + const onMessage = ({ data }) => { + if (data.messageId !== messageId) { + // Multiple translation requests can be sent before a response is received. + // Ensure that the response received here is the correct one. 
+ return; + } + if (data.type === "language-id-response") { + let { langTag, confidence } = data; + resolve({ langTag, confidence }); + } + if (data.type === "language-id-error") { + reject(data.error); + } + this.#languageIdWorker.removeEventListener("message", onMessage); + }; + this.#languageIdWorker.addEventListener("message", onMessage); + this.#languageIdWorker.postMessage({ + type: "language-id-request", + message, + messageId, + }); + }); + } +} + +// How long the cache remains alive between uses, in milliseconds. +const CACHE_TIMEOUT_MS = 10_000; + +class TranslationsEngineCache { + /** @type {Record<string, Promise<TranslationsEngine>>} */ + #engines = {}; + + /** @type {Record<string, TimeoutID>} */ + #timeouts = {}; + + /** + * Returns a getter function that will create a translations engine on the first + * call, and then return the cached one. After a timeout when the engine hasn't + * been used, it is destroyed. + * + * @param {TranslationsChild} actor + * @param {string} fromLanguage + * @param {string} toLanguage + * @returns {(() => Promise<TranslationsEngine>) | ((onlyFromCache: true) => Promise<TranslationsEngine | null>)} + */ + createGetter(actor, fromLanguage, toLanguage) { + return async (onlyFromCache = false) => { + let enginePromise = + this.#engines[ + TranslationsChild.languagePairKey(fromLanguage, toLanguage) + ]; + if (enginePromise) { + return enginePromise; + } + if (onlyFromCache) { + return null; + } + + // A new engine needs to be created. + enginePromise = actor.createTranslationsEngine(fromLanguage, toLanguage); + + const key = TranslationsChild.languagePairKey(fromLanguage, toLanguage); + this.#engines[key] = enginePromise; + + // Remove the engine if it fails to initialize. + enginePromise.catch(error => { + lazy.console.log( + `The engine failed to load for translating "${fromLanguage}" to "${toLanguage}". 
Removing it from the cache.`, + error + ); + this.#engines[key] = null; + }); + + const engine = await enginePromise; + + // These methods will be spied on, and when called they will keep the engine alive. + this.spyOn(engine, "translateText"); + this.spyOn(engine, "translateHTML"); + this.spyOn(engine, "discardTranslationQueue"); + this.keepAlive(fromLanguage, toLanguage); + + return engine; + }; + } + + /** + * Spies on a method, so that when it is called, the engine is kept alive. + * + * @param {TranslationsEngine} engine + * @param {string} methodName + */ + spyOn(engine, methodName) { + const method = engine[methodName].bind(engine); + engine[methodName] = (...args) => { + this.keepAlive(engine.fromLanguage, engine.toLanguage); + return method(...args); + }; + } + + /** + * @param {string} fromLanguage + * @param {string} toLanguage + */ + keepAlive(fromLanguage, toLanguage) { + const key = TranslationsChild.languagePairKey(fromLanguage); + const timeoutId = this.#timeouts[key]; + if (timeoutId) { + lazy.clearTimeout(timeoutId); + } + const enginePromise = this.#engines[key]; + if (!enginePromise) { + // It appears that the engine is already dead. + return; + } + this.#timeouts[key] = lazy.setTimeout(() => { + // Delete the caches. + delete this.#engines[key]; + delete this.#timeouts[key]; + + // Terminate the engine worker. + enginePromise.then(engine => engine.terminate()); + }, CACHE_TIMEOUT_MS); + } + + /** + * Sees if an engine is still in the cache. + */ + isInCache(fromLanguage, toLanguage) { + this.keepAlive(fromLanguage, toLanguage); + return Boolean( + this.#engines[TranslationsChild.languagePairKey(fromLanguage, toLanguage)] + ); + } +} + +const translationsEngineCache = new TranslationsEngineCache(); + +/** + * The TranslationsEngine encapsulates the logic for translating messages. It can + * only be set up for a single language translation pair. In order to change languages + * a new engine should be constructed. 
+ * + * The actual work for the translations happens in a worker. This class manages + * instantiating and messaging the worker. + */ +export class TranslationsEngine { + /** @type {Worker} */ + #translationsWorker; + // Multiple messages can be sent before a response is received. This ID is used to keep + // track of the messages. It is incremented on every use. + #messageId = 0; + + /** + * Construct and initialize the worker. + * + * @param {string} fromLanguage + * @param {string} toLanguage + * @param {TranslationsEnginePayload} enginePayload - If there is no engine payload + * then the engine will be mocked. This allows this class to be used in tests. + * @param {number} innerWindowId - This only used for creating profiler markers in + * the initial creation of the engine. + */ + constructor(fromLanguage, toLanguage, enginePayload, innerWindowId) { + /** @type {string} */ + this.fromLanguage = fromLanguage; + /** @type {string} */ + this.toLanguage = toLanguage; + this.#translationsWorker = new Worker( + "chrome://global/content/translations/translations-engine-worker.js" + ); + + /** @type {Promise<void>} */ + this.isReady = new Promise((resolve, reject) => { + const onMessage = ({ data }) => { + lazy.console.log("Received initialization message", data); + if (data.type === "initialization-success") { + resolve(); + } else if (data.type === "initialization-error") { + reject(data.error); + } + this.#translationsWorker.removeEventListener("message", onMessage); + }; + this.#translationsWorker.addEventListener("message", onMessage); + }); + + // Make sure the ArrayBuffers are transferred, not cloned. 
+ // https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Transferable_objects + const transferables = []; + if (enginePayload) { + transferables.push(enginePayload.bergamotWasmArrayBuffer); + for (const files of enginePayload.languageModelFiles) { + for (const { buffer } of Object.values(files)) { + transferables.push(buffer); + } + } + } + + this.#translationsWorker.postMessage( + { + type: "initialize", + fromLanguage, + toLanguage, + enginePayload, + innerWindowId, + messageId: this.#messageId++, + logLevel: lazy.logLevel, + }, + transferables + ); + } + + /** + * Translate text without any HTML. + * + * @param {string[]} messageBatch + * @param {number} innerWindowId + * @returns {Promise<string[]>} + */ + translateText(messageBatch, innerWindowId) { + return this.#translate(messageBatch, false, innerWindowId); + } + + /** + * Translate valid HTML. Note that this method throws if invalid markup is provided. + * + * @param {string[]} messageBatch + * @param {number} innerWindowId + * @returns {Promise<string[]>} + */ + translateHTML(messageBatch, innerWindowId) { + return this.#translate(messageBatch, true, innerWindowId); + } + + /** + * The implementation for translation. Use translateText or translateHTML for the + * public API. + * + * @param {string[]} messageBatch + * @param {boolean} isHTML + * @param {number} innerWindowId + * @returns {Promise<string[]>} + */ + #translate(messageBatch, isHTML, innerWindowId) { + const messageId = this.#messageId++; + + return new Promise((resolve, reject) => { + const onMessage = ({ data }) => { + if ( + data.type === "translations-discarded" && + data.innerWindowId === innerWindowId + ) { + // The page was unloaded, and we no longer need to listen for a response. + this.#translationsWorker.removeEventListener("message", onMessage); + return; + } + + if (data.messageId !== messageId) { + // Multiple translation requests can be sent before a response is received. 
+ // Ensure that the response received here is the correct one. + return; + } + + if (data.type === "translation-response") { + resolve(data.translations); + } + if (data.type === "translation-error") { + reject(data.error); + } + this.#translationsWorker.removeEventListener("message", onMessage); + }; + + this.#translationsWorker.addEventListener("message", onMessage); + + this.#translationsWorker.postMessage({ + type: "translation-request", + isHTML, + messageBatch, + messageId, + innerWindowId, + }); + }); + } + + /** + * The worker should be GCed just fine on its own, but go ahead and signal to + * the worker that it's no longer needed. This will immediately cancel any in-progress + * translations. + */ + terminate() { + this.#translationsWorker.terminate(); + } + + /** + * Stop processing the translation queue. All in-progress messages will be discarded. + * + * @param {number} innerWindowId + */ + discardTranslationQueue(innerWindowId) { + ChromeUtils.addProfilerMarker( + "TranslationsChild", + null, + "Request to discard translation queue" + ); + this.#translationsWorker.postMessage({ + type: "discard-translation-queue", + innerWindowId, + }); + } +} + +/** + * See the TranslationsParent for documentation. + */ +export class TranslationsChild extends JSWindowActorChild { + constructor() { + super(); + ChromeUtils.addProfilerMarker( + "TranslationsChild", + null, + "TranslationsChild constructor" + ); + } + + /** + * The getter for the TranslationsEngine, managed by the EngineCache. + * + * @type {null | (() => Promise<TranslationsEngine>) | ((fromCache: true) => Promise<TranslationsEngine | null>)} + */ + #getTranslationsEngine = null; + + /** + * The actor can be destroyed leaving dangling references to dead objects. + */ + #isDestroyed = false; + + /** + * Store this at the beginning so that there is no risk of access a dead object + * to read it. 
+ * @type {number | null} + */ + innerWindowId = null; + + /** + * @type {TranslationsDocument | null} + */ + translatedDoc = null; + + /** + * The matched language tags for the page. Used to find a default language pair for + * translations. + * + * @type {null | LangTags} + * */ + #langTags = null; + + /** + * Creates a lookup key that is unique to each fromLanguage-toLanguage pair. + * + * @param {string} fromLanguage + * @param {string} toLanguage + * @returns {string} + */ + static languagePairKey(fromLanguage, toLanguage) { + return `${fromLanguage},${toLanguage}`; + } + + /** + * Retrieve a substring of text from the document body to be + * analyzed by the LanguageIdEngine to determine the page's language. + * + * @returns {string} + */ + #getTextToIdentify() { + let encoder = Cu.createDocumentEncoder("text/plain"); + encoder.init(this.document, "text/plain", encoder.SkipInvisibleContent); + return encoder + .encodeToStringWithMaxLength(DOC_TEXT_TO_IDENTIFY_LENGTH) + .replaceAll("\r", "") + .replaceAll("\n", " "); + } + + /** + * @overrides JSWindowActorChild.prototype.handleEvent + * @param {{ type: string }} event + */ + handleEvent(event) { + ChromeUtils.addProfilerMarker( + "TranslationsChild", + null, + "Event: " + event.type + ); + switch (event.type) { + case "DOMContentLoaded": + this.innerWindowId = this.contentWindow.windowGlobalChild.innerWindowId; + this.maybeOfferTranslation().catch(error => lazy.console.log(error)); + break; + case "pagehide": + lazy.console.log( + "pagehide", + this.contentWindow.location, + this.#langTags + ); + this.reportDetectedLangTagsToParent(null); + break; + } + return undefined; + } + + /** + * This is used to conditionally add the translations button. + * @param {null | LangTags} langTags + */ + reportDetectedLangTagsToParent(langTags) { + this.sendAsyncMessage("Translations:ReportDetectedLangTags", { + langTags, + }); + } + + /** + * Returns the principal from the content window's origin. 
+ * @returns {nsIPrincipal} + */ + getContentWindowPrincipal() { + return Services.scriptSecurityManager.createContentPrincipalFromOrigin( + this.contentWindow.location.origin + ); + } + + /** + * Only translate pages that match certain protocols, that way internal pages like + * about:* pages will not be translated. + */ + #isRestrictedPage() { + const { href } = this.contentWindow.location; + // Keep this logic up to date with TranslationsParent.isRestrictedPage. + return !( + href.startsWith("http://") || + href.startsWith("https://") || + href.startsWith("file:///") + ); + } + + /** + * Determine if the page should be translated by checking the App's languages and + * comparing it to the reported language of the page. Return the best translation fit + * (if available). + * + * @param {number} [translationsStart] + * @returns {Promise<LangTags>} + */ + async getLangTagsForTranslation(translationsStart = this.docShell.now()) { + if (this.#langTags) { + return this.#langTags; + } + + const langTags = { + docLangTag: null, + userLangTag: null, + isDocLangTagSupported: false, + }; + this.#langTags = langTags; + + if (this.#isRestrictedPage()) { + // The langTags are still blank here. + return langTags; + } + let languagePairs = await this.getLanguagePairs(); + + const determineIsDocLangTagSupported = docLangTag => + Boolean( + languagePairs.find(({ fromLang }) => fromLang === langTags.docLangTag) + ); + + // First try to get the langTag from the document's markup. + try { + const docLocale = new Intl.Locale(this.document.documentElement.lang); + langTags.docLangTag = docLocale.language; + langTags.isDocLangTagSupported = determineIsDocLangTagSupported( + docLocale.language + ); + } catch (error) {} + + // If the document's markup had no specified langTag, attempt + // to identify the page's language using the LanguageIdEngine. 
+ if (!langTags.docLangTag) { + let languageIdEngine = await this.createLanguageIdEngine(); + let { langTag, confidence } = await languageIdEngine.identifyLanguage( + this.#getTextToIdentify() + ); + lazy.console.log( + `${langTag}(${confidence.toFixed(2)}) Detected Page Language` + ); + if (confidence >= DOC_LANGUAGE_DETECTION_THRESHOLD) { + langTags.docLangTag = langTag; + langTags.isDocLangTagSupported = + determineIsDocLangTagSupported(langTag); + } + } + + const preferredLanguages = await this.getPreferredLanguages(); + + if (!langTags.docLangTag) { + const message = "No valid language detected."; + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId }, + message + ); + lazy.console.log(message, this.contentWindow.location.href); + + const languagePairs = await this.getLanguagePairs(); + + // Attempt to find a good language to select for the user. + langTags.userLangTag = + preferredLanguages.find(langTag => langTag === languagePairs.toLang) ?? + null; + + return langTags; + } + + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId, startTime: translationsStart }, + "Time to determine langTags" + ); + + // This is a special case where we do not offer a translation if the main app language + // and the doc language match. The main app language should be the first preferred + // language. + if (preferredLanguages[0] === langTags.docLangTag) { + // The doc language and the main language match. + const message = + "The app and document languages match, so not translating."; + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId }, + message + ); + lazy.console.log(message, this.contentWindow.location.href); + // The docLangTag will be set, while the userLangTag will be null. + return langTags; + } + + // Attempt to find a matching language pair for a preferred language. 
+ for (const preferredLangTag of preferredLanguages) { + if ( + translationsEngineCache.isInCache(langTags.docLangTag, preferredLangTag) + ) { + // There is no reason to look at the language pairs if the engine is already in + // the cache. + langTags.userLangTag = preferredLangTag; + break; + } + + if (!langTags.isDocLangTagSupported) { + if (languagePairs.some(({ toLang }) => toLang === preferredLangTag)) { + // Only match the "to" language, since the "from" is not supported. + langTags.userLangTag = preferredLangTag; + } + break; + } + + // Is there a direct language pair match? + if ( + languagePairs.some( + ({ fromLang, toLang }) => + fromLang === langTags.docLangTag && toLang === preferredLangTag + ) + ) { + // A match was found in one of the preferred languages. + langTags.userLangTag = preferredLangTag; + break; + } + + // Is there a pivot language match? + if ( + // Match doc -> pivot + languagePairs.some( + ({ fromLang, toLang }) => + fromLang === langTags.docLangTag && toLang === PIVOT_LANGUAGE + ) && + // Match pivot -> preferred language + languagePairs.some( + ({ fromLang, toLang }) => + fromLang === PIVOT_LANGUAGE && toLang === preferredLangTag + ) + ) { + langTags.userLangTag = preferredLangTag; + break; + } + } + + if (!langTags.userLangTag) { + // No language pairs match. + const message = `No matching translation pairs were found for translating from "${langTags.docLangTag}".`; + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId }, + message + ); + lazy.console.log(message, languagePairs); + } + + return langTags; + } + + /** + * Deduce the language tags on the page, and either: + * 1. Show an offer to translate. + * 2. Auto-translate. + * 3. Do nothing. 
+ */ + async maybeOfferTranslation() { + const translationsStart = this.docShell.now(); + + const isSupported = await this.isTranslationsEngineSupported; + if (!isSupported) { + return; + } + + const langTags = await this.getLangTagsForTranslation(translationsStart); + + this.#langTags = langTags; + this.reportDetectedLangTagsToParent(langTags); + + if (langTags.docLangTag && langTags.userLangTag) { + const { maybeAutoTranslate, maybeNeverTranslate } = await this.sendQuery( + "Translations:GetTranslationConditions", + langTags + ); + if (maybeAutoTranslate && !maybeNeverTranslate) { + lazy.TranslationsTelemetry.onTranslate({ + fromLanguage: langTags.docLangTag, + toLanguage: langTags.userLangTag, + autoTranslate: maybeAutoTranslate, + }); + this.translatePage( + langTags.docLangTag, + langTags.userLangTag, + translationsStart + ); + } + } + } + + /** + * Lazily initialize this value. It doesn't change after being set. + * + * @type {Promise<boolean>} + */ + get isTranslationsEngineSupported() { + // Delete the getter and set the real value directly onto the TranslationsChild's + // prototype. This value never changes while a browser is open. + delete TranslationsChild.isTranslationsEngineSupported; + return (TranslationsChild.isTranslationsEngineSupported = this.sendQuery( + "Translations:GetIsTranslationsEngineSupported" + )); + } + + /** + * Load the translation engine and translate the page. + * + * @param {{fromLanguage: string, toLanguage: string}} langTags + * @param {number} [translationsStart] + * @returns {Promise<void>} + */ + async translatePage( + fromLanguage, + toLanguage, + translationsStart = this.docShell.now() + ) { + if (this.translatedDoc) { + lazy.console.warn("This page was already translated."); + return; + } + if (this.#isRestrictedPage()) { + lazy.console.warn("Attempting to translate a restricted page."); + return; + } + + try { + const engineLoadStart = this.docShell.now(); + // Create a function to get an engine. 
These engines are pretty heavy in terms + // of memory usage, so they will be destroyed when not in use, and attempt to + // be re-used when loading a new page. + this.#getTranslationsEngine = await translationsEngineCache.createGetter( + this, + fromLanguage, + toLanguage + ); + if (this.#isDestroyed) { + return; + } + + // Start loading the engine if it doesn't exist. + this.#getTranslationsEngine().then( + () => { + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId, startTime: engineLoadStart }, + "Load Translations Engine" + ); + }, + error => { + lazy.console.log("Failed to load the translations engine.", error); + } + ); + } catch (error) { + lazy.TranslationsTelemetry.onError(error); + lazy.console.log( + "Failed to load the translations engine", + error, + this.contentWindow.location.href + ); + this.sendAsyncMessage("Translations:FullPageTranslationFailed", { + reason: "engine-load-failure", + }); + return; + } + + // Ensure the translation engine loads correctly at least once before instantiating + // the TranslationsDocument. 
+ try { + await this.#getTranslationsEngine(); + } catch (error) { + lazy.TranslationsTelemetry.onError(error); + this.sendAsyncMessage("Translations:FullPageTranslationFailed", { + reason: "engine-load-failure", + }); + return; + } + + this.translatedDoc = new lazy.TranslationsDocument( + this.document, + fromLanguage, + this.innerWindowId, + html => + this.#getTranslationsEngine().then(engine => + engine.translateHTML([html], this.innerWindowId) + ), + text => + this.#getTranslationsEngine().then(engine => + engine.translateText([text], this.innerWindowId) + ), + () => this.docShell.now() + ); + + lazy.console.log( + "Beginning to translate.", + this.contentWindow.location.href + ); + + this.sendAsyncMessage("Translations:EngineIsReady"); + + this.translatedDoc.addRootElement(this.document.querySelector("title")); + this.translatedDoc.addRootElement( + this.document.body, + true /* reportWordsInViewport */ + ); + + { + const startTime = this.docShell.now(); + this.translatedDoc.viewportTranslated.then(() => { + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId, startTime }, + "Viewport translations" + ); + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId, startTime: translationsStart }, + "Time to first translation" + ); + }); + } + } + + /** + * Receive a message from the parent. + * + * @param {{ name: string, data: any }} message + */ + receiveMessage({ name, data }) { + switch (name) { + case "Translations:TranslatePage": + const langTags = data ?? this.#langTags; + if (!langTags) { + lazy.console.warn( + "Attempting to translate a page, but no language tags were given." 
+ ); + break; + } + lazy.TranslationsTelemetry.onTranslate({ + fromLanguage: langTags.fromLanguage, + toLanguage: langTags.toLanguage, + autoTranslate: false, + }); + this.translatePage(langTags.fromLanguage, langTags.toLanguage); + break; + case "Translations:GetLangTagsForTranslation": + return this.getLangTagsForTranslation(); + case "Translations:GetContentWindowPrincipal": + return this.getContentWindowPrincipal(); + default: + lazy.console.warn("Unknown message.", name); + } + return undefined; + } + + /** + * Get the list of languages and their display names, sorted by their display names. + * This is more expensive of a call than getLanguagePairs since the display names + * are looked up. + * + * @returns {Promise<Array<SupportedLanguages>>} + */ + getSupportedLanguages() { + return this.sendQuery("Translations:GetSupportedLanguages"); + } + + /** + * @param {string} language The BCP 47 language tag. + */ + hasAllFilesForLanguage(language) { + return this.sendQuery("Translations:HasAllFilesForLanguage", { + language, + }); + } + + /** + * @param {string} language The BCP 47 language tag. + */ + deleteLanguageFiles(language) { + return this.sendQuery("Translations:DeleteLanguageFiles", { + language, + }); + } + + /** + * @param {string} language The BCP 47 language tag. + */ + downloadLanguageFiles(language) { + return this.sendQuery("Translations:DownloadLanguageFiles", { + language, + }); + } + + /** + * Download all files from Remote Settings. + */ + downloadAllFiles() { + return this.sendQuery("Translations:DownloadAllFiles"); + } + + /** + * Delete all language files. + * @returns {Promise<string[]>} Returns a list of deleted record ids. + */ + deleteAllLanguageFiles() { + return this.sendQuery("Translations:DeleteAllLanguageFiles"); + } + + /** + * Get the language pairs that can be used for translations. This is cheaper than + * the getSupportedLanguages call, since the localized display names of the languages + * are not needed. 
*
   * @returns {Promise<Array<LanguagePair>>}
   */
  getLanguagePairs() {
    return this.sendQuery("Translations:GetLanguagePairs");
  }

  /**
   * The ordered list of preferred BCP 47 language tags.
   *
   *  1. App languages
   *  2. Web requested languages
   *  3. OS languages
   *
   * @returns {Promise<string[]>}
   */
  getPreferredLanguages() {
    return this.sendQuery("Translations:GetPreferredLanguages");
  }

  /**
   * Retrieve the payload for creating a LanguageIdEngine by querying the parent
   * actor.
   *
   * @returns {Promise<LanguageIdEnginePayload>}
   */
  async #getLanguageIdEnginePayload() {
    return this.sendQuery("Translations:GetLanguageIdEnginePayload");
  }

  /**
   * Retrieve the payload for creating a TranslationsEngine for the given language
   * pair by querying the parent actor.
   *
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @returns {Promise<TranslationsEnginePayload>}
   */
  async #getTranslationsEnginePayload(fromLanguage, toLanguage) {
    return this.sendQuery("Translations:GetTranslationsEnginePayload", {
      fromLanguage,
      toLanguage,
    });
  }

  /**
   * Construct and initialize the LanguageIdEngine. The returned promise resolves
   * once the engine's `isReady` promise has resolved.
   *
   * @returns {Promise<LanguageIdEngine>}
   */
  async createLanguageIdEngine() {
    const payload = await this.#getLanguageIdEnginePayload();
    const engine = new LanguageIdEngine({
      type: "initialize",
      // NOTE(review): `lazy.logLevel` is not defined in the visible part of this
      // file, confirm that a lazy getter for it exists.
      isLoggingEnabled: lazy.logLevel === "All",
      // The payload is spread last, so its keys take precedence over the ones above.
      ...payload,
    });
    await engine.isReady;
    return engine;
  }

  /**
   * Construct and initialize the Translations Engine.
+ * + * @param {string} fromLanguage + * @param {string} toLanguage + * @returns {TranslationsEngine | null} + */ + async createTranslationsEngine(fromLanguage, toLanguage) { + const startTime = this.docShell.now(); + const enginePayload = await this.#getTranslationsEnginePayload( + fromLanguage, + toLanguage + ); + + const engine = new TranslationsEngine( + fromLanguage, + toLanguage, + enginePayload, + this.innerWindowId + ); + + await engine.isReady; + + ChromeUtils.addProfilerMarker( + "TranslationsChild", + { innerWindowId: this.innerWindowId, startTime }, + `Translations engine loaded for "${fromLanguage}" to "${toLanguage}"` + ); + return engine; + } + + /** + * Override JSWindowActorChild.prototype.didDestroy. This is called by the actor + * manager when the actor was destroyed. + */ + async didDestroy() { + this.#isDestroyed = true; + const getTranslationsEngine = this.#getTranslationsEngine; + if (!getTranslationsEngine) { + return; + } + const engine = await getTranslationsEngine( + // Just get it from cache, don't create a new one. + true + ); + if (engine) { + // Discard the queue otherwise the worker will continue to translate. + engine.discardTranslationQueue(this.innerWindowId); + + // Keep it alive long enough for another page load. + translationsEngineCache.keepAlive(engine.fromLanguage, engine.toLanguage); + } + } +} diff --git a/toolkit/components/translations/actors/TranslationsParent.sys.mjs b/toolkit/components/translations/actors/TranslationsParent.sys.mjs new file mode 100644 index 0000000000..5b3953be50 --- /dev/null +++ b/toolkit/components/translations/actors/TranslationsParent.sys.mjs @@ -0,0 +1,1953 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/** + * The pivot language is used to pivot between two different language translations + * when there is not a model available to translate directly between the two. In this + * case "en" is common between the various supported models. + * + * For instance given the following two models: + * "fr" -> "en" + * "en" -> "it" + * + * You can accomplish: + * "fr" -> "it" + * + * By doing: + * "fr" -> "en" -> "it" + */ +const PIVOT_LANGUAGE = "en"; + +const TRANSLATIONS_PERMISSION = "translations"; +const ALWAYS_TRANSLATE_LANGS_PREF = + "browser.translations.alwaysTranslateLanguages"; +const NEVER_TRANSLATE_LANGS_PREF = + "browser.translations.neverTranslateLanguages"; + +const lazy = {}; + +import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; + +ChromeUtils.defineESModuleGetters(lazy, { + RemoteSettings: "resource://services-settings/remote-settings.sys.mjs", + setTimeout: "resource://gre/modules/Timer.sys.mjs", +}); + +XPCOMUtils.defineLazyGetter(lazy, "console", () => { + return console.createInstance({ + maxLogLevelPref: "browser.translations.logLevel", + prefix: "Translations", + }); +}); + +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "translationsEnabledPref", + "browser.translations.enable" +); + +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "autoTranslatePagePref", + "browser.translations.autoTranslate" +); + +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "chaosErrorsPref", + "browser.translations.chaos.errors" +); + +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "chaosTimeoutMSPref", + "browser.translations.chaos.timeoutMS" +); + +/** + * Returns the always-translate language tags as an array. + */ +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "alwaysTranslateLangTags", + ALWAYS_TRANSLATE_LANGS_PREF, + /* aDefaultPrefValue */ "", + /* onUpdate */ null, + /* aTransform */ rawLangTags => (rawLangTags ? rawLangTags.split(",") : []) +); + +/** + * Returns the never-translate language tags as an array. 
+ */ +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "neverTranslateLangTags", + NEVER_TRANSLATE_LANGS_PREF, + /* aDefaultPrefValue */ "", + /* onUpdate */ null, + /* aTransform */ rawLangTags => (rawLangTags ? rawLangTags.split(",") : []) +); + +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "simulateUnsupportedEnginePref", + "browser.translations.simulateUnsupportedEngine" +); + +// At this time the signatures of the files are not being checked when they are being +// loaded from disk. This signature check involves hitting the network, and translations +// are explicitly an offline-capable feature. See Bug 1827265 for re-enabling this +// check. +const VERIFY_SIGNATURES_FROM_FS = false; + +/** + * @typedef {import("../translations").TranslationModelRecord} TranslationModelRecord + * @typedef {import("../translations").RemoteSettingsClient} RemoteSettingsClient + * @typedef {import("../translations").LanguageIdEngineMockedPayload} LanguageIdEngineMockedPayload + * @typedef {import("../translations").LanguageTranslationModelFiles} LanguageTranslationModelFiles + * @typedef {import("../translations").WasmRecord} WasmRecord + * @typedef {import("../translations").LangTags} LangTags + * @typedef {import("../translations").LanguagePair} LanguagePair + * @typedef {import("../translations").SupportedLanguages} SupportedLanguages + * @typedef {import("../translations").LanguageIdModelRecord} LanguageIdModelRecord + * @typedef {import("../translations").TranslationErrors} TranslationErrors + */ + +/** + * @typedef {Object} TranslationPair + * @prop {string} fromLanguage + * @prop {string} toLanguage + * @prop {string} [fromDisplayLanguage] + * @prop {string} [toDisplayLanguage] + */ + +/** + * The translations parent is used to orchestrate translations in Firefox. It can + * download the wasm translation engines, and the machine learning language models. + * + * See Bug 971044 for more details of planned work. 
+ */ +export class TranslationsParent extends JSWindowActorParent { + /** + * Contains the state that would affect UI. Anytime this state is changed, a dispatch + * event is sent so that UI can react to it. The actor is inside of /toolkit and + * needs a way of notifying /browser code (or other users) of when the state changes. + * + * @type {TranslationsLanguageState} + */ + languageState; + + actorCreated() { + this.languageState = new TranslationsLanguageState(this); + + if (TranslationsParent.#translateOnPageReload) { + // The actor was recreated after a page reload, start the translation. + const { fromLanguage, toLanguage } = + TranslationsParent.#translateOnPageReload; + TranslationsParent.#translateOnPageReload = null; + + lazy.console.log( + `Translating on a page reload from "${fromLanguage}" to "${toLanguage}".` + ); + + this.translate(fromLanguage, toLanguage); + } + } + + /** + * The remote settings client that retrieves the language-identification model binary. + * + * @type {RemoteSettingsClient | null} + */ + static #languageIdModelsRemoteClient = null; + + /** + * A map of the TranslationModelRecord["id"] to the record of the model in Remote Settings. + * Used to coordinate the downloads. + * + * @type {Map<string, TranslationModelRecord>} + */ + #translationModelRecords = new Map(); + + /** + * The RemoteSettingsClient that downloads the translation models. + * + * @type {RemoteSettingsClient | null} + */ + static #translationModelsRemoteClient = null; + + /** + * The RemoteSettingsClient that downloads the wasm binaries. + * + * @type {RemoteSettingsClient | null} + */ + static #translationsWasmRemoteClient = null; + + /** + * If "browser.translations.autoTranslate" is set to "true" then the page will + * auto-translate. A user can restore the page to the original UI. This flag indicates + * that an auto-translate should be skipped. 
+ */ + static #isPageRestoredForAutoTranslate = false; + + /** + * Allows the actor's behavior to be changed when the translations engine is mocked via + * a dummy RemoteSettingsClient. + * + * @type {bool} + */ + static #isTranslationsEngineMocked = false; + + /** + * The language identification engine can be mocked for testing + * by pre-defining this value. + * + * @type {string | null} + */ + static #mockedLangTag = null; + + /** + * The language identification engine can be mocked for testing + * by pre-defining this value. + * + * @type {number | null} + */ + static #mockedLanguageIdConfidence = null; + + /** + * @type {null | Promise<boolean>} + */ + static #isTranslationsEngineSupported = null; + + /** + * When reloading the page, store the translation pair that needs translating. + * + * @type {null | TranslationPair} + */ + static #translateOnPageReload = null; + + /** + * An ordered list of preferred languages based on: + * 1. App languages + * 2. Web requested languages + * 3. OS language + * + * @type {null | string[]} + */ + static #preferredLanguages = null; + static #observingLanguages = false; + + // On a fast connection, 10 concurrent downloads were measured to be the fastest when + // downloading all of the language files. + static MAX_CONCURRENT_DOWNLOADS = 10; + static MAX_DOWNLOAD_RETRIES = 3; + + /** + * Detect if Wasm SIMD is supported, and cache the value. It's better to check + * for support before downloading large binary blobs to a user who can't even + * use the feature. This function also respects mocks and simulating unsupported + * engines. + * + * @type {Promise<boolean>} + */ + static getIsTranslationsEngineSupported() { + if (lazy.simulateUnsupportedEnginePref) { + // Use the non-lazy console.log so that the user is always informed as to why + // the translations engine is not working. + console.log( + "Translations: The translations engine is disabled through the pref " + + '"browser.translations.simulateUnsupportedEngine".' 
+ ); + + // The user is manually testing unsupported engines. + return Promise.resolve(false); + } + + if (TranslationsParent.#isTranslationsEngineMocked) { + // A mocked translations engine is always supported. + return Promise.resolve(true); + } + + if (TranslationsParent.#isTranslationsEngineSupported === null) { + TranslationsParent.#isTranslationsEngineSupported = detectSimdSupport(); + + TranslationsParent.#isTranslationsEngineSupported.then( + isSupported => () => { + // Use the non-lazy console.log so that the user is always informed as to why + // the translations engine is not working. + if (!isSupported) { + console.log( + "Translations: The translations engine is not supported on your device as " + + "it does not support Wasm SIMD operations." + ); + } + } + ); + } + + return TranslationsParent.#isTranslationsEngineSupported; + } + + /** + * Only translate pages that match certain protocols, that way internal pages like + * about:* pages will not be translated. + * @param {string} url + */ + static isRestrictedPage(url) { + // Keep this logic up to date with TranslationsChild.prototype.#isRestrictedPage. + return !( + url.startsWith("http://") || + url.startsWith("https://") || + url.startsWith("file:///") + ); + } + + static #resetPreferredLanguages() { + TranslationsParent.#preferredLanguages = null; + TranslationsParent.getPreferredLanguages(); + } + + static async observe(_subject, topic, _data) { + switch (topic) { + case "nsPref:changed": + case "intl:app-locales-changed": { + this.#resetPreferredLanguages(); + break; + } + default: + throw new Error("Unknown observer event", topic); + } + } + + /** + * Provide a way for tests to override the system locales. + * @type {null | string[]} + */ + mockedSystemLocales = null; + + /** + * An ordered list of preferred languages based on: + * + * 1. App languages + * 2. Web requested languages + * 3. 
OS language + * + * @returns {string[]} + */ + static getPreferredLanguages() { + if (TranslationsParent.#preferredLanguages) { + return TranslationsParent.#preferredLanguages; + } + + if (!TranslationsParent.#observingLanguages) { + Services.obs.addObserver( + TranslationsParent.#resetPreferredLanguages, + "intl:app-locales-changed" + ); + Services.prefs.addObserver( + "intl.accept_languages", + TranslationsParent.#resetPreferredLanguages + ); + TranslationsParent.#observingLanguages = true; + } + + // The "Accept-Language" values that the localizer or user has indicated for + // the preferences for the web. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language + // Note that this preference often falls back ultimately to English, even if the + // user doesn't actually speak English, or to other languages they do not speak. + // However, this preference will be used as an indication that a user may prefer + // this language. + const webLanguages = Services.prefs + .getComplexValue("intl.accept_languages", Ci.nsIPrefLocalizedString) + .data.split(/\s*,\s*/g); + + // The system language could also be a good option for a language to offer the user. + const osPrefs = Cc["@mozilla.org/intl/ospreferences;1"].getService( + Ci.mozIOSPreferences + ); + const systemLocales = this.mockedSystemLocales ?? osPrefs.systemLocales; + + // Combine the locales together. + const preferredLocales = new Set([ + ...Services.locale.appLocalesAsBCP47, + ...webLanguages, + ...systemLocales, + ]); + + // Attempt to convert the locales to lang tags. Do not completely trust the + // values coming from preferences and the OS to have been validated as correct + // BCP 47 locale identifiers. + const langTags = new Set(); + for (const locale of preferredLocales) { + try { + langTags.add(new Intl.Locale(locale).language); + } catch (_) { + // The locale was invalid, discard it. + } + } + + // Convert the Set to an array to indicate that it is an ordered listing of languages. 
+ TranslationsParent.#preferredLanguages = [...langTags]; + + return TranslationsParent.#preferredLanguages; + } + + async receiveMessage({ name, data }) { + switch (name) { + case "Translations:GetTranslationsEnginePayload": { + const { fromLanguage, toLanguage } = data; + const bergamotWasmArrayBuffer = this.#getBergamotWasmArrayBuffer(); + + let files = await this.getLanguageTranslationModelFiles( + fromLanguage, + toLanguage + ); + + let languageModelFiles; + if (files) { + languageModelFiles = [files]; + } else { + // No matching model was found, try to pivot between English. + const [files1, files2] = await Promise.all([ + this.getLanguageTranslationModelFiles(fromLanguage, PIVOT_LANGUAGE), + this.getLanguageTranslationModelFiles(PIVOT_LANGUAGE, toLanguage), + ]); + if (!files1 || !files2) { + throw new Error( + `No language models were found for ${fromLanguage} to ${toLanguage}` + ); + } + languageModelFiles = [files1, files2]; + } + + return { + bergamotWasmArrayBuffer: await bergamotWasmArrayBuffer, + languageModelFiles, + isMocked: TranslationsParent.#isTranslationsEngineMocked, + }; + } + case "Translations:GetLanguageIdEnginePayload": { + const [modelBuffer, wasmBuffer] = await Promise.all([ + this.#getLanguageIdModelArrayBuffer(), + this.#getLanguageIdWasmArrayBuffer(), + ]); + return { + modelBuffer, + wasmBuffer, + mockedConfidence: TranslationsParent.#mockedLanguageIdConfidence, + mockedLangTag: TranslationsParent.#mockedLangTag, + }; + } + case "Translations:GetIsTranslationsEngineMocked": { + return TranslationsParent.#isTranslationsEngineMocked; + } + case "Translations:GetIsTranslationsEngineSupported": { + return TranslationsParent.getIsTranslationsEngineSupported(); + } + case "Translations:FullPageTranslationFailed": { + this.languageState.error = data.reason; + break; + } + case "Translations:GetSupportedLanguages": { + return this.getSupportedLanguages(); + } + case "Translations:HasAllFilesForLanguage": { + return 
this.hasAllFilesForLanguage(data.language); + } + case "Translations:DownloadLanguageFiles": { + return this.downloadLanguageFiles(data.language); + } + case "Translations:DownloadAllFiles": { + return this.downloadAllFiles(); + } + case "Translations:DeleteAllLanguageFiles": { + return this.deleteAllLanguageFiles(); + } + case "Translations:DeleteLanguageFiles": { + return this.deleteLanguageFiles(data.language); + } + case "Translations:GetLanguagePairs": { + return this.getLanguagePairs(); + } + case "Translations:GetPreferredLanguages": { + return TranslationsParent.getPreferredLanguages(); + } + case "Translations:EngineIsReady": { + this.isEngineReady = true; + this.languageState.isEngineReady = true; + break; + } + case "Translations:GetTranslationConditions": { + const maybeAutoTranslate = TranslationsParent.#maybeAutoTranslate( + data.docLangTag + ); + const maybeNeverTranslate = + TranslationsParent.shouldNeverTranslateLanguage(data.docLangTag) || + (await this.shouldNeverTranslateSite()); + + if (maybeAutoTranslate && !maybeNeverTranslate) { + this.languageState.requestedTranslationPair = { + fromLanguage: data.docLangTag, + toLanguage: data.userLangTag, + }; + } + + return { maybeAutoTranslate, maybeNeverTranslate }; + } + case "Translations:ReportDetectedLangTags": { + this.languageState.detectedLanguages = data.langTags; + return undefined; + } + } + return undefined; + } + + /** + * Returns true if translations should auto-translate from the given + * language, otherwise returns false. + * + * @param {string} langTag - A BCP-47 language tag + * @returns {boolean} + */ + static #maybeAutoTranslate(langTag) { + if ( + // The user has not marked this language as always translate. + !TranslationsParent.shouldAlwaysTranslateLanguage(langTag) && + // The pref to always auto-translate is off. + !lazy.autoTranslatePagePref + ) { + return false; + } + + if (TranslationsParent.#isPageRestoredForAutoTranslate) { + // The user clicked the restore button. 
Respect it for one page load. + TranslationsParent.#isPageRestoredForAutoTranslate = false; + + // Skip this auto-translation. + return false; + } + + // The page can be auto-translated + return true; + } + + /** + * Retrieves the language-identification model binary from remote settings. + * + * @returns {Promise<ArrayBuffer>} + */ + async #getLanguageIdModelArrayBuffer() { + lazy.console.log("Getting language-identification model array buffer."); + const now = Date.now(); + const client = this.#getLanguageIdModelRemoteClient(); + + /** @type {LanguageIdModelRecord[]} */ + let modelRecords = await TranslationsParent.getMaxVersionRecords(client); + + if (modelRecords.length === 0) { + throw new Error( + "Unable to get language-identification model record from remote settings" + ); + } + + if (modelRecords.length > 1) { + TranslationsParent.reportError( + new Error( + "Expected the language-identification model collection to have only 1 record." + ), + modelRecords + ); + } + const [modelRecord] = modelRecords; + + await chaosMode(1 / 3); + + /** @type {{buffer: ArrayBuffer}} */ + const { buffer } = await client.attachments.download(modelRecord); + + const duration = (Date.now() - now) / 1000; + lazy.console.log( + `Remote language-identification model loaded in ${duration} seconds.` + ); + + return buffer; + } + + /** + * Initializes the RemoteSettingsClient for the language-identification model binary. + * + * @returns {RemoteSettingsClient} + */ + #getLanguageIdModelRemoteClient() { + if (TranslationsParent.#languageIdModelsRemoteClient) { + return TranslationsParent.#languageIdModelsRemoteClient; + } + + /** @type {RemoteSettingsClient} */ + const client = lazy.RemoteSettings("translations-identification-models"); + + TranslationsParent.#languageIdModelsRemoteClient = client; + return client; + } + + /** + * Retrieves the language-identification wasm binary from remote settings. 
+ * + * @returns {Promise<ArrayBuffer>} + */ + async #getLanguageIdWasmArrayBuffer() { + const start = Date.now(); + const client = this.#getTranslationsWasmRemoteClient(); + + // Load the wasm binary from remote settings, if it hasn't been already. + lazy.console.log(`Getting remote language-identification wasm binary.`); + + /** @type {WasmRecord[]} */ + let wasmRecords = await TranslationsParent.getMaxVersionRecords(client, { + filters: { name: "fasttext-wasm" }, + }); + + if (wasmRecords.length === 0) { + // The remote settings client provides an empty list of records when there is + // an error. + throw new Error( + 'Unable to get "fasttext-wasm" language-identification wasm binary from Remote Settings.' + ); + } + + if (wasmRecords.length > 1) { + TranslationsParent.reportError( + new Error( + 'Expected the "fasttext-wasm" language-identification wasm collection to only have 1 record.' + ), + wasmRecords + ); + } + + // Unlike the models, greedily download the wasm. It will pull it from a locale + // cache on disk if it's already been downloaded. Do not retain a copy, as + // this will be running in the parent process. It's not worth holding onto + // this much memory, so reload it every time it is needed. + + await chaosMode(1 / 3); + + /** @type {{buffer: ArrayBuffer}} */ + const { buffer } = await client.attachments.download(wasmRecords[0]); + + const duration = (Date.now() - start) / 1000; + lazy.console.log( + `Remote language-identification wasm binary loaded in ${duration} seconds.` + ); + + return buffer; + } + + /** + * Creates a lookup key that is unique to each fromLanguage-toLanguage pair. + * + * @param {string} fromLanguage + * @param {string} toLanguage + * @returns {string} + */ + static languagePairKey(fromLanguage, toLanguage) { + return `${fromLanguage},${toLanguage}`; + } + + /** + * Get the list of translation pairs supported by the translations engine. 
+ * + * @returns {Promise<Array<LanguagePair>>} + */ + async getLanguagePairs() { + const records = await this.#getTranslationModelRecords(); + const languagePairMap = new Map(); + + for (const { fromLang, toLang, version } of records.values()) { + const isBeta = Services.vc.compare(version, "1.0") < 0; + const key = TranslationsParent.languagePairKey(fromLang, toLang); + if (!languagePairMap.has(key)) { + languagePairMap.set(key, { fromLang, toLang, isBeta }); + } + } + + return Array.from(languagePairMap.values()); + } + + /** + * Returns all of the information needed to render dropdowns for translation + * language selection. + * + * @returns {Promise<SupportedLanguages>} + */ + async getSupportedLanguages() { + const languagePairs = await this.getLanguagePairs(); + + /** @type {Map<string, boolean>} */ + const fromLanguages = new Map(); + /** @type {Map<string, boolean>} */ + const toLanguages = new Map(); + + for (const { fromLang, toLang, isBeta } of languagePairs) { + // [BetaLanguage, BetaLanguage] => isBeta == true, + // [BetaLanguage, NonBetaLanguage] => isBeta == true, + // [NonBetaLanguage, BetaLanguage] => isBeta == true, + // [NonBetaLanguage, NonBetaLanguage] => isBeta == false, + if (isBeta) { + // If these languages are part of a beta languagePair, at least one of them is a beta language + // but the other may not be, so only tentatively mark them as beta if there is no entry. + if (!fromLanguages.has(fromLang)) { + fromLanguages.set(fromLang, isBeta); + } + if (!toLanguages.has(toLang)) { + toLanguages.set(toLang, isBeta); + } + } else { + // If these languages are part of a non-beta languagePair, then they are both + // guaranteed to be non-beta languages. Idempotently overwrite any previous entry. + fromLanguages.set(fromLang, isBeta); + toLanguages.set(toLang, isBeta); + } + } + + // Build a map of the langTag to the display name. 
+ /** @type {Map<string, string>} */ + const displayNames = new Map(); + { + const dn = new Services.intl.DisplayNames(undefined, { + type: "language", + }); + + for (const langTagSet of [fromLanguages, toLanguages]) { + for (const langTag of langTagSet.keys()) { + if (displayNames.has(langTag)) { + continue; + } + displayNames.set(langTag, dn.of(langTag)); + } + } + } + + const addDisplayName = ([langTag, isBeta]) => ({ + langTag, + isBeta, + displayName: displayNames.get(langTag), + }); + + const sort = (a, b) => a.displayName.localeCompare(b.displayName); + + return { + languagePairs, + fromLanguages: Array.from(fromLanguages.entries()) + .map(addDisplayName) + .sort(sort), + toLanguages: Array.from(toLanguages.entries()) + .map(addDisplayName) + .sort(sort), + }; + } + + /** + * Lazily initializes the RemoteSettingsClient for the language models. + * + * @returns {RemoteSettingsClient} + */ + #getTranslationModelsRemoteClient() { + if (TranslationsParent.#translationModelsRemoteClient) { + return TranslationsParent.#translationModelsRemoteClient; + } + + /** @type {RemoteSettingsClient} */ + const client = lazy.RemoteSettings("translations-models"); + TranslationsParent.#translationModelsRemoteClient = client; + + client.on("sync", async ({ data: { created, updated, deleted } }) => { + // Language model attachments will only be downloaded when they are used. + lazy.console.log( + `Remote Settings "sync" event for remote language models `, + { + created, + updated, + deleted, + } + ); + + // Remove all the deleted records. + for (const record of deleted) { + await client.attachments.deleteDownloaded(record); + this.#translationModelRecords.delete(record.id); + } + + // Pre-emptively remove the old downloads, and set the new updated record. + for (const { old: oldRecord, new: newRecord } of updated) { + await client.attachments.deleteDownloaded(oldRecord); + // The language pairs should be the same on the update, but use the old + // record just in case. 
+ this.#translationModelRecords.delete(oldRecord.id); + this.#translationModelRecords.set(newRecord.id, newRecord); + } + + // Add the new records, but don't download any attachments. + for (const record of created) { + this.#translationModelRecords.set(record.id, record); + } + }); + + return client; + } + + /** + * Retrieves the maximum version of each record in the RemoteSettingsClient. + * + * If the client contains two different-version copies of the same record (e.g. 1.0 and 1.1) + * then only the 1.1-version record will be returned in the resulting collection. + * + * @param {RemoteSettingsClient} remoteSettingsClient + * @param {Object} [options] + * @param {Object} [options.filters={}] + * The filters to apply when retrieving the records from RemoteSettings. + * Filters should correspond to properties on the RemoteSettings records themselves. + * For example, A filter to retrieve only records with a `fromLang` value of "en" and a `toLang` value of "es": + * { filters: { fromLang: "en", toLang: "es" } } + * @param {Function} [options.lookupKey=(record => record.name)] + * The function to use to extract a lookup key from each record. + * This function should take a record as input and return a string that represents the lookup key for the record. + * For most record types, the name (default) is sufficient, however if a collection contains records with + * non-unique name values, it may be necessary to provide an alternative function here. + * @returns {Array<TranslationModelRecord | LanguageIdModelRecord | WasmRecord>} + */ + static async getMaxVersionRecords( + remoteSettingsClient, + { filters = {}, lookupKey = record => record.name } = {} + ) { + try { + await chaosMode(1 / 4); + } catch (_error) { + // Simulate an error by providing empty records. + return []; + } + const retrievedRecords = await remoteSettingsClient.get({ + // Pull the records from the network. + syncIfEmpty: true, + // Don't verify the signature if the client is mocked. 
+ verifySignature: VERIFY_SIGNATURES_FROM_FS, + // Apply any filters for retrieving the records. + filters, + }); + + // Create a mapping to only the max version of each record discriminated by + // the result of the lookupKey() function. + const maxVersionRecordMap = retrievedRecords.reduce((records, record) => { + const key = lookupKey(record); + const existing = records.get(key); + if ( + !existing || + // existing version less than record version + Services.vc.compare(existing.version, record.version) < 0 + ) { + records.set(key, record); + } + return records; + }, new Map()); + + return Array.from(maxVersionRecordMap.values()); + } + + /** + * Lazily initializes the model records, and returns the cached ones if they + * were already retrieved. The key of the returned `Map` is the record id. + * + * @returns {Promise<Map<string, TranslationModelRecord>>} + */ + async #getTranslationModelRecords() { + if (this.#translationModelRecords.size > 0) { + return this.#translationModelRecords; + } + + const now = Date.now(); + const client = this.#getTranslationModelsRemoteClient(); + + // Load the models. If no data is present, then there will be an initial sync. + // Rely on Remote Settings for the syncing strategy for receiving updates. + lazy.console.log(`Getting remote language models.`); + + /** @type {TranslationModelRecord[]} */ + const translationModelRecords = + await TranslationsParent.getMaxVersionRecords(client, { + // Names in this collection are not unique, so we are appending the languagePairKey + // to guarantee uniqueness. 
+ lookupKey: record => + `${record.name}${TranslationsParent.languagePairKey( + record.fromLang, + record.toLang + )}`, + }); + + if (translationModelRecords.length === 0) { + throw new Error("Unable to retrieve the translation models."); + } + + for (const record of TranslationsParent.ensureLanguagePairsHavePivots( + translationModelRecords + )) { + this.#translationModelRecords.set(record.id, record); + } + + const duration = (Date.now() - now) / 1000; + lazy.console.log( + `Remote language models loaded in ${duration} seconds.`, + this.#translationModelRecords + ); + + return this.#translationModelRecords; + } + + /** + * This implementation assumes that every language pair has access to the + * pivot language. If any languages are added without a pivot language, or the + * pivot language is changed, then this implementation will need a more complicated + * language solver. This means that any UI pickers would need to be updated, and + * the pivot language selection would need a solver. 
+ * + * @param {TranslationModelRecord[] | LanguagePair[]} records + */ + static ensureLanguagePairsHavePivots(records) { + // lang -> pivot + const hasToPivot = new Set(); + // pivot -> en + const hasFromPivot = new Set(); + + const fromLangs = new Set(); + const toLangs = new Set(); + + for (const { fromLang, toLang } of records) { + fromLangs.add(fromLang); + toLangs.add(toLang); + + if (toLang === PIVOT_LANGUAGE) { + // lang -> pivot + hasToPivot.add(fromLang); + } + if (fromLang === PIVOT_LANGUAGE) { + // pivot -> en + hasFromPivot.add(toLang); + } + } + + const fromLangsToRemove = new Set(); + const toLangsToRemove = new Set(); + + for (const lang of fromLangs) { + if (lang === PIVOT_LANGUAGE) { + continue; + } + // Check for "lang -> pivot" + if (!hasToPivot.has(lang)) { + TranslationsParent.reportError( + new Error( + `The "from" language model "${lang}" is being discarded as it doesn't have a pivot language.` + ) + ); + fromLangsToRemove.add(lang); + } + } + + for (const lang of toLangs) { + if (lang === PIVOT_LANGUAGE) { + continue; + } + // Check for "pivot -> lang" + if (!hasFromPivot.has(lang)) { + TranslationsParent.reportError( + new Error( + `The "to" language model "${lang}" is being discarded as it doesn't have a pivot language.` + ) + ); + toLangsToRemove.add(lang); + } + } + + const after = records.filter(record => { + if (fromLangsToRemove.has(record.fromLang)) { + return false; + } + if (toLangsToRemove.has(record.toLang)) { + return false; + } + return true; + }); + return after; + } + + /** + * Lazily initializes the RemoteSettingsClient for the downloaded wasm binary data. 
+ * + * @returns {RemoteSettingsClient} + */ + #getTranslationsWasmRemoteClient() { + if (TranslationsParent.#translationsWasmRemoteClient) { + return TranslationsParent.#translationsWasmRemoteClient; + } + + /** @type {RemoteSettingsClient} */ + const client = lazy.RemoteSettings("translations-wasm"); + + TranslationsParent.#translationsWasmRemoteClient = client; + + client.on("sync", async ({ data: { created, updated, deleted } }) => { + lazy.console.log(`"sync" event for remote bergamot wasm `, { + created, + updated, + deleted, + }); + + // Remove all the deleted records. + for (const record of deleted) { + await client.attachments.deleteDownloaded(record); + } + + // Remove any updated records, and download the new ones. + for (const { old: oldRecord } of updated) { + await client.attachments.deleteDownloaded(oldRecord); + } + + // Do nothing for the created records. + }); + + return client; + } + + /** + * Bergamot is the translation engine that has been compiled to wasm. It is shipped + * to the user via Remote Settings. + * + * https://github.com/mozilla/bergamot-translator/ + */ + /** + * @returns {Promise<ArrayBuffer>} + */ + async #getBergamotWasmArrayBuffer() { + const start = Date.now(); + const client = this.#getTranslationsWasmRemoteClient(); + + // Load the wasm binary from remote settings, if it hasn't been already. + lazy.console.log(`Getting remote bergamot-translator wasm records.`); + + /** @type {WasmRecord[]} */ + const wasmRecords = await TranslationsParent.getMaxVersionRecords(client, { + filters: { name: "bergamot-translator" }, + }); + + if (wasmRecords.length === 0) { + // The remote settings client provides an empty list of records when there is + // an error. + throw new Error( + "Unable to get the bergamot translator from Remote Settings." 
+ ); + } + + if (wasmRecords.length > 1) { + TranslationsParent.reportError( + new Error("Expected the bergamot-translator to only have 1 record."), + wasmRecords + ); + } + + // Unlike the models, greedily download the wasm. It will pull it from a locale + // cache on disk if it's already been downloaded. Do not retain a copy, as + // this will be running in the parent process. It's not worth holding onto + // this much memory, so reload it every time it is needed. + + await chaosModeError(1 / 3); + + /** @type {{buffer: ArrayBuffer}} */ + const { buffer } = await client.attachments.download(wasmRecords[0]); + + const duration = Date.now() - start; + lazy.console.log( + `"bergamot-translator" wasm binary loaded in ${duration / 1000} seconds` + ); + + return buffer; + } + + /** + * Deletes language files that match a language. + * + * @param {string} requestedLanguage The BCP 47 language tag. + */ + async deleteLanguageFiles(language) { + const client = this.#getTranslationModelsRemoteClient(); + const isForDeletion = true; + return Promise.all( + Array.from( + await this.getRecordsForTranslatingToAndFromAppLanguage( + language, + isForDeletion + ) + ).map(record => { + lazy.console.log("Deleting record", record); + return client.attachments.deleteDownloaded(record); + }) + ); + } + + /** + * Download language files that match a language. + * + * @param {string} requestedLanguage The BCP 47 language tag. + */ + async downloadLanguageFiles(language) { + const client = this.#getTranslationModelsRemoteClient(); + + const queue = []; + + for (const record of await this.getRecordsForTranslatingToAndFromAppLanguage( + language + )) { + const download = () => { + lazy.console.log("Downloading record", record.name, record.id); + return client.attachments.download(record); + }; + queue.push({ download }); + } + + return downloadManager(queue); + } + + /** + * Download all files used for translations. 
+ */ + async downloadAllFiles() { + const client = this.#getTranslationModelsRemoteClient(); + + const queue = []; + + for (const [recordId, record] of await this.#getTranslationModelRecords()) { + queue.push({ + onSuccess: () => { + this.sendQuery("Translations:DownloadedLanguageFile", { recordId }); + }, + // The download may be attempted multiple times. + onFailure: () => { + this.sendQuery("Translations:DownloadLanguageFileError", { + recordId, + }); + }, + download: () => client.attachments.download(record), + }); + } + + queue.push({ download: () => this.#getBergamotWasmArrayBuffer() }); + queue.push({ download: () => this.#getLanguageIdModelArrayBuffer() }); + queue.push({ download: () => this.#getLanguageIdWasmArrayBuffer() }); + + return downloadManager(queue); + } + + /** + * Delete all language model files. + * @returns {Promise<string[]>} A list of record IDs. + */ + async deleteAllLanguageFiles() { + const client = this.#getTranslationModelsRemoteClient(); + await chaosMode(); + await client.attachments.deleteAll(); + return [...(await this.#getTranslationModelRecords()).keys()]; + } + + /** + * Only returns true if all language files are present for a requested language. + * It's possible only half the files exist for a pivot translation into another + * language, or there was a download error, and we're still missing some files. + * + * @param {string} requestedLanguage The BCP 47 language tag. + */ + async hasAllFilesForLanguage(requestedLanguage) { + const client = this.#getTranslationModelsRemoteClient(); + for (const record of await this.getRecordsForTranslatingToAndFromAppLanguage( + requestedLanguage, + true + )) { + if (!(await client.attachments.isDownloaded(record))) { + return false; + } + } + + return true; + } + + /** + * Get the necessary files for translating to and from the app language and a + * requested language. This may require the files for a pivot language translation + * if there is no language model for a direct translation. 
+ * + * @param {string} requestedLanguage The BCP 47 language tag. + * @param {boolean} isForDeletion - Return a more restrictive set of languages, as + * these files are marked for deletion. We don't want to remove + * files that are needed for some other language's pivot translation. + * @returns {Set<TranslationModelRecord>} + */ + async getRecordsForTranslatingToAndFromAppLanguage( + requestedLanguage, + isForDeletion = false + ) { + const records = await this.#getTranslationModelRecords(); + const appLanguage = new Intl.Locale(Services.locale.appLocaleAsBCP47) + .language; + + let matchedRecords = new Set(); + + if (requestedLanguage === appLanguage) { + // There are no records if the requested language and app language are the same. + return matchedRecords; + } + + const addLanguagePair = (fromLang, toLang) => { + let matchFound = false; + for (const record of records.values()) { + if (record.fromLang === fromLang && record.toLang === toLang) { + matchedRecords.add(record); + matchFound = true; + } + } + return matchFound; + }; + + if ( + // Is there a direct translation? + !addLanguagePair(requestedLanguage, appLanguage) + ) { + // This is no direct translation, get the pivot files. + addLanguagePair(requestedLanguage, PIVOT_LANGUAGE); + // These files may be required for other pivot translations, so don't remove + // them if we are deleting records. + if (!isForDeletion) { + addLanguagePair(PIVOT_LANGUAGE, appLanguage); + } + } + + if ( + // Is there a direct translation? + !addLanguagePair(appLanguage, requestedLanguage) + ) { + // This is no direct translation, get the pivot files. + addLanguagePair(PIVOT_LANGUAGE, requestedLanguage); + // These files may be required for other pivot translations, so don't remove + // them if we are deleting records. 
+ if (!isForDeletion) { + addLanguagePair(appLanguage, PIVOT_LANGUAGE); + } + } + + return matchedRecords; + } + + /** + * Gets the language model files in an array buffer by downloading attachments from + * Remote Settings, or retrieving them from the local cache. Each translation + * requires multiple files. + * + * Results are only returned if the model is found. + * + * @param {string} fromLanguage + * @param {string} toLanguage + * @param {boolean} withQualityEstimation + * @returns {null | LanguageTranslationModelFiles} + */ + async getLanguageTranslationModelFiles( + fromLanguage, + toLanguage, + withQualityEstimation = false + ) { + const client = this.#getTranslationModelsRemoteClient(); + + lazy.console.log( + `Beginning model downloads: "${fromLanguage}" to "${toLanguage}"` + ); + + const records = [...(await this.#getTranslationModelRecords()).values()]; + + /** @type {LanguageTranslationModelFiles} */ + let results; + + // Use Promise.all to download (or retrieve from cache) the model files in parallel. + await Promise.all( + records.map(async record => { + if (record.fileType === "qualityModel" && !withQualityEstimation) { + // Do not include the quality models if they aren't needed. + return; + } + + if (record.fromLang !== fromLanguage || record.toLang !== toLanguage) { + // Only use models that match. + return; + } + + if (!results) { + results = {}; + } + + const start = Date.now(); + + // Download or retrieve from the local cache: + + await chaosMode(1 / 3); + + /** @type {{buffer: ArrayBuffer }} */ + const { buffer } = await client.attachments.download(record); + + results[record.fileType] = { + buffer, + record, + }; + + const duration = Date.now() - start; + lazy.console.log( + `Translation model fetched in ${duration / 1000} seconds:`, + record.fromLang, + record.toLang, + record.fileType + ); + }) + ); + + if (!results) { + // No model files were found, pivoting will be required. 
+ return null; + } + + // Validate that all of the files we expected were actually available and + // downloaded. + + if (!results.model) { + throw new Error( + `No model file was found for "${fromLanguage}" to "${toLanguage}."` + ); + } + + if (!results.lex) { + throw new Error( + `No lex file was found for "${fromLanguage}" to "${toLanguage}."` + ); + } + + if (withQualityEstimation && !results.qualityModel) { + throw new Error( + `No quality file was found for "${fromLanguage}" to "${toLanguage}."` + ); + } + + if (results.vocab) { + if (results.srcvocab) { + throw new Error( + `A srcvocab and vocab file were both included for "${fromLanguage}" to "${toLanguage}." Only one is needed.` + ); + } + if (results.trgvocab) { + throw new Error( + `A trgvocab and vocab file were both included for "${fromLanguage}" to "${toLanguage}." Only one is needed.` + ); + } + } else if (!results.srcvocab || !results.srcvocab) { + throw new Error( + `No vocab files were provided for "${fromLanguage}" to "${toLanguage}."` + ); + } + + return results; + } + + /** + * For testing purposes, allow the Translations Engine to be mocked. If called + * with `null` the mock is removed. + * + * @param {null | RemoteSettingsClient} [translationModelsRemoteClient] + * @param {null | RemoteSettingsClient} [translationsWasmRemoteClient] + */ + static mockTranslationsEngine( + translationModelsRemoteClient, + translationsWasmRemoteClient + ) { + lazy.console.log("Mocking RemoteSettings for the translations engine."); + TranslationsParent.#translationModelsRemoteClient = + translationModelsRemoteClient; + TranslationsParent.#translationsWasmRemoteClient = + translationsWasmRemoteClient; + TranslationsParent.#isTranslationsEngineMocked = true; + } + + /** + * Remove the mocks. + */ + static unmockTranslationsEngine() { + lazy.console.log( + "Removing RemoteSettings mock for the translations engine." 
+ ); + TranslationsParent.#translationModelsRemoteClient = null; + TranslationsParent.#translationsWasmRemoteClient = null; + TranslationsParent.#isTranslationsEngineMocked = false; + } + + /** + * For testing purposes, allow the LanguageIdEngine to be mocked. If called + * with `null` in each argument, the mock is removed. + * + * @param {string} langTag - The BCP 47 language tag. + * @param {number} confidence - The confidence score of the detected language. + * @param {RemoteSettingsClient} client + */ + static mockLanguageIdentification(langTag, confidence, client) { + lazy.console.log("Mocking language identification.", { + langTag, + confidence, + }); + TranslationsParent.#mockedLangTag = langTag; + TranslationsParent.#mockedLanguageIdConfidence = confidence; + TranslationsParent.#languageIdModelsRemoteClient = client; + } + + /** + * Remove the mocks + */ + static unmockLanguageIdentification() { + lazy.console.log("Removing language identification mock."); + TranslationsParent.#mockedLangTag = null; + TranslationsParent.#mockedLanguageIdConfidence = null; + TranslationsParent.#languageIdModelsRemoteClient = null; + } + /** + * Report an error. Having this as a method allows tests to check that an error + * was properly reported. + * @param {Error} error - Providing an Error object makes sure the stack is properly + * reported. + * @param {any[]} args - Any args to pass on to console.error. + */ + static reportError(error, ...args) { + lazy.console.log(error, ...args); + } + + /** + * @param {string} fromLanguage + * @param {string} toLanguage + */ + translate(fromLanguage, toLanguage) { + if (this.languageState.requestedTranslationPair) { + // This page has already been translated, restore it and translate it + // again once the actor has been recreated. 
+ TranslationsParent.#translateOnPageReload = { fromLanguage, toLanguage }; + this.restorePage(fromLanguage); + } else { + this.languageState.requestedTranslationPair = { + fromLanguage, + toLanguage, + }; + this.sendAsyncMessage("Translations:TranslatePage", { + fromLanguage, + toLanguage, + }); + } + } + + /** + * Restore the page to the original language by doing a hard reload. + * + * @param {string} fromLanguage A BCP-47 language tag + */ + restorePage(fromLanguage) { + if ( + lazy.autoTranslatePagePref || + TranslationsParent.shouldAlwaysTranslateLanguage(fromLanguage) + ) { + // Skip auto-translate for one page load. + TranslationsParent.#isPageRestoredForAutoTranslate = true; + } + this.languageState.requestedTranslationPair = null; + + const browser = this.browsingContext.embedderElement; + browser.reload(); + } + + /** + * Keep track of when the location changes. + */ + static #locationChangeId = 0; + + static onLocationChange(browser) { + if (!lazy.translationsEnabledPref) { + // The pref isn't enabled, so don't attempt to get the actor. + return; + } + let windowGlobal = browser.browsingContext.currentWindowGlobal; + let actor = windowGlobal.getActor("Translations"); + TranslationsParent.#locationChangeId++; + actor.languageState.locationChangeId = TranslationsParent.#locationChangeId; + } + + /** + * Is this actor active for the current location change? + * + * @param {number} locationChangeId - The id sent by the "TranslationsParent:LanguageState" event. + * @returns {boolean} + */ + static isActiveLocation(locationChangeId) { + return locationChangeId === TranslationsParent.#locationChangeId; + } + + /** + * Returns the lang tags that should be offered for translation. + * + * @returns {Promise<LangTags>} + */ + getLangTagsForTranslation() { + return this.sendQuery("Translations:GetLangTagsForTranslation"); + } + + /** + * Returns the principal from the content window's origin. 
+ * @returns {nsIPrincipal} + */ + getContentWindowPrincipal() { + return this.sendQuery("Translations:GetContentWindowPrincipal"); + } + + /** + * Returns true if the given language tag is present in the always-translate + * languages preference, otherwise false. + * + * @param {string} langTag - A BCP-47 language tag + * @returns {boolean} + */ + static shouldAlwaysTranslateLanguage(langTag) { + return lazy.alwaysTranslateLangTags.includes(langTag); + } + + /** + * Returns true if the given language tag is present in the never-translate + * languages preference, otherwise false. + * + * @param {string} langTag - A BCP-47 language tag + * @returns {boolean} + */ + static shouldNeverTranslateLanguage(langTag) { + return lazy.neverTranslateLangTags.includes(langTag); + } + + /** + * Returns true if the current site is denied permissions to translate, + * otherwise returns false. + * + * @returns {Promise<boolean>} + */ + async shouldNeverTranslateSite() { + let principal; + try { + principal = await this.getContentWindowPrincipal(); + } catch { + // Unable to get content window principal. + return false; + } + const perms = Services.perms; + const permission = perms.getPermissionObject( + principal, + TRANSLATIONS_PERMISSION, + /* exactHost */ false + ); + return permission?.capability === perms.DENY_ACTION; + } + + /** + * Removes the given language tag from the given preference. + * + * @param {string} langTag - A BCP-47 language tag + * @param {string} prefName - The pref name + */ + static #removeLangTagFromPref(langTag, prefName) { + const langTags = + prefName === ALWAYS_TRANSLATE_LANGS_PREF + ? lazy.alwaysTranslateLangTags + : lazy.neverTranslateLangTags; + const newLangTags = langTags.filter(tag => tag !== langTag); + Services.prefs.setCharPref(prefName, newLangTags.join(",")); + } + + /** + * Adds the given language tag to the given preference. 
+ * + * @param {string} langTag - A BCP-47 language tag + * @param {string} prefName - The pref name + */ + static #addLangTagToPref(langTag, prefName) { + const langTags = + prefName === ALWAYS_TRANSLATE_LANGS_PREF + ? lazy.alwaysTranslateLangTags + : lazy.neverTranslateLangTags; + if (!langTags.includes(langTag)) { + langTags.push(langTag); + } + Services.prefs.setCharPref(prefName, langTags.join(",")); + } + + /** + * Toggles the always-translate language preference by adding the language + * to the pref list if it is not present, or removing it if it is present. + * + * @param {string} langTag - A BCP-47 language tag + */ + static toggleAlwaysTranslateLanguagePref(langTag) { + if (TranslationsParent.shouldAlwaysTranslateLanguage(langTag)) { + // The pref was toggled off for this langTag + this.#removeLangTagFromPref(langTag, ALWAYS_TRANSLATE_LANGS_PREF); + return; + } + + // The pref was toggled on for this langTag + this.#addLangTagToPref(langTag, ALWAYS_TRANSLATE_LANGS_PREF); + this.#removeLangTagFromPref(langTag, NEVER_TRANSLATE_LANGS_PREF); + } + + /** + * Toggles the never-translate language preference by adding the language + * to the pref list if it is not present, or removing it if it is present. + * + * @param {string} langTag - A BCP-47 language tag + */ + static toggleNeverTranslateLanguagePref(langTag) { + if (TranslationsParent.shouldNeverTranslateLanguage(langTag)) { + // The pref was toggled off for this langTag + this.#removeLangTagFromPref(langTag, NEVER_TRANSLATE_LANGS_PREF); + return; + } + + // The pref was toggled on for this langTag + this.#addLangTagToPref(langTag, NEVER_TRANSLATE_LANGS_PREF); + this.#removeLangTagFromPref(langTag, ALWAYS_TRANSLATE_LANGS_PREF); + } + + /** + * Toggles the never-translate site permissions by adding DENY_ACTION to + * the site principal if it is not present, or removing it if it is present. 
+ */ + async toggleNeverTranslateSitePermissions() { + const perms = Services.perms; + const principal = await this.getContentWindowPrincipal(); + const shouldNeverTranslateSite = await this.shouldNeverTranslateSite(); + if (shouldNeverTranslateSite) { + perms.removeFromPrincipal(principal, TRANSLATIONS_PERMISSION); + } else { + perms.addFromPrincipal( + principal, + TRANSLATIONS_PERMISSION, + perms.DENY_ACTION + ); + } + } +} + +/** + * WebAssembly modules must be instantiated from a Worker, since it's considered + * unsafe eval. + */ +function detectSimdSupport() { + return new Promise(resolve => { + lazy.console.log("Loading wasm simd detector worker."); + + const worker = new Worker( + "chrome://global/content/translations/simd-detect-worker.js" + ); + + // This should pretty much immediately resolve, so it does not need Firefox shutdown + // detection. + worker.addEventListener("message", ({ data }) => { + resolve(data.isSimdSupported); + worker.terminate(); + }); + }); +} + +/** + * State that affects the UI. Any of the state that gets set triggers a dispatch to update + * the UI. + */ +class TranslationsLanguageState { + /** + * @param {TranslationsParent} actor + */ + constructor(actor) { + this.#actor = actor; + this.dispatch(); + } + + /** + * The data members for TranslationsLanguageState, see the getters for their + * documentation. + */ + + /** @type {TranslationsParent} */ + #actor; + + /** @type {TranslationPair | null} */ + #requestedTranslationPair = null; + + /** @type {LangTags | null} */ + #detectedLanguages = null; + + /** @type {number} */ + #locationChangeId = -1; + + /** @type {null | TranslationErrors} */ + #error = null; + + #isEngineReady = false; + + /** + * Dispatch anytime the language details change, so that any UI can react to it. + */ + dispatch() { + if (!TranslationsParent.isActiveLocation(this.#locationChangeId)) { + // Do not dispatch as this location is not active. 
+ return; + } + + const browser = this.#actor.browsingContext.top.embedderElement; + if (!browser) { + return; + } + const { CustomEvent } = browser.ownerGlobal; + browser.dispatchEvent( + new CustomEvent("TranslationsParent:LanguageState", { + bubbles: true, + detail: { + detectedLanguages: this.#detectedLanguages, + requestedTranslationPair: this.#requestedTranslationPair, + error: this.#error, + isEngineReady: this.#isEngineReady, + }, + }) + ); + } + + /** + * When a translation is requested, this contains the translation pair. This means + * that the TranslationsChild should be creating a TranslationsDocument and keep + * the page updated with the target language. + * + * @returns {TranslationPair | null} + */ + get requestedTranslationPair() { + return this.#requestedTranslationPair; + } + + set requestedTranslationPair(requestedTranslationPair) { + this.#error = null; + this.#isEngineReady = false; + this.#requestedTranslationPair = requestedTranslationPair; + this.dispatch(); + } + + /** + * The TranslationsChild will detect languages and offer them up for translation. + * The results are stored here. + * + * @returns {LangTags | null} + */ + get detectedLanguages() { + return this.#detectedLanguages; + } + + set detectedLanguages(detectedLanguages) { + this.#detectedLanguages = detectedLanguages; + this.dispatch(); + } + + /** + * This id represents the last location change that happened for this actor. This + * allows the UI to disambiguate when there are races and out of order events that + * are dispatched. Only the most up to date `locationChangeId` is used. + * + * @returns {number} + */ + get locationChangeId() { + return this.#locationChangeId; + } + + set locationChangeId(locationChangeId) { + this.#locationChangeId = locationChangeId; + + // When the location changes remove the previous error. + this.#error = null; + + this.dispatch(); + } + + /** + * The last error that occured during translation. 
+ */ + get error() { + return this.#error; + } + + set error(error) { + this.#error = error; + // Setting an error invalidates the requested translation pair. + this.#requestedTranslationPair = null; + this.#isEngineReady = false; + this.dispatch(); + } + + /** + * Stores when the translations engine is ready. The wasm and language files must + * be downloaded, which can take some time. + */ + get isEngineReady() { + return this.#isEngineReady; + } + + set isEngineReady(isEngineReady) { + this.#isEngineReady = isEngineReady; + this.dispatch(); + } +} + +/** + * @typedef {Object} QueueItem + * @prop {Function} download + * @prop {Function} [onSuccess] + * @prop {Function} [onFailure] + * @prop {number} [retriesLeft] + */ + +/** + * Manage the download of the files by providing a maximum number of concurrent files + * and the ability to retry a file download in case of an error. + * + * @param {QueueItem[]} queue + */ +async function downloadManager(queue) { + const NOOP = () => {}; + + const pendingDownloadAttempts = new Set(); + let failCount = 0; + let index = 0; + const start = Date.now(); + const originalQueueLength = queue.length; + + while (index < queue.length || pendingDownloadAttempts.size > 0) { + // Start new downloads up to the maximum limit + while ( + index < queue.length && + pendingDownloadAttempts.size < TranslationsParent.MAX_CONCURRENT_DOWNLOADS + ) { + lazy.console.log(`Starting download ${index + 1} of ${queue.length}`); + + const { + download, + onSuccess = NOOP, + onFailure = NOOP, + retriesLeft = TranslationsParent.MAX_DOWNLOAD_RETRIES, + } = queue[index]; + + const handleFailedDownload = error => { + // The download failed. Either retry it, or report the failure. + TranslationsParent.reportError( + new Error("Failed to download file."), + error + ); + + const newRetriesLeft = retriesLeft - 1; + + if (retriesLeft > 0) { + lazy.console.log( + `Queueing another attempt. 
${newRetriesLeft} attempts left.` + ); + queue.push({ + download, + retriesLeft: newRetriesLeft, + onSuccess, + onFailure, + }); + } else { + // Give up on this download. + failCount++; + onFailure(); + } + }; + + const afterDownloadAttempt = () => { + pendingDownloadAttempts.delete(downloadAttempt); + }; + + // Kick off the download. If it fails, retry it a certain number of attempts. + // This is done asynchronously from the rest of the for loop. + const downloadAttempt = download() + .then(onSuccess, handleFailedDownload) + .then(afterDownloadAttempt); + + pendingDownloadAttempts.add(downloadAttempt); + index++; + } + + // Wait for any active downloads to complete. + await Promise.race(pendingDownloadAttempts); + } + + const duration = ((Date.now() - start) / 1000).toFixed(3); + + if (failCount > 0) { + const message = `Finished downloads in ${duration} seconds, but ${failCount} download(s) failed.`; + lazy.console.log( + `Finished downloads in ${duration} seconds, but ${failCount} download(s) failed.` + ); + throw new Error(message); + } + + lazy.console.log( + `Finished ${originalQueueLength} downloads in ${duration} seconds.` + ); +} + +/** + * The translations code has lots of async code and fallible network requests. To test + * this manually while using the feature, enable chaos mode by setting "errors" to true + * and "timeoutMS" to a positive number of milliseconds. + * prefs to true: + * + * - browser.translations.chaos.timeoutMS + * - browser.translations.chaos.errors + */ +async function chaosMode(probability = 0.5) { + await chaosModeTimer(); + await chaosModeError(probability); +} + +/** + * The translations code has lots of async code that relies on the network. To test + * this manually while using the feature, enable chaos mode by setting the following pref + * to a positive number of milliseconds. 
+ * + * - browser.translations.chaos.timeoutMS + */ +async function chaosModeTimer() { + if (lazy.chaosTimeoutMSPref) { + const timeout = Math.random() * lazy.chaosTimeoutMSPref; + lazy.console.log( + `Chaos mode timer started for ${(timeout / 1000).toFixed(1)} seconds.` + ); + await new Promise(resolve => lazy.setTimeout(resolve, timeout)); + } +} + +/** + * The translations code has lots of async code that is fallible. To test this manually + * while using the feature, enable chaos mode by setting the following pref to true. + * + * - browser.translations.chaos.errors + */ +async function chaosModeError(probability = 0.5) { + if (lazy.chaosErrorsPref && Math.random() < probability) { + lazy.console.trace(`Chaos mode error generated.`); + throw new Error( + `Chaos Mode error from the pref "browser.translations.chaos.errors".` + ); + } +} diff --git a/toolkit/components/translations/actors/moz.build b/toolkit/components/translations/actors/moz.build new file mode 100644 index 0000000000..b05acabab5 --- /dev/null +++ b/toolkit/components/translations/actors/moz.build @@ -0,0 +1,10 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +FINAL_TARGET_FILES.actors += [ + "AboutTranslationsChild.sys.mjs", + "AboutTranslationsParent.sys.mjs", + "TranslationsChild.sys.mjs", + "TranslationsParent.sys.mjs", +] |