/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

/**
 * @typedef {import("../translations").Bergamot} Bergamot
 * @typedef {import("../translations").LanguageTranslationModelFiles} LanguageTranslationModelFiles
 */

/* global loadBergamot */
importScripts("chrome://global/content/translations/bergamot-translator.js");

// Respect the preference "browser.translations.logLevel".
let _loggingLevel = "Error";

/**
 * Log a message to the console unless the logging level is "Error" or "Warn".
 *
 * @param {...any} args
 */
function log(...args) {
  if (_loggingLevel !== "Error" && _loggingLevel !== "Warn") {
    console.log("Translations:", ...args);
  }
}

/**
 * Log a very verbose message. It is only emitted under the "Trace" and "All"
 * logging levels.
 *
 * @param {...any} args
 */
function trace(...args) {
  if (_loggingLevel === "Trace" || _loggingLevel === "All") {
    console.log("Translations:", ...args);
  }
}

// Throw Promise rejection errors so that they are visible in the console.
// In a worker global scope `addEventListener` is the same function as
// `self.addEventListener`; the bare form matches the "message" listeners below.
addEventListener("unhandledrejection", event => {
  throw event.reason;
});

/**
 * The alignment for each file type, file type strings should be same as in the
 * model registry.
 */
const MODEL_FILE_ALIGNMENTS = {
  model: 256,
  lex: 64,
  vocab: 64,
  qualityModel: 64,
  srcvocab: 64,
  trgvocab: 64,
};

/**
 * Initialize the engine, and get it ready to handle translation requests.
 * The "initialize" message must be received before any other message handling
 * requests will be processed.
 */
addEventListener("message", handleInitializationMessage);

/**
 * Handles the first message sent to the worker. Any message other than
 * "initialize" is reported and ignored. Once an "initialize" message has been
 * processed (successfully or not) this listener removes itself, and the result
 * is reported back with an "initialization-success" or "initialization-error"
 * message.
 *
 * @param {MessageEvent} event
 */
async function handleInitializationMessage({ data }) {
  const startTime = performance.now();
  if (data.type !== "initialize") {
    console.error(
      "The TranslationEngine worker received a message before it was initialized."
    );
    return;
  }

  try {
    const { fromLanguage, toLanguage, enginePayload, logLevel, innerWindowId } =
      data;

    if (!fromLanguage) {
      throw new Error('Worker initialization missing "fromLanguage"');
    }
    if (!toLanguage) {
      throw new Error('Worker initialization missing "toLanguage"');
    }

    if (logLevel) {
      // Respect the "browser.translations.logLevel" preference.
      _loggingLevel = logLevel;
    }

    let engine;
    if (enginePayload.isMocked) {
      // The engine is testing mode, and no Bergamot wasm is available.
      engine = new MockedEngine(fromLanguage, toLanguage);
    } else {
      const { bergamotWasmArrayBuffer, languageModelFiles } = enginePayload;
      const bergamot = await BergamotUtils.initializeWasm(
        bergamotWasmArrayBuffer
      );
      engine = new Engine(
        fromLanguage,
        toLanguage,
        bergamot,
        languageModelFiles
      );
    }

    ChromeUtils.addProfilerMarker(
      "TranslationsWorker",
      { startTime, innerWindowId },
      "Translations engine loaded."
    );

    handleMessages(engine);
    postMessage({ type: "initialization-success" });
  } catch (error) {
    console.error(error);
    postMessage({ type: "initialization-error", error: error?.message });
  }

  removeEventListener("message", handleInitializationMessage);
}

/**
 * Sets up the message handling for the worker.
 *
 * @param {Engine | MockedEngine} engine
 */
function handleMessages(engine) {
  // The most recent in-flight discard, if any. Translation requests wait on it
  // before they are handed to the engine.
  let discardPromise;

  addEventListener("message", async ({ data }) => {
    try {
      if (data.type === "initialize") {
        throw new Error("The Translations engine must not be re-initialized.");
      }
      if (data.type === "translation-request") {
        // Only show these messages when "All" logging is on, since there are so many
        // of them.
        trace("Received message", data);
      } else {
        log("Received message", data);
      }

      switch (data.type) {
        case "translation-request": {
          const { sourceText, messageId, isHTML, innerWindowId } = data;
          if (discardPromise) {
            // Wait for messages to be discarded if there are any.
            await discardPromise;
          }
          try {
            // Add a translation to the work queue, and when it returns, post the message
            // back. The translation may never return if the translations are discarded
            // before it has time to be run. In this case this await is just never
            // resolved, and the postMessage is never run.
            const targetText = await engine.translate(
              sourceText,
              isHTML,
              innerWindowId
            );

            // This logging level can be very verbose and slow, so only do it under the
            // "Trace" level, which is the most verbose. Set the logging level to "Info" to avoid
            // these, and get all of the other logs.
            trace("Translation complete", {
              sourceText,
              targetText,
              isHTML,
              innerWindowId,
            });

            postMessage({
              type: "translation-response",
              targetText,
              messageId,
            });
          } catch (error) {
            console.error(error);
            let message = "An error occurred in the engine worker.";
            if (typeof error?.message === "string") {
              message = error.message;
            }
            let stack = "(no stack)";
            if (typeof error?.stack === "string") {
              stack = error.stack;
            }
            postMessage({
              type: "translation-error",
              error: { message, stack },
              messageId,
              innerWindowId,
            });
          }
          break;
        }
        case "discard-translation-queue": {
          ChromeUtils.addProfilerMarker(
            "TranslationsWorker",
            { innerWindowId: data.innerWindowId },
            "Translations discard requested"
          );

          const pendingDiscard = engine.discardTranslations(data.innerWindowId);
          discardPromise = pendingDiscard;
          await pendingDiscard;
          // Another discard may have started while this one was awaited. Only
          // clear the shared promise if it is still the one from this message.
          if (discardPromise === pendingDiscard) {
            discardPromise = null;
          }

          // Signal to the "message" listeners in the main thread to stop listening.
          postMessage({
            type: "translations-discarded",
          });
          break;
        }
        default:
          console.warn("Unknown message type:", data.type);
      }
    } catch (error) {
      // Ensure the unexpected errors are surfaced in the console.
      console.error(error);
    }
  });
}

/**
 * The Engine is created once for a language pair. The initialization process copies the
 * ArrayBuffers for the language buffers from JS-managed ArrayBuffers, to aligned
 * internal memory for the wasm heap.
 *
 * After this the ArrayBuffers are discarded and GC'd. This file should be managed
 * from the TranslationsEngine class on the main thread.
 *
 * This class starts listening for messages only after the Bergamot engine has been
 * fully initialized.
 */
class Engine {
  /**
   * Map each innerWindowId to its own WorkQueue. This makes it easy to shut down
   * an entire queue of work when the page is unloaded.
   *
   * @type {Map<number, WorkQueue>}
   */
  #workQueues = new Map();

  /**
   * @param {string} fromLanguage
   * @param {string} toLanguage
   * @param {Bergamot} bergamot
   * @param {Array<LanguageTranslationModelFiles>} languageTranslationModelFiles
   *   One entry per model. One model translates directly, two models translate
   *   via pivoting.
   */
  constructor(
    fromLanguage,
    toLanguage,
    bergamot,
    languageTranslationModelFiles
  ) {
    /** @type {string} */
    this.fromLanguage = fromLanguage;

    /** @type {string} */
    this.toLanguage = toLanguage;

    /** @type {Bergamot} */
    this.bergamot = bergamot;

    /** @type {Bergamot["TranslationModel"][]} */
    this.languageTranslationModels = languageTranslationModelFiles.map(
      modelFiles =>
        BergamotUtils.constructSingleTranslationModel(bergamot, modelFiles)
    );

    /** @type {Bergamot["BlockingService"]} */
    this.translationService = new bergamot.BlockingService({
      // Caching is disabled (see https://github.com/mozilla/firefox-translations/issues/288)
      cacheSize: 0,
    });
  }

  /**
   * Run the translation models to perform a batch of message translations. The
   * promise is rejected when the sync version of this function throws an error.
   * This function creates an async interface over the synchronous translation
   * mechanism. This allows other microtasks such as message handling to still work
   * even though the translations are CPU-intensive.
   *
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @param {number} innerWindowId - This is required
   *
   * @returns {Promise<string>} The translated text.
   */
  translate(sourceText, isHTML, innerWindowId) {
    return this.#getWorkQueue(innerWindowId).runTask(() =>
      this.#syncTranslate(sourceText, isHTML, innerWindowId)
    );
  }

  /**
   * Get or create a `WorkQueue` that is unique to an `innerWindowId`.
   *
   * @param {number} innerWindowId
   * @returns {WorkQueue}
   */
  #getWorkQueue(innerWindowId) {
    let workQueue = this.#workQueues.get(innerWindowId);
    if (!workQueue) {
      workQueue = new WorkQueue(innerWindowId);
      this.#workQueues.set(innerWindowId, workQueue);
    }
    return workQueue;
  }

  /**
   * Cancels any in-progress translations by removing the work queue.
   *
   * @param {number} innerWindowId
   * @returns {Promise<void> | undefined} The promise of the queue's
   *   cancellation, so that the message handler can wait for it. `undefined`
   *   when there is no work queue for the `innerWindowId`.
   */
  discardTranslations(innerWindowId) {
    const workQueue = this.#workQueues.get(innerWindowId);
    if (!workQueue) {
      return undefined;
    }
    const cancellation = workQueue.cancelWork();
    this.#workQueues.delete(innerWindowId);
    return cancellation;
  }

  /**
   * Run the translation models to perform a translation. This
   * blocks the worker thread until it is completed.
   *
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @param {number} innerWindowId
   * @returns {string}
   * @throws {Error} When more than two translation models were provided.
   */
  #syncTranslate(sourceText, isHTML, innerWindowId) {
    const startTime = performance.now();
    sourceText = sourceText.trim();
    const { messages, options } = BergamotUtils.getTranslationArgs(
      this.bergamot,
      sourceText,
      isHTML
    );

    // Declared outside of the `try` so that the `finally` block can free it.
    /** @type {Bergamot["VectorResponse"] | undefined} */
    let responses;

    try {
      if (messages.size() === 0) {
        // NOTE(review): this is an array while the documented return type is a
        // string. It is kept as-is here, confirm what the callers expect.
        return [];
      }

      if (this.languageTranslationModels.length === 1) {
        responses = this.translationService.translate(
          this.languageTranslationModels[0],
          messages,
          options
        );
      } else if (this.languageTranslationModels.length === 2) {
        responses = this.translationService.translateViaPivoting(
          this.languageTranslationModels[0],
          this.languageTranslationModels[1],
          messages,
          options
        );
      } else {
        throw new Error(
          "Too many models were provided to the translation worker."
        );
      }

      // Report on the time it took to do this translation.
      ChromeUtils.addProfilerMarker(
        "TranslationsWorker",
        { startTime, innerWindowId },
        `Translated ${sourceText.length} code units.`
      );

      const targetText = responses.get(0).getTranslatedText();
      return targetText;
    } finally {
      // Free up any memory that was allocated. This will always run.
      messages?.delete();
      options?.delete();
      responses?.delete();
    }
  }
}

/**
 * Static utilities to help work with the Bergamot wasm module.
 */
class BergamotUtils {
  /**
   * Construct a single translation model.
   *
   * @param {Bergamot} bergamot
   * @param {LanguageTranslationModelFiles} languageTranslationModelFiles
   * @returns {Bergamot["TranslationModel"]}
   * @throws {Error} When neither a "vocab" file, nor a "srcvocab" and "trgvocab"
   *   pair is available.
   */
  static constructSingleTranslationModel(
    bergamot,
    languageTranslationModelFiles
  ) {
    log(`Constructing translation model.`);

    const { model, lex, vocab, qualityModel, srcvocab, trgvocab } =
      BergamotUtils.allocateModelMemory(
        bergamot,
        languageTranslationModelFiles
      );

    // Transform the bytes to mb, like "10.2mb"
    const getMemory = memory => `${Math.floor(memory.size() / 100_000) / 10}mb`;

    let memoryLog = `Model memory sizes in wasm heap:`;
    memoryLog += `\n Model: ${getMemory(model)}`;
    memoryLog += `\n Shortlist: ${getMemory(lex)}`;

    // Set up the vocab list, which could either be a single "vocab" model, or a
    // "srcvocab" and "trgvocab" pair.
    const vocabList = new bergamot.AlignedMemoryList();

    if (vocab) {
      vocabList.push_back(vocab);
      memoryLog += `\n Vocab: ${getMemory(vocab)}`;
    } else if (srcvocab && trgvocab) {
      vocabList.push_back(srcvocab);
      vocabList.push_back(trgvocab);
      memoryLog += `\n Src Vocab: ${getMemory(srcvocab)}`;
      memoryLog += `\n Trg Vocab: ${getMemory(trgvocab)}`;
    } else {
      throw new Error("Vocabulary key is not found.");
    }

    if (qualityModel) {
      memoryLog += `\n QualityModel: ${getMemory(qualityModel)}\n`;
    }

    const config = BergamotUtils.generateTextConfig({
      "beam-size": "1",
      normalize: "1.0",
      "word-penalty": "0",
      "max-length-break": "128",
      "mini-batch-words": "1024",
      workspace: "128",
      "max-length-factor": "2.0",
      // Without a quality model "skip-cost" is "true".
      "skip-cost": (!qualityModel).toString(),
      "cpu-threads": "0",
      quiet: "true",
      "quiet-translation": "true",
      // The precision is picked from the file name of the model record.
      "gemm-precision":
        languageTranslationModelFiles.model.record.name.endsWith("intgemm8.bin")
          ? "int8shiftAll"
          : "int8shiftAlphaAll",
      alignment: "soft",
    });

    log(`Bergamot translation model config: ${config}`);
    log(memoryLog);

    return new bergamot.TranslationModel(
      config,
      model,
      lex,
      vocabList,
      qualityModel ?? null
    );
  }

  /**
   * The models must be placed in aligned memory that the Bergamot wasm module has access
   * to. This function copies over the model blobs into this memory space.
   *
   * @param {Bergamot} bergamot
   * @param {LanguageTranslationModelFiles} languageTranslationModelFiles
   * @returns {LanguageTranslationModelFilesAligned}
   * @throws {Error} When a file type has no entry in MODEL_FILE_ALIGNMENTS.
   */
  static allocateModelMemory(bergamot, languageTranslationModelFiles) {
    /** @type {LanguageTranslationModelFilesAligned} */
    const results = {};

    for (const [fileType, file] of Object.entries(
      languageTranslationModelFiles
    )) {
      const alignment = MODEL_FILE_ALIGNMENTS[fileType];
      if (!alignment) {
        throw new Error(`Unknown file type: "${fileType}"`);
      }

      const alignedMemory = new bergamot.AlignedMemory(
        file.buffer.byteLength,
        alignment
      );
      alignedMemory.getByteArrayView().set(new Uint8Array(file.buffer));

      results[fileType] = alignedMemory;
    }

    return results;
  }

  /**
   * Initialize the Bergamot translation engine. It is a wasm compiled version of the
   * Marian translation software. The wasm is delivered remotely to cut down on binary size.
   *
   * https://github.com/mozilla/bergamot-translator/
   *
   * @param {ArrayBuffer} wasmBinary
   * @returns {Promise<Bergamot>} Rejected when the wasm module aborts.
   */
  static initializeWasm(wasmBinary) {
    return new Promise((resolve, reject) => {
      /** @type {number} */
      const start = performance.now();

      /** @type {Bergamot} */
      const bergamot = loadBergamot({
        // This is the amount of memory that a simple run of Bergamot uses, in bytes.
        INITIAL_MEMORY: 234_291_200,
        print: log,
        onAbort() {
          reject(new Error("Error loading Bergamot wasm module."));
        },
        onRuntimeInitialized: async () => {
          const duration = performance.now() - start;
          log(
            `Bergamot wasm runtime initialized in ${duration / 1000} seconds.`
          );
          // Await at least one microtask so that the captured `bergamot` variable is
          // fully initialized.
          await Promise.resolve();
          resolve(bergamot);
        },
        wasmBinary,
      });
    });
  }

  /**
   * Maps the Bergamot Vector to a JS array
   *
   * @param {Bergamot["Vector"]} vector
   * @param {Function} fn - Called with the item and its index.
   * @returns {Array}
   */
  static mapVector(vector, fn) {
    const result = [];
    for (let index = 0; index < vector.size(); index++) {
      result.push(fn(vector.get(index), index));
    }
    return result;
  }

  /**
   * Generate a config for the Marian translation service. It requires specific whitespace.
   *
   * https://marian-nmt.github.io/docs/cmd/marian-decoder/
   *
   * @param {Record<string, string>} config
   * @returns {string}
   */
  static generateTextConfig(config) {
    const indent = " ";
    let result = "\n";

    for (const [key, value] of Object.entries(config)) {
      result += `${indent}${key}: ${value}\n`;
    }

    return result + indent;
  }

  /**
   * JS objects need to be translated into wasm objects to configure the translation engine.
   * The caller owns the returned vectors, and is responsible for calling `delete()`
   * on them.
   *
   * @param {Bergamot} bergamot
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @returns {{ messages: Bergamot["VectorString"], options: Bergamot["VectorResponseOptions"] }}
   */
  static getTranslationArgs(bergamot, sourceText, isHTML) {
    const messages = new bergamot.VectorString();
    const options = new bergamot.VectorResponseOptions();

    sourceText = sourceText.trim();
    // Empty paragraphs break the translation.
    if (sourceText) {
      messages.push_back(sourceText);
      options.push_back({
        qualityScores: false,
        alignment: true,
        html: isHTML,
      });
    }

    return { messages, options };
  }
}

/**
 * For testing purposes, provide a fully mocked engine. This allows for easy integration
 * testing of the UI, without having to rely on downloading remote models and remote
 * wasm binaries.
 */
class MockedEngine {
  /**
   * @param {string} fromLanguage
   * @param {string} toLanguage
   */
  constructor(fromLanguage, toLanguage) {
    /** @type {string} */
    this.fromLanguage = fromLanguage;

    /** @type {string} */
    this.toLanguage = toLanguage;
  }

  /**
   * Create a fake translation of the text. The text is upper-cased, and the
   * language pair is appended to it.
   *
   * @param {string} sourceText
   * @param {boolean} isHTML
   * @returns {string}
   */
  translate(sourceText, isHTML) {
    // Note when an HTML translations is requested.
    const html = isHTML ? ", html" : "";
    const targetText = sourceText.toUpperCase();
    return `${targetText} [${this.fromLanguage} to ${this.toLanguage}${html}]`;
  }

  /**
   * The mocked engine has no queue of work, so there is nothing to discard.
   */
  discardTranslations() {}
}

/**
 * This class takes tasks that may block the thread's event loop, and has them yield
 * after a time budget via setTimeout calls to allow other code to execute.
 */
class WorkQueue {
  #TIME_BUDGET = 100; // ms
  #RUN_IMMEDIATELY_COUNT = 20;

  /** @type {Array<{task: Function, resolve: Function, reject: Function}>} */
  #tasks = [];
  #isRunning = false;
  #isWorkCancelled = false;
  #runImmediately = this.#RUN_IMMEDIATELY_COUNT;

  /**
   * @param {number} innerWindowId
   */
  constructor(innerWindowId) {
    this.innerWindowId = innerWindowId;
  }

  /**
   * Run the task and return the result.
   *
   * @template T
   * @param {() => T} task
   * @returns {Promise<T>} Rejected when the task throws. The promise of a queued
   *   task is never settled when the work is cancelled before the task is done.
   */
  runTask(task) {
    if (this.#runImmediately > 0) {
      // Run the first N translations immediately, most likely these are the user-visible
      // translations on the page, as they are sent in first. The setTimeout of 0 can
      // still delay the translations noticeably.
      this.#runImmediately--;
      try {
        return Promise.resolve(task());
      } catch (error) {
        // Report a synchronous failure through the promise, the same way that the
        // queued tasks do.
        return Promise.reject(error);
      }
    }

    return new Promise((resolve, reject) => {
      this.#tasks.push({ task, resolve, reject });
      this.#run().catch(error => console.error(error));
    });
  }

  /**
   * The internal run function. It works through the queued tasks until the queue
   * is empty, or the work is cancelled.
   */
  async #run() {
    if (this.#isRunning) {
      // The work queue is already running.
      return;
    }

    this.#isRunning = true;

    // Measure the timeout
    let lastTimeout = null;

    let tasksInBatch = 0;
    const addProfilerMarker = () => {
      ChromeUtils.addProfilerMarker(
        "TranslationsWorker WorkQueue",
        { startTime: lastTimeout, innerWindowId: this.innerWindowId },
        `WorkQueue processed ${tasksInBatch} tasks`
      );
    };

    try {
      while (this.#tasks.length !== 0) {
        if (this.#isWorkCancelled) {
          // The work was already cancelled.
          break;
        }
        const now = performance.now();

        if (lastTimeout === null) {
          lastTimeout = now;
          // Allow other work to get on the queue.
          await new Promise(resolve => setTimeout(resolve, 0));
        } else if (now - lastTimeout > this.#TIME_BUDGET) {
          // Perform a timeout with no effective wait. This clears the current
          // promise queue from the event loop.
          await new Promise(resolve => setTimeout(resolve, 0));
          addProfilerMarker();
          lastTimeout = performance.now();
        }

        // Check this between every `await`.
        if (this.#isWorkCancelled || !this.#tasks.length) {
          break;
        }

        tasksInBatch++;
        const { task, resolve, reject } = this.#tasks.shift();
        try {
          const result = await task();

          // Check this between every `await`.
          if (this.#isWorkCancelled) {
            break;
          }
          // The work is done, resolve the original task.
          resolve(result);
        } catch (error) {
          reject(error);
        }
      }
      addProfilerMarker();
    } finally {
      // Always release the queue, otherwise an unexpected error above would
      // prevent any later task from ever being run.
      this.#isRunning = false;
    }
  }

  /**
   * Drop all of the queued tasks, and stop the run loop. The queue accepts work
   * again after a `setTimeout` of 0.
   *
   * @returns {Promise<void>}
   */
  async cancelWork() {
    this.#isWorkCancelled = true;
    this.#tasks = [];
    await new Promise(resolve => setTimeout(resolve, 0));
    this.#isWorkCancelled = false;
  }
}