diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /toolkit/components/translation/LanguageDetector.sys.mjs | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0. (refs: upstream/1%115.7.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/components/translation/LanguageDetector.sys.mjs')
-rw-r--r-- | toolkit/components/translation/LanguageDetector.sys.mjs | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/toolkit/components/translation/LanguageDetector.sys.mjs b/toolkit/components/translation/LanguageDetector.sys.mjs new file mode 100644 index 0000000000..b69357cae2 --- /dev/null +++ b/toolkit/components/translation/LanguageDetector.sys.mjs @@ -0,0 +1,153 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// workerManager is exported for tests. +import { clearTimeout, setTimeout } from "resource://gre/modules/Timer.sys.mjs"; + +const WORKER_URL = "resource://gre/modules/translation/cld-worker.js"; + +export var workerManager = { + // Since Emscripten can handle heap growth, but not heap shrinkage, we + // need to refresh the worker after we've processed a particularly large + // string in order to prevent unnecessary resident memory growth. + // + // These values define the cut-off string length and the idle timeout + // (in milliseconds) before destroying a worker. Once a string of the + // maximum size has been processed, the worker is marked for + // destruction, and is terminated as soon as it has been idle for the + // given timeout. + // + // 1.5MB. This is the approximate string length that forces heap growth + // for a 2MB heap. + LARGE_STRING: 1.5 * 1024 * 1024, + IDLE_TIMEOUT: 10 * 1000, + + detectionQueue: [], + + detectLanguage(aParams) { + return this.workerReady + .then(worker => { + return new Promise(resolve => { + this.detectionQueue.push({ resolve }); + worker.postMessage(aParams); + }); + }) + .then(result => { + // We have our asynchronous result from the worker. + // + // Determine if our input was large enough to trigger heap growth, + // or if we're already waiting to destroy the worker when it's + // idle. If so, schedule termination after the idle timeout. 
+ if ( + aParams.text.length >= this.LARGE_STRING || + this._idleTimeout != null + ) { + this.flushWorker(); + } + + return result; + }); + }, + + _worker: null, + _workerReadyPromise: null, + + get workerReady() { + if (!this._workerReadyPromise) { + this._workerReadyPromise = new Promise(resolve => { + let worker = new Worker(WORKER_URL); + worker.onmessage = aMsg => { + if (aMsg.data == "ready") { + resolve(worker); + } else { + this.detectionQueue.shift().resolve(aMsg.data); + } + }; + this._worker = worker; + }); + } + + return this._workerReadyPromise; + }, + + // Holds the ID of the current pending idle cleanup setTimeout. + _idleTimeout: null, + + // Schedule the current worker to be terminated after the idle timeout. + flushWorker() { + if (this._idleTimeout != null) { + clearTimeout(this._idleTimeout); + } + + this._idleTimeout = setTimeout( + this._flushWorker.bind(this), + this.IDLE_TIMEOUT + ); + }, + + // Immediately terminate the worker, as long as there are no pending + // results. Otherwise, reschedule termination until after the next + // idle timeout. + _flushWorker() { + if (this.detectionQueue.length) { + this.flushWorker(); + } else { + if (this._worker) { + this._worker.terminate(); + } + + this._worker = null; + this._workerReadyPromise = null; + this._idleTimeout = null; + } + }, +}; + +export var LanguageDetector = { + /** + * Detect the language of a given string. + * + * The argument may be either a string containing the text to analyze, + * or an object with the following properties: + * + * - 'text' The text to analyze. + * + * - 'isHTML' (optional) A boolean, indicating whether the text + * should be analyzed as HTML rather than plain text. + * + * - 'language' (optional) A string indicating the expected language. + * For text extracted from HTTP documents, this is expected to + * come from the Content-Language header. + * + * - 'tld' (optional) A string indicating the top-level domain of the + * document the text was extracted from. 
+ * + * - 'encoding' (optional) A string describing the encoding of the + * document the string was extracted from. Note that, regardless + * of the value of this property, the 'text' property must be a + * UTF-16 JavaScript string. + * + * @returns {Promise<Object>} + * @resolves When detection is finished, with an object containing + * these fields: + * - 'language' (string with a language code) + * - 'confident' (boolean) Whether the detector is confident of the + * result. + * - 'languages' (array) An array of up to three elements, containing + * the most prevalent languages detected. It contains a + * 'languageCode' property, containing the ISO language code of + * the language, and a 'percent' property, describing the + * approximate percentage of the input which is in that language. + * For text of an unknown language, the result may contain an + * entry with the language code 'un', indicating the percent of + * the text which is unknown. + */ + detectLanguage(aParams) { + if (typeof aParams == "string") { + aParams = { text: aParams }; + } + + return workerManager.detectLanguage(aParams); + }, +}; |