diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /browser/components/translation/BingTranslator.jsm | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/translation/BingTranslator.jsm')
-rw-r--r-- | browser/components/translation/BingTranslator.jsm | 486 |
1 files changed, 486 insertions, 0 deletions
// diff --git a/browser/components/translation/BingTranslator.jsm b/browser/components/translation/BingTranslator.jsm
// new file mode 100644, index 0000000000..40871549f2
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

var EXPORTED_SYMBOLS = ["BingTranslator"];

const { PromiseUtils } = ChromeUtils.importESModule(
  "resource://gre/modules/PromiseUtils.sys.mjs"
);
const { Async } = ChromeUtils.import("resource://services-common/async.js");
const { httpRequest } = ChromeUtils.importESModule(
  "resource://gre/modules/Http.sys.mjs"
);

// The maximum amount of net data allowed per request on Bing's API.
const MAX_REQUEST_DATA = 5000; // Documentation says 10000 but anywhere
// close to that is refused by the service.

// The maximum number of chunks allowed to be translated in a single
// request.
const MAX_REQUEST_CHUNKS = 1000; // Documentation says 2000.

// Self-imposed limit of 15 requests. This means that a page that would need
// to be broken in more than 15 requests won't be fully translated.
// The maximum amount of data that we will translate for a single page
// is MAX_REQUESTS * MAX_REQUEST_DATA.
const MAX_REQUESTS = 15;

/**
 * Translates a webpage using Bing's Translation API.
 *
 * Constructing the object only records its inputs and resets the
 * bookkeeping counters; call `translate()` to start the work.
 *
 * @param translationDocument  The TranslationDocument object that represents
 *                             the webpage to be translated
 * @param sourceLanguage       The source language of the document
 * @param targetLanguage       The target language for the translation
 */
var BingTranslator = function(
  translationDocument,
  sourceLanguage,
  targetLanguage
) {
  this.translationDocument = translationDocument;
  this.sourceLanguage = sourceLanguage;
  this.targetLanguage = targetLanguage;
  this._pendingRequests = 0;
  this._partialSuccess = false;
  this._serviceUnavailable = false;
  this._translatedCharacterCount = 0;
};

BingTranslator.prototype = {
  /**
   * Performs the translation, splitting the document into several chunks
   * respecting the data limits of the API.
   *
   * @returns {Promise}          A promise that will resolve when the translation
   *                             task is finished. It resolves with
   *                             `{ characterCount }` if at least one chunk was
   *                             translated, and rejects with "unavailable" or
   *                             "failure" otherwise.
   */
  translate() {
    return (async () => {
      let currentIndex = 0;
      this._onFinishedDeferred = PromiseUtils.defer();

      // Hold one extra "pending" slot while requests are still being
      // dispatched. The loop below yields to the event loop on every
      // iteration, so an early request could otherwise complete and drive
      // the counter to zero before the remaining requests were even created,
      // settling the promise too soon.
      this._pendingRequests++;

      // Let's split the document into various requests to be sent to
      // Bing's Translation API.
      for (let requestCount = 0; requestCount < MAX_REQUESTS; requestCount++) {
        // Generating the text for each request can be expensive, so
        // let's take the opportunity of the chunkification process to
        // allow for the event loop to attend other pending events
        // before we continue.
        await Async.promiseYield();

        // Determine the data for the next request.
        let request = this._generateNextTranslationRequest(currentIndex);

        // Create a real request to the server, and put it on the
        // pending requests list.
        let bingRequest = new BingRequest(
          request.data,
          this.sourceLanguage,
          this.targetLanguage
        );
        this._pendingRequests++;
        bingRequest
          .fireRequest()
          .then(this._chunkCompleted.bind(this), this._chunkFailed.bind(this));

        currentIndex = request.lastIndex;
        if (request.finished) {
          break;
        }
      }

      // All requests have been dispatched: release the dispatch guard. If
      // every request has already completed, this settles the promise.
      this._checkIfFinished();

      return this._onFinishedDeferred.promise;
    })();
  },

  /**
   * Resets the expiration time of the current token, in order to
   * force the token manager to ask for a new token during the next request.
   */
  _resetToken() {
    // Force the token manager to fetch a fresh token.
    BingTokenManager._currentExpiryTime = 0;
  },

  /**
   * Function called when a request sent to the server completed successfully.
   * This function handles calling the function to parse the result and the
   * function to resolve the promise returned by the public `translate()`
   * method when there's no pending request left.
   *
   * @param bingRequest   The BingRequest sent to the server.
   */
  _chunkCompleted(bingRequest) {
    if (this._parseChunkResult(bingRequest)) {
      this._partialSuccess = true;
      // Count the number of characters successfully translated.
      this._translatedCharacterCount += bingRequest.characterCount;
    }

    this._checkIfFinished();
  },

  /**
   * Function called when a request sent to the server has failed.
   * This function handles deciding if the error is transient or means the
   * service is unavailable (zero balance on the key or request credentials are
   * not in an active state) and calling the function to resolve the promise
   * returned by the public `translate()` method when there's no pending
   * request left.
   *
   * @param aError   [optional] The XHR object of the request that failed.
   */
  _chunkFailed(aError) {
    if (
      XMLHttpRequest.isInstance(aError) &&
      [400, 401].includes(aError.status)
    ) {
      let body = aError.responseText;
      if (
        body &&
        body.includes("TranslateApiException") &&
        (body.includes("balance") || body.includes("active state"))
      ) {
        this._serviceUnavailable = true;
      }
    }

    this._checkIfFinished();
  },

  /**
   * Function called when a request sent to the server has completed, and
   * once more by `translate()` after it has dispatched every request.
   * This function handles resolving the promise
   * returned by the public `translate()` method when all chunks are completed.
   */
  _checkIfFinished() {
    // Check if all pending requests have been
    // completed and then resolves the promise.
    // If at least one chunk was successful, the
    // promise will be resolved positively which will
    // display the "Success" state for the infobar. Otherwise,
    // the "Error" state will appear.
    if (--this._pendingRequests == 0) {
      if (this._partialSuccess) {
        this._onFinishedDeferred.resolve({
          characterCount: this._translatedCharacterCount,
        });
      } else {
        let error = this._serviceUnavailable ? "unavailable" : "failure";
        this._onFinishedDeferred.reject(error);
      }
    }
  },

  /**
   * This function parses the result returned by Bing's Http.svc API,
   * which is a XML file that contains a number of elements. To our
   * particular interest, the only part of the response that matters
   * are the <TranslatedText> nodes, which contains the resulting
   * items that were sent to be translated.
   *
   * @param   bingRequest      The request sent to the server.
   * @returns boolean          True if parsing of this chunk was successful.
   */
  _parseChunkResult(bingRequest) {
    let results;
    try {
      let doc = bingRequest.networkRequest.responseXML;
      results = doc.querySelectorAll("TranslatedText");
    } catch (e) {
      return false;
    }

    let len = results.length;
    if (len != bingRequest.translationData.length) {
      // This should never happen, but if the service returns a different number
      // of items (from the number of items submitted), we can't use this chunk
      // because all items would be paired incorrectly.
      return false;
    }

    let error = false;
    for (let i = 0; i < len; i++) {
      try {
        let result = results[i].firstChild.nodeValue;
        let root = bingRequest.translationData[i][0];

        if (root.isSimpleRoot) {
          // Workaround for Bing's service problem in which "&" chars in
          // plain-text TranslationItems are double-escaped.
          result = result.replace(/&amp;/g, "&");
        }

        root.parseResult(result);
      } catch (e) {
        // Keep processing the remaining items, but report the chunk as
        // unsuccessful.
        error = true;
      }
    }

    return !error;
  },

  /**
   * This function will determine what is the data to be used for
   * the Nth request we are generating, based on the input params.
   *
   * @param startIndex What is the index, in the roots list, that the
   *                   chunk should start.
   * @returns {Object} `data` is the list of `[root, escapedText]` pairs for
   *                   this request, `finished` tells whether the end of the
   *                   roots list was reached, and `lastIndex` is the index
   *                   to pass as `startIndex` on the next call.
   */
  _generateNextTranslationRequest(startIndex) {
    let currentDataSize = 0;
    let currentChunks = 0;
    let output = [];
    let rootsList = this.translationDocument.roots;

    for (let i = startIndex; i < rootsList.length; i++) {
      let root = rootsList[i];
      let text = this.translationDocument.generateTextForItem(root);
      if (!text) {
        continue;
      }

      text = escapeXML(text);
      let newCurSize = currentDataSize + text.length;
      let newChunks = currentChunks + 1;

      if (newCurSize > MAX_REQUEST_DATA || newChunks > MAX_REQUEST_CHUNKS) {
        if (!output.length) {
          // This single item is larger than a whole request, so it can never
          // be sent. Skip it; returning here would hand back an empty chunk
          // with an unchanged index, and the caller would keep retrying the
          // same item until it ran out of requests.
          continue;
        }

        // If we've reached the API limits, let's stop accumulating data
        // for this request and return. We return information useful for
        // the caller to pass back on the next call, so that the function
        // can keep working from where it stopped.
        return {
          data: output,
          finished: false,
          lastIndex: i,
        };
      }

      currentDataSize = newCurSize;
      currentChunks = newChunks;
      output.push([root, text]);
    }

    return {
      data: output,
      finished: true,
      lastIndex: 0,
    };
  },
};

/**
 * Represents a request (for 1 chunk) sent off to Bing's service.
 *
 * @param translationData  The data to be used for this translation,
 *                         generated by the generateNextTranslationRequest...
 *                         function.
 * @param sourceLanguage   The source language of the document.
 * @param targetLanguage   The target language for the translation.
 *
 */
function BingRequest(translationData, sourceLanguage, targetLanguage) {
  this.translationData = translationData;
  this.sourceLanguage = sourceLanguage;
  this.targetLanguage = targetLanguage;
  this.characterCount = 0;
}

BingRequest.prototype = {
  /**
   * Initiates the request
   *
   * @returns {Promise} A promise that resolves with this BingRequest when
   *                    the request loads, and rejects with the XHR object
   *                    when it fails.
   */
  fireRequest() {
    return (async () => {
      // Prepare authentication.
      let token = await BingTokenManager.getToken();
      let auth = "Bearer " + token;

      // Prepare URL.
      let url = getUrlParam(
        "https://api.microsofttranslator.com/v2/Http.svc/TranslateArray",
        "browser.translation.bing.translateArrayURL"
      );

      // Prepare request headers.
      let headers = [
        ["Content-type", "text/xml"],
        ["Authorization", auth],
      ];

      // Prepare the request body.
      let requestString =
        "<TranslateArrayRequest>" +
        "<AppId/>" +
        "<From>" +
        this.sourceLanguage +
        "</From>" +
        "<Options>" +
        '<ContentType xmlns="http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2">text/html</ContentType>' +
        '<ReservedFlags xmlns="http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2" />' +
        "</Options>" +
        '<Texts xmlns:s="http://schemas.microsoft.com/2003/10/Serialization/Arrays">';

      // Each text was already XML-escaped when the chunk was generated.
      for (let [, text] of this.translationData) {
        requestString += "<s:string>" + text + "</s:string>";
        this.characterCount += text.length;
      }

      requestString +=
        "</Texts>" +
        "<To>" +
        this.targetLanguage +
        "</To>" +
        "</TranslateArrayRequest>";

      // Set up request options.
      return new Promise((resolve, reject) => {
        let options = {
          onLoad: (responseText, xhr) => {
            resolve(this);
          },
          onError(e, responseText, xhr) {
            reject(xhr);
          },
          postData: requestString,
          headers,
        };

        // Fire the request.
        let request = httpRequest(url, options);

        // Override the response MIME type.
        request.overrideMimeType("text/xml");
        this.networkRequest = request;
      });
    })();
  },
};

/**
 * Authentication Token manager for the API
 */
var BingTokenManager = {
  _currentToken: null,
  _currentExpiryTime: 0,
  _pendingRequest: null,

  /**
   * Get a valid, non-expired token to be used for the API calls.
   *
   * @returns {Promise}  A promise that resolves with the token
   *                     string once it is obtained. The token returned
   *                     can be the same one used in the past if it is still
   *                     valid.
   */
  getToken() {
    // A token request is already in flight: share it instead of starting
    // another one.
    if (this._pendingRequest) {
      return this._pendingRequest;
    }

    let remainingMs = this._currentExpiryTime - new Date();
    // Our existing token is still good for more than a minute, let's use it.
    if (remainingMs > 60 * 1000) {
      return Promise.resolve(this._currentToken);
    }

    return this._getNewToken();
  },

  /**
   * Generates a new token from the server.
   *
   * @returns {Promise}  A promise that resolves with the token
   *                     string once it is obtained.
   */
  _getNewToken() {
    let url = getUrlParam(
      "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13",
      "browser.translation.bing.authURL"
    );
    let params = [
      ["grant_type", "client_credentials"],
      ["scope", "http://api.microsofttranslator.com"],
      [
        "client_id",
        getUrlParam(
          "%BING_API_CLIENTID%",
          "browser.translation.bing.clientIdOverride"
        ),
      ],
      [
        "client_secret",
        getUrlParam(
          "%BING_API_KEY%",
          "browser.translation.bing.apiKeyOverride"
        ),
      ],
    ];

    this._pendingRequest = new Promise((resolve, reject) => {
      let options = {
        onLoad(responseText, xhr) {
          BingTokenManager._pendingRequest = null;
          try {
            let json = JSON.parse(responseText);

            if (json.error) {
              reject(json.error);
              return;
            }

            let token = json.access_token;
            let expires_in = json.expires_in;
            BingTokenManager._currentToken = token;
            BingTokenManager._currentExpiryTime = new Date(
              Date.now() + expires_in * 1000
            );
            resolve(token);
          } catch (e) {
            reject(e);
          }
        },
        onError(e, responseText, xhr) {
          BingTokenManager._pendingRequest = null;
          reject(e);
        },
        postData: params,
      };

      httpRequest(url, options);
    });
    return this._pendingRequest;
  },
};

/**
 * Escape a string to be valid XML content.
 *
 * "&" is replaced first so that the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param aStr  The value to escape; it is converted with `toString()` first.
 * @returns {string} The escaped text.
 */
function escapeXML(aStr) {
  return aStr
    .toString()
    .replace(/&/g, "&amp;")
    .replace(/\"/g, "&quot;")
    .replace(/\'/g, "&apos;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Fetch a request parameter (a service URL, the client ID or the client
 * secret), which may be overridden by a pref if it's set. The value is then
 * passed through the URL formatter.
 *
 * @param paramValue  The default value, used when the pref is not set.
 * @param prefName    The name of the pref that can override the default.
 * @returns {string}  The formatted value.
 */
function getUrlParam(paramValue, prefName) {
  if (Services.prefs.getPrefType(prefName)) {
    paramValue = Services.prefs.getCharPref(prefName);
  }
  paramValue = Services.urlFormatter.formatURL(paramValue);
  return paramValue;
}