/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

var EXPORTED_SYMBOLS = ["TranslationDocument"];

const { Async } = ChromeUtils.import("resource://services-common/async.js");

/**
 * This class represents a document that is being translated,
 * and it is responsible for parsing the document,
 * generating the data structures for translation (the list of
 * translation items and roots), and managing the original
 * and translated texts on the translation items.
 *
 * @param document The document to be translated
 */
var TranslationDocument = function(document) {
  // Maps each translation DOM node to its TranslationItem wrapper.
  this.itemsMap = new Map();
  // TranslationItems flagged as roots by getTranslationNodes.
  this.roots = [];
  this._init(document);
};

TranslationDocument.prototype = {
  translatedFrom: "",
  translatedTo: "",
  translationError: false,
  originalShown: true,

  /**
   * Initializes the object and populates
   * the roots lists.
   *
   * @param document The document to be translated
   */
  _init(document) {
    let winUtils = document.defaultView.windowUtils;

    // Get all the translation nodes in the document's body:
    // a translation node is a node from the document which
    // contains useful content for translation, and therefore
    // must be included in the translation process.
    let nodeList = winUtils.getTranslationNodes(document.body);

    let length = nodeList.length;

    for (let i = 0; i < length; i++) {
      let node = nodeList.item(i);
      let isRoot = nodeList.isTranslationRootAtIndex(i);

      // Create a TranslationItem object for this node.
      // This function will also add it to the this.roots array.
      this._createItemForNode(node, i, isRoot);
    }

    // At first all roots are stored in the roots list, and only after
    // the process has finished we're able to determine which roots are
    // simple, and which ones are not.

    // A simple root is defined by a root with no children items, which
    // basically represents an element from a page with only text content
    // inside.

    // This distinction is useful for optimization purposes: we treat a
    // simple root as plain-text in the translation process and with that
    // we are able to reduce their data payload sent to the translation service.

    for (let root of this.roots) {
      if (!root.children.length && root.nodeRef.childElementCount == 0) {
        root.isSimpleRoot = true;
      }
    }
  },

  /**
   * Creates a TranslationItem object, which should be called
   * for each node returned by getTranslationNodes.
   *
   * @param node   The DOM node for this item.
   * @param id     A unique, numeric id for this item.
   * @param isRoot A boolean saying whether this item is a root.
   *
   * @returns A TranslationItem object. If an item already exists for
   *          this node, the existing one is returned.
   */
  _createItemForNode(node, id, isRoot) {
    if (this.itemsMap.has(node)) {
      return this.itemsMap.get(node);
    }

    let item = new TranslationItem(node, id, isRoot);

    if (isRoot) {
      // Root items do not have a parent item.
      this.roots.push(item);
    } else {
      // Only a direct DOM parent that is itself a translation item
      // adopts this item as a child.
      let parentItem = this.itemsMap.get(node.parentNode);
      if (parentItem) {
        parentItem.children.push(item);
      }
    }

    this.itemsMap.set(node, item);
    return item;
  },

  /**
   * Generate the text string that represents a TranslationItem object.
   * Besides generating the string, it's also stored in the "original"
   * field of the TranslationItem object, which needs to be stored for
   * later to be used in the "Show Original" functionality.
   * If this function had already been called for the given item (determined
   * by the presence of the "original" array in the item), the text will
   * be regenerated from the "original" data instead of from the related
   * DOM nodes (because the nodes might contain translated data).
   *
   * @param item A TranslationItem object
   *
   * @returns A string representation of the TranslationItem.
   */
  generateTextForItem(item) {
    if (item.original) {
      return regenerateTextFromOriginalHelper(item);
    }

    if (item.isSimpleRoot) {
      // A simple root has no element children, but it may still hold
      // comment nodes or several text nodes. textContent concatenates all
      // of its text and ignores comments, whereas reading only
      // firstChild.nodeValue would return a comment's content or just the
      // first text fragment.
      let text = item.nodeRef.textContent.trim();
      item.original = [text];
      return text;
    }

    let str = "";
    item.original = [];
    let wasLastItemPlaceholder = false;

    for (let child of item.nodeRef.childNodes) {
      if (child.nodeType == child.TEXT_NODE) {
        let x = child.nodeValue.trim();
        if (x != "") {
          item.original.push(x);
          str += x;
          wasLastItemPlaceholder = false;
        }
        continue;
      }

      let objInMap = this.itemsMap.get(child);
      if (objInMap && !objInMap.isRoot) {
        // If this childNode is present in the itemsMap, it means
        // it's a translation node: it has useful content for translation.
        // In this case, we need to stringify this node.
        // However, if this item is a root, we should skip it here in this
        // object's child list (and just add a placeholder for it), because
        // it will be stringified separately for being a root.
        item.original.push(objInMap);
        str += this.generateTextForItem(objInMap);
        wasLastItemPlaceholder = false;
      } else if (!wasLastItemPlaceholder) {
        // Otherwise, if this node doesn't contain any useful content,
        // or if it is a root itself, we can replace it with a placeholder node.
        // We can't simply eliminate this node from our string representation
        // because that could change the HTML structure (e.g., it would
        // probably merge two separate text nodes).
        // It's not necessary to add more than one placeholder in sequence;
        // we can optimize them away.
        item.original.push(TranslationItem_NodePlaceholder);
        str += "<br>";
        wasLastItemPlaceholder = true;
      }
    }

    return generateTranslationHtmlForItem(item, str);
  },

  /**
   * Changes the document to display its translated
   * content.
   */
  showTranslation() {
    this.originalShown = false;
    this._swapDocumentContent("translation");
  },

  /**
   * Changes the document to display its original
   * content.
   */
  showOriginal() {
    this.originalShown = true;
    this._swapDocumentContent("original");
  },

  /**
   * Swap the document with the resulting translation,
   * or back with the original content.
   *
   * The swap runs asynchronously; this method returns immediately and
   * the resulting promise is neither returned nor awaited.
   *
   * @param target A string that is either "translation"
   *               or "original".
   */
  _swapDocumentContent(target) {
    (async () => {
      // Let the event loop breathe on every 100 nodes
      // that are replaced.
      const YIELD_INTERVAL = 100;
      await Async.yieldingForEach(
        this.roots,
        root => root.swapText(target),
        YIELD_INTERVAL
      );
    })();
  },
};

/**
 * This class represents an item for translation. It's basically our
 * wrapper class around a node returned by getTranslationNodes, with
 * more data and structural information on it.
 *
 * At the end of the translation process, besides the properties below,
 * a TranslationItem will contain two other properties: one called "original"
 * and one called "translation". They are twin objects, one which reflects
 * the structure of that node in its original state, and the other in its
 * translated state.
 *
 * The "original" array is generated in the generateTextForItem function,
 * and the "translation" array is generated when the translation results
 * are parsed.
 *
 * They are both arrays, which contain a mix of strings and references to
 * child TranslationItems. The references in both arrays point to the *same*
 * TranslationItem object, but they might appear in different orders between
 * the "original" and "translation" arrays.
 *
 * An example:
 *
 *  English: <div id="n1">Welcome to <b id="n2">Mozilla's</b> website</div>
 *  Portuguese: <div id="n1">Bem vindo a pagina <b id="n2">da Mozilla</b></div>
 *
 *  TranslationItem n1 = {
 *    id: 1,
 *    original: ["Welcome to", ptr to n2, "website"]
 *    translation: ["Bem vindo a pagina", ptr to n2]
 *  }
 *
 *  TranslationItem n2 = {
 *    id: 2,
 *    original: ["Mozilla's"],
 *    translation: ["da Mozilla"]
 *  }
 *
 * @param node   The DOM node wrapped by this item.
 * @param id     A unique, numeric id for this item.
 * @param isRoot A boolean saying whether this item is a root.
 */
function TranslationItem(node, id, isRoot) {
  this.nodeRef = node;
  this.id = id;
  this.isRoot = isRoot;
  this.children = [];
}

TranslationItem.prototype = {
  isRoot: false,
  isSimpleRoot: false,

  toString() {
    let rootType = "";
    if (this.isRoot) {
      if (this.isSimpleRoot) {
        rootType = " (simple root)";
      } else {
        rootType = " (non simple root)";
      }
    }
    return (
      "[object TranslationItem: <" +
      this.nodeRef.localName +
      ">" +
      rootType +
      "]"
    );
  },

  /**
   * This function will parse the result of the translation of one translation
   * item. If this item was a simple root, all we sent was a plain-text version
   * of it, so the result is also straightforward text.
   *
   * For non-simple roots, we sent a simplified HTML representation of that
   * node, and we'll first parse that into an HTML doc and then call the
   * parseResultNode helper function to parse it.
   *
   * While parsing, the result is stored in the "translation" field of the
   * TranslationItem, which will be used to display the final translation when
   * all items are finished. It remains stored too to allow back-and-forth
   * switching between the "Show Original" and "Show Translation" functions.
   *
   * @param result A string with the textual result received from the server,
   *               which can be plain-text or a serialized HTML doc.
   */
  parseResult(result) {
    if (this.isSimpleRoot) {
      this.translation = [result];
      return;
    }

    let domParser = new DOMParser();

    let doc = domParser.parseFromString(result, "text/html");
    parseResultNode(this, doc.body.firstChild);
  },

  /**
   * This function finds a child TranslationItem
   * with the given id.
   * @param id The id to look for, in the format "n#"
   * @returns A TranslationItem with the given id, or null if
   *          it was not found.
   */
  getChildById(id) {
    for (let child of this.children) {
      if ("n" + child.id == id) {
        return child;
      }
    }
    return null;
  },

  /**
   * Swap the text of this TranslationItem between
   * its original and translated states.
   *
   * @param target A string that is either "translation"
   *               or "original".
   */
  swapText(target) {
    swapTextForItem(this, target);
  },
};

/**
 * This object represents a placeholder item for translation. It's similar to
 * the TranslationItem class, but it represents nodes that have no meaningful
 * content for translation. These nodes will be replaced by "<br>" in a
 * translation request. It's necessary to keep them to use it as a mark
 * for correct positioning and splitting of text nodes.
 */
const TranslationItem_NodePlaceholder = {
  toString() {
    return "[object TranslationItem_NodePlaceholder]";
  },
};

/**
 * Generate the outer HTML representation for a given item.
 *
 * Roots are wrapped in a <div> and every other item in a <b>; the element
 * carries the item's id as "n" + id so that the translated markup can be
 * matched back to its TranslationItem.
 *
 * @param   item       A TranslationItem object.
 * @param   content    The inner content for this item.
 * @returns string     The outer HTML needed for translation
 *                     of this item.
 */
function generateTranslationHtmlForItem(item, content) {
  let localName = item.isRoot ? "div" : "b";
  return (
    "<" + localName + " id=n" + item.id + ">" + content + "</" + localName + ">"
  );
}

/**
 * Regenerate the text string that represents a TranslationItem object,
 * with data from its "original" array. The array must have already
 * been created by TranslationDocument.generateTextForItem().
 *
 * @param   item     A TranslationItem object
 *
 * @returns          A string representation of the TranslationItem.
 */
function regenerateTextFromOriginalHelper(item) {
  if (item.isSimpleRoot) {
    return item.original[0];
  }

  let str = "";
  for (let child of item.original) {
    if (child instanceof TranslationItem) {
      str += regenerateTextFromOriginalHelper(child);
    } else if (child === TranslationItem_NodePlaceholder) {
      str += "<br>";
    } else {
      str += child;
    }
  }

  return generateTranslationHtmlForItem(item, str);
}

/**
 * Helper function to parse a HTML doc result.
 * How it works:
 *
 * An example result string is:
 *
 * <div id="n1">Hello <b id="n2">World</b> of <b id="n3">Mozilla</b>.</div>
 *
 * For an element node, we look at its id and find the corresponding
 * TranslationItem that was associated with this node, and then we
 * walk down it repeating the process.
 *
 * For text nodes we simply add it as a string.
 *
 * <br> elements become TranslationItem_NodePlaceholder entries, and
 * elements whose id matches no child item are dropped.
 *
 * @param item A TranslationItem whose "translation" array is (re)built.
 * @param node The parsed element that corresponds to this item.
 */
function parseResultNode(item, node) {
  item.translation = [];
  for (let child of node.childNodes) {
    if (child.nodeType == child.TEXT_NODE) {
      item.translation.push(child.nodeValue);
    } else if (child.localName == "br") {
      item.translation.push(TranslationItem_NodePlaceholder);
    } else {
      let translationItemChild = item.getChildById(child.id);

      if (translationItemChild) {
        item.translation.push(translationItemChild);
        parseResultNode(translationItemChild, child);
      }
    }
  }
}

/**
 * Helper function to swap the text of a TranslationItem
 * between its original and translated states.
 * How it works:
 *
 * The function iterates through the target array (either the `original` or
 * `translation` array from the TranslationItem), while also keeping a pointer
 * to a current position in the child nodes from the actual DOM node that we
 * are modifying. This pointer is moved forward after each item of the array
 * is translated. If, at any given time, the pointer doesn't match the expected
 * node that was supposed to be seen, it means that the original and translated
 * contents have a different ordering, and thus we need to adjust that.
 *
 * A full example of the reordering process, swapping from Original to
 * Translation:
 *
 *    Original (en): <div>I <em>miss</em> <b>you</b></div>
 *
 * Translation (fr): <div><b>Tu</b> me <em>manques</em></div>
 *
 * Step 1:
 *   pointer points to firstChild of the DOM node, textnode "I "
 *   first item in item.translation is [object TranslationItem <b>]
 *
 *   pointer does not match the expected element, <b>. So let's move <b> to the
 *   pointer position.
 *
 *   Current state of the DOM:
 *     <div><b>you</b>I <em>miss</em> </div>
 *
 * Step 2:
 *   pointer moves forward to nextSibling, textnode "I " again.
 *   second item in item.translation is the string " me "
 *
 *   pointer points to a text node, and we were expecting a text node. Match!
 *   just replace the text content.
 *
 *   Current state of the DOM:
 *     <div><b>you</b> me <em>miss</em> </div>
 *
 * Step 3:
 *   pointer moves forward to nextSibling, <em>miss</em>
 *   third item in item.translation is [object TranslationItem <em>]
 *
 *   pointer points to the expected node. Match! Nothing to do.
 *
 * Step 4:
 *   all items in this item.translation were transformed. The remaining
 *   text nodes are cleared to "", and domNode.normalize() removes them.
 *
 *   Current state of the DOM:
 *     <div><b>you</b> me <em>miss</em></div>
 *
 * Further steps:
 *   After that, the function will visit the child items (from the visitStack),
 *   and the text inside the <b> and <em> nodes will be swapped as well,
 *   yielding the final result:
 *
 *     <div><b>Tu</b> me <em>manques</em></div>
 *
 *
 * @param item     A TranslationItem object
 * @param target   A string that is either "translation"
 *                 or "original".
 */
function swapTextForItem(item, target) {
  // visitStack is the list of items that we still need to visit (items are
  // taken from the front and added to the back).
  // Let's start the process by adding the root item.
  let visitStack = [item];

  while (visitStack.length) {
    let curItem = visitStack.shift();

    let domNode = curItem.nodeRef;
    if (!domNode) {
      // Skipping this item due to a missing node.
      continue;
    }

    if (!curItem[target]) {
      // Translation not found for this item. This could be due to
      // an error in the server response. For example, if a translation
      // was broken in various chunks, and one of the chunks failed,
      // the items from that chunk will be missing its "translation"
      // field.
      continue;
    }

    domNode.normalize();

    // curNode points to the child nodes of the DOM node that we are
    // modifying. During most of the process, while the target array is
    // being iterated (in the for loop below), it should walk together with
    // the array and be pointing to the correct node that needs to be modified.
    // If it's not pointing to it, that means some sort of node reordering
    // will be necessary to produce the correct translation.
    // Note that text nodes don't need to be reordered, as we can just replace
    // the content of one text node with another.
    //
    // curNode starts in the firstChild...
    let curNode = domNode.firstChild;

    // ... actually, let's make curNode start at the first useful node (either
    // a non-blank text node or something else). This is not strictly necessary,
    // as the reordering algorithm would correctly handle this case. However,
    // this better aligns the resulting translation with the DOM content of the
    // page, avoiding cases that would need to be unnecessarily reordered.
    //
    // An example of how this helps:
    //
    // ---- Original: <div>             <b>Hello </b> world.</div>
    //                     ^textnode 1  ^item 1      ^textnode 2
    //
    // - Translation: <div><b>Hallo </b> Welt.</div>
    //
    // Transformation process without this optimization:
    //   1 - start pointer at textnode 1
    //   2 - move item 1 to first position inside the <div>
    //
    //     Node now looks like: <div><b>Hello </b>[    ][ world.]</div>
    //                                    textnode 1^    ^textnode 2
    //
    //   3 - replace textnode 1 with " Welt."
    //   4 - clear remaining text nodes (in this case, textnode 2)
    //
    // Transformation process with this optimization:
    //   1 - start pointer at item 1
    //   2 - item 1 is already in position
    //   3 - replace textnode 2 with " Welt."
    //
    // which completely avoids any node reordering, and requires only one
    // text change instead of two (while also leaving the page closer to
    // its original state).
    while (
      curNode &&
      curNode.nodeType == curNode.TEXT_NODE &&
      curNode.nodeValue.trim() == ""
    ) {
      curNode = curNode.nextSibling;
    }

    // Now let's walk through all items in the `target` array of the
    // TranslationItem. This means either the TranslationItem.original or
    // TranslationItem.translation array.
    for (let targetItem of curItem[target]) {
      if (targetItem instanceof TranslationItem) {
        // If the array element is another TranslationItem object, let's
        // add it to the stack to be visited.
        visitStack.push(targetItem);

        let targetNode = targetItem.nodeRef;

        // If the node is not in the expected position, let's reorder
        // it into position...
        if (
          curNode != targetNode &&
          // ...unless the page has reparented this node under a totally
          // different node (or removed it). In this case, all bets are off
          // on being able to do anything correctly, so it's better not to
          // bring back the node to this parent.
          targetNode.parentNode == domNode
        ) {
          // We don't need to null-check curNode because insertBefore(..., null)
          // does what we need in that case: reorder this node to the end
          // of child nodes.
          domNode.insertBefore(targetNode, curNode);
          curNode = targetNode;
        }

        // Move pointer forward. Since we do not add empty text nodes to the
        // list of translation items, we must skip them here too while
        // traversing the DOM in order to get better alignment between the
        // text nodes and the translation items.
        if (curNode) {
          curNode = getNextSiblingSkippingEmptyTextNodes(curNode);
        }
      } else if (targetItem === TranslationItem_NodePlaceholder) {
        // If the current item is a placeholder node, we need to move
        // our pointer "past" it, jumping from one side of a block of
        // elements + empty text nodes to the other side. Even if
        // non-placeholder elements exists inside the jumped block,
        // they will be pulled correctly later in the process when the
        // targetItem for those nodes are handled.

        while (
          curNode &&
          (curNode.nodeType != curNode.TEXT_NODE ||
            curNode.nodeValue.trim() == "")
        ) {
          curNode = curNode.nextSibling;
        }
      } else {
        // Finally, if it's a text item, we just need to find the next
        // text node to use. Text nodes don't need to be reordered, so
        // the first one found can be used.
        while (curNode && curNode.nodeType != curNode.TEXT_NODE) {
          curNode = curNode.nextSibling;
        }

        // If none was found and we reached the end of the child nodes,
        // let's create a new one.
        if (!curNode) {
          // We don't know if the original content had a space or not,
          // so the best bet is to create the text node with " " which
          // will add one space at the beginning and one at the end.
          curNode = domNode.appendChild(
            domNode.ownerDocument.createTextNode(" ")
          );
        }

        // A trailing and a leading space must be preserved because
        // they are meaningful in HTML.
        let preSpace = /^\s/.test(curNode.nodeValue) ? " " : "";
        let endSpace = /\s$/.test(curNode.nodeValue) ? " " : "";

        curNode.nodeValue = preSpace + targetItem + endSpace;
        curNode = getNextSiblingSkippingEmptyTextNodes(curNode);
      }
    }

    // The translated version of a node might have less text nodes than its
    // original version. If that's the case, let's clear the remaining nodes.
    if (curNode) {
      clearRemainingNonEmptyTextNodesFromElement(curNode);
    }

    // And remove any garbage "" nodes left after clearing.
    domNode.normalize();
  }
}

/**
 * Returns the first sibling after startSibling that is not a
 * whitespace-only text node, or null if there is none.
 *
 * @param startSibling The node whose following siblings are scanned.
 */
function getNextSiblingSkippingEmptyTextNodes(startSibling) {
  let item = startSibling.nextSibling;
  while (
    item &&
    item.nodeType == item.TEXT_NODE &&
    item.nodeValue.trim() == ""
  ) {
    item = item.nextSibling;
  }
  return item;
}

/**
 * Sets the value of every non-empty text node from startSibling (inclusive)
 * through its last following sibling to "". Non-text nodes are left untouched.
 *
 * @param startSibling The first node to consider.
 */
function clearRemainingNonEmptyTextNodesFromElement(startSibling) {
  let item = startSibling;
  while (item) {
    if (item.nodeType == item.TEXT_NODE && item.nodeValue != "") {
      item.nodeValue = "";
    }
    item = item.nextSibling;
  }
}