summaryrefslogtreecommitdiffstats
path: root/toolkit/components/translation/cld2/post.js
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/translation/cld2/post.js')
-rw-r--r--toolkit/components/translation/cld2/post.js171
1 files changed, 171 insertions, 0 deletions
diff --git a/toolkit/components/translation/cld2/post.js b/toolkit/components/translation/cld2/post.js
new file mode 100644
index 0000000000..8a905f9887
--- /dev/null
+++ b/toolkit/components/translation/cld2/post.js
@@ -0,0 +1,171 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The WebIDL binder places static methods on the prototype, rather than
+// on the constructor, which is a bit clumsy, and is definitely not
+// idiomatic.
+LanguageInfo.detectLanguage = LanguageInfo.prototype.detectLanguage;
+
+// Closure is overzealous in its function call optimization, and tries
+// to turn these singleton methods into unbound function calls.
+ensureCache.alloc = ensureCache.alloc.bind(ensureCache);
+ensureCache.prepare = ensureCache.prepare.bind(ensureCache);
+
+// From public/encodings.h. Unfortunately, the WebIDL binder doesn't
+// allow us to define or automatically derive these in the IDL.
+var Encodings = {
+ "ISO_8859_1" : 0,
+ "ISO_8859_2" : 1,
+ "ISO_8859_3" : 2,
+ "ISO_8859_4" : 3,
+ "ISO_8859_5" : 4,
+ "ISO_8859_6" : 5,
+ "ISO_8859_7" : 6,
+ "ISO_8859_8" : 7,
+ "ISO_8859_9" : 8,
+ "ISO_8859_10" : 9,
+ "JAPANESE_EUC_JP" : 10,
+ "EUC_JP" : 10,
+ "JAPANESE_SHIFT_JIS" : 11,
+ "SHIFT_JIS" : 11,
+ "JAPANESE_JIS" : 12,
+ "JIS" : 12,
+ "CHINESE_BIG5" : 13,
+ "BIG5" : 13,
+ "CHINESE_GB" : 14,
+ "CHINESE_EUC_CN" : 15,
+ "EUC_CN" : 15,
+ "KOREAN_EUC_KR" : 16,
+ "EUC_KR" : 16,
+ "UNICODE_UNUSED" : 17,
+ "CHINESE_EUC_DEC" : 18,
+ "EUC_DEC" : 18,
+ "CHINESE_CNS" : 19,
+ "CNS" : 19,
+ "CHINESE_BIG5_CP950" : 20,
+ "BIG5_CP950" : 20,
+ "JAPANESE_CP932" : 21,
+ "CP932" : 21,
+ "UTF8" : 22,
+ "UNKNOWN_ENCODING" : 23,
+ "ASCII_7BIT" : 24,
+ "RUSSIAN_KOI8_R" : 25,
+ "KOI8_R" : 25,
+ "RUSSIAN_CP1251" : 26,
+ "CP1251" : 26,
+ "MSFT_CP1252" : 27,
+ "CP1252" : 27,
+ "RUSSIAN_KOI8_RU" : 28,
+ "KOI8_RU" : 28,
+ "MSFT_CP1250" : 29,
+ "CP1250" : 29,
+ "ISO_8859_15" : 30,
+ "MSFT_CP1254" : 31,
+ "CP1254" : 31,
+ "MSFT_CP1257" : 32,
+ "CP1257" : 32,
+ "ISO_8859_11" : 33,
+ "MSFT_CP874" : 34,
+ "CP874" : 34,
+ "MSFT_CP1256" : 35,
+ "CP1256" : 35,
+ "MSFT_CP1255" : 36,
+ "CP1255" : 36,
+ "ISO_8859_8_I" : 37,
+ "HEBREW_VISUAL" : 38,
+ "CZECH_CP852" : 39,
+ "CP852" : 39,
+ "CZECH_CSN_369103" : 40,
+ "CSN_369103" : 40,
+ "MSFT_CP1253" : 41,
+ "CP1253" : 41,
+ "RUSSIAN_CP866" : 42,
+ "CP866" : 42,
+ "ISO_8859_13" : 43,
+ "ISO_2022_KR" : 44,
+ "GBK" : 45,
+ "GB18030" : 46,
+ "BIG5_HKSCS" : 47,
+ "ISO_2022_CN" : 48,
+ "TSCII" : 49,
+ "TAMIL_MONO" : 50,
+ "TAMIL_BI" : 51,
+ "JAGRAN" : 52,
+ "MACINTOSH_ROMAN" : 53,
+ "UTF7" : 54,
+ "BHASKAR" : 55,
+ "HTCHANAKYA" : 56,
+ "UTF16BE" : 57,
+ "UTF16LE" : 58,
+ "UTF32BE" : 59,
+ "UTF32LE" : 60,
+ "BINARYENC" : 61,
+ "HZ_GB_2312" : 62,
+ "UTF8UTF8" : 63,
+ "TAM_ELANGO" : 64,
+ "TAM_LTTMBARANI" : 65,
+ "TAM_SHREE" : 66,
+ "TAM_TBOOMIS" : 67,
+ "TAM_TMNEWS" : 68,
+ "TAM_WEBTAMIL" : 69,
+ "KDDI_SHIFT_JIS" : 70,
+ "DOCOMO_SHIFT_JIS" : 71,
+ "SOFTBANK_SHIFT_JIS" : 72,
+ "KDDI_ISO_2022_JP" : 73,
+ "ISO_2022_JP" : 73,
+ "SOFTBANK_ISO_2022_JP" : 74,
+};
+
+// Accept forms both with and without underscores/hypens.
+for (let code of Object.keys(Encodings)) {
+ if (code["includes"]("_"))
+ Encodings[code.replace(/_/g, "")] = Encodings[code];
+}
+
+addOnPreMain(function() {
+
+ onmessage = function(aMsg) {
+ let data = aMsg["data"];
+
+ let langInfo;
+ if (data["tld"] == undefined && data["encoding"] == undefined && data["language"] == undefined) {
+ langInfo = LanguageInfo.detectLanguage(data["text"], !data["isHTML"]);
+ } else {
+ // Do our best to find the given encoding in the encodings table.
+ // Otherwise, just fall back to unknown.
+ let enc = String(data["encoding"]).toUpperCase().replace(/[_-]/g, "");
+
+ let encoding;
+ if (Encodings.hasOwnProperty(enc))
+ encoding = Encodings[enc];
+ else
+ encoding = Encodings["UNKNOWN_ENCODING"];
+
+ langInfo = LanguageInfo.detectLanguage(data["text"], !data["isHTML"],
+ data["tld"] || null,
+ encoding,
+ data["language"] || null);
+ }
+
+ postMessage({
+ "language": langInfo.getLanguageCode(),
+ "confident": langInfo.getIsReliable(),
+
+ "languages": new Array(3).fill(0).map((_, index) => {
+ let lang = langInfo.get_languages(index);
+ return {
+ "languageCode": lang.getLanguageCode(),
+ "percent": lang.getPercent(),
+ };
+ }).filter(lang => {
+ // Ignore empty results.
+ return lang["languageCode"] != "un" || lang["percent"] > 0;
+ }),
+ });
+
+ Module.destroy(langInfo);
+ };
+
+ postMessage("ready");
+});