diff options
Diffstat (limited to 'comm/mail/modules/AttachmentChecker.jsm')
-rw-r--r-- | comm/mail/modules/AttachmentChecker.jsm | 118 |
1 files changed, 118 insertions, 0 deletions
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["AttachmentChecker"];

var AttachmentChecker = {
  getAttachmentKeywords,
};

// Every character that has a special meaning in a regular expression:
// . \ ^ $ * + ? | ( ) [ ] { }
const REGEXP_SPECIALS = /[.\\^$*+?|()[\]{}]/g;

// Matches text that begins with an http(s) or ftp URL scheme. Deliberately
// has no "g" flag, so .test() keeps no state between calls.
const URL_PREFIX = /^(https?|ftp):\/\//i;

// NOT_W is the character class that isn't in the Unicode classes "Ll",
// "Lu" and "Lt". It should work like \W, if \W knew about Unicode.
const NOT_W =
  "[^\\u0041-\\u005a\\u0061-\\u007a\\u00aa\\u00b5\\u00ba\\u00c0-\\u00d6\\u00d8-\\u00f6\\u00f8-\\u01ba\\u01bc-\\u01bf\\u01c4-\\u02ad\\u0386\\u0388-\\u0481\\u048c-\\u0556\\u0561-\\u0587\\u10a0-\\u10c5\\u1e00-\\u1fbc\\u1fbe\\u1fc2-\\u1fcc\\u1fd0-\\u1fdb\\u1fe0-\\u1fec\\u1ff2-\\u1ffc\\u207f\\u2102\\u2107\\u210a-\\u2113\\u2115\\u2119-\\u211d\\u2124\\u2126\\u2128\\u212a-\\u212d\\u212f-\\u2131\\u2133\\u2134\\u2139\\ufb00-\\ufb17\\uff21-\\uff3a\\uff41-\\uff5a]";

/**
 * Check whether the character is a CJK character or not.
 *
 * The ranges are broad: anything from U+2000 to U+9FFF, U+AC00 to U+D7FF and
 * U+F900 to U+FFFF counts. Surrogate code units (U+D800 to U+DFFF) do not.
 *
 * @param {number} code - A UTF-16 code unit, e.g. from
 *   String.prototype.charCodeAt. NaN (charCodeAt past the end of a string)
 *   yields false.
 * @returns {boolean} true if it is a CJK character.
 */
function IsCJK(code) {
  if (code >= 0x2000 && code <= 0x9fff) {
    // Hiragana, Katakana and Kanji
    return true;
  }
  if (code >= 0xac00 && code <= 0xd7ff) {
    // Hangul
    return true;
  }
  if (code >= 0xf900 && code <= 0xffff) {
    // Hiragana, Katakana and Kanji
    return true;
  }
  return false;
}

/**
 * Turn a keyword into regular-expression source that matches it literally,
 * except that each run of spaces matches any run of whitespace.
 *
 * @param {string} inputString - The raw keyword.
 * @returns {string} Regular-expression source for the keyword.
 */
function escapeRegExpSpecials(inputString) {
  return (
    inputString
      .replace(REGEXP_SPECIALS, "\\$&")
      // A global pattern is required here: a plain string pattern would only
      // replace the first space of a multi-word keyword.
      .replace(/ +/g, "\\s+")
  );
}

/**
 * Get the (possibly-empty) list of attachment keywords in this message.
 *
 * Two kinds of keyword are supported:
 *  - Keywords starting with "." (like ".pdf") are treated as file
 *    extensions. Every distinct whole document name ending in the extension
 *    is reported (e.g. "foo.pdf" and "bar.pdf").
 *  - Any other keyword is reported as itself, at most once, if it occurs in
 *    the text.
 * In both cases matches that start with an http(s)/ftp URL are ignored, and
 * matching is case-insensitive.
 *
 * @param {string} mailData - The message text to search. An empty or missing
 *   value yields an empty list.
 * @param {string} keywordsInCsv - Comma-separated keywords. Blank entries
 *   (e.g. from a trailing comma) are skipped; other entries are used exactly
 *   as given, including any surrounding whitespace.
 * @returns {string[]} The (possibly-empty) list of attachment keywords in
 *   this message, without duplicates, in order of discovery.
 */
function getAttachmentKeywords(mailData, keywordsInCsv) {
  // The empty string would get split to an array of size 1. Avoid that...
  if (!mailData || !keywordsInCsv) {
    return [];
  }

  // A Set keeps insertion order and drops duplicates for free.
  const keywordsFound = new Set();

  for (const keyword of keywordsInCsv.split(",")) {
    // A blank keyword would compile to a pattern that matches almost any
    // text and would be reported as a (meaningless) hit.
    if (!keyword.trim()) {
      continue;
    }

    const kw = escapeRegExpSpecials(keyword);
    // If the keyword starts (ends) with a CJK character, we don't care
    // what the previous (next) character is, because the words aren't
    // space delimited.
    const endsWithCJK = IsCJK(keyword.charCodeAt(keyword.length - 1));

    if (keyword.startsWith(".")) {
      // like .pdf
      // For this case we want to match the whole document name.
      const start = "(([^\\s]*)\\b)";
      const end = endsWithCJK ? "" : "(\\s|$)";
      const re = new RegExp(start + kw + end, "ig");
      for (const match of mailData.match(re) || []) {
        // Ignore the match if it was in a URL.
        if (!URL_PREFIX.test(match)) {
          // We can have several *different* matches for one dot-keyword.
          // E.g. foo.pdf and bar.pdf would both match for .pdf.
          keywordsFound.add(match.trim());
        }
      }
      continue;
    }

    const start = IsCJK(keyword.charCodeAt(0)) ? "" : "((^|\\s)\\S*)";
    const end = endsWithCJK ? "" : "(" + NOT_W + "|$)";
    const re = new RegExp(start + kw + end, "ig");
    let match;
    // Keep searching until a hit outside a URL is found; one is enough.
    while ((match = re.exec(mailData)) !== null) {
      // Ignore the match if it was in a URL.
      if (!URL_PREFIX.test(match[0].trim())) {
        keywordsFound.add(keyword);
        break;
      }
    }
  }

  return [...keywordsFound];
}

// This file is also used as a Worker.
/* exported onmessage */
/* globals postMessage */

/**
 * Message handler for when this file runs as a Worker.
 *
 * Reads the first two entries of event.data, passes them to
 * AttachmentChecker.getAttachmentKeywords as (mailData, keywordsInCsv), and
 * posts the resulting keyword list back with postMessage.
 *
 * NOTE(review): kept as a `var` declaration like the original; presumably
 * the worker runtime picks the handler up from the global scope — confirm
 * before changing the declaration kind.
 *
 * @param {object} event - Message event; event.data[0] and event.data[1]
 *   are forwarded unchanged.
 */
var onmessage = function (event) {
  const payload = event.data;
  const mailData = payload[0];
  const keywordsInCsv = payload[1];
  const result = AttachmentChecker.getAttachmentKeywords(
    mailData,
    keywordsInCsv
  );
  postMessage(result);
};