From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- comm/mailnews/compose/src/MimeEncoder.jsm | 430 ++++++++++++++++++++++++++++++ 1 file changed, 430 insertions(+) create mode 100644 comm/mailnews/compose/src/MimeEncoder.jsm (limited to 'comm/mailnews/compose/src/MimeEncoder.jsm') diff --git a/comm/mailnews/compose/src/MimeEncoder.jsm b/comm/mailnews/compose/src/MimeEncoder.jsm new file mode 100644 index 0000000000..ab4c60de42 --- /dev/null +++ b/comm/mailnews/compose/src/MimeEncoder.jsm @@ -0,0 +1,430 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const EXPORTED_SYMBOLS = ["MimeEncoder"]; + +const LINELENGTH_ENCODING_THRESHOLD = 990; +const MESSAGE_RFC822 = "message/rfc822"; + +/** + * A class to pick Content-Transfer-Encoding for a MimePart, and encode MimePart + * body accordingly. + */ +class MimeEncoder { + /** + * Create a MimeEncoder. + * + * @param {string} charset + * @param {string} contentType + * @param {boolean} forceMsgEncoding + * @param {boolean} isMainBody + * @param {string} content + */ + constructor(charset, contentType, forceMsgEncoding, isMainBody, content) { + this._charset = charset; + this._contentType = contentType.toLowerCase(); + this._forceMsgEncoding = forceMsgEncoding; + this._isMainBody = isMainBody; + this._body = content; + this._bodySize = content.length; + + // The encoding value will be used to set Content-Transfer-Encoding header + // and encode this._body. + this._encoding = ""; + + // Flags used to pick encoding. + this._highBitCount = 0; + this._unPrintableCount = 0; + this._ctrlCount = 0; + this._nullCount = 0; + this._hasCr = 0; + this._hasLf = 0; + this._hasCrLf = 0; + this._maxColumn = 0; + } + + /** + * @type {string} + */ + get encoding() { + return this._encoding; + } + + /** + * Use the combination of charset, content type and scanning this._body to + * decide what encoding it should have. + */ + pickEncoding() { + this._analyzeBody(); + + let strictlyMime = Services.prefs.getBoolPref("mail.strictly_mime"); + let needsB64 = false; + let isUsingQP = false; + + // Allow users to override our percentage-wise guess on whether + // the file is text or binary. + let forceB64 = Services.prefs.getBoolPref("mail.file_attach_binary"); + + // If the content-type is "image/" or something else known to be binary or + // several flavors of newlines are present, use base64 unless we're attaching + // a message (so that we don't get confused by newline conversions). + if ( + !this._isMainBody && + (forceB64 || + this._requiresB64() || + this._hasCr + this._hasLf + this._hasCrLf != 1) && + this._contentType != MESSAGE_RFC822 + ) { + needsB64 = true; + } else { + // Otherwise, we need to pick an encoding based on the contents of the + // document. + let encodeP = false; + + // Force quoted-printable if the sender does not allow conversion to 7bit. + if ( + this._forceMsgEncoding || + this._maxColumn > LINELENGTH_ENCODING_THRESHOLD || + (strictlyMime && this._unPrintableCount) || + this._nullCount + ) { + if ( + this._isMainBody && + this._contentType == "text/plain" && + // From rfc3676#section-4.2, Quoted-Printable encoding SHOULD NOT be + // used with Format=Flowed unless absolutely necessary. + Services.prefs.getBoolPref("mailnews.send_plaintext_flowed") + ) { + needsB64 = true; + } else { + encodeP = true; + } + } + + // MIME requires a special case that these types never be encoded. + if ( + this._contentType.startsWith("message") || + this._contentType.startsWith("multipart") + ) { + encodeP = false; + } + + let manager = Cc["@mozilla.org/charset-converter-manager;1"].getService( + Ci.nsICharsetConverterManager + ); + let isCharsetMultiByte = false; + try { + isCharsetMultiByte = + manager.getCharsetData(this._charset, ".isMultibyte") == "true"; + } catch {} + + // If the Mail charset is multibyte, we force it to use Base64 for + // attachments. + if ( + !this._isMainBody && + this._charset && + isCharsetMultiByte && + (this._contentType.startsWith("text") || + // text/vcard synonym + this._contentType == "application/directory") + ) { + needsB64 = true; + } else if (this._charset == "ISO-2022-JP") { + this._encoding = "7bit"; + } else if (encodeP && this._unPrintableCount > this._bodySize / 10) { + // If the document contains more than 10% unprintable characters, + // then that seems like a good candidate for base64 instead of + // quoted-printable. + needsB64 = true; + } else if (encodeP) { + this._encoding = "quoted-printable"; + isUsingQP = true; + } else if (this._highBitCount > 0) { + this._encoding = "8bit"; + } else { + this._encoding = "7bit"; + } + } + + // Always base64 binary data. + if (needsB64) { + this._encoding = "base64"; + } + + // According to RFC 821 we must always have lines shorter than 998 bytes. + // To encode "long lines" use a CTE that will transmit shorter lines. + // Switch to base64 if we are not already using "quoted printable". + + // We don't do this for message/rfc822 attachments, since we can't + // change the original Content-Transfer-Encoding of the message we're + // attaching. We rely on the original message complying with RFC 821, + // if it doesn't we won't either. Not ideal. + if ( + this._contentType != MESSAGE_RFC822 && + this._maxColumn > LINELENGTH_ENCODING_THRESHOLD && + !isUsingQP + ) { + this._encoding = "base64"; + } + } + + /** + * Encode this._body according to the value of this.encoding. + */ + encode() { + let output; + if (this.encoding == "base64") { + output = this._encodeBase64(); + } else if (this.encoding == "quoted-printable") { + output = this._encodeQP(); + } else { + output = this._body.replaceAll("\r\n", "\n").replaceAll("\n", "\r\n"); + } + if (!output.endsWith("\r\n")) { + output += "\r\n"; + } + return output; + } + + /** + * Scan this._body to set flags that will be used by pickEncoding. + */ + _analyzeBody() { + let currentColumn = 0; + let prevCharWasCr = false; + + for (let i = 0; i < this._bodySize; i++) { + let ch = this._body.charAt(i); + let charCode = this._body.charCodeAt(i); + if (charCode > 126) { + this._highBitCount++; + this._unPrintableCount++; + } else if (ch < " " && !"\t\r\n".includes(ch)) { + this._unPrintableCount++; + this._ctrlCount++; + if (ch == "\0") { + this._nullCount++; + } + } + + if ("\r\n".includes(ch)) { + if (ch == "\r") { + if (prevCharWasCr) { + this._hasCr = 1; + } else { + prevCharWasCr = true; + } + } else if (prevCharWasCr) { + if (currentColumn == 0) { + this._hasCrLf = 1; + } else { + this._hasCr = 1; + this._hasLf = 1; + } + prevCharWasCr = false; + } else { + this._hasLf = 1; + } + + if (this._maxColumn < currentColumn) { + this._maxColumn = currentColumn; + } + currentColumn = 0; + } else { + currentColumn++; + } + } + + if (this._maxColumn < currentColumn) { + this._maxColumn = currentColumn; + } + } + + /** + * Determine if base64 is required according to contentType. + */ + _requiresB64() { + if (this._contentType == "application/x-unknown-content-type") { + // Unknown types don't necessarily require encoding. (Note that + // "unknown" and "application/octet-stream" aren't the same.) + return false; + } + if ( + this._contentType.startsWith("image/") || + this._contentType.startsWith("audio/") || + this._contentType.startsWith("video/") || + this._contentType.startsWith("application/") + ) { + // The following types are application/ or image/ types that are actually + // known to contain textual data (meaning line-based, not binary, where + // CRLF conversion is desired rather than disastrous.) So, if the type + // is any of these, it does not *require* base64, and if we do need to + // encode it for other reasons, we'll probably use quoted-printable. + // But, if it's not one of these types, then we assume that any subtypes + // of the non-"text/" types are binary data, where CRLF conversion would + // corrupt it, so we use base64 right off the bat. + // The reason it's desirable to ship these as text instead of just using + // base64 all the time is mainly to preserve the readability of them for + // non-MIME users: if I mail a /bin/sh script to someone, it might not + // need to be encoded at all, so we should leave it readable if we can. + // This list of types was derived from the comp.mail.mime FAQ, section + // 10.2.2, "List of known unregistered MIME types" on 2-Feb-96. + const typesWhichAreReallyText = [ + "application/mac-binhex40", // APPLICATION_BINHEX + "application/pgp", // APPLICATION_PGP + "application/pgp-keys", + "application/x-pgp-message", // APPLICATION_PGP2 + "application/postscript", // APPLICATION_POSTSCRIPT + "application/x-uuencode", // APPLICATION_UUENCODE + "application/x-uue", // APPLICATION_UUENCODE2 + "application/uue", // APPLICATION_UUENCODE4 + "application/uuencode", // APPLICATION_UUENCODE3 + "application/sgml", + "application/x-csh", + "application/javascript", + "application/ecmascript", + "application/x-javascript", + "application/x-latex", + "application/x-macbinhex40", + "application/x-ns-proxy-autoconfig", + "application/x-www-form-urlencoded", + "application/x-perl", + "application/x-sh", + "application/x-shar", + "application/x-tcl", + "application/x-tex", + "application/x-texinfo", + "application/x-troff", + "application/x-troff-man", + "application/x-troff-me", + "application/x-troff-ms", + "application/x-troff-ms", + "application/x-wais-source", + "image/x-bitmap", + "image/x-pbm", + "image/x-pgm", + "image/x-portable-anymap", + "image/x-portable-bitmap", + "image/x-portable-graymap", + "image/x-portable-pixmap", // IMAGE_PPM + "image/x-ppm", + "image/x-xbitmap", // IMAGE_XBM + "image/x-xbm", // IMAGE_XBM2 + "image/xbm", // IMAGE_XBM3 + "image/x-xpixmap", + "image/x-xpm", + ]; + if (typesWhichAreReallyText.includes(this._contentType)) { + return false; + } + return true; + } + return false; + } + + /** + * Base64 encoding. See RFC 2045 6.8. We use the built-in `btoa`, then ensure + * line width is no more than 72. + */ + _encodeBase64() { + let encoded = btoa(this._body); + let ret = ""; + let length = encoded.length; + let i = 0; + let limit = 72; + while (true) { + if (i * limit > length) { + break; + } + ret += encoded.substr(i * limit, limit) + "\r\n"; + i++; + } + return ret; + } + + /** + * Quoted-printable encoding. See RFC 2045 6.7. + */ + _encodeQP() { + let currentColumn = 0; + let hexdigits = "0123456789ABCDEF"; + let white = false; + let out = ""; + + function encodeChar(ch) { + let charCode = ch.charCodeAt(0); + let ret = "="; + ret += hexdigits[charCode >> 4]; + ret += hexdigits[charCode & 0xf]; + return ret; + } + + for (let i = 0; i < this._bodySize; i++) { + let ch = this._body.charAt(i); + let charCode = this._body.charCodeAt(i); + if (ch == "\r" || ch == "\n") { + // If it's CRLF, swallow two chars instead of one. + if (i + 1 < this._bodySize && ch == "\r" && this._body[i + 1] == "\n") { + i++; + } + + // Whitespace cannot be allowed to occur at the end of the line, so we + // back up and replace the whitespace with its code. + if (white) { + let whiteChar = out.slice(-1); + out = out.slice(0, -1); + out += encodeChar(whiteChar); + } + + // Now write out the newline. + out += "\r"; + out += "\n"; + white = false; + currentColumn = 0; + } else if ( + currentColumn == 0 && + (ch == "." || + (ch == "F" && + (i >= this._bodySize - 1 || this._body[i + 1] == "r") && + (i >= this._bodySize - 2 || this._body[i + 2] == "o") && + (i >= this._bodySize - 3 || this._body[i + 3] == "m") && + (i >= this._bodySize - 4 || this._body[i + 4] == " "))) + ) { + // Just to be SMTP-safe, if "." appears in column 0, encode it. + // If this line begins with "From " (or it could but we don't have enough + // data in the buffer to be certain), encode the 'F' in hex to avoid + // potential problems with BSD mailbox formats. + white = false; + out += encodeChar(ch); + currentColumn += 3; + } else if ( + (charCode >= 33 && charCode <= 60) || + (charCode >= 62 && charCode <= 126) + ) { + // Printable characters except for '=' + white = false; + out += ch; + currentColumn++; + } else if (ch == " " || ch == "\t") { + // Whitespace + white = true; + out += ch; + currentColumn++; + } else { + white = false; + out += encodeChar(ch); + currentColumn += 3; + } + + if (currentColumn >= 73) { + // Soft line break for readability + out += "=\r\n"; + white = false; + currentColumn = 0; + } + } + + return out; + } +} -- cgit v1.2.3