summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/mime/jsmime/jsmime.js
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/mime/jsmime/jsmime.js')
-rw-r--r--comm/mailnews/mime/jsmime/jsmime.js3682
1 files changed, 3682 insertions, 0 deletions
diff --git a/comm/mailnews/mime/jsmime/jsmime.js b/comm/mailnews/mime/jsmime/jsmime.js
new file mode 100644
index 0000000000..28308daf4f
--- /dev/null
+++ b/comm/mailnews/mime/jsmime/jsmime.js
@@ -0,0 +1,3682 @@
+/* import-globals-from ../src/jsmime.jsm */
+/* globals define, module */
+
+(function (root, fn) {
+ if (typeof define === "function" && define.amd) {
+ define(fn);
+ } else if (typeof module !== "undefined" && module.exports) {
+ module.exports = fn();
+ } else {
+ root.jsmime = fn();
+ }
+})(this, function () {
+ // Exports of every module registered so far, keyed by module id.
+ var mods = {};
+
+ /**
+  * Look up a registered module.
+  *
+  * @param {string} id - Module id; a leading "./" is stripped before lookup.
+  * @returns {*} Whatever the module's factory returned, or undefined when no
+  *   module is stored under that id.
+  */
+ function req(id) {
+   var key = id.replace(/^\.\//, "");
+   return mods[key];
+ }
+
+ /**
+  * Register a module by running its factory immediately.
+  *
+  * @param {string} id - Id to store the module under.
+  * @param {Function} fn - Factory; it receives req and its return value
+  *   becomes the module's exports.
+  */
+ function def(id, fn) {
+   var exported = fn(req);
+   mods[id] = exported;
+ }
+ def("mimeutils", function () {
+ "use strict";
+
+ /**
+  * Decode quoted-printable text into a binary string.
+  *
+  * Every =XX hex escape becomes the character with that code, and a soft line
+  * break (an "=" followed by optional spaces/tabs and then CRLF, a lone CR or
+  * LF, or the end of the input) is removed entirely.
+  *
+  * @param buffer {BinaryString} The string to decode.
+  * @param more {Boolean} Ignored: quoted-printable carries no state from one
+  *   call to the next, so nothing is ever held back.
+  * @returns {Array(BinaryString, BinaryString)} A pair whose first element is
+  *   the decoded string and whose second element is always the empty string.
+  */
+ function decode_qp(buffer, more) {
+   const escapeOrSoftBreak = /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi;
+   const decoded = buffer.replace(escapeOrSoftBreak, (match, param) =>
+     // A capture that trims to nothing was a soft line break, not a hex pair.
+     param.trim().length == 0 ? "" : String.fromCharCode(parseInt(param, 16))
+   );
+   return [decoded, ""];
+ }
+
+ /**
+  * Decode base64 text into a binary string. Unlike window.atob, characters
+  * outside the base64 alphabet are tolerated and skipped.
+  *
+  * @param buffer {BinaryString} The string to decode.
+  * @param more {Boolean} True when further input may follow, in which case an
+  *   incomplete trailing group is handed back instead of
+  *   being discarded.
+  * @returns {Array(BinaryString, BinaryString)} A pair: the decoded string, and
+  *   the undecoded tail that the caller should prepend to the next chunk
+  *   (always "" when more is false).
+  */
+ function decode_base64(buffer, more) {
+   // Keep only alphabet characters, then drop any run of "=" that is followed
+   // by more data, since padding is only meaningful at the very end.
+   let cleaned = buffer
+     .replace(/[^A-Za-z0-9+\/=]/g, "")
+     .replace(/=+([A-Za-z0-9+\/])/g, "$1");
+
+   // Decoding works on groups of 4 characters. A short trailing group is
+   // either carried over to the next call or dropped.
+   const leftover = cleaned.length % 4;
+   const retained = leftover != 0 && more ? cleaned.slice(-leftover) : "";
+
+   cleaned = cleaned
+     .substring(0, cleaned.length - leftover)
+     // Whole groups that consist of nothing but padding carry no data.
+     .replace(/(====)+$/g, "");
+
+   // Use the atob function we (ought to) have in global scope.
+   return [atob(cleaned), retained];
+ }
+
+ /**
+  * Copy a binary string into a new Uint8Array, one element per UTF-16 code
+  * unit of the input.
+  *
+  * @param buffer {BinaryString} The string to convert.
+  * @returns {Uint8Array} The converted data.
+  */
+ function stringToTypedArray(buffer) {
+   const size = buffer.length;
+   const bytes = new Uint8Array(size);
+   let pos = 0;
+   while (pos < size) {
+     bytes[pos] = buffer.charCodeAt(pos);
+     pos++;
+   }
+   return bytes;
+ }
+
+ /**
+  * Build a binary string from a Uint8Array, one character per element.
+  *
+  * The input is converted in slices of 100 elements so that no single
+  * String.fromCharCode call receives the whole array as its argument list.
+  *
+  * @param buffer {Uint8Array} The Uint8Array to convert.
+  * @returns {string} The converted string.
+  */
+ function typedArrayToString(buffer) {
+   const pieces = [];
+   for (let start = 0; start < buffer.length; start += 100) {
+     const slice = buffer.subarray(start, start + 100);
+     pieces.push(String.fromCharCode.apply(undefined, slice));
+   }
+   return pieces.join("");
+ }
+
+ /**
+ * A list of month names for Date parsing: three-letter English abbreviations
+ * listed January first, so an entry's array index is its zero-based month.
+ */
+ var kMonthNames = [
+ "Jan",
+ "Feb",
+ "Mar",
+ "Apr",
+ "May",
+ "Jun",
+ "Jul",
+ "Aug",
+ "Sep",
+ "Oct",
+ "Nov",
+ "Dec",
+ ];
+
+ // Everything the mimeutils module exposes to the modules that require it.
+ return {
+ decode_base64,
+ decode_qp,
+ kMonthNames,
+ stringToTypedArray,
+ typedArrayToString,
+ };
+ });
+ /**
+ * This file implements knowledge of how to encode or decode structured headers
+ * for several key headers. It is not meant to be used externally to jsmime.
+ */
+
+ def("structuredHeaders", function (require) {
+ "use strict";
+
+ // Lookup tables populated below: decoder function, encoder function and
+ // preferred spelling for each header.
+ var structuredDecoders = new Map();
+ var structuredEncoders = new Map();
+ var preferredSpellings = new Map();
+
+ /**
+  * Register a header under its lowercased name in all three tables.
+  *
+  * @param {string} name - The header name in its preferred spelling.
+  * @param {Function} decoder - Stored in structuredDecoders.
+  * @param {Function} encoder - Stored in structuredEncoders.
+  */
+ function addHeader(name, decoder, encoder) {
+   const key = name.toLowerCase();
+   preferredSpellings.set(key, name);
+   structuredDecoders.set(key, decoder);
+   structuredEncoders.set(key, encoder);
+ }
+
+ // Addressing headers: every one of them is treated as if it may occur more
+ // than once (not true for From, but close enough that it shouldn't matter).
+ // Nothing header-specific is needed, so each value is simply handed to
+ // parseAddressingHeader and the results are concatenated in order.
+ function parseAddress(value) {
+   return value.reduce(
+     (collected, header) =>
+       collected.concat(this.parseAddressingHeader(header, true)),
+     []
+   );
+ }
+ // Emit one or more addresses; a lone entry is accepted and wrapped in an
+ // array before being passed to addAddresses.
+ function writeAddress(value) {
+   const addresses = Array.isArray(value) ? value : [value];
+   this.addAddresses(addresses);
+ }
+
+ // Every header below holds an address list and shares the same coders.
+ for (const name of [
+   // Addressing headers from RFC 5322:
+   "Bcc",
+   "Cc",
+   "From",
+   "Reply-To",
+   "Resent-Bcc",
+   "Resent-Cc",
+   "Resent-From",
+   "Resent-Reply-To",
+   "Resent-Sender",
+   "Resent-To",
+   "Sender",
+   "To",
+   // From RFC 5536:
+   "Approved",
+   // From RFC 3798:
+   "Disposition-Notification-To",
+   // Non-standard headers:
+   "Delivered-To",
+   "Return-Receipt-To",
+   // http://cr.yp.to/proto/replyto.html
+   "Mail-Reply-To",
+   "Mail-Followup-To",
+ ]) {
+   addHeader(name, parseAddress, writeAddress);
+ }
+
+ // Parameter-based headers. Each such header needs slightly different
+ // handling, so the callers choose the two decoding flags themselves.
+ function parseParameterHeader(value, do2231, do2047) {
+   // Parameters come from the first header only; later redefinitions are
+   // ignored.
+   const first = value[0];
+   return this.parseParameterHeader(first, do2231, do2047);
+ }
+
+ // RFC 2045
+ /**
+  * Decode a Content-Type header.
+  *
+  * @param {string[]} value - The header values; only the first one is used.
+  * @returns {Map} A Map from lowercased parameter name to parameter value,
+  *   which additionally carries the lowercased mediatype, subtype and type
+  *   ("mediatype/subtype") as plain properties. A value that is not of the
+  *   form "a/b" yields text/plain with no parameters.
+  */
+ function parseContentType(value) {
+   let parameters = parseParameterHeader.call(this, value, false, false);
+   let pieces = parameters.preSemi.split("/");
+   if (pieces.length != 2) {
+     // Malformed. Return to text/plain. Evil, ain't it?
+     parameters = new Map();
+     pieces = ["text", "plain"];
+   }
+
+   const major = pieces[0].toLowerCase();
+   const minor = pieces[1].toLowerCase();
+
+   const structure = new Map();
+   structure.mediatype = major;
+   structure.subtype = minor;
+   structure.type = major + "/" + minor;
+   parameters.forEach((paramValue, paramName) => {
+     structure.set(paramName.toLowerCase(), paramValue);
+   });
+   return structure;
+ }
+ structuredDecoders.set("Content-Type", parseContentType);
+
+ // Unstructured headers: reading RFC 2047-decodes the first header value only;
+ // writing hands the value to addUnstructured unchanged.
+ function parseUnstructured(values) {
+   const first = values[0];
+   return this.decodeRFC2047Words(first);
+ }
+ function writeUnstructured(value) {
+   this.addUnstructured(value);
+ }
+
+ // Message-ID headers.
+ // TODO: Proper support for these headers is not implemented yet; for now they
+ // are read and written exactly like unstructured headers.
+ function parseMessageID(values) {
+   const first = values[0];
+   return this.decodeRFC2047Words(first);
+ }
+ function writeMessageID(value) {
+   this.addUnstructured(value);
+ }
+
+ // Headers handled as unstructured text.
+ for (const name of [
+   // RFC 5322
+   "Comments",
+   "Keywords",
+   "Subject",
+   // RFC 2045
+   "MIME-Version",
+   "Content-Description",
+   // RFC 7231
+   "User-Agent",
+ ]) {
+   addHeader(name, parseUnstructured, writeUnstructured);
+ }
+
+ // Date headers: reading parses the first header value with parseDateHeader;
+ // writing hands the value to addDate.
+ function parseDate(values) {
+   const first = values[0];
+   return this.parseDateHeader(first);
+ }
+ function writeDate(value) {
+   this.addDate(value);
+ }
+
+ // Headers whose value is a date.
+ for (const name of [
+   // RFC 5322
+   "Date",
+   "Resent-Date",
+   // RFC 5536
+   "Expires",
+   "Injection-Date",
+   "NNTP-Posting-Date",
+ ]) {
+   addHeader(name, parseDate, writeDate);
+ }
+
+ // Headers whose value is a message ID (RFC 5322).
+ for (const name of ["Message-ID", "Resent-Message-ID"]) {
+   addHeader(name, parseMessageID, writeMessageID);
+ }
+
+ // Miscellaneous headers (those that don't fall under the above schemes):
+
+ // Content-Transfer-Encoding (RFC 2045): decoding yields the first header
+ // value in lowercase; encoding writes the value as unstructured text.
+ structuredDecoders.set("Content-Transfer-Encoding", (values) =>
+   values[0].toLowerCase()
+ );
+ structuredEncoders.set("Content-Transfer-Encoding", writeUnstructured);
+
+ // Some clients (outlook.com, for one) send non-compliant References headers
+ // with comma-separated values, and others leave out the spaces between IDs,
+ // which RFC2822 makes optional. Normalize both by extracting every <...>
+ // message ID and joining them with single spaces (see bug 1154521 and
+ // bug 1197686).
+ function preprocessMessageIDs(values) {
+   const found = String(values).match(/<[^>]*>/g);
+   return found === null ? "" : found.join(" ");
+ }
+ structuredDecoders.set("References", preprocessMessageIDs);
+ structuredDecoders.set("In-Reply-To", preprocessMessageIDs);
+
+ return Object.freeze({
+ decoders: structuredDecoders,
+ encoders: structuredEncoders,
+ spellings: preferredSpellings,
+ });
+ });
+ def("headerparser", function (require) {
+ /**
+ * This file implements the structured decoding of message header fields. It is
+ * part of the same system as found in mimemimeutils.js, and occasionally makes
+ * references to globals defined in that file or other dependencies thereof. See
+ * documentation in that file for more information about external dependencies.
+ */
+
+ "use strict";
+ var mimeutils = require("./mimeutils");
+
+ /**
+ * This is the API that we ultimately return.
+ *
+ * We define it as a global here, because we need to pass it as a |this|
+ * argument to a few functions.
+ */
+ var headerparser = {};
+
+ /**
+ * Clean up characters that could cause display problems since they
+ * are not displayed.
+ *
+ * The replacements are applied in this order: Unicode space separators become
+ * a plain space; selected invisible nonspacing marks and variation selectors
+ * are removed; control characters other than Tab/LF/CR are removed; format
+ * characters and U+2800 become a plain space.
+ *
+ * @param {string} token - The string to be cleaned.
+ * @returns {string} The cleaned string.
+ */
+ function cleanToken(token) {
+ // Replace problematic characters so we don't get unexpected behavior
+ // down the line. These fall into a few categories:
+ // A) "Separator, space" (Zs),
+ // B) "Mark, Nonspacing" (Mn)
+ // C) "Other, Control" (Cc)
+ // D) "Other, Format" (Cf)
+ // E) "Symbol, Other"
+ // Unfortunately, no support for the needed regexp Unicode property escapes
+ // in our engine. So we need to hand-roll it. Used the regexpu tool for
+ // that: https://mothereff.in/regexpu.
+ // This should be updated regularly, to take into account new additions
+ // to the unicode standard. Last updated July 2019.
+ // For a full list of categories, see http://unicode.org/Public//5.0.0/ucd/UCD.html.
+
+ // -- case A: /\p{Zs}/u
+ // https://www.fileformat.info/info/unicode/category/Zs/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Zs}/u&unicodePropertyEscape=1
+ token = token.replace(
+ /[\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/g,
+ " "
+ );
+
+ // -- case B: /\p{Mn}/u
+ // https://www.fileformat.info/info/unicode/category/Mn/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Mn}/u&unicodePropertyEscape=1
+ // This is a bit more complicated as some of them could be "real", so we'll
+ // only remove the ones that are known to show as blank.
+ token = token.replace(
+ /[\u034F\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F]/g,
+ ""
+ );
+ // \uE0100-\uE01EF need to be written using their surrogate code point pairs
+ // until extended Unicode escapes are supported in regexps.
+ // https://www.fileformat.info/info/unicode/char/e0100/index.htm says \uDB40\uDD00.
+ // https://www.fileformat.info/info/unicode/char/e01ef/index.htm says \uDB40\uDDEF.
+ token = token.replace(/\uDB40[\uDD00-\uDDEF]/g, "");
+
+ // -- case C: /\p{Cc}/u, except Tab/LF/CR
+ // https://www.fileformat.info/info/unicode/category/Cc/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Cc}/u&unicodePropertyEscape=1
+ // The negative lookahead is what exempts Tab, LF and CR from removal.
+ // eslint-disable-next-line no-control-regex
+ token = token.replace(/(?![\t\n\r])[\0-\x1F\x7F-\x9F]/g, "");
+
+ // -- case D: /\p{Cf}/u
+ // https://www.fileformat.info/info/unicode/category/Cf/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Cf}/u&unicodePropertyEscape=1
+ // Remove all of these except for \u0600-\u0605.
+ // XXX: We replace these with spaces (" "), not empty strings ("").
+ // Notably, for zero width space (\u200B) replacing with empty space
+ // would later drop real spaces surrounding it. Dunno why.
+ token = token.replace(
+ /(?:[\xAD\u061C\u06DD\u070F\u08E2\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\uFFF9-\uFFFB]|\uD804[\uDCBD\uDCCD]|\uD80D[\uDC30-\uDC38]|\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|\uDB40[\uDC01\uDC20-\uDC7F])/g,
+ " "
+ );
+
+ // -- case E: problematic symbols
+ // https://www.fileformat.info/info/unicode/category/So/list.htm
+ // Replace U+2800 BRAILLE PATTERN BLANK with space.
+ token = token.replace(/\u2800/g, " ");
+
+ return token;
+ }
+
+ /**
+  * Tokenizes a message header into an array of tokens.
+  *
+  * The low-level tokens are meant to loosely correspond to the tokens as
+  * defined in RFC 5322. For reasons of saner error handling, however, the two
+  * definitions are not exactly equivalent. The tokens we emit are the following:
+  * 1. Special delimiters: Any char in the delimiters string is emitted as a
+  *    string by itself. Parsing parameter headers, for example, would use ";="
+  *    for the delimiter string.
+  * 2. Quoted-strings (if opts.qstring is true): A string which is surrounded by
+  *    double quotes. Escapes in the string are omitted when returning.
+  * 3. Domain Literals (if opts.dliteral is true): A string which matches the
+  *    dliteral construct in RFC 5322. Escapes here are NOT omitted.
+  * 4. Comments (if opts.comments is true): Comments are handled specially. In
+  *    practice, decoding the comments in To headers appears to be necessary, so
+  *    comments are not stripped in the output value. Instead, they are emitted
+  *    as if they are a special delimiter. However, all delimiters found within a
+  *    comment are returned as if they were a quoted string, so that consumers
+  *    ignore delimiters within comments. If ignoring comment text completely is
+  *    desired, upon seeing a "(" token, consumers should ignore all tokens until
+  *    a matching ")" is found (note that comments can be nested).
+  * 5. RFC 2047 encoded-words (if opts.rfc2047 is true): These are strings which
+  *    are the decoded contents of RFC 2047's =?UTF-8?Q?blah?=-style words. A
+  *    run of encoded-words separated only by whitespace is decoded as one
+  *    token. Text that starts with "=?" but is not a well-formed encoded-word
+  *    is tokenized as ordinary text.
+  * 6. Atoms: Atoms are defined not in the RFC 5322 sense, but rather as the
+  *    longest sequence of characters that is neither whitespace nor any of the
+  *    special characters above.
+  *
+  * The intended interpretation of the stream of output tokens is that they are
+  * the portions of text which can be safely wrapped in whitespace with no ill
+  * effect. The output tokens are either strings (which represent individual
+  * delimiter tokens) or objects with a customized .toString() for output (for
+  * quoted strings, atoms, domain literals, and encoded-words).
+  * Checking for a delimiter MUST use the strictly equals operator (===). For
+  * example, the proper way to call this method is as follows:
+  *
+  *   for (let token of getHeaderTokens(rest, ";=", opts)) {
+  *     if (token === ';') {
+  *       // This represents a literal ';' in the string
+  *     } else if (token === '=') {
+  *       // This represents a literal '=' in the string
+  *     } else {
+  *       // If a ";" qstring was parsed, we fall through to here!
+  *       token = token.toString();
+  *     }
+  *   }
+  *
+  * This method does not properly tokenize 5322 in all corner cases; however,
+  * this is equivalent in those corner cases to an older header parsing
+  * algorithm, so the algorithm should be correct for all real-world cases. The
+  * corner cases are as follows:
+  * 1. Quoted-strings and domain literals are parsed even if they are within a
+  *    comment block (we effectively treat ctext as containing qstring).
+  * 2. WSP need not be between a qstring and an atom (a"b" produces two tokens,
+  *    a and b). This is an error case, though.
+  * 3. Legacy comments as display names: We recognize address fields with
+  *    comments, and (a) either drop them if inside addr-spec or (b) preserve
+  *    them as part of the display-name if not. If the display-name is empty
+  *    while the last comment is not, we assume it's the legacy form above and
+  *    take the comment content as the display-name.
+  *
+  * @param {string} value - The header value, post charset conversion but
+  *   before RFC 2047 decoding, to be parsed.
+  * @param {string} delimiters - A set of delimiters to include as individual
+  *   tokens.
+  * @param {object} opts - A set of options selecting what to parse.
+  * @param {boolean} [opts.qstring] - If true, recognize quoted strings.
+  * @param {boolean} [opts.dliteral] - If true, recognize domain literals.
+  * @param {boolean} [opts.comments] - If true, recognize comments.
+  * @param {boolean} [opts.rfc2047] - If true, parse and decode RFC 2047
+  *   encoded-words.
+  * @returns {(Token | string)[]} An array of Token objects (which have a toString
+  *   method returning their value) or strings (representing delimiters).
+  */
+ /* eslint-disable complexity */
+ function getHeaderTokens(value, delimiters, opts) {
+   // The array of parsed tokens. This method used to be a generator, but it
+   // appears that generators are poorly optimized in current engines, so it was
+   // converted to not be one.
+   let tokenList = [];
+
+   // Represents a non-delimiter token.
+   function Token(token) {
+     // Unescape all quoted pairs. Any trailing \ is deleted.
+     this.token = token.replace(/\\(.?)/g, "$1");
+   }
+   Token.prototype.toString = function () {
+     return this.token;
+   };
+
+   // One or more RFC 2047 encoded-words separated only by whitespace. The
+   // pattern MUST stay anchored with ^: it is run against value.slice(i) and the
+   // length of the match is then consumed starting at i, so a match found
+   // further along the string would make us decode and skip the wrong text.
+   let encodedWordsRE = /^([ \t\r\n]*=\?[^?]*\?[BbQq]\?[^?]*\?=)+/;
+
+   // The start of the current token (e.g., atoms, strings)
+   let tokenStart = undefined;
+   // The set of whitespace characters, as defined by RFC 5322
+   let wsp = " \t\r\n";
+   // If we are a domain literal ([]) or a quoted string ("), this is set to the
+   // character to look for at the end.
+   let endQuote = undefined;
+   // The current depth of comments, since they can be nested. A value 0 means we
+   // are not in a comment.
+   let commentDepth = 0;
+
+   // Iterate over every character one character at a time.
+   let length = value.length;
+   for (let i = 0; i < length; i++) {
+     let ch = value[i];
+     // If we see a \, no matter what context we are in, ignore the next
+     // character.
+     if (ch == "\\") {
+       i++;
+       continue;
+     }
+
+     // If we are in a qstring or a dliteral, process the character only if it is
+     // what we are looking for to end the quote.
+     if (endQuote !== undefined) {
+       if (ch == endQuote && ch == '"') {
+         // Quoted strings don't include their delimiters.
+         let text = value.slice(tokenStart + 1, i);
+
+         // If RFC 2047 is enabled, always decode the qstring.
+         if (opts.rfc2047) {
+           text = decodeRFC2047Words(text);
+         }
+
+         tokenList.push(new Token(text));
+         endQuote = undefined;
+         tokenStart = undefined;
+       } else if (ch == endQuote && ch == "]") {
+         // Domain literals include their delimiters.
+         tokenList.push(new Token(value.slice(tokenStart, i + 1)));
+         endQuote = undefined;
+         tokenStart = undefined;
+       }
+       // Avoid any further processing.
+       continue;
+     }
+
+     // If we can match the RFC 2047 encoded-word pattern, we need to decode the
+     // entire word or set of words.
+     if (
+       opts.rfc2047 &&
+       ch == "=" &&
+       i + 1 < value.length &&
+       value[i + 1] == "?"
+     ) {
+       // RFC 2047 tokens separated only by whitespace are conceptually part of
+       // the same output token, so we need to decode them all at once.
+       let result = encodedWordsRE.exec(value.slice(i));
+       if (result !== null) {
+         // If we were in the middle of a prior token (i.e., something like
+         // foobar=?UTF-8?Q?blah?=), yield the previous segment as a token.
+         if (tokenStart !== undefined) {
+           tokenList.push(new Token(value.slice(tokenStart, i)));
+           tokenStart = undefined;
+         }
+
+         // Find out how much we need to decode...
+         let encWordsLen = result[0].length;
+         let string = decodeRFC2047Words(value.slice(i, i + encWordsLen));
+         // Don't make a new Token variable, since we do not want to unescape the
+         // decoded string.
+         tokenList.push({
+           toString() {
+             return string;
+           },
+         });
+
+         // Skip everything we decoded. The -1 is because we don't want to
+         // include the starting character.
+         i += encWordsLen - 1;
+         continue;
+       }
+
+       // If we are here, then the text at this position is not a well-formed
+       // encoded-word, despite the fact that it begins with =?. Fall through
+       // and treat the text as if we aren't trying to decode RFC 2047.
+     }
+
+     // If we reach this point, we're not inside of quoted strings, domain
+     // literals, or RFC 2047 encoded-words. This means that the characters we
+     // parse are potential delimiters (unless we're in comments, where
+     // everything starts to go really wonky). Several things could happen,
+     // depending on the kind of character we read and whether or not we were in
+     // the middle of a token. The three values here tell us what we could need
+     // to do at this point:
+     // tokenIsEnding: The current character is not able to be accumulated to an
+     // atom, so we need to flush the atom if there is one.
+     // tokenIsStarting: The current character could begin an atom (or
+     // anything that requires us to mark the starting point), so we need to save
+     // the location.
+     // isSpecial: The current character is a delimiter that needs to be output.
+     let tokenIsEnding = false,
+       tokenIsStarting = false,
+       isSpecial = false;
+     if (wsp.includes(ch)) {
+       // Whitespace ends current tokens, doesn't emit anything.
+       tokenIsEnding = true;
+     } else if (commentDepth == 0 && delimiters.includes(ch)) {
+       // Delimiters end the current token, and need to be output. They do not
+       // apply within comments.
+       tokenIsEnding = true;
+       isSpecial = true;
+     } else if (opts.qstring && ch == '"') {
+       // Quoted strings end the last token and start a new one.
+       tokenIsEnding = true;
+       tokenIsStarting = true;
+       endQuote = ch;
+     } else if (opts.dliteral && ch == "[") {
+       // Domain literals end the last token and start a new one.
+       tokenIsEnding = true;
+       tokenIsStarting = true;
+       endQuote = "]";
+     } else if (opts.comments && ch == "(") {
+       // Comments are nested (oh joy). We only really care for the outer
+       // delimiter, though, which also ends the prior token and needs to be
+       // output if the consumer requests it.
+       commentDepth++;
+       if (commentDepth == 1) {
+         tokenIsEnding = true;
+         isSpecial = true;
+       } else {
+         tokenIsStarting = true;
+       }
+     } else if (opts.comments && ch == ")") {
+       // Comments are nested (oh joy). We only really care for the outer
+       // delimiter, though, which also ends the prior token and needs to be
+       // output if the consumer requests it.
+       if (commentDepth > 0) {
+         commentDepth--;
+       }
+       if (commentDepth == 0) {
+         tokenIsEnding = true;
+         isSpecial = true;
+       } else {
+         tokenIsStarting = true;
+       }
+     } else {
+       // Not a delimiter, whitespace, comment, domain literal, or quoted string.
+       // Must be part of an atom then!
+       tokenIsStarting = true;
+     }
+
+     // If our analysis concluded that we closed an open token, and there is an
+     // open token, then yield that token.
+     if (tokenIsEnding && tokenStart !== undefined) {
+       tokenList.push(new Token(value.slice(tokenStart, i)));
+       tokenStart = undefined;
+     }
+     // If we need to output a delimiter, do so.
+     if (isSpecial) {
+       tokenList.push(ch);
+     }
+     // If our analysis concluded that we could open a token, and no token is
+     // opened yet, then start the token.
+     if (tokenIsStarting && tokenStart === undefined) {
+       tokenStart = i;
+     }
+   }
+
+   // That concludes the loop! If there is a currently open token, close that
+   // token now.
+   if (tokenStart !== undefined) {
+     // Error case: a partially-open quoted string is assumed to have a trailing
+     // " character.
+     if (endQuote == '"') {
+       tokenList.push(new Token(value.slice(tokenStart + 1)));
+     } else {
+       tokenList.push(new Token(value.slice(tokenStart)));
+     }
+   }
+   return tokenList;
+ }
+ /* eslint-enable complexity */
+
+ /**
+  * Turn a raw header value into a UTF-16 string and clean it with cleanToken.
+  *
+  * A value with no characters in the \x80-\xff range is not converted. Any
+  * other value is decoded as UTF-8 first; if that fails and a usable
+  * fallbackCharset was supplied, the fallback charset is tried instead.
+  *
+  * @param {string} headerValue - The header (as a binary string) to attempt
+  *   to convert to UTF-16.
+  * @param {string} [fallbackCharset] - The optional charset to try if UTF-8
+  *   doesn't work.
+  * @returns {string} The UTF-16 representation of the string above.
+  */
+ function convert8BitHeader(headerValue, fallbackCharset) {
+   // Only values containing non-ASCII characters need converting.
+   if (!/[\x80-\xff]/.test(headerValue)) {
+     return cleanToken(headerValue);
+   }
+
+   // MimeTextDecoder works on typed arrays, not binary strings.
+   const bytes = mimeutils.stringToTypedArray(headerValue);
+
+   // A fallback named utf* is not used: UTF-8 would be redundant, and UTF-16
+   // or UTF-32 would radically change how the header is interpreted. With a
+   // usable fallback the UTF-8 decoder must fail loudly so we notice; without
+   // one it substitutes replacement characters instead.
+   const hasFallback =
+     fallbackCharset && !fallbackCharset.toLowerCase().startsWith("utf");
+   const utf8Decoder = new MimeTextDecoder("utf-8", { fatal: hasFallback });
+
+   let converted = headerValue;
+   try {
+     converted = utf8Decoder.decode(bytes);
+   } catch (e) {
+     // Not valid UTF-8, so try the fallback. If that fails as well, the value
+     // is left exactly as it was received.
+     try {
+       converted = new MimeTextDecoder(fallbackCharset, {
+         fatal: false,
+       }).decode(bytes);
+     } catch (ex) {}
+   }
+   return cleanToken(converted);
+ }
+
+ /**
+ * Decodes all RFC 2047 encoded-words in the input string. The string does not
+ * necessarily have to contain any such words. This is useful, for example, for
+ * parsing unstructured headers.
+ *
+ * Whitespace-only text between pieces is dropped, text that is not a valid
+ * encoded-word is kept as-is, and the joined result is passed through
+ * cleanToken before being returned.
+ *
+ * @param {string} headerValue The header which may contain RFC 2047 encoded-
+ * words.
+ * @returns {string} A full UTF-16 string with all encoded words expanded.
+ */
+ function decodeRFC2047Words(headerValue) {
+ // Unfortunately, many implementations of RFC 2047 encoding are actually wrong
+ // in that they split over-long encoded words without regard for whether or
+ // not the split point is in the middle of a multibyte character. Therefore,
+ // we need to be able to handle these situations gracefully. This is done by
+ // using the decoder in streaming mode so long as the next token is another
+ // 2047 token with the same charset.
+ let lastCharset = "",
+ currentDecoder = undefined;
+
+ /**
+ * Decode a single RFC 2047 token. This function is inline so that we can
+ * easily close over the lastCharset/currentDecoder variables, needed for
+ * handling bad RFC 2047 productions properly.
+ * E.g. =?iso-8859-1?q?this=20is=20some=20text?=
+ *
+ * @param {string} token - The candidate encoded-word.
+ * @param {boolean} isLastToken - True for the last encoded-word candidate in
+ * the header; it is never decoded in streaming mode.
+ * @returns {string|false} The decoded text (preceded by anything flushed from
+ * a previous decoder), or false if the token is not a valid encoded-word or
+ * names a charset the decoder rejects.
+ */
+ function decode2047Token(token, isLastToken) {
+ let tokenParts = token.split("?");
+
+ // If it's obviously not a valid token, return false immediately.
+ if (tokenParts.length != 5 || tokenParts[4] != "=") {
+ return false;
+ }
+
+ // The charset parameter is defined in RFC 2231 to be charset or
+ // charset*language. We only care about the charset here, so ignore any
+ // language parameter that gets passed in.
+ let charset = tokenParts[1].split("*", 1)[0];
+ let encoding = tokenParts[2],
+ text = tokenParts[3];
+
+ let buffer;
+ if (encoding == "B" || encoding == "b") {
+ // Decode base64. If there's any non-base64 data, treat the string as
+ // an illegal token.
+ if (/[^ A-Za-z0-9+\/=]/.exec(text)) {
+ return false;
+ }
+
+ // Decode the string
+ buffer = mimeutils.decode_base64(text, false)[0];
+ } else if (encoding == "Q" || encoding == "q") {
+ // Q encoding here looks a lot like quoted-printable text. The differences
+ // between quoted-printable and this are that quoted-printable allows you
+ // to quote newlines (this doesn't), while this replaces spaces with _.
+ // We can reuse the decode_qp code here, since newlines are already
+ // stripped from the header. There is one edge case that could trigger a
+ // false positive, namely when you have a single = or an = followed by
+ // whitespace at the end of the string. Such an input string is already
+ // malformed to begin with, so stripping the = and following input in that
+ // case should not be an important loss.
+ buffer = mimeutils.decode_qp(text.replace(/_/g, " "), false)[0];
+ } else {
+ return false;
+ }
+
+ // Make the buffer be a typed array for what follows
+ // (the binary-string form is kept for the ISO-2022-JP check further down).
+ let stringBuffer = buffer;
+ buffer = mimeutils.stringToTypedArray(buffer);
+
+ // If we cannot reuse the last decoder, flush out whatever remains.
+ var output = "";
+ if (charset != lastCharset && currentDecoder) {
+ output += currentDecoder.decode();
+ currentDecoder = null;
+ }
+
+ // Initialize the decoder for this token.
+ lastCharset = charset;
+ if (!currentDecoder) {
+ try {
+ currentDecoder = new MimeTextDecoder(charset, { fatal: false });
+ } catch (e) {
+ // We don't recognize the charset, so give up.
+ return false;
+ }
+ }
+
+ // Convert this token with the buffer. Note the stream parameter--although
+ // RFC 2047 tokens aren't supposed to break in the middle of a multibyte
+ // character, a lot of software messes up and does so because it's hard not
+ // to (see headeremitter.js for exactly how hard!).
+ // We must not stream ISO-2022-JP if the buffer switches back to
+ // the ASCII state, that is, ends in "ESC(B".
+ // Also, we shouldn't do streaming on the last token.
+ let doStreaming;
+ if (
+ isLastToken ||
+ (charset.toUpperCase() == "ISO-2022-JP" &&
+ stringBuffer.endsWith("\x1B(B"))
+ ) {
+ doStreaming = { stream: false };
+ } else {
+ doStreaming = { stream: true };
+ }
+ return output + currentDecoder.decode(buffer, doStreaming);
+ }
+
+ // The first step of decoding is to split the string into RFC 2047 and
+ // non-RFC 2047 tokens. RFC 2047 tokens look like the following:
+ // =?charset?c?text?=, where c is one of B, b, Q, and q. The split regex does
+ // some amount of semantic checking, so that malformed RFC 2047 tokens will
+ // get ignored earlier.
+ // Because the regex has a capturing group, the matched tokens themselves are
+ // kept in the resulting array alongside the text between them.
+ let components = headerValue.split(/(=\?[^?]*\?[BQbq]\?[^?]*\?=)/);
+
+ // Find last RFC 2047 token.
+ let lastRFC2047Index = -1;
+ for (let i = 0; i < components.length; i++) {
+ if (components[i].substring(0, 2) == "=?") {
+ lastRFC2047Index = i;
+ }
+ }
+ for (let i = 0; i < components.length; i++) {
+ if (components[i].substring(0, 2) == "=?") {
+ let decoded = decode2047Token(components[i], i == lastRFC2047Index);
+ if (decoded !== false) {
+ // If 2047 decoding succeeded for this bit, rewrite the original value
+ // with the proper decoding.
+ components[i] = decoded;
+
+ // We're done processing, so continue to the next link.
+ continue;
+ }
+ } else if (/^[ \t\r\n]*$/.exec(components[i])) {
+ // Whitespace-only tokens get squashed into nothing, so 2047 tokens will
+ // be concatenated together.
+ components[i] = "";
+ continue;
+ }
+
+ // Reached for plain text and for tokens that failed to decode: either one
+ // ends the current run of same-charset encoded-words.
+ // If there was stuff left over from decoding the last 2047 token, flush it
+ // out.
+ lastCharset = "";
+ if (currentDecoder) {
+ components[i] = currentDecoder.decode() + components[i];
+ currentDecoder = null;
+ }
+ }
+
+ // After the for loop, we'll have a set of decoded strings. Concatenate them
+ // together to make the return value.
+ return cleanToken(components.join(""));
+ }
+
+ // Structured field decoders
+ // -------------------------
+
+ /**
+ * Extract a list of addresses from a header which matches the RFC 5322
+ * address-list production, possibly doing RFC 2047 decoding along the way.
+ *
+ * The output of this method is an array of elements corresponding to the
+ * addresses and the groups in the input header. An address is represented by
+ * an object of the form:
+ * {
+ * name: The display name of the address
+ * email: The address of the object
+ * }
+ * while a group is represented by an object of the form:
+ * {
+ * name: The display name of the group
+ * group: An array of address object for members in the group.
+ * }
+ *
+ * @param {string} header - The MIME header text to be parsed
+ * @param {boolean} doRFC2047 If true, decode RFC 2047 parameters found in the
+ * header.
+ * @returns {(Address|Group)[]} An array of the addresses found in the header,
+ * where each element is of the form mentioned
+ * above.
+ */
+ function parseAddressingHeader(header, doRFC2047) {
+ // Overview: the header is tokenized by getHeaderTokens and the tokens drive a
+ // small state machine. Finished mailboxes are pushed onto addrlist; addrlist
+ // is then either merged into results or wrapped in a group object.
+
+ // Default to true
+ if (doRFC2047 === undefined) {
+ doRFC2047 = true;
+ }
+
+ // The final (top-level) results list to append to.
+ let results = [];
+ // Mailboxes collected since the last ':' or ';'. They end up either as the
+ // members of a group or merged into results.
+ let addrlist = [];
+
+ // Build up all of the values:
+ // name - display-name candidate for the current mailbox
+ // groupName - the name captured when a ':' is seen
+ // localPart - text seen outside <>, promoted to address if an '@' follows
+ // address - the addr-spec being assembled
+ // comment - text of the comment currently being read, parentheses included
+ let name = "",
+ groupName = "",
+ localPart = "",
+ address = "",
+ comment = "";
+ // Indicators of current state:
+ // inAngle - inside <...>, or past an '@' seen outside of <...>
+ // inComment - between '(' and ')'
+ // needsSpace - the next text token gets a leading space
+ // afterAddress - a '>' was seen; reset when the mailbox is committed
+ let inAngle = false,
+ inComment = false,
+ needsSpace = false,
+ afterAddress = false;
+ // Value of needsSpace when the current comment was opened; restored when the
+ // comment is closed inside an addr-spec (where the comment is ignored).
+ let preserveSpace = false;
+ // True right after a ')' so that the next text token is appended to localPart
+ // without a separating space.
+ let commentClosed = false;
+
+ // RFC 5322 §3.4 notes that legacy implementations exist which use a simple
+ // recipient form where the addr-spec appears without the angle brackets,
+ // but includes the name of the recipient in parentheses as a comment
+ // following the addr-spec. While we do not create this format, we still
+ // want to recognize it, though.
+ // Furthermore, despite allowing comments in addresses, RFC 5322 §3.4 notes
+ // that legacy implementations may interpret the comment, and thus it
+ // recommends not to use them. (Also, they may be illegal as per RFC 5321.)
+ // While we do not create address fields with comments, we recognize such
+ // comments during parsing and (a) either drop them if inside addr-spec or
+ // (b) preserve them as part of the display-name if not.
+ // If the display-name is empty while the last comment is not, we assume it's
+ // the legacy form above and take the comment content as the display-name.
+ //
+ // When parsing the address field, we at first do not know whether any
+ // strings belong to the display-name (which may include comments) or to the
+ // local-part of an addr-spec (where we ignore comments) until we find an
+ // '@' or an '<' token. Thus, we collect both variants until the fog lifts,
+ // plus the last comment seen.
+ let lastComment = "";
+
+ /**
+ * Add the parsed mailbox object to the address list.
+ * If it's in the legacy form above, correct the display-name.
+ * Also reset the per-mailbox variables and flags (name, localPart, address,
+ * lastComment, inAngle, inComment, needsSpace, afterAddress).
+ * Nothing is added when both the display-name and the addr-spec are empty.
+ *
+ * @param {string} displayName - display-name as per RFC 5322
+ * @param {string} addrSpec - addr-spec as per RFC 5322
+ */
+ function addToAddrList(displayName, addrSpec) {
+ // Keep the local-part quoted if it needs to be. The local-part is everything
+ // before the last '@'; backslashes and double quotes in it are escaped.
+ let lp = addrSpec.substring(0, addrSpec.lastIndexOf("@"));
+ if (/[ !()<>\[\]:;@\\,"]/.exec(lp) !== null) {
+ addrSpec =
+ '"' +
+ lp.replace(/([\\"])/g, "\\$1") +
+ '"' +
+ addrSpec.substring(addrSpec.lastIndexOf("@"));
+ }
+
+ // Replace all whitespace characters with a single whitespace,
+ // to avoid consecutive whitespace and also to normalize tabs and newlines.
+ displayName = displayName.replace(/\s+/g, " ").trim();
+
+ if (displayName === "" && lastComment !== "") {
+ // Take last comment content as the display-name. lastComment is stored as
+ // either "(...)" or " (...)", so skip the opening "(" (plus the optional
+ // leading space) and drop the closing ")".
+ let offset = lastComment[0] === " " ? 2 : 1;
+ displayName = lastComment.substr(
+ offset,
+ lastComment.length - offset - 1
+ );
+ }
+ if (displayName !== "" || addrSpec !== "") {
+ addrlist.push({ name: displayName, email: addrSpec });
+ }
+ // Clear pending flags and variables.
+ name = localPart = address = lastComment = "";
+ inAngle = inComment = needsSpace = afterAddress = false;
+ }
+
+ // Main parsing loop
+ for (let token of getHeaderTokens(header, ":,;<>@", {
+ qstring: true,
+ comments: true,
+ dliteral: true,
+ rfc2047: doRFC2047,
+ })) {
+ if (token === ":") {
+ // What was collected as a name so far is the group's display name.
+ groupName = name;
+ name = "";
+ localPart = "";
+ // If we had prior email address results, commit them to the top-level.
+ if (addrlist.length > 0) {
+ results = results.concat(addrlist);
+ }
+ addrlist = [];
+ } else if (token === "<" && !afterAddress) {
+ if (inAngle) {
+ // Interpret the address we were parsing as a name.
+ if (address.length > 0) {
+ name = address;
+ }
+ localPart = address = "";
+ } else {
+ inAngle = true;
+ }
+ } else if (token === ">" && !afterAddress) {
+ inAngle = false;
+ // Forget addr-spec comments.
+ lastComment = "";
+ afterAddress = true;
+ } else if (token === "(") {
+ inComment = true;
+ // The needsSpace flag may not always be set even if it should be,
+ // e.g. for a comment behind an angle-addr.
+ // Also, we need to restore the needsSpace flag if we ignore the comment.
+ preserveSpace = needsSpace;
+ if (!needsSpace) {
+ needsSpace = name !== "" && name.substr(-1) !== " ";
+ }
+ comment = needsSpace ? " (" : "(";
+ commentClosed = false;
+ } else if (token === ")") {
+ inComment = false;
+ comment += ")";
+ lastComment = comment;
+ // The comment may be part of the name, but not of the local-part.
+ // Enforce a space behind the comment only when not ignoring it.
+ if (inAngle) {
+ needsSpace = preserveSpace;
+ } else {
+ name += comment;
+ needsSpace = true;
+ }
+ commentClosed = true;
+ // Skip the needsSpace reset at the bottom of the loop.
+ continue;
+ } else if (token === "@") {
+ // An '@' after a closing '>' is dropped.
+ if (afterAddress) {
+ continue;
+ }
+ // An @ means we see an email address. If we're not within <> brackets,
+ // then we just parsed an email address instead of a display name. Empty
+ // out the display name for the current production.
+ if (!inAngle) {
+ address = localPart;
+ name = "";
+ localPart = "";
+ // The remainder of this mailbox is part of an addr-spec.
+ inAngle = true;
+ }
+ address += "@";
+ } else if (token === ",") {
+ // A comma ends the current name. If we have something that's kind of a
+ // name, add it to the result list. If we don't, then our input looks like
+ // To: , , -> don't bother adding an empty entry.
+ addToAddrList(name, address);
+ afterAddress = false;
+ } else if (token === ";") {
+ // Add pending name to the list
+ addToAddrList(name, address);
+
+ // If no group name was found, treat the ';' as a ','. In any case, we
+ // need to copy the results of addrlist into either a new group object or
+ // the main list.
+ if (groupName === "") {
+ results = results.concat(addrlist);
+ } else {
+ results.push({
+ name: groupName,
+ group: addrlist,
+ });
+ }
+ // ... and reset every other variable.
+ addrlist = [];
+ groupName = "";
+ } else {
+ // This is either comment content, a quoted-string, or some span of
+ // dots and atoms. A '<' or '>' seen while afterAddress is set also ends up
+ // here, because the branches above exclude that case.
+ token = cleanToken(token.toString());
+
+ // Prefix a space when one is pending, except for empty tokens and tokens
+ // that start with a '.'.
+ let spacedToken = token;
+ if (needsSpace && token && token[0] != ".") {
+ spacedToken = " " + spacedToken;
+ }
+
+ // Which field do we add this data to?
+ if (inComment) {
+ comment += spacedToken;
+ } else if (inAngle) {
+ address += spacedToken;
+ } else {
+ // Text following a closing '>' is kept out of the display name.
+ if (!afterAddress) {
+ name += spacedToken;
+ }
+ // Never add a space to the local-part, if we just ignored a comment.
+ if (commentClosed) {
+ localPart += token;
+ commentClosed = false;
+ } else {
+ localPart += spacedToken;
+ }
+ }
+
+ // We need space for the next token if we aren't some kind of comment or
+ // . delimiter. Note that this stores a falsy value (possibly the empty
+ // string) rather than strictly false when the token is empty.
+ needsSpace = token && token[0] != ".";
+ // The fall-through case after this resets needsSpace to false, and we
+ // don't want that!
+ continue;
+ }
+
+ // If we just parsed a delimiter, we don't need any space for the next
+ // token.
+ needsSpace = false;
+ }
+
+ // If we're missing the final ';' of a group, assume it was present. Also, add
+ // in the details of any email/address that we previously saw.
+ addToAddrList(name, address);
+ if (groupName !== "") {
+ results.push({ name: groupName, group: addrlist });
+ addrlist = [];
+ }
+
+ // Add the current address list build-up to the list of addresses, and return
+ // the whole array to the caller.
+ return results.concat(addrlist);
+ }
+
+ /**
+ * Extract parameters from a header which is a series of ;-separated
+ * attribute=value tokens.
+ *
+ * @param {string} headerValue The MIME header value to parse.
+ * @param {boolean} doRFC2047 - If true, decode RFC 2047 encoded-words.
+ * @param {boolean} doRFC2231 - If true, decode RFC 2231 encoded parameters.
+ * @returns {Map(String -> String)} A map of parameter names to parameter values.
+ * The property preSemi is set to the token that
+ * precedes the first semicolon.
+ */
+ /* eslint-disable complexity */
+ function parseParameterHeader(headerValue, doRFC2047, doRFC2231) {
+ // The basic syntax of headerValue is token [; token = token-or-qstring]*
+ // Copying more or less liberally from nsMIMEHeaderParamImpl:
+ // The first token is the text to the first whitespace or semicolon.
+ var semi = headerValue.indexOf(";");
+ let start, rest;
+ if (semi < 0) {
+ start = headerValue;
+ rest = "";
+ } else {
+ start = headerValue.substring(0, semi);
+ rest = headerValue.substring(semi); // Include the semicolon
+ }
+ // Keep only the first whitespace-delimited word of start.
+ start = start.trim().split(/[ \t\r\n]/)[0];
+
+ // Decode the parameter tokens.
+ let opts = { qstring: true, rfc2047: doRFC2047 };
+ // name is the name of the parameter currently being read. inName is true
+ // until the '=' following that name has been seen.
+ let name = "",
+ inName = true;
+ // Matches is a list of [name, value] pairs, where we found something that
+ // looks like name=value in the input string.
+ let matches = [];
+ for (let token of getHeaderTokens(rest, ";=", opts)) {
+ if (token === ";") {
+ // If we saw "name =" but no value before this ';', record the name with
+ // an empty value.
+ if (name != "" && !inName) {
+ matches.push([name, ""]);
+ }
+ name = "";
+ inName = true;
+ } else if (token === "=") {
+ inName = false;
+ } else if (inName && name == "") {
+ name = token.toString();
+ } else if (!inName && name != "") {
+ token = token.toString();
+ // RFC 2231 doesn't make it clear if %-encoding is supposed to happen
+ // within a quoted string, but this is very much required in practice. If
+ // it ends with a '*', then the string is an extended-value, which means
+ // that its value may be %-encoded.
+ if (doRFC2231 && name.endsWith("*")) {
+ token = token.replace(
+ /%([0-9A-Fa-f]{2})/g,
+ function (match, hexchars) {
+ return String.fromCharCode(parseInt(hexchars, 16));
+ }
+ );
+ }
+ matches.push([name, token]);
+ // Clear the name, so we ignore anything afterwards.
+ name = "";
+ } else if (inName) {
+ // We have ...; tokenA tokenB ... -> ignore both tokens
+ name = ""; // Error recovery, ignore this one
+ }
+ }
+ // If the input ended right after "name =", record the name with an empty
+ // value.
+ if (name != "" && !inName) {
+ matches.push([name, ""]);
+ }
+
+ // Now matches holds the parameters, so clean up for RFC 2231. There are three
+ // cases: param=val, param*=us-ascii'en-US'blah, and param*n= variants. The
+ // order of preference is to pick the middle, then the last, then the first.
+ // Note that we already unpacked %-encoded values.
+
+ // simpleValues is just a straight parameter -> value map.
+ // charsetValues is the parameter -> value map, although values are stored
+ // before charset decoding happens.
+ // continuationValues maps parameter -> array of values, with extra properties
+ // valid (if we decided we couldn't do anything anymore) and hasCharset (which
+ // records if we need to decode the charset parameter or not).
+ var simpleValues = new Map(),
+ charsetValues = new Map(),
+ continuationValues = new Map();
+ for (let pair of matches) {
+ let name = pair[0];
+ let value = pair[1];
+ // Get first index, not last index, so we match param*0*= like param*0=.
+ let star = name.indexOf("*");
+ if (star == -1) {
+ // This is the case of param=val. Select the first value here, if there
+ // are multiple ones.
+ if (!simpleValues.has(name)) {
+ simpleValues.set(name, value);
+ }
+ } else if (star == name.length - 1) {
+ // This is the case of param*=us-ascii'en-US'blah.
+ name = name.substring(0, star);
+ // Again, select only the first value here.
+ if (!charsetValues.has(name)) {
+ charsetValues.set(name, value);
+ }
+ } else {
+ // This is the case of param*0= or param*0*=.
+ let param = name.substring(0, star);
+ let entry = continuationValues.get(param);
+ // Did we previously find this one to be bungled? Then ignore it.
+ if (continuationValues.has(param) && !entry.valid) {
+ continue;
+ }
+
+ // If we haven't seen it yet, set up entry already. Entries are arrays
+ // indexed by continuation number, carrying the extra properties valid and
+ // hasCharset.
+ if (!continuationValues.has(param)) {
+ entry = [];
+ entry.valid = true;
+ entry.hasCharset = undefined;
+ continuationValues.set(param, entry);
+ }
+
+ // When the string ends in *, we need to do charset decoding.
+ // Note that the star is only meaningful for the *0*= case.
+ let lastStar = name[name.length - 1] == "*";
+ let number = name.substring(
+ star + 1,
+ name.length - (lastStar ? 1 : 0)
+ );
+ if (number == "0") {
+ entry.hasCharset = lastStar;
+ } else if (
+ number.length == 0 ||
+ (number[0] == "0" && number != "0") ||
+ !/^[0-9]+$/.test(number)
+ ) {
+ // Is the continuation number illegal? (Empty, leading zero, or not made
+ // of decimal digits only.)
+ entry.valid = false;
+ continue;
+ }
+ // Normalize to an integer
+ number = parseInt(number, 10);
+
+ // Is this a repeat? If so, bail.
+ if (entry[number] !== undefined) {
+ entry.valid = false;
+ continue;
+ }
+
+ // Set the value for this continuation index. JS's magic array setter will
+ // expand the array if necessary.
+ entry[number] = value;
+ }
+ }
+
+ // Build the actual parameter array from the parsed values
+ var values = new Map();
+ // Simple values have lowest priority, so just add everything into the result
+ // now.
+ for (let pair of simpleValues) {
+ values.set(pair[0], pair[1]);
+ }
+
+ if (doRFC2231) {
+ // Continuation values come next
+ // NOTE(review): entry.valid is not consulted in this loop, so segments
+ // gathered before an entry was marked invalid are still emitted -- confirm
+ // that this is intended.
+ for (let pair of continuationValues) {
+ let name = pair[0];
+ let entry = pair[1];
+ // If we never saw a param*0= or param*0*= value, then we can't do any
+ // reasoning about what it looks like, so bail out now.
+ if (entry.hasCharset === undefined) {
+ continue;
+ }
+
+ // Use as many entries in the array as are valid--if we are missing an
+ // entry, stop there. i is declared with var because it is read after the
+ // loop. When a hole is found, i still gets incremented once more, so the
+ // slice below includes the hole; join() renders it as an empty string.
+ let valid = true;
+ for (var i = 0; valid && i < entry.length; i++) {
+ if (entry[i] === undefined) {
+ valid = false;
+ }
+ }
+
+ // Concatenate as many parameters as are valid. If we need to decode the
+ // charset, do so now.
+ let value = entry.slice(0, i).join("");
+ if (entry.hasCharset) {
+ try {
+ value = decode2231Value(value);
+ } catch (e) {
+ // Bad charset, don't add anything.
+ continue;
+ }
+ }
+ // Finally, add this to the output array.
+ values.set(name, value);
+ }
+
+ // Highest priority is the charset conversion.
+ for (let pair of charsetValues) {
+ try {
+ values.set(pair[0], decode2231Value(pair[1]));
+ } catch (e) {
+ // Bad charset, don't add anything.
+ }
+ }
+ }
+
+ // Run every resulting value through cleanToken.
+ for (let [key, value] of values.entries()) {
+ values.set(key, cleanToken(value));
+ }
+
+ // Finally, return the values computed above. preSemi is attached as a plain
+ // property on the Map object, not as a Map entry.
+ values.preSemi = start;
+ return values;
+ }
+ /* eslint-enable complexity */
+
+ /**
+ * Convert a RFC 2231-encoded string parameter into a Unicode version of the
+ * string. This assumes that percent-decoding has already been applied.
+ *
+ * @param {string} value The RFC 2231-encoded string to decode.
+ * @returns The Unicode version of the string.
+ */
+ function decode2231Value(value) {
+ let quote1 = value.indexOf("'");
+ let quote2 = quote1 >= 0 ? value.indexOf("'", quote1 + 1) : -1;
+
+ let charset = quote1 >= 0 ? value.substring(0, quote1) : "";
+ // It turns out that the language isn't useful anywhere in our codebase for
+ // the present time, so we will safely ignore it.
+ // var language = (quote2 >= 0 ? value.substring(quote1 + 2, quote2) : "");
+ value = value.substring(Math.max(quote1, quote2) + 1);
+
+ // Convert the value into a typed array for decoding
+ let typedarray = mimeutils.stringToTypedArray(value);
+
+ // Decode the charset. If the charset isn't found, we throw an error. Try to
+ // fallback in that case.
+ return new MimeTextDecoder(charset, { fatal: true }).decode(typedarray, {
+ stream: false,
+ });
+ }
+
+ // This is a map of known timezone abbreviations, for fallback in obsolete Date
+ // productions. Keys are upper-case abbreviations; values are offsets written
+ // as a sign followed by four digits (hhmm), which is the form parseDateHeader
+ // matches after looking an abbreviation up here.
+ var kKnownTZs = {
+ // The following timezones are explicitly listed in RFC 5322.
+ UT: "+0000",
+ GMT: "+0000",
+ EST: "-0500",
+ EDT: "-0400",
+ CST: "-0600",
+ CDT: "-0500",
+ MST: "-0700",
+ MDT: "-0600",
+ PST: "-0800",
+ PDT: "-0700",
+ // The following are time zones copied from NSPR's prtime.c
+ AST: "-0400", // Atlantic Standard Time
+ NST: "-0330", // Newfoundland Standard Time
+ BST: "+0100", // British Summer Time
+ MET: "+0100", // Middle Europe Time
+ EET: "+0200", // Eastern Europe Time
+ JST: "+0900", // Japan Standard Time
+ };
+
+ /**
+ * Parse a header that contains a date-time definition according to RFC 5322.
+ * The result is a JS date object with the same timestamp as the header.
+ *
+ * The dates returned by this parser cannot be reliably converted back into the
+ * original header for two reasons. First, JS date objects cannot retain the
+ * timezone information they were initialized with, so reserializing a date
+ * header would necessarily produce a date in either the current timezone or in
+ * UTC. Second, JS dates measure time as seconds elapsed from the POSIX epoch
+ * excluding leap seconds. Any timestamp containing a leap second is instead
+ * converted into one that represents the next second.
+ *
+ * Dates that do not match the RFC 5322 production are instead attempted to
+ * parse using the Date.parse function. The strings that are accepted by
+ * Date.parse are not fully defined by the standard, but most implementations
+ * should accept strings that look rather close to RFC 5322 strings. Truly
+ * invalid dates produce a formulation that results in an invalid date,
+ * detectable by having its .getTime() method return NaN.
+ *
+ * @param {string} header The MIME header value to parse.
+ * @returns {Date} The date contained within the header, as described
+ * above.
+ */
+ function parseDateHeader(header) {
+ let tokens = getHeaderTokens(header, ",:", {}).map(x => x.toString());
+ // What does a Date header look like? In practice, most date headers devolve
+ // into Date: [dow ,] dom mon year hh:mm:ss tzoff [(abbrev)], with the day of
+ // week mostly present and the timezone abbreviation mostly absent.
+
+ // First, ignore the day-of-the-week if present. This would be the first two
+ // tokens.
+ if (tokens.length > 1 && tokens[1] === ",") {
+ tokens = tokens.slice(2);
+ }
+
+ // If there are too few tokens, the date is obviously invalid.
+ if (tokens.length < 8) {
+ return new Date(NaN);
+ }
+
+ // Save off the numeric tokens
+ let day = parseInt(tokens[0]);
+ // month is tokens[1]
+ let year = parseInt(tokens[2]);
+ let hours = parseInt(tokens[3]);
+ // tokens[4] === ':'
+ let minutes = parseInt(tokens[5]);
+ // tokens[6] === ':'
+ let seconds = parseInt(tokens[7]);
+
+ // Compute the month. Check only the first three digits for equality; this
+ // allows us to accept, e.g., "January" in lieu of "Jan."
+ let month = mimeutils.kMonthNames.indexOf(tokens[1].slice(0, 3));
+ // If the month name is not recognized, make the result illegal.
+ if (month < 0) {
+ month = NaN;
+ }
+
+ // Compute the full year if it's only 2 digits. RFC 5322 states that the
+ // cutoff is 50 instead of 70.
+ if (year < 100) {
+ year += year < 50 ? 2000 : 1900;
+ }
+
+ // Compute the timezone offset. If it's not in the form ±hhmm, convert it to
+ // that form.
+ let tzoffset = tokens[8];
+ if (tzoffset in kKnownTZs) {
+ tzoffset = kKnownTZs[tzoffset];
+ }
+ let decompose = /^([+-])(\d\d)(\d\d)$/.exec(tzoffset);
+ // Unknown? Make it +0000
+ if (decompose === null) {
+ decompose = ["+0000", "+", "00", "00"];
+ }
+ let tzOffsetInMin = parseInt(decompose[2]) * 60 + parseInt(decompose[3]);
+ if (decompose[1] == "-") {
+ tzOffsetInMin = -tzOffsetInMin;
+ }
+
+ // How do we make the date at this point? Well, the JS date's constructor
+ // builds the time in terms of the local timezone. To account for the offset
+ // properly, we need to build in UTC.
+ let finalDate = new Date(
+ Date.UTC(year, month, day, hours, minutes, seconds) -
+ tzOffsetInMin * 60 * 1000
+ );
+
+ // Suppose our header was mangled and we couldn't read it--some of the fields
+ // became undefined. In that case, the date would become invalid, and the
+ // indication that it is so is that the underlying number is a NaN. In that
+ // scenario, we could build attempt to use JS Date parsing as a last-ditch
+ // attempt. But it's not clear that such messages really exist in practice,
+ // and the valid formats for Date in ES6 are unspecified.
+ return finalDate;
+ }
+
+ // Structured header decoding support
+ // ----------------------------------
+
+ // Load the default structured decoders
+ var structuredDecoders = new Map();
+ var structuredHeaders = require("./structuredHeaders");
+ var preferredSpellings = structuredHeaders.spellings;
+ var forbiddenHeaders = new Set();
+ for (let pair of structuredHeaders.decoders) {
+ addStructuredDecoder(pair[0], pair[1]);
+ forbiddenHeaders.add(pair[0].toLowerCase());
+ }
+
+ /**
+ * Use an already-registered structured decoder to parse the value of the header
+ * into a structured representation.
+ *
+ * As this method is designed to be used for the internal MIME Parser to convert
+ * the raw header values to well-structured values, value is intended to be an
+ * array consisting of all occurrences of the header in order. However, for ease
+ * of use by other callers, it can also be treated as a string.
+ *
+ * If the decoder for the header is not found, an exception will be thrown.
+ *
+ * A large set of headers have pre-defined structured decoders; these decoders
+ * cannot be overridden with addStructuredDecoder, as doing so could prevent the
+ * MIME or message parsers from working properly. The pre-defined structured
+ * headers break down into five classes of results, plus some ad-hoc
+ * representations. They are:
+ *
+ * Addressing headers (results are the same as parseAddressingHeader):
+ * - Approved
+ * - Bcc
+ * - Cc
+ * - Delivered-To
+ * - Disposition-Notification-To
+ * - From
+ * - Mail-Reply-To
+ * - Mail-Followup-To
+ * - Reply-To
+ * - Resent-Bcc
+ * - Resent-Cc
+ * - Resent-From
+ * - Resent-Reply-To
+ * - Resent-Sender
+ * - Resent-To
+ * - Return-Receipt-To
+ * - Sender
+ * - To
+ *
+ * Date headers (results are the same as parseDateHeader):
+ * - Date
+ * - Expires
+ * - Injection-Date
+ * - NNTP-Posting-Date
+ * - Resent-Date
+ *
+ * References headers (results are the same as parseReferencesHeader):
+ * - (TODO: Parsing support for these headers is currently unsupported)
+ *
+ * Message-ID headers (results are the first entry of the result of
+ * parseReferencesHeader):
+ * - (TODO: Parsing support for these headers is currently unsupported)
+ *
+ * Unstructured headers (results are merely decoded according to RFC 2047):
+ * - Comments
+ * - Content-Description
+ * - Keywords
+ * - Subject
+ *
+ * The ad-hoc headers and their resulting formats are as follows:
+ * Content-Type: returns a JS Map of parameter names (in lower case) to their
+ * values, along with the following extra properties defined on the map:
+ * - mediatype: the type to the left of '/' (e.g., 'text', 'message')
+ * - subtype: the type to the right of '/' (e.g., 'plain', 'rfc822')
+ * - type: the full typename (e.g., 'text/plain')
+ * RFC 2047 and RFC 2231 decoding is applied where appropriate. The values of
+ * the type, mediatype, and subtype attributes are all normalized to lower-case,
+ * as are the names of all parameters.
+ *
+ * Content-Transfer-Encoding: the first value is converted to lower-case.
+ *
+ * @param {string} header The name of the header of the values.
+ * @param {string | Array} value The value(s) of the headers, after charset
+ * conversion (if any) has been applied. If it is
+ * an array, the headers are listed in the order
+ * they appear in the message.
+ * @returns {object} A structured representation of the header values.
+ */
+ function parseStructuredHeader(header, value) {
+ // Enforce that the parameter is an array. If it's a string, make it a
+ // 1-element array.
+ if (typeof value === "string" || value instanceof String) {
+ value = [value];
+ }
+ if (!Array.isArray(value)) {
+ throw new TypeError("Header value is not an array: " + value);
+ }
+
+ // Lookup the header in our decoders; if present, use that to decode the
+ // header.
+ let lowerHeader = header.toLowerCase();
+ if (structuredDecoders.has(lowerHeader)) {
+ return structuredDecoders.get(lowerHeader).call(headerparser, value);
+ }
+
+ // If not present, throw an exception.
+ throw new Error("Unknown structured header: " + header);
+ }
+
+ /**
+ * Add a custom structured MIME decoder to the set of known decoders. These
+ * decoders are used for {@link parseStructuredHeader} and similar functions to
+ * encode richer, more structured values instead of relying on string
+ * representations everywhere.
+ *
+ * Structured decoders are functions which take in a single parameter consisting
+ * of an array of the string values of the header, in order that they appear in
+ * the message. These headers have had the charset conversion (if necessary)
+ * applied to them already. The this parameter of the function is set to be the
+ * jsmime.headerparser module.
+ *
+ * There is a large set of structured decoders built-in to the jsmime library
+ * already. As these headers are fundamental to the workings of jsmime,
+ * attempting to replace them with a custom version will instead produce an
+ * exception.
+ *
+ * @param {string} header The header name (in any case)
+ * for which the decoder will be
+ * used.
+ * @param {Function(String[] -> Object)} decoder The structured decoder
+ * function.
+ */
+ function addStructuredDecoder(header, decoder) {
+ let lowerHeader = header.toLowerCase();
+ if (forbiddenHeaders.has(lowerHeader)) {
+ throw new Error("Cannot override header: " + header);
+ }
+ structuredDecoders.set(lowerHeader, decoder);
+ if (!preferredSpellings.has(lowerHeader)) {
+ preferredSpellings.set(lowerHeader, header);
+ }
+ }
+
+ headerparser.addStructuredDecoder = addStructuredDecoder;
+ headerparser.convert8BitHeader = convert8BitHeader;
+ headerparser.decodeRFC2047Words = decodeRFC2047Words;
+ headerparser.getHeaderTokens = getHeaderTokens;
+ headerparser.parseAddressingHeader = parseAddressingHeader;
+ headerparser.parseDateHeader = parseDateHeader;
+ headerparser.parseParameterHeader = parseParameterHeader;
+ headerparser.parseStructuredHeader = parseStructuredHeader;
+ return Object.freeze(headerparser);
+ });
+
+ // JavaScript Raw MIME Parser
+ // --------------------------
+
+ /**
+ * The parser implemented in this file produces a MIME part tree for a given
+ * input message via a streaming callback interface. It does not, by itself,
+ * understand concepts like attachments (hence the term 'Raw'); the consumer
+ * must translate output into such a format.
+ *
+ * Charsets:
+ * The MIME specifications permit a single message to contain multiple charsets
+ * (or perhaps none) as raw octets. As JavaScript strings are implicitly
+ * implemented in UTF-16, it is possible that some engines will attempt to
+ * convert these strings using an incorrect charset or simply fail to convert
+ * them at all. This parser assumes that its input is in the form of a "binary
+ * string", a string that uses only the first 256 characters of Unicode to
+ * represent the individual octets. To verify that charsets are not getting
+ * mangled elsewhere in the pipeline, the auxiliary test file test/data/charsets
+ * can be used.
+ *
+ * This parser attempts to hide the charset details from clients as much as
+ * possible. The resulting values of structured headers are always converted
+ * into proper Unicode strings before being exposed to clients; getting at the
+ * raw binary string data can only be done via getRawHeader. The .charset
+ * parameter on header objects, if changed, changes the fallback charset used
+ * for headers. It is initialized to the presumed charset of the corresponding
+ * part, taking into account the charset and force-charset options of the
+ * parser. Body parts are only converted into Unicode strings if the strformat
+ * option is set to Unicode. Even then, only the bodies of parts with a media
+ * type of text are converted to Unicode strings using available charset data;
+ * other parts are retained as Uint8Array objects.
+ *
+ * Part numbering:
+ * Since the output is a streaming format, individual parts are identified by a
+ * numbering scheme. The intent of the numbering scheme for parts is to comply
+ * with the part numbers as dictated by RFC 3501 as much as possible; however,
+ * that scheme does have several edge cases which would, if strictly followed,
+ * make it impossible to refer to certain parts of the message. In addition, we
+ * wish to make it possible to refer to parts which are not discoverable in the
+ * original MIME tree but are still viewable as parts. The part numbering
+ * scheme is as follows:
+ * - Individual sections of a multipart/* body are numbered in increasing order
+ * sequentially, starting from 1. Note that the prologue and the epilogue of
+ * a multipart/* body are not considered entities and are therefore not
+ * included in the part numbering scheme (there is no way to refer to them).
+ * - The numbers of multipart/* parts are separated by `.' characters.
+ * - The outermost message is referred to by use of the empty string.
+ * --> The following segments are not accounted for by IMAP part numbering. <--
+ * - The body of any message/rfc822 or similar part is distinguished from the
+ * message part as a whole by appending a `$' character. This does not apply
+ * to the outermost message/rfc822 envelope.
+ */
+
+ def("mimeparser", function (require) {
+ "use strict";
+
+ var mimeutils = require("./mimeutils");
+ var headerparser = require("./headerparser");
+ var spellings = require("./structuredHeaders").spellings;
+
+ /**
+ * An object that represents the structured MIME headers for a message.
+ *
+ * This class is primarily used as the 'headers' parameter in the startPart
+ * callback on handlers for MimeParser. As such, it is designed to do the right
+ * thing in common cases as much as possible, with some advanced customization
+ * possible for clients that need such flexibility.
+ *
+ * In a nutshell, this class stores the raw headers as an internal Map. The
+ * structured headers are not computed until they are actually used, which means
+ * that potentially expensive structuring (e.g., doing manual DKIM validation)
+ * can be performed as a structured decoder without impeding performance for
+ * those who just want a few common headers.
+ *
+ * The outer API of this class is intended to be similar to a read-only Map
+ * object (complete with iterability support), with a few extra properties to
+ * represent things that are hard to determine properly from headers. The keys
+ * used are "preferred spellings" of the headers, although the get and has
+ * methods will accept header parameters of any case. Preferred spellings are
+ * derived from the name passed to addStructuredDecoder/addStructuredEncoder; if
+ * no structured decoder has been registered, then the name capitalizes the
+ * first letter of every word in the header name.
+ *
+ * Extra properties compared to a Map object are:
+ * - charset: This field represents the assumed charset of the associated MIME
+ * body. It is prefilled using a combination of the charset and force-charset
+ * options on the associated MimeParser instance as well as attempting to find
+ * a charset parameter in the Content-Type header.
+ *
+ * If the force-charset option is false, the charset is guessed first using
+ * the Content-Type header's charset parameter, falling back to the charset
+ * option if it is present. If the force-charset option is true, the charset
+ * is initially set to the charset option. This initial guessed value can be
+ * overridden at any time by simply setting the field on this object.
+ *
+ * The charset is better reflected as a parameter of the body rather than the
+ * headers; this is ultimately the charset parameter that will be used if a
+ * body part is being converted to a Unicode strformat. Headers are converted
+ * using headerparser.convert8BitHeader, and this field is used as the
+ * fallbackCharset parameter, which will always attempt to decode as UTF-8
+ * first (in accordance with RFC 6532) and will refuse to decode as UTF-16 or
+ * UTF-32, as ASCII is not a subset of those charsets.
+ *
+ * - rawHeaderText: This read-only field contains the original header text from
+ * which headers were parsed, preserving case and whitespace (including
+ * alternate line endings instead of CRLF) exactly. If the header text begins
+ * with the mbox delimiter (i.e., a line that begins with "From "), then that
+ * is excluded from the rawHeaderText value and is not reflected anywhere in
+ * this object.
+ *
+ * - contentType: This field contains the structured representation of the
+ * Content-Type header, if it is present. If it is not present, it is set to
+ * the structured representation of the default Content-Type for a part (as
+ * this data is not easily guessed given only MIME tree events).
+ *
+ * The constructor for these objects is not externally exported, and thus they
+ * can only be created via MimeParser.
+ *
+ * @param rawHeaderText {BinaryString} The contents of the MIME headers to be
+ * parsed.
+ * @param options {Object} Options for the header parser.
+ * @param options.stripcontinuations {Boolean} If true, elide CRLFs from the
+ * raw header output.
+ */
+ function StructuredHeaders(rawHeaderText, options) {
+ // An individual header is terminated by a CRLF, except if the CRLF is
+ // followed by a SP or TAB. Use negative lookahead to capture the latter case,
+ // and don't capture the strings or else split results get nasty.
+ let values = rawHeaderText.split(/(?:\r\n|\n)(?![ \t])|\r(?![ \t\n])/);
+
+ // Ignore the first "header" if it begins with an mbox delimiter
+ if (values.length > 0 && values[0].substring(0, 5) == "From ") {
+ values.shift();
+ // Elide the mbox delimiter from this._headerData
+ if (values.length == 0) {
+ rawHeaderText = "";
+ } else {
+ rawHeaderText = rawHeaderText.substring(
+ rawHeaderText.indexOf(values[0])
+ );
+ }
+ }
+
+ let headers = new Map();
+ for (let i = 0; i < values.length; i++) {
+ // Look for a colon. If it's not present, this header line is malformed,
+ // perhaps by premature EOF or similar.
+ let colon = values[i].indexOf(":");
+ let header, val;
+ if (colon >= 0) {
+ header = values[i].substring(0, colon);
+ val = values[i].substring(colon + 1).trim();
+ if (options.stripcontinuations) {
+ val = val.replace(/[\r\n]/g, "");
+ }
+ } else {
+ header = values[i];
+ val = "";
+ }
+
+ // Canonicalize the header in lower-case form.
+ header = header.trim().toLowerCase();
+ // Omit "empty" headers
+ if (header == "") {
+ continue;
+ }
+
+ // We keep an array of values for each header, since a given header may be
+ // repeated multiple times.
+ if (headers.has(header)) {
+ headers.get(header).push(val);
+ } else {
+ headers.set(header, [val]);
+ }
+ }
+
+ /**
+ * A map of header names to arrays of raw values found in this header block.
+ *
+ * @private
+ */
+ this._rawHeaders = headers;
+ /**
+ * Cached results of structured header parsing.
+ *
+ * @private
+ */
+ this._cachedHeaders = new Map();
+ Object.defineProperty(this, "rawHeaderText", {
+ get() {
+ return rawHeaderText;
+ },
+ });
+ Object.defineProperty(this, "size", {
+ get() {
+ return this._rawHeaders.size;
+ },
+ });
+ Object.defineProperty(this, "charset", {
+ get() {
+ return this._charset;
+ },
+ set(value) {
+ this._charset = value;
+ // Clear the cached headers, since this could change their values
+ this._cachedHeaders.clear();
+ },
+ });
+
+ // Default to the charset, until the message parser overrides us.
+ if ("charset" in options) {
+ this._charset = options.charset;
+ } else {
+ this._charset = null;
+ }
+
+ // If we have a Content-Type header, set contentType to return the structured
+ // representation. We don't set the value off the bat, since we want to let
+ // someone who changes the charset affect the values of 8-bit parameters.
+ Object.defineProperty(this, "contentType", {
+ configurable: true,
+ get() {
+ return this.get("Content-Type");
+ },
+ });
+ }
+
+ /**
+ * Get a raw header.
+ *
+ * Raw headers are an array of the header values, listed in order that they were
+ * specified in the header block, and without any attempt to convert charsets or
+ * apply RFC 2047 decoding. For example, in the following message (where the
+ * <XX> is meant to represent binary-octets):
+ *
+ * X-Header: Value A
+ * X-Header: V<C3><A5>lue B
+ * Header2: Q
+ *
+ * the result of calling getRawHeader('X-Header') or getRawHeader('x-header')
+ * would be ['Value A', 'V\xC3\xA5lue B'] and the result of
+ * getRawHeader('Header2') would be ['Q'].
+ *
+ * @param headerName {String} The header name for which to get header values.
+ * @returns {BinaryString[]} The raw header values (with no charset conversion
+ * applied).
+ */
+ StructuredHeaders.prototype.getRawHeader = function (headerName) {
+ return this._rawHeaders.get(headerName.toLowerCase());
+ };
+
+ /**
+ * Retrieve a structured version of the header.
+ *
+ * If there is a registered structured decoder (registration happens via
+ * headerparser.addStructuredDecoder), then the result of calling that decoder
+ * on the charset-corrected version of the header is returned. Otherwise, the
+ * values are charset-corrected and RFC 2047 decoding is applied as if the
+ * header were an unstructured header.
+ *
+ * A substantial set of headers have pre-registed structured decoders, which, in
+ * some cases, are unable to be overridden due to their importance in the
+ * functioning of the parser code itself.
+ *
+ * @param headerName {String} The header name for which to get the header value.
+ * @returns The structured header value of the output.
+ */
+ StructuredHeaders.prototype.get = function (headerName) {
+ // Normalize the header name to lower case
+ headerName = headerName.toLowerCase();
+
+ // First, check the cache for the header value
+ if (this._cachedHeaders.has(headerName)) {
+ return this._cachedHeaders.get(headerName);
+ }
+
+ // Not cached? Grab it [propagating lack of header to caller]
+ let headerValue = this._rawHeaders.get(headerName);
+ if (headerValue === undefined) {
+ return headerValue;
+ }
+
+ // Convert the header to Unicode
+ let charset = this.charset;
+ headerValue = headerValue.map(function (value) {
+ return headerparser.convert8BitHeader(value, charset);
+ });
+
+ // If there is a structured decoder, use that; otherwise, assume that the
+ // header is unstructured and only do RFC 2047 conversion
+ let structured;
+ try {
+ structured = headerparser.parseStructuredHeader(
+ headerName,
+ headerValue
+ );
+ } catch (e) {
+ structured = headerValue.map(function (value) {
+ return headerparser.decodeRFC2047Words(value);
+ });
+ }
+
+ // Cache the result and return it
+ this._cachedHeaders.set(headerName, structured);
+ return structured;
+ };
+
+ /**
+ * Check if the message has the given header.
+ *
+ * @param headerName {String} The header name for which to get the header value.
+ * @returns {boolean} True if the header is present in this header block.
+ */
+ StructuredHeaders.prototype.has = function (headerName) {
+ // Check for presence in the raw headers instead of cached headers.
+ return this._rawHeaders.has(headerName.toLowerCase());
+ };
+
+ // Make a custom iterator. Presently, support for Symbol isn't yet present in
+ // SpiderMonkey (or V8 for that matter), so type-pun the name for now.
+ var JS_HAS_SYMBOLS = typeof Symbol === "function";
+ var ITERATOR_SYMBOL = JS_HAS_SYMBOLS ? Symbol.iterator : "@@iterator";
+
+ /**
+ * An equivalent of Map.@@iterator, applied to the structured header
+ * representations. This is the function that makes
+ * for (let [header, value] of headers) work properly.
+ */
+ StructuredHeaders.prototype[ITERATOR_SYMBOL] = function* () {
+ // Iterate over all the raw headers, and use the cached headers to retrieve
+ // them.
+ for (let headerName of this.keys()) {
+ yield [headerName, this.get(headerName)];
+ }
+ };
+
+ /**
+ * An equivalent of Map.forEach, applied to the structured header
+ * representations.
+ *
+ * @param callback {Function(value, name, headers)} The callback to call for
+ * each header/value combo.
+ * @param thisarg {Object} The parameter that will be
+ * the |this| of the callback.
+ */
+ StructuredHeaders.prototype.forEach = function (callback, thisarg) {
+ for (let [header, value] of this) {
+ callback.call(thisarg, value, header, this);
+ }
+ };
+
+ /**
+ * An equivalent of Map.entries, applied to the structured header
+ * representations.
+ */
+ StructuredHeaders.prototype.entries =
+ StructuredHeaders.prototype[Symbol.iterator];
+
+ // This function maps lower case names to a pseudo-preferred spelling.
+ function capitalize(headerName) {
+ return headerName.replace(/\b[a-z]/g, function (match) {
+ return match.toUpperCase();
+ });
+ }
+
+ /**
+ * An equivalent of Map.keys, applied to the structured header representations.
+ */
+ StructuredHeaders.prototype.keys = function* () {
+ for (let name of this._rawHeaders.keys()) {
+ yield spellings.get(name) || capitalize(name);
+ }
+ };
+
+ /**
+ * An equivalent of Map.values, applied to the structured header
+ * representations.
+ */
+ StructuredHeaders.prototype.values = function* () {
+ for (let [, value] of this) {
+ yield value;
+ }
+ };
+
+ /**
+ * A MIME parser.
+ *
+ * The inputs to the constructor consist of a callback object which receives
+ * information about the output data and an optional object containing the
+ * settings for the parser.
+ *
+ * The first parameter, emitter, is an object which contains several callbacks.
+ * Note that any and all of these methods are optional; the parser will not
+ * crash if one is missing. The callbacks are as follows:
+ * startMessage()
+ * Called when the stream to be parsed has started delivering data. This
+ * will be called exactly once, before any other call.
+ * endMessage()
+ * Called after all data has been delivered and the message parsing has
+ * been completed. This will be called exactly once, after any other call.
+ * startPart(string partNum, object headers)
+ * Called after the headers for a body part (including the top-level
+ * message) have been parsed. The first parameter is the part number (see
+ * the discussion on part numbering). The second parameter is an instance
+ * of StructuredHeaders that represents all of the headers for the part.
+ * endPart(string partNum)
+ * Called after all of the data for a body part (including sub-parts) has
+ * been parsed. The first parameter is the part number.
+ * deliverPartData(string partNum, {string,typedarray} data)
+ * Called when some data for a body part has been delivered. The first
+ * parameter is the part number. The second parameter is the data which is
+ * being delivered; the exact type of this data depends on the options
+ * used. Note that data is only delivered for leaf body parts.
+ *
+ * The second parameter, options, is an optional object containing the options
+ * for the parser. The following are the options that the parser may use:
+ * pruneat: <string> [default=""]
+ * Treat the message as starting at the given part number, so that no parts
+ * above <string> are returned.
+ * bodyformat: one of {none, raw, nodecode, decode} [default=nodecode]
+ * How to return the bodies of parts:
+ * none: no part data is returned
+ * raw: the body of the part is passed through raw
+ * nodecode: the body is passed through without decoding QP/Base64
+ * decode: quoted-printable and base64 are fully decoded
+ * strformat: one of {binarystring, unicode, typedarray} [default=binarystring]
+ * How to treat output strings:
+ * binarystring: Data is a JS string with chars in the range [\x00-\xff]
+ * unicode: Data for text parts is converted to UTF-16; data for other
+ * parts is a typed array buffer, akin to typedarray.
+ * typedarray: Data is a JS typed array buffer
+ * charset: <string> [default=""]
+ * What charset to assume if no charset information is explicitly provided.
+ * This only matters if strformat is unicode. See above note on charsets
+ * for more details.
+ * force-charset: <boolean> [default=false]
+ * If true, this coerces all types to use the charset option, even if the
+ * message specifies a different content-type.
+ * stripcontinuations: <boolean> [default=true]
+ * If true, then the newlines in headers are removed in the returned
+ * header objects.
+ * onerror: <function(thrown error)> [default = nop-function]
+ * An error function that is called if an emitter callback throws an error.
+ * By default, such errors are swallowed by the parser. If you want the
+ * parser itself to throw an error, rethrow it via the onerror function.
+ * decodeSubMessages: <boolean> [default=true]
+ * Parse attached messages (message/rfc822, message/global & message/news)
+ * and return all of their mime data instead of returning their content
+ * as regular attachments.
+ */
+ function MimeParser(emitter, options) {
+ // The actual emitter
+ this._emitter = emitter;
+ // Options for the parser (those listed here are defaults)
+ this._options = {
+ decodeSubMessages: true,
+ pruneat: "",
+ bodyformat: "nodecode",
+ strformat: "binarystring",
+ stripcontinuations: true,
+ charset: "",
+ "force-charset": false,
+ onerror(error) {},
+ };
+ // Load the options as a copy here (prevents people from changing on the fly).
+ if (options) {
+ for (var opt in options) {
+ this._options[opt] = options[opt];
+ }
+ }
+
+ // Ensure that the error function is in fact a function
+ if (typeof this._options.onerror != "function") {
+ throw new Error("onerror callback must be a function");
+ }
+
+ // Reset the parser
+ this.resetParser();
+ }
+
+ /**
+ * Resets the parser to read a new message. This method need not be called
+ * immediately after construction.
+ */
+ MimeParser.prototype.resetParser = function () {
+ // Current parser state
+ this._state = PARSING_HEADERS;
+ // Input data that needs to be held for buffer conditioning
+ this._holdData = "";
+ // Complete collection of headers (also used to accumulate _headerData)
+ this._headerData = "";
+ // Whether or not emitter.startMessage has been called
+ this._triggeredCall = false;
+
+ // Splitting input
+ this._splitRegex = this._handleSplit = undefined;
+ // Subparsing
+ this._subparser = this._subPartNum = undefined;
+ // Data that has yet to be consumed by _convertData
+ this._savedBuffer = "";
+ // Convert data
+ this._convertData = undefined;
+ // String decoder
+ this._decoder = undefined;
+ };
+
+ /**
+ * Deliver a buffer of data to the parser.
+ *
+ * @param buffer {BinaryString} The raw data to add to the message.
+ */
+ MimeParser.prototype.deliverData = function (buffer) {
+ // In ideal circumstances, we'd like to parse the message all at once. In
+ // reality, though, data will be coming to us in packets. To keep the amount
+ // of saved state low, we want to make basic guarantees about how packets get
+ // delivered. Our basic model is a twist on line-buffering, as the format of
+ // MIME and messages make it hard to not do so: we can handle multiple lines
+ // at once. To ensure this, we start by conditioning the packet by
+ // withholding data to make sure that the internal deliveries have the
+ // guarantees. This implies that we need to do the following steps:
+ // 1. We don't know if a `\r' comes from `\r\n' or the old mac line ending
+ // until we see the next character. So withhold the last `\r'.
+ // 2. Ensure that every packet ends on a newline. So scan for the end of the
+ // line and withhold until the \r\n comes through.
+ // [Note that this means that an input message that uses \r line endings and
+ // is being passed to us via a line-buffered input is going to have most of
+ // its data being withhold until the next buffer. Since \r is so uncommon of
+ // a line ending in modern times, this is acceptable lossage.]
+ // 3. Eliminate empty packets.
+
+ // Add in previously saved data
+ if (this._holdData) {
+ buffer = this._holdData + buffer;
+ this._holdData = "";
+ }
+
+ // Condition the input, so that we get the multiline-buffering mentioned in
+ // the above comment.
+ if (buffer.length > 0) {
+ [buffer, this._holdData] = conditionToEndOnCRLF(buffer);
+ }
+
+ // Ignore 0-length buffers.
+ if (buffer.length == 0) {
+ return;
+ }
+
+ // Signal the beginning, if we haven't done so.
+ if (!this._triggeredCall) {
+ this._callEmitter("startMessage");
+ this._triggeredCall = true;
+ }
+
+ // Finally, send it the internal parser.
+ this._dispatchData("", buffer, true);
+ };
+
+ /**
+ * Ensure that a set of data always ends in an end-of-line character.
+ *
+ * @param buffer {BinaryString} The data with no guarantees about where it ends.
+ * @returns {BinaryString[]} An array of 2 binary strings where the first string
+ * ends in a newline and the last string contains the
+ * text in buffer following the first string.
+ */
+ function conditionToEndOnCRLF(buffer) {
+ // Find the last occurrence of '\r' or '\n' to split the string. However, we
+ // don't want to consider '\r' if it is the very last character, as we need
+ // the next packet to tell if the '\r' is the beginning of a CRLF or a line
+ // ending by itself.
+ let lastCR = buffer.lastIndexOf("\r", buffer.length - 2);
+ let lastLF = buffer.lastIndexOf("\n");
+ let end = lastLF > lastCR ? lastLF : lastCR;
+ return [buffer.substring(0, end + 1), buffer.substring(end + 1)];
+ }
+
+ /**
+ * Tell the parser that all of the data has been delivered.
+ *
+ * This will flush all of the internal state of the parser.
+ */
+ MimeParser.prototype.deliverEOF = function () {
+ // Start of input buffered too long? Call start message now.
+ if (!this._triggeredCall) {
+ this._triggeredCall = true;
+ this._callEmitter("startMessage");
+ }
+ // Force a flush of all of the data.
+ if (this._holdData) {
+ this._dispatchData("", this._holdData, true);
+ }
+ this._dispatchEOF("");
+ // Signal to the emitter that we're done.
+ this._callEmitter("endMessage");
+ };
+
+ /**
+ * Calls a method on the emitter safely.
+ *
+ * This method ensures that errors in the emitter call won't cause the parser
+ * to exit with an error, unless the user wants it to.
+ *
+ * @param funcname {String} The function name to call on the emitter.
+ * @param args... Extra arguments to pass into the emitter callback.
+ */
+ MimeParser.prototype._callEmitter = function (funcname, ...args) {
+ if (this._emitter && funcname in this._emitter) {
+ if (args.length > 0 && this._willIgnorePart(args[0])) {
+ // partNum is always the first argument, so check to make sure that it
+ // satisfies our emitter's pruneat requirement.
+ return;
+ }
+ try {
+ this._emitter[funcname].apply(this._emitter, args);
+ } catch (e) {
+ // We ensure that the onerror attribute in options is a function, so this
+ // is always safe.
+ this._options.onerror(e);
+ }
+ }
+ };
+
+ /**
+ * Helper function to decide if a part's output will never be seen.
+ *
+ * @param part {String} The number of the part.
+ * @returns {boolean} True if the emitter is not interested in this part.
+ */
+ MimeParser.prototype._willIgnorePart = function (part) {
+ if (this._options.pruneat) {
+ let match = this._options.pruneat;
+ let start = part.substr(0, match.length);
+ // It needs to start with and follow with a new part indicator
+ // (i.e., don't let 10 match with 1, but let 1.1 or 1$ do so)
+ if (
+ start != match ||
+ (match.length < part.length && !"$.".includes(part[match.length]))
+ ) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // MIME parser core
+ // ----------------
+
+ // This MIME parser is a stateful parser; handling of the MIME tree is mostly
+ // done by creating new parsers and feeding data to them manually. In parallel
+ // to the externally-visible deliverData and deliverEOF, the two methods
+ // _dispatchData and _dispatchEOF are the internal counterparts that do the
+ // main work of moving data to where it needs to go; helper functions are used
+ // to handle translation.
+ //
+ // The overall flow of the parser is this. First, it buffers all of the data
+ // until the dual-CRLF pattern is noticed. Once that is found, it parses the
+ // entire header chunk at once. As a result of header parsing, the parser enters
+ // one of three modes for handling data, and uses a special regex to change
+ // modes and handle state changes. Specific details about the states the parser
+ // can be in are as follows:
+ // PARSING_HEADERS: The input buffer is concatenated to the currently-received
+ // text, which is then searched for the CRLFCRLF pattern. If found, the data
+ // is split at this boundary; the first chunk is parsed using _parseHeaders,
+ // and the second chunk will fall through to buffer processing. After
+ // splitting, the headers are delivered via the emitter, and _startBody is
+ // called to set up state for the parser.
+ // SEND_TO_BLACK_HOLE: All data in the input is ignored.
+ // SEND_TO_EMITTER: All data is passed into the emitter, if it is desired.
+ // Data can be optionally converted with this._convertData.
+ // SEND_TO_SUBPARSER: All data is passed into the subparser's _dispatchData
+ // method, using _subPartNum as the part number and _subparser as the object
+ // to call. Data can be optionally converted first with this._convertData.
+ //
+ // Additional state modifications can be done using a regex in _splitRegex and
+ // the callback method this._handleSplit(partNum, regexResult). The _handleSplit
+ // callback is free to do any modification to the current parser, including
+ // modifying the _splitRegex value. Packet conditioning guarantees that every
+ // buffer string passed into _dispatchData will have started immediately after a
+ // newline character in the fully assembled message.
+ //
+ // The this._convertData method, if present, is expected to return an array of
+ // two values, [{typedarray, string} decoded_buffer, string unused_buffer], and
+ // has as its arguments (string buffer, bool moreToCome).
+ //
+ // The header parsing by itself does very little parsing, only parsing as if all
+ // headers were unstructured fields. Values are munged so that embedded newlines
+ // are stripped and the result is also trimmed. Headers themselves are
+ // canonicalized into lower-case.
+
+ // Parser states. See the large comment above.
+ var PARSING_HEADERS = 1; // Header text is still being accumulated
+ var SEND_TO_BLACK_HOLE = 2; // Incoming data is ignored
+ var SEND_TO_EMITTER = 3; // Incoming data is passed to the emitter
+ var SEND_TO_SUBPARSER = 4; // Incoming data is passed to this._subparser
+
+ /**
+ * Main dispatch for incoming packet data.
+ *
+ * The incoming data needs to have been sanitized so that each packet begins on
+ * a newline boundary. The part number for the current parser also needs to be
+ * passed in. The checkSplit parameter controls whether or not the data in
+ * buffer needs to be checked against _splitRegex; this is used internally for
+ * the mechanics of splitting and should otherwise always be true.
+ *
+ * The method works in three steps:
+ * 1. While in the PARSING_HEADERS state, the buffer is appended to
+ * _headerData. Nothing else happens until the end of the header block is
+ * found; at that point the headers are parsed, startPart is emitted,
+ * _startBody is called and the text after the header block continues
+ * through the steps below.
+ * 2. If checkSplit is true and a _splitRegex is set and matches, the text
+ * before the match is dispatched recursively with checkSplit set to
+ * false, _handleSplit is called with the match, and the text after the
+ * match is dispatched recursively with checkSplit set to true.
+ * 3. Otherwise the buffer is routed according to the current state: it is
+ * dropped, sent to the emitter as deliverPartData, or forwarded to the
+ * subparser.
+ *
+ * @param partNum {String} The part number being currently parsed.
+ * @param buffer {BinaryString} The text (conditioned as mentioned above) to
+ * pass to the parser.
+ * @param checkSplit {Boolean} - If true, split the text using _splitRegex.
+ * This is set to false internally to handle
+ * low-level splitting details.
+ */
+ MimeParser.prototype._dispatchData = function (
+ partNum,
+ buffer,
+ checkSplit
+ ) {
+ // Are we parsing headers?
+ if (this._state == PARSING_HEADERS) {
+ this._headerData += buffer;
+ // Find the end of the headers--either it's a CRLF at the beginning (in
+ // which case we have no headers), or it's a pair of identical line endings.
+ let result = /(?:^(?:\r\n|[\r\n]))|(\r\n|[\r\n])\1/.exec(
+ this._headerData
+ );
+ if (result != null) {
+ // If we found the end of headers, split the data at this point and send
+ // the stuff after the double-CRLF into the later body parsing.
+ let headers = this._headerData.substr(0, result.index);
+ buffer = this._headerData.substring(result.index + result[0].length);
+ this._headerData = headers;
+ this._headers = this._parseHeaders();
+ this._callEmitter("startPart", partNum, this._headers);
+ this._startBody(partNum);
+ } else {
+ return; // Header block still incomplete; wait for more data
+ }
+ }
+
+ // We're in the middle of the body. Start by testing the split regex, to see
+ // if anything needs to be done before the data is routed.
+ if (checkSplit && this._splitRegex) {
+ let splitResult = this._splitRegex.exec(buffer);
+ if (splitResult) {
+ // Pass the text before the split through the current state.
+ let start = splitResult.index,
+ len = splitResult[0].length;
+ if (start > 0) {
+ this._dispatchData(partNum, buffer.substr(0, start), false);
+ }
+
+ // Tell the handler that we've seen the split. Note that this can change
+ // any method on `this'.
+ this._handleSplit(partNum, splitResult);
+
+ // Send the rest of the data to where it needs to go. There could be more
+ // splits in the data, so watch out!
+ buffer = buffer.substring(start + len);
+ if (buffer.length > 0) {
+ this._dispatchData(partNum, buffer, true);
+ }
+ return; // The recursive calls above have handled the whole buffer
+ }
+ }
+
+ // Where does the data go?
+ if (this._state == SEND_TO_BLACK_HOLE) {
+ // Don't send any data when going to the black hole.
+ } else if (this._state == SEND_TO_EMITTER) {
+ // Don't pass body data if the format is to be none
+ let passData = this._options.bodyformat != "none";
+ if (!passData || this._willIgnorePart(partNum)) {
+ return;
+ }
+ buffer = this._applyDataConversion(buffer, this._options.strformat);
+ if (buffer.length > 0) {
+ this._callEmitter("deliverPartData", partNum, buffer);
+ }
+ } else if (this._state == SEND_TO_SUBPARSER) {
+ buffer = this._applyDataConversion(buffer, "binarystring"); // Subparsers are fed binary strings
+ if (buffer.length > 0) {
+ this._subparser._dispatchData(this._subPartNum, buffer, true);
+ }
+ }
+ };
+
+ /**
+ * Output data using the desired output format, saving data if data conversion
+ * needs extra data to be saved.
+ *
+ * @param buf {BinaryString} The data to be sent to the output.
+ * @param type {String} - The type of the data to output. Valid values are
+ * the same as the strformat option.
+ * @returns Coerced and converted data that can be sent to the emitter or
+ * subparser.
+ */
+ MimeParser.prototype._applyDataConversion = function (buf, type) {
+ // If we need to convert data, do so.
+ if (this._convertData) {
+ // Prepend leftover data from the last conversion.
+ buf = this._savedBuffer + buf;
+ [buf, this._savedBuffer] = this._convertData(buf, true);
+ }
+ return this._coerceData(buf, type, false);
+ };
+
+ /**
+ * Coerce the input buffer into the given output type.
+ *
+ * @param buffer {BinaryString|Uint8Array} The data to be converted.
+ * @param type {String} The type to convert the data to.
+ * @param more {boolean} If true, this function will never be
+ * called again.
+ * @returns {BinaryString | string | Uint8Array} The desired output format.
+ */
+ // Coerces the buffer (a string or typedarray) into a given type
+ MimeParser.prototype._coerceData = function (buffer, type, more) {
+ if (typeof buffer == "string") {
+ // string -> binarystring is a nop
+ if (type == "binarystring") {
+ return buffer;
+ }
+ // Either we're going to array or unicode. Both people need the array
+ var typedarray = mimeutils.stringToTypedArray(buffer);
+ // If it's unicode, do the coercion from the array
+ // If its typedarray, just return the synthesized one
+ return type == "unicode"
+ ? this._coerceData(typedarray, "unicode", more)
+ : typedarray;
+ } else if (type == "binarystring") {
+ // Doing array -> binarystring
+ return mimeutils.typedArrayToString(buffer);
+ } else if (type == "unicode") {
+ // Doing array-> unicode: Use the decoder set up earlier to convert
+ if (this._decoder) {
+ return this._decoder.decode(buffer, { stream: more });
+ }
+ // If there is no charset, just return the typed array instead.
+ return buffer;
+ }
+ throw new Error("Invalid type: " + type);
+ };
+
+ /**
+ * Signal that no more data will be dispatched to this parser.
+ *
+ * @param partNum {String} The part number being currently parsed.
+ */
+ MimeParser.prototype._dispatchEOF = function (partNum) {
+ if (this._state == PARSING_HEADERS) {
+ // Unexpected EOF in headers. Parse them now and call startPart/endPart
+ this._headers = this._parseHeaders();
+ this._callEmitter("startPart", partNum, this._headers);
+ } else if (this._state == SEND_TO_SUBPARSER) {
+ // Pass in any lingering data
+ if (this._convertData && this._savedBuffer) {
+ this._subparser._dispatchData(
+ this._subPartNum,
+ this._convertData(this._savedBuffer, false)[0],
+ true
+ );
+ }
+ this._subparser._dispatchEOF(this._subPartNum);
+ // Clean up after ourselves
+ this._subparser = null;
+ } else if (this._convertData && this._savedBuffer) {
+ // Convert lingering data
+ let [buffer] = this._convertData(this._savedBuffer, false);
+ buffer = this._coerceData(buffer, this._options.strformat, false);
+ if (buffer.length > 0) {
+ this._callEmitter("deliverPartData", partNum, buffer);
+ }
+ }
+
+ // We've reached EOF for this part; tell the emitter
+ this._callEmitter("endPart", partNum);
+ };
+
+ /**
+ * Produce a dictionary of all headers as if they were unstructured fields.
+ *
+ * @returns {StructuredHeaders} The structured header objects for the header
+ * block.
+ */
+ MimeParser.prototype._parseHeaders = function () {
+ // Wrap the raw header block in a structured-header view.
+ const headers = new StructuredHeaders(this._headerData, this._options);
+
+ // Make the content type reachable as headers.contentType. If the part has no
+ // Content-Type header, parse a substitute from this._defaultContentType (or
+ // "text/plain") and expose it through a getter.
+ let contentType = headers.get("Content-Type");
+ if (typeof contentType !== "undefined") {
+ Object.defineProperty(headers, "contentType", { configurable: false });
+ } else {
+ contentType = headerparser.parseStructuredHeader(
+ "Content-Type",
+ this._defaultContentType || "text/plain"
+ );
+ Object.defineProperty(headers, "contentType", {
+ get() {
+ return contentType;
+ },
+ });
+ }
+
+ // Choose the charset for this part. The charset parameter of the content
+ // type is honoured only when no forced conversion was requested; in every
+ // other case the charset from the options is used.
+ const useDeclaredCharset =
+ !this._options["force-charset"] && contentType.has("charset");
+ const charset = useDeclaredCharset
+ ? contentType.get("charset")
+ : this._options.charset;
+ headers.charset = charset;
+
+ // Keep our own copy so that later changes to headers.charset by users do not
+ // affect how body parts are decoded.
+ this._charset = charset;
+ return headers;
+ };
+
+ /**
+ * Initialize the parser state for the body of this message.
+ *
+ * @param partNum {String} The part number being currently parsed.
+ */
+ MimeParser.prototype._startBody = function (partNum) {
+ // Decides, from the already-parsed headers, where body data goes
+ // (this._state), how it is pre-processed (this._convertData), and whether a
+ // charset decoder (this._decoder) is needed.
+ let contentType = this._headers.contentType;
+
+ // Should the bodyformat be raw, we just want to pass through all data without
+ // trying to interpret it.
+ if (
+ this._options.bodyformat == "raw" &&
+ partNum == this._options.pruneat
+ ) {
+ this._state = SEND_TO_EMITTER;
+ return;
+ }
+
+ // The output depends on the content-type. Basic rule of thumb:
+ // 1. Discrete media types (text, video, audio, image, application) are passed
+ // through with no alterations beyond Content-Transfer-Encoding unpacking.
+ // 2. Everything with a media type of multipart is treated the same.
+ // 3. Any message/* type that acts like a mail message (rfc822, news, global)
+ // is parsed as a header/body pair again. Most of the other message/* types
+ // have similar structures, but they don't have cascading child subparts,
+ // so it's better to pass their entire contents to the emitter and let the
+ // consumer deal with them.
+ // 4. For untyped data, there needs to be no Content-Type header. This helps
+ // avoid false positives.
+ if (contentType.mediatype == "multipart") {
+ // If there's no boundary type, everything will be part of the prologue of
+ // the multipart message, so just feed everything into a black hole.
+ if (!contentType.has("boundary")) {
+ this._state = SEND_TO_BLACK_HOLE;
+ return;
+ }
+ // The boundary of a multipart message needs to start with -- and be at the
+ // beginning of the line. If -- is after the boundary, it represents the
+ // terminator of the multipart. After the line, there may be only whitespace
+ // and then the CRLF at the end. Since the CRLFs in here are necessary for
+ // distinguishing the parts, they are not included in the subparts, so we
+ // need to capture them in the regex as well to prevent them leaking out.
+ // Capture group 1 is the line break (or start of input) before the
+ // boundary; capture group 2 is the optional closing "--". The boundary
+ // string itself is escaped so that it is matched literally.
+ this._splitRegex = new RegExp(
+ "(\r\n|[\r\n]|^)--" +
+ contentType.get("boundary").replace(/[\\^$*+?.()|{}[\]]/g, "\\$&") +
+ "(--)?[ \t]*(?:\r\n|[\r\n]|$)"
+ );
+ this._handleSplit = this._whenMultipart;
+ this._subparser = new MimeParser(this._emitter, this._options);
+ // multipart/digest defaults to message/rfc822 instead of text/plain
+ if (contentType.subtype == "digest") {
+ this._subparser._defaultContentType = "message/rfc822";
+ }
+
+ // All text before the first boundary and after the closing boundary are
+ // supposed to be ignored ("must be ignored", according to RFC 2046 §5.1.1);
+ // in accordance with these wishes, ensure they don't get passed to any
+ // deliverPartData.
+ this._state = SEND_TO_BLACK_HOLE;
+
+ // Multipart MIME messages stipulate that the final CRLF before the boundary
+ // delimiter is not matched. When the packet ends on a CRLF, we don't know
+ // if the next text could be the boundary. Therefore, we need to withhold
+ // the last line of text to be sure of what's going on. The _convertData is
+ // how we do this, even though we're not really converting any data.
+ this._convertData = function (buffer, more) {
+ // splitPoint marks where the withheld trailing line break begins; it is
+ // only moved back from the end of the buffer when more data is expected.
+ let splitPoint = buffer.length;
+ if (more) {
+ if (buffer.charAt(splitPoint - 1) == "\n") {
+ splitPoint--;
+ }
+ if (splitPoint >= 0 && buffer.charAt(splitPoint - 1) == "\r") {
+ splitPoint--;
+ }
+ }
+ let res = conditionToEndOnCRLF(buffer.substring(0, splitPoint));
+ let preLF = res[0];
+ let rest = res[1];
+ // Return [data to process now, data to hold back for the next call].
+ return [preLF, rest + buffer.substring(splitPoint)];
+ };
+ } else if (
+ (this._options.decodeSubMessages || this._willIgnorePart(partNum)) &&
+ (contentType.type == "message/rfc822" ||
+ contentType.type == "message/global" ||
+ contentType.type == "message/news")
+ ) {
+ // The subpart is just another header/body pair that goes to EOF, so just
+ // return the parse from that blob
+ this._state = SEND_TO_SUBPARSER;
+ // The embedded message is numbered as this part's number followed by "$".
+ this._subPartNum = partNum + "$";
+ this._subparser = new MimeParser(this._emitter, this._options);
+
+ // So, RFC 6532 happily allows message/global types to have CTE applied.
+ // This means that subparts would need to be decoded to determine their
+ // contents properly. There seems to be some evidence that message/rfc822
+ // that is illegally-encoded exists in the wild, so be lenient and decode
+ // for any message/* type that gets here.
+ let cte = this._extractHeader("content-transfer-encoding", "");
+ if (cte in ContentDecoders) {
+ this._convertData = ContentDecoders[cte];
+ }
+ } else {
+ // Okay, we just have to feed the data into the output
+ this._state = SEND_TO_EMITTER;
+ if (this._options.bodyformat == "decode") {
+ // If we wish to decode, look it up in one of our decoders.
+ let cte = this._extractHeader("content-transfer-encoding", "");
+ if (cte in ContentDecoders) {
+ this._convertData = ContentDecoders[cte];
+ }
+ }
+ }
+
+ // Set up the encoder for charset conversions; only do this for text parts.
+ // Other parts are almost certainly binary, so no translation should be
+ // applied to them.
+ if (
+ this._options.strformat == "unicode" &&
+ contentType.mediatype == "text"
+ ) {
+ // If the charset is nonempty, initialize the decoder
+ this._decoder = null;
+ if (this._charset !== "") {
+ try {
+ this._decoder = new MimeTextDecoder(this._charset);
+ } catch (e) {}
+ // A constructor failure is deliberately ignored: this._decoder stays
+ // null and the identity fallback below is used instead.
+ }
+ if (!this._decoder) {
+ // There's no charset we can use for decoding, so pass through as an
+ // identity encoder or otherwise this._coerceData will complain.
+ this._decoder = {
+ decode(buffer) {
+ return MimeParser.prototype._coerceData(
+ buffer,
+ "binarystring",
+ true
+ );
+ },
+ };
+ }
+ } else {
+ this._decoder = null;
+ }
+ };
+
+ // Internal split handling for multipart messages.
+ /**
+ * When a multipart boundary is found, handle the process of managing the
+ * subparser state. This is meant to be used as a value for this._handleSplit.
+ *
+ * @param partNum {String} The part number being currently parsed.
+ * @param lastResult {Array} - The result of the regular expression match.
+ */
+ MimeParser.prototype._whenMultipart = function (partNum, lastResult) {
+ // lastResult is the match of this._splitRegex: index 1 holds the line break
+ // (or empty string) captured before the boundary, index 2 holds the optional
+ // "--" that marks the closing boundary.
+
+ // Fix up the part number (don't do '' -> '.4' and don't do '1' -> '14')
+ if (partNum != "") {
+ partNum += ".";
+ }
+ if (!this._subPartNum) {
+ // No count? This means that this is the first time we've seen the boundary,
+ // so do some initialization for later here.
+ this._count = 1;
+ } else {
+ // If we did not match a CRLF at the beginning of the line, strip CRLF from
+ // the saved buffer. We do this in the else block because it is not
+ // necessary for the prologue, since that gets ignored anyways.
+ if (this._savedBuffer != "" && lastResult[1] === "") {
+ // Drop at most one trailing "\n" and then at most one trailing "\r".
+ let useEnd = this._savedBuffer.length - 1;
+ if (this._savedBuffer[useEnd] == "\n") {
+ useEnd--;
+ }
+ if (useEnd >= 0 && this._savedBuffer[useEnd] == "\r") {
+ useEnd--;
+ }
+ this._savedBuffer = this._savedBuffer.substring(0, useEnd + 1);
+ }
+ // If we have saved data and we matched a CRLF, pass the saved data in.
+ if (this._savedBuffer != "") {
+ this._subparser._dispatchData(
+ this._subPartNum,
+ this._savedBuffer,
+ true
+ );
+ }
+ // We've seen the boundary at least once before, so this must end a subpart.
+ // Tell that subpart that it has reached EOF.
+ this._subparser._dispatchEOF(this._subPartNum);
+ }
+ // Whatever was withheld has now been either delivered or discarded.
+ this._savedBuffer = "";
+
+ // The regex feeder has a capture on the (--)?, so if its result is present,
+ // then we have seen the terminator. Alternatively, the message may have been
+ // mangled to exclude the terminator, so also check if EOF has occurred.
+ if (lastResult[2] == undefined) {
+ // Ordinary boundary: reuse the subparser for the next child part and give
+ // that part the next sequential number under partNum.
+ this._subparser.resetParser();
+ this._state = SEND_TO_SUBPARSER;
+ this._subPartNum = partNum + this._count;
+ this._count += 1;
+ } else {
+ // Ignore the epilogue
+ this._splitRegex = null;
+ this._state = SEND_TO_BLACK_HOLE;
+ }
+ };
+
+ /**
+ * Return the structured header from the current header block, or a default if
+ * it is not present.
+ *
+ * @param name {String} The header name to get.
+ * @param dflt {String} The default MIME value of the header.
+ * @returns The structured representation of the header.
+ */
+ MimeParser.prototype._extractHeader = function (name, dflt) {
+ // Normalize the header name before looking it up.
+ const key = name.toLowerCase();
+ if (this._headers.has(key)) {
+ return this._headers.get(key);
+ }
+ // The header is absent: parse the supplied default value instead.
+ return headerparser.parseStructuredHeader(key, [dflt]);
+ };
+
+ // Decoders keyed by Content-Transfer-Encoding value; _startBody looks the
+ // encoding up with `cte in ContentDecoders`.
+ var ContentDecoders = {
+ "quoted-printable": mimeutils.decode_qp,
+ base64: mimeutils.decode_base64,
+ };
+
+ return MimeParser;
+ });
+ def("headeremitter", function (require) {
+ /**
+ * This module implements the code for emitting structured representations of
+ * MIME headers into their encoded forms. The code here is a companion to,
+ * but completely independent of, jsmime.headerparser: the structured
+ * representations that are used as input to the functions in this file are the
+ * same forms that would be parsed.
+ */
+
+ "use strict";
+
+ // Shared helpers; used below for typedArrayToString when building
+ // RFC 2047 encoded-words.
+ var mimeutils = require("./mimeutils");
+
+ // Get the default structured encoders and add them to the map
+ var structuredHeaders = require("./structuredHeaders");
+ // Encoder lookup table, queried by lower-cased header name in
+ // addStructuredHeader.
+ var encoders = new Map();
+ // Preferred spelling for each known header, also looked up by lower-cased
+ // name in addStructuredHeader.
+ var preferredSpellings = structuredHeaders.spellings;
+ // Register every default encoder. addStructuredEncoder is defined elsewhere
+ // in this module.
+ for (let [header, encoder] of structuredHeaders.encoders) {
+ addStructuredEncoder(header, encoder);
+ }
+
+ // Clamp a value in the range [min, max], defaulting to def
+ // if the object[property] does not contain the value.
+ function clamp(object, property, min, max, def) {
+ // A missing property yields the default, which is returned as-is and is not
+ // itself clamped.
+ if (!(property in object)) {
+ return def;
+ }
+ const value = object[property];
+ if (value < min) {
+ return min;
+ }
+ return value > max ? max : value;
+ }
+
+ /**
+ * An object that can assemble structured header representations into their MIME
+ * representation.
+ *
+ * The character-counting portion of this class operates using individual JS
+ * characters as its representation of logical character, which is not the same
+ * as the number of octets used as UTF-8. If non-ASCII characters are to be
+ * included in headers without some form of encoding, then care should be taken
+ * to set the maximum line length to account for the mismatch between character
+ * counts and octet counts: the maximum line is 998 octets, which could be as
+ * few as 332 JS characters (non-BMP characters, although they take up 4 octets
+ * in UTF-8, count as 2 in JS strings).
+ *
+ * This code takes care to only insert line breaks at the higher-level breaking
+ * points in a header (as recommended by RFC 5322), but it may need to resort to
+ * including them more aggressively if this is not possible. If even aggressive
+ * line-breaking cannot allow a header to be emitted without violating line
+ * length restrictions, the methods will throw an exception to indicate this
+ * situation.
+ *
+ * In general, this code does not attempt to modify its input; for example, it
+ * does not attempt to change the case of any input characters, apply any
+ * Unicode normalization algorithms, or convert email addresses to ACE where
+ * applicable. The biggest exception to this rule is that most whitespace is
+ * collapsed to a single space, even in unstructured headers, while most leading
+ * and trailing whitespace is trimmed from inputs.
+ *
+ * @param {StreamHandler} handler The handler to which all output is sent.
+ * @param {Function(string)} handler.deliverData Receives encoded data.
+ * @param {Function()} handler.deliverEOF Sent when all text is sent.
+ * @param {object} options Options for the emitter.
+ * @param [options.softMargin=78] {30 <= Integer <= 900}
+ * The ideal maximum number of logical characters to include in a line, not
+ * including the final CRLF pair. Lines may exceed this margin if parameters
+ * are excessively long.
+ * @param [options.hardMargin=332] {softMargin <= Integer <= 998}
+ * The maximum number of logical characters that can be included in a line,
+ * not including the final CRLF pair. If this count would be exceeded, then
+ * an error will be thrown and encoding will not be possible.
+ * @param [options.useASCII=true] {Boolean}
+ * If true, then RFC 2047 and RFC 2231 encoding of headers will be performed
+ * as needed to retain headers as ASCII.
+ */
+ function HeaderEmitter(handler, options) {
+ // The inferred value of options.useASCII; defaults to true when unset.
+ this._useASCII = options.useASCII === undefined ? true : options.useASCII;
+ // When true, addDate emits the date using its UTC fields with the seconds
+ // zeroed; defaults to false when unset.
+ this._sanitizeDate =
+ options.sanitizeDate === undefined ? false : options.sanitizeDate;
+ // The handler to use.
+ this._handler = handler;
+ /**
+ * The current line being built; note that we may insert a line break in the
+ * middle to keep under the maximum line length.
+ *
+ * @type String
+ * @private
+ */
+ this._currentLine = "";
+
+ // Our bounds for soft and hard margins are not completely arbitrary. The
+ // minimum amount we need to encode is 20 characters, which can encode a
+ // single non-BMP character with RFC 2047. The value of 30 is chosen to give
+ // some breathing room for delimiters or other unbreakable characters. The
+ // maximum length is 998 octets, per RFC 5322; soft margins are slightly lower
+ // to allow for breathing room as well. The default of 78 for the soft margin
+ // is recommended by RFC 5322.
+ this._softMargin = clamp(options, "softMargin", 30, 900, 78);
+ // The hard margin defaults to 332, as documented for options.hardMargin:
+ // margins are counted in JS characters, and 998 octets can be as few as 332
+ // JS characters when headers contain unencoded non-ASCII text. Callers may
+ // still raise it explicitly, up to 998.
+ this._hardMargin = clamp(
+ options,
+ "hardMargin",
+ this._softMargin,
+ 998,
+ 332
+ );
+
+ /**
+ * The index of the last preferred breakable position in the current line.
+ *
+ * @type Integer
+ * @private
+ */
+ this._preferredBreakpoint = 0;
+ }
+
+ // Low-level methods
+ // -----------------
+
+ // Explanation of the emitter internals:
+ // RFC 5322 requires that we wrap our lines, ideally at 78 characters and at
+ // least by 998 octets. We can't wrap in arbitrary places, but wherever CFWS is
+ // valid... and ideally wherever clients are likely to expect it. In theory, we
+ // can break between every token (this is how RFC 822 operates), but, in RFC
+ // 5322, many of those breaks are relegated to obsolete productions, mostly
+ // because it is common to not properly handle breaks in those locations.
+ //
+ // So how do we do line breaking? The algorithm we implement is greedy, to
+ // simplify implementation. There are two margins: the soft margin, which we
+ // want to keep within, and the hard margin, which we absolutely have to keep
+ // within. There are also two kinds of break points: preferred and emergency.
+ // As long as we keep the line within the hard margin, we will only break at
+ // preferred breakpoints; emergency breakpoints are only used if we would
+ // otherwise exceed the hard margin.
+ //
+ // For illustration, here is an example header and where these break points are
+ // located:
+ //
+ // To: John "The Rock" Smith <jsmith@a.long.domain.invalid>
+ // Preferred: ^ ^ ^
+ // Emergency: ^ ^ ^ ^^ ^ ^ ^ ^ ^
+ //
+ // Preferred breakpoints are indicated by setting the mayBreakAfter parameter of
+ // addText to true, while emergency breakpoints are set after every token passed
+ // into addText. This is handled implicitly by only adding text to _currentLine
+ // if it ends in an emergency breakpoint.
+ //
+ // Internally, the code keeps track of margins by use of two variables. The
+ // _softMargin and _hardMargin variables encode the positions at which code must
+ // absolutely break, and are set up from the initial options parameter. Breaking
+ // happens when _currentLine.length approaches these values, as mentioned above.
+
+ /**
+ * Send a header line consisting of the first N characters to the handler.
+ *
+ * If the count parameter is missing, then we presume that the current header
+ * value being emitted is done and therefore we should not send a continuation
+ * space. Otherwise, we presume that we're still working, so we will send the
+ * continuation space.
+ *
+ * @private
+ * @param [count] {Integer} The number of characters in the current line to
+ * include before wrapping.
+ */
+ HeaderEmitter.prototype._commitLine = function (count) {
+ // A supplied count means the header value continues on the next line.
+ const isContinuing = typeof count !== "undefined";
+ const line = this._currentLine;
+
+ // Cut the line at count (or take all of it), trimming the whitespace that
+ // touches the cut on either side.
+ const emitted = isContinuing
+ ? line.slice(0, count).trimRight()
+ : line.trimRight();
+ const carried = isContinuing ? line.slice(count).trimLeft() : "";
+
+ // Deliver the finished line with its terminating CRLF.
+ this._handler.deliverData(emitted + "\r\n");
+
+ // The leftover text starts the next line; a continuation line is prefixed
+ // with a single space.
+ this._currentLine = isContinuing ? " " + carried : carried;
+
+ // Any preferred breakpoint belonged to the line just emitted, so start over.
+ this._preferredBreakpoint = 0;
+ };
+
+ /**
+ * Reserve at least length characters in the current line. If there aren't
+ * enough characters, insert a line break.
+ *
+ * @private
+ * @param length {Integer} The number of characters to reserve space for.
+ * @returns {boolean} Whether or not there is enough space for length characters.
+ */
+ HeaderEmitter.prototype._reserveTokenSpace = function (length) {
+ // Deliberately no check that length itself is within the margins: callers
+ // may pass an over-estimate just to force a higher-level line break (see
+ // addAddress), and the text that follows may be broken up further anyway.
+
+ // True if `length` more characters fit on the current line within `margin`.
+ // Re-evaluated after each commit because _commitLine rewrites the line.
+ const fitsWithin = margin =>
+ this._currentLine.length + length <= margin;
+
+ // Enough room already: nothing to do.
+ if (fitsWithin(this._softMargin)) {
+ return true;
+ }
+
+ // Break at the preferred breakpoint, if there is one, and re-check.
+ if (this._preferredBreakpoint > 0) {
+ this._commitLine(this._preferredBreakpoint);
+ if (fitsWithin(this._softMargin)) {
+ return true;
+ }
+ }
+
+ // The soft margin cannot be kept; settle for the hard margin if possible.
+ if (fitsWithin(this._hardMargin)) {
+ return true;
+ }
+
+ // Even the hard margin would be exceeded: break at the last emergency
+ // breakpoint, which is the end of the current line.
+ if (this._currentLine.length > 0) {
+ this._commitLine(this._currentLine.length);
+ }
+
+ // If the text still does not fit, there is nothing more we can do.
+ return fitsWithin(this._hardMargin);
+ };
+
+ /**
+ * Adds a block of text to the current header, inserting a break if necessary.
+ * If mayBreakAfter is true and text does not end in whitespace, a single space
+ * character may be added to the output. If the text could not be added without
+ * violating line length restrictions, an error is thrown instead.
+ *
+ * @protected
+ * @param {string} text The text to add to the output.
+ * @param {boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+ HeaderEmitter.prototype.addText = function (text, mayBreakAfter) {
+ // Make room for the whole token first; fail if that is impossible.
+ if (!this._reserveTokenSpace(text.length)) {
+ throw new Error("Cannot encode " + text + " due to length.");
+ }
+
+ this._currentLine += text;
+ if (!mayBreakAfter) {
+ return;
+ }
+
+ // The preferred breakpoint sits right after the text just added. Make sure a
+ // space follows it so a later break has whitespace to split on.
+ this._preferredBreakpoint = this._currentLine.length;
+ if (text[text.length - 1] != " ") {
+ this._currentLine += " ";
+ }
+ };
+
+ /**
+ * Adds a block of text that may need quoting if it contains some character in
+ * qchars. If it is already quoted, no quoting will be applied. If the text
+ * cannot be added without violating maximum line length, an error is thrown
+ * instead.
+ *
+ * @protected
+ * @param {string} text The text to add to the output.
+ * @param {string} qchars The set of characters that cannot appear
+ * outside of a quoted string.
+ * @param {boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+ HeaderEmitter.prototype.addQuotable = function (
+ text,
+ qchars,
+ mayBreakAfter
+ ) {
+ // Empty text is never quoted and adds nothing to the output.
+ if (text.length == 0) {
+ return;
+ }
+
+ // Text that already starts and ends with a double quote is left alone, and
+ // an empty qchars set means nothing ever needs quoting.
+ const alreadyQuoted = text[0] == '"' && text[text.length - 1] == '"';
+ let needsQuote = false;
+ if (!alreadyQuoted && qchars != "") {
+ // Scan until the first character that may not appear unquoted.
+ for (let i = 0; i < text.length && !needsQuote; i++) {
+ needsQuote = qchars.includes(text[i]);
+ }
+ }
+
+ // Quote the text, backslash-escaping embedded quotes and backslashes.
+ const output = needsQuote
+ ? '"' + text.replace(/["\\]/g, "\\$&") + '"'
+ : text;
+ this.addText(output, mayBreakAfter);
+ };
+
+ /**
+ * Adds a block of text that corresponds to the phrase production in RFC 5322.
+ * Such text is a sequence of atoms, quoted-strings, or RFC-2047 encoded-words.
+ * This method will preprocess input to normalize all space sequences to a
+ * single space. If the text cannot be added without violating maximum line
+ * length, an error is thrown instead.
+ *
+ * @protected
+ * @param {string} text The text to add to the output.
+ * @param {string} qchars The set of characters that cannot appear
+ * outside of a quoted string.
+ * @param {boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+ HeaderEmitter.prototype.addPhrase = function (text, qchars, mayBreakAfter) {
+ // Collapse all whitespace spans into a single whitespace node.
+ text = text.replace(/[ \t\r\n]+/g, " ");
+
+ // If we have non-ASCII text, encode it using RFC 2047.
+ if (this._useASCII && nonAsciiRe.test(text)) {
+ this.encodeRFC2047Phrase(text, mayBreakAfter);
+ return;
+ }
+
+ // If quoting the entire string at once could fit in the line length, then do
+ // so. The check here is very loose, but this will inform us if we are going
+ // to definitely overrun the soft margin.
+ if (this._currentLine.length + text.length < this._softMargin) {
+ try {
+ this.addQuotable(text, qchars, mayBreakAfter);
+ // If we don't have a breakpoint, and the text is encoded as a sequence of
+ // atoms (and not a quoted-string), then make the last space we added a
+ // breakpoint, regardless of the mayBreakAfter setting.
+ if (this._preferredBreakpoint == 0 && text.includes(" ")) {
+ // A line ending in '"' means the text was emitted as a quoted-string,
+ // so no breakpoint is set in that case.
+ if (this._currentLine[this._currentLine.length - 1] != '"') {
+ this._preferredBreakpoint = this._currentLine.lastIndexOf(" ");
+ }
+ }
+ return;
+ } catch (e) {
+ // If we get an error at this point, we failed to add the quoted string
+ // because the string was too long. Fall through to the case where we know
+ // that the input was too long to begin with.
+ }
+ }
+
+ // If the text is too long, split the quotable string at space boundaries and
+ // add each word individually. If we still can't add all those words, there is
+ // nothing that we can do.
+ let words = text.split(" ");
+ for (let i = 0; i < words.length; i++) {
+ // Every word but the last is followed by a preferred breakpoint; the last
+ // word follows the caller's mayBreakAfter.
+ this.addQuotable(
+ words[i],
+ qchars,
+ i == words.length - 1 ? mayBreakAfter : true
+ );
+ }
+ };
+
+ // A regular expression for characters that need to be encoded.
+ // Matches any character outside the printable ASCII range 0x20-0x7e; such
+ // characters need to be encoded.
+ var nonAsciiRe = /[^\x20-\x7e]/;
+
+ // The beginning of a base64 RFC 2047 encoded-word.
+ var b64Prelude = "=?UTF-8?B?";
+ // The beginning of a quoted-printable RFC 2047 encoded-word.
+ var qpPrelude = "=?UTF-8?Q?";
+
+ // A list of ASCII characters forbidden in RFC 2047 encoded-words
+ var qpForbidden = "\"#$%&'(),.:;<=>?@[\\]^_`{|}~";
+
+ // Upper-case hex digits, indexed by nibble value.
+ var hexString = "0123456789ABCDEF";
+
+ /**
+ * Add a block of text as a single RFC 2047 encoded word. This does not try to
+ * split words if they are too long.
+ *
+ * @private
+ * @param {Uint8Array} encodedText - The octets to encode.
+ * @param {boolean} useQP If true, use quoted-printable; if false,
+ * use base64.
+ * @param {boolean} mayBreakAfter If true, the end of this text is a
+ * preferred breakpoint.
+ */
+ HeaderEmitter.prototype._addRFC2047Word = function (
+ encodedText,
+ useQP,
+ mayBreakAfter
+ ) {
+ // String form of the octets, indexed in step with encodedText below.
+ const binaryString = mimeutils.typedArrayToString(encodedText);
+ let token;
+ if (!useQP) {
+ // Base64 form: prelude, payload, terminator.
+ token = b64Prelude + btoa(binaryString) + "?=";
+ } else {
+ // Quoted-printable form, built one octet at a time.
+ token = qpPrelude;
+ for (let i = 0; i < encodedText.length; i++) {
+ const octet = encodedText[i];
+ const ch = binaryString[i];
+ const mustEscape =
+ octet < 0x20 || octet >= 0x7f || qpForbidden.includes(ch);
+ if (mustEscape) {
+ // Escape as "=" followed by two upper-case hex digits.
+ token += "=" + hexString[(octet & 0xf0) >> 4] + hexString[octet & 0x0f];
+ } else if (ch == " ") {
+ // A space is written as an underscore.
+ token += "_";
+ } else {
+ token += ch;
+ }
+ }
+ token += "?=";
+ }
+ this.addText(token, mayBreakAfter);
+ };
+
+ /**
+ * Add a block of text as potentially several RFC 2047 encoded-word tokens.
+ *
+ * @protected
+ * @param {string} text The text to add to the output.
+ * @param {boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+ HeaderEmitter.prototype.encodeRFC2047Phrase = function (
+ text,
+ mayBreakAfter
+ ) {
+ // Start by encoding the text into UTF-8 directly.
+ let encodedText = new TextEncoder("UTF-8").encode(text);
+
+ // Make sure there's enough room for a single token.
+ let minLineLen = b64Prelude.length + 10; // Eight base64 characters plus ?=
+ if (!this._reserveTokenSpace(minLineLen)) {
+ this._commitLine(this._currentLine.length);
+ }
+
+ // Try to encode as much UTF-8 text as possible in each go.
+ // b64Len and qpLen track the payload length of the pending word under each
+ // encoding; start is the index of the first octet not yet emitted.
+ let b64Len = 0,
+ qpLen = 0,
+ start = 0;
+ // Payload budget for the first word: what is left of the soft margin on the
+ // current line after the prelude and the closing "?=".
+ let maxChars =
+ this._softMargin - this._currentLine.length - (b64Prelude.length + 2);
+ for (let i = 0; i < encodedText.length; i++) {
+ let b64Inc = 0,
+ qpInc = 0;
+ // The length we need for base64 is ceil(length / 3) * 4...
+ if ((i - start) % 3 == 0) {
+ b64Inc += 4;
+ }
+
+ // The length for quoted-printable is 3 chars only if encoded
+ if (
+ encodedText[i] < 0x20 ||
+ encodedText[i] >= 0x7f ||
+ qpForbidden.includes(String.fromCharCode(encodedText[i]))
+ ) {
+ qpInc = 3;
+ } else {
+ qpInc = 1;
+ }
+
+ // Only split once this octet would overflow the budget in BOTH encodings.
+ if (b64Len + b64Inc > maxChars && qpLen + qpInc > maxChars) {
+ // Oops, we have too many characters! We need to encode everything through
+ // the current character. However, we can't split in the middle of a
+ // multibyte character. In UTF-8, characters that start with 10xx xxxx are
+ // the middle of multibyte characters, so backtrack until the start
+ // character is legal.
+ while ((encodedText[i] & 0xc0) == 0x80) {
+ --i;
+ }
+
+ // Add this part of the word and then make a continuation.
+ // The second argument is useQP: quoted-printable is chosen when its
+ // estimated length does not exceed that of base64.
+ this._addRFC2047Word(
+ encodedText.subarray(start, i),
+ b64Len >= qpLen,
+ true
+ );
+
+ // Reset the array for parsing.
+ start = i;
+ --i; // Reparse this character as well
+ b64Len = qpLen = 0;
+ // Budget for later words no longer depends on the current line length.
+ maxChars = this._softMargin - b64Prelude.length - 3;
+ } else {
+ // Add the counts for the current variable to the count to encode.
+ b64Len += b64Inc;
+ qpLen += qpInc;
+ }
+ }
+
+ // Add the entire array at this point.
+ this._addRFC2047Word(
+ encodedText.subarray(start),
+ b64Len >= qpLen,
+ mayBreakAfter
+ );
+ };
+
+ // High-level methods
+ // ------------------
+
+ /**
+ * Add the header name, with the colon and trailing space, to the output.
+ *
+ * @public
+ * @param {string} name The name of the header.
+ */
+ HeaderEmitter.prototype.addHeaderName = function (name) {
+ // Flush whatever is left of the previous header, ignoring trailing
+ // whitespace, before starting the new one.
+ const pending = this._currentLine.trimRight();
+ this._currentLine = pending;
+ if (pending.length > 0) {
+ this._commitLine();
+ }
+ // No preferred breakpoint is set after the header name.
+ this.addText(name + ": ", false);
+ };
+
+ /**
+ * Add a header and its structured value to the output.
+ *
+ * The name can be any case-insensitive variant of a known structured header;
+ * the output will include the preferred name of the structure instead of the
+ * case put into the name. If no structured encoder can be found, and the input
+ * value is a string, then the header is assumed to be unstructured and the
+ * value is added as if {@link addUnstructured} were called.
+ *
+ * @public
+ * @param {string} name - The name of the header.
+ * @param value The structured value of the header.
+ */
+ HeaderEmitter.prototype.addStructuredHeader = function (name, value) {
+ const lowerName = name.toLowerCase();
+
+ // Known structured header: emit its preferred spelling and hand the value to
+ // the registered encoder, invoked with this emitter as `this`.
+ if (encoders.has(lowerName)) {
+ this.addHeaderName(preferredSpellings.get(lowerName));
+ encoders.get(lowerName).call(this, value);
+ return;
+ }
+
+ // Without an encoder, only string values can be emitted.
+ if (typeof value !== "string") {
+ throw new Error("Unknown header " + name);
+ }
+
+ // Assume it's an unstructured header. All-lower-case names are ugly, so
+ // capitalize the first letter of each hyphen-separated word.
+ const displayName = name.replace(/(^|-)[a-z]/g, match =>
+ match.toUpperCase()
+ );
+ this.addHeaderName(displayName);
+ this.addUnstructured(value);
+ };
+
+ /**
+ * Add a single address to the header. The address is an object consisting of a
+ * possibly-empty display name and an email address.
+ *
+ * @public
+ * @param Address addr The address to be added.
+ * @param {string} addr.name - The (possibly-empty) name of the address to add.
+ * @param {string} addr.email The email of the address to add.
+ * @see headerparser.parseAddressingHeader
+ */
+ HeaderEmitter.prototype.addAddress = function (addr) {
+ const { name, email } = addr;
+
+ // A display name, when present, is written first.
+ if (name) {
+ // Rough estimate that keeps the name and address on one line if possible.
+ this._reserveTokenSpace(name.length + email.length + 3);
+ this.addPhrase(name, ',()<>[]:;@."', true);
+
+ // With no email address, skip the angle brackets entirely. This is already
+ // an abnormal situation, and it round-trips better this way.
+ if (!email) {
+ return;
+ }
+
+ this.addText("<", false);
+ }
+
+ // Split at the last "@". The "@" stays with the domain so that the
+ // local-part can be quoted on its own if it needs to be.
+ const at = email.lastIndexOf("@");
+ const localpart = at == -1 ? email : email.slice(0, at);
+ const domain = at == -1 ? "" : email.slice(at);
+
+ this.addQuotable(localpart, '()<>[]:;@\\," !', false);
+ // Close the angle bracket only if one was opened for a display name.
+ this.addText(domain + (name ? ">" : ""), false);
+ };
+
+ /**
+ * Add an array of addresses and groups to the output. Such an array may be
+ * found as the output of {@link headerparser.parseAddressingHeader}. Each
+ * element is either an address (an object with properties name and email), or a
+ * group (an object with properties name and group).
+ *
+ * @public
+ * @param {(Address|Group)[]} addresses A collection of addresses to add.
+ * @param {string} addresses[i].name The (possibly-empty) name of the
+ * address or the group to add.
+ * @param {string} [addresses[i].email] The email of the address to add.
+ * @param {Address[]} [addresses[i].group] A list of email addresses in the group.
+ * @see HeaderEmitter.addAddress
+ * @see headerparser.parseAddressingHeader
+ */
+ HeaderEmitter.prototype.addAddresses = function (addresses) {
+ let isFirst = true;
+ for (const addr of addresses) {
+ // Every element after the first is preceded by a comma separator.
+ if (!isFirst) {
+ this.addText(", ", true);
+ }
+ isFirst = false;
+
+ // Anything without an email property is treated as a group, written as
+ // "name: member, member;". The separating comma is still added after a
+ // completed group.
+ if (!("email" in addr)) {
+ this.addPhrase(addr.name, ',()<>[]:;@."', false);
+ this.addText(":", true);
+
+ this.addAddresses(addr.group);
+ this.addText(";", true);
+ continue;
+ }
+
+ this.addAddress(addr);
+ }
+ };
+
+ /**
+ * Add an unstructured header value to the output. This effectively means only
+ * inserting line breaks where necessary, and using RFC 2047 encoding where
+ * necessary.
+ *
+ * @public
+ * @param {string} text The text to add to the output.
+ */
+ HeaderEmitter.prototype.addUnstructured = function (text) {
+ // Unstructured text is essentially a phrase that is never quoted, so it is
+ // added with an empty set of quote-triggering characters. Empty text adds
+ // nothing.
+ if (text.length != 0) {
+ this.addPhrase(text, "", false);
+ }
+ };
+
+ /**
+  * RFC 822 labels for days of the week. The array is ordered Sunday-first so
+  * that it can be indexed directly with the 0-6 value returned by
+  * Date.prototype.getDay / getUTCDay, which is how addDate uses it.
+  */
+ var kDaysOfWeek = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
+
+ /**
+ * Formatting helper to output numbers between 0-9 as 00-09 instead.
+ */
+ function padTo2Digits(num) {
+ return num < 10 ? "0" + num : num.toString();
+ }
+
+ /**
+ * Add a date/time field to the output, using the JS date object as the time
+ * representation. The value will be output using the timezone offset of the
+ * date object, which is usually the timezone of the user (modulo timezone and
+ * DST changes).
+ *
+ * Note that if the date is an invalid date (its internal date parameter is a
+ * NaN value), this method throws an error instead of generating an invalid
+ * string.
+ *
+ * @public
+ * @param {Date} date The date to be added to the output string.
+ */
+ HeaderEmitter.prototype.addDate = function (date) {
+ // Rather than make a header plastered with NaN values, throw an error on
+ // specific invalid dates.
+ if (isNaN(date.getTime())) {
+ throw new Error("Cannot encode an invalid date");
+ }
+
+ let fullYear,
+ month,
+ dayOfMonth,
+ dayOfWeek,
+ hours,
+ minutes,
+ seconds,
+ tzOffset;
+
+ if (this._sanitizeDate) {
+ fullYear = date.getUTCFullYear();
+ month = date.getUTCMonth();
+ dayOfMonth = date.getUTCDate();
+ dayOfWeek = date.getUTCDay();
+ hours = date.getUTCHours();
+ minutes = date.getUTCMinutes();
+ // To reduce the chance of fingerprinting the clock offset,
+ // round the time down to the nearest minute.
+ seconds = 0;
+ tzOffset = 0;
+ } else {
+ fullYear = date.getFullYear();
+ month = date.getMonth();
+ dayOfMonth = date.getDate();
+ dayOfWeek = date.getDay();
+ hours = date.getHours();
+ minutes = date.getMinutes();
+ seconds = date.getSeconds();
+ tzOffset = date.getTimezoneOffset();
+ }
+
+ // RFC 5322 says years can't be before 1900. The after 9999 is a bit that
+ // derives from the specification saying that years have 4 digits.
+ if (fullYear < 1900 || fullYear > 9999) {
+ throw new Error("Date year is out of encodable range");
+ }
+
+ // Start by computing the timezone offset for a day. We lack a good format, so
+ // the the 0-padding is done by hand. Note that the tzoffset we output is in
+ // the form ±hhmm, so we need to separate the offset (in minutes) into an hour
+ // and minute pair.
+ let tzOffHours = Math.abs(Math.trunc(tzOffset / 60));
+ let tzOffMinutes = Math.abs(tzOffset) % 60;
+ let tzOffsetStr =
+ (tzOffset > 0 ? "-" : "+") +
+ padTo2Digits(tzOffHours) +
+ padTo2Digits(tzOffMinutes);
+
+ // Convert the day-time figure into a single value to avoid unwanted line
+ // breaks in the middle.
+ let dayTime = [
+ kDaysOfWeek[dayOfWeek] + ",",
+ dayOfMonth,
+ mimeutils.kMonthNames[month],
+ fullYear,
+ padTo2Digits(hours) +
+ ":" +
+ padTo2Digits(minutes) +
+ ":" +
+ padTo2Digits(seconds),
+ tzOffsetStr,
+ ].join(" ");
+ this.addText(dayTime, false);
+ };
+
+ /**
+ * Signal that the current header has been finished encoding.
+ *
+ * @public
+ * @param {boolean} deliverEOF If true, signal to the handler that no more text
+ * will be arriving.
+ */
+ HeaderEmitter.prototype.finish = function (deliverEOF) {
+ this._commitLine();
+ if (deliverEOF) {
+ this._handler.deliverEOF();
+ }
+ };
+
+ /**
+ * Make a streaming header emitter that outputs on the given handler.
+ *
+ * @param {StreamHandler} handler The handler to consume output
+ * @param options Options to pass into the HeaderEmitter
+ * constructor.
+ * @returns {HeaderEmitter} A header emitter constructed with the given options.
+ */
+ function makeStreamingEmitter(handler, options) {
+ return new HeaderEmitter(handler, options);
+ }
+
+ function StringHandler() {
+ this.value = "";
+ this.deliverData = function (str) {
+ this.value += str;
+ };
+ this.deliverEOF = function () {};
+ }
+
+ /**
+ * Given a header name and its structured value, output a string containing its
+ * MIME-encoded value. The trailing CRLF for the header is included.
+ *
+ * @param {string} name - The name of the structured header.
+ * @param value The value of the structured header.
+ * @param options Options for the HeaderEmitter constructor.
+ * @returns {string} A MIME-encoded representation of the structured header.
+ * @see HeaderEmitter.addStructuredHeader
+ */
+ function emitStructuredHeader(name, value, options) {
+ let handler = new StringHandler();
+ let emitter = new HeaderEmitter(handler, options);
+ emitter.addStructuredHeader(name, value);
+ emitter.finish(true);
+ return handler.value;
+ }
+
+ /**
+ * Given a map of header names and their structured values, output a string
+ * containing all of their headers and their MIME-encoded values.
+ *
+ * This method is designed to be able to emit header values given the headerData
+ * values produced by MIME parsing. Thus, the values of the map are arrays
+ * corresponding to header multiplicity.
+ *
+ * @param {Map(String->Object[])} headerValues A map of header names to arrays
+ * of their structured values.
+ * @param options Options for the HeaderEmitter
+ * constructor.
+ * @returns {string} A MIME-encoded representation of the structured header.
+ * @see HeaderEmitter.addStructuredHeader
+ */
+ function emitStructuredHeaders(headerValues, options) {
+ let handler = new StringHandler();
+ let emitter = new HeaderEmitter(handler, options);
+ for (let instance of headerValues) {
+ instance[1].forEach(function (e) {
+ emitter.addStructuredHeader(instance[0], e);
+ });
+ }
+ emitter.finish(true);
+ return handler.value;
+ }
+
+ /**
+ * Add a custom structured MIME encoder to the set of known encoders. These
+ * encoders are used for {@link emitStructuredHeader} and similar functions to
+ * encode richer, more structured values instead of relying on string
+ * representations everywhere.
+ *
+ * Structured encoders are functions which take in a single parameter
+ * representing their structured value. The this parameter is set to be an
+ * instance of {@link HeaderEmitter}, and it is intended that the several public
+ * or protected methods on that class are useful for encoding values.
+ *
+ * There is a large set of structured encoders built-in to the jsmime library
+ * already.
+ *
+ * @param {string} header The header name (in its preferred case) for
+ * which the encoder will be used.
+ * @param {Function(Value)} encoder The structured encoder function.
+ */
+ function addStructuredEncoder(header, encoder) {
+ let lowerName = header.toLowerCase();
+ encoders.set(lowerName, encoder);
+ if (!preferredSpellings.has(lowerName)) {
+ preferredSpellings.set(lowerName, header);
+ }
+ }
+
+ return Object.freeze({
+ addStructuredEncoder,
+ emitStructuredHeader,
+ emitStructuredHeaders,
+ makeStreamingEmitter,
+ });
+ });
+
+ def("jsmime", function (require) {
+ return {
+ mimeutils: require("./mimeutils"),
+ MimeParser: require("./mimeparser"),
+ headerparser: require("./headerparser"),
+ headeremitter: require("./headeremitter"),
+ };
+ });
+ return mods.jsmime;
+});