summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/compose/src/MimeEncoder.jsm
blob: ab4c60de4227e7f96f5ef37d0af4be8b340f2ef8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Names this module exposes to importers.
// NOTE(review): presumably read by the legacy JSM loader — confirm.
const EXPORTED_SYMBOLS = ["MimeEncoder"];

// Longest body line (in characters, line terminator excluded) that
// MimeEncoder.pickEncoding tolerates; a longer line makes it consider
// quoted-printable and, failing that, fall back to base64.
const LINELENGTH_ENCODING_THRESHOLD = 990;
// Content type of an attached message. pickEncoding exempts it from the
// "force base64" rules for attachments and for over-long lines.
const MESSAGE_RFC822 = "message/rfc822";

/**
 * A class to pick Content-Transfer-Encoding for a MimePart, and encode MimePart
 * body accordingly.
 *
 * pickEncoding() must run before encode() for base64 or quoted-printable to be
 * applied; until then `encoding` is "" and encode() only normalizes line
 * endings.
 */
class MimeEncoder {
  /**
   * Create a MimeEncoder.
   *
   * @param {string} charset - Charset of the part; consulted by pickEncoding.
   * @param {string} contentType - MIME type of the part; stored lowercased.
   * @param {boolean} forceMsgEncoding - When true, pickEncoding always selects
   *   an encoding that is safe for 7bit transport.
   * @param {boolean} isMainBody - Whether this part is the main message body
   *   rather than an attachment.
   * @param {string} content - The body to analyze and encode.
   *   NOTE(review): presumably a binary string (one char per byte), since
   *   `btoa` rejects char codes above 0xFF — confirm against callers.
   */
  constructor(charset, contentType, forceMsgEncoding, isMainBody, content) {
    this._charset = charset;
    this._contentType = contentType.toLowerCase();
    this._forceMsgEncoding = forceMsgEncoding;
    this._isMainBody = isMainBody;
    this._body = content;
    this._bodySize = content.length;

    // The encoding value will be used to set Content-Transfer-Encoding header
    // and encode this._body.
    this._encoding = "";

    // Flags used to pick encoding.
    this._highBitCount = 0;
    this._unPrintableCount = 0;
    this._ctrlCount = 0;
    this._nullCount = 0;
    this._hasCr = 0;
    this._hasLf = 0;
    this._hasCrLf = 0;
    this._maxColumn = 0;
  }

  /**
   * The Content-Transfer-Encoding chosen by pickEncoding, or "" if
   * pickEncoding has not run yet.
   *
   * @type {string}
   */
  get encoding() {
    return this._encoding;
  }

  /**
   * Use the combination of charset, content type and scanning this._body to
   * decide what encoding it should have. The result is stored in
   * this._encoding and is one of "7bit", "8bit", "quoted-printable" or
   * "base64".
   */
  pickEncoding() {
    this._analyzeBody();

    const strictlyMime = Services.prefs.getBoolPref("mail.strictly_mime");
    let needsB64 = false;
    let isUsingQP = false;

    // Allow users to override our percentage-wise guess on whether
    // the file is text or binary.
    const forceB64 = Services.prefs.getBoolPref("mail.file_attach_binary");

    // If the content-type is "image/" or something else known to be binary or
    // several flavors of newlines are present, use base64 unless we're attaching
    // a message (so that we don't get confused by newline conversions).
    if (
      !this._isMainBody &&
      (forceB64 ||
        this._requiresB64() ||
        this._hasCr + this._hasLf + this._hasCrLf != 1) &&
      this._contentType != MESSAGE_RFC822
    ) {
      needsB64 = true;
    } else {
      // Otherwise, we need to pick an encoding based on the contents of the
      // document.
      let encodeP = false;

      // Force quoted-printable if the sender does not allow conversion to 7bit.
      if (
        this._forceMsgEncoding ||
        this._maxColumn > LINELENGTH_ENCODING_THRESHOLD ||
        (strictlyMime && this._unPrintableCount) ||
        this._nullCount
      ) {
        if (
          this._isMainBody &&
          this._contentType == "text/plain" &&
          // From rfc3676#section-4.2, Quoted-Printable encoding SHOULD NOT be
          // used with Format=Flowed unless absolutely necessary.
          Services.prefs.getBoolPref("mailnews.send_plaintext_flowed")
        ) {
          needsB64 = true;
        } else {
          encodeP = true;
        }
      }

      // MIME requires a special case that these types never be encoded.
      if (
        this._contentType.startsWith("message") ||
        this._contentType.startsWith("multipart")
      ) {
        encodeP = false;
      }

      const manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
        Ci.nsICharsetConverterManager
      );
      let isCharsetMultiByte = false;
      try {
        isCharsetMultiByte =
          manager.getCharsetData(this._charset, ".isMultibyte") == "true";
      } catch {
        // Deliberate best effort: if the lookup throws, treat the charset as
        // single-byte.
      }

      // If the Mail charset is multibyte, we force it to use Base64 for
      // attachments.
      if (
        !this._isMainBody &&
        this._charset &&
        isCharsetMultiByte &&
        (this._contentType.startsWith("text") ||
          // text/vcard synonym
          this._contentType == "application/directory")
      ) {
        needsB64 = true;
      } else if (this._charset == "ISO-2022-JP") {
        this._encoding = "7bit";
      } else if (encodeP && this._unPrintableCount > this._bodySize / 10) {
        // If the document contains more than 10% unprintable characters,
        // then that seems like a good candidate for base64 instead of
        // quoted-printable.
        needsB64 = true;
      } else if (encodeP) {
        this._encoding = "quoted-printable";
        isUsingQP = true;
      } else if (this._highBitCount > 0) {
        this._encoding = "8bit";
      } else {
        this._encoding = "7bit";
      }
    }

    // Always base64 binary data.
    if (needsB64) {
      this._encoding = "base64";
    }

    // SMTP (RFC 821/5321) and RFC 5322 limit a line to 998 characters plus
    // CRLF. To encode "long lines" use a CTE that will transmit shorter lines.
    // Switch to base64 if we are not already using "quoted printable".

    // We don't do this for message/rfc822 attachments, since we can't
    // change the original Content-Transfer-Encoding of the message we're
    // attaching. We rely on the original message complying with the limit,
    // if it doesn't we won't either. Not ideal.
    if (
      this._contentType != MESSAGE_RFC822 &&
      this._maxColumn > LINELENGTH_ENCODING_THRESHOLD &&
      !isUsingQP
    ) {
      this._encoding = "base64";
    }
  }

  /**
   * Encode this._body according to the value of this.encoding.
   *
   * For any encoding other than "base64" and "quoted-printable" the body is
   * passed through with LF and CRLF line endings normalized to CRLF.
   *
   * @returns {string} The encoded body, always terminated by CRLF.
   */
  encode() {
    let output;
    if (this.encoding == "base64") {
      output = this._encodeBase64();
    } else if (this.encoding == "quoted-printable") {
      output = this._encodeQP();
    } else {
      output = this._body.replaceAll("\r\n", "\n").replaceAll("\n", "\r\n");
    }
    if (!output.endsWith("\r\n")) {
      output += "\r\n";
    }
    return output;
  }

  /**
   * Scan this._body to set flags that will be used by pickEncoding.
   */
  _analyzeBody() {
    // Start from a clean slate so that calling pickEncoding more than once
    // does not double-count and skew the "more than 10% unprintable" check.
    this._highBitCount = 0;
    this._unPrintableCount = 0;
    this._ctrlCount = 0;
    this._nullCount = 0;
    this._hasCr = 0;
    this._hasLf = 0;
    this._hasCrLf = 0;
    this._maxColumn = 0;

    let currentColumn = 0;
    // Stays true after a CR until the next LF is seen; this is how a bare CR
    // followed by ordinary text is told apart from a CRLF pair.
    let prevCharWasCr = false;

    for (let i = 0; i < this._bodySize; i++) {
      const ch = this._body.charAt(i);
      const charCode = this._body.charCodeAt(i);
      if (charCode > 126) {
        this._highBitCount++;
        this._unPrintableCount++;
      } else if (ch < " " && !"\t\r\n".includes(ch)) {
        this._unPrintableCount++;
        this._ctrlCount++;
        if (ch == "\0") {
          this._nullCount++;
        }
      }

      if ("\r\n".includes(ch)) {
        if (ch == "\r") {
          if (prevCharWasCr) {
            this._hasCr = 1;
          } else {
            prevCharWasCr = true;
          }
        } else if (prevCharWasCr) {
          if (currentColumn == 0) {
            // LF directly after CR.
            this._hasCrLf = 1;
          } else {
            // Text sat between the CR and this LF, so both were bare.
            this._hasCr = 1;
            this._hasLf = 1;
          }
          prevCharWasCr = false;
        } else {
          this._hasLf = 1;
        }

        if (this._maxColumn < currentColumn) {
          this._maxColumn = currentColumn;
        }
        currentColumn = 0;
      } else {
        currentColumn++;
      }
    }

    // Account for a final line that has no terminator.
    if (this._maxColumn < currentColumn) {
      this._maxColumn = currentColumn;
    }
  }

  /**
   * Determine if base64 is required according to contentType.
   *
   * @returns {boolean} True for image/, audio/, video/ and application/ types
   *   that are not in the known-textual list below.
   */
  _requiresB64() {
    if (this._contentType == "application/x-unknown-content-type") {
      // Unknown types don't necessarily require encoding.  (Note that
      // "unknown" and "application/octet-stream" aren't the same.)
      return false;
    }
    if (
      this._contentType.startsWith("image/") ||
      this._contentType.startsWith("audio/") ||
      this._contentType.startsWith("video/") ||
      this._contentType.startsWith("application/")
    ) {
      // The following types are application/ or image/ types that are actually
      // known to contain textual data (meaning line-based, not binary, where
      // CRLF conversion is desired rather than disastrous.)  So, if the type
      // is any of these, it does not *require* base64, and if we do need to
      // encode it for other reasons, we'll probably use quoted-printable.
      // But, if it's not one of these types, then we assume that any subtypes
      // of the non-"text/" types are binary data, where CRLF conversion would
      // corrupt it, so we use base64 right off the bat.
      // The reason it's desirable to ship these as text instead of just using
      // base64 all the time is mainly to preserve the readability of them for
      // non-MIME users: if I mail a /bin/sh script to someone, it might not
      // need to be encoded at all, so we should leave it readable if we can.
      // This list of types was derived from the comp.mail.mime FAQ, section
      // 10.2.2, "List of known unregistered MIME types" on 2-Feb-96.
      const typesWhichAreReallyText = [
        "application/mac-binhex40", // APPLICATION_BINHEX
        "application/pgp", // APPLICATION_PGP
        "application/pgp-keys",
        "application/x-pgp-message", // APPLICATION_PGP2
        "application/postscript", // APPLICATION_POSTSCRIPT
        "application/x-uuencode", // APPLICATION_UUENCODE
        "application/x-uue", // APPLICATION_UUENCODE2
        "application/uue", // APPLICATION_UUENCODE4
        "application/uuencode", // APPLICATION_UUENCODE3
        "application/sgml",
        "application/x-csh",
        "application/javascript",
        "application/ecmascript",
        "application/x-javascript",
        "application/x-latex",
        "application/x-macbinhex40",
        "application/x-ns-proxy-autoconfig",
        "application/x-www-form-urlencoded",
        "application/x-perl",
        "application/x-sh",
        "application/x-shar",
        "application/x-tcl",
        "application/x-tex",
        "application/x-texinfo",
        "application/x-troff",
        "application/x-troff-man",
        "application/x-troff-me",
        "application/x-troff-ms",
        "application/x-wais-source",
        "image/x-bitmap",
        "image/x-pbm",
        "image/x-pgm",
        "image/x-portable-anymap",
        "image/x-portable-bitmap",
        "image/x-portable-graymap",
        "image/x-portable-pixmap", // IMAGE_PPM
        "image/x-ppm",
        "image/x-xbitmap", // IMAGE_XBM
        "image/x-xbm", // IMAGE_XBM2
        "image/xbm", // IMAGE_XBM3
        "image/x-xpixmap",
        "image/x-xpm",
      ];
      return !typesWhichAreReallyText.includes(this._contentType);
    }
    return false;
  }

  /**
   * Base64 encoding. See RFC 2045 6.8. We use the built-in `btoa`, then ensure
   * line width is no more than 72.
   *
   * @returns {string} The base64 text, each line terminated by CRLF; "" for
   *   an empty body.
   */
  _encodeBase64() {
    const encoded = btoa(this._body);
    const limit = 72;
    let ret = "";
    // Bounding the loop by the encoded length avoids emitting an empty extra
    // line when that length is an exact multiple of the limit.
    for (let start = 0; start < encoded.length; start += limit) {
      ret += encoded.slice(start, start + limit) + "\r\n";
    }
    return ret;
  }

  /**
   * Quoted-printable encoding. See RFC 2045 6.7.
   *
   * @returns {string} The encoded body with hard line breaks written as CRLF
   *   and soft line breaks inserted once a line reaches 73 columns.
   */
  _encodeQP() {
    let currentColumn = 0;
    const hexdigits = "0123456789ABCDEF";
    // True while the last character written to the current line is a raw
    // space or tab.
    let white = false;
    let out = "";

    // Render a character as "=XX".
    // NOTE(review): only correct for char codes up to 0xFF — confirm the body
    // is always a binary string.
    const encodeChar = ch => {
      const code = ch.charCodeAt(0);
      return "=" + hexdigits[code >> 4] + hexdigits[code & 0xf];
    };

    for (let i = 0; i < this._bodySize; i++) {
      const ch = this._body.charAt(i);
      const charCode = this._body.charCodeAt(i);
      if (ch == "\r" || ch == "\n") {
        // If it's CRLF, swallow two chars instead of one.
        if (i + 1 < this._bodySize && ch == "\r" && this._body[i + 1] == "\n") {
          i++;
        }

        // Whitespace cannot be allowed to occur at the end of the line, so we
        // back up and replace the whitespace with its code.
        if (white) {
          out = out.slice(0, -1) + encodeChar(out.slice(-1));
        }

        // Now write out the newline.
        out += "\r\n";
        white = false;
        currentColumn = 0;
      } else if (
        currentColumn == 0 &&
        (ch == "." ||
          (ch == "F" &&
            (i >= this._bodySize - 1 || this._body[i + 1] == "r") &&
            (i >= this._bodySize - 2 || this._body[i + 2] == "o") &&
            (i >= this._bodySize - 3 || this._body[i + 3] == "m") &&
            (i >= this._bodySize - 4 || this._body[i + 4] == " ")))
      ) {
        // Just to be SMTP-safe, if "." appears in column 0, encode it.
        // If this line begins with "From " (or it could but we don't have enough
        // data in the buffer to be certain), encode the 'F' in hex to avoid
        // potential problems with BSD mailbox formats.
        white = false;
        out += encodeChar(ch);
        currentColumn += 3;
      } else if (
        (charCode >= 33 && charCode <= 60) ||
        (charCode >= 62 && charCode <= 126)
      ) {
        // Printable characters except for '='
        white = false;
        out += ch;
        currentColumn++;
      } else if (ch == " " || ch == "\t") {
        // Whitespace
        white = true;
        out += ch;
        currentColumn++;
      } else {
        white = false;
        out += encodeChar(ch);
        currentColumn += 3;
      }

      if (currentColumn >= 73) {
        // Soft line break for readability
        out += "=\r\n";
        white = false;
        currentColumn = 0;
      }
    }

    // encode() appends CRLF to an unterminated body, which would turn a final
    // raw space or tab into trailing whitespace that decoders strip. Encode
    // it, just like whitespace in front of a hard line break.
    if (white) {
      out = out.slice(0, -1) + encodeChar(out.slice(-1));
    }

    return out;
  }
}