summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/mime/src/mimeParser.jsm
blob: 95256ba41ce15be307d5456106e36d61450e2f8e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
// vim:set ts=2 sw=2 sts=2 et ft=javascript:

var EXPORTED_SYMBOLS = ["MimeParser"];

var { jsmime } = ChromeUtils.import("resource:///modules/jsmime.jsm");
var { MailStringUtils } = ChromeUtils.import(
  "resource:///modules/MailStringUtils.jsm"
);

// Emitter helpers, for internal functions later on.

/**
 * jsmime emitter that assembles a tree of message parts in `this.mimeTree`
 * and, once the message ends, publishes the result as `this.mimeMsg`.
 *
 * The emitter reads two settings from `this.options`:
 *  - getMimePart: if set, only the part with that partName is returned and
 *    attachment bodies are kept on the tree.
 */
var ExtractMimeMsgEmitter = {
  /**
   * Get the file name of a part, preferring the "filename" parameter of the
   * content-disposition header over the "name" parameter of content-type.
   *
   * @param {object} part - A part object as built by startPart().
   * @returns {string} The attachment name, or "" if none could be found.
   */
  getAttachmentName(part) {
    if (!part || !part.hasOwnProperty("headers")) {
      return "";
    }

    if (part.headers.hasOwnProperty("content-disposition")) {
      let filename = MimeParser.getParameter(
        part.headers["content-disposition"][0],
        "filename"
      );
      if (filename) {
        return filename;
      }
    }

    if (part.headers.hasOwnProperty("content-type")) {
      let name = MimeParser.getParameter(
        part.headers["content-type"][0],
        "name"
      );
      if (name) {
        return name;
      }
    }

    return "";
  },

  /**
   * Decide whether a part is reported as an attachment.
   *
   * multipart/* parts are never attachments. All other parts whose
   * content-disposition starts with "attachment" are attachments. Parts with
   * any other (or no) disposition are attachments unless their content-type
   * starts with text/plain, text/html or text/enriched.
   *
   * @param {object} part - A part object as built by startPart().
   * @returns {boolean}
   */
  isAttachment(part) {
    if (!part) {
      return false;
    }

    let contentType = part.contentType || "text/plain";
    if (contentType.search(/^multipart\//i) === 0) {
      return false;
    }

    // Tolerate parts without a headers object, like getAttachmentName() does.
    let dispositionValues = part.headers?.["content-disposition"];
    let contentDisposition = "";
    if (Array.isArray(dispositionValues) && dispositionValues.length > 0) {
      contentDisposition = dispositionValues[0];
    }

    if (
      contentDisposition.search(/^attachment/i) === 0 ||
      contentType.search(/^text\/plain|^text\/html|^text\/enriched/i) === -1
    ) {
      return true;
    }

    return false;
  },

  /** JSMime API */

  /**
   * Reset the emitter state and create the root node of the tree.
   */
  startMessage() {
    this.mimeTree = {
      partName: "",
      contentType: "message/rfc822",
      parts: [],
      size: 0,
      headers: {},
      attachments: [],
      // No support for encryption.
      isEncrypted: false,
    };
    // partsPath is a hierarchical stack of parts from the root to the
    // current part.
    this.partsPath = [this.mimeTree];
    this.options = this.options || {};
  },

  /**
   * Publish the final result in this.mimeMsg. It stays null if the message
   * had no parts, or if a specific part was requested but the first part of
   * the tree does not carry the requested partName.
   */
  endMessage() {
    // Prepare the mimeMsg object, which is the final output of the emitter.
    this.mimeMsg = null;
    if (this.mimeTree.parts.length == 0) {
      return;
    }

    // Check if only a specific mime part has been requested.
    if (this.options.getMimePart) {
      if (this.mimeTree.parts[0].partName == this.options.getMimePart) {
        this.mimeMsg = this.mimeTree.parts[0];
      }
      return;
    }

    // The comparator has to return a negative, zero or positive number. A
    // boolean result never signals "a sorts before b" and leaves the final
    // order up to the engine's sorting algorithm.
    this.mimeTree.attachments.sort((a, b) => {
      if (a.partName < b.partName) {
        return -1;
      }
      if (a.partName > b.partName) {
        return 1;
      }
      return 0;
    });
    this.mimeMsg = this.mimeTree;
  },

  /**
   * Create a new part below the current part and make it the current part.
   *
   * @param {string} partNum - The part number as reported by jsmime.
   * @param {object} headerMap - The headers of the part; its contentType.type
   *   and _rawHeaders members are read.
   */
  startPart(partNum, headerMap) {
    let contentType = headerMap.contentType?.type
      ? headerMap.contentType.type
      : "text/plain";

    let headers = {};
    for (let [headerName, headerValue] of headerMap._rawHeaders) {
      // MsgHdrToMimeMessage always returns an array, even for single values.
      let valueArray = Array.isArray(headerValue) ? headerValue : [headerValue];
      // Return a binary string, to mimic MsgHdrToMimeMessage.
      headers[headerName] = valueArray.map(value => {
        return MailStringUtils.stringToByteString(value);
      });
    }

    // Get the most recent part from the hierarchical parts stack, which is the
    // parent of the new part to be added.
    let parentPart = this.partsPath[this.partsPath.length - 1];

    // Add a leading 1 to the partNum and convert the "$" sub-message delimiter.
    let partName = "1" + (partNum ? "." : "") + partNum.replaceAll("$", ".1");

    // MsgHdrToMimeMessage differentiates between the message headers and the
    // headers of the first part. jsmime.js however returns all headers of
    // the message in the first multipart/* part: Merge all headers into the
    // parent part and only keep content-* headers.
    if (parentPart.contentType.startsWith("message/")) {
      for (let [k, v] of Object.entries(headers)) {
        if (!parentPart.headers[k]) {
          parentPart.headers[k] = v;
        }
      }
      headers = Object.fromEntries(
        Object.entries(headers).filter(h => h[0].startsWith("content-"))
      );
    }

    // Add default content-type header.
    if (!headers.hasOwnProperty("content-type")) {
      headers["content-type"] = ["text/plain"];
    }

    let newPart = {
      partName,
      body: "",
      headers,
      contentType,
      size: 0,
      parts: [],
      // No support for encryption.
      isEncrypted: false,
    };

    // Add nested new part.
    parentPart.parts.push(newPart);
    // Push the newly added part into the hierarchical parts stack.
    this.partsPath.push(newPart);
  },

  /**
   * Finish the current part: account for its size, register it as an
   * attachment if applicable and make its parent the current part again.
   *
   * @param {string} partNum - The part number as reported by jsmime (unused).
   */
  endPart(partNum) {
    let deleteBody = false;
    // Get the most recent part from the hierarchical parts stack.
    let currentPart = this.partsPath[this.partsPath.length - 1];

    // Add size.
    let size = currentPart.body.length;
    currentPart.size += size;
    let partSize = currentPart.size;

    if (this.isAttachment(currentPart)) {
      currentPart.name = this.getAttachmentName(currentPart);
      // The attachments list gets a shallow copy, which keeps the body even
      // if it is removed from the part in the tree below.
      this.mimeTree.attachments.push({ ...currentPart });
      deleteBody = !this.options.getMimePart;
    }

    if (deleteBody || currentPart.body == "") {
      delete currentPart.body;
    }

    // Remove content-disposition and content-transfer-encoding headers.
    currentPart.headers = Object.fromEntries(
      Object.entries(currentPart.headers).filter(
        h =>
          !["content-disposition", "content-transfer-encoding"].includes(h[0])
      )
    );

    // Set the parent of this part to be the new current part.
    this.partsPath.pop();

    // Add the size of this part to its parent as well.
    currentPart = this.partsPath[this.partsPath.length - 1];
    currentPart.size += partSize;
  },

  /**
   * Append a chunk of body data to the current part.
   *
   * The data parameter is either a string or a Uint8Array.
   */
  deliverPartData(partNum, data) {
    // Get the most recent part from the hierarchical parts stack.
    let currentPart = this.partsPath[this.partsPath.length - 1];

    if (typeof data === "string") {
      currentPart.body += data;
    } else {
      currentPart.body += MailStringUtils.uint8ArrayToByteString(data);
    }
  },
};

/**
 * Emitter that only remembers the headers of the outermost part, which is the
 * part reported with an empty part number.
 */
var ExtractHeadersEmitter = {
  startPart(partNum, headers) {
    // Headers of nested parts are of no interest here.
    if (partNum != "") {
      return;
    }
    this.headers = headers;
  },
};

/**
 * Emitter that remembers the headers of the outermost part (by reusing
 * ExtractHeadersEmitter.startPart) and concatenates all data delivered for
 * that part into `body`.
 */
var ExtractHeadersAndBodyEmitter = {
  body: "",
  startPart: ExtractHeadersEmitter.startPart,
  deliverPartData(partNum, data) {
    // Only the data of the part with the empty part number is collected.
    if (partNum != "") {
      return;
    }
    this.body = this.body + data;
  },
};

/**
 * Sets appropriate default options for chrome-privileged environments.
 *
 * The given object is modified in place: if it has no "onerror" member yet,
 * Cu.reportError is installed as the error handler.
 *
 * @param {object} opts - The parser options to complete.
 */
function setDefaultParserOptions(opts) {
  if ("onerror" in opts) {
    // The caller supplied its own error handler, keep it.
    return;
  }
  opts.onerror = Cu.reportError;
}

var MimeParser = {
  /**
   * Determine an arbitrary "parameter" part of a mail header.
   *
   * Example:
   * 'multipart/signed; protocol="xyz"', 'protocol' --> returns "xyz"
   *
   * @param {string} headerStr - The string containing all parts of the header.
   * @param {string} parameter - The parameter we are looking for; it is
   *   matched case-insensitively.
   *
   * @return {string} String containing the value of the parameter; or "".
   */
  getParameter(headerStr, parameter) {
    parameter = parameter.toLowerCase();
    // Unfold the header: drop line breaks which are followed by whitespace.
    headerStr = headerStr.replace(/[\r\n]+[ \t]+/g, "");

    // NOTE(review): the leading ";" presumably makes jsmime treat the entire
    // header value as a parameter list - confirm against jsmime.
    let hdrMap = jsmime.headerparser.parseParameterHeader(
      ";" + headerStr,
      true,
      true
    );

    for (let [key, value] of hdrMap.entries()) {
      if (parameter == key.toLowerCase()) {
        return value;
      }
    }

    return "";
  },

  /**
   * Triggers an asynchronous parse of the given input.
   *
   * The input is an input stream; the stream will be read until EOF and then
   * closed upon completion. Both blocking and nonblocking streams are
   * supported by this implementation, but it is still guaranteed that the first
   * callback will not happen before this method returns.
   *
   * @param input   An input stream of text to parse.
   * @param emitter The emitter to receive callbacks on.
   * @param opts    A set of options for the parser.
   * @throws {Error} If input is not an nsIInputStream.
   */
  parseAsync(input, emitter, opts) {
    // Only input streams are supported.
    if (!(input instanceof Ci.nsIInputStream)) {
      throw new Error("input is not a recognizable type!");
    }

    // We need a pump for the listener
    let pump = Cc["@mozilla.org/network/input-stream-pump;1"].createInstance(
      Ci.nsIInputStreamPump
    );
    pump.init(input, 0, 0, true);

    // Make a stream listener with the given emitter and use it to read from
    // the pump.
    let parserListener = MimeParser.makeStreamListenerParser(emitter, opts);
    pump.asyncRead(parserListener);
  },

  /**
   * Triggers a synchronous parse of the given input.
   *
   * The input is a string that is immediately parsed, calling all functions on
   * the emitter before this function returns.
   *
   * @param input   A string of text to parse. Other types are rejected.
   * @param emitter The emitter to receive callbacks on.
   * @param opts    A set of options for the parser. Defaults to an empty set;
   *                a passed object gets its defaults filled in.
   * @throws {Error} If input is not a string.
   */
  parseSync(input, emitter, opts = {}) {
    // We only support string parsing if we are trying to do this parse
    // synchronously.
    if (typeof input != "string") {
      throw new Error("input is not a recognizable type!");
    }
    setDefaultParserOptions(opts);
    let parser = new jsmime.MimeParser(emitter, opts);
    parser.deliverData(input);
    parser.deliverEOF();
  },

  /**
   * Returns a stream listener that feeds data into a parser.
   *
   * In addition to the functions on the emitter that the parser may use, the
   * generated stream listener will also make calls to onStartRequest and
   * onStopRequest on the emitter (if they exist).
   *
   * @param emitter The emitter to receive callbacks on.
   * @param opts    A set of options for the parser. Defaults to an empty set;
   *                a passed object gets its defaults filled in.
   */
  makeStreamListenerParser(emitter, opts = {}) {
    let StreamListener = {
      onStartRequest(aRequest) {
        try {
          if ("onStartRequest" in emitter) {
            emitter.onStartRequest(aRequest);
          }
        } finally {
          // Reset the parser even if the emitter callback threw.
          this._parser.resetParser();
        }
      },
      onStopRequest(aRequest, aStatus) {
        this._parser.deliverEOF();
        if ("onStopRequest" in emitter) {
          emitter.onStopRequest(aRequest, aStatus);
        }
      },
      onDataAvailable(aRequest, aStream, aOffset, aCount) {
        let scriptIn = Cc[
          "@mozilla.org/scriptableinputstream;1"
        ].createInstance(Ci.nsIScriptableInputStream);
        scriptIn.init(aStream);
        // Use readBytes instead of read to handle embedded NULs properly.
        this._parser.deliverData(scriptIn.readBytes(aCount));
      },
      QueryInterface: ChromeUtils.generateQI([
        "nsIStreamListener",
        "nsIRequestObserver",
      ]),
    };
    setDefaultParserOptions(opts);
    StreamListener._parser = new jsmime.MimeParser(emitter, opts);
    return StreamListener;
  },

  /**
   * Returns a new raw MIME parser.
   *
   * Prefer one of the other methods where possible, since the input here must
   * be driven manually.
   *
   * @param emitter The emitter to receive callbacks on.
   * @param opts    A set of options for the parser. Defaults to an empty set;
   *                a passed object gets its defaults filled in.
   */
  makeParser(emitter, opts = {}) {
    setDefaultParserOptions(opts);
    return new jsmime.MimeParser(emitter, opts);
  },

  /**
   * Returns a mimeMsg object for the given input. The returned object tries to
   * be compatible with the return value of MsgHdrToMimeMessage. Differences:
   *  - no support for encryption
   *  - returned attachments include the body and not the URL
   *  - returned attachments match either allInlineAttachments or
   *    allUserAttachments (decodeSubMessages = false)
   *  - does not eat TABs in headers, if they follow a CRLF
   *
   * The input is any type of input that would be accepted by parseSync.
   *
   * @param input   A string of text to parse.
   * @param {object} [options] - Overrides for the default options.
   * @param {string} [options.getMimePart=""] - partName of a single part to
   *   return instead of the entire message.
   * @param {boolean} [options.decodeSubMessages=true] - Passed on to the
   *   parser as its decodeSubMessages option.
   */
  extractMimeMsg(input, options = {}) {
    let emitter = Object.create(ExtractMimeMsgEmitter);
    // Start from the default options and let the caller override them.
    emitter.options = {
      getMimePart: "",
      decodeSubMessages: true,
      ...options,
    };

    MimeParser.parseSync(input, emitter, {
      // jsmime does not use the "1." prefix for the partName.
      // jsmime uses "$." as sub-message delimiter.
      pruneat: emitter.options.getMimePart
        .split(".")
        .slice(1)
        .join(".")
        .replaceAll(".1.", "$."),
      decodeSubMessages: emitter.options.decodeSubMessages,
      bodyformat: "decode",
      stripcontinuations: true,
      strformat: "unicode",
    });
    return emitter.mimeMsg;
  },

  /**
   * Returns a dictionary of headers for the given input.
   *
   * The input is any type of input that would be accepted by parseSync. What
   * is returned is a JS object that represents the headers of the entire
   * envelope as would be received by startPart when partNum is the empty
   * string.
   *
   * @param input   A string of text to parse.
   */
  extractHeaders(input) {
    let emitter = Object.create(ExtractHeadersEmitter);
    MimeParser.parseSync(input, emitter, { pruneat: "", bodyformat: "none" });
    return emitter.headers;
  },

  /**
   * Returns the headers and body for the given input message.
   *
   * The return value is an array whose first element is the dictionary of
   * headers (as would be returned by extractHeaders) and whose second element
   * is a binary string of the entire body of the message.
   *
   * @param input   A string of text to parse.
   */
  extractHeadersAndBody(input) {
    let emitter = Object.create(ExtractHeadersAndBodyEmitter);
    MimeParser.parseSync(input, emitter, { pruneat: "", bodyformat: "raw" });
    return [emitter.headers, emitter.body];
  },

  // Parameters for parseHeaderField

  /**
   * Parse the header as if it were unstructured.
   *
   * This results in the same string if no other options are specified. If other
   * options are specified, this causes the string to be modified appropriately.
   */
  HEADER_UNSTRUCTURED: 0x00,
  /**
   * Parse the header as if it were in the form text; attr=val; attr=val.
   *
   * Such headers include Content-Type, Content-Disposition, and most other
   * headers used by MIME as opposed to messages.
   */
  HEADER_PARAMETER: 0x02,
  /**
   * Parse the header as if it were a sequence of mailboxes.
   */
  HEADER_ADDRESS: 0x03,

  /**
   * This decodes parameter values according to RFC 2231.
   *
   * This flag means nothing if HEADER_PARAMETER is not specified.
   */
  HEADER_OPTION_DECODE_2231: 0x10,
  /**
   * This decodes the inline encoded-words that are in RFC 2047.
   */
  HEADER_OPTION_DECODE_2047: 0x20,
  /**
   * This converts the header from a raw string to proper Unicode.
   */
  HEADER_OPTION_ALLOW_RAW: 0x40,

  // Convenience for all three of the above.
  HEADER_OPTION_ALL_I18N: 0x70,

  /**
   * Parse a header field according to the specification given by flags.
   *
   * Permissible flags begin with one of the HEADER_* flags, which may be or'd
   * with any of the HEADER_OPTION_* flags to modify the result appropriately.
   *
   * If the option HEADER_OPTION_ALLOW_RAW is passed, the charset parameter, if
   * present, is the charset to fallback to if the header is not decodable as
   * UTF-8 text. If HEADER_OPTION_ALLOW_RAW is passed but the charset parameter
   * is not provided, then no fallback decoding will be done. If
   * HEADER_OPTION_ALLOW_RAW is not passed, then no attempt will be made to
   * convert charsets.
   *
   * @param text    The value of a MIME or message header to parse.
   * @param flags   A set of flags that controls interpretation of the header.
   * @param charset A default charset to assume if no information may be found.
   * @return The unchanged or decoded string for HEADER_UNSTRUCTURED, otherwise
   *         whatever the matching jsmime header parser returns.
   * @throws {Error} If the low 4 bits of flags are not one of the HEADER_*
   *         types.
   */
  parseHeaderField(text, flags, charset) {
    // If we have a raw string, convert it to Unicode first
    if (flags & MimeParser.HEADER_OPTION_ALLOW_RAW) {
      text = jsmime.headerparser.convert8BitHeader(text, charset);
    }

    // The low 4 bits indicate the type of the header we are parsing. All of the
    // higher-order bits are flags.
    switch (flags & 0x0f) {
      case MimeParser.HEADER_UNSTRUCTURED:
        if (flags & MimeParser.HEADER_OPTION_DECODE_2047) {
          text = jsmime.headerparser.decodeRFC2047Words(text);
        }
        return text;
      case MimeParser.HEADER_PARAMETER:
        return jsmime.headerparser.parseParameterHeader(
          text,
          (flags & MimeParser.HEADER_OPTION_DECODE_2047) != 0,
          (flags & MimeParser.HEADER_OPTION_DECODE_2231) != 0
        );
      case MimeParser.HEADER_ADDRESS:
        return jsmime.headerparser.parseAddressingHeader(
          text,
          (flags & MimeParser.HEADER_OPTION_DECODE_2047) != 0
        );
      default:
        throw new Error("Illegal type of header field");
    }
  },
};