diff options
Diffstat (limited to 'tools/mime-parser.c')
-rw-r--r-- | tools/mime-parser.c | 833 |
1 files changed, 833 insertions, 0 deletions
diff --git a/tools/mime-parser.c b/tools/mime-parser.c new file mode 100644 index 0000000..a151dc6 --- /dev/null +++ b/tools/mime-parser.c @@ -0,0 +1,833 @@ +/* mime-parser.c - Parse MIME structures (high level rfc822 parser). + * Copyright (C) 2016 g10 Code GmbH + * Copyright (C) 2016 Bundesamt für Sicherheit in der Informationstechnik + * + * This file is part of GnuPG. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see <https://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../common/util.h" +#include "rfc822parse.h" +#include "mime-parser.h" + + +enum pgpmime_states + { + PGPMIME_NONE = 0, + PGPMIME_WAIT_ENCVERSION, + PGPMIME_IN_ENCVERSION, + PGPMIME_WAIT_ENCDATA, + PGPMIME_IN_ENCDATA, + PGPMIME_GOT_ENCDATA, + PGPMIME_WAIT_SIGNEDDATA, + PGPMIME_IN_SIGNEDDATA, + PGPMIME_WAIT_SIGNATURE, + PGPMIME_IN_SIGNATURE, + PGPMIME_GOT_SIGNATURE, + PGPMIME_INVALID + }; + + +/* Definition of the mime parser object. */ +struct mime_parser_context_s +{ + void *cookie; /* Cookie passed to all callbacks. */ + + /* The callback to announce the transation from header to body. */ + gpg_error_t (*t2body) (void *cookie, int level); + + /* The callback to announce a new part. */ + gpg_error_t (*new_part) (void *cookie, + const char *mediatype, + const char *mediasubtype); + /* The callback to return data of a part. */ + gpg_error_t (*part_data) (void *cookie, + const void *data, + size_t datalen); + /* The callback to collect encrypted data. */ + gpg_error_t (*collect_encrypted) (void *cookie, const char *data); + /* The callback to collect signed data. */ + gpg_error_t (*collect_signeddata) (void *cookie, const char *data); + /* The callback to collect a signature. */ + gpg_error_t (*collect_signature) (void *cookie, const char *data); + + /* The RFC822 parser context is stored here during callbacks. */ + rfc822parse_t msg; + + /* Helper to convey error codes from user callbacks. */ + gpg_error_t err; + + int nesting_level; /* The current nesting level. */ + int hashing_at_level; /* The nesting level at which we are hashing. */ + enum pgpmime_states pgpmime; /* Current PGP/MIME state. */ + unsigned int delay_hashing:1;/* Helper for PGPMIME_IN_SIGNEDDATA. */ + unsigned int want_part:1; /* Return the current part. */ + unsigned int decode_part:2; /* Decode the part. 1 = QP, 2 = Base64. */ + + unsigned int verbose:1; /* Enable verbose mode. */ + unsigned int debug:1; /* Enable debug mode. */ + + /* Flags to help with debug output. */ + struct { + unsigned int n_skip; /* Skip showing these number of lines. */ + unsigned int header:1; /* Show the header lines. */ + unsigned int data:1; /* Show the data lines. */ + unsigned int as_note:1; /* Show the next data line as a note. */ + unsigned int boundary : 1; + } show; + + struct b64state *b64state; /* NULL or malloced Base64 decoder state. */ + + /* A buffer for reading a mail line, */ + char line[5000]; +}; + + +/* Print the event received by the parser for debugging. */ +static void +show_message_parser_event (rfc822parse_event_t event) +{ + const char *s; + + switch (event) + { + case RFC822PARSE_OPEN: s= "Open"; break; + case RFC822PARSE_CLOSE: s= "Close"; break; + case RFC822PARSE_CANCEL: s= "Cancel"; break; + case RFC822PARSE_T2BODY: s= "T2Body"; break; + case RFC822PARSE_FINISH: s= "Finish"; break; + case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break; + case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break; + case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break; + case RFC822PARSE_BOUNDARY: s= "Boundary"; break; + case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break; + case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break; + case RFC822PARSE_PREAMBLE: s= "Preamble"; break; + case RFC822PARSE_EPILOGUE: s= "Epilogue"; break; + default: s= "[unknown event]"; break; + } + log_debug ("*** RFC822 event %s\n", s); +} + + +/* Do in-place decoding of quoted-printable data of LENGTH in BUFFER. + Returns the new length of the buffer and stores true at R_SLBRK if + the line ended with a soft line break; false is stored if not. + This function asssumes that a complete line is passed in + buffer. */ +static size_t +qp_decode (char *buffer, size_t length, int *r_slbrk) +{ + char *d, *s; + + if (r_slbrk) + *r_slbrk = 0; + + /* Fixme: We should remove trailing white space first. */ + for (s=d=buffer; length; length--) + { + if (*s == '=') + { + if (length > 2 && hexdigitp (s+1) && hexdigitp (s+2)) + { + s++; + *(unsigned char*)d++ = xtoi_2 (s); + s += 2; + length -= 2; + } + else if (length > 2 && s[1] == '\r' && s[2] == '\n') + { + /* Soft line break. */ + s += 3; + length -= 2; + if (r_slbrk && length == 1) + *r_slbrk = 1; + } + else if (length > 1 && s[1] == '\n') + { + /* Soft line break with only a Unix line terminator. */ + s += 2; + length -= 1; + if (r_slbrk && length == 1) + *r_slbrk = 1; + } + else if (length == 1) + { + /* Soft line break at the end of the line. */ + s += 1; + if (r_slbrk) + *r_slbrk = 1; + } + else + *d++ = *s++; + } + else + *d++ = *s++; + } + + return d - buffer; +} + + +/* This function is called by parse_mail to communicate events. This + * callback communicates with the caller using a structure passed in + * OPAQUE. Should return 0 on success or set ERRNO and return -1. */ +static int +parse_message_cb (void *opaque, rfc822parse_event_t event, rfc822parse_t msg) +{ + mime_parser_t ctx = opaque; + const char *s; + int rc = 0; + + /* Make the RFC822 parser context availabale for callbacks. */ + ctx->msg = msg; + + if (ctx->debug) + show_message_parser_event (event); + + if (event == RFC822PARSE_BEGIN_HEADER || event == RFC822PARSE_T2BODY) + { + /* We need to check here whether to start collecting signed data + * because attachments might come without header lines and thus + * we won't see the BEGIN_HEADER event. */ + if (ctx->pgpmime == PGPMIME_WAIT_SIGNEDDATA) + { + if (ctx->debug) + log_debug ("begin_hash\n"); + ctx->hashing_at_level = ctx->nesting_level; + ctx->pgpmime = PGPMIME_IN_SIGNEDDATA; + ctx->delay_hashing = 0; + } + } + + if (event == RFC822PARSE_OPEN) + { + /* Initialize for a new message. */ + ctx->show.header = 1; + } + else if (event == RFC822PARSE_T2BODY) + { + rfc822parse_field_t field; + + ctx->want_part = 0; + ctx->decode_part = 0; + + if (ctx->t2body) + { + rc = ctx->t2body (ctx->cookie, ctx->nesting_level); + if (rc) + goto t2body_leave; + } + + field = rfc822parse_parse_field (msg, "Content-Type", -1); + if (field) + { + const char *s1, *s2; + + s1 = rfc822parse_query_media_type (field, &s2); + if (s1) + { + if (ctx->verbose) + log_debug ("h media: %*s%s %s\n", + ctx->nesting_level*2, "", s1, s2); + if (ctx->pgpmime == PGPMIME_WAIT_ENCVERSION) + { + if (!strcmp (s1, "application") + && !strcmp (s2, "pgp-encrypted")) + { + if (ctx->debug) + log_debug ("c begin_encversion\n"); + ctx->pgpmime = PGPMIME_IN_ENCVERSION; + } + else + { + log_error ("invalid PGP/MIME structure;" + " expected '%s', got '%s/%s'\n", + "application/pgp-encrypted", s1, s2); + ctx->pgpmime = PGPMIME_INVALID; + } + } + else if (ctx->pgpmime == PGPMIME_WAIT_ENCDATA) + { + if (!strcmp (s1, "application") + && !strcmp (s2, "octet-stream")) + { + if (ctx->debug) + log_debug ("c begin_encdata\n"); + ctx->pgpmime = PGPMIME_IN_ENCDATA; + } + else + { + log_error ("invalid PGP/MIME structure;" + " expected '%s', got '%s/%s'\n", + "application/octet-stream", s1, s2); + ctx->pgpmime = PGPMIME_INVALID; + } + } + else if (ctx->pgpmime == PGPMIME_WAIT_SIGNATURE) + { + if (!strcmp (s1, "application") + && !strcmp (s2, "pgp-signature")) + { + if (ctx->debug) + log_debug ("c begin_signature\n"); + ctx->pgpmime = PGPMIME_IN_SIGNATURE; + } + else + { + log_error ("invalid PGP/MIME structure;" + " expected '%s', got '%s/%s'\n", + "application/pgp-signature", s1, s2); + ctx->pgpmime = PGPMIME_INVALID; + } + } + else if (!strcmp (s1, "multipart") + && !strcmp (s2, "encrypted")) + { + s = rfc822parse_query_parameter (field, "protocol", 0); + if (s) + { + if (ctx->debug) + log_debug ("h encrypted.protocol: %s\n", s); + if (!strcmp (s, "application/pgp-encrypted")) + { + if (ctx->pgpmime) + log_error ("note: " + "ignoring nested PGP/MIME signature\n"); + else + ctx->pgpmime = PGPMIME_WAIT_ENCVERSION; + } + else if (ctx->verbose) + log_debug ("# this protocol is not supported\n"); + } + } + else if (!strcmp (s1, "multipart") + && !strcmp (s2, "signed")) + { + s = rfc822parse_query_parameter (field, "protocol", 1); + if (s) + { + if (ctx->debug) + log_debug ("h signed.protocol: %s\n", s); + if (!strcmp (s, "application/pgp-signature")) + { + if (ctx->pgpmime) + log_error ("note: " + "ignoring nested PGP/MIME signature\n"); + else + ctx->pgpmime = PGPMIME_WAIT_SIGNEDDATA; + } + else if (ctx->verbose) + log_debug ("# this protocol is not supported\n"); + } + } + else if (ctx->new_part) + { + ctx->err = ctx->new_part (ctx->cookie, s1, s2); + if (!ctx->err) + ctx->want_part = 1; + else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE) + ctx->err = 0; + else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE) + { + ctx->want_part = ctx->decode_part = 1; + ctx->err = 0; + } + } + } + else + { + if (ctx->debug) + log_debug ("h media: %*s none\n", ctx->nesting_level*2, ""); + if (ctx->new_part) + { + ctx->err = ctx->new_part (ctx->cookie, "", ""); + if (!ctx->err) + ctx->want_part = 1; + else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE) + ctx->err = 0; + else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE) + { + ctx->want_part = ctx->decode_part = 1; + ctx->err = 0; + } + } + } + + rfc822parse_release_field (field); + } + else + { + if (ctx->verbose) + log_debug ("h media: %*stext plain [assumed]\n", + ctx->nesting_level*2, ""); + if (ctx->new_part) + { + ctx->err = ctx->new_part (ctx->cookie, "text", "plain"); + if (!ctx->err) + ctx->want_part = 1; + else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE) + ctx->err = 0; + else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE) + { + ctx->want_part = ctx->decode_part = 1; + ctx->err = 0; + } + } + } + + /* Figure out the encoding if needed. */ + if (ctx->decode_part) + { + char *value; + size_t valueoff; + + ctx->decode_part = 0; /* Fallback for unknown encoding. */ + value = rfc822parse_get_field (msg, "Content-Transfer-Encoding", -1, + &valueoff); + if (value) + { + if (!stricmp (value+valueoff, "quoted-printable")) + ctx->decode_part = 1; + else if (!stricmp (value+valueoff, "base64")) + { + ctx->decode_part = 2; + if (ctx->b64state) + b64dec_finish (ctx->b64state); /* Reuse state. */ + else + { + ctx->b64state = xtrymalloc (sizeof *ctx->b64state); + if (!ctx->b64state) + rc = gpg_error_from_syserror (); + } + if (!rc) + rc = b64dec_start (ctx->b64state, NULL); + } + free (value); /* Right, we need a plain free. */ + } + } + + t2body_leave: + ctx->show.header = 0; + ctx->show.data = 1; + ctx->show.n_skip = 1; + } + else if (event == RFC822PARSE_PREAMBLE) + ctx->show.as_note = 1; + else if (event == RFC822PARSE_LEVEL_DOWN) + { + if (ctx->debug) + log_debug ("b down\n"); + ctx->nesting_level++; + } + else if (event == RFC822PARSE_LEVEL_UP) + { + if (ctx->debug) + log_debug ("b up\n"); + if (ctx->nesting_level) + ctx->nesting_level--; + else + log_error ("invalid structure (bad nesting level)\n"); + } + else if (event == RFC822PARSE_BOUNDARY || event == RFC822PARSE_LAST_BOUNDARY) + { + ctx->show.data = 0; + ctx->show.boundary = 1; + if (event == RFC822PARSE_BOUNDARY) + { + ctx->show.header = 1; + ctx->show.n_skip = 1; + if (ctx->debug) + log_debug ("b part\n"); + } + else if (ctx->debug) + log_debug ("b last\n"); + + if (ctx->pgpmime == PGPMIME_IN_ENCDATA) + { + if (ctx->debug) + log_debug ("c end_encdata\n"); + ctx->pgpmime = PGPMIME_GOT_ENCDATA; + /* FIXME: We should assert (event == LAST_BOUNDARY). */ + } + else if (ctx->pgpmime == PGPMIME_IN_SIGNEDDATA + && ctx->nesting_level == ctx->hashing_at_level) + { + if (ctx->debug) + log_debug ("c end_hash\n"); + ctx->pgpmime = PGPMIME_WAIT_SIGNATURE; + if (ctx->collect_signeddata) + ctx->err = ctx->collect_signeddata (ctx->cookie, NULL); + } + else if (ctx->pgpmime == PGPMIME_IN_SIGNATURE) + { + if (ctx->debug) + log_debug ("c end_signature\n"); + ctx->pgpmime = PGPMIME_GOT_SIGNATURE; + /* FIXME: We should assert (event == LAST_BOUNDARY). */ + } + else if (ctx->want_part) + { + if (ctx->part_data) + { + /* FIXME: We may need to flush things. */ + ctx->err = ctx->part_data (ctx->cookie, NULL, 0); + } + ctx->want_part = 0; + } + } + + ctx->msg = NULL; + + return rc; +} + + +/* Create a new mime parser object. COOKIE is a values which will be + * used as first argument for all callbacks registered with this + * parser object. */ +gpg_error_t +mime_parser_new (mime_parser_t *r_parser, void *cookie) +{ + mime_parser_t ctx; + + *r_parser = NULL; + + ctx = xtrycalloc (1, sizeof *ctx); + if (!ctx) + return gpg_error_from_syserror (); + ctx->cookie = cookie; + + *r_parser = ctx; + return 0; +} + + +/* Release a mime parser object. */ +void +mime_parser_release (mime_parser_t ctx) +{ + if (!ctx) + return; + + if (ctx->b64state) + { + b64dec_finish (ctx->b64state); + xfree (ctx->b64state); + } + xfree (ctx); +} + + +/* Set verbose and debug mode. */ +void +mime_parser_set_verbose (mime_parser_t ctx, int level) +{ + if (!level) + { + ctx->verbose = 0; + ctx->debug = 0; + } + else + { + ctx->verbose = 1; + if (level > 10) + ctx->debug = 1; + } +} + + +/* Set a callback for the transition from header to body. LEVEL is + * the current nesting level, starting with 0. This callback can be + * used to evaluate headers before any other action is done. Note + * that if a new NEW_PART callback needs to be called it is done after + * this T2BODY callback. */ +void +mime_parser_set_t2body (mime_parser_t ctx, + gpg_error_t (*fnc) (void *cookie, int level)) +{ + ctx->t2body = fnc; +} + + +/* Set the callback used to announce a new part. It will be called + * with the media type and media subtype of the part. If no + * Content-type header was given both values are the empty string. + * The callback should return 0 on success or an error code. The + * error code GPG_ERR_FALSE indicates that the caller is not + * interested in the part and data shall not be returned via a + * registered part_data callback. The error code GPG_ERR_TRUE + * indicates that the parts shall be redurned in decoded format + * (i.e. base64 or QP encoding is removed). */ +void +mime_parser_set_new_part (mime_parser_t ctx, + gpg_error_t (*fnc) (void *cookie, + const char *mediatype, + const char *mediasubtype)) +{ + ctx->new_part = fnc; +} + + +/* Set the callback used to return the data of a part to the caller. + * The end of the part is indicated by passing NUL for DATA. */ +void +mime_parser_set_part_data (mime_parser_t ctx, + gpg_error_t (*fnc) (void *cookie, + const void *data, + size_t datalen)) +{ + ctx->part_data = fnc; +} + + +/* Set the callback to collect encrypted data. A NULL passed to the + * callback indicates the end of the encrypted data; the callback may + * then decrypt the collected data. */ +void +mime_parser_set_collect_encrypted (mime_parser_t ctx, + gpg_error_t (*fnc) (void *cookie, + const char *data)) +{ + ctx->collect_encrypted = fnc; +} + + +/* Set the callback to collect signed data. A NULL passed to the + * callback indicates the end of the signed data. */ +void +mime_parser_set_collect_signeddata (mime_parser_t ctx, + gpg_error_t (*fnc) (void *cookie, + const char *data)) +{ + ctx->collect_signeddata = fnc; +} + + +/* Set the callback to collect the signature. A NULL passed to the + * callback indicates the end of the signature; the callback may the + * verify the signature. */ +void +mime_parser_set_collect_signature (mime_parser_t ctx, + gpg_error_t (*fnc) (void *cookie, + const char *data)) +{ + ctx->collect_signature = fnc; +} + + +/* Return the RFC888 parser context. This is only available inside a + * callback. */ +rfc822parse_t +mime_parser_rfc822parser (mime_parser_t ctx) +{ + return ctx->msg; +} + + +/* Helper for mime_parser_parse. */ +static gpg_error_t +process_part_data (mime_parser_t ctx, char *line, size_t *length) +{ + gpg_error_t err; + size_t nbytes; + + if (!ctx->want_part) + return 0; + if (!ctx->part_data) + return 0; + + if (ctx->decode_part == 1) + { + *length = qp_decode (line, *length, NULL); + } + else if (ctx->decode_part == 2) + { + log_assert (ctx->b64state); + err = b64dec_proc (ctx->b64state, line, *length, &nbytes); + if (err) + return err; + *length = nbytes; + } + + return ctx->part_data (ctx->cookie, line, *length); +} + + +/* Read and parse a message from FP and call the appropriate + * callbacks. */ +gpg_error_t +mime_parser_parse (mime_parser_t ctx, estream_t fp) +{ + gpg_error_t err; + rfc822parse_t msg = NULL; + unsigned int lineno = 0; + size_t length; + char *line; + + line = ctx->line; + + msg = rfc822parse_open (parse_message_cb, ctx); + if (!msg) + { + err = gpg_error_from_syserror (); + log_error ("can't open mail parser: %s", gpg_strerror (err)); + goto leave; + } + + /* Fixme: We should not use fgets because it can't cope with + embedded nul characters. */ + while (es_fgets (ctx->line, sizeof (ctx->line), fp)) + { + lineno++; + if (lineno == 1 && !strncmp (line, "From ", 5)) + continue; /* We better ignore a leading From line. */ + + length = strlen (line); + if (length && line[length - 1] == '\n') + line[--length] = 0; + else + log_error ("mail parser detected too long or" + " non terminated last line (lnr=%u)\n", lineno); + if (length && line[length - 1] == '\r') + line[--length] = 0; + + ctx->err = 0; + if (rfc822parse_insert (msg, line, length)) + { + err = gpg_error_from_syserror (); + log_error ("mail parser failed: %s", gpg_strerror (err)); + goto leave; + } + if (ctx->err) + { + /* Error from a callback detected. */ + err = ctx->err; + goto leave; + } + + + /* Debug output. Note that the boundary is shown before n_skip + * is evaluated. */ + if (ctx->show.boundary) + { + if (ctx->debug) + log_debug ("# Boundary: %s\n", line); + ctx->show.boundary = 0; + } + if (ctx->show.n_skip) + ctx->show.n_skip--; + else if (ctx->show.data) + { + if (ctx->show.as_note) + { + if (ctx->verbose) + log_debug ("# Note: %s\n", line); + ctx->show.as_note = 0; + } + else if (ctx->debug) + log_debug ("# Data: %s\n", line); + } + else if (ctx->show.header && ctx->verbose) + log_debug ("# Header: %s\n", line); + + if (ctx->pgpmime == PGPMIME_IN_ENCVERSION) + { + trim_trailing_spaces (line); + if (!*line) + ; /* Skip empty lines. */ + else if (!strcmp (line, "Version: 1")) + ctx->pgpmime = PGPMIME_WAIT_ENCDATA; + else + { + log_error ("invalid PGP/MIME structure;" + " garbage in pgp-encrypted part ('%s')\n", line); + ctx->pgpmime = PGPMIME_INVALID; + } + } + else if (ctx->pgpmime == PGPMIME_IN_ENCDATA) + { + if (ctx->collect_encrypted) + { + err = ctx->collect_encrypted (ctx->cookie, line); + if (!err) + err = ctx->collect_encrypted (ctx->cookie, "\r\n"); + if (err) + goto leave; + } + } + else if (ctx->pgpmime == PGPMIME_GOT_ENCDATA) + { + ctx->pgpmime = PGPMIME_NONE; + if (ctx->collect_encrypted) + ctx->collect_encrypted (ctx->cookie, NULL); + } + else if (ctx->pgpmime == PGPMIME_IN_SIGNEDDATA) + { + /* If we are processing signed data, store the signed data. + * We need to delay the hashing of the CR/LF because the + * last line ending belongs to the next boundary. This is + * the reason why we can't use the PGPMIME state as a + * condition. */ + if (ctx->debug) + log_debug ("# hashing %s'%s'\n", + ctx->delay_hashing? "CR,LF+":"", line); + if (ctx->collect_signeddata) + { + if (ctx->delay_hashing) + ctx->collect_signeddata (ctx->cookie, "\r\n"); + ctx->collect_signeddata (ctx->cookie, line); + } + ctx->delay_hashing = 1; + + err = process_part_data (ctx, line, &length); + if (err) + goto leave; + } + else if (ctx->pgpmime == PGPMIME_IN_SIGNATURE) + { + if (ctx->collect_signeddata) + { + ctx->collect_signature (ctx->cookie, line); + ctx->collect_signature (ctx->cookie, "\r\n"); + } + } + else if (ctx->pgpmime == PGPMIME_GOT_SIGNATURE) + { + ctx->pgpmime = PGPMIME_NONE; + if (ctx->collect_signeddata) + ctx->collect_signature (ctx->cookie, NULL); + } + else + { + err = process_part_data (ctx, line, &length); + if (err) + goto leave; + } + } + + rfc822parse_close (msg); + msg = NULL; + err = 0; + + leave: + rfc822parse_cancel (msg); + return err; +} |