diff options
Diffstat (limited to 'src/lib-mail/message-part-data.c')
-rw-r--r-- | src/lib-mail/message-part-data.c | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/src/lib-mail/message-part-data.c b/src/lib-mail/message-part-data.c new file mode 100644 index 0000000..a5771f8 --- /dev/null +++ b/src/lib-mail/message-part-data.c @@ -0,0 +1,594 @@ +/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "wildcard-match.h" +#include "array.h" +#include "rfc822-parser.h" +#include "rfc2231-parser.h" +#include "message-address.h" +#include "message-header-parser.h" + +#include "message-part-data.h" + +const char *message_part_envelope_headers[] = { + "Date", "Subject", "From", "Sender", "Reply-To", + "To", "Cc", "Bcc", "In-Reply-To", "Message-ID", + NULL +}; + +/* + * + */ + +bool message_part_data_is_plain_7bit(const struct message_part *part) +{ + const struct message_part_data *data = part->data; + + i_assert(data != NULL); + i_assert(part->parent == NULL); + + /* if content-type is text/xxx we don't have to check any + multipart stuff */ + if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0) + return FALSE; + if (part->next != NULL || part->children != NULL) + return FALSE; /* shouldn't happen normally.. */ + + /* must be text/plain */ + if (data->content_subtype != NULL && + strcasecmp(data->content_subtype, "plain") != 0) + return FALSE; + + /* only allowed parameter is charset=us-ascii, which is also default */ + if (data->content_type_params_count == 0) { + /* charset defaults to us-ascii */ + } else if (data->content_type_params_count != 1 || + strcasecmp(data->content_type_params[0].name, "charset") != 0 || + strcasecmp(data->content_type_params[0].value, + MESSAGE_PART_DEFAULT_CHARSET) != 0) + return FALSE; + + if (data->content_id != NULL || + data->content_description != NULL) + return FALSE; + + if (data->content_transfer_encoding != NULL && + strcasecmp(data->content_transfer_encoding, "7bit") != 0) + return FALSE; + + /* BODYSTRUCTURE checks: */ + if (data->content_md5 != NULL || + data->content_disposition != NULL || + data->content_language != NULL || + data->content_location != NULL) + return FALSE; + + return TRUE; +} + +bool message_part_data_get_filename(const struct message_part *part, + const char **filename_r) +{ + const struct message_part_data *data = part->data; + const struct message_part_param *params; + unsigned int params_count, i; + + i_assert(data != NULL); + + params = data->content_disposition_params; + params_count = data->content_disposition_params_count; + + if (data->content_disposition != NULL && + strcasecmp(data->content_disposition, "attachment") != 0) { + return FALSE; + } + for (i = 0; i < params_count; i++) { + if (strcasecmp(params[i].name, "filename") == 0 && + params[i].value != NULL) { + *filename_r = params[i].value; + return TRUE; + } + } + return FALSE; +} + +/* + * Header parsing + */ + +/* Message part envelope */ + +enum envelope_field { + ENVELOPE_FIELD_DATE = 0, + ENVELOPE_FIELD_SUBJECT, + ENVELOPE_FIELD_FROM, + ENVELOPE_FIELD_SENDER, + ENVELOPE_FIELD_REPLY_TO, + ENVELOPE_FIELD_TO, + ENVELOPE_FIELD_CC, + ENVELOPE_FIELD_BCC, + ENVELOPE_FIELD_IN_REPLY_TO, + ENVELOPE_FIELD_MESSAGE_ID, + + ENVELOPE_FIELD_UNKNOWN +}; + +static enum envelope_field +envelope_get_field(const char *name) +{ + switch (*name) { + case 'B': + case 'b': + if (strcasecmp(name, "Bcc") == 0) + return ENVELOPE_FIELD_BCC; + break; + case 'C': + case 'c': + if (strcasecmp(name, "Cc") == 0) + return ENVELOPE_FIELD_CC; + break; + case 'D': + case 'd': + if (strcasecmp(name, "Date") == 0) + return ENVELOPE_FIELD_DATE; + break; + case 'F': + case 'f': + if (strcasecmp(name, "From") == 0) + return ENVELOPE_FIELD_FROM; + break; + case 'I': + case 'i': + if (strcasecmp(name, "In-reply-to") == 0) + return ENVELOPE_FIELD_IN_REPLY_TO; + break; + case 'M': + case 'm': + if (strcasecmp(name, "Message-id") == 0) + return ENVELOPE_FIELD_MESSAGE_ID; + break; + case 'R': + case 'r': + if (strcasecmp(name, "Reply-to") == 0) + return ENVELOPE_FIELD_REPLY_TO; + break; + case 'S': + case 's': + if (strcasecmp(name, "Subject") == 0) + return ENVELOPE_FIELD_SUBJECT; + if (strcasecmp(name, "Sender") == 0) + return ENVELOPE_FIELD_SENDER; + break; + case 'T': + case 't': + if (strcasecmp(name, "To") == 0) + return ENVELOPE_FIELD_TO; + break; + } + + return ENVELOPE_FIELD_UNKNOWN; +} + +void message_part_envelope_parse_from_header(pool_t pool, + struct message_part_envelope **data, + struct message_header_line *hdr) +{ + struct message_part_envelope *d; + enum envelope_field field; + struct message_address **addr_p, *addr; + const char **str_p; + + if (*data == NULL) { + *data = p_new(pool, struct message_part_envelope, 1); + } + + if (hdr == NULL) + return; + field = envelope_get_field(hdr->name); + if (field == ENVELOPE_FIELD_UNKNOWN) + return; + + if (hdr->continues) { + /* wait for full value */ + hdr->use_full_value = TRUE; + return; + } + + d = *data; + addr_p = NULL; str_p = NULL; + switch (field) { + case ENVELOPE_FIELD_DATE: + str_p = &d->date; + break; + case ENVELOPE_FIELD_SUBJECT: + str_p = &d->subject; + break; + case ENVELOPE_FIELD_MESSAGE_ID: + str_p = &d->message_id; + break; + case ENVELOPE_FIELD_IN_REPLY_TO: + str_p = &d->in_reply_to; + break; + + case ENVELOPE_FIELD_CC: + addr_p = &d->cc; + break; + case ENVELOPE_FIELD_BCC: + addr_p = &d->bcc; + break; + case ENVELOPE_FIELD_FROM: + addr_p = &d->from; + break; + case ENVELOPE_FIELD_SENDER: + addr_p = &d->sender; + break; + case ENVELOPE_FIELD_TO: + addr_p = &d->to; + break; + case ENVELOPE_FIELD_REPLY_TO: + addr_p = &d->reply_to; + break; + case ENVELOPE_FIELD_UNKNOWN: + i_unreached(); + } + + if (addr_p != NULL) { + addr = message_address_parse(pool, hdr->full_value, + hdr->full_value_len, + UINT_MAX, + MESSAGE_ADDRESS_PARSE_FLAG_FILL_MISSING); + /* Merge multiple headers the same as if they were comma + separated in a single line. This is better from security + point of view, because attacker could intentionally write + addresses in a way that e.g. the first From header is + validated while MUA only shows the second From header. */ + while (*addr_p != NULL) + addr_p = &(*addr_p)->next; + *addr_p = addr; + } else if (str_p != NULL) { + *str_p = message_header_strdup(pool, hdr->full_value, + hdr->full_value_len); + } +} + +/* Message part data */ + +static void +parse_mime_parameters(struct rfc822_parser_context *parser, + pool_t pool, const struct message_part_param **params_r, + unsigned int *params_count_r) +{ + const char *const *results; + struct message_part_param *params; + unsigned int params_count, i; + + rfc2231_parse(parser, &results); + + params_count = str_array_length(results); + i_assert((params_count % 2) == 0); + params_count /= 2; + + if (params_count > 0) { + params = p_new(pool, struct message_part_param, params_count); + for (i = 0; i < params_count; i++) { + params[i].name = p_strdup(pool, results[i*2+0]); + params[i].value = p_strdup(pool, results[i*2+1]); + } + *params_r = params; + } + + *params_count_r = params_count; +} + +static void +parse_content_type(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *str; + const char *value; + unsigned int i; + int ret; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + ret = rfc822_parse_content_type(&parser, str); + + /* Save content type and subtype */ + value = str_c(str); + for (i = 0; value[i] != '\0'; i++) { + if (value[i] == '/') { + data->content_subtype = p_strdup(pool, value + i+1); + break; + } + } + str_truncate(str, i); + data->content_type = p_strdup(pool, str_c(str)); + if (data->content_subtype == NULL) { + /* The Content-Type is invalid. Don't leave it NULL so that + callers can assume that if content_type != NULL, + content_subtype != NULL also. */ + data->content_subtype = p_strdup(pool, ""); + } + + if (ret < 0) { + /* Content-Type is broken, but we wanted to get it as well as + we could. Don't try to read the parameters anymore though. + + We don't completely ignore a broken Content-Type, because + then it would be written as text/plain. This would cause a + mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */ + return; + } + + parse_mime_parameters(&parser, pool, + &data->content_type_params, + &data->content_type_params_count); + rfc822_parser_deinit(&parser); +} + +static void +parse_content_transfer_encoding(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *str; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + if (rfc822_parse_mime_token(&parser, str) >= 0 && + rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) { + data->content_transfer_encoding = + p_strdup(pool, str_c(str)); + } + rfc822_parser_deinit(&parser); +} + +static void +parse_content_disposition(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *str; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + if (rfc822_parse_mime_token(&parser, str) < 0) { + rfc822_parser_deinit(&parser); + return; + } + data->content_disposition = p_strdup(pool, str_c(str)); + + parse_mime_parameters(&parser, pool, + &data->content_disposition_params, + &data->content_disposition_params_count); + rfc822_parser_deinit(&parser); +} + +static void +parse_content_language(struct message_part_data *data, + pool_t pool, const unsigned char *value, size_t value_len) +{ + struct rfc822_parser_context parser; + ARRAY_TYPE(const_string) langs; + string_t *str; + + /* Language-Header = "Content-Language" ":" 1#Language-tag + Language-Tag = Primary-tag *( "-" Subtag ) + Primary-tag = 1*8ALPHA + Subtag = 1*8ALPHA */ + + rfc822_parser_init(&parser, value, value_len, NULL); + + t_array_init(&langs, 16); + str = t_str_new(128); + + rfc822_skip_lwsp(&parser); + while (rfc822_parse_atom(&parser, str) >= 0) { + const char *lang = p_strdup(pool, str_c(str)); + + array_push_back(&langs, &lang); + str_truncate(str, 0); + + if (parser.data >= parser.end || *parser.data != ',') + break; + parser.data++; + rfc822_skip_lwsp(&parser); + } + rfc822_parser_deinit(&parser); + + if (array_count(&langs) > 0) { + array_append_zero(&langs); + data->content_language = + p_strarray_dup(pool, array_front(&langs)); + } +} + +static void +parse_content_header(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + const char *name = hdr->name + strlen("Content-"); + + if (hdr->continues) { + hdr->use_full_value = TRUE; + return; + } + + switch (*name) { + case 'i': + case 'I': + if (strcasecmp(name, "ID") == 0 && data->content_id == NULL) + data->content_id = + message_header_strdup(pool, hdr->full_value, + hdr->full_value_len); + break; + + case 'm': + case 'M': + if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL) + data->content_md5 = + message_header_strdup(pool, hdr->full_value, + hdr->full_value_len); + break; + + case 't': + case 'T': + if (strcasecmp(name, "Type") == 0 && data->content_type == NULL) + parse_content_type(data, pool, hdr); + else if (strcasecmp(name, "Transfer-Encoding") == 0 && + data->content_transfer_encoding == NULL) + parse_content_transfer_encoding(data, pool, hdr); + break; + + case 'l': + case 'L': + if (strcasecmp(name, "Language") == 0 && + data->content_language == NULL) { + parse_content_language(data, pool, + hdr->full_value, hdr->full_value_len); + } else if (strcasecmp(name, "Location") == 0 && + data->content_location == NULL) { + data->content_location = + message_header_strdup(pool, hdr->full_value, + hdr->full_value_len); + } + break; + + case 'd': + case 'D': + if (strcasecmp(name, "Description") == 0 && + data->content_description == NULL) + data->content_description = + message_header_strdup(pool, hdr->full_value, + hdr->full_value_len); + else if (strcasecmp(name, "Disposition") == 0 && + data->content_disposition_params == NULL) + parse_content_disposition(data, pool, hdr); + break; + } +} + +void message_part_data_parse_from_header(pool_t pool, + struct message_part *part, + struct message_header_line *hdr) +{ + struct message_part_data *part_data; + struct message_part_envelope *envelope; + bool parent_rfc822; + + if (hdr == NULL) { + if (part->data == NULL) { + /* no Content-* headers. add an empty context + structure anyway. */ + part->data = p_new(pool, struct message_part_data, 1); + } else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { + /* If there was no Mime-Version, forget all + the Content-stuff */ + part_data = part->data; + envelope = part_data->envelope; + + i_zero(part_data); + part_data->envelope = envelope; + } + return; + } + + if (hdr->eoh) + return; + + parent_rfc822 = part->parent != NULL && + (part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0; + if (!parent_rfc822 && strncasecmp(hdr->name, "Content-", 8) != 0) + return; + + if (part->data == NULL) { + /* initialize message part data */ + part->data = p_new(pool, struct message_part_data, 1); + } + part_data = part->data; + + if (strncasecmp(hdr->name, "Content-", 8) == 0) { + T_BEGIN { + parse_content_header(part_data, pool, hdr); + } T_END; + } + + if (parent_rfc822) { + /* message/rfc822, we need the envelope */ + message_part_envelope_parse_from_header(pool, &part_data->envelope, hdr); + } +} + +bool message_part_has_content_types(struct message_part *part, + const char *const *types) +{ + struct message_part_data *data = part->data; + bool ret = TRUE; + const char *const *ptr; + const char *content_type; + + i_assert(data != NULL); + + if (data->content_type == NULL) + return FALSE; + else if (data->content_subtype == NULL) + content_type = t_strdup_printf("%s/", data->content_type); + else + content_type = t_strdup_printf("%s/%s", data->content_type, + data->content_subtype); + for(ptr = types; *ptr != NULL; ptr++) { + bool exclude = (**ptr == '!'); + if (wildcard_match_icase(content_type, (*ptr)+(exclude?1:0))) + ret = !exclude; + } + + return ret; +} + +bool message_part_has_parameter(struct message_part *part, const char *parameter, + bool has_value) +{ + struct message_part_data *data = part->data; + + i_assert(data != NULL); + + for (unsigned int i = 0; i < data->content_disposition_params_count; i++) { + const struct message_part_param *param = + &data->content_disposition_params[i]; + if (strcasecmp(param->name, parameter) == 0 && + (!has_value || *param->value != '\0')) { + return TRUE; + } + } + return FALSE; +} + +bool message_part_is_attachment(struct message_part *part, + const struct message_part_attachment_settings *set) +{ + struct message_part_data *data = part->data; + + i_assert(data != NULL); + + /* see if the content-type is excluded */ + if (set->content_type_filter != NULL && + !message_part_has_content_types(part, set->content_type_filter)) + return FALSE; + + /* accept any attachment, or any inlined attachment with filename, + unless inlined ones are excluded */ + if (null_strcasecmp(data->content_disposition, "attachment") == 0 || + (!set->exclude_inlined && + null_strcasecmp(data->content_disposition, "inline") == 0 && + message_part_has_parameter(part, "filename", FALSE))) + return TRUE; + return FALSE; +} |