diff options
Diffstat (limited to 'src/lib-mail/message-decoder.c')
-rw-r--r-- | src/lib-mail/message-decoder.c | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/src/lib-mail/message-decoder.c b/src/lib-mail/message-decoder.c new file mode 100644 index 0000000..845b3d5 --- /dev/null +++ b/src/lib-mail/message-decoder.c @@ -0,0 +1,390 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "base64.h" +#include "str.h" +#include "unichar.h" +#include "charset-utf8.h" +#include "qp-decoder.h" +#include "rfc822-parser.h" +#include "rfc2231-parser.h" +#include "message-parser.h" +#include "message-header-decode.h" +#include "message-decoder.h" + +struct message_decoder_context { + enum message_decoder_flags flags; + normalizer_func_t *normalizer; + struct message_part *prev_part; + + struct message_header_line hdr; + buffer_t *buf, *buf2; + + char *charset_trans_charset; + struct charset_translation *charset_trans; + char translation_buf[CHARSET_MAX_PENDING_BUF_SIZE]; + size_t translation_size; + + struct qp_decoder *qp; + struct base64_decoder base64_decoder; + + char *content_type, *content_charset; + enum message_cte message_cte; + + bool binary_input:1; +}; + +static void +message_decode_body_init_charset(struct message_decoder_context *ctx, + struct message_part *part); + +struct message_decoder_context * +message_decoder_init(normalizer_func_t *normalizer, + enum message_decoder_flags flags) +{ + struct message_decoder_context *ctx; + + ctx = i_new(struct message_decoder_context, 1); + ctx->flags = flags; + ctx->normalizer = normalizer; + ctx->buf = buffer_create_dynamic(default_pool, 8192); + ctx->buf2 = buffer_create_dynamic(default_pool, 8192); + base64_decode_init(&ctx->base64_decoder, &base64_scheme, 0); + return ctx; +} + +void message_decoder_deinit(struct message_decoder_context **_ctx) +{ + struct message_decoder_context *ctx = *_ctx; + + *_ctx = NULL; + + if (ctx->charset_trans != NULL) + charset_to_utf8_end(&ctx->charset_trans); + if (ctx->qp != NULL) + qp_decoder_deinit(&ctx->qp); + + buffer_free(&ctx->buf); + buffer_free(&ctx->buf2); + i_free(ctx->charset_trans_charset); + i_free(ctx->content_type); + i_free(ctx->content_charset); + i_free(ctx); +} + +void message_decoder_set_return_binary(struct message_decoder_context *ctx, + bool set) +{ + if (set) + ctx->flags |= MESSAGE_DECODER_FLAG_RETURN_BINARY; + else + ctx->flags &= ENUM_NEGATE(MESSAGE_DECODER_FLAG_RETURN_BINARY); + message_decode_body_init_charset(ctx, ctx->prev_part); +} + +enum message_cte message_decoder_parse_cte(const struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + enum message_cte message_cte; + string_t *value; + + value = t_str_new(64); + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + + rfc822_skip_lwsp(&parser); + + /* Ensure we do not accidentically accept confused values like + 'base64 binary' or embedded NULs */ + if (rfc822_parse_mime_token(&parser, value) == 1) { + rfc822_skip_lwsp(&parser); + /* RFC 2045 does not permit parameters for CTE, + but in case someone uses them, we accept + parameter separator ';' to be lenient. */ + if (*parser.data != ';') + return MESSAGE_CTE_UNKNOWN; + } + + message_cte = MESSAGE_CTE_UNKNOWN; + switch (str_len(value)) { + case 4: + if (i_memcasecmp(str_data(value), "7bit", 4) == 0 || + i_memcasecmp(str_data(value), "8bit", 4) == 0) + message_cte = MESSAGE_CTE_78BIT; + break; + case 6: + if (i_memcasecmp(str_data(value), "base64", 6) == 0) + message_cte = MESSAGE_CTE_BASE64; + else if (i_memcasecmp(str_data(value), "binary", 6) == 0) + message_cte = MESSAGE_CTE_BINARY; + break; + case 16: + if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0) + message_cte = MESSAGE_CTE_QP; + break; + } + rfc822_parser_deinit(&parser); + return message_cte; +} + +static void +parse_content_type(struct message_decoder_context *ctx, + struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + const char *const *results; + string_t *str; + int ret; + + if (ctx->content_type != NULL) + return; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + str = t_str_new(64); + ret = rfc822_parse_content_type(&parser, str); + ctx->content_type = i_strdup(str_c(str)); + if (ret < 0) { + rfc822_parser_deinit(&parser); + return; + } + + rfc2231_parse(&parser, &results); + for (; *results != NULL; results += 2) { + if (strcasecmp(results[0], "charset") == 0) { + ctx->content_charset = i_strdup(results[1]); + break; + } + } + rfc822_parser_deinit(&parser); +} + +static bool message_decode_header(struct message_decoder_context *ctx, + struct message_header_line *hdr, + struct message_block *output) +{ + size_t value_len; + + if (hdr->continues) { + hdr->use_full_value = TRUE; + return FALSE; + } + + T_BEGIN { + if (hdr->name_len == 12 && + strcasecmp(hdr->name, "Content-Type") == 0) + parse_content_type(ctx, hdr); + if (hdr->name_len == 25 && + strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0) + ctx->message_cte = message_decoder_parse_cte(hdr); + } T_END; + + buffer_set_used_size(ctx->buf, 0); + message_header_decode_utf8(hdr->full_value, hdr->full_value_len, + ctx->buf, ctx->normalizer); + value_len = ctx->buf->used; + + if (ctx->normalizer != NULL) { + (void)ctx->normalizer(hdr->name, hdr->name_len, ctx->buf); + buffer_append_c(ctx->buf, '\0'); + } else { + if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name, + hdr->name_len, ctx->buf)) + buffer_append_c(ctx->buf, '\0'); + } + + ctx->hdr = *hdr; + ctx->hdr.full_value = ctx->buf->data; + ctx->hdr.full_value_len = value_len; + ctx->hdr.value_len = 0; + if (ctx->buf->used != value_len) { + ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data, + ctx->hdr.full_value_len); + ctx->hdr.name_len = ctx->buf->used - 1 - value_len; + } + + output->hdr = &ctx->hdr; + return TRUE; +} + +static void translation_buf_decode(struct message_decoder_context *ctx, + const unsigned char **data, size_t *size) +{ + unsigned char trans_buf[CHARSET_MAX_PENDING_BUF_SIZE+1]; + size_t data_wanted, skip; + size_t trans_size, orig_size; + + /* @UNSAFE: move the previously untranslated bytes to trans_buf + and see if we have now enough data to get the next character + translated */ + memcpy(trans_buf, ctx->translation_buf, ctx->translation_size); + data_wanted = sizeof(trans_buf) - ctx->translation_size; + if (data_wanted > *size) + data_wanted = *size; + memcpy(trans_buf + ctx->translation_size, *data, data_wanted); + + orig_size = trans_size = ctx->translation_size + data_wanted; + (void)charset_to_utf8(ctx->charset_trans, trans_buf, + &trans_size, ctx->buf2); + + if (trans_size <= ctx->translation_size) { + /* need more data to finish the translation. */ + i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE); + memcpy(ctx->translation_buf, trans_buf, orig_size); + ctx->translation_size = orig_size; + *data += *size; + *size = 0; + return; + } + skip = trans_size - ctx->translation_size; + + i_assert(*size >= skip); + *data += skip; + *size -= skip; + + ctx->translation_size = 0; +} + +static void +message_decode_body_init_charset(struct message_decoder_context *ctx, + struct message_part *part) +{ + ctx->binary_input = ctx->content_charset == NULL && + (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 && + (part->flags & (MESSAGE_PART_FLAG_TEXT | + MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0; + + if (ctx->binary_input) + return; + + if (ctx->charset_trans != NULL && ctx->content_charset != NULL && + strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) { + /* already have the correct translation selected */ + charset_to_utf8_reset(ctx->charset_trans); + return; + } + + if (ctx->charset_trans != NULL) + charset_to_utf8_end(&ctx->charset_trans); + i_free_and_null(ctx->charset_trans_charset); + + ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ? + ctx->content_charset : "UTF-8"); + if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer, + &ctx->charset_trans) < 0) + ctx->charset_trans = charset_utf8_to_utf8_begin(ctx->normalizer); +} + +static bool message_decode_body(struct message_decoder_context *ctx, + struct message_block *input, + struct message_block *output) +{ + const unsigned char *data = NULL; + size_t pos, size = 0; + const char *error; + + switch (ctx->message_cte) { + case MESSAGE_CTE_UNKNOWN: + /* just skip this body */ + return FALSE; + + case MESSAGE_CTE_78BIT: + case MESSAGE_CTE_BINARY: + data = input->data; + size = input->size; + break; + case MESSAGE_CTE_QP: { + buffer_set_used_size(ctx->buf, 0); + if (ctx->qp == NULL) + ctx->qp = qp_decoder_init(ctx->buf); + (void)qp_decoder_more(ctx->qp, input->data, input->size, + &pos, &error); + data = ctx->buf->data; + size = ctx->buf->used; + break; + } + case MESSAGE_CTE_BASE64: + buffer_set_used_size(ctx->buf, 0); + if (!base64_decode_is_finished(&ctx->base64_decoder)) { + if (base64_decode_more(&ctx->base64_decoder, + input->data, input->size, + &pos, ctx->buf) <= 0) { + /* ignore the rest of the input in this + MIME part */ + (void)base64_decode_finish(&ctx->base64_decoder); + } + } + data = ctx->buf->data; + size = ctx->buf->used; + break; + } + + if (ctx->binary_input) { + output->data = data; + output->size = size; + } else { + buffer_set_used_size(ctx->buf2, 0); + if (ctx->translation_size != 0) + translation_buf_decode(ctx, &data, &size); + + pos = size; + (void)charset_to_utf8(ctx->charset_trans, + data, &pos, ctx->buf2); + if (pos != size) { + ctx->translation_size = size - pos; + i_assert(ctx->translation_size <= + sizeof(ctx->translation_buf)); + memcpy(ctx->translation_buf, data + pos, + ctx->translation_size); + } + output->data = ctx->buf2->data; + output->size = ctx->buf2->used; + } + + output->hdr = NULL; + return TRUE; +} + +bool message_decoder_decode_next_block(struct message_decoder_context *ctx, + struct message_block *input, + struct message_block *output) +{ + if (input->part != ctx->prev_part) { + /* MIME part changed. */ + message_decoder_decode_reset(ctx); + } + + output->part = input->part; + ctx->prev_part = input->part; + + if (input->hdr != NULL) { + output->size = 0; + return message_decode_header(ctx, input->hdr, output); + } else if (input->size != 0) + return message_decode_body(ctx, input, output); + else { + output->hdr = NULL; + output->size = 0; + message_decode_body_init_charset(ctx, input->part); + return TRUE; + } +} + +const char * +message_decoder_current_content_type(struct message_decoder_context *ctx) +{ + return ctx->content_type; +} + +void message_decoder_decode_reset(struct message_decoder_context *ctx) +{ + const char *error; + + base64_decode_reset(&ctx->base64_decoder); + + if (ctx->qp != NULL) + (void)qp_decoder_finish(ctx->qp, &error); + i_free_and_null(ctx->content_type); + i_free_and_null(ctx->content_charset); + ctx->message_cte = MESSAGE_CTE_78BIT; +} |