summary refs log tree commit diff stats
path: root/src/lib-mail/message-decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-mail/message-decoder.c')
-rw-r--r-- src/lib-mail/message-decoder.c 390
1 files changed, 390 insertions, 0 deletions
diff --git a/src/lib-mail/message-decoder.c b/src/lib-mail/message-decoder.c
new file mode 100644
index 0000000..845b3d5
--- /dev/null
+++ b/src/lib-mail/message-decoder.c
@@ -0,0 +1,390 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "base64.h"
+#include "str.h"
+#include "unichar.h"
+#include "charset-utf8.h"
+#include "qp-decoder.h"
+#include "rfc822-parser.h"
+#include "rfc2231-parser.h"
+#include "message-parser.h"
+#include "message-header-decode.h"
+#include "message-decoder.h"
+
+/* State of one message decoder. Allocated by message_decoder_init() and
+   released by message_decoder_deinit(). */
+struct message_decoder_context {
+ /* flags given to message_decoder_init(); the
+    MESSAGE_DECODER_FLAG_RETURN_BINARY bit can be changed afterwards with
+    message_decoder_set_return_binary() */
+ enum message_decoder_flags flags;
+ /* optional normalizer (NULL = none). It is handed to the header decoder
+    and to the charset translation, and called directly on header names. */
+ normalizer_func_t *normalizer;
+ /* part of the block that was decoded last; a different part in the next
+    block triggers message_decoder_decode_reset() */
+ struct message_part *prev_part;
+
+ /* copy of the last input header line with the value (and possibly the
+    name) replaced by the decoded data in buf; this is what output->hdr
+    points to */
+ struct message_header_line hdr;
+ /* buf: decoded header value followed by the header name, or the
+    QP/base64 decoded body data.
+    buf2: body data after charset translation. */
+ buffer_t *buf, *buf2;
+
+ /* name of the charset that charset_trans was created for */
+ char *charset_trans_charset;
+ struct charset_translation *charset_trans;
+ /* input bytes from the end of the previous body block that could not be
+    translated yet, and how many of them there are */
+ char translation_buf[CHARSET_MAX_PENDING_BUF_SIZE];
+ size_t translation_size;
+
+ /* quoted-printable decoder; created on first use, writes into buf */
+ struct qp_decoder *qp;
+ struct base64_decoder base64_decoder;
+
+ /* values parsed from the current part's Content-Type header; both are
+    freed and set to NULL by message_decoder_decode_reset() */
+ char *content_type, *content_charset;
+ /* parsed from Content-Transfer-Encoding; message_decoder_decode_reset()
+    sets it back to MESSAGE_CTE_78BIT */
+ enum message_cte message_cte;
+
+ /* TRUE when body data is returned without charset translation */
+ bool binary_input:1;
+};
+
+/* Forward declaration: the definition is further down, but
+   message_decoder_set_return_binary() already needs to call it. */
+static void
+message_decode_body_init_charset(struct message_decoder_context *ctx,
+ struct message_part *part);
+
+/* Create a new message decoder.
+
+   normalizer and flags are stored as given. Two dynamic buffers with an
+   initial size of 8192 bytes are allocated from default_pool and the
+   base64 decoder is initialized for the standard base64 scheme.
+
+   The returned context is released with message_decoder_deinit(). */
+struct message_decoder_context *
+message_decoder_init(normalizer_func_t *normalizer,
+		     enum message_decoder_flags flags)
+{
+	struct message_decoder_context *decoder =
+		i_new(struct message_decoder_context, 1);
+
+	decoder->normalizer = normalizer;
+	decoder->flags = flags;
+
+	/* scratch buffers: buf for transfer decoding, buf2 for charset
+	   translated output */
+	decoder->buf = buffer_create_dynamic(default_pool, 8192);
+	decoder->buf2 = buffer_create_dynamic(default_pool, 8192);
+
+	base64_decode_init(&decoder->base64_decoder, &base64_scheme, 0);
+	return decoder;
+}
+
+/* Free the decoder and everything it owns. The caller's pointer is set to
+   NULL before anything is released. */
+void message_decoder_deinit(struct message_decoder_context **_ctx)
+{
+	struct message_decoder_context *decoder = *_ctx;
+
+	*_ctx = NULL;
+
+	/* sub-decoders that exist only if they were needed */
+	if (decoder->charset_trans != NULL)
+		charset_to_utf8_end(&decoder->charset_trans);
+	if (decoder->qp != NULL)
+		qp_decoder_deinit(&decoder->qp);
+
+	/* scratch buffers */
+	buffer_free(&decoder->buf);
+	buffer_free(&decoder->buf2);
+
+	/* owned strings, then the context itself */
+	i_free(decoder->charset_trans_charset);
+	i_free(decoder->content_type);
+	i_free(decoder->content_charset);
+	i_free(decoder);
+}
+
+/* Turn MESSAGE_DECODER_FLAG_RETURN_BINARY on (set=TRUE) or off (set=FALSE)
+   and re-run the body charset setup for the part that was decoded last,
+   so that the new flag value takes effect. */
+void message_decoder_set_return_binary(struct message_decoder_context *ctx,
+				       bool set)
+{
+	if (!set)
+		ctx->flags &= ENUM_NEGATE(MESSAGE_DECODER_FLAG_RETURN_BINARY);
+	else
+		ctx->flags |= MESSAGE_DECODER_FLAG_RETURN_BINARY;
+
+	message_decode_body_init_charset(ctx, ctx->prev_part);
+}
+
+/* Parse the value of a Content-Transfer-Encoding header.
+
+   The first MIME token of hdr->full_value is compared case-insensitively
+   against the known encodings:
+     "7bit", "8bit"      -> MESSAGE_CTE_78BIT
+     "base64"            -> MESSAGE_CTE_BASE64
+     "binary"            -> MESSAGE_CTE_BINARY
+     "quoted-printable"  -> MESSAGE_CTE_QP
+   Everything else returns MESSAGE_CTE_UNKNOWN, as does a token that is
+   followed by more data that does not start with ';'.
+
+   The scratch string is allocated with t_str_new(). */
+enum message_cte message_decoder_parse_cte(const struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	enum message_cte message_cte = MESSAGE_CTE_UNKNOWN;
+	string_t *value;
+
+	value = t_str_new(64);
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+
+	rfc822_skip_lwsp(&parser);
+
+	/* Ensure we do not accidentally accept confused values like
+	   'base64 binary' or embedded NULs */
+	if (rfc822_parse_mime_token(&parser, value) == 1) {
+		rfc822_skip_lwsp(&parser);
+		/* RFC 2045 does not permit parameters for CTE,
+		   but in case someone uses them, we accept
+		   parameter separator ';' to be lenient. */
+		if (*parser.data != ';') {
+			/* deinit the parser on this exit as well, the same
+			   way as on the normal exit below */
+			rfc822_parser_deinit(&parser);
+			return MESSAGE_CTE_UNKNOWN;
+		}
+	}
+
+	/* the token length narrows the candidates before comparing */
+	switch (str_len(value)) {
+	case 4:
+		if (i_memcasecmp(str_data(value), "7bit", 4) == 0 ||
+		    i_memcasecmp(str_data(value), "8bit", 4) == 0)
+			message_cte = MESSAGE_CTE_78BIT;
+		break;
+	case 6:
+		if (i_memcasecmp(str_data(value), "base64", 6) == 0)
+			message_cte = MESSAGE_CTE_BASE64;
+		else if (i_memcasecmp(str_data(value), "binary", 6) == 0)
+			message_cte = MESSAGE_CTE_BINARY;
+		break;
+	case 16:
+		if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0)
+			message_cte = MESSAGE_CTE_QP;
+		break;
+	default:
+		/* no known encoding has this length */
+		break;
+	}
+	rfc822_parser_deinit(&parser);
+	return message_cte;
+}
+
+/* Parse a Content-Type header line and remember its media type in
+   ctx->content_type and, if present, its "charset" parameter in
+   ctx->content_charset.
+
+   Does nothing if ctx->content_type is already set. The media type string
+   is stored even when rfc822_parse_content_type() reports an error; the
+   parameters are looked at only when it succeeds. */
+static void
+parse_content_type(struct message_decoder_context *ctx,
+		   struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	const char *const *param;
+	string_t *type;
+	int type_ret;
+
+	if (ctx->content_type != NULL)
+		return;
+
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+	rfc822_skip_lwsp(&parser);
+
+	type = t_str_new(64);
+	type_ret = rfc822_parse_content_type(&parser, type);
+	ctx->content_type = i_strdup(str_c(type));
+
+	if (type_ret >= 0) {
+		/* the result is a NULL-terminated list of key, value pairs */
+		rfc2231_parse(&parser, &param);
+		while (*param != NULL) {
+			if (strcasecmp(param[0], "charset") == 0) {
+				ctx->content_charset = i_strdup(param[1]);
+				break;
+			}
+			param += 2;
+		}
+	}
+	rfc822_parser_deinit(&parser);
+}
+
+/* Decode one header line into UTF-8 and return it through output->hdr.
+
+   Content-Type and Content-Transfer-Encoding headers are additionally
+   parsed so that the body of this part can be decoded later.
+
+   Returns FALSE (nothing placed in output) when the line is only a part
+   of a header that still continues, TRUE when output->hdr was set. The
+   returned header points into ctx->hdr and ctx->buf. */
+static bool message_decode_header(struct message_decoder_context *ctx,
+ struct message_header_line *hdr,
+ struct message_block *output)
+{
+ size_t value_len;
+
+ if (hdr->continues) {
+ /* NOTE(review): presumably this asks the header parser to
+    collect the whole multi-line value into full_value - confirm */
+ hdr->use_full_value = TRUE;
+ return FALSE;
+ }
+
+ T_BEGIN {
+ if (hdr->name_len == 12 &&
+ strcasecmp(hdr->name, "Content-Type") == 0)
+ parse_content_type(ctx, hdr);
+ if (hdr->name_len == 25 &&
+ strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
+ ctx->message_cte = message_decoder_parse_cte(hdr);
+ } T_END;
+
+ /* the decoded value goes to the beginning of buf */
+ buffer_set_used_size(ctx->buf, 0);
+ message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
+ ctx->buf, ctx->normalizer);
+ value_len = ctx->buf->used;
+
+ /* the header name, when it has to be rewritten, is appended to buf
+    right after the value and NUL-terminated */
+ if (ctx->normalizer != NULL) {
+ (void)ctx->normalizer(hdr->name, hdr->name_len, ctx->buf);
+ buffer_append_c(ctx->buf, '\0');
+ } else {
+ /* NOTE(review): presumably a FALSE return means the name was not
+    valid UTF-8 and a fixed copy was appended to buf, while TRUE
+    leaves buf untouched - confirm against unichar.h */
+ if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name,
+ hdr->name_len, ctx->buf))
+ buffer_append_c(ctx->buf, '\0');
+ }
+
+ ctx->hdr = *hdr;
+ ctx->hdr.full_value = ctx->buf->data;
+ ctx->hdr.full_value_len = value_len;
+ ctx->hdr.value_len = 0;
+ if (ctx->buf->used != value_len) {
+ /* something was appended after the value, so the name is taken
+    from buf; the -1 leaves the NUL terminator out of name_len */
+ ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data,
+ ctx->hdr.full_value_len);
+ ctx->hdr.name_len = ctx->buf->used - 1 - value_len;
+ }
+
+ output->hdr = &ctx->hdr;
+ return TRUE;
+}
+
+/* Translate the bytes that were left over from the previous body block
+   (ctx->translation_buf) together with the beginning of the new data.
+
+   The translated output is appended to ctx->buf2. On return *data and
+   *size have been advanced past the new input bytes that were used. If
+   there still is not enough data, all of the new input is stored in
+   ctx->translation_buf and *size becomes 0. */
+static void translation_buf_decode(struct message_decoder_context *ctx,
+ const unsigned char **data, size_t *size)
+{
+ unsigned char trans_buf[CHARSET_MAX_PENDING_BUF_SIZE+1];
+ size_t data_wanted, skip;
+ size_t trans_size, orig_size;
+
+ /* @UNSAFE: move the previously untranslated bytes to trans_buf
+ and see if we have now enough data to get the next character
+ translated */
+ memcpy(trans_buf, ctx->translation_buf, ctx->translation_size);
+ /* fill the rest of trans_buf from the new data, as far as there is any */
+ data_wanted = sizeof(trans_buf) - ctx->translation_size;
+ if (data_wanted > *size)
+ data_wanted = *size;
+ memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
+
+ orig_size = trans_size = ctx->translation_size + data_wanted;
+ /* the code below treats the updated trans_size as the number of input
+    bytes that were used by the translation */
+ (void)charset_to_utf8(ctx->charset_trans, trans_buf,
+ &trans_size, ctx->buf2);
+
+ if (trans_size <= ctx->translation_size) {
+ /* need more data to finish the translation. */
+ i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE);
+ memcpy(ctx->translation_buf, trans_buf, orig_size);
+ ctx->translation_size = orig_size;
+ *data += *size;
+ *size = 0;
+ return;
+ }
+ /* number of bytes used from the new data, i.e. not counting the bytes
+    that came from translation_buf */
+ skip = trans_size - ctx->translation_size;
+
+ i_assert(*size >= skip);
+ *data += skip;
+ *size -= skip;
+
+ ctx->translation_size = 0;
+}
+
+/* Prepare the decoder for the body of a MIME part: decide whether the body
+   is returned as binary and otherwise set up the charset translation.
+
+   The body is treated as binary (ctx->binary_input) only if all of these
+   hold: no charset was given in Content-Type, the decoder has
+   MESSAGE_DECODER_FLAG_RETURN_BINARY set, and the part is neither a text
+   part nor a message/rfc822 part.
+
+   part may be NULL. message_decoder_set_return_binary() passes
+   ctx->prev_part, which is NULL until the first block has been decoded.
+   A NULL part is never treated as binary.
+
+   For non-binary bodies ctx->charset_trans is set up for
+   ctx->content_charset, or for "UTF-8" if there is none. If the wanted
+   charset cannot be opened, a UTF-8 to UTF-8 translation is used. */
+static void
+message_decode_body_init_charset(struct message_decoder_context *ctx,
+				 struct message_part *part)
+{
+	ctx->binary_input = ctx->content_charset == NULL &&
+		(ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 &&
+		part != NULL &&
+		(part->flags & (MESSAGE_PART_FLAG_TEXT |
+				MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0;
+
+	if (ctx->binary_input)
+		return;
+
+	if (ctx->charset_trans != NULL && ctx->content_charset != NULL &&
+	    strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) {
+		/* already have the correct translation selected */
+		charset_to_utf8_reset(ctx->charset_trans);
+		return;
+	}
+
+	/* a different charset is needed: drop the old translation first */
+	if (ctx->charset_trans != NULL)
+		charset_to_utf8_end(&ctx->charset_trans);
+	i_free_and_null(ctx->charset_trans_charset);
+
+	ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
+					      ctx->content_charset : "UTF-8");
+	if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer,
+				  &ctx->charset_trans) < 0)
+		ctx->charset_trans = charset_utf8_to_utf8_begin(ctx->normalizer);
+}
+
+/* Undo the Content-Transfer-Encoding of one body block.
+
+   On return *data_r and *size_r describe the decoded bytes: either the
+   input itself (7bit, 8bit, binary) or the contents of ctx->buf
+   (quoted-printable, base64). Decoding errors are ignored; after a base64
+   error the rest of the part's input is dropped.
+
+   Returns FALSE if the encoding is unknown and the body must be skipped. */
+static bool
+message_decode_body_cte(struct message_decoder_context *ctx,
+			struct message_block *input,
+			const unsigned char **data_r, size_t *size_r)
+{
+	const char *qp_error;
+	size_t consumed;
+
+	*data_r = NULL;
+	*size_r = 0;
+
+	switch (ctx->message_cte) {
+	case MESSAGE_CTE_UNKNOWN:
+		/* just skip this body */
+		return FALSE;
+
+	case MESSAGE_CTE_78BIT:
+	case MESSAGE_CTE_BINARY:
+		*data_r = input->data;
+		*size_r = input->size;
+		break;
+
+	case MESSAGE_CTE_QP:
+		buffer_set_used_size(ctx->buf, 0);
+		if (ctx->qp == NULL)
+			ctx->qp = qp_decoder_init(ctx->buf);
+		(void)qp_decoder_more(ctx->qp, input->data, input->size,
+				      &consumed, &qp_error);
+		*data_r = ctx->buf->data;
+		*size_r = ctx->buf->used;
+		break;
+
+	case MESSAGE_CTE_BASE64:
+		buffer_set_used_size(ctx->buf, 0);
+		if (!base64_decode_is_finished(&ctx->base64_decoder) &&
+		    base64_decode_more(&ctx->base64_decoder,
+				       input->data, input->size,
+				       &consumed, ctx->buf) <= 0) {
+			/* ignore the rest of the input in this
+			   MIME part */
+			(void)base64_decode_finish(&ctx->base64_decoder);
+		}
+		*data_r = ctx->buf->data;
+		*size_r = ctx->buf->used;
+		break;
+	}
+	return TRUE;
+}
+
+/* Decode one block of body data.
+
+   The transfer encoding is removed first. For binary input the result is
+   returned as is. Otherwise it is translated to UTF-8 into ctx->buf2, and
+   input bytes that could not be translated yet are kept in
+   ctx->translation_buf for the next block.
+
+   Returns FALSE if the body is skipped because of an unknown
+   Content-Transfer-Encoding, TRUE if output was filled. */
+static bool message_decode_body(struct message_decoder_context *ctx,
+				struct message_block *input,
+				struct message_block *output)
+{
+	const unsigned char *decoded;
+	size_t decoded_size, translated;
+
+	if (!message_decode_body_cte(ctx, input, &decoded, &decoded_size))
+		return FALSE;
+
+	if (ctx->binary_input) {
+		output->data = decoded;
+		output->size = decoded_size;
+		output->hdr = NULL;
+		return TRUE;
+	}
+
+	buffer_set_used_size(ctx->buf2, 0);
+	/* first finish what was left over from the previous block */
+	if (ctx->translation_size != 0)
+		translation_buf_decode(ctx, &decoded, &decoded_size);
+
+	translated = decoded_size;
+	(void)charset_to_utf8(ctx->charset_trans,
+			      decoded, &translated, ctx->buf2);
+	if (translated != decoded_size) {
+		/* save the untranslated tail for the next block */
+		ctx->translation_size = decoded_size - translated;
+		i_assert(ctx->translation_size <=
+			 sizeof(ctx->translation_buf));
+		memcpy(ctx->translation_buf, decoded + translated,
+		       ctx->translation_size);
+	}
+	output->data = ctx->buf2->data;
+	output->size = ctx->buf2->used;
+	output->hdr = NULL;
+	return TRUE;
+}
+
+/* Decode the next block of a parsed message.
+
+   A block with a header line is decoded as a header, a block with data
+   as body data. A block with neither sets up the body charset handling
+   for the part and gives an empty output.
+
+   When input->part differs from the previous block's part the per-part
+   state is reset first.
+
+   Returns TRUE if output was filled, FALSE if there is nothing to return
+   for this block. */
+bool message_decoder_decode_next_block(struct message_decoder_context *ctx,
+				       struct message_block *input,
+				       struct message_block *output)
+{
+	if (ctx->prev_part != input->part) {
+		/* MIME part changed. */
+		message_decoder_decode_reset(ctx);
+	}
+
+	output->part = input->part;
+	ctx->prev_part = input->part;
+
+	if (input->hdr != NULL) {
+		output->size = 0;
+		return message_decode_header(ctx, input->hdr, output);
+	}
+	if (input->size != 0)
+		return message_decode_body(ctx, input, output);
+
+	/* neither a header nor data */
+	output->hdr = NULL;
+	output->size = 0;
+	message_decode_body_init_charset(ctx, input->part);
+	return TRUE;
+}
+
+/* Return the media type parsed from the current part's Content-Type
+   header, or NULL if none has been parsed since the last reset. The string
+   is owned by the decoder; message_decoder_decode_reset() frees it. */
+const char *
+message_decoder_current_content_type(struct message_decoder_context *ctx)
+{
+ return ctx->content_type;
+}
+
+/* Reset the per-part decoding state. This is called automatically when the
+   MIME part changes.
+
+   - the base64 decoder is reset
+   - the quoted-printable decoder, if one exists, is finished; its error
+     is ignored
+   - the parsed Content-Type and charset are forgotten
+   - the transfer encoding goes back to MESSAGE_CTE_78BIT
+   - input bytes still waiting for charset translation are dropped, so
+     that the tail of one part can never be prepended to the body of the
+     next one */
+void message_decoder_decode_reset(struct message_decoder_context *ctx)
+{
+	const char *error;
+
+	base64_decode_reset(&ctx->base64_decoder);
+
+	if (ctx->qp != NULL)
+		(void)qp_decoder_finish(ctx->qp, &error);
+	i_free_and_null(ctx->content_type);
+	i_free_and_null(ctx->content_charset);
+	ctx->message_cte = MESSAGE_CTE_78BIT;
+	/* an incomplete character at the end of the old part is of no use
+	   to whatever is decoded next */
+	ctx->translation_size = 0;
+}