diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib-mail/message-header-decode.c | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/src/lib-mail/message-header-decode.c b/src/lib-mail/message-header-decode.c new file mode 100644 index 0000000..18f6ca2 --- /dev/null +++ b/src/lib-mail/message-header-decode.c @@ -0,0 +1,188 @@ +/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "base64.h" +#include "buffer.h" +#include "unichar.h" +#include "charset-utf8.h" +#include "quoted-printable.h" +#include "message-header-decode.h" + +static size_t +message_header_decode_encoded(const unsigned char *data, size_t size, + buffer_t *decodebuf, size_t *charsetlen_r) +{ +#define QCOUNT 3 + unsigned int num = 0; + size_t i, start_pos[QCOUNT] = {0, 0, 0}; + + /* data should contain "charset?encoding?text?=" */ + for (i = 0; i < size; i++) { + if (data[i] == '?') { + start_pos[num++] = i; + if (num == QCOUNT) + break; + } + } + + if (i+1 >= size || data[i+1] != '=') { + /* invalid block */ + return 0; + } + + i_assert(num == QCOUNT); + + buffer_append(decodebuf, data, start_pos[0]); + buffer_append_c(decodebuf, '\0'); + *charsetlen_r = decodebuf->used; + + switch (data[start_pos[0]+1]) { + case 'q': + case 'Q': + if (quoted_printable_q_decode(data + start_pos[1] + 1, + start_pos[2] - start_pos[1] - 1, + decodebuf) < 0) { + /* we skipped over some invalid data */ + } + break; + case 'b': + case 'B': + if (base64_decode(data + start_pos[1] + 1, + start_pos[2] - start_pos[1] - 1, + NULL, decodebuf) < 0) { + /* contains invalid data. show what we got so far. */ + } + break; + default: + /* unknown encoding */ + return 0; + } + + return start_pos[2] + 2; +} + +static bool is_only_lwsp(const unsigned char *data, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (!(data[i] == ' ' || data[i] == '\t' || + data[i] == '\r' || data[i] == '\n')) + return FALSE; + } + return TRUE; +} + +void message_header_decode(const unsigned char *data, size_t size, + message_header_decode_callback_t *callback, + void *context) +{ + buffer_t *decodebuf = NULL; + size_t charsetlen = 0; + size_t pos, start_pos, ret; + + /* =?charset?Q|B?text?= */ + start_pos = 0; + for (pos = 0; pos + 1 < size; ) { + if (data[pos] != '=' || data[pos+1] != '?') { + pos++; + continue; + } + + /* encoded string beginning */ + if (pos != start_pos && + !is_only_lwsp(data+start_pos, pos-start_pos)) { + /* send the unencoded data so far */ + if (!callback(data + start_pos, pos - start_pos, + NULL, context)) { + start_pos = size; + break; + } + } + + if (decodebuf == NULL) { + decodebuf = buffer_create_dynamic(default_pool, + size - pos); + } else { + buffer_set_used_size(decodebuf, 0); + } + + pos += 2; + ret = message_header_decode_encoded(data + pos, size - pos, + decodebuf, &charsetlen); + if (ret == 0) { + start_pos = pos-2; + continue; + } + pos += ret; + + if (decodebuf->used > charsetlen) { + /* decodebuf contains <charset> NUL <text> */ + if (!callback(CONST_PTR_OFFSET(decodebuf->data, + charsetlen), + decodebuf->used - charsetlen, + decodebuf->data, context)) { + start_pos = size; + break; + } + } + + start_pos = pos; + } + + if (size != start_pos) { + i_assert(size > start_pos); + (void)callback(data + start_pos, size - start_pos, + NULL, context); + } + buffer_free(&decodebuf); +} + +struct decode_utf8_context { + buffer_t *dest; + normalizer_func_t *normalizer; + bool changed:1; +}; + +static bool +decode_utf8_callback(const unsigned char *data, size_t size, + const char *charset, void *context) +{ + struct decode_utf8_context *ctx = context; + struct charset_translation *t; + + if (charset == NULL || charset_is_utf8(charset)) { + /* ASCII / UTF-8 */ + if (ctx->normalizer != NULL) { + (void)ctx->normalizer(data, size, ctx->dest); + } else { + if (uni_utf8_get_valid_data(data, size, ctx->dest)) + buffer_append(ctx->dest, data, size); + } + return TRUE; + } + + if (charset_to_utf8_begin(charset, ctx->normalizer, &t) < 0) { + /* data probably still contains some valid ASCII characters. + append them. */ + if (uni_utf8_get_valid_data(data, size, ctx->dest)) + buffer_append(ctx->dest, data, size); + return TRUE; + } + + /* ignore any errors */ + (void)charset_to_utf8(t, data, &size, ctx->dest); + charset_to_utf8_end(&t); + return TRUE; +} + +void message_header_decode_utf8(const unsigned char *data, size_t size, + buffer_t *dest, normalizer_func_t *normalizer) +{ + struct decode_utf8_context ctx; + + i_zero(&ctx); + ctx.dest = dest; + ctx.normalizer = normalizer; + message_header_decode(data, size, decode_utf8_callback, &ctx); +} |