summary | refs | log | tree | commit | diff | stats
path: root/src/lib-mail/message-header-decode.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/lib-mail/message-header-decode.c 188
1 files changed, 188 insertions, 0 deletions
diff --git a/src/lib-mail/message-header-decode.c b/src/lib-mail/message-header-decode.c
new file mode 100644
index 0000000..18f6ca2
--- /dev/null
+++ b/src/lib-mail/message-header-decode.c
@@ -0,0 +1,188 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "base64.h"
+#include "buffer.h"
+#include "unichar.h"
+#include "charset-utf8.h"
+#include "quoted-printable.h"
+#include "message-header-decode.h"
+
+/* Decode the inside of one encoded-word. data must point just past the
+   leading "=?" and is expected to contain "charset?encoding?text?=".
+
+   On success decodebuf has had <charset> NUL <decoded text> appended,
+   *charsetlen_r holds decodebuf->used as it was right after that NUL, and
+   the return value is the number of input bytes up to and including the
+   closing "?=".
+
+   Returns 0 when there is no third '?' immediately followed by '=', or
+   when the encoding letter is not Q/q or B/b. In the latter case the
+   charset and *charsetlen_r have already been written. A negative result
+   from either decoder is deliberately ignored; whatever was decoded is
+   kept. */
+static size_t
+message_header_decode_encoded(const unsigned char *data, size_t size,
+			      buffer_t *decodebuf, size_t *charsetlen_r)
+{
+#define QCOUNT 3
+	size_t qmark[QCOUNT] = {0, 0, 0};
+	unsigned int nfound = 0;
+	size_t off = 0;
+	const unsigned char *text;
+	size_t text_len;
+	unsigned char enc;
+
+	/* remember where the first QCOUNT question marks are */
+	while (off < size) {
+		if (data[off] == '?') {
+			qmark[nfound] = off;
+			nfound++;
+			if (nfound == QCOUNT)
+				break;
+		}
+		off++;
+	}
+
+	/* off == size if the input ran out first; otherwise it sits on the
+	   last '?', which has to be followed by '=' */
+	if (off+1 >= size)
+		return 0;
+	if (data[off+1] != '=')
+		return 0;
+
+	i_assert(nfound == QCOUNT);
+
+	/* <charset> NUL goes in front of the decoded text */
+	buffer_append(decodebuf, data, qmark[0]);
+	buffer_append_c(decodebuf, '\0');
+	*charsetlen_r = decodebuf->used;
+
+	/* the encoded text lies between the second and third '?' */
+	text = data + qmark[1] + 1;
+	text_len = qmark[2] - qmark[1] - 1;
+
+	/* only the first byte after the first '?' selects the encoding */
+	enc = data[qmark[0]+1];
+	if (enc == 'q' || enc == 'Q') {
+		if (quoted_printable_q_decode(text, text_len, decodebuf) < 0) {
+			/* we skipped over some invalid data */
+		}
+	} else if (enc == 'b' || enc == 'B') {
+		if (base64_decode(text, text_len, NULL, decodebuf) < 0) {
+			/* contains invalid data. show what we got so far. */
+		}
+	} else {
+		/* unknown encoding */
+		return 0;
+	}
+
+	/* consumed everything through the trailing "?=" */
+	return qmark[2] + 2;
+}
+
+/* Returns TRUE if every one of the size bytes at data is a space, tab, CR
+   or LF (so also TRUE for size == 0), FALSE as soon as any other byte is
+   seen. */
+static bool is_only_lwsp(const unsigned char *data, size_t size)
+{
+	size_t left;
+
+	for (left = size; left > 0; left--, data++) {
+		switch (*data) {
+		case ' ':
+		case '\t':
+		case '\r':
+		case '\n':
+			/* still whitespace, keep looking */
+			break;
+		default:
+			return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+void message_header_decode(const unsigned char *data, size_t size,
+ message_header_decode_callback_t *callback,
+ void *context)
+{
+ buffer_t *decodebuf = NULL; /* created at the first "=?" and reused for later ones */
+ size_t charsetlen = 0; /* offset of the decoded text inside decodebuf */
+ size_t pos, start_pos, ret; /* pos: scan position; start_pos: start of the
+    input that has not been handed to callback yet */
+
+ /* Walk data looking for encoded-words of the form
+    =?charset?Q|B?text?=
+    Each word that decodes to non-empty text is passed to callback
+    together with its charset name. The input in between is passed
+    with a NULL charset, except that a run consisting only of spaces,
+    tabs, CRs and LFs directly in front of a "=?" is never passed on.
+    Scanning stops as soon as callback returns false. context is
+    handed to callback unchanged. */
+ start_pos = 0;
+ for (pos = 0; pos + 1 < size; ) {
+ if (data[pos] != '=' || data[pos+1] != '?') {
+ pos++;
+ continue;
+ }
+
+ /* encoded string beginning */
+ if (pos != start_pos &&
+ !is_only_lwsp(data+start_pos, pos-start_pos)) {
+ /* send the unencoded data so far */
+ if (!callback(data + start_pos, pos - start_pos,
+ NULL, context)) {
+ start_pos = size; /* makes the flush after the loop a no-op */
+ break;
+ }
+ }
+
+ if (decodebuf == NULL) {
+ decodebuf = buffer_create_dynamic(default_pool,
+ size - pos);
+ } else {
+ buffer_set_used_size(decodebuf, 0);
+ }
+
+ pos += 2; /* step over the "=?" */
+ ret = message_header_decode_encoded(data + pos, size - pos,
+ decodebuf, &charsetlen);
+ if (ret == 0) {
+ start_pos = pos-2; /* not decodable: the pending plain text now starts
+    at this "=?", and scanning resumes right after it */
+ continue;
+ }
+ pos += ret; /* ret counts the bytes through the closing "?=" */
+
+ if (decodebuf->used > charsetlen) { /* skip the callback when nothing was decoded */
+ /* decodebuf contains <charset> NUL <text>: the text part is
+    passed as the data and the start of the buffer, i.e. the
+    NUL-terminated charset, as the charset name */
+ if (!callback(CONST_PTR_OFFSET(decodebuf->data,
+ charsetlen),
+ decodebuf->used - charsetlen,
+ decodebuf->data, context)) {
+ start_pos = size; /* makes the flush after the loop a no-op */
+ break;
+ }
+ }
+
+ start_pos = pos;
+ }
+
+ if (size != start_pos) { /* hand over whatever plain input is left, whitespace-only or not */
+ i_assert(size > start_pos);
+ (void)callback(data + start_pos, size - start_pos,
+ NULL, context);
+ }
+ buffer_free(&decodebuf); /* NOTE(review): decodebuf is still NULL if no "=?" was
+    seen; assumes buffer_free() accepts that - confirm */
+}
+
+struct decode_utf8_context { /* state given to decode_utf8_callback() via its context pointer */
+ buffer_t *dest; /* buffer the callback appends its output to */
+ normalizer_func_t *normalizer; /* may be NULL; when set it is applied to the data or passed to the charset translation */
+ bool changed:1; /* NOTE(review): not read or written by the code visible here */
+};
+
+/* Append data to dest via uni_utf8_get_valid_data(). When that call
+   returns true the input is copied here unchanged; presumably a false
+   return means it has already written a cleaned-up copy itself - verify
+   against unichar.h. */
+static void
+decode_utf8_append_valid(buffer_t *dest, const unsigned char *data,
+			 size_t size)
+{
+	if (uni_utf8_get_valid_data(data, size, dest))
+		buffer_append(dest, data, size);
+}
+
+/* message_header_decode() callback that collects everything into
+   ctx->dest. charset is NULL for text that was not inside an
+   encoded-word. Always returns TRUE, so decoding is never cut short. */
+static bool
+decode_utf8_callback(const unsigned char *data, size_t size,
+		     const char *charset, void *context)
+{
+	struct decode_utf8_context *ctx = context;
+	struct charset_translation *trans;
+	bool is_utf8;
+
+	/* ASCII / UTF-8 input needs no charset translation */
+	is_utf8 = charset == NULL || charset_is_utf8(charset);
+
+	if (is_utf8 && ctx->normalizer != NULL) {
+		(void)ctx->normalizer(data, size, ctx->dest);
+	} else if (is_utf8 ||
+		   charset_to_utf8_begin(charset, ctx->normalizer,
+					 &trans) < 0) {
+		/* either plain UTF-8 without a normalizer, or a charset for
+		   which no translation could be started. in the latter case
+		   the data probably still contains some valid ASCII
+		   characters, so append what is usable. */
+		decode_utf8_append_valid(ctx->dest, data, size);
+	} else {
+		/* ignore any errors */
+		(void)charset_to_utf8(trans, data, &size, ctx->dest);
+		charset_to_utf8_end(&trans);
+	}
+	return TRUE;
+}
+
+/* Decode the header value in data[0..size) with message_header_decode()
+   and append the result to dest, using decode_utf8_callback() to do the
+   conversion. normalizer may be NULL; it is stored in the callback state
+   as given. */
+void message_header_decode_utf8(const unsigned char *data, size_t size,
+				buffer_t *dest, normalizer_func_t *normalizer)
+{
+	struct decode_utf8_context state;
+
+	/* start from an all-zero state, then fill in what the callback uses */
+	i_zero(&state);
+	state.normalizer = normalizer;
+	state.dest = dest;
+
+	message_header_decode(data, size, decode_utf8_callback, &state);
+}