diff options
Diffstat (limited to 'src/lib-storage/index/mbox/mbox-sync-parse.c')
-rw-r--r-- | src/lib-storage/index/mbox/mbox-sync-parse.c | 616 |
1 files changed, 616 insertions, 0 deletions
diff --git a/src/lib-storage/index/mbox/mbox-sync-parse.c b/src/lib-storage/index/mbox/mbox-sync-parse.c new file mode 100644 index 0000000..bbc2da2 --- /dev/null +++ b/src/lib-storage/index/mbox/mbox-sync-parse.c @@ -0,0 +1,616 @@ +/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */ + +/* MD5 header summing logic was pretty much copy&pasted from popa3d by + Solar Designer */ + +#include "lib.h" +#include "ioloop.h" +#include "array.h" +#include "buffer.h" +#include "istream.h" +#include "str.h" +#include "write-full.h" +#include "message-parser.h" +#include "mail-index.h" +#include "mbox-storage.h" +#include "mbox-md5.h" +#include "mbox-sync-private.h" + + +#define IS_LWSP_LF(c) (IS_LWSP(c) || (c) == '\n') + +struct mbox_sync_header_func { + const char *header; + bool (*func)(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr); +}; + +struct mbox_flag_type mbox_status_flags[] = { + { 'R', MAIL_SEEN }, + { 'O', MBOX_NONRECENT_KLUDGE }, + { 0, 0 } +}; + +struct mbox_flag_type mbox_xstatus_flags[] = { + { 'A', MAIL_ANSWERED }, + { 'F', MAIL_FLAGGED }, + { 'T', MAIL_DRAFT }, + { 'D', MAIL_DELETED }, + { 0, 0 } +}; + +static void parse_trailing_whitespace(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + size_t i, space = 0; + + /* the value may contain newlines. we can't count whitespace before + and after it as a single contiguous whitespace block, as that may + get us into situation where removing whitespace goes eg. + " \n \n" -> " \n\n" which would then be treated as end of headers. + + that could probably be avoided by being careful, but as newlines + should never be there (we don't generate them), it's not worth the + trouble. */ + + for (i = hdr->full_value_len; i > 0; i--) { + if (!IS_LWSP(hdr->full_value[i-1])) + break; + space++; + } + + if ((ssize_t)space > ctx->mail.space) { + i_assert(space != 0); + ctx->mail.offset = ctx->hdr_offset + str_len(ctx->header) + i; + ctx->mail.space = space; + } +} + +static enum mail_flags mbox_flag_find(struct mbox_flag_type *flags, char chr) +{ + int i; + + for (i = 0; flags[i].chr != 0; i++) { + if (flags[i].chr == chr) + return flags[i].flag; + } + + return 0; +} + +static bool parse_status_flags(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr, + struct mbox_flag_type *flags_list) +{ + enum mail_flags flag; + size_t i; + bool duplicates = FALSE; + + ctx->mail.flags ^= MBOX_NONRECENT_KLUDGE; + for (i = 0; i < hdr->full_value_len; i++) { + flag = mbox_flag_find(flags_list, hdr->full_value[i]); + if ((ctx->mail.flags & flag) != 0) + duplicates = TRUE; + else + ctx->mail.flags |= flag; + } + ctx->mail.flags ^= MBOX_NONRECENT_KLUDGE; + return duplicates; +} + +static bool parse_status(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + if (parse_status_flags(ctx, hdr, mbox_status_flags)) + ctx->mail.status_broken = TRUE; + ctx->hdr_pos[MBOX_HDR_STATUS] = str_len(ctx->header); + return TRUE; +} + +static bool parse_x_status(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + if (parse_status_flags(ctx, hdr, mbox_xstatus_flags)) + ctx->mail.xstatus_broken = TRUE; + ctx->hdr_pos[MBOX_HDR_X_STATUS] = str_len(ctx->header); + return TRUE; +} + +static void +parse_imap_keywords_list(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr, size_t pos) +{ + struct mailbox *box = &ctx->sync_ctx->mbox->box; + struct index_mailbox_context *ibox = INDEX_STORAGE_CONTEXT(box); + const char *keyword, *error; + size_t keyword_start; + unsigned int idx, count; + + count = 0; + while (pos < hdr->full_value_len) { + if (IS_LWSP_LF(hdr->full_value[pos])) { + pos++; + continue; + } + + /* read the keyword */ + keyword_start = pos; + for (; pos < hdr->full_value_len; pos++) { + if (IS_LWSP_LF(hdr->full_value[pos])) + break; + } + + /* add it to index's keyword list if it's not there already */ + keyword = t_strndup(hdr->full_value + keyword_start, + pos - keyword_start); + if (mailbox_keyword_is_valid(&ctx->sync_ctx->mbox->box, + keyword, &error)) { + mail_index_keyword_lookup_or_create(box->index, + keyword, &idx); + } + count++; + } + + if (count != array_count(ibox->keyword_names)) { + /* need to update this list */ + ctx->imapbase_rewrite = TRUE; + ctx->need_rewrite = TRUE; + } +} + +static bool parse_x_imap_base(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + size_t i, j, uid_last_pos; + uint32_t uid_validity, uid_last; + + if (ctx->seq != 1 || ctx->seen_imapbase || + ctx->sync_ctx->renumber_uids) { + /* Valid only in first message */ + return FALSE; + } + + /* <uid-validity> 10x<uid-last> */ + for (i = 0, uid_validity = 0; i < hdr->full_value_len; i++) { + if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9') { + if (hdr->full_value[i] != ' ') + return FALSE; + break; + } + uid_validity = uid_validity * 10 + (hdr->full_value[i] - '0'); + } + + if (uid_validity == 0) { + /* broken */ + return FALSE; + } + + for (; i < hdr->full_value_len; i++) { + if (!IS_LWSP_LF(hdr->full_value[i])) + break; + } + uid_last_pos = i; + + for (uid_last = 0, j = 0; i < hdr->full_value_len; i++, j++) { + if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9') { + if (!IS_LWSP_LF(hdr->full_value[i])) + return FALSE; + break; + } + uid_last = uid_last * 10 + (hdr->full_value[i] - '0'); + } + + if (j != 10 || + hdr->full_value_offset != ctx->hdr_offset + str_len(ctx->header)) { + /* uid-last field must be exactly 10 characters to make + rewriting it easier. also don't try to do this if some + headers have been removed */ + ctx->imapbase_rewrite = TRUE; + ctx->need_rewrite = TRUE; + } else { + ctx->last_uid_value_start_pos = uid_last_pos; + ctx->sync_ctx->base_uid_last_offset = + hdr->full_value_offset + uid_last_pos; + } + + if (ctx->sync_ctx->base_uid_validity == 0) { + /* first time parsing this (ie. we're not rewriting). + save the values. */ + ctx->sync_ctx->base_uid_validity = uid_validity; + ctx->sync_ctx->base_uid_last = uid_last; + + if (ctx->sync_ctx->next_uid-1 <= uid_last) { + /* new messages have been added since our last sync. + just update our internal next_uid. */ + ctx->sync_ctx->next_uid = uid_last+1; + } else { + /* we need to rewrite the next-uid */ + ctx->need_rewrite = TRUE; + } + i_assert(ctx->sync_ctx->next_uid > ctx->sync_ctx->prev_msg_uid); + } + + ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] = str_len(ctx->header); + ctx->seen_imapbase = TRUE; + + T_BEGIN { + parse_imap_keywords_list(ctx, hdr, i); + } T_END; + parse_trailing_whitespace(ctx, hdr); + return TRUE; +} + +static bool parse_x_imap(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + if (!parse_x_imap_base(ctx, hdr)) + return FALSE; + + /* this is the c-client style "FOLDER INTERNAL DATA" message. + skip it. */ + ctx->mail.pseudo = TRUE; + return TRUE; +} + +static bool parse_x_keywords_real(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + struct mailbox *box = &ctx->sync_ctx->mbox->box; + ARRAY_TYPE(keyword_indexes) keyword_list; + const unsigned int *list; + string_t *keyword; + size_t keyword_start; + unsigned int i, idx, count; + size_t pos; + + if (array_is_created(&ctx->mail.keywords)) + return FALSE; /* duplicate header, delete */ + + /* read keyword indexes to temporary array first */ + keyword = t_str_new(128); + t_array_init(&keyword_list, 16); + + for (pos = 0; pos < hdr->full_value_len; ) { + if (IS_LWSP_LF(hdr->full_value[pos])) { + pos++; + continue; + } + + /* read the keyword string */ + keyword_start = pos; + for (; pos < hdr->full_value_len; pos++) { + if (IS_LWSP_LF(hdr->full_value[pos])) + break; + } + + str_truncate(keyword, 0); + str_append_data(keyword, hdr->full_value + keyword_start, + pos - keyword_start); + if (!mail_index_keyword_lookup(box->index, str_c(keyword), + &idx)) { + /* keyword wasn't found. that means the sent mail + originally contained X-Keywords header. Delete it. */ + return FALSE; + } + + /* check that the keyword isn't already added there. + we don't want duplicates. */ + list = array_get(&keyword_list, &count); + for (i = 0; i < count; i++) { + if (list[i] == idx) + break; + } + + if (i == count) + array_push_back(&keyword_list, &idx); + } + + /* once we know how many keywords there are, we can allocate the array + from mail_keyword_pool without wasting memory. */ + if (array_count(&keyword_list) > 0) { + p_array_init(&ctx->mail.keywords, + ctx->sync_ctx->mail_keyword_pool, + array_count(&keyword_list)); + array_append_array(&ctx->mail.keywords, &keyword_list); + } + + ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] = str_len(ctx->header); + parse_trailing_whitespace(ctx, hdr); + return TRUE; +} + +static bool parse_x_keywords(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + bool ret; + + T_BEGIN { + ret = parse_x_keywords_real(ctx, hdr); + } T_END; + return ret; +} + +static bool parse_x_uid(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + uint32_t value = 0; + size_t i; + + if (ctx->mail.uid != 0) { + /* duplicate */ + return FALSE; + } + + for (i = 0; i < hdr->full_value_len; i++) { + if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9') + break; + value = value*10 + (hdr->full_value[i] - '0'); + } + + for (; i < hdr->full_value_len; i++) { + if (!IS_LWSP_LF(hdr->full_value[i])) { + /* broken value */ + return FALSE; + } + } + + if (ctx->sync_ctx == NULL) { + /* we're in mbox_sync_parse_match_mail(). + don't do any extra checks. */ + ctx->mail.uid = value; + return TRUE; + } + + if (ctx->seq == 1 && !ctx->seen_imapbase) { + /* Don't bother allowing X-UID before X-IMAPbase + header. c-client doesn't allow it either, and this + way the UID doesn't have to be reset if X-IMAPbase + header isn't what we expect it to be. */ + return FALSE; + } + + if (value == ctx->sync_ctx->next_uid) { + /* X-UID is the next expected one. allow it because + we'd just use this UID anyway. X-IMAPbase header + still needs to be updated for this. */ + ctx->sync_ctx->next_uid++; + } else if (value > ctx->sync_ctx->next_uid) { + /* UID is larger than expected. Don't allow it because + incoming mails can contain untrusted X-UID fields, + causing possibly DoS if the UIDs get large enough. */ + ctx->mail.uid_broken = TRUE; + return FALSE; + } + + if (value <= ctx->sync_ctx->prev_msg_uid) { + /* broken - UIDs must be growing */ + ctx->mail.uid_broken = TRUE; + return FALSE; + } + + ctx->mail.uid = value; + /* if we had multiple X-UID headers, we could have + uid_broken=TRUE here. */ + ctx->mail.uid_broken = FALSE; + + if (ctx->sync_ctx->dest_first_mail && ctx->seq != 1) { + /* if we're expunging the first mail, delete this header since + otherwise X-IMAPbase header would be added after this, which + we don't like */ + return FALSE; + } + + ctx->hdr_pos[MBOX_HDR_X_UID] = str_len(ctx->header); + ctx->parsed_uid = value; + parse_trailing_whitespace(ctx, hdr); + return TRUE; +} + +static bool parse_content_length(struct mbox_sync_mail_context *ctx, + struct message_header_line *hdr) +{ + uoff_t value = 0; + size_t i; + + if (ctx->content_length != UOFF_T_MAX) { + /* duplicate */ + return FALSE; + } + + for (i = 0; i < hdr->full_value_len; i++) { + if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9') + break; + value = value*10 + (hdr->full_value[i] - '0'); + } + + for (; i < hdr->full_value_len; i++) { + if (!IS_LWSP_LF(hdr->full_value[i])) { + /* broken value */ + return FALSE; + } + } + + ctx->content_length = value; + return TRUE; +} + +static struct mbox_sync_header_func header_funcs[] = { + { "Content-Length", parse_content_length }, + { "Status", parse_status }, + { "X-IMAP", parse_x_imap }, + { "X-IMAPbase", parse_x_imap_base }, + { "X-Keywords", parse_x_keywords }, + { "X-Status", parse_x_status }, + { "X-UID", parse_x_uid } +}; + +static int mbox_sync_bsearch_header_func_cmp(const void *p1, const void *p2) +{ + const char *key = p1; + const struct mbox_sync_header_func *func = p2; + + return strcasecmp(key, func->header); +} + +int mbox_sync_parse_next_mail(struct istream *input, + struct mbox_sync_mail_context *ctx) +{ + struct mbox_sync_context *sync_ctx = ctx->sync_ctx; + struct message_header_parser_ctx *hdr_ctx; + struct message_header_line *hdr; + struct mbox_sync_header_func *func; + struct mbox_md5_context *mbox_md5_ctx; + size_t line_start_pos; + int i, ret; + + ctx->hdr_offset = ctx->mail.offset; + ctx->mail.flags = MAIL_RECENT; /* default to having recent flag */ + + ctx->header_first_change = SIZE_MAX; + ctx->header_last_change = 0; + + for (i = 0; i < MBOX_HDR_COUNT; i++) + ctx->hdr_pos[i] = SIZE_MAX; + + ctx->content_length = UOFF_T_MAX; + str_truncate(ctx->header, 0); + + mbox_md5_ctx = ctx->sync_ctx->mbox->md5_v.init(); + + line_start_pos = 0; + hdr_ctx = message_parse_header_init(input, NULL, 0); + while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) { + if (hdr->eoh) { + ctx->have_eoh = TRUE; + break; + } + + if (!hdr->continued) { + line_start_pos = str_len(ctx->header); + str_append(ctx->header, hdr->name); + str_append_data(ctx->header, hdr->middle, hdr->middle_len); + } + + func = bsearch(hdr->name, header_funcs, + N_ELEMENTS(header_funcs), sizeof(*header_funcs), + mbox_sync_bsearch_header_func_cmp); + + if (func != NULL) { + if (hdr->continues) { + hdr->use_full_value = TRUE; + continue; + } + + if (!func->func(ctx, hdr)) { + /* this header is broken, remove it */ + ctx->need_rewrite = TRUE; + str_truncate(ctx->header, line_start_pos); + if (ctx->header_first_change == SIZE_MAX) { + ctx->header_first_change = + line_start_pos; + } + continue; + } + buffer_append(ctx->header, hdr->full_value, + hdr->full_value_len); + } else { + ctx->sync_ctx->mbox->md5_v.more(mbox_md5_ctx, hdr); + buffer_append(ctx->header, hdr->value, + hdr->value_len); + } + if (!hdr->no_newline) { + if (hdr->crlf_newline) + str_append_c(ctx->header, '\r'); + str_append_c(ctx->header, '\n'); + } + } + i_assert(ret != 0); + message_parse_header_deinit(&hdr_ctx); + + ctx->sync_ctx->mbox->md5_v.finish(mbox_md5_ctx, ctx->hdr_md5_sum); + + if ((ctx->seq == 1 && !ctx->seen_imapbase) || + (ctx->seq > 1 && sync_ctx->dest_first_mail)) { + /* missing X-IMAPbase */ + ctx->need_rewrite = TRUE; + if (sync_ctx->base_uid_validity == 0) { + /* figure out a new UIDVALIDITY for us. */ + sync_ctx->base_uid_validity = + sync_ctx->hdr->uid_validity != 0 && + !sync_ctx->renumber_uids ? + sync_ctx->hdr->uid_validity : + I_MAX((uint32_t)ioloop_time, 1); + } + } + + ctx->body_offset = input->v_offset; + if (input->stream_errno != 0) { + mbox_sync_set_critical(ctx->sync_ctx, "read(%s) failed: %s", + i_stream_get_name(input), i_stream_get_error(input)); + return -1; + } + return 0; +} + +bool mbox_sync_parse_match_mail(struct mbox_mailbox *mbox, + struct mail_index_view *view, uint32_t seq) +{ + struct mbox_sync_mail_context ctx; + struct message_header_parser_ctx *hdr_ctx; + struct message_header_line *hdr; + struct header_func *func; + struct mbox_md5_context *mbox_md5_ctx; + const void *data; + bool expunged; + uint32_t uid; + int ret; + + /* we only wish to be sure that this mail actually is what we expect + it to be. If there's X-UID header and it matches our UID, we use it. + Otherwise it could mean that the X-UID header is invalid and it's + just not yet been rewritten. In that case use MD5 sum, if it + exists. */ + + mail_index_lookup_uid(view, seq, &uid); + i_zero(&ctx); + mbox_md5_ctx = mbox->md5_v.init(); + + hdr_ctx = message_parse_header_init(mbox->mbox_stream, NULL, 0); + while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) { + if (hdr->eoh) + break; + + func = bsearch(hdr->name, header_funcs, + N_ELEMENTS(header_funcs), sizeof(*header_funcs), + mbox_sync_bsearch_header_func_cmp); + if (func != NULL) { + if (strcasecmp(hdr->name, "X-UID") == 0) { + if (hdr->continues) { + hdr->use_full_value = TRUE; + continue; + } + (void)parse_x_uid(&ctx, hdr); + + if (ctx.mail.uid == uid) + break; + } + } else { + mbox->md5_v.more(mbox_md5_ctx, hdr); + } + } + i_assert(ret != 0); + message_parse_header_deinit(&hdr_ctx); + + mbox->md5_v.finish(mbox_md5_ctx, ctx.hdr_md5_sum); + + if (ctx.mail.uid == uid) + return TRUE; + + /* match by MD5 sum */ + mbox->mbox_save_md5 = TRUE; + + mail_index_lookup_ext(view, seq, mbox->md5hdr_ext_idx, + &data, &expunged); + return data == NULL ? 0 : + memcmp(data, ctx.hdr_md5_sum, 16) == 0; +} |