diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
commit | f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch) | |
tree | a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/lib-index/mail-cache-lookup.c | |
parent | Initial commit. (diff) | |
download | dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip |
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/lib-index/mail-cache-lookup.c')
-rw-r--r-- | src/lib-index/mail-cache-lookup.c | 694 |
1 file changed, 694 insertions, 0 deletions
diff --git a/src/lib-index/mail-cache-lookup.c b/src/lib-index/mail-cache-lookup.c new file mode 100644 index 0000000..e57b40b --- /dev/null +++ b/src/lib-index/mail-cache-lookup.c @@ -0,0 +1,694 @@ +/* Copyright (c) 2003-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "buffer.h" +#include "str.h" +#include "mail-cache-private.h" + + +#define CACHE_PREFETCH IO_BLOCK_SIZE + +int mail_cache_get_record(struct mail_cache *cache, uint32_t offset, + const struct mail_cache_record **rec_r) +{ + const struct mail_cache_record *rec; + const void *data; + int ret; + + i_assert(offset != 0); + + if (offset % sizeof(uint32_t) != 0) { + /* records are always 32-bit aligned */ + mail_cache_set_corrupted(cache, "invalid record offset"); + return -1; + } + + /* we don't know yet how large the record is, so just guess */ + if (mail_cache_map(cache, offset, sizeof(*rec) + CACHE_PREFETCH, + &data) < 0) + return -1; + + if (offset + sizeof(*rec) > cache->mmap_length) { + mail_cache_set_corrupted(cache, "record points outside file"); + return -1; + } + rec = data; + + if (rec->size < sizeof(*rec)) { + mail_cache_set_corrupted(cache, "invalid record size"); + return -1; + } + if (rec->size > CACHE_PREFETCH) { + /* larger than we guessed. map the rest of the record. 
*/ + if ((ret = mail_cache_map(cache, offset, rec->size, &data)) < 0) + return -1; + if (ret == 0) { + mail_cache_set_corrupted(cache, "record points outside file"); + return -1; + } + rec = data; + } + + *rec_r = rec; + return 0; +} + +uint32_t mail_cache_lookup_cur_offset(struct mail_index_view *view, + uint32_t seq, uint32_t *reset_id_r) +{ + struct mail_cache *cache = mail_index_view_get_index(view)->cache; + struct mail_index_map *map; + const void *data; + uint32_t offset; + + mail_index_lookup_ext_full(view, seq, cache->ext_id, &map, &data, NULL); + if (data == NULL) { + /* no cache offsets */ + return 0; + } + offset = *((const uint32_t *)data); + if (offset == 0) + return 0; + + if (!mail_index_ext_get_reset_id(view, map, cache->ext_id, reset_id_r)) + i_unreached(); + return offset; +} + +static int +mail_cache_lookup_offset(struct mail_cache *cache, struct mail_index_view *view, + uint32_t seq, uint32_t *offset_r) +{ + uint32_t offset, reset_id, reset_id2; + int ret; + + offset = mail_cache_lookup_cur_offset(view, seq, &reset_id); + if (offset == 0) + return 0; + + while (cache->hdr->file_seq != reset_id) { + /* reset_it doesn't match - sync the index/cache */ + if ((ret = mail_cache_sync_reset_id(cache)) <= 0) + return ret; + + /* lookup again after syncing */ + offset = mail_cache_lookup_cur_offset(view, seq, &reset_id2); + if (offset == 0) + return 0; + if (cache->hdr->file_seq == reset_id2) + break; /* match - all good */ + if (reset_id == reset_id2) { + /* reset_id didn't change after sync. This means it's + pointing to an old already deleted cache file. */ + return 0; + } + /* reset_id changed - try again */ + reset_id = reset_id2; + } + + *offset_r = offset; + return 1; +} + +bool mail_cache_track_loops(struct mail_cache_loop_track *loop_track, + uoff_t offset, uoff_t size) +{ + i_assert(offset != 0); + i_assert(size != 0); + + /* looping happens only in rare error conditions, so it's enough if we + just catch it eventually. 
we do this by checking if we've seen + more record data than possible in the accessed file area. */ + if (loop_track->size_sum == 0) { + /* first call */ + loop_track->min_offset = offset; + loop_track->max_offset = offset + size; + } else { + if (loop_track->min_offset > offset) + loop_track->min_offset = offset; + if (loop_track->max_offset < offset + size) + loop_track->max_offset = offset + size; + } + + loop_track->size_sum += size; + return loop_track->size_sum > + (loop_track->max_offset - loop_track->min_offset); +} + +void mail_cache_lookup_iter_init(struct mail_cache_view *view, uint32_t seq, + struct mail_cache_lookup_iterate_ctx *ctx_r) +{ + struct mail_cache_lookup_iterate_ctx *ctx = ctx_r; + int ret; + + if (!view->cache->opened) + (void)mail_cache_open_and_verify(view->cache); + + i_zero(ctx); + ctx->view = view; + ctx->seq = seq; + + if (!MAIL_CACHE_IS_UNUSABLE(view->cache)) { + /* look up the first offset */ + ret = mail_cache_lookup_offset(view->cache, view->view, seq, + &ctx->offset); + if (ret <= 0) { + ctx->stop = TRUE; + ctx->failed = ret < 0; + } + } + ctx->remap_counter = view->cache->remap_counter; + + i_zero(&view->loop_track); +} + +static bool +mail_cache_lookup_iter_transaction(struct mail_cache_lookup_iterate_ctx *ctx) +{ + ctx->rec = mail_cache_transaction_lookup_rec(ctx->view->transaction, + ctx->seq, + &ctx->trans_next_idx); + if (ctx->rec == NULL) + return FALSE; + + ctx->inmemory_field_idx = TRUE; + ctx->remap_counter = ctx->view->cache->remap_counter; + ctx->pos = sizeof(*ctx->rec); + ctx->rec_size = ctx->rec->size; + return TRUE; +} + +static int +mail_cache_lookup_iter_next_record(struct mail_cache_lookup_iterate_ctx *ctx) +{ + struct mail_cache_view *view = ctx->view; + + if (ctx->failed) + return -1; + + if (ctx->rec != NULL) + ctx->offset = ctx->rec->prev_offset; + if (ctx->offset == 0) { + /* end of this record list. check newly appended data. 
*/ + if (view->trans_seq1 > ctx->seq || + view->trans_seq2 < ctx->seq) + return 0; + /* check data still in memory. this works for recent mails + even with INDEX=MEMORY */ + if (!ctx->memory_appends_checked) { + if (mail_cache_lookup_iter_transaction(ctx)) + return 1; + ctx->memory_appends_checked = TRUE; + } + if (MAIL_CACHE_IS_UNUSABLE(view->cache) || ctx->stop) + return 0; + + /* check data already written to cache file */ + if (ctx->disk_appends_checked || + mail_cache_lookup_offset(view->cache, view->trans_view, + ctx->seq, &ctx->offset) <= 0) + return 0; + + ctx->disk_appends_checked = TRUE; + ctx->remap_counter = view->cache->remap_counter; + i_zero(&view->loop_track); + } + + if (ctx->stop) + return 0; + + /* look up the next record */ + if (mail_cache_get_record(view->cache, ctx->offset, &ctx->rec) < 0) + return -1; + if (mail_cache_track_loops(&view->loop_track, ctx->offset, + ctx->rec->size)) { + mail_cache_set_corrupted(view->cache, + "record list is circular"); + return -1; + } + ctx->inmemory_field_idx = FALSE; + ctx->remap_counter = view->cache->remap_counter; + + ctx->pos = sizeof(*ctx->rec); + ctx->rec_size = ctx->rec->size; + return 1; +} + +static int +mail_cache_lookup_rec_get_field(struct mail_cache_lookup_iterate_ctx *ctx, + unsigned int *field_idx_r) +{ + struct mail_cache *cache = ctx->view->cache; + uint32_t file_field; + + file_field = *((const uint32_t *)CONST_PTR_OFFSET(ctx->rec, ctx->pos)); + if (ctx->inmemory_field_idx) { + *field_idx_r = file_field; + return 0; + } + + if (file_field >= cache->file_fields_count) { + /* new field, have to re-read fields header to figure + out its size. don't do this if we're purging. 
*/ + if (!cache->locked) { + if (mail_cache_header_fields_read(cache) < 0) + return -1; + } + if (file_field >= cache->file_fields_count) { + mail_cache_set_corrupted(cache, + "field index too large (%u >= %u)", + file_field, cache->file_fields_count); + return -1; + } + + /* field reading might have re-mmaped the file and + caused rec pointer to break. need to get it again. */ + if (mail_cache_get_record(cache, ctx->offset, &ctx->rec) < 0) + return -1; + ctx->remap_counter = cache->remap_counter; + } + + *field_idx_r = cache->file_field_map[file_field]; + return 0; +} + +int mail_cache_lookup_iter_next(struct mail_cache_lookup_iterate_ctx *ctx, + struct mail_cache_iterate_field *field_r) +{ + struct mail_cache *cache = ctx->view->cache; + unsigned int field_idx; + unsigned int data_size; + int ret; + + i_assert(ctx->remap_counter == cache->remap_counter); + + if (ctx->pos + sizeof(uint32_t) > ctx->rec_size) { + if (ctx->pos != ctx->rec_size) { + mail_cache_set_corrupted(cache, + "record has invalid size"); + return -1; + } + + if ((ret = mail_cache_lookup_iter_next_record(ctx)) <= 0) + return ret; + } + + /* return the next field */ + if (mail_cache_lookup_rec_get_field(ctx, &field_idx) < 0) + return -1; + ctx->pos += sizeof(uint32_t); + + data_size = cache->fields[field_idx].field.field_size; + if (data_size == UINT_MAX && + ctx->pos + sizeof(uint32_t) <= ctx->rec->size) { + /* variable size field. get its size from the file. 
*/ + data_size = *((const uint32_t *) + CONST_PTR_OFFSET(ctx->rec, ctx->pos)); + ctx->pos += sizeof(uint32_t); + } + + if (ctx->rec->size - ctx->pos < data_size) { + mail_cache_set_corrupted(cache, + "record continues outside its allocated size"); + return -1; + } + + field_r->field_idx = field_idx; + field_r->data = CONST_PTR_OFFSET(ctx->rec, ctx->pos); + field_r->size = data_size; + field_r->offset = ctx->offset + ctx->pos; + + /* each record begins from 32bit aligned position */ + ctx->pos += (data_size + sizeof(uint32_t)-1) & ~(sizeof(uint32_t)-1); + return 1; +} + +static int mail_cache_seq(struct mail_cache_view *view, uint32_t seq) +{ + struct mail_cache_lookup_iterate_ctx iter; + struct mail_cache_iterate_field field; + int ret; + + view->cached_exists_value = (view->cached_exists_value + 1) & UINT8_MAX; + if (view->cached_exists_value == 0) { + /* wrapped, we'll have to clear the buffer */ + buffer_set_used_size(view->cached_exists_buf, 0); + view->cached_exists_value++; + } + view->cached_exists_seq = seq; + + mail_cache_lookup_iter_init(view, seq, &iter); + while ((ret = mail_cache_lookup_iter_next(&iter, &field)) > 0) { + buffer_write(view->cached_exists_buf, field.field_idx, + &view->cached_exists_value, 1); + } + return ret; +} + +int mail_cache_field_exists(struct mail_cache_view *view, uint32_t seq, + unsigned int field) +{ + const uint8_t *data; + + i_assert(seq > 0); + + /* NOTE: view might point to a non-committed transaction that has + fields that don't yet exist in the cache file. So don't add any + fast-paths checking whether the field exists in the file. */ + + /* FIXME: we should discard the cache if view has been synced */ + if (view->cached_exists_seq != seq) { + if (mail_cache_seq(view, seq) < 0) + return -1; + } + + data = view->cached_exists_buf->data; + return (field < view->cached_exists_buf->used && + data[field] == view->cached_exists_value) ? 
1 : 0; +} + +bool mail_cache_field_exists_any(struct mail_cache_view *view, uint32_t seq) +{ + uint32_t reset_id; + + return mail_cache_lookup_cur_offset(view->view, seq, &reset_id) != 0; +} + +enum mail_cache_decision_type +mail_cache_field_get_decision(struct mail_cache *cache, unsigned int field_idx) +{ + i_assert(field_idx < cache->fields_count); + + return cache->fields[field_idx].field.decision; +} + +static int +mail_cache_lookup_bitmask(struct mail_cache_lookup_iterate_ctx *iter, + unsigned int field_idx, unsigned int field_size, + buffer_t *dest_buf) +{ + struct mail_cache_iterate_field field; + const unsigned char *src; + unsigned char *dest; + unsigned int i; + bool found = FALSE; + int ret; + + /* make sure all bits are cleared first */ + buffer_write_zero(dest_buf, 0, field_size); + + while ((ret = mail_cache_lookup_iter_next(iter, &field)) > 0) { + if (field.field_idx != field_idx) + continue; + + /* merge all bits */ + src = field.data; + dest = buffer_get_space_unsafe(dest_buf, 0, field.size); + for (i = 0; i < field.size; i++) + dest[i] |= src[i]; + found = TRUE; + } + return ret < 0 ? -1 : (found ? 1 : 0); +} + +int mail_cache_lookup_field(struct mail_cache_view *view, buffer_t *dest_buf, + uint32_t seq, unsigned int field_idx) +{ + struct mail_cache_lookup_iterate_ctx iter; + struct mail_cache_iterate_field field; + int ret; + + ret = mail_cache_field_exists(view, seq, field_idx); + mail_cache_decision_state_update(view, seq, field_idx); + if (ret <= 0) + return ret; + + /* the field should exist */ + mail_cache_lookup_iter_init(view, seq, &iter); + if (view->cache->fields[field_idx].field.type == MAIL_CACHE_FIELD_BITMASK) { + ret = mail_cache_lookup_bitmask(&iter, field_idx, + view->cache->fields[field_idx].field.field_size, + dest_buf); + } else { + /* return the first one that's found. if there are multiple + they're all identical. 
*/ + while ((ret = mail_cache_lookup_iter_next(&iter, &field)) > 0) { + if (field.field_idx == field_idx) { + buffer_append(dest_buf, field.data, field.size); + break; + } + } + } + /* NOTE: view->cache->fields may have been reallocated by + mail_cache_lookup_*(). */ + return ret; +} + +struct header_lookup_data { + uint32_t data_size; + const unsigned char *data; +}; + +struct header_lookup_line { + uint32_t line_num; + struct header_lookup_data *data; +}; + +struct header_lookup_context { + struct mail_cache_view *view; + pool_t pool; + ARRAY(struct header_lookup_line) lines; +}; + +enum { + HDR_FIELD_STATE_DONTWANT = 0, + HDR_FIELD_STATE_WANT, + HDR_FIELD_STATE_SEEN +}; + +static void header_lines_save(struct header_lookup_context *ctx, + const struct mail_cache_iterate_field *field) +{ + const uint32_t *lines = field->data; + uint32_t data_size = field->size; + struct header_lookup_line hdr_line; + struct header_lookup_data *hdr_data; + void *data_dup; + unsigned int i, lines_count, pos; + + /* data = { line_nums[], 0, "headers" } */ + for (i = 0; data_size >= sizeof(uint32_t); i++) { + data_size -= sizeof(uint32_t); + if (lines[i] == 0) + break; + } + lines_count = i; + pos = (lines_count+1) * sizeof(uint32_t); + + hdr_data = p_new(ctx->pool, struct header_lookup_data, 1); + hdr_data->data_size = data_size; + if (data_size > 0) { + hdr_data->data = data_dup = + p_malloc(ctx->pool, data_size); + memcpy(data_dup, CONST_PTR_OFFSET(field->data, pos), data_size); + } + + for (i = 0; i < lines_count; i++) { + hdr_line.line_num = lines[i]; + hdr_line.data = hdr_data; + array_push_back(&ctx->lines, &hdr_line); + } +} + +static int header_lookup_line_cmp(const struct header_lookup_line *l1, + const struct header_lookup_line *l2) +{ + return (int)l1->line_num - (int)l2->line_num; +} + +static int +mail_cache_lookup_headers_real(struct mail_cache_view *view, string_t *dest, + uint32_t seq, const unsigned int field_idxs[], + unsigned int fields_count, pool_t *pool_r) +{ + 
struct mail_cache_lookup_iterate_ctx iter; + struct mail_cache_iterate_field field; + struct header_lookup_context ctx; + struct header_lookup_line *lines; + const unsigned char *p, *start, *end; + uint8_t *field_state; + unsigned int i, count, max_field = 0; + size_t hdr_size; + uint8_t want = HDR_FIELD_STATE_WANT; + buffer_t *buf; + int ret; + + *pool_r = NULL; + + if (fields_count == 0) + return 1; + + /* update the decision state regardless of whether the fields + actually exist or not. */ + for (i = 0; i < fields_count; i++) + mail_cache_decision_state_update(view, seq, field_idxs[i]); + + /* mark all the fields we want to find. */ + buf = t_buffer_create(32); + for (i = 0; i < fields_count; i++) { + if (field_idxs[i] > max_field) + max_field = field_idxs[i]; + + buffer_write(buf, field_idxs[i], &want, 1); + } + field_state = buffer_get_modifiable_data(buf, NULL); + + /* lookup the fields */ + i_zero(&ctx); + ctx.view = view; + ctx.pool = *pool_r = pool_alloconly_create(MEMPOOL_GROWING"mail cache headers", 1024); + t_array_init(&ctx.lines, 32); + + mail_cache_lookup_iter_init(view, seq, &iter); + while ((ret = mail_cache_lookup_iter_next(&iter, &field)) > 0) { + if (field.field_idx > max_field || + field_state[field.field_idx] != HDR_FIELD_STATE_WANT) { + /* a) don't want it, b) duplicate */ + } else { + field_state[field.field_idx] = HDR_FIELD_STATE_SEEN; + header_lines_save(&ctx, &field); + } + + } + if (ret < 0) + return -1; + + /* check that all fields were found */ + for (i = 0; i <= max_field; i++) { + if (field_state[i] == HDR_FIELD_STATE_WANT) + return 0; + } + + /* we need to return headers in the order they existed originally. + we can do this by sorting the messages by their line numbers. 
*/ + array_sort(&ctx.lines, header_lookup_line_cmp); + lines = array_get_modifiable(&ctx.lines, &count); + + /* then start filling dest buffer from the headers */ + for (i = 0; i < count; i++) { + start = lines[i].data->data; + end = start + lines[i].data->data_size; + + /* find the end of the (multiline) header */ + for (p = start; p != end; p++) { + if (*p == '\n' && + (p+1 == end || (p[1] != ' ' && p[1] != '\t'))) { + p++; + break; + } + } + hdr_size = (size_t)(p - start); + buffer_append(dest, start, hdr_size); + + /* if there are more lines for this header, the following lines + continue after this one. so skip this line. */ + lines[i].data->data += hdr_size; + lines[i].data->data_size -= hdr_size; + } + return 1; +} + +int mail_cache_lookup_headers(struct mail_cache_view *view, string_t *dest, + uint32_t seq, const unsigned int field_idxs[], + unsigned int fields_count) +{ + pool_t pool = NULL; + int ret; + + if (buffer_get_pool(dest)->datastack_pool) + ret = mail_cache_lookup_headers_real(view, dest, seq, + field_idxs, fields_count, + &pool); + else T_BEGIN { + ret = mail_cache_lookup_headers_real(view, dest, seq, + field_idxs, fields_count, + &pool); + } T_END; + pool_unref(&pool); + return ret; +} + +static uint32_t +mail_cache_get_highest_seq_with_cache(struct mail_cache_view *view, + uint32_t below_seq, uint32_t *reset_id_r) +{ + struct mail_cache_missing_reason_cache *rc = &view->reason_cache; + uint32_t seq = below_seq-1, highest_checked_seq = 0; + + /* find the newest mail that has anything in cache */ + if (rc->log_file_head_offset == view->view->log_file_head_offset && + rc->log_file_head_seq == view->view->log_file_head_seq) { + /* reason_cache matches the current view - we can use it */ + highest_checked_seq = rc->highest_checked_seq; + } else { + rc->log_file_head_offset = view->view->log_file_head_offset; + rc->log_file_head_seq = view->view->log_file_head_seq; + } + rc->highest_checked_seq = below_seq; + + /* first check anything not already in 
reason_cache */ + for (; seq > highest_checked_seq; seq--) { + if (mail_cache_lookup_cur_offset(view->view, seq, reset_id_r) != 0) { + rc->highest_seq_with_cache = seq; + rc->reset_id = *reset_id_r; + return seq; + } + } + if (seq == 0) + return 0; + /* then return the result from cache */ + *reset_id_r = rc->reset_id; + return rc->highest_seq_with_cache; +} + +const char * +mail_cache_get_missing_reason(struct mail_cache_view *view, uint32_t seq) +{ + uint32_t offset, reset_id; + + if (mail_index_is_expunged(view->view, seq)) + return "Mail is already expunged"; + + if (MAIL_CACHE_IS_UNUSABLE(view->cache)) + return "Cache file is unusable"; + + offset = mail_cache_lookup_cur_offset(view->view, seq, &reset_id); + if (offset != 0) { + if (view->cache->hdr->file_seq != reset_id) { + return t_strdup_printf( + "Index reset_id=%u doesn't match cache reset_id=%u", + reset_id, view->cache->hdr->file_seq); + } + return t_strdup_printf( + "Mail has other cached fields, reset_id=%u", reset_id); + } + seq = mail_cache_get_highest_seq_with_cache(view, seq, &reset_id); + if (seq == 0) { + return t_strdup_printf("Cache file is empty, reset_id=%u", + view->cache->hdr->file_seq); + } + + uint32_t uid; + mail_index_lookup_uid(view->view, seq, &uid); + + if (view->cache->hdr->file_seq != reset_id) { + return t_strdup_printf( + "Mail not cached, highest cached seq=%u uid=%u: " + "Index reset_id=%u doesn't match cache reset_id=%u", + seq, uid, reset_id, view->cache->hdr->file_seq); + } + return t_strdup_printf( + "Mail not cached, highest cached seq=%u uid=%u: reset_id=%u", + seq, uid, reset_id); +} |