Diffstat
-rw-r--r-- | src/lib-index/mail-cache-fields.c | 660 |
1 file changed, 660 insertions, 0 deletions
diff --git a/src/lib-index/mail-cache-fields.c b/src/lib-index/mail-cache-fields.c
new file mode 100644
index 0000000..429e0d2
--- /dev/null
+++ b/src/lib-index/mail-cache-fields.c
@@ -0,0 +1,660 @@
+/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "ioloop.h"
+#include "buffer.h"
+#include "hash.h"
+#include "file-cache.h"
+#include "read-full.h"
+#include "write-full.h"
+#include "mmap-util.h"
+#include "mail-cache-private.h"
+
+#include <stddef.h>
+
+#define CACHE_FIELD_IS_NEWLY_WANTED(cache, field_idx) \
+	((cache)->field_file_map[field_idx] == (uint32_t)-1 && \
+	 (cache)->fields[field_idx].used)
+
+static bool field_has_fixed_size(enum mail_cache_field_type type)
+{
+	switch (type) {
+	case MAIL_CACHE_FIELD_FIXED_SIZE:
+	case MAIL_CACHE_FIELD_BITMASK:
+		return TRUE;
+	case MAIL_CACHE_FIELD_VARIABLE_SIZE:
+	case MAIL_CACHE_FIELD_STRING:
+	case MAIL_CACHE_FIELD_HEADER:
+		return FALSE;
+
+	case MAIL_CACHE_FIELD_COUNT:
+		break;
+	}
+
+	i_unreached();
+	return FALSE;
+}
+
+static bool field_decision_is_valid(enum mail_cache_decision_type type)
+{
+	switch (type & ENUM_NEGATE(MAIL_CACHE_DECISION_FORCED)) {
+	case MAIL_CACHE_DECISION_NO:
+	case MAIL_CACHE_DECISION_TEMP:
+	case MAIL_CACHE_DECISION_YES:
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+static int field_type_verify(struct mail_cache *cache, unsigned int idx,
+			     enum mail_cache_field_type type, unsigned int size)
+{
+	const struct mail_cache_field *field = &cache->fields[idx].field;
+
+	if (field->type != type) {
+		mail_cache_set_corrupted(cache,
+			"registered field %s type changed", field->name);
+		return -1;
+	}
+	if (field->field_size != size && field_has_fixed_size(type)) {
+		mail_cache_set_corrupted(cache,
+			"registered field %s size changed", field->name);
+		return -1;
+	}
+	return 0;
+}
+
+static void
+mail_cache_field_update(struct mail_cache *cache,
+			const struct mail_cache_field *newfield)
+{
+	struct mail_cache_field_private *orig;
+	bool initial_registering;
+
+	i_assert(newfield->type < MAIL_CACHE_FIELD_COUNT);
+
+	/* are we still doing the initial cache field registering for
+	   internal fields and for mail_*cache_fields settings? */
+	initial_registering = cache->file_fields_count == 0;
+
+	orig = &cache->fields[newfield->idx];
+	if ((newfield->decision & MAIL_CACHE_DECISION_FORCED) != 0 ||
+	    ((orig->field.decision & MAIL_CACHE_DECISION_FORCED) == 0 &&
+	     newfield->decision > orig->field.decision)) {
+		orig->field.decision = newfield->decision;
+		if (!initial_registering)
+			orig->decision_dirty = TRUE;
+	}
+	if (orig->field.last_used < newfield->last_used) {
+		orig->field.last_used = newfield->last_used;
+		if (!initial_registering)
+			orig->decision_dirty = TRUE;
+	}
+	if (orig->decision_dirty)
+		cache->field_header_write_pending = TRUE;
+
+	(void)field_type_verify(cache, newfield->idx,
+				newfield->type, newfield->field_size);
+}
+
+void mail_cache_register_fields(struct mail_cache *cache,
+				struct mail_cache_field *fields,
+				unsigned int fields_count)
+{
+	char *name;
+	void *value;
+	unsigned int new_idx;
+	unsigned int i, j, registered_count;
+
+	new_idx = cache->fields_count;
+	for (i = 0; i < fields_count; i++) {
+		if (hash_table_lookup_full(cache->field_name_hash,
+					   fields[i].name, &name, &value)) {
+			fields[i].idx = POINTER_CAST_TO(value, unsigned int);
+			mail_cache_field_update(cache, &fields[i]);
+			continue;
+		}
+
+		/* check if the same header is being registered in the
+		   same field array */
+		for (j = 0; j < i; j++) {
+			if (strcasecmp(fields[i].name, fields[j].name) == 0) {
+				fields[i].idx = fields[j].idx;
+				break;
+			}
+		}
+
+		if (j == i)
+			fields[i].idx = new_idx++;
+	}
+
+	if (new_idx == cache->fields_count)
+		return;
+
+	/* @UNSAFE */
+	cache->fields = i_realloc_type(cache->fields,
+				       struct mail_cache_field_private,
+				       cache->fields_count, new_idx);
+	cache->field_file_map =
+		i_realloc_type(cache->field_file_map, uint32_t,
+			       cache->fields_count, new_idx);
+
+	registered_count = cache->fields_count;
+	for (i = 0; i < fields_count; i++) {
+		unsigned int idx = fields[i].idx;
+
+		if (idx < registered_count)
+			continue;
+
+		/* new index - save it */
+		name = p_strdup(cache->field_pool, fields[i].name);
+		cache->fields[idx].field = fields[i];
+		cache->fields[idx].field.name = name;
+		cache->fields[idx].field.last_used = fields[i].last_used;
+		cache->field_file_map[idx] = (uint32_t)-1;
+
+		if (!field_has_fixed_size(cache->fields[idx].field.type))
+			cache->fields[idx].field.field_size = UINT_MAX;
+
+		hash_table_insert(cache->field_name_hash, name,
+				  POINTER_CAST(idx));
+		registered_count++;
+	}
+	i_assert(registered_count == new_idx);
+	cache->fields_count = new_idx;
+}
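
For context, a minimal caller-side sketch of the registration API defined above. The field name "hdr.subject" and the surrounding function are hypothetical; `cache` is assumed to be an already-created struct mail_cache. Registering the same name twice returns the same index instead of creating a duplicate:

	/* Hypothetical sketch: register one variable-size string field.
	   Unset members (idx, field_size, last_used) are zero-filled and
	   assigned by the cache. */
	struct mail_cache_field subject_field = {
		.name = "hdr.subject",
		.type = MAIL_CACHE_FIELD_STRING,
		.decision = MAIL_CACHE_DECISION_TEMP,
	};

	mail_cache_register_fields(cache, &subject_field, 1);
	/* subject_field.idx now holds the in-memory index; a lookup by
	   name returns that same index. */
	i_assert(mail_cache_register_lookup(cache, "hdr.subject") ==
		 subject_field.idx);
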
+
+unsigned int
+mail_cache_register_lookup(struct mail_cache *cache, const char *name)
+{
+	char *key;
+	void *value;
+
+	if (hash_table_lookup_full(cache->field_name_hash, name, &key, &value))
+		return POINTER_CAST_TO(value, unsigned int);
+	else
+		return UINT_MAX;
+}
+
+const struct mail_cache_field *
+mail_cache_register_get_field(struct mail_cache *cache, unsigned int field_idx)
+{
+	i_assert(field_idx < cache->fields_count);
+
+	return &cache->fields[field_idx].field;
+}
+
+struct mail_cache_field *
+mail_cache_register_get_list(struct mail_cache *cache, pool_t pool,
+			     unsigned int *count_r)
+{
+	struct mail_cache_field *list;
+	unsigned int i;
+
+	if (!cache->opened)
+		(void)mail_cache_open_and_verify(cache);
+
+	list = cache->fields_count == 0 ? NULL :
+		p_new(pool, struct mail_cache_field, cache->fields_count);
+	for (i = 0; i < cache->fields_count; i++) {
+		list[i] = cache->fields[i].field;
+		list[i].name = p_strdup(pool, list[i].name);
+	}
+
+	*count_r = cache->fields_count;
+	return list;
+}
+
+static int
+mail_cache_header_fields_get_offset(struct mail_cache *cache,
+				    uint32_t *offset_r,
+				    const struct mail_cache_header_fields **field_hdr_r)
+{
+	const struct mail_cache_header_fields *field_hdr;
+	struct mail_cache_header_fields tmp_field_hdr;
+	const void *data;
+	uint32_t offset = 0, next_offset, field_hdr_size;
+	unsigned int next_count = 0;
+	int ret;
+
+	if (MAIL_CACHE_IS_UNUSABLE(cache)) {
+		*offset_r = 0;
+		if (field_hdr_r != NULL)
+			*field_hdr_r = NULL;
+		return 0;
+	}
+
+	/* find the latest header */
+	offset = 0;
+	next_offset = cache->last_field_header_offset != 0 ?
+		cache->last_field_header_offset :
+		mail_index_offset_to_uint32(cache->hdr->field_header_offset);
+	while (next_offset != 0) {
+		if (next_offset == offset) {
+			mail_cache_set_corrupted(cache,
+				"next_offset in field header loops");
+			return -1;
+		}
+		/* In Dovecot v2.2+ we don't try to use any holes,
+		   so next_offset must always be larger than current offset.
+		   also makes it easier to guarantee there aren't any loops
+		   (which we don't bother doing for old files) */
+		if (next_offset < offset && cache->hdr->minor_version != 0) {
+			mail_cache_set_corrupted(cache,
+				"next_offset in field header decreases");
+			return -1;
+		}
+		offset = next_offset;
+
+		if (cache->mmap_base != NULL || cache->map_with_read) {
+			ret = mail_cache_map(cache, offset, sizeof(*field_hdr),
+					     &data);
+			if (ret <= 0) {
+				if (ret < 0)
+					return -1;
+				mail_cache_set_corrupted(cache,
+					"header field next_offset points outside file");
+				return -1;
+			}
+			field_hdr = data;
+		} else {
+			/* if we need to follow multiple offsets to get to
+			   the last one, it's faster to just pread() the file
+			   instead of going through cache */
+			ret = pread_full(cache->fd, &tmp_field_hdr,
+					 sizeof(tmp_field_hdr), offset);
+			if (ret < 0) {
+				mail_cache_set_syscall_error(cache, "pread()");
+				return -1;
+			}
+			if (ret == 0) {
+				mail_cache_set_corrupted(cache,
+					"header field next_offset points outside file");
+				return -1;
+			}
+			field_hdr = &tmp_field_hdr;
+		}
+
+		next_offset =
+			mail_index_offset_to_uint32(field_hdr->next_offset);
+		next_count++;
+	}
+
+	if (offset == 0) {
+		mail_cache_set_corrupted(cache, "missing header fields");
+		return -1;
+	}
+	cache->last_field_header_offset = offset;
+
+	if (next_count > cache->index->optimization_set.cache.purge_header_continue_count) {
+		mail_cache_purge_later(cache, t_strdup_printf(
+			"Too many continued headers (%u)", next_count));
+	}
+
+	if (field_hdr_r != NULL) {
+		/* detect corrupted size later */
+		field_hdr_size = I_MAX(field_hdr->size, sizeof(*field_hdr));
+		if (cache->file_cache != NULL) {
+			/* invalidate the cache fields area to make sure we
+			   get the latest cache decisions/last_used fields */
+			file_cache_invalidate(cache->file_cache, offset,
+					      field_hdr_size);
+		}
+		if (cache->read_buf != NULL)
+			buffer_set_used_size(cache->read_buf, 0);
+		ret = mail_cache_map(cache, offset, field_hdr_size, &data);
+		if (ret < 0)
+			return -1;
+		if (ret == 0) {
+			mail_cache_set_corrupted(cache,
+				"header field size outside file");
+			return -1;
+		}
+		*field_hdr_r = data;
+	}
+	*offset_r = offset;
+	return 0;
+}
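
The function above finds the newest field header by walking a forward-linked chain of file offsets; requiring offsets to strictly increase both bounds the walk and rules out loops. A self-contained sketch of that invariant, with a plain array standing in for the cache file (the struct and indexing scheme here are illustrative only):

	/* Standalone sketch of the offset-chain walk: follow next_offset
	   links until 0, rejecting any offset that does not increase. */
	#include <stdint.h>
	#include <stdio.h>

	struct hdr { uint32_t next_offset; };

	static int find_latest(const struct hdr *file, uint32_t first,
			       uint32_t *latest_r)
	{
		uint32_t offset = 0, next = first;

		while (next != 0) {
			if (next <= offset)
				return -1; /* loop or decreasing offset */
			offset = next;
			next = file[offset].next_offset;
		}
		if (offset == 0)
			return -1; /* no header at all */
		*latest_r = offset;
		return 0;
	}

	int main(void)
	{
		/* header at slot 1 links to slot 3; slot 3 is the latest */
		struct hdr file[4] = { {0}, {3}, {0}, {0} };
		uint32_t latest;

		if (find_latest(file, 1, &latest) == 0)
			printf("latest header at %u\n", latest); /* 3 */
		return 0;
	}
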
+
+int mail_cache_header_fields_read(struct mail_cache *cache)
+{
+	const struct mail_cache_header_fields *field_hdr;
+	struct mail_cache_field field;
+	const uint32_t *last_used, *sizes;
+	const uint8_t *types, *decisions;
+	const char *p, *names, *end;
+	char *orig_key;
+	void *orig_value;
+	unsigned int fidx, new_fields_count;
+	struct mail_cache_purge_drop_ctx drop_ctx;
+	uint32_t offset, i;
+
+	if (mail_cache_header_fields_get_offset(cache, &offset, &field_hdr) < 0)
+		return -1;
+
+	if (offset == 0) {
+		/* no fields - the file is empty */
+		return 0;
+	}
+
+	/* check the fixed size of the header. name[] has to be checked
+	   separately */
+	if (field_hdr->fields_count > INT_MAX / MAIL_CACHE_FIELD_NAMES(1) ||
+	    field_hdr->size < MAIL_CACHE_FIELD_NAMES(field_hdr->fields_count)) {
+		mail_cache_set_corrupted(cache, "invalid field header size");
+		return -1;
+	}
+
+	new_fields_count = field_hdr->fields_count;
+	if (new_fields_count != 0) {
+		cache->file_field_map =
+			i_realloc_type(cache->file_field_map, unsigned int,
+				       cache->file_fields_count,
+				       new_fields_count);
+	} else {
+		i_free_and_null(cache->file_field_map);
+	}
+	cache->file_fields_count = new_fields_count;
+
+	last_used = CONST_PTR_OFFSET(field_hdr, MAIL_CACHE_FIELD_LAST_USED());
+	sizes = CONST_PTR_OFFSET(field_hdr,
+		MAIL_CACHE_FIELD_SIZE(field_hdr->fields_count));
+	types = CONST_PTR_OFFSET(field_hdr,
+		MAIL_CACHE_FIELD_TYPE(field_hdr->fields_count));
+	decisions = CONST_PTR_OFFSET(field_hdr,
+		MAIL_CACHE_FIELD_DECISION(field_hdr->fields_count));
+	names = CONST_PTR_OFFSET(field_hdr,
+		MAIL_CACHE_FIELD_NAMES(field_hdr->fields_count));
+	end = CONST_PTR_OFFSET(field_hdr, field_hdr->size);
+	i_assert(names <= end);
+
+	/* clear the old mapping */
+	for (i = 0; i < cache->fields_count; i++)
+		cache->field_file_map[i] = (uint32_t)-1;
+
+	mail_cache_purge_drop_init(cache, &cache->index->map->hdr, &drop_ctx);
+	i_zero(&field);
+	for (i = 0; i < field_hdr->fields_count; i++) {
+		for (p = names; p != end && *p != '\0'; p++) ;
+		if (p == end || *names == '\0') {
+			mail_cache_set_corrupted(cache,
+				"field header names corrupted");
+			return -1;
+		}
+
+		if (types[i] > MAIL_CACHE_FIELD_COUNT) {
+			mail_cache_set_corrupted(cache, "field type corrupted");
+			return -1;
+		}
+		if (!field_decision_is_valid(decisions[i])) {
+			mail_cache_set_corrupted(cache,
+				"field decision type corrupted");
+			return -1;
+		}
+
+		/* ignore any forced-flags in the file */
+		enum mail_cache_decision_type file_dec =
+			decisions[i] & ENUM_NEGATE(MAIL_CACHE_DECISION_FORCED);
+
+		if (hash_table_lookup_full(cache->field_name_hash, names,
+					   &orig_key, &orig_value)) {
+			/* already exists, see if decision can be updated */
+			fidx = POINTER_CAST_TO(orig_value, unsigned int);
+			enum mail_cache_decision_type cur_dec =
+				cache->fields[fidx].field.decision;
+			if ((cur_dec & MAIL_CACHE_DECISION_FORCED) != 0) {
+				/* Forced decision. If the decision has
+				   changed, update the fields in the file. */
+				if ((cur_dec & ENUM_NEGATE(MAIL_CACHE_DECISION_FORCED)) != file_dec)
+					cache->field_header_write_pending = TRUE;
+			} else if (cache->fields[fidx].decision_dirty) {
+				/* Decisions have recently been updated
+				   internally. Don't change them. */
+			} else {
+				/* Use the decision from the cache file. */
+				cache->fields[fidx].field.decision = file_dec;
+			}
+			if (field_type_verify(cache, fidx,
+					      types[i], sizes[i]) < 0)
+				return -1;
+		} else {
+			/* field is currently unknown, so just use whatever
+			   exists in the file. */
+			field.name = names;
+			field.type = types[i];
+			field.field_size = sizes[i];
+			field.decision = file_dec;
+			mail_cache_register_fields(cache, &field, 1);
+			fidx = field.idx;
+		}
+		if (cache->field_file_map[fidx] != (uint32_t)-1) {
+			mail_cache_set_corrupted(cache,
+				"Duplicated field in header: %s", names);
+			return -1;
+		}
+		cache->fields[fidx].used = TRUE;
+
+		cache->field_file_map[fidx] = i;
+		cache->file_field_map[i] = fidx;
+
+		/* Update last_used if it's newer than ours. Note that the
+		   last_used may have been overwritten while we were reading
+		   this cache header. In theory this can mean that the
+		   last_used field is only half-updated and contains garbage.
+		   This practically won't matter, since the worst that can
+		   happen is that we trigger a purge earlier than necessary.
+		   The purging re-reads the last_used while cache is locked and
+		   correctly figures out whether to drop the field. */
+		if ((time_t)last_used[i] > cache->fields[fidx].field.last_used)
+			cache->fields[fidx].field.last_used = last_used[i];
+
+		switch (mail_cache_purge_drop_test(&drop_ctx, fidx)) {
+		case MAIL_CACHE_PURGE_DROP_DECISION_NONE:
+			break;
+		case MAIL_CACHE_PURGE_DROP_DECISION_DROP:
+			mail_cache_purge_later(cache, t_strdup_printf(
+				"Drop old field %s (last_used=%"PRIdTIME_T")",
+				cache->fields[fidx].field.name,
+				cache->fields[fidx].field.last_used));
+			break;
+		case MAIL_CACHE_PURGE_DROP_DECISION_TO_TEMP:
+			/* This cache decision change can cause the field to be
+			   dropped for old mails, so do it via purging. */
+			mail_cache_purge_later(cache, t_strdup_printf(
+				"Change cache decision to temp for old field %s "
+				"(last_used=%"PRIdTIME_T")",
+				cache->fields[fidx].field.name,
+				cache->fields[fidx].field.last_used));
+			break;
+		}
+
+		names = p + 1;
+	}
+	return 0;
+}
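
The reader above locates five parallel arrays packed after the fixed header; the MAIL_CACHE_FIELD_* macros (defined in mail-cache-private.h, not part of this diff) compute their byte offsets from fields_count. A sketch of the equivalent arithmetic, assuming the layout implied by the serializer later in this file (last_used and field_size as 32-bit values, type and decision as one byte each, then the NUL-terminated names):

	/* Illustrative offset arithmetic for the packed field header.
	   The real macros live in mail-cache-private.h; this only shows
	   the shape of the layout. */
	#include <stdint.h>
	#include <stddef.h>

	struct layout {
		size_t last_used_off;	/* n * uint32_t */
		size_t size_off;	/* n * uint32_t */
		size_t type_off;	/* n * uint8_t */
		size_t decision_off;	/* n * uint8_t */
		size_t names_off;	/* concatenated "name\0" strings */
	};

	static struct layout field_header_layout(size_t hdr_size, uint32_t n)
	{
		struct layout l;

		l.last_used_off = hdr_size;
		l.size_off = l.last_used_off + n * sizeof(uint32_t);
		l.type_off = l.size_off + n * sizeof(uint32_t);
		l.decision_off = l.type_off + n * sizeof(uint8_t);
		l.names_off = l.decision_off + n * sizeof(uint8_t);
		return l;
	}

This matches the assertion in mail_cache_header_fields_get() below: before the names, exactly sizeof(hdr) + (2 * sizeof(uint32_t) + 2) bytes per field have been written.
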
+
+static void copy_to_buf(struct mail_cache *cache, buffer_t *dest, bool add_new,
+			size_t offset, size_t size)
+{
+	const void *data;
+	unsigned int i, field;
+
+	/* copy the existing fields */
+	for (i = 0; i < cache->file_fields_count; i++) {
+		field = cache->file_field_map[i];
+		data = CONST_PTR_OFFSET(&cache->fields[field], offset);
+		buffer_append(dest, data, size);
+	}
+	if (!add_new)
+		return;
+
+	/* copy newly wanted fields */
+	for (i = 0; i < cache->fields_count; i++) {
+		if (CACHE_FIELD_IS_NEWLY_WANTED(cache, i)) {
+			data = CONST_PTR_OFFSET(&cache->fields[i], offset);
+			buffer_append(dest, data, size);
+		}
+	}
+}
+
+static void copy_to_buf_byte(struct mail_cache *cache, buffer_t *dest,
+			     bool add_new, size_t offset)
+{
+	const int *data;
+	unsigned int i, field;
+	uint8_t byte;
+
+	/* copy the existing fields */
+	for (i = 0; i < cache->file_fields_count; i++) {
+		field = cache->file_field_map[i];
+		data = CONST_PTR_OFFSET(&cache->fields[field], offset);
+		byte = (uint8_t)*data;
+		buffer_append(dest, &byte, 1);
+	}
+	if (!add_new)
+		return;
+
+	/* copy newly wanted fields */
+	for (i = 0; i < cache->fields_count; i++) {
+		if (CACHE_FIELD_IS_NEWLY_WANTED(cache, i)) {
+			data = CONST_PTR_OFFSET(&cache->fields[i], offset);
+			byte = (uint8_t)*data;
+			buffer_append(dest, &byte, 1);
+		}
+	}
+}
+
+static void
+copy_to_buf_last_used(struct mail_cache *cache, buffer_t *dest, bool add_new)
+{
+	size_t offset = offsetof(struct mail_cache_field, last_used);
+#if defined(WORDS_BIGENDIAN) && SIZEOF_VOID_P == 8
+	/* 64bit time_t with big endian CPUs: copy the last 32 bits instead of
+	   the first 32 bits (that are always 0). The 32 bits are enough until
+	   year 2106, so we're not in a hurry to use 64 bits on disk. */
+	offset += sizeof(uint32_t);
+#endif
+	copy_to_buf(cache, dest, add_new, offset, sizeof(uint32_t));
+}
+
+static int mail_cache_header_fields_update_locked(struct mail_cache *cache)
+{
+	buffer_t *buffer;
+	uint32_t i, offset, dec_offset;
+	int ret = 0;
+
+	if (mail_cache_header_fields_read(cache) < 0 ||
+	    mail_cache_header_fields_get_offset(cache, &offset, NULL) < 0)
+		return -1;
+
+	buffer = t_buffer_create(256);
+
+	copy_to_buf_last_used(cache, buffer, FALSE);
+	ret = mail_cache_write(cache, buffer->data, buffer->used,
+			       offset + MAIL_CACHE_FIELD_LAST_USED());
+	if (ret == 0) {
+		buffer_set_used_size(buffer, 0);
+		copy_to_buf_byte(cache, buffer, FALSE,
+				 offsetof(struct mail_cache_field, decision));
+
+		dec_offset = offset +
+			MAIL_CACHE_FIELD_DECISION(cache->file_fields_count);
+		ret = mail_cache_write(cache, buffer->data, buffer->used,
+				       dec_offset);
+		if (ret == 0) {
+			for (i = 0; i < cache->file_fields_count; i++)
+				cache->fields[i].decision_dirty = FALSE;
+		}
+	}
+
+	if (ret == 0)
+		cache->field_header_write_pending = FALSE;
+	return ret;
+}
+
+int mail_cache_header_fields_update(struct mail_cache *cache)
+{
+	int ret;
+
+	if (cache->locked) {
+		T_BEGIN {
+			ret = mail_cache_header_fields_update_locked(cache);
+		} T_END;
+		return ret;
+	}
+
+	if (mail_cache_lock(cache) <= 0)
+		return -1;
+
+	T_BEGIN {
+		ret = mail_cache_header_fields_update_locked(cache);
+	} T_END;
+	i_assert(!cache->hdr_modified);
+	mail_cache_unlock(cache);
+	return ret;
+}
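
The #ifdef in copy_to_buf_last_used() narrows a 64-bit time_t to its low 32 bits by copying bytes; on a big-endian machine the low word sits 4 bytes into the field, hence the extra offset. A self-contained sketch of the same idea in standard C (WORDS_BIGENDIAN is the usual autoconf-style macro, assumed here):

	/* Copy the low 32 bits of a 64-bit integer bytewise: a big-endian
	   machine must start 4 bytes in, a little-endian one at byte 0. */
	#include <stdint.h>
	#include <string.h>

	static uint32_t low32_bytewise(const uint64_t *value)
	{
		size_t offset = 0;
		uint32_t out;
	#if defined(WORDS_BIGENDIAN)
		offset += sizeof(uint32_t); /* high word comes first */
	#endif
		memcpy(&out, (const unsigned char *)value + offset,
		       sizeof(out));
		return out;
	}
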
+
+void mail_cache_header_fields_get(struct mail_cache *cache, buffer_t *dest)
+{
+	struct mail_cache_header_fields hdr;
+	unsigned int field;
+	const char *name;
+	uint32_t i;
+
+	i_zero(&hdr);
+	hdr.fields_count = cache->file_fields_count;
+	for (i = 0; i < cache->fields_count; i++) {
+		if (CACHE_FIELD_IS_NEWLY_WANTED(cache, i))
+			hdr.fields_count++;
+	}
+	buffer_append(dest, &hdr, sizeof(hdr));
+
+	/* we have to keep the field order for the existing fields. */
+	copy_to_buf_last_used(cache, dest, TRUE);
+	copy_to_buf(cache, dest, TRUE,
+		    offsetof(struct mail_cache_field, field_size),
+		    sizeof(uint32_t));
+	copy_to_buf_byte(cache, dest, TRUE,
+			 offsetof(struct mail_cache_field, type));
+	copy_to_buf_byte(cache, dest, TRUE,
+			 offsetof(struct mail_cache_field, decision));
+
+	i_assert(dest->used == sizeof(hdr) +
+		 (sizeof(uint32_t)*2 + 2) * hdr.fields_count);
+
+	/* add existing fields' names */
+	for (i = 0; i < cache->file_fields_count; i++) {
+		field = cache->file_field_map[i];
+		name = cache->fields[field].field.name;
+		buffer_append(dest, name, strlen(name)+1);
+	}
+	/* add newly wanted fields' names */
+	for (i = 0; i < cache->fields_count; i++) {
+		if (CACHE_FIELD_IS_NEWLY_WANTED(cache, i)) {
+			name = cache->fields[i].field.name;
+			buffer_append(dest, name, strlen(name)+1);
+		}
+	}
+
+	hdr.size = dest->used;
+	buffer_write(dest, 0, &hdr, sizeof(hdr));
+
+	if ((hdr.size & 3) != 0)
+		buffer_append_zero(dest, 4 - (hdr.size & 3));
+}
+
+int mail_cache_header_fields_get_next_offset(struct mail_cache *cache,
+					     uint32_t *offset_r)
+{
+	if (mail_cache_header_fields_get_offset(cache, offset_r, NULL) < 0)
+		return -1;
+
+	if (*offset_r == 0) {
+		*offset_r = offsetof(struct mail_cache_header,
+				     field_header_offset);
+	} else {
+		*offset_r += offsetof(struct mail_cache_header_fields,
+				      next_offset);
+	}
+	return 0;
+}
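
mail_cache_header_fields_get() ends by rounding the serialized block up to 32-bit alignment with zero padding, which keeps the next chained header aligned. A minimal sketch of that rounding:

	/* Pad a byte count up to the next multiple of 4, as done with
	   buffer_append_zero() above. */
	#include <stdint.h>

	static uint32_t pad_to_32bit(uint32_t size)
	{
		return (size & 3) == 0 ? size : size + (4 - (size & 3));
	}
	/* pad_to_32bit(13) == 16, pad_to_32bit(16) == 16 */
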