diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib-index/mail-index-fsck.c | 495 |
1 files changed, 495 insertions, 0 deletions
diff --git a/src/lib-index/mail-index-fsck.c b/src/lib-index/mail-index-fsck.c new file mode 100644 index 0000000..6636edf --- /dev/null +++ b/src/lib-index/mail-index-fsck.c @@ -0,0 +1,495 @@ +/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "ioloop.h" +#include "array.h" +#include "mail-index-private.h" +#include "mail-transaction-log-private.h" + +static void mail_index_fsck_error(struct mail_index *index, + const char *fmt, ...) ATTR_FORMAT(2, 3); +static void mail_index_fsck_error(struct mail_index *index, + const char *fmt, ...) +{ + va_list va; + + va_start(va, fmt); + mail_index_set_error(index, "Fixed index file %s: %s", + index->filepath, t_strdup_vprintf(fmt, va)); + va_end(va); +} + +#define CHECK(field, oper) \ + if (hdr->field oper map->hdr.field) { \ + mail_index_fsck_error(index, #field" %u -> %u", \ + map->hdr.field, hdr->field); \ + } + +static void +mail_index_fsck_log_pos(struct mail_index *index, struct mail_index_map *map, + struct mail_index_header *hdr) +{ + unsigned int hdr_size = index->log->head->hdr.hdr_size; + uint32_t file_seq; + uoff_t file_offset; + + mail_transaction_log_get_head(index->log, &file_seq, &file_offset); + if (hdr->log_file_seq < file_seq) { + /* index's log_file_seq is too old. move it to log head. */ + hdr->log_file_head_offset = hdr->log_file_tail_offset = + sizeof(struct mail_transaction_log_header); + } else if (hdr->log_file_seq == file_seq) { + /* index's log_file_seq matches the current log. make sure the + offsets are valid. */ + if (hdr->log_file_head_offset > file_offset) + hdr->log_file_head_offset = file_offset; + else if (hdr->log_file_head_offset < hdr_size) + hdr->log_file_head_offset = hdr_size; + + if (hdr->log_file_tail_offset > hdr->log_file_head_offset) + hdr->log_file_tail_offset = hdr->log_file_head_offset; + else if (hdr->log_file_tail_offset != 0 && + hdr->log_file_tail_offset < hdr_size) + hdr->log_file_tail_offset = hdr_size; + } else { + /* index's log_file_seq is newer than exists. move it to + end of the current log head. */ + hdr->log_file_head_offset = hdr->log_file_tail_offset = + file_offset; + } + hdr->log_file_seq = file_seq; + + CHECK(log_file_seq, !=); + if (hdr->log_file_seq == map->hdr.log_file_seq) { + /* don't bother complaining about these if file changed too */ + CHECK(log_file_head_offset, !=); + CHECK(log_file_tail_offset, !=); + } +} + +static void +mail_index_fsck_header(struct mail_index *index, struct mail_index_map *map, + struct mail_index_header *hdr) +{ + /* mail_index_map_check_header() has already checked that the index + isn't completely broken. */ + if (hdr->uid_validity == 0 && hdr->next_uid != 1) + hdr->uid_validity = ioloop_time; + + if (index->log->head != NULL) + mail_index_fsck_log_pos(index, map, hdr); +} + +static bool +array_has_name(const ARRAY_TYPE(const_string) *names, const char *name) +{ + const char *arr_name; + + array_foreach_elem(names, arr_name) { + if (strcmp(arr_name, name) == 0) + return TRUE; + } + return FALSE; +} + +static unsigned int +mail_index_fsck_find_keyword_count(struct mail_index_map *map, + const struct mail_index_ext_header *ext_hdr) +{ + const struct mail_index_record *rec; + const uint8_t *kw; + unsigned int r, i, j, cur, max = 0, kw_pos, kw_size; + + kw_pos = ext_hdr->record_offset; + kw_size = ext_hdr->record_size; + + rec = map->rec_map->records; + for (r = 0; r < map->rec_map->records_count; r++) { + kw = CONST_PTR_OFFSET(rec, kw_pos); + for (i = cur = 0; i < kw_size; i++) { + if (kw[i] != 0) { + for (j = 0; j < 8; j++) { + if ((kw[i] & (1 << j)) != 0) + cur = i * 8 + j + 1; + } + } + } + if (cur > max) { + max = cur; + if (max == kw_size*8) + return max; + } + rec = CONST_PTR_OFFSET(rec, map->hdr.record_size); + } + return max; +} + +static bool +keyword_name_is_valid(const char *buffer, unsigned int pos, unsigned int size) +{ + for (; pos < size; pos++) { + if (buffer[pos] == '\0') + return TRUE; + if (((unsigned char)buffer[pos] & 0x7f) < 32) { + /* control characters aren't valid */ + return FALSE; + } + } + return FALSE; +} + +static void +mail_index_fsck_keywords(struct mail_index *index, struct mail_index_map *map, + struct mail_index_header *hdr, + const struct mail_index_ext_header *ext_hdr, + unsigned int ext_offset, unsigned int *offset_p) +{ + const struct mail_index_keyword_header *kw_hdr; + struct mail_index_keyword_header *new_kw_hdr; + const struct mail_index_keyword_header_rec *kw_rec; + struct mail_index_keyword_header_rec new_kw_rec; + const char *name, *name_buffer, **name_array; + unsigned int i, j, name_pos, name_size, rec_pos, hdr_offset, diff; + unsigned int changed_count, keywords_count, name_base_pos; + ARRAY_TYPE(const_string) names; + buffer_t *dest; + bool changed = FALSE; + + hdr_offset = ext_offset + + mail_index_map_ext_hdr_offset(sizeof(MAIL_INDEX_EXT_KEYWORDS)-1); + kw_hdr = MAIL_INDEX_MAP_HDR_OFFSET(map, hdr_offset); + keywords_count = kw_hdr->keywords_count; + + kw_rec = (const void *)(kw_hdr + 1); + name_buffer = (const char *)(kw_rec + keywords_count); + + name_pos = (size_t)(name_buffer - (const char *)kw_hdr); + if (name_pos > ext_hdr->hdr_size) { + /* the header is completely broken */ + keywords_count = + mail_index_fsck_find_keyword_count(map, ext_hdr); + mail_index_fsck_error(index, "Assuming keywords_count = %u", + keywords_count); + kw_rec = NULL; + name_size = 0; + changed = TRUE; + } else { + name_size = ext_hdr->hdr_size - name_pos; + } + + /* create keyword name array. invalid keywords are added as + empty strings */ + t_array_init(&names, keywords_count); + for (i = 0; i < keywords_count; i++) { + if (name_size == 0 || + !keyword_name_is_valid(name_buffer, kw_rec[i].name_offset, + name_size)) + name = ""; + else + name = name_buffer + kw_rec[i].name_offset; + + if (*name != '\0' && array_has_name(&names, name)) { + /* duplicate */ + name = ""; + } + array_push_back(&names, &name); + } + + /* give new names to invalid keywords */ + changed_count = 0; + name_array = array_front_modifiable(&names); + for (i = j = 0; i < keywords_count; i++) { + while (name_array[i][0] == '\0') { + name = t_strdup_printf("unknown-%d", j++); + if (!array_has_name(&names, name)) { + name_array[i] = name; + changed = TRUE; + changed_count++; + } + } + } + + if (!changed) { + /* nothing was broken */ + return; + } + + mail_index_fsck_error(index, "Renamed %u keywords to unknown-*", + changed_count); + + dest = buffer_create_dynamic(default_pool, + I_MAX(ext_hdr->hdr_size, 128)); + new_kw_hdr = buffer_append_space_unsafe(dest, sizeof(*new_kw_hdr)); + new_kw_hdr->keywords_count = keywords_count; + + /* add keyword records so we can start appending names directly */ + rec_pos = dest->used; + i_zero(&new_kw_rec); + (void)buffer_append_space_unsafe(dest, keywords_count * sizeof(*kw_rec)); + + /* write the actual records and names */ + name_base_pos = dest->used; + for (i = 0; i < keywords_count; i++) { + new_kw_rec.name_offset = dest->used - name_base_pos; + buffer_write(dest, rec_pos, &new_kw_rec, sizeof(new_kw_rec)); + rec_pos += sizeof(*kw_rec); + + buffer_append(dest, name_array[i], strlen(name_array[i]) + 1); + } + + /* keep the header size at least the same size as before */ + if (dest->used < ext_hdr->hdr_size) + buffer_append_zero(dest, ext_hdr->hdr_size - dest->used); + + if (dest->used > ext_hdr->hdr_size) { + /* need to resize the header */ + struct mail_index_ext_header new_ext_hdr; + + diff = dest->used - ext_hdr->hdr_size; + buffer_copy(map->hdr_copy_buf, hdr_offset + diff, + map->hdr_copy_buf, hdr_offset, SIZE_MAX); + hdr->header_size += diff; + *offset_p += diff; + + new_ext_hdr = *ext_hdr; + new_ext_hdr.hdr_size += diff; + buffer_write(map->hdr_copy_buf, ext_offset, + &new_ext_hdr, sizeof(new_ext_hdr)); + } + + i_assert(hdr_offset + dest->used <= map->hdr_copy_buf->used); + buffer_write(map->hdr_copy_buf, hdr_offset, dest->data, dest->used); + + /* keywords changed unexpectedly, so all views are broken now */ + index->inconsistency_id++; + + buffer_free(&dest); +} + +static void +mail_index_fsck_extensions(struct mail_index *index, struct mail_index_map *map, + struct mail_index_header *hdr) +{ + const struct mail_index_ext_header *ext_hdr; + ARRAY_TYPE(const_string) names; + const char *name, *error; + unsigned int offset, next_offset, i; + + t_array_init(&names, 64); + offset = MAIL_INDEX_HEADER_SIZE_ALIGN(hdr->base_header_size); + for (i = 0; offset < hdr->header_size; i++) { + /* mail_index_map_ext_get_next() uses map->hdr, so make sure + it's up-to-date */ + map->hdr = *hdr; + + next_offset = offset; + if (mail_index_map_ext_get_next(map, &next_offset, + &ext_hdr, &name) < 0) { + /* the extension continued outside header, drop it */ + mail_index_fsck_error(index, + "Dropped extension #%d (%s) " + "with invalid header size", + i, name); + hdr->header_size = offset; + buffer_set_used_size(map->hdr_copy_buf, hdr->header_size); + break; + } + if (mail_index_map_ext_hdr_check(hdr, ext_hdr, name, + &error) < 0) { + mail_index_fsck_error(index, + "Dropped broken extension #%d (%s)", i, name); + } else if (array_has_name(&names, name)) { + mail_index_fsck_error(index, + "Dropped duplicate extension %s", name); + } else { + /* name may change if header buffer is changed */ + name = t_strdup(name); + + if (strcmp(name, MAIL_INDEX_EXT_KEYWORDS) == 0) { + mail_index_fsck_keywords(index, map, hdr, + ext_hdr, offset, + &next_offset); + } + array_push_back(&names, &name); + offset = next_offset; + continue; + } + + /* drop the field */ + hdr->header_size -= next_offset - offset; + buffer_copy(map->hdr_copy_buf, offset, + map->hdr_copy_buf, next_offset, SIZE_MAX); + buffer_set_used_size(map->hdr_copy_buf, hdr->header_size); + } +} + +static void +mail_index_fsck_records(struct mail_index *index, struct mail_index_map *map, + struct mail_index_header *hdr) +{ + struct mail_index_record *rec, *next_rec; + uint32_t i, last_uid; + bool logged_unordered_uids = FALSE, logged_zero_uids = FALSE; + bool records_dropped = FALSE; + + hdr->messages_count = 0; + hdr->seen_messages_count = 0; + hdr->deleted_messages_count = 0; + + hdr->first_unseen_uid_lowwater = 0; + hdr->first_deleted_uid_lowwater = 0; + + rec = map->rec_map->records; last_uid = 0; + for (i = 0; i < map->rec_map->records_count; ) { + next_rec = PTR_OFFSET(rec, hdr->record_size); + if (rec->uid <= last_uid) { + /* log an error once, and skip this record */ + if (rec->uid == 0) { + if (!logged_zero_uids) { + mail_index_fsck_error(index, + "Record UIDs have zeroes"); + logged_zero_uids = TRUE; + } + } else { + if (!logged_unordered_uids) { + mail_index_fsck_error(index, + "Record UIDs unordered"); + logged_unordered_uids = TRUE; + } + } + /* not the fastest way when we're skipping lots of + records, but this should happen rarely so don't + bother optimizing. */ + memmove(rec, next_rec, hdr->record_size * + (map->rec_map->records_count - i - 1)); + map->rec_map->records_count--; + records_dropped = TRUE; + continue; + } + + hdr->messages_count++; + if ((rec->flags & MAIL_SEEN) != 0) + hdr->seen_messages_count++; + if ((rec->flags & MAIL_DELETED) != 0) + hdr->deleted_messages_count++; + + if ((rec->flags & MAIL_SEEN) == 0 && + hdr->first_unseen_uid_lowwater == 0) + hdr->first_unseen_uid_lowwater = rec->uid; + if ((rec->flags & MAIL_DELETED) != 0 && + hdr->first_deleted_uid_lowwater == 0) + hdr->first_deleted_uid_lowwater = rec->uid; + + last_uid = rec->uid; + rec = next_rec; + i++; + } + + if (records_dropped) { + /* all existing views are broken now */ + index->inconsistency_id++; + } + + if (hdr->next_uid <= last_uid) { + mail_index_fsck_error(index, "next_uid %u -> %u", + hdr->next_uid, last_uid+1); + hdr->next_uid = last_uid+1; + } + + if (hdr->first_unseen_uid_lowwater == 0) + hdr->first_unseen_uid_lowwater = hdr->next_uid; + if (hdr->first_deleted_uid_lowwater == 0) + hdr->first_deleted_uid_lowwater = hdr->next_uid; + if (hdr->first_recent_uid > hdr->next_uid) + hdr->first_recent_uid = hdr->next_uid; + if (hdr->first_recent_uid == 0) + hdr->first_recent_uid = 1; + + CHECK(uid_validity, !=); + CHECK(messages_count, !=); + CHECK(seen_messages_count, !=); + CHECK(deleted_messages_count, !=); + + CHECK(first_unseen_uid_lowwater, <); + CHECK(first_deleted_uid_lowwater, <); + CHECK(first_recent_uid, !=); +} + +static void +mail_index_fsck_map(struct mail_index *index, struct mail_index_map *map) +{ + struct mail_index_header hdr; + + if (index->log->head != NULL) { + /* Remember the log head position. If we go back in the index's + head offset, ignore errors in the log up to this offset. */ + mail_transaction_log_get_head(index->log, + &index->fsck_log_head_file_seq, + &index->fsck_log_head_file_offset); + } + hdr = map->hdr; + + mail_index_fsck_header(index, map, &hdr); + mail_index_fsck_extensions(index, map, &hdr); + mail_index_fsck_records(index, map, &hdr); + + hdr.flags |= MAIL_INDEX_HDR_FLAG_FSCKD; + map->hdr = hdr; + i_assert(map->hdr_copy_buf->used == map->hdr.header_size); +} + +int mail_index_fsck(struct mail_index *index) +{ + bool orig_locked = index->log_sync_locked; + struct mail_index_map *map; + uint32_t file_seq; + uoff_t file_offset; + + i_warning("fscking index file %s", index->filepath); + + index->fscked = TRUE; + + if (index->log->head == NULL) { + /* we're trying to open the index files, but there wasn't + any .log file. */ + if (mail_transaction_log_create(index->log, FALSE) < 0) + return -1; + } + + if (!orig_locked) { + if (mail_transaction_log_sync_lock(index->log, "fscking", + &file_seq, &file_offset) < 0) + return -1; + } + + map = mail_index_map_clone(index->map); + mail_index_unmap(&index->map); + index->map = map; + + T_BEGIN { + mail_index_fsck_map(index, map); + } T_END; + + mail_index_write(index, FALSE, "fscking"); + + if (!orig_locked) + mail_transaction_log_sync_unlock(index->log, "fscking"); + return 0; +} + +void mail_index_fsck_locked(struct mail_index *index) +{ + int ret; + + i_assert(index->log_sync_locked); + ret = mail_index_fsck(index); + i_assert(ret == 0); +} + +bool mail_index_reset_fscked(struct mail_index *index) +{ + bool ret = index->fscked; + + index->fscked = FALSE; + return ret; +} |