summary refs log tree commit diff stats
path: root/src/lib-storage/index/dbox-common/dbox-file-fix.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-storage/index/dbox-common/dbox-file-fix.c')
-rw-r--r-- src/lib-storage/index/dbox-common/dbox-file-fix.c 519
1 files changed, 519 insertions, 0 deletions
diff --git a/src/lib-storage/index/dbox-common/dbox-file-fix.c b/src/lib-storage/index/dbox-common/dbox-file-fix.c
new file mode 100644
index 0000000..1c44ca4
--- /dev/null
+++ b/src/lib-storage/index/dbox-common/dbox-file-fix.c
@@ -0,0 +1,519 @@
+/* Copyright (c) 2009-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "hex-dec.h"
+#include "istream.h"
+#include "ostream.h"
+#include "message-size.h"
+#include "dbox-storage.h"
+#include "dbox-file.h"
+
+#include <stdio.h>
+
+#define DBOX_MAIL_FILE_BROKEN_COPY_SUFFIX ".broken"
+
+/* Check whether the data buffered in input begins, optionally after a single
+   LF, with something that looks like a dbox message header.
+
+   The caller must guarantee that the first buffered byte is either LF
+   followed by the first pre-magic byte, or the first pre-magic byte itself.
+
+   Returns 1 if the header looks valid (*pre_offset is set to the stream
+   offset where the header starts, i.e. after the optional LF), 0 if it
+   doesn't, and -1 if too few bytes are buffered to decide (*need_bytes is
+   set to the number of buffered bytes required). */
+static int
+dbox_file_match_pre_magic(struct istream *input,
+			  uoff_t *pre_offset, size_t *need_bytes)
+{
+	const struct dbox_message_header *msg_hdr;
+	const unsigned char *buf;
+	size_t buf_size, lf_len = 0;
+	uoff_t hdr_offset = input->v_offset;
+
+	buf = i_stream_get_data(input, &buf_size);
+	if (buf[0] == '\n') {
+		/* step over the LF that precedes the header */
+		lf_len = 1;
+		buf += lf_len;
+		buf_size -= lf_len;
+		hdr_offset += lf_len;
+	}
+	i_assert(buf[0] == DBOX_MAGIC_PRE[0]);
+	if (buf_size < sizeof(*msg_hdr)) {
+		/* the skipped LF has to stay buffered as well */
+		*need_bytes = sizeof(*msg_hdr) + lf_len;
+		return -1;
+	}
+
+	msg_hdr = (const void *)buf;
+	if (memcmp(msg_hdr->magic_pre, DBOX_MAGIC_PRE,
+		   strlen(DBOX_MAGIC_PRE)) != 0 ||
+	    msg_hdr->type != DBOX_MESSAGE_TYPE_NORMAL ||
+	    msg_hdr->space1 != ' ' || msg_hdr->space2 != ' ')
+		return 0;
+
+	/* a zero result from hex2dec() is accepted only when the size field is
+	   literally all zeros (presumably it also returns 0 for input that
+	   isn't valid hex - verify against hex-dec.h) */
+	if (hex2dec(msg_hdr->message_size_hex,
+		    sizeof(msg_hdr->message_size_hex)) == 0 &&
+	    memcmp(msg_hdr->message_size_hex, "0000000000000000",
+		   sizeof(msg_hdr->message_size_hex)) != 0)
+		return 0;
+
+	*pre_offset = hdr_offset;
+	return 1;
+}
+
+/* Search data[0..len) for chr, refusing to pass over control characters.
+
+   Returns FALSE if a byte smaller than ' ' (other than chr itself) is found
+   before chr; *pos_r is then NULL. Otherwise returns TRUE with *pos_r pointing
+   to the first occurrence of chr, or NULL if chr isn't within len bytes.
+
+   len is a size_t because callers pass buffer sizes of that type; a narrower
+   type could silently truncate the length. */
+static bool memchr_nocontrol(const unsigned char *data, char chr,
+			     size_t len, const unsigned char **pos_r)
+{
+	/* compare as unsigned char so that a chr with the high bit set still
+	   matches regardless of plain char's signedness */
+	const unsigned char wanted = (unsigned char)chr;
+	size_t i;
+
+	*pos_r = NULL;
+	for (i = 0; i < len; i++) {
+		if (data[i] == wanted) {
+			*pos_r = data+i;
+			return TRUE;
+		}
+		if (data[i] < ' ')
+			return FALSE;
+	}
+	return TRUE;
+}
+
+/* Check whether the data buffered in input begins with the post-magic and is
+   followed by a plausible-looking metadata block.
+
+   input_full tells that no more data can be buffered, so a metadata line
+   without a terminating LF has to be accepted as it is.
+
+   Returns 1 if it looks like a metadata block, 0 if it doesn't, and -1 if
+   more data is needed to decide (*need_bytes is set to the number of buffered
+   bytes required). */
+static int
+dbox_file_match_post_magic(struct istream *input, bool input_full,
+			   size_t *need_bytes)
+{
+	const size_t magic_len = strlen(DBOX_MAGIC_POST);
+	const unsigned char *buf, *lf;
+	size_t pos, buf_size;
+	bool allow_control;
+
+	buf = i_stream_get_data(input, &buf_size);
+	if (buf_size < magic_len) {
+		*need_bytes = magic_len;
+		return -1;
+	}
+	if (memcmp(buf, DBOX_MAGIC_POST, magic_len) != 0)
+		return 0;
+
+	/* walk through the metadata lines, each starting with a key byte */
+	pos = magic_len;
+	while (pos < buf_size) {
+		switch (buf[pos]) {
+		case '\n':
+			/* empty line ends the metadata block */
+			return 1;
+		case DBOX_METADATA_POP3_ORDER:
+		case DBOX_METADATA_RECEIVED_TIME:
+		case DBOX_METADATA_PHYSICAL_SIZE:
+		case DBOX_METADATA_VIRTUAL_SIZE:
+		case DBOX_METADATA_EXT_REF:
+		case DBOX_METADATA_OLDV1_EXPUNGED:
+		case DBOX_METADATA_OLDV1_FLAGS:
+		case DBOX_METADATA_OLDV1_SAVE_TIME:
+		case DBOX_METADATA_OLDV1_SPACE:
+			/* values of these keys never contain control chars */
+			allow_control = FALSE;
+			break;
+		case DBOX_METADATA_GUID:
+		case DBOX_METADATA_POP3_UIDL:
+		case DBOX_METADATA_ORIG_MAILBOX:
+		case DBOX_METADATA_OLDV1_KEYWORDS:
+			/* values of these keys could contain anything */
+			allow_control = TRUE;
+			break;
+		default:
+			/* unknown keys are accepted only if they're 'A'..'Z' */
+			if (buf[pos] < 'A' || buf[pos] > 'Z')
+				return 0;
+			allow_control = TRUE;
+			break;
+		}
+
+		if (allow_control)
+			lf = memchr(buf+pos, '\n', buf_size-pos);
+		else if (!memchr_nocontrol(buf+pos, '\n', buf_size-pos, &lf))
+			return 0;
+
+		if (lf == NULL) {
+			/* the line's LF isn't buffered */
+			if (input_full) {
+				/* can't look any further - assume it's ok */
+				return 1;
+			}
+			break;
+		}
+		pos = (size_t)(lf - buf) + 1;
+	}
+
+	/* ran out of buffered data before seeing the end of the metadata */
+	*need_bytes = buf_size+1;
+	return -1;
+}
+
+/* Scan file->input forward from its current position for the next spot that
+   looks like a message boundary: either a message header (pre-magic) or a
+   metadata block (post-magic).
+
+   Returns 1 if one was found; *offset_r is its stream offset and *pre_r is
+   TRUE for a pre-magic match, FALSE for a post-magic match. Returns 0 if the
+   input ended without a match; *offset_r is then the position where scanning
+   stopped. Returns -1 on a read error, which is reported through
+   dbox_file_set_syscall_error(). In all cases the input is seeked back to the
+   offset it had on entry. */
+static int
+dbox_file_find_next_magic(struct dbox_file *file, uoff_t *offset_r, bool *pre_r)
+{
+	/* We're scanning message bodies here, trying to find the beginning of
+	   the next message. Although our magic strings are very unlikely to
+	   be found in regular emails, they are much more likely when emails
+	   are stored compressed.. So try to be sure we find the correct
+	   magic markers. */
+
+	const size_t min_need_bytes = strlen(DBOX_MAGIC_POST);
+	struct istream *input = file->input;
+	uoff_t orig_offset, pre_offset, post_offset, prev_offset;
+	const unsigned char *data, *magic;
+	size_t size, need_bytes, prev_need_bytes;
+	int ret, match;
+
+	*pre_r = FALSE;
+
+	orig_offset = prev_offset = input->v_offset;
+	need_bytes = min_need_bytes; prev_need_bytes = 0;
+	while ((ret = i_stream_read_bytes(input, &data, &size, need_bytes)) > 0 ||
+	       ret == -2) {
+		/* search for the beginning of a potential pre/post magic */
+		i_assert(size > 1);
+		/* every iteration must either move forward or ask for more
+		   data, otherwise we'd loop forever */
+		i_assert(prev_offset != input->v_offset ||
+			 need_bytes > prev_need_bytes);
+		prev_offset = input->v_offset;
+		prev_need_bytes = need_bytes;
+
+		magic = memchr(data, DBOX_MAGIC_PRE[0], size);
+		if (magic == NULL) {
+			/* keep the last byte: it may be the LF that precedes
+			   a magic in the next chunk */
+			i_stream_skip(input, size-1);
+			need_bytes = min_need_bytes;
+			continue;
+		}
+		if (magic == data && input->v_offset == orig_offset) {
+			/* beginning of the file */
+		} else if (magic != data && magic[-1] == '\n') {
+			/* PRE/POST block? leave \n */
+			i_stream_skip(input, magic-data-1);
+		} else {
+			/* not preceded by LF - can't be a magic */
+			i_stream_skip(input, magic-data+1);
+			need_bytes = min_need_bytes;
+			continue;
+		}
+
+		pre_offset = UOFF_T_MAX;
+		match = dbox_file_match_pre_magic(input, &pre_offset, &need_bytes);
+		if (match < 0) {
+			/* more data needed */
+			if (ret == -2) {
+				/* buffer is full, can't get more. give up on
+				   this candidate. */
+				i_stream_skip(input, 2);
+				need_bytes = min_need_bytes;
+			}
+			continue;
+		}
+		if (match > 0)
+			*pre_r = TRUE;
+
+		match = dbox_file_match_post_magic(input, ret == -2, &need_bytes);
+		if (match < 0) {
+			/* more data needed */
+			if (ret == -2) {
+				i_stream_skip(input, 2);
+				need_bytes = min_need_bytes;
+			}
+			continue;
+		}
+		if (match > 0) {
+			post_offset = input->v_offset;
+			if (pre_offset == UOFF_T_MAX ||
+			    post_offset < pre_offset) {
+				pre_offset = post_offset;
+				*pre_r = FALSE;
+			}
+		}
+
+		if (pre_offset != UOFF_T_MAX) {
+			*offset_r = pre_offset;
+			ret = 1;
+			break;
+		}
+		/* This candidate matched neither magic. Step just one byte
+		   past the current position so that the rest of the buffered
+		   data is still examined. "size" describes the buffer as it
+		   was before the skips above, so skipping size-1 here would
+		   jump over unexamined data (and possibly past the buffer). */
+		i_stream_skip(input, 1);
+		need_bytes = min_need_bytes;
+	}
+	if (ret <= 0) {
+		i_assert(ret == -1);
+		if (input->stream_errno != 0)
+			dbox_file_set_syscall_error(file, "read()");
+		else {
+			/* EOF without finding anything */
+			ret = 0;
+			*offset_r = input->v_offset;
+		}
+	}
+	i_stream_seek(input, orig_offset);
+	return ret <= 0 ? ret : 1;
+}
+
+/* Copy exactly count bytes from the current position of file->input to
+   output and flush the output. out_path is used only in error messages.
+
+   Returns 0 on success. Returns -1 after setting a critical storage error if
+   reading failed, flushing failed, or fewer than count bytes were copied. */
+static int
+stream_copy(struct dbox_file *file, struct ostream *output,
+	    const char *out_path, uoff_t count)
+{
+	struct istream *limit_input;
+	int ret = -1;
+
+	limit_input = i_stream_create_limit(file->input, count);
+	o_stream_nsend_istream(output, limit_input);
+
+	if (limit_input->stream_errno != 0) {
+		mail_storage_set_critical(&file->storage->storage,
+			"read(%s) failed: %s", file->cur_path,
+			i_stream_get_error(limit_input));
+	} else if (o_stream_flush(output) < 0) {
+		mail_storage_set_critical(&file->storage->storage,
+			"write(%s) failed: %s", out_path,
+			o_stream_get_error(output));
+	} else if (limit_input->v_offset != count) {
+		/* input ended before count bytes were available */
+		mail_storage_set_critical(&file->storage->storage,
+			"o_stream_send_istream(%s) copied only %"
+			PRIuUOFF_T" of %"PRIuUOFF_T" bytes",
+			out_path, limit_input->v_offset, count);
+	} else {
+		ret = 0;
+	}
+	i_stream_unref(&limit_input);
+	return ret;
+}
+
+/* Move file->input past a message header that couldn't be parsed, using only
+   the data that is already buffered.
+
+   If a LF is found within the first msg_header_size + 16 buffered bytes, the
+   input is positioned at that LF. Otherwise, if the buffered data begins
+   with the pre-magic, only the magic bytes are skipped. Otherwise the input
+   position is left unchanged. */
+static void dbox_file_skip_broken_header(struct dbox_file *file)
+{
+	const size_t magic_len = strlen(DBOX_MAGIC_PRE);
+	const unsigned char *buf;
+	size_t pos, scan_len;
+
+	buf = i_stream_get_data(file->input, &scan_len);
+	if (scan_len > file->msg_header_size + 16)
+		scan_len = file->msg_header_size + 16;
+
+	/* if there's LF close to our position, assume that the header ends
+	   there. */
+	pos = 0;
+	while (pos < scan_len && buf[pos] != '\n')
+		pos++;
+	if (pos < scan_len) {
+		i_stream_skip(file->input, pos);
+		return;
+	}
+
+	/* skip at least the magic bytes if possible */
+	if (scan_len > magic_len &&
+	    memcmp(buf, DBOX_MAGIC_PRE, magic_len) == 0)
+		i_stream_skip(file->input, magic_len);
+}
+
+/* Copy metadata lines from file->input to output until the metadata block
+   ends (a line that is empty or begins with DBOX_METADATA_OLDV1_SPACE), a
+   broken line is seen, or the input runs out of lines.
+
+   DBOX_METADATA_VIRTUAL_SIZE lines are dropped, because the caller writes a
+   recalculated value. *have_guid_r is set to TRUE if a DBOX_METADATA_GUID
+   line was copied, FALSE otherwise.
+
+   When a broken line is seen, the input is seeked back to the beginning of
+   that line, so the caller continues scanning from there and the lines that
+   were already copied aren't processed a second time. */
+static void
+dbox_file_copy_metadata(struct dbox_file *file, struct ostream *output,
+			bool *have_guid_r)
+{
+	const char *line;
+	/* offset where the line being read begins */
+	uoff_t line_offset = file->input->v_offset;
+
+	*have_guid_r = FALSE;
+	while ((line = i_stream_read_next_line(file->input)) != NULL) {
+		if (*line == DBOX_METADATA_OLDV1_SPACE || *line == '\0') {
+			/* end of metadata */
+			return;
+		}
+		if (*line < 32) {
+			/* broken - possibly a new pre-magic block.
+			   rewind only this line. */
+			i_stream_seek(file->input, line_offset);
+			return;
+		}
+		if (*line != DBOX_METADATA_VIRTUAL_SIZE) {
+			if (*line == DBOX_METADATA_GUID)
+				*have_guid_r = TRUE;
+			o_stream_nsend_str(output, line);
+			o_stream_nsend_str(output, "\n");
+		}
+		/* else: the virtual size may be wrong - it gets recreated */
+
+		/* the line was consumed, so the input position is now taken
+		   as the start of the next line */
+		line_offset = file->input->v_offset;
+	}
+}
+
+/* Write a repaired copy of file->input to output.
+
+   The first start_offset bytes are copied as they are. If start_offset is 0,
+   a new file header is written instead. After that the input is scanned for
+   message boundaries with dbox_file_find_next_magic(). For each message found
+   its header is either copied or regenerated, the body is copied, and a
+   metadata block is written with a GUID (generated if the original had none)
+   and a recalculated virtual size.
+
+   temp_path is the path of the output file and is used only in error
+   messages.
+
+   Returns a negative value on failure and a non-negative value otherwise. */
+static int
+dbox_file_fix_write_stream(struct dbox_file *file, uoff_t start_offset,
+			   const char *temp_path, struct ostream *output)
+{
+	struct dbox_message_header msg_hdr;
+	uoff_t offset, msg_size, hdr_offset, body_offset;
+	bool pre, write_header, have_guid;
+	struct message_size body;
+	bool has_nuls;
+	struct istream *body_input;
+	guid_128_t guid_128;
+	int ret;
+
+	i_stream_seek(file->input, 0);
+	if (start_offset > 0) {
+		/* copy the valid data */
+		if (stream_copy(file, output, temp_path, start_offset) < 0)
+			return -1;
+	} else {
+		/* the file header is broken. recreate it */
+		if (dbox_file_header_write(file, output) < 0) {
+			dbox_file_set_syscall_error(file, "write()");
+			return -1;
+		}
+	}
+
+	while ((ret = dbox_file_find_next_magic(file, &offset, &pre)) > 0) {
+		msg_size = offset - file->input->v_offset;
+		if (msg_size < 256 && pre) {
+			/* probably some garbage or some broken headers.
+			   we most likely don't miss anything by skipping
+			   over this data. */
+			i_stream_skip(file->input, msg_size);
+			hdr_offset = file->input->v_offset;
+			ret = dbox_file_read_mail_header(file, &msg_size);
+			if (ret <= 0) {
+				if (ret < 0)
+					return -1;
+				/* unparseable header - the body size is
+				   unknown until the next magic is found */
+				dbox_file_skip_broken_header(file);
+				body_offset = file->input->v_offset;
+				msg_size = UOFF_T_MAX;
+			} else {
+				i_stream_skip(file->input,
+					      file->msg_header_size);
+				body_offset = file->input->v_offset;
+				i_stream_skip(file->input, msg_size);
+			}
+
+			ret = dbox_file_find_next_magic(file, &offset, &pre);
+			if (ret <= 0)
+				break;
+
+			if (!pre && msg_size == offset - body_offset) {
+				/* msg header ok, copy it */
+				i_stream_seek(file->input, hdr_offset);
+				if (stream_copy(file, output, temp_path,
+						file->msg_header_size) < 0)
+					return -1;
+				write_header = FALSE;
+			} else {
+				/* msg header is broken. write our own. */
+				i_stream_seek(file->input, body_offset);
+				if (msg_size != UOFF_T_MAX) {
+					/* previous magic find might have
+					   skipped too much. seek back and
+					   make sure */
+					ret = dbox_file_find_next_magic(file, &offset, &pre);
+					if (ret <= 0)
+						break;
+				}
+
+				write_header = TRUE;
+				msg_size = offset - body_offset;
+			}
+		} else {
+			/* treat this data as a separate message. */
+			write_header = TRUE;
+			body_offset = file->input->v_offset;
+		}
+		/* write msg header */
+		if (write_header) {
+			dbox_msg_header_fill(&msg_hdr, msg_size);
+			o_stream_nsend(output, &msg_hdr, sizeof(msg_hdr));
+		}
+		/* write msg body */
+		i_assert(file->input->v_offset == body_offset);
+		if (stream_copy(file, output, temp_path, msg_size) < 0)
+			return -1;
+		i_assert(file->input->v_offset == offset);
+
+		/* get message body size */
+		i_stream_seek(file->input, body_offset);
+		body_input = i_stream_create_limit(file->input, msg_size);
+		ret = message_get_body_size(body_input, &body, &has_nuls);
+		if (ret < 0) {
+			/* the error string has to be read before the stream
+			   reference is dropped */
+			mail_storage_set_critical(&file->storage->storage,
+				"read(%s) failed: %s", file->cur_path,
+				i_stream_get_error(body_input));
+			i_stream_unref(&body_input);
+			return -1;
+		}
+		i_stream_unref(&body_input);
+
+		/* write msg metadata. */
+		i_assert(file->input->v_offset == offset);
+		ret = dbox_file_metadata_skip_header(file);
+		if (ret < 0)
+			return -1;
+		o_stream_nsend_str(output, DBOX_MAGIC_POST);
+		if (ret == 0)
+			have_guid = FALSE;
+		else
+			dbox_file_copy_metadata(file, output, &have_guid);
+		if (!have_guid) {
+			guid_128_generate(guid_128);
+			o_stream_nsend_str(output,
+				t_strdup_printf("%c%s\n", DBOX_METADATA_GUID,
+				guid_128_to_string(guid_128)));
+		}
+		o_stream_nsend_str(output,
+			t_strdup_printf("%c%llx\n", DBOX_METADATA_VIRTUAL_SIZE,
+			(unsigned long long)body.virtual_size));
+		/* empty line terminates the metadata block */
+		o_stream_nsend_str(output, "\n");
+		if (output->stream_errno != 0)
+			break;
+	}
+	if (o_stream_flush(output) < 0) {
+		mail_storage_set_critical(&file->storage->storage,
+			"write(%s) failed: %s", temp_path, o_stream_get_error(output));
+		ret = -1;
+	}
+	return ret;
+}
+
+/* Try to repair a broken dbox file. file must be open.
+
+   A repaired copy is written to a temporary file in the same directory; data
+   before start_offset is copied as it is (see
+   dbox_file_fix_write_stream()). The original is hard-linked to
+   "<path>.broken" and the temporary file is then renamed over the original.
+
+   Returns 1 if the file was recreated and successfully reopened, 0 if the
+   repaired file contained no messages and the file was deleted, and -1 on
+   error. */
+int dbox_file_fix(struct dbox_file *file, uoff_t start_offset)
+{
+	struct ostream *output;
+	const char *dir, *p, *temp_path, *broken_path;
+	bool deleted, have_messages;
+	int fd, ret;
+
+	i_assert(dbox_file_is_open(file));
+
+	p = strrchr(file->cur_path, '/');
+	i_assert(p != NULL);
+	dir = t_strdup_until(file->cur_path, p);
+
+	temp_path = t_strdup_printf("%s/%s", dir, dbox_generate_tmp_filename());
+	fd = file->storage->v.file_create_fd(file, temp_path, FALSE);
+	if (fd == -1)
+		return -1;
+
+	output = o_stream_create_fd_file(fd, 0, FALSE);
+	o_stream_cork(output);
+	ret = dbox_file_fix_write_stream(file, start_offset, temp_path, output);
+	if (ret < 0)
+		o_stream_abort(output);
+	/* anything written past the file header means at least one message */
+	have_messages = output->offset > file->file_header_size;
+	o_stream_unref(&output);
+	if (close(fd) < 0) {
+		mail_storage_set_critical(&file->storage->storage,
+			"close(%s) failed: %m", temp_path);
+		ret = -1;
+	}
+	if (ret < 0) {
+		if (unlink(temp_path) < 0) {
+			mail_storage_set_critical(&file->storage->storage,
+				"unlink(%s) failed: %m", temp_path);
+		}
+		return -1;
+	}
+	/* keep a copy of the original file in case someone wants to look
+	   at it */
+	broken_path = t_strconcat(file->cur_path,
+				  DBOX_MAIL_FILE_BROKEN_COPY_SUFFIX, NULL);
+	if (link(file->cur_path, broken_path) < 0) {
+		/* not fatal - continue without the backup copy */
+		mail_storage_set_critical(&file->storage->storage,
+					  "link(%s, %s) failed: %m",
+					  file->cur_path, broken_path);
+	} else {
+		i_warning("dbox: Copy of the broken file saved to %s",
+			  broken_path);
+	}
+	if (!have_messages) {
+		/* the resulting file has no messages. just delete the file. */
+		dbox_file_close(file);
+		i_unlink(temp_path);
+		i_unlink(file->cur_path);
+		return 0;
+	}
+	if (rename(temp_path, file->cur_path) < 0) {
+		/* log first: %m needs the errno from rename() */
+		mail_storage_set_critical(&file->storage->storage,
+					  "rename(%s, %s) failed: %m",
+					  temp_path, file->cur_path);
+		/* don't leave the temporary file lying around */
+		i_unlink(temp_path);
+		return -1;
+	}
+
+	/* file was successfully recreated - reopen it */
+	dbox_file_close(file);
+	if (dbox_file_open(file, &deleted) <= 0) {
+		mail_storage_set_critical(&file->storage->storage,
+			"dbox_file_fix(%s): reopening file failed",
+			file->cur_path);
+		return -1;
+	}
+	return 1;
+}