summaryrefslogtreecommitdiffstats
path: root/src/lib-storage/index/mbox/mbox-sync-rewrite.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-storage/index/mbox/mbox-sync-rewrite.c')
-rw-r--r--src/lib-storage/index/mbox/mbox-sync-rewrite.c615
1 files changed, 615 insertions, 0 deletions
diff --git a/src/lib-storage/index/mbox/mbox-sync-rewrite.c b/src/lib-storage/index/mbox/mbox-sync-rewrite.c
new file mode 100644
index 0000000..eeb4878
--- /dev/null
+++ b/src/lib-storage/index/mbox/mbox-sync-rewrite.c
@@ -0,0 +1,615 @@
+/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "buffer.h"
+#include "istream.h"
+#include "ostream.h"
+#include "str.h"
+#include "write-full.h"
+#include "message-parser.h"
+#include "mbox-storage.h"
+#include "mbox-sync-private.h"
+#include "istream-raw-mbox.h"
+
+int mbox_move(struct mbox_sync_context *sync_ctx,
+ uoff_t dest, uoff_t source, uoff_t size)
+{
+ struct mbox_mailbox *mbox = sync_ctx->mbox;
+ struct istream *input;
+ struct ostream *output;
+ int ret;
+
+ i_assert(source > 0 || (dest != 1 && dest != 2));
+ i_assert(size < OFF_T_MAX);
+
+ if (size == 0 || source == dest)
+ return 0;
+
+ i_stream_sync(sync_ctx->input);
+
+ output = o_stream_create_fd_file(sync_ctx->write_fd, UOFF_T_MAX, FALSE);
+ i_stream_seek(sync_ctx->file_input, source);
+ if (o_stream_seek(output, dest) < 0) {
+ mbox_ostream_set_syscall_error(sync_ctx->mbox, output,
+ "o_stream_seek()");
+ o_stream_unref(&output);
+ return -1;
+ }
+
+ /* we're moving data within a file. it really shouldn't be failing at
+ this point or we're corrupted. */
+ input = i_stream_create_limit(sync_ctx->file_input, size);
+ o_stream_nsend_istream(output, input);
+ if (input->stream_errno != 0) {
+ mailbox_set_critical(&mbox->box,
+ "read() failed with mbox: %s",
+ i_stream_get_error(input));
+ ret = -1;
+ } else if (output->stream_errno != 0) {
+ mailbox_set_critical(&mbox->box,
+ "write() failed with mbox: %s",
+ o_stream_get_error(output));
+ ret = -1;
+ } else if (input->v_offset != size) {
+ mbox_sync_set_critical(sync_ctx,
+ "mbox_move(%"PRIuUOFF_T", %"PRIuUOFF_T", %"PRIuUOFF_T
+ ") moved only %"PRIuUOFF_T" bytes",
+ dest, source, size, input->v_offset);
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+ i_stream_unref(&input);
+
+ mbox_sync_file_updated(sync_ctx, FALSE);
+ o_stream_destroy(&output);
+ return ret;
+}
+
+static int mbox_fill_space(struct mbox_sync_context *sync_ctx,
+ uoff_t offset, uoff_t size)
+{
+ unsigned char space[1024];
+
+ memset(space, ' ', sizeof(space));
+ while (size > sizeof(space)) {
+ if (pwrite_full(sync_ctx->write_fd, space,
+ sizeof(space), offset) < 0) {
+ mbox_set_syscall_error(sync_ctx->mbox, "pwrite_full()");
+ return -1;
+ }
+ size -= sizeof(space);
+ }
+
+ if (pwrite_full(sync_ctx->write_fd, space, size, offset) < 0) {
+ mbox_set_syscall_error(sync_ctx->mbox, "pwrite_full()");
+ return -1;
+ }
+ mbox_sync_file_updated(sync_ctx, TRUE);
+ return 0;
+}
+
+void mbox_sync_headers_add_space(struct mbox_sync_mail_context *ctx,
+ size_t size)
+{
+ size_t data_size, pos, start_pos;
+ const unsigned char *data;
+ void *p;
+
+ i_assert(size < SSIZE_T_MAX);
+
+ if (ctx->mail.pseudo)
+ start_pos = ctx->hdr_pos[MBOX_HDR_X_IMAPBASE];
+ else if (ctx->mail.space > 0) {
+ /* update the header using the existing offset.
+ otherwise we might chose wrong header and just decrease
+ the available space */
+ start_pos = ctx->mail.offset - ctx->hdr_offset;
+ } else {
+ /* Append at the end of X-Keywords header,
+ or X-UID if it doesn't exist */
+ start_pos = ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] != SIZE_MAX ?
+ ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] :
+ ctx->hdr_pos[MBOX_HDR_X_UID];
+ }
+
+ data = str_data(ctx->header);
+ data_size = str_len(ctx->header);
+ i_assert(start_pos < data_size);
+
+ for (pos = start_pos; pos < data_size; pos++) {
+ if (data[pos] == '\n') {
+ /* possibly continues in next line */
+ if (pos+1 == data_size || !IS_LWSP(data[pos+1]))
+ break;
+ start_pos = pos+1;
+ } else if (!IS_LWSP(data[pos]) && data[pos] != '\r') {
+ start_pos = pos+1;
+ }
+ }
+
+ /* pos points to end of header now, and start_pos to beginning
+ of whitespace. */
+ mbox_sync_move_buffer(ctx, pos, size, 0);
+
+ p = buffer_get_space_unsafe(ctx->header, pos, size);
+ memset(p, ' ', size);
+
+ if (ctx->header_first_change > pos)
+ ctx->header_first_change = pos;
+ ctx->header_last_change = SIZE_MAX;
+
+ ctx->mail.space = (pos - start_pos) + size;
+ ctx->mail.offset = ctx->hdr_offset;
+ if (ctx->mail.space > 0)
+ ctx->mail.offset += start_pos;
+}
+
+static void mbox_sync_header_remove_space(struct mbox_sync_mail_context *ctx,
+ size_t start_pos, size_t *size)
+{
+ const unsigned char *data;
+ size_t data_size, pos, last_line_pos;
+
+ /* find the end of the LWSP */
+ data = str_data(ctx->header);
+ data_size = str_len(ctx->header);
+
+ for (pos = last_line_pos = start_pos; pos < data_size; pos++) {
+ if (data[pos] == '\n') {
+ /* possibly continues in next line */
+ if (pos+1 == data_size || !IS_LWSP(data[pos+1])) {
+ data_size = pos;
+ break;
+ }
+ last_line_pos = pos+1;
+ } else if (!IS_LWSP(data[pos]) && data[pos] != '\r') {
+ start_pos = last_line_pos = pos+1;
+ }
+ }
+
+ if (start_pos == data_size)
+ return;
+
+ /* and remove what we can */
+ if (ctx->header_first_change > start_pos)
+ ctx->header_first_change = start_pos;
+ ctx->header_last_change = SIZE_MAX;
+
+ if (data_size - start_pos <= *size) {
+ /* remove it all */
+ mbox_sync_move_buffer(ctx, start_pos, 0, data_size - start_pos);
+ *size -= data_size - start_pos;
+ return;
+ }
+
+ /* we have more space than needed. since we're removing from
+ the beginning of header instead of end, we don't have to
+ worry about multiline-headers. */
+ mbox_sync_move_buffer(ctx, start_pos, 0, *size);
+ if (last_line_pos <= start_pos + *size)
+ last_line_pos = start_pos;
+ else
+ last_line_pos -= *size;
+ data_size -= *size;
+
+ *size = 0;
+
+ if (ctx->mail.space < (off_t)(data_size - last_line_pos)) {
+ ctx->mail.space = data_size - last_line_pos;
+ ctx->mail.offset = ctx->hdr_offset;
+ if (ctx->mail.space > 0)
+ ctx->mail.offset += last_line_pos;
+ }
+}
+
+static void mbox_sync_headers_remove_space(struct mbox_sync_mail_context *ctx,
+ size_t size)
+{
+ static enum header_position space_positions[] = {
+ MBOX_HDR_X_UID,
+ MBOX_HDR_X_KEYWORDS,
+ MBOX_HDR_X_IMAPBASE
+ };
+ enum header_position pos;
+ int i;
+
+ ctx->mail.space = 0;
+ ctx->mail.offset = ctx->hdr_offset;
+
+ for (i = 0; i < 3 && size > 0; i++) {
+ pos = space_positions[i];
+ if (ctx->hdr_pos[pos] != SIZE_MAX) {
+ mbox_sync_header_remove_space(ctx, ctx->hdr_pos[pos],
+ &size);
+ }
+ }
+
+ /* FIXME: see if we could remove X-Keywords header completely */
+}
+
+static void mbox_sync_first_mail_written(struct mbox_sync_mail_context *ctx,
+ uoff_t hdr_offset)
+{
+ /* we wrote the first mail. update last-uid offset so we can find
+ it later */
+ i_assert(ctx->last_uid_value_start_pos != 0);
+ i_assert(ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] != SIZE_MAX);
+
+ ctx->sync_ctx->base_uid_last_offset = hdr_offset +
+ ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] +
+ ctx->last_uid_value_start_pos;
+
+ if (ctx->imapbase_updated) {
+ /* update so a) we don't try to update it later needlessly,
+ b) if we do actually update it, we see the correct value */
+ ctx->sync_ctx->base_uid_last = ctx->last_uid_updated_value;
+ }
+}
+
+int mbox_sync_try_rewrite(struct mbox_sync_mail_context *ctx, off_t move_diff)
+{
+ struct mbox_sync_context *sync_ctx = ctx->sync_ctx;
+ size_t old_hdr_size, new_hdr_size;
+
+ i_assert(sync_ctx->mbox->mbox_lock_type == F_WRLCK);
+
+ old_hdr_size = ctx->body_offset - ctx->hdr_offset;
+ new_hdr_size = str_len(ctx->header);
+
+ if (new_hdr_size <= old_hdr_size) {
+ /* add space. note that we must call add_space() even if we're
+ not adding anything so mail.offset gets fixed. */
+ mbox_sync_headers_add_space(ctx, old_hdr_size - new_hdr_size);
+ } else if (new_hdr_size > old_hdr_size) {
+ /* try removing the space where we can */
+ mbox_sync_headers_remove_space(ctx,
+ new_hdr_size - old_hdr_size);
+ new_hdr_size = str_len(ctx->header);
+
+ if (new_hdr_size <= old_hdr_size) {
+ /* good, we removed enough. */
+ i_assert(new_hdr_size == old_hdr_size);
+ } else if (move_diff < 0 &&
+ new_hdr_size - old_hdr_size <= (uoff_t)-move_diff) {
+ /* moving backwards - we can use the extra space from
+ it, just update expunged_space accordingly */
+ i_assert(ctx->mail.space == 0);
+ i_assert(sync_ctx->expunged_space >=
+ (off_t)(new_hdr_size - old_hdr_size));
+ sync_ctx->expunged_space -= new_hdr_size - old_hdr_size;
+ } else {
+ /* couldn't get enough space */
+ i_assert(ctx->mail.space == 0);
+ ctx->mail.space =
+ -(ssize_t)(new_hdr_size - old_hdr_size);
+ return 0;
+ }
+ }
+
+ i_assert(ctx->mail.space >= 0);
+
+ if (ctx->header_first_change == SIZE_MAX && move_diff == 0) {
+ /* no changes actually. we get here if index sync record told
+ us to do something that was already there */
+ return 1;
+ }
+
+ if (move_diff != 0) {
+ /* forget about partial write optimizations */
+ ctx->header_first_change = 0;
+ ctx->header_last_change = 0;
+ }
+
+ if (ctx->header_last_change != SIZE_MAX &&
+ ctx->header_last_change != 0)
+ str_truncate(ctx->header, ctx->header_last_change);
+
+ if (pwrite_full(sync_ctx->write_fd,
+ str_data(ctx->header) + ctx->header_first_change,
+ str_len(ctx->header) - ctx->header_first_change,
+ (off_t)ctx->hdr_offset + (off_t)ctx->header_first_change +
+ move_diff) < 0) {
+ mbox_set_syscall_error(sync_ctx->mbox, "pwrite_full()");
+ return -1;
+ }
+
+ if (sync_ctx->dest_first_mail &&
+ (ctx->imapbase_updated || ctx->sync_ctx->base_uid_last != 0)) {
+ /* the position might have moved as a result of moving
+ whitespace */
+ mbox_sync_first_mail_written(ctx, (off_t)ctx->hdr_offset + move_diff);
+ }
+
+ mbox_sync_file_updated(sync_ctx, FALSE);
+ return 1;
+}
+
+static int mbox_sync_read_next(struct mbox_sync_context *sync_ctx,
+ struct mbox_sync_mail_context *mail_ctx,
+ struct mbox_sync_mail *mails,
+ uint32_t seq, uint32_t idx,
+ uoff_t expunged_space)
+{
+ unsigned int first_mail_expunge_extra;
+ uint32_t orig_next_uid;
+
+ i_zero(mail_ctx);
+ mail_ctx->sync_ctx = sync_ctx;
+ mail_ctx->seq = seq;
+ mail_ctx->header = sync_ctx->header;
+
+ if (istream_raw_mbox_get_header_offset(sync_ctx->input,
+ &mail_ctx->mail.offset) < 0) {
+ mbox_sync_set_critical(sync_ctx,
+ "Couldn't get header offset for seq=%u", seq);
+ return -1;
+ }
+ mail_ctx->mail.body_size = mails[idx].body_size;
+
+ orig_next_uid = sync_ctx->next_uid;
+ if (mails[idx].uid != 0) {
+ /* This will force the UID to be the one that we originally
+ assigned to it, regardless of whether it's broken or not in
+ the file. */
+ sync_ctx->next_uid = mails[idx].uid;
+ sync_ctx->prev_msg_uid = mails[idx].uid - 1;
+ } else {
+ /* Pseudo mail shouldn't have X-UID header at all */
+ i_assert(mails[idx].pseudo);
+ sync_ctx->prev_msg_uid = 0;
+ }
+
+ first_mail_expunge_extra = 1 +
+ (sync_ctx->first_mail_crlf_expunged ? 1 : 0);
+ if (mails[idx].from_offset +
+ first_mail_expunge_extra - expunged_space != 0) {
+ sync_ctx->dest_first_mail = mails[idx].from_offset == 0;
+ } else {
+ /* we need to skip over the initial \n (it's already counted in
+ expunged_space) */
+ sync_ctx->dest_first_mail = TRUE;
+ mails[idx].from_offset += first_mail_expunge_extra;
+ }
+
+ if (mbox_sync_parse_next_mail(sync_ctx->input, mail_ctx) < 0)
+ return -1;
+ i_assert(mail_ctx->mail.pseudo == mails[idx].pseudo);
+
+ /* set next_uid back before updating the headers. this is important
+ if we're updating the first message to make X-IMAP[base] header
+ have the correct value. */
+ sync_ctx->next_uid = orig_next_uid;
+
+ if (mails[idx].space != 0) {
+ if (mails[idx].space < 0) {
+ /* remove all possible spacing before updating */
+ mbox_sync_headers_remove_space(mail_ctx, SIZE_MAX);
+ }
+ mbox_sync_update_header_from(mail_ctx, &mails[idx]);
+ } else {
+ /* updating might just try to add headers and mess up our
+ calculations completely. so only add the EOH here. */
+ if (mail_ctx->have_eoh)
+ str_append_c(mail_ctx->header, '\n');
+ }
+ return 0;
+}
+
+static int mbox_sync_read_and_move(struct mbox_sync_context *sync_ctx,
+ struct mbox_sync_mail_context *mail_ctx,
+ struct mbox_sync_mail *mails,
+ uint32_t seq, uint32_t idx, uint32_t padding,
+ off_t move_diff, uoff_t expunged_space,
+ uoff_t end_offset, bool first_nonexpunged)
+{
+ struct mbox_sync_mail_context new_mail_ctx;
+ uoff_t offset, dest_offset;
+ size_t need_space;
+
+ if (mail_ctx == NULL) {
+ if (mbox_sync_seek(sync_ctx, mails[idx].from_offset) < 0)
+ return -1;
+
+ if (mbox_sync_read_next(sync_ctx, &new_mail_ctx, mails, seq, idx,
+ expunged_space) < 0)
+ return -1;
+ mail_ctx = &new_mail_ctx;
+ } else {
+ i_assert(seq == mail_ctx->seq);
+ if (mail_ctx->mail.space < 0)
+ mail_ctx->mail.space = 0;
+ i_stream_seek(sync_ctx->input, mail_ctx->body_offset);
+ }
+
+ if (mail_ctx->mail.space <= 0) {
+ need_space = str_len(mail_ctx->header) - mail_ctx->mail.space -
+ (mail_ctx->body_offset - mail_ctx->hdr_offset);
+ if (need_space != (uoff_t)-mails[idx].space) {
+ /* this check works only if we're doing the first
+ write, or if the file size was changed externally */
+ mbox_sync_file_update_ext_modified(sync_ctx);
+
+ mbox_sync_set_critical(sync_ctx,
+ "seq=%u uid=%u uid_broken=%d "
+ "originally needed %"PRIuUOFF_T
+ " bytes, now needs %zu bytes",
+ seq, mails[idx].uid, mails[idx].uid_broken ? 1 : 0,
+ (uoff_t)-mails[idx].space, need_space);
+ return -1;
+ }
+ }
+
+ if (first_nonexpunged && expunged_space > 0) {
+ /* move From-line (after parsing headers so we don't
+ overwrite them) */
+ i_assert(mails[idx].from_offset >= expunged_space);
+ if (mbox_move(sync_ctx, mails[idx].from_offset - expunged_space,
+ mails[idx].from_offset,
+ mails[idx].offset - mails[idx].from_offset) < 0)
+ return -1;
+ }
+
+ if (mails[idx].space == 0) {
+ /* don't touch spacing */
+ } else if (padding < (uoff_t)mail_ctx->mail.space) {
+ mbox_sync_headers_remove_space(mail_ctx, mail_ctx->mail.space -
+ padding);
+ } else {
+ mbox_sync_headers_add_space(mail_ctx, padding -
+ mail_ctx->mail.space);
+ }
+
+ /* move the body of this message and headers of next message forward,
+ then write the headers */
+ offset = sync_ctx->input->v_offset;
+ dest_offset = offset + move_diff;
+ i_assert(offset <= end_offset);
+ if (mbox_move(sync_ctx, dest_offset, offset, end_offset - offset) < 0)
+ return -1;
+
+ /* the header may actually be moved backwards if there was expunged
+ space which we wanted to remove */
+ i_assert(dest_offset >= str_len(mail_ctx->header));
+ dest_offset -= str_len(mail_ctx->header);
+ i_assert(dest_offset >= mails[idx].from_offset - expunged_space);
+ if (pwrite_full(sync_ctx->write_fd, str_data(mail_ctx->header),
+ str_len(mail_ctx->header), dest_offset) < 0) {
+ mbox_set_syscall_error(sync_ctx->mbox, "pwrite_full()");
+ return -1;
+ }
+ mbox_sync_file_updated(sync_ctx, TRUE);
+
+ if (sync_ctx->dest_first_mail) {
+ mbox_sync_first_mail_written(mail_ctx, dest_offset);
+ sync_ctx->dest_first_mail = FALSE;
+ }
+
+ mails[idx].offset = dest_offset +
+ (mail_ctx->mail.offset - mail_ctx->hdr_offset);
+ mails[idx].space = mail_ctx->mail.space;
+ return 0;
+}
+
+int mbox_sync_rewrite(struct mbox_sync_context *sync_ctx,
+ struct mbox_sync_mail_context *mail_ctx,
+ uoff_t end_offset, off_t move_diff, uoff_t extra_space,
+ uint32_t first_seq, uint32_t last_seq)
+{
+ struct mbox_sync_mail *mails;
+ uoff_t offset, dest_offset, next_end_offset, next_move_diff;
+ uoff_t start_offset, expunged_space;
+ uint32_t idx, first_nonexpunged_idx, padding_per_mail;
+ uint32_t orig_prev_msg_uid;
+ unsigned int count;
+ int ret = 0;
+
+ i_assert(extra_space < OFF_T_MAX);
+ i_assert(sync_ctx->mbox->mbox_lock_type == F_WRLCK);
+
+ mails = array_get_modifiable(&sync_ctx->mails, &count);
+ i_assert(count == last_seq - first_seq + 1);
+
+ /* if there's expunges in mails[], we would get more correct balancing
+ by counting only them here. however, that might make us overwrite
+ data which hasn't yet been copied backwards. to avoid too much
+ complexity, we just leave all the rest of the extra space to first
+ mail */
+ idx = last_seq - first_seq + 1;
+ padding_per_mail = extra_space / idx;
+
+ /* after expunge the next mail must have been missing space, or we
+ would have moved it backwards already */
+ expunged_space = 0;
+ start_offset = mails[0].from_offset;
+ for (first_nonexpunged_idx = 0;; first_nonexpunged_idx++) {
+ i_assert(first_nonexpunged_idx != idx);
+ if (!mails[first_nonexpunged_idx].expunged)
+ break;
+ expunged_space += mails[first_nonexpunged_idx].space;
+ }
+ i_assert(mails[first_nonexpunged_idx].space < 0);
+
+ orig_prev_msg_uid = sync_ctx->prev_msg_uid;
+
+ /* start moving backwards. */
+ while (idx > first_nonexpunged_idx) {
+ idx--;
+ if (idx == first_nonexpunged_idx) {
+ /* give the rest of the extra space to first mail.
+ we might also have to move the mail backwards to
+ fill the expunged space */
+ padding_per_mail = move_diff + (off_t)expunged_space +
+ (off_t)mails[idx].space;
+ }
+
+ next_end_offset = mails[idx].offset;
+
+ if (mails[idx].space <= 0 && !mails[idx].expunged) {
+ /* give space to this mail. end_offset is left to
+ contain this message's From-line (ie. below we
+ move only headers + body). */
+ bool first_nonexpunged = idx == first_nonexpunged_idx;
+
+ next_move_diff = -mails[idx].space;
+ if (mbox_sync_read_and_move(sync_ctx, mail_ctx, mails,
+ first_seq + idx, idx,
+ padding_per_mail,
+ move_diff, expunged_space,
+ end_offset,
+ first_nonexpunged) < 0) {
+ ret = -1;
+ break;
+ }
+ move_diff -= next_move_diff + mails[idx].space;
+ } else {
+ /* this mail provides more space. just move it forward
+ from the extra space offset and set end_offset to
+ point to beginning of extra space. that way the
+ header will be moved along with previous mail's
+ body.
+
+ if this is expunged mail, we're moving following
+ mail's From-line and maybe headers. */
+ offset = mails[idx].offset + mails[idx].space;
+ dest_offset = offset + move_diff;
+ i_assert(offset <= end_offset);
+ if (mbox_move(sync_ctx, dest_offset, offset,
+ end_offset - offset) < 0) {
+ ret = -1;
+ break;
+ }
+
+ move_diff += mails[idx].space;
+ if (!mails[idx].expunged) {
+ move_diff -= padding_per_mail;
+ mails[idx].space = padding_per_mail;
+
+ if (mbox_fill_space(sync_ctx, move_diff +
+ mails[idx].offset,
+ padding_per_mail) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+ mails[idx].offset += move_diff;
+ }
+ mail_ctx = NULL;
+
+ i_assert(move_diff >= 0 || idx == first_nonexpunged_idx);
+ i_assert(next_end_offset <= end_offset);
+
+ end_offset = next_end_offset;
+ mails[idx].from_offset += move_diff;
+ }
+
+ if (ret == 0) {
+ i_assert(mails[idx].from_offset == start_offset);
+ i_assert(move_diff + (off_t)expunged_space >= 0);
+ }
+
+ mbox_sync_file_updated(sync_ctx, FALSE);
+ sync_ctx->prev_msg_uid = orig_prev_msg_uid;
+ return ret;
+}