diff options
Diffstat (limited to 'src/lib-storage/index/index-mail-binary.c')
-rw-r--r-- | src/lib-storage/index/index-mail-binary.c | 598 |
1 files changed, 598 insertions, 0 deletions
diff --git a/src/lib-storage/index/index-mail-binary.c b/src/lib-storage/index/index-mail-binary.c new file mode 100644 index 0000000..80c319e --- /dev/null +++ b/src/lib-storage/index/index-mail-binary.c @@ -0,0 +1,598 @@ +/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "safe-mkstemp.h" +#include "istream.h" +#include "istream-crlf.h" +#include "istream-seekable.h" +#include "istream-base64.h" +#include "istream-qp.h" +#include "istream-header-filter.h" +#include "ostream.h" +#include "message-binary-part.h" +#include "message-parser.h" +#include "message-decoder.h" +#include "mail-user.h" +#include "index-storage.h" +#include "index-mail.h" + +#define MAIL_BINARY_CACHE_EXPIRE_MSECS (60*1000) + +#define IS_CONVERTED_CTE(cte) \ + ((cte) == MESSAGE_CTE_QP || (cte) == MESSAGE_CTE_BASE64) + +struct binary_block { + struct istream *input; + uoff_t physical_pos; + unsigned int body_lines_count; + bool converted, converted_hdr; +}; + +struct binary_ctx { + struct mail *mail; + struct istream *input; + bool has_nuls, converted; + /* each block is its own input stream. basically each converted MIME + body has its own block and the parts between the MIME bodies are + unconverted blocks */ + ARRAY(struct binary_block) blocks; + + uoff_t copy_start_offset; +}; + +static void binary_copy_to(struct binary_ctx *ctx, uoff_t end_offset) +{ + struct binary_block *block; + struct istream *linput, *cinput; + uoff_t orig_offset, size; + + i_assert(end_offset >= ctx->copy_start_offset); + + if (end_offset == ctx->copy_start_offset) + return; + + size = end_offset - ctx->copy_start_offset; + orig_offset = ctx->input->v_offset; + + i_stream_seek(ctx->input, ctx->copy_start_offset); + linput = i_stream_create_limit(ctx->input, size); + cinput = i_stream_create_crlf(linput); + i_stream_unref(&linput); + + block = array_append_space(&ctx->blocks); + block->input = cinput; + + i_stream_seek(ctx->input, orig_offset); +} + +static void +binary_cte_filter_callback(struct header_filter_istream *input, + struct message_header_line *hdr, + bool *matched ATTR_UNUSED, void *context ATTR_UNUSED) +{ + static const char *cte_binary = "Content-Transfer-Encoding: binary\r\n"; + + if (hdr != NULL && hdr->eoh) { + i_stream_header_filter_add(input, cte_binary, + strlen(cte_binary)); + } +} + +static int +add_binary_part(struct binary_ctx *ctx, const struct message_part *part, + bool include_hdr) +{ + static const char *filter_headers[] = { + "Content-Transfer-Encoding", + }; + struct message_header_parser_ctx *parser; + struct message_header_line *hdr; + struct message_part *child; + struct message_size hdr_size; + struct istream *linput; + struct binary_block *block; + enum message_cte cte; + uoff_t part_end_offset; + int ret; + + /* first parse the header to find c-t-e. */ + i_stream_seek(ctx->input, part->physical_pos); + + cte = MESSAGE_CTE_78BIT; + parser = message_parse_header_init(ctx->input, &hdr_size, 0); + while ((ret = message_parse_header_next(parser, &hdr)) > 0) { + if (strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0) + cte = message_decoder_parse_cte(hdr); + } + i_assert(ret < 0); + if (message_parse_header_has_nuls(parser)) { + /* we're not converting NULs to 0x80 when doing a binary fetch, + even if they're in the message header. */ + ctx->has_nuls = TRUE; + } + message_parse_header_deinit(&parser); + + if (ctx->input->stream_errno != 0) { + mail_set_critical(ctx->mail, + "read(%s) failed: %s", i_stream_get_name(ctx->input), + i_stream_get_error(ctx->input)); + return -1; + } + + if (cte == MESSAGE_CTE_UNKNOWN) { + mail_storage_set_error(ctx->mail->box->storage, + MAIL_ERROR_CONVERSION, + "Unknown Content-Transfer-Encoding."); + return -1; + } + + i_stream_seek(ctx->input, part->physical_pos); + if (!include_hdr) { + /* body only */ + } else if (IS_CONVERTED_CTE(cte)) { + /* write header with modified content-type */ + if (ctx->copy_start_offset != 0) + binary_copy_to(ctx, part->physical_pos); + block = array_append_space(&ctx->blocks); + block->physical_pos = part->physical_pos; + block->converted = TRUE; + block->converted_hdr = TRUE; + + linput = i_stream_create_limit(ctx->input, UOFF_T_MAX); + block->input = i_stream_create_header_filter(linput, + HEADER_FILTER_EXCLUDE | HEADER_FILTER_HIDE_BODY, + filter_headers, N_ELEMENTS(filter_headers), + binary_cte_filter_callback, NULL); + i_stream_unref(&linput); + } else { + /* copy everything as-is until the end of this header */ + binary_copy_to(ctx, part->physical_pos + + part->header_size.physical_size); + } + ctx->copy_start_offset = part->physical_pos + + part->header_size.physical_size; + part_end_offset = part->physical_pos + + part->header_size.physical_size + + part->body_size.physical_size; + + if (part->children != NULL) { + /* multipart */ + for (child = part->children; child != NULL; child = child->next) { + if (add_binary_part(ctx, child, TRUE) < 0) + return -1; + } + binary_copy_to(ctx, part_end_offset); + ctx->copy_start_offset = part_end_offset; + return 0; + } + if (part->body_size.physical_size == 0) { + /* no body */ + ctx->copy_start_offset = part_end_offset; + return 0; + } + + /* single part - write decoded data */ + block = array_append_space(&ctx->blocks); + block->physical_pos = part->physical_pos; + + i_stream_seek(ctx->input, part->physical_pos + + part->header_size.physical_size); + linput = i_stream_create_limit(ctx->input, part->body_size.physical_size); + switch (cte) { + case MESSAGE_CTE_UNKNOWN: + i_unreached(); + case MESSAGE_CTE_78BIT: + case MESSAGE_CTE_BINARY: + /* no conversion necessary */ + if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) + ctx->has_nuls = TRUE; + block->input = i_stream_create_crlf(linput); + break; + case MESSAGE_CTE_QP: + block->input = i_stream_create_qp_decoder(linput); + ctx->converted = block->converted = TRUE; + break; + case MESSAGE_CTE_BASE64: + block->input = i_stream_create_base64_decoder(linput); + ctx->converted = block->converted = TRUE; + break; + } + i_stream_unref(&linput); + + ctx->copy_start_offset = part_end_offset; + return 0; +} + +static int fd_callback(const char **path_r, void *context) +{ + struct mail *_mail = context; + string_t *path; + int fd; + + path = t_str_new(256); + mail_user_set_get_temp_prefix(path, _mail->box->storage->user->set); + fd = safe_mkstemp_hostpid(path, 0600, (uid_t)-1, (gid_t)-1); + if (fd == -1) { + i_error("Temp file creation to %s failed: %m", str_c(path)); + return -1; + } + + /* we just want the fd, unlink it */ + if (i_unlink(str_c(path)) < 0) { + /* shouldn't happen.. */ + i_close_fd(&fd); + return -1; + } + *path_r = str_c(path); + return fd; +} + +static void binary_streams_free(struct binary_ctx *ctx) +{ + struct binary_block *block; + + array_foreach_modifiable(&ctx->blocks, block) + i_stream_unref(&block->input); +} + +static void +binary_parts_update(struct binary_ctx *ctx, const struct message_part *part, + struct message_binary_part **msg_bin_parts) +{ + struct index_mail *mail = INDEX_MAIL(ctx->mail); + struct binary_block *blocks; + struct message_binary_part bin_part; + unsigned int i, count; + uoff_t size; + bool found; + + blocks = array_get_modifiable(&ctx->blocks, &count); + for (; part != NULL; part = part->next) { + binary_parts_update(ctx, part->children, msg_bin_parts); + + i_zero(&bin_part); + /* default to unchanged header */ + bin_part.binary_hdr_size = part->header_size.virtual_size; + bin_part.physical_pos = part->physical_pos; + found = FALSE; + for (i = 0; i < count; i++) { + if (blocks[i].physical_pos != part->physical_pos || + !blocks[i].converted) + continue; + + size = blocks[i].input->v_offset; + if (blocks[i].converted_hdr) + bin_part.binary_hdr_size = size; + else + bin_part.binary_body_size = size; + found = TRUE; + } + if (found) { + bin_part.next = *msg_bin_parts; + *msg_bin_parts = p_new(mail->mail.data_pool, + struct message_binary_part, 1); + **msg_bin_parts = bin_part; + } + } +} + +static void binary_parts_cache(struct binary_ctx *ctx) +{ + struct index_mail *mail = INDEX_MAIL(ctx->mail); + buffer_t *buf; + + buf = t_buffer_create(128); + message_binary_part_serialize(mail->data.bin_parts, buf); + index_mail_cache_add(mail, MAIL_CACHE_BINARY_PARTS, + buf->data, buf->used); +} + +static struct istream **blocks_get_streams(struct binary_ctx *ctx) +{ + struct istream **streams; + const struct binary_block *blocks; + unsigned int i, count; + + blocks = array_get(&ctx->blocks, &count); + streams = t_new(struct istream *, count+1); + for (i = 0; i < count; i++) { + streams[i] = blocks[i].input; + i_assert(streams[i]->v_offset == 0); + } + return streams; +} + +static int +blocks_count_lines(struct binary_ctx *ctx, struct istream *full_input) +{ + struct binary_block *blocks, *cur_block; + unsigned int block_idx, block_count; + uoff_t cur_block_offset, cur_block_size; + const unsigned char *data, *p; + size_t size, skip; + ssize_t ret; + + blocks = array_get_modifiable(&ctx->blocks, &block_count); + cur_block = blocks; + cur_block_offset = 0; + block_idx = 0; + + /* count the number of lines each block contains */ + while ((ret = i_stream_read_more(full_input, &data, &size)) > 0) { + i_assert(cur_block_offset <= cur_block->input->v_offset); + if (cur_block->input->eof) { + /* this is the last input for this block. the input + may also contain the next block's data, which we + don't want to include in this block's line count. */ + cur_block_size = cur_block->input->v_offset + + i_stream_get_data_size(cur_block->input); + i_assert(size >= cur_block_size - cur_block_offset); + size = cur_block_size - cur_block_offset; + } + skip = size; + while ((p = memchr(data, '\n', size)) != NULL) { + size -= p-data+1; + data = p+1; + cur_block->body_lines_count++; + } + i_stream_skip(full_input, skip); + cur_block_offset += skip; + + if (i_stream_read_eof(cur_block->input)) { + /* go to the next block */ + if (block_idx+1 == block_count) { + i_assert(i_stream_read_eof(full_input)); + ret = -1; + break; + } + block_idx++; + cur_block++; + cur_block_offset = 0; + } + } + i_assert(ret == -1); + if (full_input->stream_errno != 0) + return -1; + i_assert(block_count == 0 || !i_stream_have_bytes_left(cur_block->input)); + i_assert(block_count == 0 || block_idx+1 == block_count); + return 0; +} + +static int +index_mail_read_binary_to_cache(struct mail *_mail, + const struct message_part *part, + bool include_hdr, const char *reason, + bool *binary_r, bool *converted_r) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + struct mail_binary_cache *cache = &_mail->box->storage->binary_cache; + struct binary_ctx ctx; + struct istream *is; + + i_zero(&ctx); + ctx.mail = _mail; + t_array_init(&ctx.blocks, 8); + + mail_storage_free_binary_cache(_mail->box->storage); + if (mail_get_stream_because(_mail, NULL, NULL, reason, &ctx.input) < 0) + return -1; + + if (add_binary_part(&ctx, part, include_hdr) < 0) { + binary_streams_free(&ctx); + return -1; + } + + if (array_count(&ctx.blocks) != 0) { + is = i_streams_merge(blocks_get_streams(&ctx), + IO_BLOCK_SIZE, + fd_callback, _mail); + } else { + is = i_stream_create_from_data("", 0); + } + i_stream_set_name(is, t_strdup_printf( + "<binary stream of mailbox %s UID %u>", + _mail->box->vname, _mail->uid)); + if (blocks_count_lines(&ctx, is) < 0) { + if (is->stream_errno == EINVAL) { + /* MIME part contains invalid data */ + mail_storage_set_error(_mail->box->storage, + MAIL_ERROR_INVALIDDATA, + "Invalid data in MIME part"); + } else { + mail_set_critical(_mail, "read(%s) failed: %s", + i_stream_get_name(is), + i_stream_get_error(is)); + } + i_stream_unref(&is); + binary_streams_free(&ctx); + return -1; + } + + if (_mail->uid > 0) { + cache->to = timeout_add(MAIL_BINARY_CACHE_EXPIRE_MSECS, + mail_storage_free_binary_cache, + _mail->box->storage); + cache->box = _mail->box; + cache->uid = _mail->uid; + cache->orig_physical_pos = part->physical_pos; + cache->include_hdr = include_hdr; + cache->input = is; + } + + i_assert(!i_stream_have_bytes_left(is)); + cache->size = is->v_offset; + i_stream_seek(is, 0); + + if (part->parent == NULL && include_hdr && + mail->data.bin_parts == NULL) { + binary_parts_update(&ctx, part, &mail->data.bin_parts); + if (_mail->uid > 0) + binary_parts_cache(&ctx); + } + binary_streams_free(&ctx); + + *binary_r = ctx.converted ? TRUE : ctx.has_nuls; + *converted_r = ctx.converted; + return 0; +} + +static bool get_cached_binary_parts(struct index_mail *mail) +{ + const unsigned int field_idx = + mail->ibox->cache_fields[MAIL_CACHE_BINARY_PARTS].idx; + buffer_t *part_buf; + int ret; + + if (mail->data.bin_parts != NULL) + return TRUE; + + part_buf = t_buffer_create(128); + ret = index_mail_cache_lookup_field(mail, part_buf, field_idx); + if (ret <= 0) + return FALSE; + + if (message_binary_part_deserialize(mail->mail.data_pool, + part_buf->data, part_buf->used, + &mail->data.bin_parts) < 0) { + mail_set_mail_cache_corrupted(&mail->mail.mail, + "Corrupted cached binary.parts data"); + return FALSE; + } + return TRUE; +} + +static struct message_part * +msg_part_find(struct message_part *parts, uoff_t physical_pos) +{ + struct message_part *part, *child; + + for (part = parts; part != NULL; part = part->next) { + if (part->physical_pos == physical_pos) + return part; + child = msg_part_find(part->children, physical_pos); + if (child != NULL) + return child; + } + return NULL; +} + +static int +index_mail_get_binary_size(struct mail *_mail, + const struct message_part *part, bool include_hdr, + uoff_t *size_r, unsigned int *lines_r) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + struct message_part *all_parts, *msg_part; + const struct message_binary_part *bin_part, *root_bin_part; + uoff_t size, end_offset; + unsigned int lines; + bool binary, converted; + + if (mail_get_parts(_mail, &all_parts) < 0) + return -1; + + /* first lookup from cache */ + if (!get_cached_binary_parts(mail)) { + /* not found. parse the whole message */ + if (index_mail_read_binary_to_cache(_mail, all_parts, TRUE, + "binary.size", &binary, &converted) < 0) + return -1; + } + + size = part->header_size.virtual_size + + part->body_size.virtual_size; + /* note that we assume here that binary translation doesn't change the + headers' line counts. this isn't true if the original message + contained duplicate Content-Transfer-Encoding lines, but since + that's invalid anyway we don't bother trying to handle it. */ + lines = part->header_size.lines + part->body_size.lines; + end_offset = part->physical_pos + size; + + bin_part = mail->data.bin_parts; root_bin_part = NULL; + for (; bin_part != NULL; bin_part = bin_part->next) { + msg_part = msg_part_find(all_parts, bin_part->physical_pos); + if (msg_part == NULL) { + /* either binary.parts or mime.parts is broken */ + mail_set_cache_corrupted(_mail, MAIL_FETCH_MESSAGE_PARTS, t_strdup_printf( + "BINARY part at offset %"PRIuUOFF_T" not found from MIME parts", + bin_part->physical_pos)); + return -1; + } + if (msg_part->physical_pos >= part->physical_pos && + msg_part->physical_pos < end_offset) { + if (msg_part->physical_pos == part->physical_pos) + root_bin_part = bin_part; + size -= msg_part->header_size.virtual_size + + msg_part->body_size.virtual_size; + size += bin_part->binary_hdr_size + + bin_part->binary_body_size; + lines -= msg_part->body_size.lines; + lines += bin_part->binary_body_lines_count; + } + } + if (!include_hdr) { + if (root_bin_part != NULL) + size -= root_bin_part->binary_hdr_size; + else + size -= part->header_size.virtual_size; + lines -= part->header_size.lines; + } + *size_r = size; + *lines_r = lines; + return 0; +} + +int index_mail_get_binary_stream(struct mail *_mail, + const struct message_part *part, + bool include_hdr, uoff_t *size_r, + unsigned int *lines_r, bool *binary_r, + struct istream **stream_r) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + struct mail_binary_cache *cache = &_mail->box->storage->binary_cache; + struct istream *input; + bool binary, converted; + + if (stream_r == NULL) { + return index_mail_get_binary_size(_mail, part, include_hdr, + size_r, lines_r); + } + /* current implementation doesn't bother implementing this, + because it's not needed by anything. */ + i_assert(lines_r == NULL); + + /* FIXME: always put the header to temp file. skip it when needed. */ + if (cache->box == _mail->box && cache->uid == _mail->uid && + cache->orig_physical_pos == part->physical_pos && + cache->include_hdr == include_hdr) { + /* we have this cached already */ + i_stream_seek(cache->input, 0); + timeout_reset(cache->to); + binary = TRUE; + converted = TRUE; + } else { + if (index_mail_read_binary_to_cache(_mail, part, include_hdr, + "binary stream", &binary, &converted) < 0) + return -1; + mail->data.cache_fetch_fields |= MAIL_FETCH_STREAM_BINARY; + } + *size_r = cache->size; + *binary_r = binary; + if (!converted) { + /* don't keep this cached. it's exactly the same as + the original stream */ + i_assert(mail->data.stream != NULL); + i_stream_seek(mail->data.stream, part->physical_pos + + (include_hdr ? 0 : + part->header_size.physical_size)); + input = i_stream_create_crlf(mail->data.stream); + *stream_r = i_stream_create_limit(input, *size_r); + i_stream_unref(&input); + mail_storage_free_binary_cache(_mail->box->storage); + } else { + *stream_r = cache->input; + i_stream_ref(cache->input); + } + return 0; +} |