summaryrefslogtreecommitdiffstats
path: root/src/lib-storage/index/index-mail-binary.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-storage/index/index-mail-binary.c')
-rw-r--r--src/lib-storage/index/index-mail-binary.c598
1 files changed, 598 insertions, 0 deletions
diff --git a/src/lib-storage/index/index-mail-binary.c b/src/lib-storage/index/index-mail-binary.c
new file mode 100644
index 0000000..80c319e
--- /dev/null
+++ b/src/lib-storage/index/index-mail-binary.c
@@ -0,0 +1,598 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "safe-mkstemp.h"
+#include "istream.h"
+#include "istream-crlf.h"
+#include "istream-seekable.h"
+#include "istream-base64.h"
+#include "istream-qp.h"
+#include "istream-header-filter.h"
+#include "ostream.h"
+#include "message-binary-part.h"
+#include "message-parser.h"
+#include "message-decoder.h"
+#include "mail-user.h"
+#include "index-storage.h"
+#include "index-mail.h"
+/* Timeout in milliseconds given to timeout_add() when the binary cache is
+   filled in index_mail_read_binary_to_cache(). */
+#define MAIL_BINARY_CACHE_EXPIRE_MSECS (60*1000)
+/* TRUE for the transfer encodings that get decoded here: quoted-printable
+   and base64. Note that the argument is evaluated twice. */
+#define IS_CONVERTED_CTE(cte) \
+ ((cte) == MESSAGE_CTE_QP || (cte) == MESSAGE_CTE_BASE64)
+/* One piece of the generated binary output. Blocks are appended to
+   binary_ctx.blocks in output order and later merged into a single stream. */
+struct binary_block {
+ struct istream *input; /* stream that produces this block's bytes */
+ uoff_t physical_pos; /* physical_pos of the MIME part the block was created
+   for; only assigned in add_binary_part(), not by binary_copy_to() */
+ unsigned int body_lines_count; /* incremented by blocks_count_lines() for
+   every LF found in this block's output */
+ bool converted, converted_hdr; /* converted: header or body of a QP/base64
+   part that gets rewritten/decoded; converted_hdr: the block holds the
+   rewritten header rather than the body */
+};
+/* State shared by the functions that build the binary stream of one mail. */
+struct binary_ctx {
+ struct mail *mail; /* mail being processed; used when reporting errors */
+ struct istream *input; /* the mail's stream from mail_get_stream_because() */
+ bool has_nuls, converted; /* has_nuls: NULs were seen in a header or in an
+   unconverted body; converted: at least one body was QP/base64 decoded */
+ /* each block is its own input stream. basically each converted MIME
+ body has its own block and the parts between the MIME bodies are
+ unconverted blocks */
+ ARRAY(struct binary_block) blocks;
+
+ uoff_t copy_start_offset; /* physical offset in input where the next
+   binary_copy_to() starts copying; advanced by add_binary_part() */
+};
+
+/* Append an unconverted block that covers the input from
+   ctx->copy_start_offset up to (but not including) end_offset. The data is
+   wrapped in a limit stream and a CRLF stream. Nothing is appended when the
+   range is empty. The input stream's current offset is restored before
+   returning; ctx->copy_start_offset itself is not modified here. */
+static void binary_copy_to(struct binary_ctx *ctx, uoff_t end_offset)
+{
+	struct binary_block *new_block;
+	struct istream *limited, *crlf;
+	uoff_t saved_offset, length;
+
+	i_assert(end_offset >= ctx->copy_start_offset);
+
+	length = end_offset - ctx->copy_start_offset;
+	if (length == 0)
+		return;
+
+	/* remember where the caller was so the seek below can be undone */
+	saved_offset = ctx->input->v_offset;
+	i_stream_seek(ctx->input, ctx->copy_start_offset);
+
+	limited = i_stream_create_limit(ctx->input, length);
+	crlf = i_stream_create_crlf(limited);
+	i_stream_unref(&limited);
+
+	new_block = array_append_space(&ctx->blocks);
+	new_block->input = crlf;
+
+	i_stream_seek(ctx->input, saved_offset);
+}
+
+/* Header filter callback used for converted parts: when the end-of-headers
+   line is reached, add a "Content-Transfer-Encoding: binary" line to the
+   filtered output. All other header lines (and a NULL hdr) are ignored. */
+static void
+binary_cte_filter_callback(struct header_filter_istream *input,
+			   struct message_header_line *hdr,
+			   bool *matched ATTR_UNUSED, void *context ATTR_UNUSED)
+{
+	static const char *cte_binary = "Content-Transfer-Encoding: binary\r\n";
+
+	if (hdr == NULL || !hdr->eoh)
+		return;
+
+	i_stream_header_filter_add(input, cte_binary, strlen(cte_binary));
+}
+/* Append to ctx->blocks the blocks needed to output `part` in binary form.
+   The part's header is parsed to find its Content-Transfer-Encoding.
+   Quoted-printable and base64 bodies get a decoder stream, other bodies are
+   passed through a CRLF stream. Parts with children recurse into each child
+   (always including the child's header). When include_hdr is FALSE the
+   part's own header is not output.
+   Returns 0 on success. Returns -1 if reading the input failed (critical
+   error set on the mail) or the Content-Transfer-Encoding is unknown
+   (MAIL_ERROR_CONVERSION set on the storage). */
+static int
+add_binary_part(struct binary_ctx *ctx, const struct message_part *part,
+ bool include_hdr)
+{
+ static const char *filter_headers[] = {
+ "Content-Transfer-Encoding",
+ };
+ struct message_header_parser_ctx *parser;
+ struct message_header_line *hdr;
+ struct message_part *child;
+ struct message_size hdr_size;
+ struct istream *linput;
+ struct binary_block *block;
+ enum message_cte cte;
+ uoff_t part_end_offset;
+ int ret;
+
+ /* first parse the header to find c-t-e. */
+ i_stream_seek(ctx->input, part->physical_pos);
+
+ cte = MESSAGE_CTE_78BIT; /* stays this way if no c-t-e header is found */
+ parser = message_parse_header_init(ctx->input, &hdr_size, 0);
+ while ((ret = message_parse_header_next(parser, &hdr)) > 0) {
+ if (strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
+ cte = message_decoder_parse_cte(hdr);
+ }
+ i_assert(ret < 0);
+ if (message_parse_header_has_nuls(parser)) {
+ /* we're not converting NULs to 0x80 when doing a binary fetch,
+ even if they're in the message header. */
+ ctx->has_nuls = TRUE;
+ }
+ message_parse_header_deinit(&parser);
+
+ if (ctx->input->stream_errno != 0) {
+ mail_set_critical(ctx->mail,
+ "read(%s) failed: %s", i_stream_get_name(ctx->input),
+ i_stream_get_error(ctx->input));
+ return -1;
+ }
+
+ if (cte == MESSAGE_CTE_UNKNOWN) {
+ mail_storage_set_error(ctx->mail->box->storage,
+ MAIL_ERROR_CONVERSION,
+ "Unknown Content-Transfer-Encoding.");
+ return -1;
+ }
+
+ i_stream_seek(ctx->input, part->physical_pos); /* back to the part's start after parsing */
+ if (!include_hdr) {
+ /* body only */
+ } else if (IS_CONVERTED_CTE(cte)) {
+ /* write the header with its Content-Transfer-Encoding line(s)
+ filtered out; binary_cte_filter_callback() adds a
+ "Content-Transfer-Encoding: binary" line at the end of it */
+ if (ctx->copy_start_offset != 0)
+ binary_copy_to(ctx, part->physical_pos);
+ block = array_append_space(&ctx->blocks);
+ block->physical_pos = part->physical_pos;
+ block->converted = TRUE;
+ block->converted_hdr = TRUE;
+
+ linput = i_stream_create_limit(ctx->input, UOFF_T_MAX);
+ block->input = i_stream_create_header_filter(linput,
+ HEADER_FILTER_EXCLUDE | HEADER_FILTER_HIDE_BODY,
+ filter_headers, N_ELEMENTS(filter_headers),
+ binary_cte_filter_callback, NULL);
+ i_stream_unref(&linput);
+ } else {
+ /* copy everything as-is until the end of this header */
+ binary_copy_to(ctx, part->physical_pos +
+ part->header_size.physical_size);
+ }
+ ctx->copy_start_offset = part->physical_pos + /* header is now handled or skipped */
+ part->header_size.physical_size;
+ part_end_offset = part->physical_pos +
+ part->header_size.physical_size +
+ part->body_size.physical_size;
+
+ if (part->children != NULL) {
+ /* multipart */
+ for (child = part->children; child != NULL; child = child->next) {
+ if (add_binary_part(ctx, child, TRUE) < 0)
+ return -1;
+ }
+ binary_copy_to(ctx, part_end_offset); /* data left after the last child */
+ ctx->copy_start_offset = part_end_offset;
+ return 0;
+ }
+ if (part->body_size.physical_size == 0) {
+ /* no body */
+ ctx->copy_start_offset = part_end_offset;
+ return 0;
+ }
+
+ /* single part - write decoded data */
+ block = array_append_space(&ctx->blocks);
+ block->physical_pos = part->physical_pos;
+
+ i_stream_seek(ctx->input, part->physical_pos +
+ part->header_size.physical_size);
+ linput = i_stream_create_limit(ctx->input, part->body_size.physical_size);
+ switch (cte) {
+ case MESSAGE_CTE_UNKNOWN:
+ i_unreached(); /* already rejected above */
+ case MESSAGE_CTE_78BIT:
+ case MESSAGE_CTE_BINARY:
+ /* no conversion necessary */
+ if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0)
+ ctx->has_nuls = TRUE;
+ block->input = i_stream_create_crlf(linput);
+ break;
+ case MESSAGE_CTE_QP:
+ block->input = i_stream_create_qp_decoder(linput);
+ ctx->converted = block->converted = TRUE;
+ break;
+ case MESSAGE_CTE_BASE64:
+ block->input = i_stream_create_base64_decoder(linput);
+ ctx->converted = block->converted = TRUE;
+ break;
+ }
+ i_stream_unref(&linput);
+
+ ctx->copy_start_offset = part_end_offset;
+ return 0;
+}
+
+/* Temporary file callback given to i_streams_merge(). Creates a temp file
+   under the mail user's temp prefix, unlinks it right away and returns the
+   still-open fd; *path_r is set to the (already unlinked) path. Returns -1
+   if the file couldn't be created or unlinked. `context` is the struct mail
+   the stream is being built for. */
+static int fd_callback(const char **path_r, void *context)
+{
+	struct mail *_mail = context;
+	string_t *temp_path = t_str_new(256);
+	int fd;
+
+	mail_user_set_get_temp_prefix(temp_path,
+				      _mail->box->storage->user->set);
+	fd = safe_mkstemp_hostpid(temp_path, 0600, (uid_t)-1, (gid_t)-1);
+	if (fd == -1) {
+		i_error("Temp file creation to %s failed: %m",
+			str_c(temp_path));
+		return -1;
+	}
+
+	/* only the descriptor is wanted, so remove the name immediately */
+	if (i_unlink(str_c(temp_path)) >= 0) {
+		*path_r = str_c(temp_path);
+		return fd;
+	}
+
+	/* shouldn't happen.. */
+	i_close_fd(&fd);
+	return -1;
+}
+
+/* Drop the stream reference held by every block in ctx->blocks. */
+static void binary_streams_free(struct binary_ctx *ctx)
+{
+	struct binary_block *blocks;
+	unsigned int i, count;
+
+	blocks = array_get_modifiable(&ctx->blocks, &count);
+	for (i = 0; i < count; i++)
+		i_stream_unref(&blocks[i].input);
+}
+
+/* Create message_binary_part entries for `part`, its following siblings and
+   all of their descendants, prepending each new entry to *msg_bin_parts.
+   Entries are allocated from the mail's data pool and are created only for
+   parts that own at least one converted block.
+
+   The sizes are taken from the block streams' current offsets and the line
+   count from binary_block.body_lines_count, so this must run only after
+   blocks_count_lines() has read the merged stream to its end. */
+static void
+binary_parts_update(struct binary_ctx *ctx, const struct message_part *part,
+		    struct message_binary_part **msg_bin_parts)
+{
+	struct index_mail *mail = INDEX_MAIL(ctx->mail);
+	struct binary_block *blocks;
+	struct message_binary_part bin_part;
+	unsigned int i, count;
+	uoff_t size;
+	bool found;
+
+	blocks = array_get_modifiable(&ctx->blocks, &count);
+	for (; part != NULL; part = part->next) {
+		binary_parts_update(ctx, part->children, msg_bin_parts);
+
+		i_zero(&bin_part);
+		/* default to unchanged header */
+		bin_part.binary_hdr_size = part->header_size.virtual_size;
+		bin_part.physical_pos = part->physical_pos;
+		found = FALSE;
+		for (i = 0; i < count; i++) {
+			/* a part can own two converted blocks: its rewritten
+			   header and its decoded body */
+			if (blocks[i].physical_pos != part->physical_pos ||
+			    !blocks[i].converted)
+				continue;
+
+			size = blocks[i].input->v_offset;
+			if (blocks[i].converted_hdr) {
+				bin_part.binary_hdr_size = size;
+			} else {
+				bin_part.binary_body_size = size;
+				/* index_mail_get_binary_size() replaces the
+				   part's original body line count with this
+				   value, so it has to come from the decoded
+				   body block. Without it the cached count
+				   would always stay zero. */
+				bin_part.binary_body_lines_count =
+					blocks[i].body_lines_count;
+			}
+			found = TRUE;
+		}
+		if (found) {
+			bin_part.next = *msg_bin_parts;
+			*msg_bin_parts = p_new(mail->mail.data_pool,
+					       struct message_binary_part, 1);
+			**msg_bin_parts = bin_part;
+		}
+	}
+}
+
+/* Serialize the mail's binary parts list (mail->data.bin_parts) and add it
+   to the mail cache as the MAIL_CACHE_BINARY_PARTS field. */
+static void binary_parts_cache(struct binary_ctx *ctx)
+{
+	struct index_mail *imail = INDEX_MAIL(ctx->mail);
+	buffer_t *serialized = t_buffer_create(128);
+
+	message_binary_part_serialize(imail->data.bin_parts, serialized);
+	index_mail_cache_add(imail, MAIL_CACHE_BINARY_PARTS,
+			     serialized->data, serialized->used);
+}
+
+/* Return an array holding the input stream of every block, in block order.
+   One slot more than the number of blocks is allocated and left unassigned
+   (presumably the NULL terminator i_streams_merge() expects - this relies on
+   t_new() zeroing the memory). Every stream must still be at offset 0. */
+static struct istream **blocks_get_streams(struct binary_ctx *ctx)
+{
+	const struct binary_block *blocks;
+	struct istream **streams;
+	unsigned int idx, count;
+
+	blocks = array_get(&ctx->blocks, &count);
+	streams = t_new(struct istream *, count+1);
+	for (idx = 0; idx < count; idx++) {
+		i_assert(blocks[idx].input->v_offset == 0);
+		streams[idx] = blocks[idx].input;
+	}
+	return streams;
+}
+/* Read full_input (the merged stream built from ctx->blocks) to its end and
+   count how many LF characters each block contributed, storing the result
+   in each block's body_lines_count. The current block is tracked by watching
+   the per-block input streams' offsets and EOF state while the merged
+   stream is consumed.
+   Returns 0 on success, -1 if full_input ended with stream_errno set. */
+static int
+blocks_count_lines(struct binary_ctx *ctx, struct istream *full_input)
+{
+ struct binary_block *blocks, *cur_block;
+ unsigned int block_idx, block_count;
+ uoff_t cur_block_offset, cur_block_size;
+ const unsigned char *data, *p;
+ size_t size, skip;
+ ssize_t ret;
+
+ blocks = array_get_modifiable(&ctx->blocks, &block_count);
+ cur_block = blocks;
+ cur_block_offset = 0; /* bytes of the current block consumed from full_input */
+ block_idx = 0;
+
+ /* count the number of lines each block contains */
+ while ((ret = i_stream_read_more(full_input, &data, &size)) > 0) {
+ i_assert(cur_block_offset <= cur_block->input->v_offset);
+ if (cur_block->input->eof) {
+ /* this is the last input for this block. the input
+ may also contain the next block's data, which we
+ don't want to include in this block's line count. */
+ cur_block_size = cur_block->input->v_offset +
+ i_stream_get_data_size(cur_block->input);
+ i_assert(size >= cur_block_size - cur_block_offset);
+ size = cur_block_size - cur_block_offset;
+ }
+ skip = size; /* the loop below consumes size, so keep the amount to skip */
+ while ((p = memchr(data, '\n', size)) != NULL) {
+ size -= p-data+1;
+ data = p+1;
+ cur_block->body_lines_count++;
+ }
+ i_stream_skip(full_input, skip);
+ cur_block_offset += skip;
+
+ if (i_stream_read_eof(cur_block->input)) {
+ /* go to the next block */
+ if (block_idx+1 == block_count) {
+ i_assert(i_stream_read_eof(full_input));
+ ret = -1; /* satisfies the ret == -1 assert after the loop */
+ break;
+ }
+ block_idx++;
+ cur_block++;
+ cur_block_offset = 0;
+ }
+ }
+ i_assert(ret == -1);
+ if (full_input->stream_errno != 0)
+ return -1;
+ i_assert(block_count == 0 || !i_stream_have_bytes_left(cur_block->input));
+ i_assert(block_count == 0 || block_idx+1 == block_count);
+ return 0;
+}
+/* Build the binary form of `part` (with its header if include_hdr is TRUE)
+   as one merged stream, read it through once to learn its size and the
+   per-block line counts, and record the result in the storage's binary
+   cache. `reason` is passed on to mail_get_stream_because().
+   On success *binary_r is TRUE if something was converted or NULs were seen,
+   and *converted_r is TRUE if any body was QP/base64 decoded.
+   Returns 0 on success, -1 on error (an error is set on the mail/storage).
+   NOTE(review): when _mail->uid is 0 the merged stream `is` is not stored in
+   cache->input and nothing on the success path unreferences it, although
+   cache->size is still updated - confirm who owns the stream in that case. */
+static int
+index_mail_read_binary_to_cache(struct mail *_mail,
+ const struct message_part *part,
+ bool include_hdr, const char *reason,
+ bool *binary_r, bool *converted_r)
+{
+ struct index_mail *mail = INDEX_MAIL(_mail);
+ struct mail_binary_cache *cache = &_mail->box->storage->binary_cache;
+ struct binary_ctx ctx;
+ struct istream *is;
+
+ i_zero(&ctx);
+ ctx.mail = _mail;
+ t_array_init(&ctx.blocks, 8);
+
+ mail_storage_free_binary_cache(_mail->box->storage);
+ if (mail_get_stream_because(_mail, NULL, NULL, reason, &ctx.input) < 0)
+ return -1;
+
+ if (add_binary_part(&ctx, part, include_hdr) < 0) {
+ binary_streams_free(&ctx);
+ return -1;
+ }
+
+ if (array_count(&ctx.blocks) != 0) {
+ is = i_streams_merge(blocks_get_streams(&ctx),
+ IO_BLOCK_SIZE,
+ fd_callback, _mail);
+ } else {
+ is = i_stream_create_from_data("", 0); /* no blocks: empty result */
+ }
+ i_stream_set_name(is, t_strdup_printf(
+ "<binary stream of mailbox %s UID %u>",
+ _mail->box->vname, _mail->uid));
+ if (blocks_count_lines(&ctx, is) < 0) {
+ if (is->stream_errno == EINVAL) {
+ /* MIME part contains invalid data */
+ mail_storage_set_error(_mail->box->storage,
+ MAIL_ERROR_INVALIDDATA,
+ "Invalid data in MIME part");
+ } else {
+ mail_set_critical(_mail, "read(%s) failed: %s",
+ i_stream_get_name(is),
+ i_stream_get_error(is));
+ }
+ i_stream_unref(&is);
+ binary_streams_free(&ctx);
+ return -1;
+ }
+
+ if (_mail->uid > 0) { /* cache identity and stream are recorded only for mails with a UID */
+ cache->to = timeout_add(MAIL_BINARY_CACHE_EXPIRE_MSECS,
+ mail_storage_free_binary_cache,
+ _mail->box->storage);
+ cache->box = _mail->box;
+ cache->uid = _mail->uid;
+ cache->orig_physical_pos = part->physical_pos;
+ cache->include_hdr = include_hdr;
+ cache->input = is;
+ }
+
+ i_assert(!i_stream_have_bytes_left(is));
+ cache->size = is->v_offset; /* fully read above, so the offset is the total size */
+ i_stream_seek(is, 0);
+
+ if (part->parent == NULL && include_hdr && /* root part including its header */
+ mail->data.bin_parts == NULL) {
+ binary_parts_update(&ctx, part, &mail->data.bin_parts);
+ if (_mail->uid > 0)
+ binary_parts_cache(&ctx);
+ }
+ binary_streams_free(&ctx);
+
+ *binary_r = ctx.converted ? TRUE : ctx.has_nuls;
+ *converted_r = ctx.converted;
+ return 0;
+}
+
+/* Make sure mail->data.bin_parts is loaded. Returns TRUE if it was already
+   set or could be deserialized from the cached MAIL_CACHE_BINARY_PARTS
+   field. Returns FALSE if the field lookup gave nothing, or if the cached
+   data failed to deserialize (in which case the mail's cache is also
+   marked corrupted). */
+static bool get_cached_binary_parts(struct index_mail *mail)
+{
+	const unsigned int field_idx =
+		mail->ibox->cache_fields[MAIL_CACHE_BINARY_PARTS].idx;
+	buffer_t *cached;
+
+	if (mail->data.bin_parts != NULL)
+		return TRUE;
+
+	cached = t_buffer_create(128);
+	if (index_mail_cache_lookup_field(mail, cached, field_idx) <= 0)
+		return FALSE;
+
+	if (message_binary_part_deserialize(mail->mail.data_pool,
+					    cached->data, cached->used,
+					    &mail->data.bin_parts) >= 0)
+		return TRUE;
+
+	mail_set_mail_cache_corrupted(&mail->mail.mail,
+		"Corrupted cached binary.parts data");
+	return FALSE;
+}
+
+/* Search `parts`, its following siblings and all their descendants
+   (depth-first, a part before its children) for the part whose physical_pos
+   equals the given offset. Returns NULL if there is no such part. */
+static struct message_part *
+msg_part_find(struct message_part *parts, uoff_t physical_pos)
+{
+	struct message_part *cur, *match;
+
+	for (cur = parts; cur != NULL; cur = cur->next) {
+		match = cur->physical_pos == physical_pos ? cur :
+			msg_part_find(cur->children, physical_pos);
+		if (match != NULL)
+			return match;
+	}
+	return NULL;
+}
+
+/* Calculate the size and line count of `part` in its binary form without
+   returning a stream. The binary parts list is taken from the cache when
+   available; otherwise the whole message is converted once to generate it.
+   The result starts from the part's virtual size and line counts and is
+   then adjusted with every binary part located inside `part`. If
+   include_hdr is FALSE the part's own (possibly rewritten) header is left
+   out of both results.
+   Returns 0 and fills in *size_r and *lines_r on success, -1 on error. */
+static int
+index_mail_get_binary_size(struct mail *_mail,
+			   const struct message_part *part, bool include_hdr,
+			   uoff_t *size_r, unsigned int *lines_r)
+{
+	struct index_mail *mail = INDEX_MAIL(_mail);
+	struct message_part *all_parts, *msg_part;
+	const struct message_binary_part *bin_part, *root_bin_part;
+	uoff_t size, end_offset;
+	unsigned int lines;
+	bool binary, converted;
+
+	if (mail_get_parts(_mail, &all_parts) < 0)
+		return -1;
+
+	/* first lookup from cache */
+	if (!get_cached_binary_parts(mail)) {
+		/* not found. parse the whole message */
+		if (index_mail_read_binary_to_cache(_mail, all_parts, TRUE,
+				"binary.size", &binary, &converted) < 0)
+			return -1;
+	}
+
+	size = part->header_size.virtual_size +
+		part->body_size.virtual_size;
+	/* note that we assume here that binary translation doesn't change the
+	   headers' line counts. this isn't true if the original message
+	   contained duplicate Content-Transfer-Encoding lines, but since
+	   that's invalid anyway we don't bother trying to handle it. */
+	lines = part->header_size.lines + part->body_size.lines;
+	/* end_offset is compared against physical positions below, so it
+	   must be built from physical sizes. The virtual size can be larger
+	   (e.g. when linefeeds are stored as bare LFs), which would stretch
+	   the range over the parts following this one. */
+	end_offset = part->physical_pos +
+		part->header_size.physical_size +
+		part->body_size.physical_size;
+
+	bin_part = mail->data.bin_parts; root_bin_part = NULL;
+	for (; bin_part != NULL; bin_part = bin_part->next) {
+		msg_part = msg_part_find(all_parts, bin_part->physical_pos);
+		if (msg_part == NULL) {
+			/* either binary.parts or mime.parts is broken */
+			mail_set_cache_corrupted(_mail, MAIL_FETCH_MESSAGE_PARTS, t_strdup_printf(
+				"BINARY part at offset %"PRIuUOFF_T" not found from MIME parts",
+				bin_part->physical_pos));
+			return -1;
+		}
+		if (msg_part->physical_pos >= part->physical_pos &&
+		    msg_part->physical_pos < end_offset) {
+			/* the converted part is `part` itself or inside it:
+			   swap its original size/lines for the binary ones */
+			if (msg_part->physical_pos == part->physical_pos)
+				root_bin_part = bin_part;
+			size -= msg_part->header_size.virtual_size +
+				msg_part->body_size.virtual_size;
+			size += bin_part->binary_hdr_size +
+				bin_part->binary_body_size;
+			lines -= msg_part->body_size.lines;
+			lines += bin_part->binary_body_lines_count;
+		}
+	}
+	if (!include_hdr) {
+		/* drop the header that was counted above: the rewritten one
+		   if `part` itself was converted, else the original */
+		if (root_bin_part != NULL)
+			size -= root_bin_part->binary_hdr_size;
+		else
+			size -= part->header_size.virtual_size;
+		lines -= part->header_size.lines;
+	}
+	*size_r = size;
+	*lines_r = lines;
+	return 0;
+}
+/* Return `part` of the mail in binary form, i.e. with quoted-printable and
+   base64 bodies decoded.
+   If stream_r is NULL only *size_r and *lines_r are calculated, via
+   index_mail_get_binary_size(). Otherwise lines_r must be NULL and *size_r,
+   *binary_r and *stream_r are filled in; the caller gets its own reference
+   to the returned stream. The storage's binary cache is reused when it
+   already holds this mail, part and include_hdr combination.
+   Returns 0 on success, -1 on error. */
+int index_mail_get_binary_stream(struct mail *_mail,
+ const struct message_part *part,
+ bool include_hdr, uoff_t *size_r,
+ unsigned int *lines_r, bool *binary_r,
+ struct istream **stream_r)
+{
+ struct index_mail *mail = INDEX_MAIL(_mail);
+ struct mail_binary_cache *cache = &_mail->box->storage->binary_cache;
+ struct istream *input;
+ bool binary, converted;
+
+ if (stream_r == NULL) {
+ return index_mail_get_binary_size(_mail, part, include_hdr,
+ size_r, lines_r);
+ }
+ /* current implementation doesn't bother implementing this,
+ because it's not needed by anything. */
+ i_assert(lines_r == NULL);
+
+ /* FIXME: always put the header to temp file. skip it when needed. */
+ if (cache->box == _mail->box && cache->uid == _mail->uid &&
+ cache->orig_physical_pos == part->physical_pos &&
+ cache->include_hdr == include_hdr) {
+ /* we have this cached already */
+ i_stream_seek(cache->input, 0);
+ timeout_reset(cache->to);
+ binary = TRUE; /* unconverted results are dropped from the cache below, */
+ converted = TRUE; /* so a cache hit is treated as a converted stream */
+ } else {
+ if (index_mail_read_binary_to_cache(_mail, part, include_hdr,
+ "binary stream", &binary, &converted) < 0)
+ return -1;
+ mail->data.cache_fetch_fields |= MAIL_FETCH_STREAM_BINARY;
+ }
+ *size_r = cache->size;
+ *binary_r = binary;
+ if (!converted) {
+ /* don't keep this cached. it's exactly the same as
+ the original stream */
+ i_assert(mail->data.stream != NULL);
+ i_stream_seek(mail->data.stream, part->physical_pos +
+ (include_hdr ? 0 :
+ part->header_size.physical_size));
+ input = i_stream_create_crlf(mail->data.stream);
+ *stream_r = i_stream_create_limit(input, *size_r);
+ i_stream_unref(&input);
+ mail_storage_free_binary_cache(_mail->box->storage);
+ } else {
+ *stream_r = cache->input;
+ i_stream_ref(cache->input); /* the cache keeps its own reference */
+ }
+ return 0;
+}