1 files changed, 990 insertions, 0 deletions
diff --git a/src/lib-storage/index/index-mail-headers.c b/src/lib-storage/index/index-mail-headers.c
new file mode 100644
index 0000000..ce23e9d
--- /dev/null
+++ b/src/lib-storage/index/index-mail-headers.c
@@ -0,0 +1,990 @@
+/* Copyright (c) 2003-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "istream.h"
+#include "array.h"
+#include "buffer.h"
+#include "str.h"
+#include "message-date.h"
+#include "message-part-data.h"
+#include "message-parser.h"
+#include "message-header-decode.h"
+#include "istream-tee.h"
+#include "istream-header-filter.h"
+#include "imap-envelope.h"
+#include "imap-bodystructure.h"
+#include "index-storage.h"
+#include "index-mail.h"
+/* Parser settings shared by all parsing below; hdr_flags is also passed on its own to message_parse_header(). */
+static const struct message_parser_settings msg_parser_set = {
+	.hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP |
+		MESSAGE_HEADER_PARSER_FLAG_DROP_CR,
+	.flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK,
+};
+
+static void index_mail_filter_stream_destroy(struct index_mail *mail);
+
+/* Sort by field index, then line number (compares, so no int overflow). */
+static int header_line_cmp(const struct index_mail_line *l1,
+			   const struct index_mail_line *l2)
+{
+	if (l1->field_idx != l2->field_idx)
+		return l1->field_idx < l2->field_idx ? -1 : 1;
+	return l1->line_num < l2->line_num ? -1 :
+		(l1->line_num > l2->line_num ? 1 : 0);
+}
+/* Clear the flag that index_mail_parse_header_init() asserts to be unset before it starts a new parse. */
+void index_mail_parse_header_deinit(struct index_mail *mail)
+{
+	mail->data.header_parser_initialized = FALSE;
+}
+/* End of headers: add the collected header lines to cache and record wanted headers that never appeared as empty. */
+static void index_mail_parse_header_finish(struct index_mail *mail)
+{
+	struct mail *_mail = &mail->mail.mail;
+	const struct index_mail_line *lines;
+	const unsigned char *header;
+	const uint8_t *match;
+	buffer_t *buf;
+	unsigned int i, j, count, match_idx, match_count;
+	bool noncontiguous;
+
+	/* sort the lines first, so that each field's lines are grouped
+	   together and ordered by line number */
+	array_sort(&mail->header_lines, header_line_cmp);
+
+	lines = array_get(&mail->header_lines, &count);
+	match = array_get(&mail->header_match, &match_count);
+	header = mail->header_data->data;
+	buf = t_buffer_create(256);
+
+	/* go through the found header lines, one field at a time */
+	for (i = match_idx = 0; i < count; i = j) {
+		/* matches and header lines are both sorted, all matches
+		   until lines[i] weren't found */
+		while (match_idx < lines[i].field_idx &&
+		       match_idx < match_count) {
+			if (HEADER_MATCH_USABLE(mail, match[match_idx]) &&
+			    mail_cache_field_can_add(_mail->transaction->cache_trans,
+						     _mail->seq, match_idx)) {
+				/* this header doesn't exist. remember that. */
+				i_assert((match[match_idx] &
+					  HEADER_MATCH_FLAG_FOUND) == 0);
+				index_mail_cache_add_idx(mail, match_idx,
+							 "", 0);
+			}
+			match_idx++;
+		}
+
+		if (match_idx < match_count) {
+			/* save the index + 1 of the field's first header line */
+			i_assert(match_idx == lines[i].field_idx);
+			j = i + 1;
+			array_idx_set(&mail->header_match_lines, match_idx, &j);
+			match_idx++;
+		}
+
+		if (!mail_cache_field_can_add(_mail->transaction->cache_trans,
+					      _mail->seq, lines[i].field_idx)) {
+			/* header is already cached. skip over all the
+			   header lines. */
+			for (j = i+1; j < count; j++) {
+				if (lines[j].field_idx != lines[i].field_idx)
+					break;
+			}
+			continue;
+		}
+
+		/* buffer contains: { uint32_t line_num[], 0, header texts }
+		   noncontiguous only decides whether one append is enough. */
+		buffer_set_used_size(buf, 0);
+		buffer_append(buf, &lines[i].line_num,
+			      sizeof(lines[i].line_num));
+
+		noncontiguous = FALSE;
+		for (j = i+1; j < count; j++) {
+			if (lines[j].field_idx != lines[i].field_idx)
+				break;
+
+			if (lines[j].start_pos != lines[j-1].end_pos)
+				noncontiguous = TRUE;
+			buffer_append(buf, &lines[j].line_num,
+				      sizeof(lines[j].line_num));
+		}
+		buffer_append_zero(buf, sizeof(uint32_t));
+
+		if (noncontiguous) {
+			for (; i < j; i++) {
+				buffer_append(buf, header + lines[i].start_pos,
+					      lines[i].end_pos -
+					      lines[i].start_pos);
+			}
+			i--; /* the loop left i == j; step back into this field's lines */
+		} else {
+			buffer_append(buf, header + lines[i].start_pos,
+				      lines[j-1].end_pos - lines[i].start_pos);
+		}
+
+		index_mail_cache_add_idx(mail, lines[i].field_idx,
+					 buf->data, buf->used);
+	}
+
+	for (; match_idx < match_count; match_idx++) {
+		if (HEADER_MATCH_USABLE(mail, match[match_idx]) &&
+		    mail_cache_field_can_add(_mail->transaction->cache_trans,
+					     _mail->seq, match_idx)) {
+			/* this header doesn't exist. remember that. */
+			i_assert((match[match_idx] &
+				  HEADER_MATCH_FLAG_FOUND) == 0);
+			index_mail_cache_add_idx(mail, match_idx, "", 0);
+		}
+	}
+
+	mail->data.dont_cache_field_idx = UINT_MAX;
+	index_mail_parse_header_deinit(mail);
+}
+
+/* Register the "hdr.<field>" header cache field and return its index. */
+static unsigned int
+get_header_field_idx(struct mailbox *box, const char *field)
+{
+	/* Registered with the NO decision on purpose: mail_cache_add() should
+	   follow soon, changing it to TEMP and emitting mail_cache_decision. */
+	struct mail_cache_field hdr_field = {
+		.type = MAIL_CACHE_FIELD_HEADER,
+		.decision = MAIL_CACHE_DECISION_NO,
+	};
+
+	T_BEGIN {
+		hdr_field.name = t_strconcat("hdr.", field, NULL);
+		mail_cache_register_fields(box->cache, &hdr_field, 1);
+	} T_END;
+	return hdr_field.idx;
+}
+
+/* TRUE if any of the reasons for parsing the header currently applies. */
+bool index_mail_want_parse_headers(struct index_mail *mail)
+{
+	const struct index_mail_data *data = &mail->data;
+
+	if (data->wanted_headers != NULL || data->save_bodystructure_header)
+		return TRUE;
+	/* otherwise only a wanted, not yet parsed sent date is a reason */
+	return (data->cache_fetch_fields & MAIL_FETCH_DATE) != 0 &&
+		!data->sent_date_parsed;
+}
+
+/* Mark every "hdr.*" cache field that the cache wants for this mail. */
+static void index_mail_parse_header_register_all_wanted(struct index_mail *mail)
+{
+	struct mail *_mail = &mail->mail.mail;
+	const struct mail_cache_field *all_cache_fields;
+	unsigned int i, count, idx;
+
+	all_cache_fields = mail_cache_register_get_list(_mail->box->cache,
+					pool_datastack_create(), &count);
+	for (i = 0; i < count; i++) {
+		if (strncasecmp(all_cache_fields[i].name, "hdr.", 4) != 0)
+			continue;
+		idx = all_cache_fields[i].idx; /* same field for check and mark */
+		if (!mail_cache_field_want_add(_mail->transaction->cache_trans,
+					       _mail->seq, idx))
+			continue;
+		array_idx_set(&mail->header_match, idx,
+			      &mail->header_match_value);
+	}
+}
+/* Reset the header parsing state for this mail and mark which headers' lines should be collected. */
+void index_mail_parse_header_init(struct index_mail *mail,
+				  struct mailbox_header_lookup_ctx *headers)
+{
+	struct index_mail_data *data = &mail->data;
+	const uint8_t *match;
+	unsigned int i, field_idx, match_count;
+
+	index_mail_filter_stream_destroy(mail); /* finishes a previous filter stream's parsing */
+	i_assert(!mail->data.header_parser_initialized);
+
+	mail->header_seq = mail->mail.mail.seq;
+	if (mail->header_data == NULL) {
+		mail->header_data = buffer_create_dynamic(default_pool, 4096);
+		i_array_init(&mail->header_lines, 32);
+		i_array_init(&mail->header_match, 32);
+		i_array_init(&mail->header_match_lines, 32);
+		mail->header_match_value = HEADER_MATCH_SKIP_COUNT;
+	} else {
+		buffer_set_used_size(mail->header_data, 0);
+		array_clear(&mail->header_lines);
+		array_clear(&mail->header_match_lines);
+
+		i_assert((mail->header_match_value &
+			  (HEADER_MATCH_SKIP_COUNT-1)) == 0);
+		if (mail->header_match_value + HEADER_MATCH_SKIP_COUNT <= UINT8_MAX)
+			mail->header_match_value += HEADER_MATCH_SKIP_COUNT;
+		else {
+			/* the value would pass UINT8_MAX: clear the old marks and restart */
+			array_clear(&mail->header_match);
+			mail->header_match_value = HEADER_MATCH_SKIP_COUNT;
+		}
+	}
+
+	if (headers != NULL) {
+		for (i = 0; i < headers->count; i++) {
+			array_idx_set(&mail->header_match, headers->idx[i],
+				      &mail->header_match_value);
+		}
+	}
+
+	if (data->wanted_headers != NULL && data->wanted_headers != headers) {
+		headers = data->wanted_headers;
+		for (i = 0; i < headers->count; i++) {
+			array_idx_set(&mail->header_match, headers->idx[i],
+				      &mail->header_match_value);
+		}
+	}
+
+	/* also mark all the other hdr.* fields that the cache wants added */
+	T_BEGIN {
+		index_mail_parse_header_register_all_wanted(mail);
+	} T_END;
+
+	/* if we want sent date, it doesn't mean that we also want to cache
+	   Date: header. if Date field's index is already marked at this point
+	   we know that we want it cached. otherwise mark it now and remember
+	   in dont_cache_field_idx that we don't want it cached. */
+	field_idx = get_header_field_idx(mail->mail.mail.box, "Date");
+	match = array_get(&mail->header_match, &match_count);
+	if (field_idx < match_count &&
+	    match[field_idx] == mail->header_match_value) {
+		/* cache Date: header */
+	} else if ((data->cache_fetch_fields & MAIL_FETCH_DATE) != 0 ||
+		   data->save_sent_date) {
+		/* parse Date: header, but don't cache it. */
+		data->dont_cache_field_idx = field_idx;
+		array_idx_set(&mail->header_match, field_idx,
+			      &mail->header_match_value);
+	}
+	mail->data.header_parser_initialized = TRUE;
+	mail->data.parse_line_num = 0;
+	i_zero(&mail->data.parse_line);
+}
+
+/* Write the parsed envelope into mail->data.envelope and cache it if possible. */
+static void index_mail_parse_finish_imap_envelope(struct index_mail *mail)
+{
+	struct mail *_mail = &mail->mail.mail;
+	struct index_mail_data *data = &mail->data;
+	const unsigned int envelope_idx =
+		mail->ibox->cache_fields[MAIL_CACHE_IMAP_ENVELOPE].idx;
+	string_t *envelope = str_new(mail->mail.data_pool, 256);
+
+	imap_envelope_write(data->envelope_data, envelope);
+	data->envelope = str_c(envelope);
+	data->save_envelope = FALSE;
+	if (!mail_cache_field_can_add(_mail->transaction->cache_trans,
+				      _mail->seq, envelope_idx))
+		return;
+	index_mail_cache_add_idx(mail, envelope_idx,
+				 str_data(envelope), str_len(envelope));
+}
+/* Header callback: handles one header line (hdr == NULL at end of headers) for bodystructure, envelope and cache collection. */
+void index_mail_parse_header(struct message_part *part,
+			     struct message_header_line *hdr,
+			     struct index_mail *mail)
+{
+	struct mail *_mail = &mail->mail.mail;
+	struct index_mail_data *data = &mail->data;
+	unsigned int field_idx, count;
+	uint8_t *match;
+
+	i_assert(data->header_parser_initialized);
+
+        data->parse_line_num++;
+
+	if (data->save_bodystructure_header &&
+	    !data->parsed_bodystructure_header) {
+		i_assert(part != NULL);
+		message_part_data_parse_from_header(mail->mail.data_pool, part, hdr);
+	}
+
+	if (data->save_envelope) {
+		message_part_envelope_parse_from_header(mail->mail.data_pool,
+					   &data->envelope_data, hdr);
+
+		if (hdr == NULL)
+                        index_mail_parse_finish_imap_envelope(mail);
+	}
+
+	if (hdr == NULL) {
+		/* end of headers: add the collected lines to cache */
+		if (mail->data.save_sent_date)
+			mail->data.sent_date_parsed = TRUE;
+		T_BEGIN {
+			index_mail_parse_header_finish(mail);
+		} T_END;
+		if (data->save_bodystructure_header) {
+			i_assert(data->parser_ctx != NULL);
+			data->parsed_bodystructure_header = TRUE;
+		}
+		return;
+	}
+
+	if (!hdr->continued) { /* continued lines reuse the previous field_idx */
+		T_BEGIN {
+			const char *cache_field_name =
+				t_strconcat("hdr.", hdr->name, NULL);
+			data->parse_line.field_idx =
+				mail_cache_register_lookup(_mail->box->cache,
+							   cache_field_name);
+		} T_END;
+	}
+	field_idx = data->parse_line.field_idx;
+	match = array_get_modifiable(&mail->header_match, &count);
+	if (field_idx >= count ||
+	    !HEADER_MATCH_USABLE(mail, match[field_idx])) {
+		/* we don't want this header. */
+		return;
+	}
+
+	if (!hdr->continued) {
+		/* first line of a header: remember its start and add the name */
+		data->parse_line.start_pos = str_len(mail->header_data);
+		data->parse_line.line_num = data->parse_line_num;
+		str_append(mail->header_data, hdr->name);
+		str_append_data(mail->header_data, hdr->middle, hdr->middle_len);
+
+		/* remember that we saw this header so we don't add it to
+		   cache as nonexistent. */
+		match[field_idx] |= HEADER_MATCH_FLAG_FOUND;
+	}
+	str_append_data(mail->header_data, hdr->value, hdr->value_len);
+	if (!hdr->no_newline)
+		str_append(mail->header_data, "\n");
+	if (!hdr->continues) { /* last line of this header: record its extent */
+		data->parse_line.end_pos = str_len(mail->header_data);
+		array_push_back(&mail->header_lines, &data->parse_line);
+	}
+}
+/* Callback given to message_parser_parse_header(); passes the part and header line on unchanged. */
+static void
+index_mail_parse_part_header_cb(struct message_part *part,
+				struct message_header_line *hdr,
+				struct index_mail *mail)
+{
+	index_mail_parse_header(part, hdr, mail);
+}
+/* Callback given to message_parse_header(); uses mail->data.parts as the part of each header line. */
+static void
+index_mail_parse_header_cb(struct message_header_line *hdr,
+			   struct index_mail *mail)
+{
+	index_mail_parse_header(mail->data.parts, hdr, mail);
+}
+/* Start parsing the mail read from input: the parser and the returned stream each read their own child of a tee stream. */
+struct istream *
+index_mail_cache_parse_init(struct mail *_mail, struct istream *input)
+{
+	struct index_mail *mail = INDEX_MAIL(_mail);
+	struct istream *input2;
+
+	i_assert(mail->data.tee_stream == NULL);
+	i_assert(mail->data.parser_ctx == NULL);
+
+	/* we're doing everything for now, figure out later if we want to
+	   save them. */
+	mail->data.save_sent_date = TRUE;
+	mail->data.save_bodystructure_header = TRUE;
+	mail->data.save_bodystructure_body = TRUE;
+	/* Don't unnecessarily waste time generating a snippet, since it's
+	   not as cheap as the others to generate. */
+	if (index_mail_want_cache(mail, MAIL_CACHE_BODY_SNIPPET))
+		mail->data.save_body_snippet = TRUE;
+
+	mail->data.tee_stream = tee_i_stream_create(input);
+	input = tee_i_stream_create_child(mail->data.tee_stream);
+	input2 = tee_i_stream_create_child(mail->data.tee_stream);
+
+	index_mail_parse_header_init(mail, NULL);
+	mail->data.parser_input = input;
+	mail->data.parser_ctx =
+		message_parser_init(mail->mail.data_pool, input,
+				    &msg_parser_set);
+	i_stream_unref(&input); /* drops only this function's reference to the parser's child */
+	return input2;
+}
+/* (Re)create data->parser_ctx for parsing data->stream from its beginning, reusing data->parts when they exist. */
+static void index_mail_init_parser(struct index_mail *mail)
+{
+	struct index_mail_data *data = &mail->data;
+	struct message_part *parts;
+	const char *error;
+
+	if (data->parser_ctx != NULL) {
+		data->parser_input = NULL;
+		if (message_parser_deinit_from_parts(&data->parser_ctx, &parts, &error) < 0) {
+			index_mail_set_message_parts_corrupted(&mail->mail.mail, error);
+			index_mail_parts_reset(mail);
+		}
+		if (data->parts == NULL || data->parts != parts) {
+			/* The previous parsing didn't finish, so we're
+			   re-parsing the header. The new parts don't have data
+			   filled anymore. */
+			data->parsed_bodystructure_header = FALSE;
+		}
+	}
+
+	/* make sure parsing starts from the beginning of the stream */
+	i_stream_seek(mail->data.stream, 0);
+	if (data->parts == NULL) {
+		data->parser_input = data->stream;
+		data->parser_ctx = message_parser_init(mail->mail.data_pool,
+						       data->stream,
+						       &msg_parser_set);
+	} else {
+		data->parser_ctx =
+			message_parser_init_from_parts(data->parts,
+						       data->stream,
+						       &msg_parser_set);
+	}
+}
+/* Parse the header of data->stream (which must be set). Returns 0 on success, -1 if the stream check fails. */
+int index_mail_parse_headers_internal(struct index_mail *mail,
+				      struct mailbox_header_lookup_ctx *headers)
+{
+	struct index_mail_data *data = &mail->data;
+
+	i_assert(data->stream != NULL);
+
+	index_mail_parse_header_init(mail, headers);
+
+	if (data->parts == NULL || data->save_bodystructure_header ||
+	    (data->access_part & PARSE_BODY) != 0) {
+		/* initialize bodystructure parsing in case we read the whole
+		   message. */
+		index_mail_init_parser(mail);
+		message_parser_parse_header(data->parser_ctx, &data->hdr_size,
+					    index_mail_parse_part_header_cb,
+					    mail);
+	} else {
+		/* just read the header */
+		i_assert(!data->save_bodystructure_body ||
+			 data->parser_ctx != NULL);
+		message_parse_header(data->stream, &data->hdr_size,
+				     msg_parser_set.hdr_flags,
+				     index_mail_parse_header_cb, mail);
+	}
+	if (index_mail_stream_check_failure(mail) < 0) {
+		index_mail_parse_header_deinit(mail);
+		return -1;
+	}
+	i_assert(!mail->data.header_parser_initialized); /* cleared by index_mail_parse_header_finish() */
+	data->hdr_size_set = TRUE;
+	data->access_part &= ENUM_NEGATE(PARSE_HDR);
+	return 0;
+}
+
+/* Open the mail's header stream for the given reason, parse the headers and
+   seek data->stream back to where it was. Returns 0 on success, -1 on error. */
+int index_mail_parse_headers(struct index_mail *mail,
+			     struct mailbox_header_lookup_ctx *headers,
+			     const char *reason)
+{
+	struct index_mail_data *data = &mail->data;
+	struct istream *hdr_stream;
+	uoff_t saved_offset = data->stream == NULL ? 0 : data->stream->v_offset;
+	int ret;
+
+	if (mail_get_hdr_stream_because(&mail->mail.mail, NULL, reason, &hdr_stream) < 0)
+		return -1;
+	ret = index_mail_parse_headers_internal(mail, headers);
+	i_stream_seek(data->stream, saved_offset);
+	return ret;
+}
+/* message_parse_header() callback that parses only the envelope; hdr == NULL ends the headers and finishes it. */
+static void
+imap_envelope_parse_callback(struct message_header_line *hdr,
+			     struct index_mail *mail)
+{
+	message_part_envelope_parse_from_header(mail->mail.data_pool,
+				   &mail->data.envelope_data, hdr);
+
+	if (hdr == NULL)
+		index_mail_parse_finish_imap_envelope(mail);
+}
+/* Set mail->data.envelope from the cache, or by parsing the envelope headers. Returns 0 on success, -1 on error. */
+int index_mail_headers_get_envelope(struct index_mail *mail)
+{
+	const unsigned int cache_field_envelope =
+		mail->ibox->cache_fields[MAIL_CACHE_IMAP_ENVELOPE].idx;
+	struct mailbox_header_lookup_ctx *header_ctx;
+	struct istream *stream;
+	uoff_t old_offset;
+	string_t *str;
+
+	str = str_new(mail->mail.data_pool, 256);
+	if (index_mail_cache_lookup_field(mail, str,
+					  cache_field_envelope) > 0) {
+		mail->data.envelope = str_c(str);
+		return 0;
+	}
+	str_free(&str);
+
+	old_offset = mail->data.stream == NULL ? 0 :
+		mail->data.stream->v_offset;
+
+	/* Make sure header_cache_callback() isn't also parsing the ENVELOPE.
+	   Otherwise two callbacks are doing it and mixing up results. */
+	mail->data.save_envelope = FALSE;
+
+	header_ctx = mailbox_header_lookup_init(mail->mail.mail.box,
+						message_part_envelope_headers);
+	if (mail_get_header_stream(&mail->mail.mail, header_ctx, &stream) < 0) {
+		mailbox_header_lookup_unref(&header_ctx);
+		return -1;
+	}
+	mailbox_header_lookup_unref(&header_ctx);
+
+	if (mail->data.envelope == NULL) {
+		/* we got the headers from cache - parse them to get the
+		   envelope */
+		message_parse_header(stream, NULL, msg_parser_set.hdr_flags,
+				     imap_envelope_parse_callback, mail);
+		if (stream->stream_errno != 0) {
+			index_mail_stream_log_failure_for(mail, stream);
+			return -1; /* NOTE(review): the old stream offset isn't restored on this path */
+		}
+		i_assert(mail->data.envelope != NULL);
+	}
+
+	if (mail->data.stream != NULL)
+		i_stream_seek(mail->data.stream, old_offset);
+	return 0;
+}
+
+/* Length of the (possibly folded) header value starting at pos: up to the
+   first LF that isn't followed by a space or tab, or to the buffer's end. */
+static size_t get_header_size(buffer_t *buffer, size_t pos)
+{
+	const unsigned char *bytes = buffer->data;
+	size_t end = buffer->used, cur = pos;
+	i_assert(pos <= end);
+	for (; cur < end; cur++) {
+		if (bytes[cur] != '\n')
+			continue;
+		if (cur + 1 == end ||
+		    (bytes[cur + 1] != ' ' && bytes[cur + 1] != '\t'))
+			return cur - pos;
+	}
+	return end - pos;
+}
+
+/* 1 = header was seen while parsing, 0 = it wasn't, -1 = field not usable. */
+static int index_mail_header_is_parsed(struct index_mail *mail,
+				       unsigned int field_idx)
+{
+	unsigned int match_count;
+	const uint8_t *match = array_get(&mail->header_match, &match_count);
+
+	if (field_idx >= match_count || !HEADER_MATCH_USABLE(mail, match[field_idx]))
+		return -1;
+	return (match[field_idx] & HEADER_MATCH_FLAG_FOUND) != 0 ? 1 : 0;
+}
+
+/* Advance *data past "name:" and any LWSP after the colon. Returns FALSE and
+   leaves *data untouched if the first len bytes contain no colon. */
+static bool skip_header(const unsigned char **data, size_t len)
+{
+	const unsigned char *line = *data;
+	size_t pos = 0;
+
+	while (pos < len && line[pos] != ':')
+		pos++;
+	if (pos == len)
+		return FALSE;
+
+	/* step over the colon, then over the whitespace that follows it */
+	pos++;
+	while (pos < len && IS_LWSP(line[pos]))
+		pos++;
+
+	*data = line + pos;
+	return TRUE;
+}
+
+/* Build a NULL-terminated list of the parsed values of header field_idx. */
+static const char *const *
+index_mail_get_parsed_header(struct index_mail *mail, unsigned int field_idx)
+{
+	ARRAY(const char *) header_values;
+	const struct index_mail_line *lines, *line;
+	const unsigned char *header, *value_start, *value_end;
+	const unsigned int *line_idx;
+	const char *value;
+	unsigned int i, lines_count, first_line_idx;
+
+	/* header_match_lines holds the first line's index + 1 */
+	line_idx = array_idx(&mail->header_match_lines, field_idx);
+	i_assert(*line_idx != 0);
+	first_line_idx = *line_idx - 1;
+
+	p_array_init(&header_values, mail->mail.data_pool, 4);
+	header = mail->header_data->data;
+	lines = array_get(&mail->header_lines, &lines_count);
+	for (i = first_line_idx; i < lines_count; i++) {
+		line = &lines[i];
+		if (line->field_idx != lines[first_line_idx].field_idx)
+			break;
+		/* skip "name:" and drop the ending LF */
+		value_start = header + line->start_pos;
+		value_end = header + line->end_pos;
+		if (!skip_header(&value_start, value_end - value_start))
+			continue;
+		if (value_start != value_end && value_end[-1] == '\n')
+			value_end--;
+		value = message_header_strdup(mail->mail.data_pool, value_start,
+					      value_end - value_start);
+		array_push_back(&header_values, &value);
+	}
+
+	array_append_zero(&header_values);
+	return array_front(&header_values);
+}
+
+/* Get all raw values of a header as a NULL-terminated list in *value_r: from
+   the cache if possible, otherwise from the parsed headers. 0 = ok, -1 = error. */
+static int
+index_mail_get_raw_headers(struct index_mail *mail, const char *field,
+			   const char *const **value_r)
+{
+	struct mail *_mail = &mail->mail.mail;
+	const char *headers[2], *value;
+	struct mailbox_header_lookup_ctx *headers_ctx;
+	const unsigned char *data;
+	unsigned int field_idx;
+	string_t *dest;
+	size_t i, len, len2;
+	int ret;
+	ARRAY(const char *) header_values;
+
+	i_assert(field != NULL);
+	field_idx = get_header_field_idx(_mail->box, field);
+	dest = t_str_new(128);
+	if (mail_cache_lookup_headers(_mail->transaction->cache_view, dest,
+				      _mail->seq, &field_idx, 1) <= 0) {
+		/* not in cache / error - first see if it's already parsed.
+		   dest is in the data stack, so it must not be p_free()d. */
+		if (mail->data.header_parser_initialized) {
+			/* don't try to parse headers recursively. we're here
+			   because message size was wrong and istream-mail
+			   wants to log some cached headers. */
+			i_assert(mail->mail.mail.lookup_abort >= MAIL_LOOKUP_ABORT_NOT_IN_CACHE);
+			mail_set_aborted(&mail->mail.mail);
+			return -1;
+		}
+		if (mail->header_seq != mail->mail.mail.seq ||
+		    index_mail_header_is_parsed(mail, field_idx) < 0) {
+			/* parse */
+			const char *reason = index_mail_cache_reason(_mail,
+				t_strdup_printf("header %s", field));
+			headers[0] = field; headers[1] = NULL;
+			headers_ctx = mailbox_header_lookup_init(_mail->box,
+								 headers);
+			ret = index_mail_parse_headers(mail, headers_ctx, reason);
+			mailbox_header_lookup_unref(&headers_ctx);
+			if (ret < 0)
+				return -1;
+		}
+
+		if ((ret = index_mail_header_is_parsed(mail, field_idx)) <= 0) {
+			/* not found */
+			i_assert(ret != -1);
+			*value_r = p_new(mail->mail.data_pool, const char *, 1);
+			return 0;
+		}
+		*value_r = index_mail_get_parsed_header(mail, field_idx);
+		return 0;
+	}
+	_mail->transaction->stats.cache_hit_count++;
+	data = buffer_get_data(dest, &len);
+
+	if (len == 0) {
+		/* cached as nonexistent. */
+		*value_r = p_new(mail->mail.data_pool, const char *, 1);
+		return 0;
+	}
+
+	p_array_init(&header_values, mail->mail.data_pool, 4);
+
+	/* cached. skip "header name: " parts in dest. */
+	for (i = 0; i < len; i++) {
+		if (data[i] == ':') {
+			i++;
+			while (i < len && IS_LWSP(data[i])) i++;
+
+			/* @UNSAFE */
+			len2 = get_header_size(dest, i);
+			value = message_header_strdup(mail->mail.data_pool,
+						     data + i, len2);
+			i += len2 + 1;
+
+			array_push_back(&header_values, &value);
+		}
+	}
+
+	array_append_zero(&header_values);
+	*value_r = array_front(&header_values);
+	return 0;
+}
+
+/* Turn every LF + space/tab pair (and a trailing LF) of *_str into a single
+   space, in a copy allocated from pool. Returns -1 for any other LF use. */
+static int unfold_header(pool_t pool, const char **_str)
+{
+	const char *src = *_str;
+	char *unfolded;
+	unsigned int in, out;
+
+	/* without any LF there is nothing to unfold and *_str stays as is */
+	for (in = 0; src[in] != '\0' && src[in] != '\n'; in++)
+		;
+	if (src[in] == '\0')
+		return 0;
+
+	/* @UNSAFE */
+	unfolded = p_malloc(pool, in + strlen(src + in) + 1);
+	memcpy(unfolded, src, in);
+	out = in;
+	while (src[in] != '\0') {
+		if (src[in] != '\n') {
+			unfolded[out++] = src[in++];
+			continue;
+		}
+		unfolded[out++] = ' ';
+		in++;
+		if (src[in] == '\0')
+			break;
+		if (src[in] != ' ' && src[in] != '\t')
+			return -1; /* corrupted */
+		in++; /* the folding whitespace itself is dropped */
+	}
+	unfolded[out] = '\0';
+	*_str = unfolded;
+	return 0;
+}
+
+/* Overwrite every NUL byte in the string's data with a space. */
+static void str_replace_nuls(string_t *str)
+{
+	char *p = str_c_modifiable(str), *end = p + str_len(str);
+
+	for (; p != end; p++) {
+		if (*p == '\0')
+			*p = ' ';
+	}
+}
+
+/* Unfold and MIME-decode the first max_count values of *_list. On success
+   *_list points to the new list; returns -1 if a value can't be unfolded. */
+static int
+index_mail_headers_decode(struct index_mail *mail, const char *const **_list,
+			  unsigned int max_count)
+{
+	pool_t pool = mail->mail.data_pool;
+	const char *const *raw_list = *_list;
+	const char **result, *value;
+	string_t *decoded = t_str_new(512);
+	unsigned int idx, n = str_array_length(raw_list);
+
+	if (n > max_count)
+		n = max_count;
+	result = p_new(pool, const char *, n + 1);
+
+	for (idx = 0; idx < n; idx++) {
+		value = raw_list[idx];
+		/* unfold all lines into a single line */
+		if (unfold_header(pool, &value) < 0)
+			return -1;
+
+		/* decode MIME encoded-words. decoding may also add new LFs. */
+		str_truncate(decoded, 0);
+		message_header_decode_utf8((const unsigned char *)value,
+					   strlen(value), decoded, NULL);
+		if (strcmp(str_c(decoded), value) != 0) {
+			/* replace NULs with spaces before duplicating */
+			if (strlen(str_c(decoded)) != str_len(decoded))
+				str_replace_nuls(decoded);
+			value = p_strdup(pool, str_c(decoded));
+		}
+		result[idx] = value;
+	}
+	*_list = result;
+	return 0;
+}
+
+/* Get all values of a header as a NULL-terminated list in *value_r. Returns 1
+   if the header exists, 0 if not and -1 on error. A value that fails to
+   decode marks the cache corrupted and is retried once. */
+int index_mail_get_headers(struct mail *_mail, const char *field,
+			   bool decode_to_utf8, const char *const **value_r)
+{
+	struct index_mail *mail = INDEX_MAIL(_mail);
+	bool first_try;
+	int ret;
+
+	for (first_try = TRUE;; first_try = FALSE) {
+		if (index_mail_get_raw_headers(mail, field, value_r) < 0)
+			return -1;
+		if (**value_r == NULL)
+			return 0;
+		if (!decode_to_utf8)
+			return 1;
+
+		T_BEGIN {
+			ret = index_mail_headers_decode(mail, value_r, UINT_MAX);
+		} T_END;
+		if (ret >= 0 || !first_try)
+			break;
+		/* broken cached header: mark it and try once more */
+		mail_set_mail_cache_corrupted(_mail, "Broken header %s", field);
+	}
+	if (ret < 0) {
+		i_panic("BUG: Broken header %s for mail UID %u "
+			"wasn't fixed by re-parsing the header",
+			field, _mail->uid);
+	}
+	return 1;
+}
+
+/* Get the first value of a header into *value_r (NULL if it doesn't exist).
+   Returns 1 if the header exists, 0 if not and -1 on error. A value that
+   fails to decode marks the cache corrupted and is retried once. */
+int index_mail_get_first_header(struct mail *_mail, const char *field,
+				bool decode_to_utf8, const char **value_r)
+{
+	struct index_mail *mail = INDEX_MAIL(_mail);
+	const char *const *values;
+	bool first_try;
+	int ret;
+
+	for (first_try = TRUE;; first_try = FALSE) {
+		if (index_mail_get_raw_headers(mail, field, &values) < 0)
+			return -1;
+		ret = 0;
+		if (values[0] == NULL || !decode_to_utf8)
+			break;
+
+		T_BEGIN {
+			ret = index_mail_headers_decode(mail, &values, 1);
+		} T_END;
+
+		if (ret >= 0 || !first_try)
+			break;
+		/* retry by parsing the full header */
+		mail_set_mail_cache_corrupted(_mail, "Broken header %s",
+					      field);
+	}
+	if (ret < 0) {
+		i_panic("BUG: Broken header %s for mail UID %u "
+			"wasn't fixed by re-parsing the header",
+			field, _mail->uid);
+	}
+	*value_r = values[0];
+	return values[0] != NULL ? 1 : 0;
+}
+/* Header filter stream callback: hands each header line to index_mail_parse_header() without a message part. */
+static void
+header_cache_callback(struct header_filter_istream *input ATTR_UNUSED,
+		      struct message_header_line *hdr,
+		      bool *matched ATTR_UNUSED, struct index_mail *mail)
+{
+	index_mail_parse_header(NULL, hdr, mail);
+}
+
+/* Drain and destroy the previous header filter stream, if there is one. */
+static void index_mail_filter_stream_destroy(struct index_mail *mail)
+{
+	struct index_mail_data *mdata = &mail->data;
+	const unsigned char *bytes;
+	size_t avail;
+
+	if (mdata->filter_stream == NULL)
+		return;
+	/* read through the previous filter_stream: this adds the fields to cache
+	   and resets header_parser_initialized, so we don't assert on it. */
+	while (i_stream_read_more(mdata->filter_stream, &bytes, &avail) > 0)
+		i_stream_skip(mdata->filter_stream, avail);
+	if (mdata->header_parser_initialized) {
+		/* istream failed while reading the header */
+		i_assert(mdata->filter_stream->stream_errno != 0);
+		index_mail_parse_header_deinit(mail);
+	}
+	i_stream_destroy(&mdata->filter_stream);
+}
+/* Return in *stream_r a stream of the wanted headers: from cache if the lookup succeeds, else a header filter over the mail. 0 = ok, -1 = error. */
+int index_mail_get_header_stream(struct mail *_mail,
+				 struct mailbox_header_lookup_ctx *headers,
+				 struct istream **stream_r)
+{
+	struct index_mail *mail = INDEX_MAIL(_mail);
+	struct istream *input;
+	string_t *dest;
+
+	index_mail_filter_stream_destroy(mail);
+
+	if (mail->data.save_bodystructure_header) {
+		/* we have to parse the header. */
+		const char *reason =
+			index_mail_cache_reason(_mail, "bodystructure");
+		mail->data.access_reason_code = "mail:header_fields";
+		if (index_mail_parse_headers(mail, headers, reason) < 0)
+			return -1;
+	}
+
+	dest = str_new(mail->mail.data_pool, 256);
+	if (mail_cache_lookup_headers(_mail->transaction->cache_view, dest,
+				      _mail->seq, headers->idx,
+				      headers->count) > 0) {
+		str_append(dest, "\n"); /* adds an empty line after the cached header lines */
+		_mail->transaction->stats.cache_hit_count++;
+		mail->data.filter_stream =
+			i_stream_create_from_data(str_data(dest),
+						  str_len(dest));
+		*stream_r = mail->data.filter_stream;
+		return 0;
+	}
+	/* not in cache / error */
+	p_free(mail->mail.data_pool, dest);
+
+	unsigned int first_not_found = UINT_MAX, not_found_count = 0;
+	for (unsigned int i = 0; i < headers->count; i++) {
+		if (mail_cache_field_exists(_mail->transaction->cache_view,
+					    _mail->seq, headers->idx[i]) <= 0) {
+			if (not_found_count++ == 0)
+				first_not_found = i;
+		}
+	}
+
+	const char *reason;
+	if (not_found_count == 0)
+		reason = "BUG: all headers seem to exist in cache";
+	else {
+		i_assert(first_not_found != UINT_MAX);
+		reason = index_mail_cache_reason(_mail, t_strdup_printf(
+			"%u/%u headers not cached (first=%s)",
+			not_found_count, headers->count, headers->name[first_not_found]));
+	}
+	mail->data.access_reason_code = "mail:header_fields";
+	if (mail_get_hdr_stream_because(_mail, NULL, reason, &input) < 0)
+		return -1;
+
+	index_mail_parse_header_init(mail, headers);
+	mail->data.filter_stream =
+		i_stream_create_header_filter(mail->data.stream,
+					      HEADER_FILTER_INCLUDE |
+					      HEADER_FILTER_ADD_MISSING_EOH |
+					      HEADER_FILTER_HIDE_BODY,
+					      headers->name, headers->count,
+					      header_cache_callback, mail);
+	*stream_r = mail->data.filter_stream;
+	return 0;
+}