1 files changed, 907 insertions, 0 deletions
diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c
new file mode 100644
index 0000000..9a9c9a3
--- /dev/null
+++ b/src/lib-mail/message-parser.c
@@ -0,0 +1,907 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "istream.h"
+#include "rfc822-parser.h"
+#include "rfc2231-parser.h"
+#include "message-parser-private.h"
+/* NULL-initialized callback pointer; presumably a typed "no callback" value. */
+message_part_header_callback_t *null_message_part_header_callback = NULL;
+/* Block parser states that are referenced before their definitions below. */
+static int parse_next_header_init(struct message_parser_ctx *ctx,
+				  struct message_block *block_r);
+static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
+				       struct message_block *block_r);
+static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
+				  struct message_block *block_r);
+
+/* Returns the boundary whose string is a prefix of data[0..len), or NULL if
+   there is none. The list is walked from its head, which is where new
+   boundaries get added, and only a strictly longer match replaces an
+   earlier one. So the longest matching prefix wins, and among equally long
+   ones the most recently added boundary wins. */
+static struct message_boundary *
+boundary_find(struct message_boundary *boundaries,
+	      const unsigned char *data, size_t len, bool trailing_dashes)
+{
+	struct message_boundary *cand, *longest = NULL;
+
+	for (cand = boundaries; cand != NULL; cand = cand->next) {
+		if (cand->len > len)
+			continue; /* can't be a prefix of shorter data */
+		if (memcmp(cand->boundary, data, cand->len) != 0)
+			continue;
+		if (longest != NULL && longest->len >= cand->len)
+			continue; /* an earlier match is at least as long */
+
+		longest = cand;
+		/* "foo--" may either end a boundary named "foo" or begin a
+		   boundary named "foo--...". If this boundary covers the
+		   whole line in either sense, it is exactly the wanted
+		   one and there can't be a better match. */
+		if (cand->len == len)
+			break;
+		if (trailing_dashes && cand->len == len-2)
+			break;
+	}
+
+	return longest;
+}
+
+/* Accounts a chunk of body data to the current part: flags NUL bytes, counts
+   lines, and grows the physical and virtual body sizes. The virtual size
+   gets one extra byte for every LF that isn't preceded by a CR. The block's
+   size is also added to ctx->skip, so the next read skips over it. */
+static void parse_body_add_block(struct message_parser_ctx *ctx,
+				 struct message_block *block)
+{
+	const unsigned char *data = block->data;
+	const unsigned char *end = data + block->size;
+	const unsigned char *lf;
+	unsigned int bare_lf_count = 0;
+
+	i_assert(block->size > 0);
+
+	block->hdr = NULL;
+	if (memchr(data, '\0', block->size) != NULL)
+		ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
+
+	/* An LF in the first byte has its possible CR in the previous data,
+	   so it's checked against the byte kept in ctx->last_chr. */
+	if (data[0] == '\n') {
+		ctx->part->body_size.lines++;
+		if (ctx->last_chr != '\r')
+			bare_lf_count++;
+	}
+	for (lf = data + 1; (lf = memchr(lf, '\n', end - lf)) != NULL; lf++) {
+		ctx->part->body_size.lines++;
+		if (lf[-1] != '\r')
+			bare_lf_count++;
+	}
+	ctx->last_chr = end[-1];
+	ctx->skip += block->size;
+	ctx->part->body_size.physical_size += block->size;
+	ctx->part->body_size.virtual_size += block->size + bare_lf_count;
+}
+/* Skips consumed data and reads more: returns 1=data, 0=wait, -1=EOF/error. */
+int message_parser_read_more(struct message_parser_ctx *ctx,
+			     struct message_block *block_r, bool *full_r)
+{
+	int ret;
+
+	if (ctx->skip > 0) {
+		i_stream_skip(ctx->input, ctx->skip);
+		ctx->skip = 0;
+	}
+
+	*full_r = FALSE;
+	ret = i_stream_read_bytes(ctx->input, &block_r->data,
+				  &block_r->size, ctx->want_count + 1);
+	if (ret <= 0) {
+		switch (ret) {
+		case 0:
+			if (!ctx->input->eof) {
+				i_assert(!ctx->input->blocking);
+				return 0;
+			}
+			break;
+		case -1:
+			i_assert(ctx->input->eof ||
+				 ctx->input->stream_errno != 0);
+			ctx->eof = TRUE;
+			if (block_r->size != 0) {
+				/* EOF, but we still have some data.
+				   return it. */
+				return 1;
+			}
+			return -1;
+		case -2: /* NOTE(review): presumably "buffer full" - confirm */
+			*full_r = TRUE;
+			break;
+		default:
+			i_unreached();
+		}
+	}
+
+	if (!*full_r) {
+		/* reset number of wanted characters if we actually got them */
+		ctx->want_count = 1;
+	}
+	return 1;
+}
+/* Creates a new child part under ctx->part and makes it the current part. */
+static void
+message_part_append(struct message_parser_ctx *ctx)
+{
+	struct message_part *parent = ctx->part;
+	struct message_part *part;
+
+	i_assert(!ctx->preparsed);
+	i_assert(parent != NULL);
+	i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART |
+				   MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0);
+
+	part = p_new(ctx->part_pool, struct message_part, 1);
+	part->parent = parent;
+
+	/* set child position */
+	part->physical_pos =
+		parent->physical_pos +
+		parent->body_size.physical_size +
+		parent->header_size.physical_size;
+
+	/* add to parent's linked list */
+	*ctx->next_part = part;
+	/* remember where the next sibling goes; message_part_finish() pops it */
+	struct message_part **next_part = &part->next;
+	array_push_back(&ctx->next_part_stack, &next_part);
+	/* This part is now the new parent for the next message_part_append()
+	   call. Its linked list begins with the children pointer. */
+	ctx->next_part = &part->children;
+
+	ctx->part = part;
+	ctx->nested_parts_count++;
+	ctx->total_parts_count++;
+	i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts);
+	i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts);
+}
+
+/* Adds the current part's sizes to its parent and makes the parent current. */
+static void message_part_finish(struct message_parser_ctx *ctx)
+{
+	struct message_part *child = ctx->part, *parent = child->parent;
+	struct message_part **const *saved_nextp;
+
+	if (!ctx->preparsed) {
+		i_assert(ctx->nested_parts_count > 0);
+		ctx->nested_parts_count--;
+		saved_nextp = array_back(&ctx->next_part_stack);
+		array_pop_back(&ctx->next_part_stack);
+		ctx->next_part = *saved_nextp;
+	}
+	message_size_add(&parent->body_size, &child->body_size);
+	message_size_add(&parent->body_size, &child->header_size);
+	parent->children_count += 1 + child->children_count;
+	ctx->part = parent;
+}
+/* Frees a boundary list node together with its boundary string. */
+static void message_boundary_free(struct message_boundary *b)
+{
+	i_free(b->boundary);
+	i_free(b);
+}
+
+/* Frees boundaries from the head of ctx->boundaries until the given boundary
+   is the head. boundary=NULL frees the whole list. */
+static void boundary_remove_until(struct message_parser_ctx *ctx,
+				  struct message_boundary *boundary)
+{
+	struct message_boundary *cur;
+
+	for (cur = ctx->boundaries; cur != boundary; cur = ctx->boundaries) {
+		i_assert(cur != NULL);
+		ctx->boundaries = cur->next;
+		message_boundary_free(cur);
+	}
+	ctx->boundaries = boundary;
+}
+
+/* Pushes a new boundary for the current part on top of ctx->boundaries. The
+   boundary takes over the ctx->last_boundary string, which must be set. */
+static void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
+{
+	struct message_boundary *node = i_new(struct message_boundary, 1);
+
+	node->boundary = ctx->last_boundary;
+	node->len = strlen(node->boundary);
+	ctx->last_boundary = NULL;
+	node->part = ctx->part;
+	node->next = ctx->boundaries;
+	ctx->boundaries = node;
+}
+/* Starts the embedded message of a message/rfc822 part as a new child part. */
+static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
+					       struct message_block *block_r)
+{
+	message_part_append(ctx);
+	return parse_next_header_init(ctx, block_r);
+}
+/* Looks for "--<known boundary>" at data: 1=found, 0=need more data, -1=no. */
+static int
+boundary_line_find(struct message_parser_ctx *ctx,
+		   const unsigned char *data, size_t size, bool full,
+		   struct message_boundary **boundary_r)
+{
+	*boundary_r = NULL;
+
+	if (size < 2) {
+		i_assert(!full);
+
+		if (ctx->input->eof)
+			return -1;
+		ctx->want_count = 2;
+		return 0;
+	}
+
+	if (data[0] != '-' || data[1] != '-') {
+		/* not a boundary, just skip this line */
+		return -1;
+	}
+
+	if (ctx->total_parts_count >= ctx->max_total_mime_parts) {
+		/* can't add any more MIME parts. just stop trying to find
+		   more boundaries. */
+		ctx->part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
+		return -1;
+	}
+
+	/* need to find the end of line. the leading "--" is skipped first. */
+	data += 2;
+	size -= 2;
+	const unsigned char *lf_pos = memchr(data, '\n', size);
+	if (lf_pos == NULL &&
+	    size+2 < BOUNDARY_END_MAX_LEN &&
+	    !ctx->input->eof && !full) {
+		/* no LF found yet and more data can still arrive: ask for it */
+		ctx->want_count = BOUNDARY_END_MAX_LEN;
+		return 0;
+	}
+	size_t find_size = size;
+	bool trailing_dashes = FALSE;
+
+	if (lf_pos != NULL) {
+		find_size = lf_pos - data;
+		if (find_size > 0 && data[find_size-1] == '\r')
+			find_size--;
+		if (find_size > 2 && data[find_size-1] == '-' &&
+		    data[find_size-2] == '-')
+			trailing_dashes = TRUE;
+	} else if (find_size > BOUNDARY_END_MAX_LEN)
+		find_size = BOUNDARY_END_MAX_LEN;
+
+	*boundary_r = boundary_find(ctx->boundaries, data, find_size,
+				    trailing_dashes);
+	if (*boundary_r == NULL)
+		return -1;
+	/* remember if "--" directly follows the boundary (epilogue begins) */
+	(*boundary_r)->epilogue_found =
+		size >= (*boundary_r)->len + 2 &&
+		memcmp(data + (*boundary_r)->len, "--", 2) == 0;
+	return 1;
+}
+/* Begins a new child MIME part and starts parsing its headers. */
+static int parse_next_mime_header_init(struct message_parser_ctx *ctx,
+				       struct message_block *block_r)
+{
+	message_part_append(ctx);
+	ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+
+	return parse_next_header_init(ctx, block_r);
+}
+/* Consumes the rest of a boundary line up to LF and picks the next state. */
+static int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
+					      struct message_block *block_r)
+{
+	const unsigned char *ptr;
+	int ret;
+	bool full;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	ptr = memchr(block_r->data, '\n', block_r->size);
+	if (ptr == NULL) {
+		parse_body_add_block(ctx, block_r);
+		if (block_r->size > 0 &&
+		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
+			return 1;
+		return 0;
+	}
+
+	/* found the LF: the boundary line ends here */
+	block_r->size = (ptr - block_r->data) + 1;
+	parse_body_add_block(ctx, block_r);
+
+	if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
+		/* epilogue: the newest boundary (if any) isn't this part's */
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	} else {
+		/* a new MIME part begins */
+		ctx->parse_next_block = parse_next_mime_header_init;
+	}
+	if (block_r->size > 0 &&
+	    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
+		return 1;
+	return ctx->parse_next_block(ctx, block_r);
+}
+/* Closes parts down to boundary->part and adds the boundary to that body. */
+static int parse_part_finish(struct message_parser_ctx *ctx,
+			     struct message_boundary *boundary,
+			     struct message_block *block_r, bool first_line)
+{
+	size_t line_size;
+	size_t boundary_len = boundary->len;
+	bool boundary_epilogue_found = boundary->epilogue_found;
+	/* the fields are copied, since the boundary itself may be freed below */
+	i_assert(ctx->last_boundary == NULL);
+
+	/* get back to parent MIME part, summing the child MIME part sizes
+	   into parent's body sizes */
+	while (ctx->part != boundary->part) {
+		message_part_finish(ctx);
+		i_assert(ctx->part != NULL);
+	}
+
+	if (boundary->epilogue_found) {
+		/* this boundary isn't needed anymore */
+		boundary_remove_until(ctx, boundary->next);
+	} else {
+		/* forget about the boundaries we possibly skipped */
+		boundary_remove_until(ctx, boundary);
+	}
+
+	/* the boundary itself should already be in buffer. add that. */
+	block_r->data = i_stream_get_data(ctx->input, &block_r->size);
+	i_assert(block_r->size >= ctx->skip);
+	block_r->data += ctx->skip;
+	/* [[\r]\n]--<boundary>[--] */
+	if (first_line)
+		line_size = 0;
+	else if (block_r->data[0] == '\r') {
+		i_assert(block_r->data[1] == '\n');
+		line_size = 2;
+	} else {
+		i_assert(block_r->data[0] == '\n');
+		line_size = 1;
+	}
+	line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0);
+	i_assert(block_r->size >= ctx->skip + line_size);
+	block_r->size = line_size;
+	parse_body_add_block(ctx, block_r);
+
+	ctx->parse_next_block = parse_next_body_skip_boundary_line;
+
+	if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
+		return 1;
+	return ctx->parse_next_block(ctx, block_r);
+}
+/* Body state: returns data up to the next line starting a known boundary. */
+static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
+				       struct message_block *block_r)
+{
+	struct message_boundary *boundary = NULL;
+	const unsigned char *data, *cur, *next, *end;
+	size_t boundary_start;
+	int ret;
+	bool full;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	data = block_r->data;
+	if (ctx->last_chr == '\n') {
+		/* the previous data ended with LF, so this block begins a
+		   line: it may be a boundary, or just an empty line. */
+		ret = boundary_line_find(ctx, block_r->data,
+					 block_r->size, full, &boundary);
+		if (ret >= 0) {
+			return ret == 0 ? 0 :
+				parse_part_finish(ctx, boundary, block_r, TRUE);
+		}
+	}
+
+	i_assert(block_r->size > 0);
+	boundary_start = 0;
+
+	/* skip to beginning of the next line. the first line was
+	   handled already. */
+	cur = data; end = data + block_r->size;
+	while ((next = memchr(cur, '\n', end - cur)) != NULL) {
+		cur = next + 1;
+
+		boundary_start = next - data;
+		if (next > data && next[-1] == '\r')
+			boundary_start--;
+
+		if (boundary_start != 0) {
+			/* we can at least skip data until the first [CR]LF.
+			   input buffer can't be full anymore. */
+			full = FALSE;
+		}
+
+		ret = boundary_line_find(ctx, cur, end - cur, full, &boundary);
+		if (ret >= 0) {
+			/* found / need more data */
+			if (ret == 0 && boundary_start == 0)
+				ctx->want_count += cur - block_r->data;
+			break;
+		}
+	}
+
+	if (next != NULL) {
+		/* found / need more data */
+		i_assert(ret >= 0);
+		i_assert(!(ret == 0 && full));
+	} else if (boundary_start == 0) {
+		/* no linefeeds in this block. we can just skip it. */
+		ret = 0;
+		if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) {
+			/* this may be the beginning of the \r\n--boundary */
+			block_r->size--;
+		}
+		boundary_start = block_r->size;
+	} else {
+		/* the boundary wasn't found from this data block,
+		   we'll need more data. */
+		ret = 0;
+		ctx->want_count = (block_r->size - boundary_start) + 1;
+	}
+
+	if (ret > 0 || (ret == 0 && !ctx->eof)) {
+		/* a) we found the boundary
+		   b) we need more data and haven't reached EOF yet
+		   so leave CR+LF + last line to buffer */
+		block_r->size = boundary_start;
+	}
+	if (block_r->size != 0) {
+		parse_body_add_block(ctx, block_r);
+
+		if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
+			return 0;
+
+		return 1;
+	}
+	return ret <= 0 ? ret :
+		parse_part_finish(ctx, boundary, block_r, FALSE);
+}
+
+/* Body state used when no boundaries remain: everything up to EOF belongs
+   to the current part. Returns 1 with a data block, 0 when the block is
+   hidden from the caller or more input is needed, -1 at EOF/error. */
+static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
+				  struct message_block *block_r)
+{
+	bool full, hide_block;
+	int ret = message_parser_read_more(ctx, block_r, &full);
+
+	if (ret <= 0)
+		return ret;
+	parse_body_add_block(ctx, block_r);
+	/* multipart bodies are reported only when explicitly requested */
+	hide_block = (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+		(ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0;
+	return hide_block ? 0 : 1;
+}
+/* Sets part flags and last_boundary from the first Content-Type header. */
+static void parse_content_type(struct message_parser_ctx *ctx,
+			       struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	const char *const *results;
+	string_t *content_type;
+	int ret;
+
+	if (ctx->part_seen_content_type)
+		return;
+	ctx->part_seen_content_type = TRUE;
+
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+	rfc822_skip_lwsp(&parser);
+
+	content_type = t_str_new(64);
+	ret = rfc822_parse_content_type(&parser, content_type);
+
+	if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
+		ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
+	else if (strncasecmp(str_c(content_type), "text", 4) == 0 &&
+		 (str_len(content_type) == 4 ||
+		  str_data(content_type)[4] == '/'))
+		ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
+	else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) {
+		ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
+
+		if (strcasecmp(str_c(content_type)+10, "digest") == 0)
+			ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
+	}
+	/* only multiparts need a boundary; keep one that's already been set */
+	if (ret < 0 ||
+	    (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
+	    ctx->last_boundary != NULL) {
+		rfc822_parser_deinit(&parser);
+		return;
+	}
+
+	rfc2231_parse(&parser, &results);
+	for (; *results != NULL; results += 2) {
+		if (strcasecmp(results[0], "boundary") == 0) {
+			/* truncate excessively long boundaries */
+			i_free(ctx->last_boundary);
+			ctx->last_boundary =
+				i_strndup(results[1], BOUNDARY_STRING_MAX_LEN);
+			break;
+		}
+	}
+	rfc822_parser_deinit(&parser);
+}
+
+/* Returns TRUE if the block begins with an empty line, i.e. with LF or
+   CRLF, which is what terminates a header. A lone CR at the end of the
+   block isn't enough to decide, so FALSE is returned for it. */
+static bool block_is_at_eoh(const struct message_block *block)
+{
+	const unsigned char *p = block->data;
+
+	if (block->size == 0)
+		return FALSE;
+	if (p[0] == '\n')
+		return TRUE;
+
+	return block->size >= 2 && p[0] == '\r' && p[1] == '\n';
+}
+/* TRUE if appending one more nested part would reach the nesting limit. */
+static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx)
+{
+	return ctx->max_nested_mime_parts <= ctx->nested_parts_count+1;
+}
+/* A part must never have both of these flags; see the assert further below. */
+#define MUTEX_FLAGS \
+	(MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
+/* Header state: yields header lines one by one, then picks the body state. */
+static int parse_next_header(struct message_parser_ctx *ctx,
+			     struct message_block *block_r)
+{
+	struct message_part *part = ctx->part;
+	struct message_header_line *hdr;
+	struct message_boundary *boundary;
+	bool full;
+	int ret;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0)
+		return ret;
+
+	if (ret > 0 && block_is_at_eoh(block_r) &&
+	    ctx->last_boundary != NULL &&
+	    (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) {
+		/* we are at the end of headers and we've determined that we're
+		   going to start a multipart. add the boundary already here
+		   at this point so we can reliably determine whether the
+		   "\n--boundary" belongs to us or to a previous boundary.
+		   this is a problem if the boundary prefixes are identical,
+		   because MIME requires only the prefix to match. */
+		if (!parse_too_many_nested_mime_parts(ctx)) {
+			parse_next_body_multipart_init(ctx);
+			ctx->multipart = TRUE;
+		} else {
+			part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
+			part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MULTIPART);
+		}
+	}
+
+	/* before parsing the header see if we can find a --boundary from here.
+	   we're guaranteed to be at the beginning of the line here. */
+	if (ret > 0) {
+		ret = ctx->boundaries == NULL ? -1 :
+			boundary_line_find(ctx, block_r->data,
+					   block_r->size, full, &boundary);
+		if (ret > 0 && boundary->part == ctx->part) {
+			/* our own body begins with our own --boundary.
+			   we don't want to handle that yet. */
+			ret = -1;
+		}
+	}
+	if (ret < 0) {
+		/* no boundary */
+		ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
+		if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
+			ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
+			return ret;
+		}
+	} else if (ret == 0) {
+		/* need more data */
+		return 0;
+	} else {
+		/* boundary found. stop parsing headers here. The previous
+		   [CR]LF belongs to the MIME boundary though. */
+		if (ctx->prev_hdr_newline_size > 0) {
+			i_assert(ctx->part->header_size.lines > 0);
+			/* remove the newline size from the MIME header */
+			ctx->part->header_size.lines--;
+			ctx->part->header_size.physical_size -=
+				ctx->prev_hdr_newline_size;
+			ctx->part->header_size.virtual_size -= 2;
+			/* add the newline size to the parent's body */
+			ctx->part->parent->body_size.lines++;
+			ctx->part->parent->body_size.physical_size +=
+				ctx->prev_hdr_newline_size;
+			ctx->part->parent->body_size.virtual_size += 2;
+		}
+		hdr = NULL;
+	}
+
+	if (hdr != NULL) {
+		if (hdr->eoh)
+			;
+		else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
+			/* it's MIME. Content-* headers are valid */
+			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+		} else if (strcasecmp(hdr->name, "Content-Type") == 0) {
+			if ((ctx->flags &
+			     MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
+				part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+
+			if (hdr->continues)
+				hdr->use_full_value = TRUE;
+			else T_BEGIN {
+				parse_content_type(ctx, hdr);
+			} T_END;
+		}
+
+		block_r->hdr = hdr;
+		block_r->size = 0;
+		ctx->prev_hdr_newline_size = hdr->no_newline ? 0 :
+			(hdr->crlf_newline ? 2 : 1);
+		return 1;
+	}
+
+	/* end of headers */
+	if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+		/* It's not MIME. Reset everything we found from
+		   Content-Type. */
+		i_assert(!ctx->multipart);
+		part->flags = 0;
+	}
+	i_free(ctx->last_boundary);
+
+	if (!ctx->part_seen_content_type ||
+	    (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+		if (part->parent != NULL &&
+		    (part->parent->flags &
+		     MESSAGE_PART_FLAG_MULTIPART_DIGEST) != 0) {
+			/* when there's no content-type specified and we're
+			   below multipart/digest, assume message/rfc822
+			   content-type */
+			part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
+		} else {
+			/* otherwise we default to text/plain */
+			part->flags |= MESSAGE_PART_FLAG_TEXT;
+		}
+	}
+
+	if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
+		part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
+	message_parse_header_deinit(&ctx->hdr_parser_ctx);
+
+	i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
+
+	ctx->last_chr = '\n'; /* the body begins at the start of a line */
+	if (ctx->multipart) {
+		i_assert(ctx->last_boundary == NULL);
+		ctx->multipart = FALSE;
+		ctx->parse_next_block = parse_next_body_to_boundary;
+	} else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) == 0) {
+		/* Not message/rfc822 */
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	} else if (!parse_too_many_nested_mime_parts(ctx) &&
+		   ctx->total_parts_count < ctx->max_total_mime_parts) {
+		/* message/rfc822 - not reached MIME part limits yet */
+		ctx->parse_next_block = parse_next_body_message_rfc822_init;
+	} else {
+		/* message/rfc822 - already reached MIME part limits */
+		part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
+		part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MESSAGE_RFC822);
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	}
+
+	ctx->want_count = 1;
+
+	/* return empty block as end of headers */
+	block_r->hdr = NULL;
+	block_r->size = 0;
+	return 1;
+}
+/* Creates the header parser for ctx->part and enters the header state. */
+static int parse_next_header_init(struct message_parser_ctx *ctx,
+				  struct message_block *block_r)
+{
+	i_assert(ctx->hdr_parser_ctx == NULL);
+
+	ctx->hdr_parser_ctx =
+		message_parse_header_init(ctx->input, &ctx->part->header_size,
+					  ctx->hdr_flags);
+	ctx->part_seen_content_type = FALSE;
+	ctx->prev_hdr_newline_size = 0;
+
+	ctx->parse_next_block = parse_next_header;
+	return parse_next_header(ctx, block_r);
+}
+/* Allocates a parser context for input; zero part limits mean the defaults. */
+struct message_parser_ctx *
+message_parser_init_int(struct istream *input,
+			const struct message_parser_settings *set)
+{
+	struct message_parser_ctx *ctx;
+
+	ctx = i_new(struct message_parser_ctx, 1);
+	ctx->hdr_flags = set->hdr_flags;
+	ctx->flags = set->flags;
+	ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ?
+		set->max_nested_mime_parts :
+		MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS;
+	ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ?
+		set->max_total_mime_parts :
+		MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS;
+	ctx->input = input;
+	i_stream_ref(input); /* released again when the parser is deinitialized */
+	return ctx;
+}
+/* Creates a parser that builds a new message_part tree from part_pool. */
+struct message_parser_ctx *
+message_parser_init(pool_t part_pool, struct istream *input,
+		    const struct message_parser_settings *set)
+{
+	struct message_parser_ctx *ctx;
+
+	ctx = message_parser_init_int(input, set);
+	ctx->part_pool = part_pool;
+	ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
+	ctx->next_part = &ctx->part->children;
+	ctx->parse_next_block = parse_next_header_init;
+	ctx->total_parts_count = 1; /* the root part created above */
+	i_array_init(&ctx->next_part_stack, 4);
+	return ctx;
+}
+/* Frees a non-preparsed parser and returns its parts; panics on failure. */
+void message_parser_deinit(struct message_parser_ctx **_ctx,
+			  struct message_part **parts_r)
+{
+	const char *error;
+
+	i_assert((**_ctx).preparsed == FALSE);
+	if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0)
+		i_panic("message_parser_deinit_from_parts: %s", error);
+}
+/* Frees the parser and returns its parts; -1 with *error_r if it was broken. */
+int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx,
+				     struct message_part **parts_r,
+				     const char **error_r)
+{
+        struct message_parser_ctx *ctx = *_ctx;
+	int ret = ctx->broken_reason != NULL ? -1 : 0;
+
+	*_ctx = NULL;
+	*parts_r = ctx->parts;
+	*error_r = ctx->broken_reason;
+
+	if (ctx->hdr_parser_ctx != NULL)
+		message_parse_header_deinit(&ctx->hdr_parser_ctx);
+	if (ctx->part != NULL) {
+		/* If the whole message has been parsed, the parts are
+		   usually finished in message_parser_parse_next_block().
+		   However, it's possible that the caller finishes reading
+		   through the istream without calling
+		   message_parser_parse_next_block() afterwards. In that case
+		   we still need to finish these parts. */
+		while (ctx->part->parent != NULL)
+			message_part_finish(ctx);
+	}
+	boundary_remove_until(ctx, NULL); /* frees all remaining boundaries */
+	i_assert(ctx->nested_parts_count == 0);
+
+	i_stream_unref(&ctx->input);
+	array_free(&ctx->next_part_stack);
+	i_free(ctx->last_boundary);
+	i_free(ctx);
+	i_assert(ret < 0 || *parts_r != NULL);
+	return ret;
+}
+/* Gets the next block: 1=block_r set, 0=need more input, -1=EOF or failure. */
+int message_parser_parse_next_block(struct message_parser_ctx *ctx,
+				    struct message_block *block_r)
+{
+	int ret;
+	bool eof = FALSE, full;
+
+	i_zero(block_r);
+	/* a state returning 0 wants more input (or gave a hidden block) */
+	while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) {
+		ret = message_parser_read_more(ctx, block_r, &full);
+		if (ret == 0) {
+			i_assert(!ctx->input->blocking);
+			return 0;
+		}
+		if (ret == -1) {
+			i_assert(!eof);
+			eof = TRUE;
+		}
+	}
+
+	block_r->part = ctx->part;
+
+	if (ret < 0 && ctx->part != NULL) {
+		/* Successful EOF or unexpected failure */
+		i_assert(ctx->input->eof || ctx->input->closed ||
+			 ctx->input->stream_errno != 0 ||
+			 ctx->broken_reason != NULL);
+		while (ctx->part->parent != NULL)
+			message_part_finish(ctx);
+	}
+
+	if (block_r->size == 0) {
+		/* data isn't supposed to be read, so make sure it's NULL */
+		block_r->data = NULL;
+	}
+	return ret;
+}
+/* Calls callback (never NULL-checked) per header line, lastly with hdr=NULL. */
+#undef message_parser_parse_header
+void message_parser_parse_header(struct message_parser_ctx *ctx,
+				 struct message_size *hdr_size,
+				 message_part_header_callback_t *callback,
+				 void *context)
+{
+	struct message_block block;
+	int ret;
+
+	while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
+		callback(block.part, block.hdr, context);
+
+		if (block.hdr == NULL)
+			break;
+	}
+	i_assert(ret != 0);
+	i_assert(ctx->part != NULL);
+
+	if (ret < 0) {
+		/* well, can't return error so fake end of headers */
+		callback(ctx->part, NULL, context);
+	}
+
+        *hdr_size = ctx->part->header_size;
+}
+/* Parses all remaining blocks; size=0 blocks go to hdr_callback, if given. */
+#undef message_parser_parse_body
+void message_parser_parse_body(struct message_parser_ctx *ctx,
+			       message_part_header_callback_t *hdr_callback,
+			       void *context)
+{
+	struct message_block block;
+	int ret;
+
+	while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
+		if (block.size == 0 && hdr_callback != NULL)
+			hdr_callback(block.part, block.hdr, context);
+	}
+	i_assert(ret != 0); /* 0 would mean the input needs more data */
+}