diff options
Diffstat (limited to 'src/lib-mail/message-parser.c')
-rw-r--r-- | src/lib-mail/message-parser.c | 907 |
1 files changed, 907 insertions, 0 deletions
diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c
new file mode 100644
index 0000000..9a9c9a3
--- /dev/null
+++ b/src/lib-mail/message-parser.c
@@ -0,0 +1,907 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "istream.h"
+#include "rfc822-parser.h"
+#include "rfc2231-parser.h"
+#include "message-parser-private.h"
+/* Exported callback pointer that is always NULL in this file. */
+message_part_header_callback_t *null_message_part_header_callback = NULL;
+/* Forward declarations: these state functions are referenced before their
+   definitions further down in this file. */
+static int parse_next_header_init(struct message_parser_ctx *ctx,
+				  struct message_block *block_r);
+static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
+				       struct message_block *block_r);
+static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
+				  struct message_block *block_r);
+/* Return the entry of the boundaries list that best matches the start of
+   data[0..len), or NULL when no entry matches. An entry matches when its
+   boundary string is no longer than len and equals the first bytes of data.
+   The list is walked through its next pointers and the longest match wins;
+   the walk stops early on an exact-length match (len, or len-2 when
+   trailing_dashes is set). */
+static struct message_boundary *
+boundary_find(struct message_boundary *boundaries,
+	      const unsigned char *data, size_t len, bool trailing_dashes)
+{
+	struct message_boundary *best = NULL;
+
+	/* As MIME spec says: search from latest one to oldest one so that we
+	   don't break if the same boundary is used in nested parts. Also the
+	   full message line doesn't have to match the boundary, only the
+	   beginning. However, if there are multiple prefixes whose beginning
+	   matches, use the longest matching one. */
+	while (boundaries != NULL) {
+		if (boundaries->len <= len &&
+		    memcmp(boundaries->boundary, data, boundaries->len) == 0 &&
+		    (best == NULL || best->len < boundaries->len)) {
+			best = boundaries;
+			/* If we see "foo--", it could either mean that there
+			   is a boundary named "foo" that ends now or there's
+			   a boundary "foo--" which continues. */
+			if (best->len == len ||
+			    (best->len == len-2 && trailing_dashes)) {
+				/* This is exactly the wanted boundary. There
+				   can't be a better one. */
+				break;
+			}
+		}
+
+		boundaries = boundaries->next;
+	}
+
+	return best;
+}
+/* Account one chunk of body data (block->data, block->size bytes, asserted
+   to be non-empty) to the current part ctx->part:
+   - sets MESSAGE_PART_FLAG_HAS_NULS if the chunk contains a NUL byte,
+   - counts every LF as one body line,
+   - adds block->size to the physical body size and block->size plus the
+     number of LFs not preceded by CR to the virtual body size.
+   ctx->last_chr holds the final byte of the previously added chunk so that
+   an LF at offset 0 can be classified; it is updated to the final byte of
+   this chunk. Also clears block->hdr and adds block->size to ctx->skip,
+   which message_parser_read_more() later skips from the input stream. */
+static void parse_body_add_block(struct message_parser_ctx *ctx,
+				 struct message_block *block)
+{
+	unsigned int missing_cr_count = 0;
+	const unsigned char *cur, *next, *data = block->data;
+
+	i_assert(block->size > 0);
+
+	block->hdr = NULL;
+
+	/* check if we have NULs */
+	if (memchr(data, '\0', block->size) != NULL)
+		ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
+
+	/* count number of lines and missing CRs */
+	if (*data == '\n') {
+		ctx->part->body_size.lines++;
+		if (ctx->last_chr != '\r')
+			missing_cr_count++;
+	}
+
+	cur = data + 1; /* offset 0 was handled above, so next[-1] below stays inside the chunk */
+	while ((next = memchr(cur, '\n', block->size - (cur - data))) != NULL) {
+		ctx->part->body_size.lines++;
+		if (next[-1] != '\r')
+			missing_cr_count++;
+
+		cur = next + 1;
+	}
+	ctx->last_chr = data[block->size - 1];
+	ctx->skip += block->size;
+
+	ctx->part->body_size.physical_size += block->size;
+	ctx->part->body_size.virtual_size += block->size + missing_cr_count;
+}
+/* Skip the ctx->skip bytes that were already accounted for, then call
+   i_stream_read_bytes() with ctx->want_count + 1 as the wanted size. The
+   resulting buffer is exposed through block_r->data and block_r->size.
+   Return value:
+     0  the read returned 0 and the stream is not at EOF (the stream is
+        asserted to be non-blocking in that case),
+    -1  the read returned -1 and no buffered data is left,
+     1  in every other case, including a -1 read that still left data.
+   A -1 read sets ctx->eof. *full_r is TRUE only when the read returned -2
+   (NOTE(review): presumably meaning the stream buffer is full - confirm
+   against the istream API). ctx->want_count is reset to 1 whenever the
+   function falls through to the final return with *full_r being FALSE. */
+int message_parser_read_more(struct message_parser_ctx *ctx,
+			     struct message_block *block_r, bool *full_r)
+{
+	int ret;
+
+	if (ctx->skip > 0) {
+		i_stream_skip(ctx->input, ctx->skip);
+		ctx->skip = 0;
+	}
+
+	*full_r = FALSE;
+	ret = i_stream_read_bytes(ctx->input, &block_r->data,
+				  &block_r->size, ctx->want_count + 1);
+	if (ret <= 0) {
+		switch (ret) {
+		case 0:
+			if (!ctx->input->eof) {
+				i_assert(!ctx->input->blocking);
+				return 0;
+			}
+			break;
+		case -1:
+			i_assert(ctx->input->eof ||
+				 ctx->input->stream_errno != 0);
+			ctx->eof = TRUE;
+			if (block_r->size != 0) {
+				/* EOF, but we still have some data.
+				   return it. */
+				return 1;
+			}
+			return -1;
+		case -2:
+			*full_r = TRUE;
+			break;
+		default:
+			i_unreached();
+		}
+	}
+
+	if (!*full_r) {
+		/* reset number of wanted characters if we actually got them */
+		ctx->want_count = 1;
+	}
+	return 1;
+}
+/* Allocate a new message_part from ctx->part_pool as a child of the current
+   part and make it the current part. The current part is asserted to be
+   multipart or message/rfc822 and the parser must not be in preparsed mode.
+   The child physical_pos is the parent position plus the parent header and
+   body sizes accumulated so far. The child is stored through
+   *ctx->next_part, the address of its next pointer is pushed on
+   ctx->next_part_stack (popped again by message_part_finish()), and
+   ctx->next_part is redirected to the child children pointer. Both part
+   counters are incremented and then asserted against the configured
+   limits, so callers have to check the limits beforehand. */
+static void
+message_part_append(struct message_parser_ctx *ctx)
+{
+	struct message_part *parent = ctx->part;
+	struct message_part *part;
+
+	i_assert(!ctx->preparsed);
+	i_assert(parent != NULL);
+	i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART |
+				   MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0);
+
+	part = p_new(ctx->part_pool, struct message_part, 1);
+	part->parent = parent;
+
+	/* set child position */
+	part->physical_pos =
+		parent->physical_pos +
+		parent->body_size.physical_size +
+		parent->header_size.physical_size;
+
+	/* add to parent's linked list */
+	*ctx->next_part = part;
+	/* update the parent's end-of-linked-list pointer */
+	struct message_part **next_part = &part->next;
+	array_push_back(&ctx->next_part_stack, &next_part);
+	/* This part is now the new parent for the next message_part_append()
+	   call. Its linked list begins with the children pointer. */
+	ctx->next_part = &part->children;
+
+	ctx->part = part;
+	ctx->nested_parts_count++;
+	ctx->total_parts_count++;
+	i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts);
+	i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts);
+}
+/* Close the current part and step back to its parent. Unless the parser is
+   in preparsed mode, the nesting counter is decremented and ctx->next_part
+   is restored from the top of ctx->next_part_stack. The header and body
+   sizes of the finished part are added to the parent body size and the
+   parent children_count grows by one plus the children of the finished
+   part. The current part must have a parent: it is dereferenced without a
+   NULL check. */
+static void message_part_finish(struct message_parser_ctx *ctx)
+{
+	struct message_part **const *parent_next_partp;
+
+	if (!ctx->preparsed) {
+		i_assert(ctx->nested_parts_count > 0);
+		ctx->nested_parts_count--;
+
+		parent_next_partp = array_back(&ctx->next_part_stack);
+		array_pop_back(&ctx->next_part_stack);
+		ctx->next_part = *parent_next_partp;
+	}
+
+	message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size);
+	message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size);
+	ctx->part->parent->children_count += 1 + ctx->part->children_count;
+	ctx->part = ctx->part->parent;
+}
+/* Free one boundary list entry together with its boundary string. */
+static void message_boundary_free(struct message_boundary *b)
+{
+	i_free(b->boundary);
+	i_free(b);
+}
+/* Free entries from the head of ctx->boundaries until the head equals
+   boundary. Passing NULL frees the whole list. The given boundary has to be
+   reachable from the head (or be NULL): running off the end of the list
+   before finding it trips the assert. */
+static void
+boundary_remove_until(struct message_parser_ctx *ctx,
+		      struct message_boundary *boundary)
+{
+	while (ctx->boundaries != boundary) {
+		struct message_boundary *cur = ctx->boundaries;
+
+		i_assert(cur != NULL);
+		ctx->boundaries = cur->next;
+		message_boundary_free(cur);
+
+	}
+	ctx->boundaries = boundary;
+}
+/* Push a new boundary entry for the current part on the head of
+   ctx->boundaries. The entry takes over the ctx->last_boundary string
+   (ctx->last_boundary is reset to NULL), so the caller must make sure that
+   ctx->last_boundary is non-NULL: strlen() is applied to it. */
+static void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
+{
+	struct message_boundary *b;
+
+	b = i_new(struct message_boundary, 1);
+	b->part = ctx->part;
+	b->boundary = ctx->last_boundary;
+	ctx->last_boundary = NULL;
+	b->len = strlen(b->boundary);
+
+	b->next = ctx->boundaries;
+	ctx->boundaries = b;
+}
+/* State function used for the body of a message/rfc822 part: appends a
+   child part and immediately starts parsing its headers. Returns whatever
+   parse_next_header_init() returns. */
+static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
+					       struct message_block *block_r)
+{
+	message_part_append(ctx);
+	return parse_next_header_init(ctx, block_r);
+}
+/* Check whether the line starting at data (size bytes available) is a
+   boundary line, i.e. two dashes followed by a string known to
+   ctx->boundaries. full is passed by the callers from the full_r result of
+   message_parser_read_more().
+   Return value:
+    -1  not a boundary line; also returned when the total part limit has
+        been reached, in which case MESSAGE_PART_FLAG_OVERFLOW is set on the
+        current part,
+     0  more input is needed to decide; ctx->want_count has been raised,
+     1  boundary found; *boundary_r points to the list entry and its
+        epilogue_found field tells whether the boundary string is directly
+        followed by two dashes.
+   *boundary_r is NULL for every result other than 1. When no LF is in the
+   buffer the comparison length is capped at BOUNDARY_END_MAX_LEN. */
+static int
+boundary_line_find(struct message_parser_ctx *ctx,
+		   const unsigned char *data, size_t size, bool full,
+		   struct message_boundary **boundary_r)
+{
+	*boundary_r = NULL;
+
+	if (size < 2) {
+		i_assert(!full);
+
+		if (ctx->input->eof)
+			return -1;
+		ctx->want_count = 2;
+		return 0;
+	}
+
+	if (data[0] != '-' || data[1] != '-') {
+		/* not a boundary, just skip this line */
+		return -1;
+	}
+
+	if (ctx->total_parts_count >= ctx->max_total_mime_parts) {
+		/* can't add any more MIME parts. just stop trying to find
+		   more boundaries. */
+		ctx->part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
+		return -1;
+	}
+
+	/* need to find the end of line */
+	data += 2;
+	size -= 2;
+	const unsigned char *lf_pos = memchr(data, '\n', size);
+	if (lf_pos == NULL &&
+	    size+2 < BOUNDARY_END_MAX_LEN &&
+	    !ctx->input->eof && !full) {
+		/* no LF found */
+		ctx->want_count = BOUNDARY_END_MAX_LEN;
+		return 0;
+	}
+	size_t find_size = size;
+	bool trailing_dashes = FALSE;
+
+	if (lf_pos != NULL) {
+		find_size = lf_pos - data;
+		if (find_size > 0 && data[find_size-1] == '\r')
+			find_size--;
+		if (find_size > 2 && data[find_size-1] == '-' &&
+		    data[find_size-2] == '-')
+			trailing_dashes = TRUE;
+	} else if (find_size > BOUNDARY_END_MAX_LEN)
+		find_size = BOUNDARY_END_MAX_LEN;
+
+	*boundary_r = boundary_find(ctx->boundaries, data, find_size,
+				    trailing_dashes);
+	if (*boundary_r == NULL)
+		return -1;
+
+	(*boundary_r)->epilogue_found =
+		size >= (*boundary_r)->len + 2 &&
+		memcmp(data + (*boundary_r)->len, "--", 2) == 0;
+	return 1;
+}
+/* State function entered after a boundary line that opens a new MIME part:
+   appends a child part, flags it MESSAGE_PART_FLAG_IS_MIME and starts
+   parsing its headers. */
+static int parse_next_mime_header_init(struct message_parser_ctx *ctx,
+				       struct message_block *block_r)
+{
+	message_part_append(ctx);
+	ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+
+	return parse_next_header_init(ctx, block_r);
+}
+/* State function that consumes the remainder of a boundary line, up to and
+   including its LF, accounting it as body data of the current part. While
+   no LF is in the buffer the whole buffer is consumed and the state stays
+   unchanged. Once the LF is found the next state is chosen:
+   - the head of ctx->boundaries belongs to the current part: a new MIME
+     part begins (parse_next_mime_header_init),
+   - otherwise this is epilogue data, read up to the next enclosing boundary
+     if one exists, or up to EOF.
+   The consumed bytes are returned to the caller (result 1) only when
+   MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES is set; otherwise the function
+   returns 0 (no LF yet) or chains directly into the next state. */
+static int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
+					      struct message_block *block_r)
+{
+	const unsigned char *ptr;
+	int ret;
+	bool full;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	ptr = memchr(block_r->data, '\n', block_r->size);
+	if (ptr == NULL) {
+		parse_body_add_block(ctx, block_r);
+		if (block_r->size > 0 &&
+		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
+			return 1;
+		return 0;
+	}
+
+	/* found the LF */
+	block_r->size = (ptr - block_r->data) + 1;
+	parse_body_add_block(ctx, block_r);
+
+	if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
+		/* epilogue */
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	} else {
+		/* a new MIME part begins */
+		ctx->parse_next_block = parse_next_mime_header_init;
+	}
+	if (block_r->size > 0 &&
+	    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
+		return 1;
+	return ctx->parse_next_block(ctx, block_r);
+}
+/* Handle a boundary line that was just recognised. Finishes every part
+   nested below boundary->part, drops the boundary entries that are no
+   longer needed, and accounts the boundary text itself (optional preceding
+   [CR]LF unless first_line is set, two dashes, the boundary string and the
+   two closing dashes if present) as body data of boundary->part. The rest
+   of the line is left to parse_next_body_skip_boundary_line(), which
+   becomes the next state. Returns 1 with the boundary bytes in block_r when
+   MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES is set, otherwise the result of
+   the next state function.
+   boundary->len and boundary->epilogue_found are copied to locals up front
+   because the epilogue branch calls boundary_remove_until() with
+   boundary->next, which frees boundary itself. */
+static int parse_part_finish(struct message_parser_ctx *ctx,
+			     struct message_boundary *boundary,
+			     struct message_block *block_r, bool first_line)
+{
+	size_t line_size;
+	size_t boundary_len = boundary->len;
+	bool boundary_epilogue_found = boundary->epilogue_found;
+
+	i_assert(ctx->last_boundary == NULL);
+
+	/* get back to parent MIME part, summing the child MIME part sizes
+	   into parent's body sizes */
+	while (ctx->part != boundary->part) {
+		message_part_finish(ctx);
+		i_assert(ctx->part != NULL);
+	}
+
+	if (boundary->epilogue_found) {
+		/* this boundary isn't needed anymore */
+		boundary_remove_until(ctx, boundary->next);
+	} else {
+		/* forget about the boundaries we possibly skipped */
+		boundary_remove_until(ctx, boundary);
+	}
+
+	/* the boundary itself should already be in buffer. add that. */
+	block_r->data = i_stream_get_data(ctx->input, &block_r->size);
+	i_assert(block_r->size >= ctx->skip);
+	block_r->data += ctx->skip;
+	/* [[\r]\n]--<boundary>[--] */
+	if (first_line)
+		line_size = 0;
+	else if (block_r->data[0] == '\r') {
+		i_assert(block_r->data[1] == '\n');
+		line_size = 2;
+	} else {
+		i_assert(block_r->data[0] == '\n');
+		line_size = 1;
+	}
+	line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0);
+	i_assert(block_r->size >= ctx->skip + line_size);
+	block_r->size = line_size;
+	parse_body_add_block(ctx, block_r);
+
+	ctx->parse_next_block = parse_next_body_skip_boundary_line;
+
+	if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
+		return 1;
+	return ctx->parse_next_block(ctx, block_r);
+}
+/* State function for body data that may be terminated by a boundary line.
+   Scans the buffered data line by line with boundary_line_find(). Data
+   preceding a candidate boundary line is accounted and handed out; the
+   [CR]LF in front of the candidate line is kept in the buffer while the
+   decision is still open, because it belongs to the boundary if one is
+   found. boundary_start is the offset of that [CR]LF within the buffer.
+   Return value:
+     1  block_r holds body data, or parse_part_finish() produced a block,
+     0  more input is needed, or data was consumed for a multipart part
+        while MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS is not set,
+    <0  propagated from message_parser_read_more(). */
+static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
+				       struct message_block *block_r)
+{
+	struct message_boundary *boundary = NULL;
+	const unsigned char *data, *cur, *next, *end;
+	size_t boundary_start;
+	int ret;
+	bool full;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	data = block_r->data;
+	if (ctx->last_chr == '\n') {
+		/* handle boundary in first line of message. alternatively
+		   it's an empty line. */
+		ret = boundary_line_find(ctx, block_r->data,
+					 block_r->size, full, &boundary);
+		if (ret >= 0) {
+			return ret == 0 ? 0 :
+				parse_part_finish(ctx, boundary, block_r, TRUE);
+		}
+	}
+
+	i_assert(block_r->size > 0);
+	boundary_start = 0;
+
+	/* skip to beginning of the next line. the first line was
+	   handled already. */
+	cur = data; end = data + block_r->size;
+	while ((next = memchr(cur, '\n', end - cur)) != NULL) {
+		cur = next + 1;
+
+		boundary_start = next - data;
+		if (next > data && next[-1] == '\r')
+			boundary_start--;
+
+		if (boundary_start != 0) {
+			/* we can at least skip data until the first [CR]LF.
+			   input buffer can't be full anymore. */
+			full = FALSE;
+		}
+
+		ret = boundary_line_find(ctx, cur, end - cur, full, &boundary);
+		if (ret >= 0) {
+			/* found / need more data */
+			if (ret == 0 && boundary_start == 0)
+				ctx->want_count += cur - block_r->data;
+			break;
+		}
+	}
+
+	if (next != NULL) {
+		/* found / need more data */
+		i_assert(ret >= 0);
+		i_assert(!(ret == 0 && full));
+	} else if (boundary_start == 0) {
+		/* no linefeeds in this block. we can just skip it. */
+		ret = 0;
+		if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) {
+			/* this may be the beginning of the \r\n--boundary */
+			block_r->size--;
+		}
+		boundary_start = block_r->size;
+	} else {
+		/* the boundary wasn't found from this data block,
+		   we'll need more data. */
+		ret = 0;
+		ctx->want_count = (block_r->size - boundary_start) + 1;
+	}
+
+	if (ret > 0 || (ret == 0 && !ctx->eof)) {
+		/* a) we found the boundary
+		   b) we need more data and haven't reached EOF yet
+		   so leave CR+LF + last line to buffer */
+		block_r->size = boundary_start;
+	}
+	if (block_r->size != 0) {
+		parse_body_add_block(ctx, block_r);
+
+		if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
+			return 0;
+
+		return 1;
+	}
+	return ret <= 0 ? ret :
+		parse_part_finish(ctx, boundary, block_r, FALSE);
+}
+/* State function for body data when no boundary can end it: every buffer
+   obtained from message_parser_read_more() is accounted as body data of the
+   current part. Returns 1 with the data in block_r, except that 0 is
+   returned for a multipart part when
+   MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS is not set. Results <= 0
+   from message_parser_read_more() are passed through. */
+static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
+				  struct message_block *block_r)
+{
+	bool full;
+	int ret;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	parse_body_add_block(ctx, block_r);
+
+	if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+	    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
+		return 0;
+
+	return 1;
+}
+/* Interpret a Content-Type header of the current part. Only the first
+   Content-Type header of a part is used (ctx->part_seen_content_type).
+   Sets exactly one of the MESSAGE_RFC822, TEXT or MULTIPART flags when the
+   parsed type is message/rfc822, text (bare or followed by a slash) or
+   starts with multipart/, compared case-insensitively; multipart/digest
+   additionally sets MESSAGE_PART_FLAG_MULTIPART_DIGEST. For a multipart
+   type whose parsing succeeded, and only if no boundary is pending yet, the
+   boundary parameter is copied into ctx->last_boundary, truncated to
+   BOUNDARY_STRING_MAX_LEN. The type string is allocated with t_str_new();
+   the visible caller wraps this call in T_BEGIN and T_END. */
+static void parse_content_type(struct message_parser_ctx *ctx,
+			       struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	const char *const *results;
+	string_t *content_type;
+	int ret;
+
+	if (ctx->part_seen_content_type)
+		return;
+	ctx->part_seen_content_type = TRUE;
+
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+	rfc822_skip_lwsp(&parser);
+
+	content_type = t_str_new(64);
+	ret = rfc822_parse_content_type(&parser, content_type);
+
+	if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
+		ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
+	else if (strncasecmp(str_c(content_type), "text", 4) == 0 &&
+		 (str_len(content_type) == 4 ||
+		  str_data(content_type)[4] == '/'))
+		ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
+	else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) {
+		ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
+
+		if (strcasecmp(str_c(content_type)+10, "digest") == 0)
+			ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
+	}
+
+	if (ret < 0 ||
+	    (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
+	    ctx->last_boundary != NULL) {
+		rfc822_parser_deinit(&parser);
+		return;
+	}
+
+	rfc2231_parse(&parser, &results);
+	for (; *results != NULL; results += 2) {
+		if (strcasecmp(results[0], "boundary") == 0) {
+			/* truncate excessively long boundaries */
+			i_free(ctx->last_boundary);
+			ctx->last_boundary =
+				i_strndup(results[1], BOUNDARY_STRING_MAX_LEN);
+			break;
+		}
+	}
+	rfc822_parser_deinit(&parser);
+}
+/* Return TRUE when the block data starts with an empty line, i.e. with LF
+   or with CR LF. A lone CR at the very end of the data yields FALSE. */
+static bool block_is_at_eoh(const struct message_block *block)
+{
+	if (block->size < 1)
+		return FALSE;
+	if (block->data[0] == '\n')
+		return TRUE;
+	if (block->data[0] == '\r') {
+		if (block->size < 2)
+			return FALSE;
+		if (block->data[1] == '\n')
+			return TRUE;
+	}
+	return FALSE;
+}
+/* Return TRUE when adding one more nesting level would reach
+   ctx->max_nested_mime_parts. This mirrors the strict less-than assert in
+   message_part_append(). */
+static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx)
+{
+	return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts;
+}
+/* The two part flags that must never be set together; asserted at the end
+   of header parsing in parse_next_header(). */
+#define MUTEX_FLAGS \
+	(MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
+/* State function that yields one header line of the current part per call.
+   Steps, in order:
+   1. Read more input; return 0 right away if nothing is available yet.
+   2. If the input is at the empty line ending the headers, a boundary
+      string is pending and the part is MIME, register the boundary for
+      this part now, or flag OVERFLOW and clear MULTIPART when the nesting
+      limit would be reached.
+   3. If any boundaries are registered, test whether the current line is a
+      boundary line. A boundary owned by the current part itself is ignored
+      here. A boundary of another part ends the headers early; the newline
+      of the previous header line is then moved from this part header size
+      to the parent body size.
+   4. Otherwise fetch the next line from the header parser. Mime-Version
+      and Content-Type headers update the part flags.
+   Return value: 1 with block_r->hdr set for a header line, 1 with
+   block_r->hdr being NULL and size 0 at the end of headers, 0 when more
+   input is needed, negative on EOF or error. At the end of headers the
+   default content type is applied, the header parser is released and the
+   state function for the body is selected. */
+static int parse_next_header(struct message_parser_ctx *ctx,
+			     struct message_block *block_r)
+{
+	struct message_part *part = ctx->part;
+	struct message_header_line *hdr;
+	struct message_boundary *boundary;
+	bool full;
+	int ret;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0)
+		return ret;
+
+	if (ret > 0 && block_is_at_eoh(block_r) &&
+	    ctx->last_boundary != NULL &&
+	    (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) {
+		/* we are at the end of headers and we've determined that we're
+		   going to start a multipart. add the boundary already here
+		   at this point so we can reliably determine whether the
+		   "\n--boundary" belongs to us or to a previous boundary.
+		   this is a problem if the boundary prefixes are identical,
+		   because MIME requires only the prefix to match. */
+		if (!parse_too_many_nested_mime_parts(ctx)) {
+			parse_next_body_multipart_init(ctx);
+			ctx->multipart = TRUE;
+		} else {
+			part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
+			part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MULTIPART);
+		}
+	}
+
+	/* before parsing the header see if we can find a --boundary from here.
+	   we're guaranteed to be at the beginning of the line here. */
+	if (ret > 0) {
+		ret = ctx->boundaries == NULL ? -1 :
+			boundary_line_find(ctx, block_r->data,
+					   block_r->size, full, &boundary);
+		if (ret > 0 && boundary->part == ctx->part) {
+			/* our own body begins with our own --boundary.
+			   we don't want to handle that yet. */
+			ret = -1;
+		}
+	}
+	if (ret < 0) {
+		/* no boundary */
+		ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
+		if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
+			ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
+			return ret;
+		}
+	} else if (ret == 0) {
+		/* need more data */
+		return 0;
+	} else {
+		/* boundary found. stop parsing headers here. The previous
+		   [CR]LF belongs to the MIME boundary though. */
+		if (ctx->prev_hdr_newline_size > 0) {
+			i_assert(ctx->part->header_size.lines > 0);
+			/* remove the newline size from the MIME header */
+			ctx->part->header_size.lines--;
+			ctx->part->header_size.physical_size -=
+				ctx->prev_hdr_newline_size;
+			ctx->part->header_size.virtual_size -= 2;
+			/* add the newline size to the parent's body */
+			ctx->part->parent->body_size.lines++;
+			ctx->part->parent->body_size.physical_size +=
+				ctx->prev_hdr_newline_size;
+			ctx->part->parent->body_size.virtual_size += 2;
+		}
+		hdr = NULL;
+	}
+
+	if (hdr != NULL) {
+		if (hdr->eoh)
+			;
+		else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
+			/* it's MIME. Content-* headers are valid */
+			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+		} else if (strcasecmp(hdr->name, "Content-Type") == 0) {
+			if ((ctx->flags &
+			     MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
+				part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+
+			if (hdr->continues)
+				hdr->use_full_value = TRUE;
+			else T_BEGIN {
+				parse_content_type(ctx, hdr);
+			} T_END;
+		}
+
+		block_r->hdr = hdr;
+		block_r->size = 0;
+		ctx->prev_hdr_newline_size = hdr->no_newline ? 0 :
+			(hdr->crlf_newline ? 2 : 1);
+		return 1;
+	}
+
+	/* end of headers */
+	if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+		/* It's not MIME. Reset everything we found from
+		   Content-Type. */
+		i_assert(!ctx->multipart);
+		part->flags = 0;
+	}
+	i_free(ctx->last_boundary);
+
+	if (!ctx->part_seen_content_type ||
+	    (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+		if (part->parent != NULL &&
+		    (part->parent->flags &
+		     MESSAGE_PART_FLAG_MULTIPART_DIGEST) != 0) {
+			/* when there's no content-type specified and we're
+			   below multipart/digest, assume message/rfc822
+			   content-type */
+			part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
+		} else {
+			/* otherwise we default to text/plain */
+			part->flags |= MESSAGE_PART_FLAG_TEXT;
+		}
+	}
+
+	if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
+		part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
+	message_parse_header_deinit(&ctx->hdr_parser_ctx);
+
+	i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
+
+	ctx->last_chr = '\n';
+	if (ctx->multipart) {
+		i_assert(ctx->last_boundary == NULL);
+		ctx->multipart = FALSE;
+		ctx->parse_next_block = parse_next_body_to_boundary;
+	} else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) == 0) {
+		/* Not message/rfc822 */
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	} else if (!parse_too_many_nested_mime_parts(ctx) &&
+		   ctx->total_parts_count < ctx->max_total_mime_parts) {
+		/* message/rfc822 - not reached MIME part limits yet */
+		ctx->parse_next_block = parse_next_body_message_rfc822_init;
+	} else {
+		/* message/rfc822 - already reached MIME part limits */
+		part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
+		part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MESSAGE_RFC822);
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	}
+
+	ctx->want_count = 1;
+
+	/* return empty block as end of headers */
+	block_r->hdr = NULL;
+	block_r->size = 0;
+	return 1;
+}
+/* Start header parsing for the current part: creates the header parser on
+   ctx->input so that it fills in ctx->part->header_size, resets the
+   per-part header state, installs parse_next_header() as the state
+   function and runs it once. No header parser may be active on entry. */
+static int parse_next_header_init(struct message_parser_ctx *ctx,
+				  struct message_block *block_r)
+{
+	i_assert(ctx->hdr_parser_ctx == NULL);
+
+	ctx->hdr_parser_ctx =
+		message_parse_header_init(ctx->input, &ctx->part->header_size,
+					  ctx->hdr_flags);
+	ctx->part_seen_content_type = FALSE;
+	ctx->prev_hdr_newline_size = 0;
+
+	ctx->parse_next_block = parse_next_header;
+	return parse_next_header(ctx, block_r);
+}
+/* Allocate a parser context and copy the flags from set. A limit of 0 in
+   set selects the corresponding MESSAGE_PARSER_DEFAULT_MAX_* value. Takes a
+   reference on input; it is released in
+   message_parser_deinit_from_parts(). The part tree, part pool and state
+   function are not set up here. */
+struct message_parser_ctx *
+message_parser_init_int(struct istream *input,
+			const struct message_parser_settings *set)
+{
+	struct message_parser_ctx *ctx;
+
+	ctx = i_new(struct message_parser_ctx, 1);
+	ctx->hdr_flags = set->hdr_flags;
+	ctx->flags = set->flags;
+	ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ?
+		set->max_nested_mime_parts :
+		MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS;
+	ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ?
+		set->max_total_mime_parts :
+		MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS;
+	ctx->input = input;
+	i_stream_ref(input);
+	return ctx;
+}
+/* Create a parser for input that builds a new part tree. The root part is
+   allocated from part_pool and counts as the first part
+   (total_parts_count starts at 1); parsing begins with the root headers. */
+struct message_parser_ctx *
+message_parser_init(pool_t part_pool, struct istream *input,
+		    const struct message_parser_settings *set)
+{
+	struct message_parser_ctx *ctx;
+
+	ctx = message_parser_init_int(input, set);
+	ctx->part_pool = part_pool;
+	ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
+	ctx->next_part = &ctx->part->children;
+	ctx->parse_next_block = parse_next_header_init;
+	ctx->total_parts_count = 1;
+	i_array_init(&ctx->next_part_stack, 4);
+	return ctx;
+}
+/* Destroy a parser that is not in preparsed mode (asserted) and return the
+   root of the part tree in *parts_r. Panics if
+   message_parser_deinit_from_parts() reports an error. */
+void message_parser_deinit(struct message_parser_ctx **_ctx,
+			   struct message_part **parts_r)
+{
+	const char *error;
+
+	i_assert((**_ctx).preparsed == FALSE);
+	if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0)
+		i_panic("message_parser_deinit_from_parts: %s", error);
+}
+/* Destroy the parser and set *_ctx to NULL. *parts_r receives ctx->parts
+   and *error_r receives ctx->broken_reason. Returns -1 when broken_reason
+   is set, otherwise 0. Parts that are still open are finished first, all
+   registered boundaries are freed, and the input stream reference, the
+   next_part stack, the pending boundary string and the context itself are
+   released. */
+int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx,
+				     struct message_part **parts_r,
+				     const char **error_r)
+{
+	struct message_parser_ctx *ctx = *_ctx;
+	int ret = ctx->broken_reason != NULL ? -1 : 0;
+
+	*_ctx = NULL;
+	*parts_r = ctx->parts;
+	*error_r = ctx->broken_reason;
+
+	if (ctx->hdr_parser_ctx != NULL)
+		message_parse_header_deinit(&ctx->hdr_parser_ctx);
+	if (ctx->part != NULL) {
+		/* If the whole message has been parsed, the parts are
+		   usually finished in message_parser_parse_next_block().
+		   However, it's possible that the caller finishes reading
+		   through the istream without calling
+		   message_parser_parse_next_block() afterwards. In that case
+		   we still need to finish these parts. */
+		while (ctx->part->parent != NULL)
+			message_part_finish(ctx);
+	}
+	boundary_remove_until(ctx, NULL);
+	i_assert(ctx->nested_parts_count == 0);
+
+	i_stream_unref(&ctx->input);
+	array_free(&ctx->next_part_stack);
+	i_free(ctx->last_boundary);
+	i_free(ctx);
+	i_assert(ret < 0 || *parts_r != NULL);
+	return ret;
+}
+/* Produce the next block of the message. block_r is zeroed, then the
+   current state function is called repeatedly until it returns non-zero;
+   after each 0 result more input is requested. Returns 0 when that read
+   reports that no input is available (the stream is asserted to be
+   non-blocking), otherwise the non-zero result of the state function. On
+   return block_r->part is the current part. On a negative result every
+   part that is still open is finished, leaving the root as the current
+   part. block_r->data is forced to NULL whenever block_r->size is 0. */
+int message_parser_parse_next_block(struct message_parser_ctx *ctx,
+				    struct message_block *block_r)
+{
+	int ret;
+	bool eof = FALSE, full;
+
+	i_zero(block_r);
+
+	while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) {
+		ret = message_parser_read_more(ctx, block_r, &full);
+		if (ret == 0) {
+			i_assert(!ctx->input->blocking);
+			return 0;
+		}
+		if (ret == -1) {
+			i_assert(!eof);
+			eof = TRUE;
+		}
+	}
+
+	block_r->part = ctx->part;
+
+	if (ret < 0 && ctx->part != NULL) {
+		/* Successful EOF or unexpected failure */
+		i_assert(ctx->input->eof || ctx->input->closed ||
+			 ctx->input->stream_errno != 0 ||
+			 ctx->broken_reason != NULL);
+		while (ctx->part->parent != NULL)
+			message_part_finish(ctx);
+	}
+
+	if (block_r->size == 0) {
+		/* data isn't supposed to be read, so make sure it's NULL */
+		block_r->data = NULL;
+	}
+	return ret;
+}
+/* Parse blocks until the first block without a header line and pass each
+   one, including that final block, to callback. callback must not be NULL:
+   it is called unconditionally. If parsing ends with a negative result the
+   callback is invoked once more with a NULL header. A result of 0 from the
+   block parser trips an assert. *hdr_size receives the header size of the
+   current part at that point. */
+#undef message_parser_parse_header
+void message_parser_parse_header(struct message_parser_ctx *ctx,
+				 struct message_size *hdr_size,
+				 message_part_header_callback_t *callback,
+				 void *context)
+{
+	struct message_block block;
+	int ret;
+
+	while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
+		callback(block.part, block.hdr, context);
+
+		if (block.hdr == NULL)
+			break;
+	}
+	i_assert(ret != 0);
+	i_assert(ctx->part != NULL);
+
+	if (ret < 0) {
+		/* well, can't return error so fake end of headers */
+		callback(ctx->part, NULL, context);
+	}
+
+	*hdr_size = ctx->part->header_size;
+}
+/* Parse blocks until the block parser stops returning a positive result.
+   Blocks with size 0 (header lines and end-of-headers markers) are passed
+   to hdr_callback when it is non-NULL; blocks carrying body data are
+   dropped. A result of 0 from the block parser trips an assert. */
+#undef message_parser_parse_body
+void message_parser_parse_body(struct message_parser_ctx *ctx,
+			       message_part_header_callback_t *hdr_callback,
+			       void *context)
+{
+	struct message_block block;
+	int ret;
+
+	while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
+		if (block.size == 0 && hdr_callback != NULL)
+			hdr_callback(block.part, block.hdr, context);
+	}
+	i_assert(ret != 0);
+} |