diff options
Diffstat (limited to 'src/lib-mail/message-search.c')
-rw-r--r-- | src/lib-mail/message-search.c | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/src/lib-mail/message-search.c b/src/lib-mail/message-search.c new file mode 100644 index 0000000..5f54485 --- /dev/null +++ b/src/lib-mail/message-search.c @@ -0,0 +1,246 @@ +/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "istream.h" +#include "str.h" +#include "str-find.h" +#include "rfc822-parser.h" +#include "message-decoder.h" +#include "message-parser.h" +#include "message-search.h" + +struct message_search_context { + enum message_search_flags flags; + normalizer_func_t *normalizer; + + struct str_find_context *str_find_ctx; + struct message_part *prev_part; + + struct message_decoder_context *decoder; + bool content_type_text:1; /* text/any or message/any */ +}; + +struct message_search_context * +message_search_init(const char *normalized_key_utf8, + normalizer_func_t *normalizer, + enum message_search_flags flags) +{ + struct message_search_context *ctx; + + i_assert(*normalized_key_utf8 != '\0'); + + ctx = i_new(struct message_search_context, 1); + ctx->flags = flags; + ctx->decoder = message_decoder_init(normalizer, 0); + ctx->str_find_ctx = str_find_init(default_pool, normalized_key_utf8); + return ctx; +} + +void message_search_deinit(struct message_search_context **_ctx) +{ + struct message_search_context *ctx = *_ctx; + + *_ctx = NULL; + str_find_deinit(&ctx->str_find_ctx); + message_decoder_deinit(&ctx->decoder); + i_free(ctx); +} + +static void parse_content_type(struct message_search_context *ctx, + struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *content_type; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + content_type = t_str_new(64); + (void)rfc822_parse_content_type(&parser, content_type); + ctx->content_type_text = + strncasecmp(str_c(content_type), "text/", 5) == 0 || + strncasecmp(str_c(content_type), "message/", 8) == 0; + rfc822_parser_deinit(&parser); +} + +static void handle_header(struct message_search_context *ctx, + struct message_header_line *hdr) +{ + if (hdr->name_len == 12 && + strcasecmp(hdr->name, "Content-Type") == 0) { + if (hdr->continues) { + hdr->use_full_value = TRUE; + return; + } + T_BEGIN { + parse_content_type(ctx, hdr); + } T_END; + } +} + +static bool search_header(struct message_search_context *ctx, + const struct message_header_line *hdr) +{ + static const unsigned char crlf[2] = { '\r', '\n' }; + + return str_find_more(ctx->str_find_ctx, + (const unsigned char *)hdr->name, hdr->name_len) || + str_find_more(ctx->str_find_ctx, + hdr->middle, hdr->middle_len) || + str_find_more(ctx->str_find_ctx, hdr->full_value, + hdr->full_value_len) || + (!hdr->no_newline && + str_find_more(ctx->str_find_ctx, crlf, 2)); +} + +static bool message_search_more_decoded2(struct message_search_context *ctx, + struct message_block *block) +{ + if (block->hdr != NULL) { + if (search_header(ctx, block->hdr)) + return TRUE; + } else { + if (str_find_more(ctx->str_find_ctx, block->data, block->size)) + return TRUE; + } + return FALSE; +} + +bool message_search_more(struct message_search_context *ctx, + struct message_block *raw_block) +{ + struct message_block decoded_block; + + return message_search_more_get_decoded(ctx, raw_block, &decoded_block); +} + +bool message_search_more_get_decoded(struct message_search_context *ctx, + struct message_block *raw_block, + struct message_block *decoded_block_r) +{ + struct message_header_line *hdr = raw_block->hdr; + struct message_block decoded_block; + + i_zero(decoded_block_r); + decoded_block_r->part = raw_block->part; + + if (raw_block->part != ctx->prev_part) { + /* part changes. we must change this before looking at + content type */ + message_search_reset(ctx); + ctx->prev_part = raw_block->part; + + if (hdr == NULL) { + /* we're returning to a multipart message. */ + ctx->content_type_text = FALSE; + } + } + + if (hdr != NULL) { + handle_header(ctx, hdr); + if ((ctx->flags & MESSAGE_SEARCH_FLAG_SKIP_HEADERS) != 0) { + /* we want to search only message bodies, but + but decoder needs some headers so that it can + decode the body properly. */ + if (hdr->name_len != 12 && hdr->name_len != 25) + return FALSE; + if (strcasecmp(hdr->name, "Content-Type") != 0 && + strcasecmp(hdr->name, + "Content-Transfer-Encoding") != 0) + return FALSE; + } + } else { + /* body */ + if (!ctx->content_type_text) + return FALSE; + } + if (!message_decoder_decode_next_block(ctx->decoder, raw_block, + &decoded_block)) + return FALSE; + + if (decoded_block.hdr != NULL && + (ctx->flags & MESSAGE_SEARCH_FLAG_SKIP_HEADERS) != 0) { + /* Content-* header */ + return FALSE; + } + + *decoded_block_r = decoded_block; + return message_search_more_decoded2(ctx, &decoded_block); +} + +bool message_search_more_decoded(struct message_search_context *ctx, + struct message_block *block) +{ + if (block->part != ctx->prev_part) { + /* part changes */ + message_search_reset(ctx); + ctx->prev_part = block->part; + } + + return message_search_more_decoded2(ctx, block); +} + +void message_search_reset(struct message_search_context *ctx) +{ + /* Content-Type defaults to text/plain */ + ctx->content_type_text = TRUE; + + ctx->prev_part = NULL; + str_find_reset(ctx->str_find_ctx); + message_decoder_decode_reset(ctx->decoder); +} + +static int +message_search_msg_real(struct message_search_context *ctx, + struct istream *input, struct message_part *parts, + const char **error_r) +{ + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, + }; + struct message_parser_ctx *parser_ctx; + struct message_block raw_block; + struct message_part *new_parts; + int ret; + + message_search_reset(ctx); + + if (parts != NULL) { + parser_ctx = message_parser_init_from_parts(parts, + input, &parser_set); + } else { + parser_ctx = message_parser_init(pool_datastack_create(), + input, &parser_set); + } + + while ((ret = message_parser_parse_next_block(parser_ctx, + &raw_block)) > 0) { + if (message_search_more(ctx, &raw_block)) { + ret = 1; + break; + } + } + i_assert(ret != 0); + if (ret < 0 && input->stream_errno == 0) { + /* normal exit */ + ret = 0; + } + if (message_parser_deinit_from_parts(&parser_ctx, &new_parts, error_r) < 0) { + /* broken parts */ + ret = -1; + } + return ret; +} + +int message_search_msg(struct message_search_context *ctx, + struct istream *input, struct message_part *parts, + const char **error_r) +{ + int ret; + + T_BEGIN { + ret = message_search_msg_real(ctx, input, parts, error_r); + } T_END_PASS_STR_IF(ret < 0, error_r); + return ret; +} |