summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts/fts-build-mail.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 09:51:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 09:51:24 +0000
commitf7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
treea3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/plugins/fts/fts-build-mail.c
parentInitial commit. (diff)
downloaddovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz
dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/plugins/fts/fts-build-mail.c')
-rw-r--r--src/plugins/fts/fts-build-mail.c719
1 files changed, 719 insertions, 0 deletions
diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c
new file mode 100644
index 0000000..73d4f4b
--- /dev/null
+++ b/src/plugins/fts/fts-build-mail.c
@@ -0,0 +1,719 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "istream.h"
+#include "buffer.h"
+#include "str.h"
+#include "rfc822-parser.h"
+#include "message-address.h"
+#include "message-parser.h"
+#include "message-decoder.h"
+#include "mail-storage.h"
+#include "index-mail.h"
+#include "fts-parser.h"
+#include "fts-user.h"
+#include "fts-language.h"
+#include "fts-tokenizer.h"
+#include "fts-filter.h"
+#include "fts-api-private.h"
+#include "fts-build-mail.h"
+
+/* Characters that fts_build_full_words() treats as word separators.
+   there are other characters as well, but this doesn't have to be exact */
+#define IS_WORD_WHITESPACE(c) \
+ ((c) == ' ' || (c) == '\t' || (c) == '\n')
+/* if we see a word larger than this, just go ahead and split it from
+ wherever. The limit is compared against the number of bytes buffered
+ for the unfinished word. */
+#define MAX_WORD_SIZE 1024
+/* Per-mail state shared by the indexing helpers in this file. It is
+   zeroed with i_zero() at the start of fts_build_mail_real(). */
+struct fts_mail_build_context {
+ struct mail *mail; /* mail being indexed */
+ struct fts_backend_update_context *update_ctx; /* backend update context the data is sent to */
+
+ char *content_type, *content_disposition; /* header values of the current MIME part; freed whenever the part changes */
+ struct fts_parser *body_parser; /* parser for the current body part; NULL when none is active */
+
+ buffer_t *word_buf, *pending_input; /* word_buf: unfinished trailing word kept by fts_build_full_words();
+   pending_input: input saved while language detection needs more data */
+ struct fts_user_language *cur_user_lang; /* language used for tokenizing; NULL until it is known */
+};
+/* Forward declaration: fts_build_data() is defined further below, but the
+   header indexing functions already need to call it. */
+static int fts_build_data(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last);
+
+/* Parse a Content-Type header value and remember it, lowercased, in
+   ctx->content_type. Does nothing if ctx->content_type is already set, so
+   only the first Content-Type header of a part is used. */
+static void fts_build_parse_content_type(struct fts_mail_build_context *ctx,
+					 const struct message_header_line *hdr)
+{
+	struct rfc822_parser_context rfc822;
+
+	if (ctx->content_type == NULL) {
+		rfc822_parser_init(&rfc822, hdr->full_value,
+				   hdr->full_value_len, NULL);
+		rfc822_skip_lwsp(&rfc822);
+
+		T_BEGIN {
+			/* the parsed value is built in a temporary string,
+			   so keep our own i_strdup()ed copy of it */
+			string_t *value = t_str_new(64);
+			char *copy;
+
+			(void)rfc822_parse_content_type(&rfc822, value);
+			copy = i_strdup(str_c(value));
+			ctx->content_type = str_lcase(copy);
+		} T_END;
+		rfc822_parser_deinit(&rfc822);
+	}
+}
+
+/* Remember the raw Content-Disposition header value. Any previously
+   stored value is released first. The value isn't parsed here, it is
+   given to the backend as-is. */
+static void
+fts_build_parse_content_disposition(struct fts_mail_build_context *ctx,
+				    const struct message_header_line *hdr)
+{
+	char *raw_value;
+
+	i_free(ctx->content_disposition);
+	raw_value = i_strndup(hdr->full_value, hdr->full_value_len);
+	ctx->content_disposition = raw_value;
+}
+
+/* Pick up the headers that describe the body part: Content-Type and
+   Content-Disposition (names compared case-insensitively). All other
+   headers are ignored by this function. */
+static void fts_parse_mail_header(struct fts_mail_build_context *ctx,
+				  const struct message_block *raw_block)
+{
+	const char *name = raw_block->hdr->name;
+
+	if (strcasecmp(name, "Content-Type") == 0) {
+		fts_build_parse_content_type(ctx, raw_block->hdr);
+		return;
+	}
+	if (strcasecmp(name, "Content-Disposition") == 0)
+		fts_build_parse_content_disposition(ctx, raw_block->hdr);
+}
+
+/* Index the value of a regular (non-address) header. NUL bytes in the
+   value are replaced with spaces before the data is passed on.
+   Returns the result of fts_build_data(). */
+static int
+fts_build_unstructured_header(struct fts_mail_build_context *ctx,
+			      const struct message_header_line *hdr)
+{
+	const unsigned char *value = hdr->full_value;
+	unsigned char *copy = NULL;
+	unsigned int pos;
+	int ret;
+
+	/* @UNSAFE: the header value is duplicated lazily, only once the
+	   first NUL has been found, and the NULs are patched in the copy */
+	for (pos = 0; pos < hdr->full_value_len; pos++) {
+		if (hdr->full_value[pos] != '\0')
+			continue;
+		if (copy == NULL) {
+			copy = i_memdup(hdr->full_value, hdr->full_value_len);
+			value = copy;
+		}
+		copy[pos] = ' ';
+	}
+	ret = fts_build_data(ctx, value, hdr->full_value_len, TRUE);
+	i_free(copy);
+	return ret;
+}
+
+/* Make user_lang (must not be NULL) the language used for the data that
+   follows. */
+static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx,
+					struct fts_user_language *user_lang)
+{
+	i_assert(user_lang != NULL);
+
+	/* Start every field with a clean tokenizer, so that nothing is
+	   carried over from the previous field - especially if the
+	   previous indexing had failed. */
+	fts_tokenizer_reset(user_lang->index_tokenizer);
+	ctx->cur_user_lang = user_lang;
+}
+
+/* Choose the language handling for a header before it is tokenized.
+
+   Headers that are expected to contain human language get
+   ctx->cur_user_lang cleared, so fts_build_tokenized() detects the
+   language from the header's content. All other headers use the data
+   language, which only translates them to lowercase - no stemming or
+   other filtering. There's no perfect way of detecting which headers
+   contain human language, so fts_header_has_language() decides it by the
+   header's name. */
+static void
+fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx,
+				    const struct message_header_line *hdr)
+{
+	struct mail_user *user;
+
+	if (fts_header_has_language(hdr->name)) {
+		ctx->cur_user_lang = NULL;
+		return;
+	}
+	user = ctx->update_ctx->backend->ns->user;
+	fts_mail_build_ctx_set_lang(ctx, fts_user_get_data_lang(user));
+}
+
+/* Index one decoded header line.
+
+   The header value is sent to the backend under a HDR key (top-level
+   part, physical_pos == 0) or a MIME_HDR key (other parts). Address
+   headers are parsed and re-written in normalized form first. For
+   backends with FTS_BACKEND_FLAG_TOKENIZED_INPUT the header name itself is
+   additionally indexed under an empty hdr_name, using the data language.
+
+   Returns 0 on success, or when there is nothing to do (end-of-headers
+   marker, or the backend doesn't want this key), and -1 on failure. */
+static int fts_build_mail_header(struct fts_mail_build_context *ctx,
+				 const struct message_block *block)
+{
+	const struct message_header_line *hdr = block->hdr;
+	struct fts_backend_build_key key;
+	int ret;
+
+	if (hdr->eoh)
+		return 0;
+
+	/* hdr->full_value is always set because we get the block from
+	   message_decoder */
+	i_zero(&key);
+	key.uid = ctx->mail->uid;
+	key.type = block->part->physical_pos == 0 ?
+		FTS_BACKEND_BUILD_KEY_HDR : FTS_BACKEND_BUILD_KEY_MIME_HDR;
+	key.part = block->part;
+	key.hdr_name = hdr->name;
+
+	if ((ctx->update_ctx->backend->flags &
+	     FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0)
+		fts_build_tokenized_hdr_update_lang(ctx, hdr);
+
+	if (!fts_backend_update_set_build_key(ctx->update_ctx, &key))
+		return 0;
+
+	if (!message_header_is_address(hdr->name)) {
+		/* regular unstructured header */
+		ret = fts_build_unstructured_header(ctx, hdr);
+	} else T_BEGIN {
+		/* message address. normalize it to give better
+		   search results. */
+		struct message_address *addr;
+		string_t *str;
+
+		addr = message_address_parse(pool_datastack_create(),
+					     hdr->full_value,
+					     hdr->full_value_len,
+					     UINT_MAX, 0);
+		str = t_str_new(hdr->full_value_len);
+		message_address_write(str, addr);
+
+		ret = fts_build_data(ctx, str_data(str), str_len(str), TRUE);
+	} T_END;
+
+	if ((ctx->update_ctx->backend->flags &
+	     FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
+		/* index the header name itself using data-language. */
+		struct fts_user_language *prev_lang = ctx->cur_user_lang;
+
+		fts_mail_build_ctx_set_lang(ctx,
+			fts_user_get_data_lang(ctx->update_ctx->backend->ns->user));
+		key.hdr_name = "";
+		if (fts_backend_update_set_build_key(ctx->update_ctx, &key)) {
+			if (fts_build_data(ctx, (const void *)hdr->name,
+					   strlen(hdr->name), TRUE) < 0)
+				ret = -1;
+		}
+		/* Restore the language that was in use for the header
+		   value. It is still NULL if language detection failed
+		   above; fts_mail_build_ctx_set_lang() asserts a non-NULL
+		   language, so restore that state directly instead of
+		   panicking and let the -1 in ret propagate. */
+		if (prev_lang != NULL)
+			fts_mail_build_ctx_set_lang(ctx, prev_lang);
+		else
+			ctx->cur_user_lang = NULL;
+	}
+	return ret;
+}
+/* Prepare indexing of a body part after its headers have ended.
+
+   The part's content type is ctx->content_type, or "text/plain" if no
+   Content-Type header was seen. Depending on it, a body parser is created
+   in ctx->body_parser and the backend's build key is set. *binary_body_r
+   tells the caller whether to switch the message decoder to returning
+   binary data.
+
+   Returns FALSE if the body must be skipped: multipart containers, binary
+   parts when the backend lacks FTS_BACKEND_FLAG_BINARY_MIME_PARTS, or a
+   build key that the backend rejects. */
+static bool
+fts_build_body_begin(struct fts_mail_build_context *ctx,
+ struct message_part *part, bool *binary_body_r)
+{
+ struct mail_storage *storage;
+ struct fts_parser_context parser_context;
+ struct fts_backend_build_key key;
+
+ i_assert(ctx->body_parser == NULL);
+
+ *binary_body_r = FALSE;
+ i_zero(&key);
+ key.uid = ctx->mail->uid;
+ key.part = part;
+
+ i_zero(&parser_context);
+ parser_context.content_type = ctx->content_type != NULL ?
+ ctx->content_type : "text/plain";
+ if (str_begins(parser_context.content_type, "multipart/")) {
+ /* multiparts are never indexed, only their contents */
+ return FALSE;
+ }
+ storage = mailbox_get_storage(ctx->mail->box);
+ parser_context.user = mail_storage_get_user(storage);
+ parser_context.content_disposition = ctx->content_disposition;
+
+ if (fts_parser_init(&parser_context, &ctx->body_parser)) {
+ /* extract text using the returned parser */
+ *binary_body_r = TRUE;
+ key.type = FTS_BACKEND_BUILD_KEY_BODY_PART;
+ } else if (str_begins(parser_context.content_type, "text/") ||
+ str_begins(parser_context.content_type, "message/")) {
+ /* text body parts */
+ key.type = FTS_BACKEND_BUILD_KEY_BODY_PART;
+ ctx->body_parser = fts_parser_text_init();
+ } else {
+ /* possibly binary */
+ if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_BINARY_MIME_PARTS) == 0)
+ return FALSE;
+ *binary_body_r = TRUE;
+ key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY;
+ }
+ key.body_content_type = parser_context.content_type;
+ key.body_content_disposition = ctx->content_disposition;
+ ctx->cur_user_lang = NULL; /* the body's language is detected from its content */
+ if (!fts_backend_update_set_build_key(ctx->update_ctx, &key)) {
+ if (ctx->body_parser != NULL)
+ (void)fts_parser_deinit(&ctx->body_parser, NULL);
+ return FALSE;
+ }
+ return TRUE;
+}
+/* Run data through the current language's tokenizer and, if there is
+   one, its filter, and send every resulting token to the backend.
+   fts_build_tokenized() calls this with data=NULL, size=0 after the last
+   block of input.
+
+   The loop runs for as long as the tokenizer keeps returning tokens.
+   Returns -1 if the tokenizer or the backend failed, otherwise the
+   tokenizer's final, non-positive return value.
+   NOTE(review): a filter error is logged with mail_set_critical(), but it
+   neither stops the loop nor changes the return value - confirm that
+   this best-effort behavior is intended. */
+static int
+fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size)
+{
+ struct fts_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer;
+ struct fts_filter *filter = ctx->cur_user_lang->filter;
+ const char *token, *error;
+ int ret = 1, ret2; /* ret: tokenizer/backend state, ret2: state of the current token */
+
+ while (ret > 0) T_BEGIN {
+ ret = ret2 = fts_tokenizer_next(tokenizer, data, size, &token, &error);
+ if (ret2 > 0 && filter != NULL)
+ ret2 = fts_filter_filter(filter, &token, &error);
+ if (ret2 < 0) {
+ mail_set_critical(ctx->mail,
+ "fts: Couldn't create indexable tokens: %s",
+ error);
+ }
+ if (ret2 > 0) {
+ if (fts_backend_update_build_more(ctx->update_ctx,
+ (const void *)token,
+ strlen(token)) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ ret = -1;
+ }
+ }
+ } T_END;
+ return ret;
+}
+
+/* Detect the language of the given input.
+
+   Returns 1 and sets *lang_r when a language was chosen, 0 when more
+   input is needed (the data has then been saved to ctx->pending_input)
+   and -1 if the language detection itself failed. With last=TRUE the
+   function never returns 0: when the input is too short it falls back to
+   the first language of the user's language list. */
+static int
+fts_detect_language(struct fts_mail_build_context *ctx,
+		    const unsigned char *data, size_t size, bool last,
+		    const struct fts_language **lang_r)
+{
+	struct mail_user *user = ctx->update_ctx->backend->ns->user;
+	struct fts_language_list *lang_list = fts_user_get_language_list(user);
+	const struct fts_language *lang;
+	const char *error;
+
+	switch (fts_language_detect(lang_list, data, size, &lang, &error)) {
+	case FTS_LANGUAGE_RESULT_SHORT:
+		if (last) {
+			/* we've run out of data. use the default language.
+			   The current data must not be added to
+			   pending_input here: the caller tokenizes
+			   pending_input and then this same data, so
+			   buffering it would index it twice. */
+			*lang_r = fts_language_list_get_first(lang_list);
+			return 1;
+		}
+		/* save the input so far and try again later */
+		buffer_append(ctx->pending_input, data, size);
+		return 0;
+	case FTS_LANGUAGE_RESULT_UNKNOWN:
+		/* use the default language */
+		*lang_r = fts_language_list_get_first(lang_list);
+		return 1;
+	case FTS_LANGUAGE_RESULT_OK:
+		*lang_r = lang;
+		return 1;
+	case FTS_LANGUAGE_RESULT_ERROR:
+		/* internal language detection library failure
+		   (e.g. invalid config). don't index anything. */
+		mail_set_critical(ctx->mail,
+			"Language detection library initialization failed: %s",
+			error);
+		return -1;
+	default:
+		i_unreached();
+	}
+}
+
+/* Tokenize data for backends that want tokenized input.
+
+   If no language is set yet, it is detected first. While the detection
+   still needs more input, nothing is tokenized and 0 is returned. Once a
+   language is known, input saved in ctx->pending_input is tokenized before
+   the current data. With last=TRUE the tokenizer is finally called with
+   NULL data to finish the field. Returns 0 on success, -1 on failure. */
+static int
+fts_build_tokenized(struct fts_mail_build_context *ctx,
+		    const unsigned char *data, size_t size, bool last)
+{
+	if (ctx->cur_user_lang == NULL) {
+		struct mail_user *user = ctx->update_ctx->backend->ns->user;
+		const struct fts_language *lang;
+		int detected;
+
+		detected = fts_detect_language(ctx, data, size, last, &lang);
+		if (detected < 0)
+			return -1;
+		if (detected == 0) {
+			/* wait for more data */
+			return 0;
+		}
+		fts_mail_build_ctx_set_lang(ctx,
+			fts_user_language_find(user, lang));
+
+		/* first handle the input that was waiting for the
+		   language to become known */
+		if (ctx->pending_input->used > 0) {
+			if (fts_build_add_tokens_with_filter(ctx,
+					ctx->pending_input->data,
+					ctx->pending_input->used) < 0)
+				return -1;
+			buffer_set_used_size(ctx->pending_input, 0);
+		}
+	}
+
+	if (fts_build_add_tokens_with_filter(ctx, data, size) < 0)
+		return -1;
+	if (!last)
+		return 0;
+	return fts_build_add_tokens_with_filter(ctx, NULL, 0) < 0 ? -1 : 0;
+}
+/* Send data to the backend so that every
+   fts_backend_update_build_more() call ends at a word boundary (see
+   IS_WORD_WHITESPACE). An unfinished trailing word is kept in
+   ctx->word_buf and continued by the next call. A buffered word is sent
+   once its end is seen, once it has reached MAX_WORD_SIZE bytes, or when
+   last=TRUE. Returns 0 on success, -1 if the backend failed. */
+static int
+fts_build_full_words(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last)
+{
+ size_t i;
+
+ /* we'll need to send only full words to the backend */
+
+ if (ctx->word_buf != NULL && ctx->word_buf->used > 0) {
+ /* continuing previous word */
+ for (i = 0; i < size; i++) {
+ if (IS_WORD_WHITESPACE(data[i]))
+ break;
+ }
+ buffer_append(ctx->word_buf, data, i);
+ data += i;
+ size -= i;
+ if (size == 0 && ctx->word_buf->used < MAX_WORD_SIZE && !last) {
+ /* word is still not finished */
+ return 0;
+ }
+ /* we have a full word, index it */
+ if (fts_backend_update_build_more(ctx->update_ctx,
+ ctx->word_buf->data,
+ ctx->word_buf->used) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ return -1;
+ }
+ buffer_set_used_size(ctx->word_buf, 0);
+ }
+
+ /* find the boundary for last word. i becomes the number of bytes
+ that can be sent now: everything for the last block, otherwise
+ everything up to and including the last whitespace. */
+ if (last)
+ i = size;
+ else {
+ for (i = size; i > 0; i--) {
+ if (IS_WORD_WHITESPACE(data[i-1]))
+ break;
+ }
+ }
+
+ if (fts_backend_update_build_more(ctx->update_ctx, data, i) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ return -1;
+ }
+
+ if (i < size) { /* keep the unfinished trailing word for the next call */
+ if (ctx->word_buf == NULL) {
+ ctx->word_buf =
+ buffer_create_dynamic(default_pool, 128);
+ }
+ buffer_append(ctx->word_buf, data + i, size - i);
+ }
+ return 0;
+}
+
+/* Send data to the backend in the form its flags ask for: tokenized,
+   split at full words, or unmodified. last=TRUE marks the final block of
+   the current field. Returns 0 on success, -1 on failure. */
+static int fts_build_data(struct fts_mail_build_context *ctx,
+			  const unsigned char *data, size_t size, bool last)
+{
+	if ((ctx->update_ctx->backend->flags &
+	     FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0)
+		return fts_build_tokenized(ctx, data, size, last);
+
+	if ((ctx->update_ctx->backend->flags &
+	     FTS_BACKEND_FLAG_BUILD_FULL_WORDS) != 0)
+		return fts_build_full_words(ctx, data, size, last);
+
+	/* the backend takes the data as it comes */
+	if (fts_backend_update_build_more(ctx->update_ctx, data, size) >= 0)
+		return 0;
+	mail_storage_set_internal_error(ctx->mail->box->storage);
+	return -1;
+}
+
+/* Index one block of body data. The block must not be a header block.
+   Returns the result of fts_build_data(). */
+static int fts_build_body_block(struct fts_mail_build_context *ctx,
+				const struct message_block *block, bool last)
+{
+	int ret;
+
+	i_assert(block->hdr == NULL);
+
+	ret = fts_build_data(ctx, block->data, block->size, last);
+	return ret;
+}
+
+/* Index the output that the body parser still has, and deinitialize the
+   parser.
+
+   Returns 0 on success and -1 on failure. *may_need_retry_r is set to
+   TRUE, and *retriable_err_msg_r to the parser's error message, only when
+   indexing itself succeeded but fts_parser_deinit() returned 0. */
+static int fts_body_parser_finish(struct fts_mail_build_context *ctx,
+				  const char **retriable_err_msg_r,
+				  bool *may_need_retry_r)
+{
+	struct message_block block;
+	const char *retriable_error;
+	bool build_failed = FALSE;
+	int deinit_ret;
+
+	*may_need_retry_r = FALSE;
+
+	/* keep asking the parser for output until it returns an empty
+	   block. the empty block is passed on as well. */
+	for (;;) {
+		i_zero(&block);
+		fts_parser_more(ctx->body_parser, &block);
+		if (fts_build_body_block(ctx, &block, FALSE) < 0) {
+			build_failed = TRUE;
+			break;
+		}
+		if (block.size == 0)
+			break;
+	}
+
+	/* the parser is always deinitialized, even after a failure */
+	deinit_ret = fts_parser_deinit(&ctx->body_parser, &retriable_error);
+	if (build_failed) {
+		/* indexing already failed - we don't want to retry
+		   in any case */
+		return -1;
+	}
+	if (deinit_ret > 0)
+		return 0;
+	if (deinit_ret < 0) {
+		mail_storage_set_internal_error(ctx->mail->box->storage);
+		return -1;
+	}
+	/* deinit_ret == 0: retry the parsing */
+	*may_need_retry_r = TRUE;
+	*retriable_err_msg_r = retriable_error;
+	return -1;
+}
+
+/* Read the space-separated header name list from the plugin setting
+   `key` into `list`, lowercased and sorted with i_strcmp_p.
+
+   *matches_all_r is set to TRUE if an entry beginning with '*' is found;
+   reading stops at that entry. For an unset or empty setting the list is
+   left untouched and *matches_all_r is FALSE.
+   NOTE(review): `list` is passed by value - this presumably relies on
+   the array copy sharing its storage with the caller's array; confirm. */
+static void
+load_header_filter(const char *key, struct fts_backend *backend,
+		   ARRAY_TYPE(const_string) list, bool *matches_all_r)
+{
+	const char *setting = mail_user_plugin_getenv(backend->ns->user, key);
+	char **names;
+	unsigned int i;
+
+	*matches_all_r = FALSE;
+	if (setting != NULL && *setting != '\0') {
+		names = p_strsplit_spaces(backend->header_filters.pool,
+					  setting, " ");
+		for (i = 0; names[i] != NULL; i++) {
+			const char *name = str_lcase(names[i]);
+
+			array_push_back(&list, &name);
+			if (name[0] == '*') {
+				*matches_all_r = TRUE;
+				break;
+			}
+		}
+		array_sort(&list, i_strcmp_p);
+	}
+}
+
+/* Return the backend's header filters, loading them from the
+   fts_header_includes and fts_header_excludes settings on the first call.
+   exclude_is_default is set if the excludes list matched everything. */
+static struct fts_header_filters *
+load_header_filters(struct fts_backend *backend)
+{
+	struct fts_header_filters *filters = &backend->header_filters;
+	bool includes_all, excludes_all;
+
+	if (filters->loaded)
+		return filters;
+
+	/* a match-all in the includes has no effect, so its result is
+	   not used */
+	load_header_filter("fts_header_includes", backend,
+			   filters->includes, &includes_all);
+	load_header_filter("fts_header_excludes", backend,
+			   filters->excludes, &excludes_all);
+
+	filters->loaded = TRUE;
+	filters->exclude_is_default = excludes_all;
+	return filters;
+}
+
+/* Compare a key against a list item, where the item may end with the
+ * wildcard '*'. If the strings are equal up to a '*' that is the last
+ * character of the item, zero (match) is returned regardless of the
+ * remaining characters of the key.
+ *
+ * Otherwise the result follows the same lexicographic order as
+ * i_strcmp_p() and strcmp(): the first differing characters are compared
+ * as unsigned chars.
+ */
+static int ATTR_PURE
+header_prefix_cmp(const char *const *pkey, const char *const *pitem)
+{
+	const char *key = *pkey;
+	const char *item = *pitem;
+
+	/* skip the common prefix */
+	for (; *key != '\0' && *key == *item; key++, item++)
+		;
+
+	if (item[0] == '*' && item[1] == '\0')
+		return 0;
+	return (unsigned char)*key - (unsigned char)*item;
+}
+
+/* Decide whether a header is to be indexed, based on the backend's header
+   filters. The header name is lowercased before matching.
+
+   A header matching the includes is always indexable. Otherwise it is
+   indexable only if excluding isn't the default and the header doesn't
+   match the excludes. */
+static bool
+is_header_indexable(const char *header_name, struct fts_backend *backend)
+{
+	bool indexable = TRUE;
+
+	T_BEGIN {
+		struct fts_header_filters *filters = load_header_filters(backend);
+		const char *lname = t_str_lcase(header_name);
+		bool included = array_bsearch(&filters->includes, &lname,
+					      header_prefix_cmp) != NULL;
+
+		if (!included) {
+			indexable = !filters->exclude_is_default &&
+				array_bsearch(&filters->excludes, &lname,
+					      header_prefix_cmp) == NULL;
+		}
+	} T_END;
+	return indexable;
+}
+/* Parse one mail and feed its headers and body parts to the backend.
+
+   Returns 1 when the mail was processed, 0 when the mail's stream
+   couldn't be opened because the mail is expunged, and -1 on failure.
+   *may_need_retry_r is set to TRUE if fts_body_parser_finish() asked for
+   a retry; *retriable_err_msg_r then holds the error message. */
+static int
+fts_build_mail_real(struct fts_backend_update_context *update_ctx,
+ struct mail *mail,
+ const char **retriable_err_msg_r,
+ bool *may_need_retry_r)
+{
+ const struct message_parser_settings parser_set = {
+ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE,
+ };
+ struct fts_mail_build_context ctx;
+ struct istream *input;
+ struct message_parser_ctx *parser;
+ struct message_decoder_context *decoder;
+ struct message_block raw_block, block; /* raw_block: from the parser, block: after decoding */
+ struct message_part *prev_part, *parts;
+ bool skip_body = FALSE, body_part = FALSE, body_added = FALSE;
+ bool binary_body;
+ const char *error;
+ int ret;
+
+ *may_need_retry_r = FALSE;
+ if (mail_get_stream_because(mail, NULL, NULL, "fts indexing", &input) < 0) {
+ if (mail->expunged)
+ return 0;
+ mail_set_critical(mail, "Failed to read stream: %s",
+ mailbox_get_last_internal_error(mail->box, NULL));
+ return -1;
+ }
+
+ i_zero(&ctx);
+ ctx.update_ctx = update_ctx;
+ ctx.mail = mail;
+ if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0)
+ ctx.pending_input = buffer_create_dynamic(default_pool, 128);
+
+ prev_part = NULL;
+ parser = message_parser_init(pool_datastack_create(), input, &parser_set);
+
+ decoder = message_decoder_init(update_ctx->normalizer, 0);
+ for (;;) {
+ ret = message_parser_parse_next_block(parser, &raw_block);
+ i_assert(ret != 0);
+ if (ret < 0) { /* no more blocks: only a stream error counts as a failure */
+ if (input->stream_errno == 0)
+ ret = 0;
+ else {
+ mail_set_critical(mail, "read(%s) failed: %s",
+ i_stream_get_name(input),
+ i_stream_get_error(input));
+ }
+ break;
+ }
+
+ if (raw_block.part != prev_part) {
+ /* body part changed. we're now parsing the end of
+ boundary, possibly followed by message epilogue.
+ finish the previous part and reset the per-part state. */
+ if (ctx.body_parser != NULL) {
+ if (fts_body_parser_finish(&ctx, retriable_err_msg_r,
+ may_need_retry_r) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+ message_decoder_set_return_binary(decoder, FALSE);
+ fts_backend_update_unset_build_key(update_ctx);
+ prev_part = raw_block.part;
+ i_free_and_null(ctx.content_type);
+ i_free_and_null(ctx.content_disposition);
+
+ if (raw_block.size != 0) {
+ /* multipart. skip until beginning of next
+ part's headers */
+ skip_body = TRUE;
+ }
+ }
+
+ if (raw_block.hdr != NULL) {
+ /* always handle headers */
+ } else if (raw_block.size == 0) {
+ /* end of headers */
+ skip_body = !fts_build_body_begin(&ctx, raw_block.part,
+ &binary_body);
+ if (binary_body)
+ message_decoder_set_return_binary(decoder, TRUE);
+ body_part = TRUE;
+ } else {
+ if (skip_body)
+ continue;
+ }
+
+ if (!message_decoder_decode_next_block(decoder, &raw_block,
+ &block))
+ continue;
+
+ if (block.hdr != NULL) {
+ fts_parse_mail_header(&ctx, &raw_block); /* uses the raw, undecoded header */
+ if (is_header_indexable(block.hdr->name, update_ctx->backend) &&
+ fts_build_mail_header(&ctx, &block) < 0) {
+ ret = -1;
+ break;
+ }
+ } else if (block.size == 0) {
+ /* end of headers */
+ } else {
+ i_assert(body_part);
+ if (ctx.body_parser != NULL)
+ fts_parser_more(ctx.body_parser, &block);
+ if (fts_build_body_block(&ctx, &block, FALSE) < 0) {
+ ret = -1;
+ break;
+ }
+ body_added = TRUE;
+ }
+ }
+ if (ctx.body_parser != NULL) {
+ if (ret == 0)
+ ret = fts_body_parser_finish(&ctx, retriable_err_msg_r,
+ may_need_retry_r);
+ else
+ (void)fts_parser_deinit(&ctx.body_parser, NULL);
+ }
+ if (ret == 0 && body_part && !skip_body && !body_added) {
+ /* make sure body is added even when it doesn't exist.
+ NOTE(review): block.hdr isn't reset here, although
+ fts_build_body_block() asserts that it is NULL - confirm
+ that it can't still point to a header at this point. */
+ block.data = NULL; block.size = 0;
+ ret = fts_build_body_block(&ctx, &block, TRUE);
+ }
+ if (message_parser_deinit_from_parts(&parser, &parts, &error) < 0)
+ index_mail_set_message_parts_corrupted(mail, error);
+ message_decoder_deinit(&decoder);
+ i_free(ctx.content_type);
+ i_free(ctx.content_disposition);
+ buffer_free(&ctx.word_buf);
+ buffer_free(&ctx.pending_input);
+ return ret < 0 ? -1 : 1;
+}
+
+/* Index a mail, retrying when the failure was reported as retriable.
+
+   At most two attempts are made. If the last attempt also fails with a
+   retriable error, the error is logged as info and 0 is returned.
+   Otherwise the result of fts_build_mail_real() is returned. */
+int fts_build_mail(struct fts_backend_update_context *update_ctx,
+		   struct mail *mail)
+{
+	/* Number of attempts to be taken if retry is needed */
+	unsigned int attempts_left = 2;
+	const char *retriable_err_msg;
+	bool may_need_retry;
+	int ret;
+
+	T_BEGIN {
+		for (;;) {
+			ret = fts_build_mail_real(update_ctx, mail,
+						  &retriable_err_msg,
+						  &may_need_retry);
+			if (ret >= 0 || !may_need_retry)
+				break;
+
+			attempts_left--;
+			if (attempts_left == 0) {
+				/* Log this as info instead of as error,
+				   because e.g. Tika doesn't differentiate
+				   between temporary errors and invalid
+				   document input. */
+				i_info("%s - ignoring", retriable_err_msg);
+				ret = 0;
+				break;
+			}
+		}
+	} T_END;
+	return ret;
+}