summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts/fts-search.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 09:51:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 09:51:24 +0000
commitf7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
treea3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/plugins/fts/fts-search.c
parentInitial commit. (diff)
downloaddovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz
dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/plugins/fts/fts-search.c')
-rw-r--r--src/plugins/fts/fts-search.c385
1 files changed, 385 insertions, 0 deletions
diff --git a/src/plugins/fts/fts-search.c b/src/plugins/fts/fts-search.c
new file mode 100644
index 0000000..895ea59
--- /dev/null
+++ b/src/plugins/fts/fts-search.c
@@ -0,0 +1,385 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "seq-range-array.h"
+#include "mail-search.h"
+#include "fts-api-private.h"
+#include "fts-search-args.h"
+#include "fts-search-serialize.h"
+#include "fts-storage.h"
+#include "hash.h"
+
+static void
+uid_range_to_seqs(struct fts_search_context *fctx,
+ const ARRAY_TYPE(seq_range) *uid_range,
+ ARRAY_TYPE(seq_range) *seq_range)
+{
+ const struct seq_range *range;
+ unsigned int i, count;
+ uint32_t seq1, seq2;
+
+ range = array_get(uid_range, &count);
+ if (!array_is_created(seq_range))
+ p_array_init(seq_range, fctx->result_pool, count);
+ for (i = 0; i < count; i++) {
+ if (range[i].seq1 > range[i].seq2)
+ continue;
+ mailbox_get_seq_range(fctx->box, range[i].seq1, range[i].seq2,
+ &seq1, &seq2);
+ if (seq1 != 0)
+ seq_range_array_add_range(seq_range, seq1, seq2);
+ }
+}
+
+static int fts_search_lookup_level_single(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ bool and_args)
+{
+ enum fts_lookup_flags flags = fctx->flags |
+ (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0);
+ struct fts_search_level *level;
+ struct fts_result result;
+
+ i_zero(&result);
+ result.search_state = fctx->search_state;
+ result.pool = fctx->result_pool;
+ p_array_init(&result.definite_uids, fctx->result_pool, 32);
+ p_array_init(&result.maybe_uids, fctx->result_pool, 32);
+ p_array_init(&result.scores, fctx->result_pool, 32);
+
+ mail_search_args_reset(args, TRUE);
+ if (fts_backend_lookup(fctx->backend, fctx->box, args, flags,
+ &result) < 0)
+ return -1;
+
+ fctx->search_state = result.search_state;
+ level = array_append_space(&fctx->levels);
+ level->args_matches = buffer_create_dynamic(fctx->result_pool, 16);
+ fts_search_serialize(level->args_matches, args);
+
+ uid_range_to_seqs(fctx, &result.definite_uids, &level->definite_seqs);
+ uid_range_to_seqs(fctx, &result.maybe_uids, &level->maybe_seqs);
+ level->score_map = result.scores;
+ return 0;
+}
+
+static void
+level_scores_add_vuids(struct mailbox *box,
+ struct fts_search_level *level, struct fts_result *br)
+{
+ const struct fts_score_map *scores;
+ unsigned int i, count;
+ ARRAY_TYPE(seq_range) backend_uids;
+ ARRAY_TYPE(uint32_t) vuids_arr;
+ const uint32_t *vuids;
+ struct fts_score_map *score;
+
+ scores = array_get(&br->scores, &count);
+ t_array_init(&vuids_arr, count);
+ t_array_init(&backend_uids, 64);
+ for (i = 0; i < count; i++)
+ seq_range_array_add(&backend_uids, scores[i].uid);
+ box->virtual_vfuncs->get_virtual_uid_map(box, br->box,
+ &backend_uids, &vuids_arr);
+
+ i_assert(array_count(&vuids_arr) == array_count(&br->scores));
+ vuids = array_get(&vuids_arr, &count);
+ for (i = 0; i < count; i++) {
+ score = array_append_space(&level->score_map);
+ score->uid = vuids[i];
+ score->score = scores[i].score;
+ }
+}
+
+static int
+mailbox_cmp_fts_backend(struct mailbox *const *m1, struct mailbox *const *m2)
+{
+ struct fts_backend *b1, *b2;
+
+ b1 = fts_mailbox_backend(*m1);
+ b2 = fts_mailbox_backend(*m2);
+ if (b1 < b2)
+ return -1;
+ if (b1 > b2)
+ return 1;
+ return 0;
+}
+
+static int
+multi_add_lookup_result(struct fts_search_context *fctx,
+ struct fts_search_level *level,
+ struct mail_search_arg *args,
+ struct fts_multi_result *result)
+{
+ ARRAY_TYPE(seq_range) vuids;
+ size_t orig_size;
+ unsigned int i;
+
+ orig_size = level->args_matches->used;
+ fts_search_serialize(level->args_matches, args);
+ if (orig_size > 0) {
+ if (level->args_matches->used != orig_size * 2 ||
+ memcmp(level->args_matches->data,
+ CONST_PTR_OFFSET(level->args_matches->data,
+ orig_size), orig_size) != 0)
+ i_panic("incompatible fts backends for namespaces");
+ buffer_set_used_size(level->args_matches, orig_size);
+ }
+
+ t_array_init(&vuids, 64);
+ for (i = 0; result->box_results[i].box != NULL; i++) {
+ struct fts_result *br = &result->box_results[i];
+
+ array_clear(&vuids);
+ if (array_is_created(&br->definite_uids)) {
+ fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box,
+ br->box, &br->definite_uids, &vuids);
+ }
+ uid_range_to_seqs(fctx, &vuids, &level->definite_seqs);
+
+ array_clear(&vuids);
+ if (array_is_created(&br->maybe_uids)) {
+ fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box,
+ br->box, &br->maybe_uids, &vuids);
+ }
+ uid_range_to_seqs(fctx, &vuids, &level->maybe_seqs);
+
+ if (array_is_created(&br->scores))
+ level_scores_add_vuids(fctx->box, level, br);
+ }
+ return 0;
+}
+
+static int fts_search_lookup_level_multi(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ bool and_args)
+{
+ enum fts_lookup_flags flags = fctx->flags |
+ (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0);
+ ARRAY_TYPE(mailboxes) mailboxes_arr, tmp_mailboxes;
+ struct mailbox *const *mailboxes;
+ struct fts_backend *backend;
+ struct fts_search_level *level;
+ struct fts_multi_result result;
+ unsigned int i, j, mailbox_count;
+
+ p_array_init(&mailboxes_arr, fctx->result_pool, 8);
+ fctx->box->virtual_vfuncs->get_virtual_backend_boxes(fctx->box,
+ &mailboxes_arr, TRUE);
+ array_sort(&mailboxes_arr, mailbox_cmp_fts_backend);
+
+ i_zero(&result);
+ result.search_state = fctx->search_state;
+ result.pool = fctx->result_pool;
+
+ level = array_append_space(&fctx->levels);
+ level->args_matches = buffer_create_dynamic(fctx->result_pool, 16);
+ p_array_init(&level->score_map, fctx->result_pool, 1);
+
+ mailboxes = array_get(&mailboxes_arr, &mailbox_count);
+ t_array_init(&tmp_mailboxes, mailbox_count);
+ for (i = 0; i < mailbox_count; i = j) {
+ array_clear(&tmp_mailboxes);
+ array_push_back(&tmp_mailboxes, &mailboxes[i]);
+
+ backend = fts_mailbox_backend(mailboxes[i]);
+ for (j = i + 1; j < mailbox_count; j++) {
+ if (fts_mailbox_backend(mailboxes[j]) != backend)
+ break;
+ array_push_back(&tmp_mailboxes, &mailboxes[j]);
+ }
+ array_append_zero(&tmp_mailboxes);
+
+ mail_search_args_reset(args, TRUE);
+ if (fts_backend_lookup_multi(backend,
+ array_front(&tmp_mailboxes),
+ args, flags, &result) < 0)
+ return -1;
+
+ if (multi_add_lookup_result(fctx, level, args, &result) < 0)
+ return -1;
+ }
+ fctx->search_state = result.search_state;
+ return 0;
+}
+
+static int fts_search_lookup_level(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ bool and_args)
+{
+ int ret;
+
+ T_BEGIN {
+ ret = !fctx->virtual_mailbox ?
+ fts_search_lookup_level_single(fctx, args, and_args) :
+ fts_search_lookup_level_multi(fctx, args, and_args);
+ } T_END;
+ if (ret < 0)
+ return -1;
+
+ for (; args != NULL; args = args->next) {
+ if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
+ continue;
+
+ if (fts_search_lookup_level(fctx, args->value.subargs,
+ args->type == SEARCH_SUB) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static void
+fts_search_merge_scores_and(ARRAY_TYPE(fts_score_map) *dest,
+ const ARRAY_TYPE(fts_score_map) *src)
+{
+ struct fts_score_map *dest_map;
+ const struct fts_score_map *src_map;
+ unsigned int desti, srci, dest_count, src_count;
+
+ dest_map = array_get_modifiable(dest, &dest_count);
+ src_map = array_get(src, &src_count);
+
+ /* arg_scores are summed to current scores. we could drop UIDs that
+ don't exist in both, but that's just extra work so don't bother */
+ for (desti = srci = 0; desti < dest_count && srci < src_count;) {
+ if (dest_map[desti].uid < src_map[srci].uid)
+ desti++;
+ else if (dest_map[desti].uid > src_map[srci].uid)
+ srci++;
+ else {
+ if (dest_map[desti].score < src_map[srci].score)
+ dest_map[desti].score = src_map[srci].score;
+ desti++; srci++;
+ }
+ }
+}
+
+static void
+fts_search_merge_scores_or(ARRAY_TYPE(fts_score_map) *dest,
+ const ARRAY_TYPE(fts_score_map) *src)
+{
+ ARRAY_TYPE(fts_score_map) src2;
+ const struct fts_score_map *src_map, *src2_map;
+ unsigned int srci, src2i, src_count, src2_count;
+
+ t_array_init(&src2, array_count(dest));
+ array_append_array(&src2, dest);
+ array_clear(dest);
+
+ src_map = array_get(src, &src_count);
+ src2_map = array_get(&src2, &src2_count);
+
+ /* add any missing UIDs to current scores. if any existing UIDs have
+ lower scores than in arg_scores, increase them. */
+ for (srci = src2i = 0; srci < src_count || src2i < src2_count;) {
+ if (src2i == src2_count ||
+ src_map[srci].uid < src2_map[src2i].uid) {
+ array_push_back(dest, &src_map[srci]);
+ srci++;
+ } else if (srci == src_count ||
+ src_map[srci].uid > src2_map[src2i].uid) {
+ array_push_back(dest, &src2_map[src2i]);
+ src2i++;
+ } else {
+ i_assert(src_map[srci].uid == src2_map[src2i].uid);
+ if (src_map[srci].score > src2_map[src2i].score)
+ array_push_back(dest, &src_map[srci]);
+ else
+ array_push_back(dest, &src2_map[src2i]);
+ srci++; src2i++;
+ }
+ }
+}
+
+static void
+fts_search_merge_scores_level(struct fts_search_context *fctx,
+ struct mail_search_arg *args, unsigned int *idx,
+ bool and_args, ARRAY_TYPE(fts_score_map) *scores)
+{
+ const struct fts_search_level *level;
+ ARRAY_TYPE(fts_score_map) arg_scores;
+
+ i_assert(array_count(scores) == 0);
+
+ /*
+ The (simplified) args can look like:
+
+ A and B and (C or D) and (E or F) and ...
+ A or B or (C and D) or (E and F) or ...
+
+ The A op B part's scores are in level->scores. The child args'
+ scores are in the sub levels' scores.
+ */
+
+ level = array_idx(&fctx->levels, *idx);
+ array_append_array(scores, &level->score_map);
+
+ t_array_init(&arg_scores, 64);
+ for (; args != NULL; args = args->next) {
+ if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
+ continue;
+
+ *idx += 1;
+ array_clear(&arg_scores);
+ fts_search_merge_scores_level(fctx, args->value.subargs, idx,
+ args->type == SEARCH_OR,
+ &arg_scores);
+
+ if (and_args)
+ fts_search_merge_scores_and(scores, &arg_scores);
+ else
+ fts_search_merge_scores_or(scores, &arg_scores);
+ }
+}
+
+static void fts_search_merge_scores(struct fts_search_context *fctx)
+{
+ unsigned int idx = 0;
+
+ fts_search_merge_scores_level(fctx, fctx->args->args, &idx,
+ TRUE, &fctx->scores->score_map);
+}
+
+static void fts_search_try_lookup(struct fts_search_context *fctx)
+{
+ uint32_t last_uid, seq1, seq2;
+
+ i_assert(array_count(&fctx->levels) == 0);
+ i_assert(fctx->args->simplified);
+
+ if (fts_backend_refresh(fctx->backend) < 0)
+ return;
+ if (fts_backend_get_last_uid(fctx->backend, fctx->box, &last_uid) < 0)
+ return;
+ mailbox_get_seq_range(fctx->box, last_uid+1, (uint32_t)-1,
+ &seq1, &seq2);
+ fctx->first_unindexed_seq = seq1 != 0 ? seq1 : (uint32_t)-1;
+
+ if (fctx->virtual_mailbox) {
+ hash_table_clear(fctx->last_indexed_virtual_uids, TRUE);
+ fctx->next_unindexed_seq = fctx->first_unindexed_seq;
+ }
+
+ if ((fctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
+ if (fts_search_args_expand(fctx->backend, fctx->args) < 0)
+ return;
+ }
+ fts_search_serialize(fctx->orig_matches, fctx->args->args);
+
+ if (fts_search_lookup_level(fctx, fctx->args->args, TRUE) == 0) {
+ fctx->fts_lookup_success = TRUE;
+ fts_search_merge_scores(fctx);
+ }
+
+ fts_search_deserialize(fctx->args->args, fctx->orig_matches);
+ fts_backend_lookup_done(fctx->backend);
+}
+
+void fts_search_lookup(struct fts_search_context *fctx)
+{
+ struct event_reason *reason = event_reason_begin("fts:lookup");
+ fts_search_try_lookup(fctx);
+ event_reason_end(&reason);
+}