summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts/fts-user.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/fts/fts-user.c')
-rw-r--r--src/plugins/fts/fts-user.c412
1 files changed, 412 insertions, 0 deletions
diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c
new file mode 100644
index 0000000..d5ce916
--- /dev/null
+++ b/src/plugins/fts/fts-user.c
@@ -0,0 +1,412 @@
+/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "module-context.h"
+#include "mail-user.h"
+#include "mail-storage-private.h"
+#include "mailbox-match-plugin.h"
+#include "fts-language.h"
+#include "fts-filter.h"
+#include "fts-tokenizer.h"
+#include "fts-user.h"
+
+/* Fetch the struct fts_user registered for obj under fts_user_module.
+   Callers in this file treat a NULL result as "not initialized". */
+#define FTS_USER_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_user_module)
+
+/* Per-user FTS state that fts_mail_user_init() attaches to a mail_user
+   under fts_user_module. */
+struct fts_user {
+ union mail_user_module_context module_ctx;
+ /* Number of fts_mail_user_init() calls not yet matched by
+    fts_mail_user_deinit(); the state is freed when it drops to 0. */
+ int refcount;
+
+ /* Language list filled from the fts_languages setting. */
+ struct fts_language_list *lang_list;
+ /* Entry created for fts_language_data; also stored in languages. */
+ struct fts_user_language *data_lang;
+ /* Every fts_user_language created for this user. */
+ ARRAY_TYPE(fts_user_language) languages;
+
+ /* Matcher created from the fts_autoindex_exclude setting. */
+ struct mailbox_match_plugin *autoindex_exclude;
+};
+
+/* Module context identifier used to attach struct fts_user to a mail_user;
+   registered in mail_user_module_register. */
+static MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
+ &mail_user_module_register);
+
+/* Convert a space-separated "key=value key2 ..." string into a
+   NULL-terminated array of alternating key and value strings. A word
+   without '=' is stored as a key with an empty value. Returns NULL when
+   str is NULL. The result is built with the t_*() allocation helpers, so
+   the caller does not free it. */
+static const char *const *str_keyvalues_to_array(const char *str)
+{
+	ARRAY_TYPE(const_string) pairs;
+	const char *const *word;
+	const char *name, *val, *eq;
+
+	if (str == NULL)
+		return NULL;
+
+	t_array_init(&pairs, 8);
+	for (word = t_strsplit_spaces(str, " "); *word != NULL; word++) {
+		eq = strchr(*word, '=');
+		if (eq == NULL) {
+			name = *word;
+			val = "";
+		} else {
+			/* key is everything before '=', value everything after */
+			name = t_strdup_until(*word, eq);
+			val = eq + 1;
+		}
+		array_push_back(&pairs, &name);
+		array_push_back(&pairs, &val);
+	}
+	array_append_zero(&pairs);
+	return array_front(&pairs);
+}
+
+/* Create fuser->lang_list and fill it from the fts_languages setting.
+
+   The optional fts_language_config setting is passed on to
+   fts_language_list_init() as a key/value pair. Returns 0 on success. On a
+   missing setting, an unknown language name or an empty resulting list,
+   returns -1 with *error_r set. */
+static int
+fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
+			const char **error_r)
+{
+	const char *list_settings[3] = {NULL, NULL, NULL};
+	const char *lang_names, *config_value, *bad_name;
+
+	lang_names = mail_user_plugin_getenv(user, "fts_languages");
+	if (lang_names == NULL) {
+		*error_r = "fts_languages setting is missing";
+		return -1;
+	}
+
+	config_value = mail_user_plugin_getenv(user, "fts_language_config");
+	if (config_value != NULL) {
+		list_settings[0] = "fts_language_config";
+		list_settings[1] = config_value;
+	}
+	if (fts_language_list_init(list_settings, &fuser->lang_list,
+				   error_r) < 0)
+		return -1;
+
+	if (!fts_language_list_add_names(fuser->lang_list, lang_names,
+					 &bad_name)) {
+		*error_r = t_strdup_printf(
+			"fts_languages: Unknown language '%s'", bad_name);
+		return -1;
+	}
+	if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
+		*error_r = "fts_languages setting is empty";
+		return -1;
+	}
+	return 0;
+}
+
+/* Build the filter chain for lang.
+
+   The list of filter names is read from fts_filters_<lang>, falling back to
+   fts_filters. Each filter's parameters are read from
+   fts_filter_<lang>_<filter>, falling back to fts_filter_<filter>, where
+   '-' in the filter name is replaced with '_'. Every filter is created with
+   the previously created one as its parent.
+
+   Returns 0 and stores the last created filter in *filter_r (NULL when no
+   filters are configured). On an unknown filter name or a creation failure,
+   returns -1 with *error_r set. */
+static int
+fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
+			struct fts_filter **filter_r, const char **error_r)
+{
+	const char *list_key, *list_value, *const *name;
+	const char *params_key, *params, *key_name, *create_error;
+	const struct fts_filter *cls;
+	struct fts_filter *created = NULL, *prev = NULL;
+
+	/* the language-specific list takes precedence over the global one */
+	list_key = t_strconcat("fts_filters_", lang->name, NULL);
+	list_value = mail_user_plugin_getenv(user, list_key);
+	if (list_value == NULL) {
+		list_key = "fts_filters";
+		list_value = mail_user_plugin_getenv(user, list_key);
+	}
+	if (list_value == NULL) {
+		/* no filters configured at all */
+		*filter_r = NULL;
+		return 0;
+	}
+
+	for (name = t_strsplit_spaces(list_value, " "); *name != NULL; name++) {
+		cls = fts_filter_find(*name);
+		if (cls == NULL) {
+			*error_r = t_strdup_printf("%s: Unknown filter '%s'",
+						   list_key, *name);
+			goto failed;
+		}
+
+		/* the language-specific parameters take precedence too */
+		key_name = t_str_replace(*name, '-', '_');
+		params_key = t_strdup_printf("fts_filter_%s_%s",
+					     lang->name, key_name);
+		params = mail_user_plugin_getenv(user, params_key);
+		if (params == NULL) {
+			params_key = t_strdup_printf("fts_filter_%s", key_name);
+			params = mail_user_plugin_getenv(user, params_key);
+		}
+
+		if (fts_filter_create(cls, prev, lang,
+				      str_keyvalues_to_array(params),
+				      &created, &create_error) < 0) {
+			*error_r = t_strdup_printf("%s: %s", params_key,
+						   create_error);
+			goto failed;
+		}
+		/* drop our own reference to the previous filter once it has
+		   been handed to the new one as its parent */
+		if (prev != NULL)
+			fts_filter_unref(&prev);
+		prev = created;
+	}
+	*filter_r = created;
+	return 0;
+
+failed:
+	if (prev != NULL)
+		fts_filter_unref(&prev);
+	return -1;
+}
+
+/* Build the tokenizer chain for lang.
+
+   The list of tokenizer names is read from fts_tokenizers_<lang>, falling
+   back to fts_tokenizers; one of them must exist and name at least one
+   tokenizer. Each tokenizer's parameters are read from
+   fts_tokenizer_<tokenizer>_<lang>, falling back to
+   fts_tokenizer_<tokenizer>, where '-' in the tokenizer name is replaced
+   with '_'. When search is TRUE, "search=yes" is added in front of the
+   parameters. Every tokenizer is created with the previously created one as
+   its parent.
+
+   Returns 0 and stores the last created tokenizer in *tokenizer_r. On a
+   missing or empty list, an unknown tokenizer name or a creation failure,
+   returns -1 with *error_r set. */
+static int
+fts_user_create_tokenizer(struct mail_user *user,
+			  const struct fts_language *lang,
+			  struct fts_tokenizer **tokenizer_r, bool search,
+			  const char **error_r)
+{
+	const struct fts_tokenizer *tokenizer_class;
+	struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
+	const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
+	const char *str, *error, *set_key;
+	unsigned int i;
+
+	/* the language-specific list takes precedence over the global one */
+	tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL);
+	str = mail_user_plugin_getenv(user, tokenizers_key);
+	if (str == NULL) {
+		str = mail_user_plugin_getenv(user, "fts_tokenizers");
+		if (str == NULL) {
+			*error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key);
+			return -1;
+		}
+		tokenizers_key = "fts_tokenizers";
+	}
+
+	tokenizers = t_strsplit_spaces(str, " ");
+	if (tokenizers[0] == NULL) {
+		/* A present but empty list would otherwise succeed with a
+		   NULL tokenizer, even though a tokenizer is mandatory. */
+		*error_r = t_strdup_printf("%s setting is empty",
+					   tokenizers_key);
+		return -1;
+	}
+
+	for (i = 0; tokenizers[i] != NULL; i++) {
+		tokenizer_class = fts_tokenizer_find(tokenizers[i]);
+		if (tokenizer_class == NULL) {
+			*error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
+						   tokenizers_key, tokenizers[i]);
+			goto failed;
+		}
+
+		tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
+		set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name);
+		str = mail_user_plugin_getenv(user, set_key);
+		if (str == NULL) {
+			set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name);
+			str = mail_user_plugin_getenv(user, set_key);
+		}
+
+		/* tell the tokenizers that we're tokenizing a search string
+		   (instead of tokenizing indexed data). str may be NULL here
+		   when the tokenizer has no parameters configured, so handle
+		   that explicitly rather than relying on the NULL ending the
+		   t_strconcat() argument list early. */
+		if (search) {
+			str = str == NULL ? "search=yes" :
+				t_strconcat("search=yes ", str, NULL);
+		}
+
+		if (fts_tokenizer_create(tokenizer_class, parent,
+					 str_keyvalues_to_array(str),
+					 &tokenizer, &error) < 0) {
+			*error_r = t_strdup_printf("%s: %s", set_key, error);
+			goto failed;
+		}
+		/* drop our own reference to the previous tokenizer once it
+		   has been handed to the new one as its parent */
+		if (parent != NULL)
+			fts_tokenizer_unref(&parent);
+		parent = tokenizer;
+	}
+	*tokenizer_r = tokenizer;
+	return 0;
+
+failed:
+	if (parent != NULL)
+		fts_tokenizer_unref(&parent);
+	return -1;
+}
+
+/* Create both tokenizers of user_lang: first the one used for indexed data
+   (search=FALSE), then the one used for search strings (search=TRUE).
+   Returns 0 on success, or -1 with *error_r set as soon as either creation
+   fails. */
+static int
+fts_user_language_init_tokenizers(struct mail_user *user,
+				  struct fts_user_language *user_lang,
+				  const char **error_r)
+{
+	const struct fts_language *lang = user_lang->lang;
+
+	if (fts_user_create_tokenizer(user, lang, &user_lang->index_tokenizer,
+				      FALSE, error_r) < 0 ||
+	    fts_user_create_tokenizer(user, lang, &user_lang->search_tokenizer,
+				      TRUE, error_r) < 0)
+		return -1;
+	return 0;
+}
+
+/* Return the user's fts_user_language whose language name equals
+   lang->name, or NULL if there is none. The user must already have an fts
+   context attached (asserted). */
+struct fts_user_language *
+fts_user_language_find(struct mail_user *user,
+		       const struct fts_language *lang)
+{
+	struct fts_user *fts_ctx = FTS_USER_CONTEXT(user);
+	struct fts_user_language *candidate;
+
+	i_assert(fts_ctx != NULL);
+	array_foreach_elem(&fts_ctx->languages, candidate) {
+		if (strcmp(candidate->lang->name, lang->name) != 0)
+			continue;
+		return candidate;
+	}
+	return NULL;
+}
+
+/* Allocate an fts_user_language for lang from the user's pool, add it to
+   fuser->languages and create its tokenizers and filters.
+
+   The entry is added to the array before anything else is created, so that
+   fts_user_free() also releases a partially initialized entry. Returns 0 on
+   success, -1 with *error_r set on failure. */
+static int fts_user_language_create(struct mail_user *user,
+				    struct fts_user *fuser,
+				    const struct fts_language *lang,
+				    const char **error_r)
+{
+	struct fts_user_language *entry;
+
+	entry = p_new(user->pool, struct fts_user_language, 1);
+	entry->lang = lang;
+	array_push_back(&fuser->languages, &entry);
+
+	if (fts_user_language_init_tokenizers(user, entry, error_r) < 0 ||
+	    fts_user_create_filters(user, lang, &entry->filter, error_r) < 0)
+		return -1;
+	return 0;
+}
+
+/* Create an fts_user_language for every language in fuser->lang_list.
+   Stops at the first failure and returns -1 with *error_r set; returns 0
+   when all languages were created. */
+static int fts_user_languages_fill_all(struct mail_user *user,
+				       struct fts_user *fuser,
+				       const char **error_r)
+{
+	const struct fts_language *cur;
+	int ret;
+
+	array_foreach_elem(fts_language_list_get_all(fuser->lang_list), cur) {
+		ret = fts_user_language_create(user, fuser, cur, error_r);
+		if (ret < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/* Create the fts_user_language for fts_language_data, with its tokenizers
+   and a lowercase filter, and remember it in fuser->data_lang.
+
+   Returns 0 on success, or -1 with *error_r set if the tokenizers can't be
+   created. Failure to create the lowercase filter is treated as
+   unreachable. */
+static int
+fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
+			    const char **error_r)
+{
+	struct fts_user_language *user_lang;
+	const char *error;
+
+	user_lang = p_new(user->pool, struct fts_user_language, 1);
+	user_lang->lang = &fts_language_data;
+	/* Register the entry before creating anything in it, the same way
+	   fts_user_language_create() does. Otherwise a failure after the
+	   index tokenizer was created would leave that tokenizer unreachable
+	   for fts_user_free() and leak it. */
+	array_push_back(&fuser->languages, &user_lang);
+
+	if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
+		return -1;
+
+	if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL,
+			      &user_lang->filter, &error) < 0)
+		i_unreached();
+	i_assert(user_lang->filter != NULL);
+
+	fuser->data_lang = user_lang;
+	return 0;
+}
+
+/* Return the language list stored in the user's fts context. The context
+   must exist (asserted). */
+struct fts_language_list *fts_user_get_language_list(struct mail_user *user)
+{
+	struct fts_user *fts_ctx;
+
+	fts_ctx = FTS_USER_CONTEXT(user);
+	i_assert(fts_ctx != NULL);
+	return fts_ctx->lang_list;
+}
+
+/* Return a pointer to the array of all fts_user_language entries stored in
+   the user's fts context. The context must exist (asserted). */
+const ARRAY_TYPE(fts_user_language) *
+fts_user_get_all_languages(struct mail_user *user)
+{
+	struct fts_user *fts_ctx;
+
+	fts_ctx = FTS_USER_CONTEXT(user);
+	i_assert(fts_ctx != NULL);
+	return &fts_ctx->languages;
+}
+
+/* Return the data language entry stored in the user's fts context. The
+   context must exist (asserted). */
+struct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
+{
+	struct fts_user *fts_ctx;
+
+	fts_ctx = FTS_USER_CONTEXT(user);
+	i_assert(fts_ctx != NULL);
+	return fts_ctx->data_lang;
+}
+
+/* Return the result of matching box against the fts_autoindex_exclude
+   matcher of the user owning the box's storage. That user must already
+   have an fts context attached (asserted, like the other accessors in
+   this file). */
+bool fts_user_autoindex_exclude(struct mailbox *box)
+{
+	struct fts_user *fuser = FTS_USER_CONTEXT(box->storage->user);
+
+	i_assert(fuser != NULL);
+	return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box);
+}
+
+/* Release the filter and both tokenizers of user_lang. Members that were
+   never created (NULL) are skipped, so a partially initialized entry is
+   fine. The entry itself is not freed here. */
+static void fts_user_language_free(struct fts_user_language *user_lang)
+{
+	if (user_lang->filter != NULL) {
+		fts_filter_unref(&user_lang->filter);
+	}
+	if (user_lang->index_tokenizer != NULL) {
+		fts_tokenizer_unref(&user_lang->index_tokenizer);
+	}
+	if (user_lang->search_tokenizer != NULL) {
+		fts_tokenizer_unref(&user_lang->search_tokenizer);
+	}
+}
+
+/* Release everything fuser refers to: the language list, the filters and
+   tokenizers of every language entry, and the autoindex exclude matcher.
+   The language list and the languages array are skipped if they were never
+   created. */
+static void fts_user_free(struct fts_user *fuser)
+{
+	struct fts_user_language *entry;
+
+	if (fuser->lang_list != NULL) {
+		fts_language_list_deinit(&fuser->lang_list);
+	}
+
+	if (array_is_created(&fuser->languages)) {
+		array_foreach_elem(&fuser->languages, entry) {
+			fts_user_language_free(entry);
+		}
+	}
+	mailbox_match_plugin_deinit(&fuser->autoindex_exclude);
+}
+
+/* Set up the language-related state of fuser: create the languages array,
+   then the language list, the data language and finally one entry per
+   listed language, in that order. Returns 0 on success, or -1 with
+   *error_r set at the first step that fails. */
+static int
+fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser,
+			  const char **error_r)
+{
+	p_array_init(&fuser->languages, user->pool, 4);
+
+	if (fts_user_init_languages(user, fuser, error_r) < 0)
+		return -1;
+	if (fts_user_init_data_language(user, fuser, error_r) < 0)
+		return -1;
+	return fts_user_languages_fill_all(user, fuser, error_r) < 0 ? -1 : 0;
+}
+
+/* Attach fts state to user, or take another reference if it is already
+   attached.
+
+   With initialize_libfts set, the language, tokenizer and filter state is
+   created as well; if that fails, the partial state is freed, nothing is
+   attached and -1 is returned with *error_r set. Otherwise returns 0. */
+int fts_mail_user_init(struct mail_user *user, bool initialize_libfts,
+		       const char **error_r)
+{
+	struct fts_user *fuser;
+
+	fuser = FTS_USER_CONTEXT(user);
+	if (fuser != NULL) {
+		/* already set up by another fts plugin - just share it */
+		fuser->refcount++;
+		return 0;
+	}
+
+	fuser = p_new(user->pool, struct fts_user, 1);
+	fuser->refcount = 1;
+	if (initialize_libfts &&
+	    fts_mail_user_init_libfts(user, fuser, error_r) < 0) {
+		fts_user_free(fuser);
+		return -1;
+	}
+	fuser->autoindex_exclude =
+		mailbox_match_plugin_init(user, "fts_autoindex_exclude");
+
+	MODULE_CONTEXT_SET(user, fts_user_module, fuser);
+	return 0;
+}
+
+/* Drop one reference to the user's fts state and free its contents when
+   the last reference is gone. Does nothing if no fts state is attached. */
+void fts_mail_user_deinit(struct mail_user *user)
+{
+	struct fts_user *fts_ctx = FTS_USER_CONTEXT(user);
+
+	if (fts_ctx == NULL)
+		return;
+
+	i_assert(fts_ctx->refcount > 0);
+	fts_ctx->refcount--;
+	if (fts_ctx->refcount == 0)
+		fts_user_free(fts_ctx);
+}