diff options
Diffstat (limited to 'src/plugins/fts/fts-user.c')
-rw-r--r-- | src/plugins/fts/fts-user.c | 412 |
1 files changed, 412 insertions, 0 deletions
diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c new file mode 100644 index 0000000..d5ce916 --- /dev/null +++ b/src/plugins/fts/fts-user.c @@ -0,0 +1,412 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "module-context.h" +#include "mail-user.h" +#include "mail-storage-private.h" +#include "mailbox-match-plugin.h" +#include "fts-language.h" +#include "fts-filter.h" +#include "fts-tokenizer.h" +#include "fts-user.h" + +#define FTS_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_user_module) + +struct fts_user { + union mail_user_module_context module_ctx; + int refcount; + + struct fts_language_list *lang_list; + struct fts_user_language *data_lang; + ARRAY_TYPE(fts_user_language) languages; + + struct mailbox_match_plugin *autoindex_exclude; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_user_module, + &mail_user_module_register); + +static const char *const *str_keyvalues_to_array(const char *str) +{ + const char *key, *value, *const *keyvalues; + ARRAY_TYPE(const_string) arr; + unsigned int i; + + if (str == NULL) + return NULL; + + t_array_init(&arr, 8); + keyvalues = t_strsplit_spaces(str, " "); + for (i = 0; keyvalues[i] != NULL; i++) { + value = strchr(keyvalues[i], '='); + if (value != NULL) + key = t_strdup_until(keyvalues[i], value++); + else { + key = keyvalues[i]; + value = ""; + } + array_push_back(&arr, &key); + array_push_back(&arr, &value); + } + array_append_zero(&arr); + return array_front(&arr); +} + +static int +fts_user_init_languages(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + const char *languages, *unknown; + const char *lang_config[3] = {NULL, NULL, NULL}; + + languages = mail_user_plugin_getenv(user, "fts_languages"); + if (languages == NULL) { + *error_r = "fts_languages setting is missing"; + return -1; + } + + lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config"); + if (lang_config[1] != NULL) + lang_config[0] = "fts_language_config"; + if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0) + return -1; + + if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) { + *error_r = t_strdup_printf( + "fts_languages: Unknown language '%s'", unknown); + return -1; + } + if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) { + *error_r = "fts_languages setting is empty"; + return -1; + } + return 0; +} + +static int +fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, + struct fts_filter **filter_r, const char **error_r) +{ + const struct fts_filter *filter_class; + struct fts_filter *filter = NULL, *parent = NULL; + const char *filters_key, *const *filters, *filter_set_name; + const char *str, *error, *set_key; + unsigned int i; + int ret = 0; + + /* try to get the language-specific filters first */ + filters_key = t_strconcat("fts_filters_", lang->name, NULL); + str = mail_user_plugin_getenv(user, filters_key); + if (str == NULL) { + /* fallback to global filters */ + filters_key = "fts_filters"; + str = mail_user_plugin_getenv(user, filters_key); + if (str == NULL) { + /* No filters */ + *filter_r = NULL; + return 0; + } + } + + filters = t_strsplit_spaces(str, " "); + for (i = 0; filters[i] != NULL; i++) { + filter_class = fts_filter_find(filters[i]); + if (filter_class == NULL) { + *error_r = t_strdup_printf("%s: Unknown filter '%s'", + filters_key, filters[i]); + ret = -1; + break; + } + + /* try the language-specific setting first */ + filter_set_name = t_str_replace(filters[i], '-', '_'); + set_key = t_strdup_printf("fts_filter_%s_%s", + lang->name, filter_set_name); + str = mail_user_plugin_getenv(user, set_key); + if (str == NULL) { + set_key = t_strdup_printf("fts_filter_%s", filter_set_name); + str = mail_user_plugin_getenv(user, set_key); + } + + if (fts_filter_create(filter_class, parent, lang, + str_keyvalues_to_array(str), + &filter, &error) < 0) { + *error_r = t_strdup_printf("%s: %s", set_key, error); + ret = -1; + break; + } + if (parent != NULL) + fts_filter_unref(&parent); + parent = filter; + } + if (ret < 0) { + if (parent != NULL) + fts_filter_unref(&parent); + return -1; + } + *filter_r = filter; + return 0; +} + +static int +fts_user_create_tokenizer(struct mail_user *user, + const struct fts_language *lang, + struct fts_tokenizer **tokenizer_r, bool search, + const char **error_r) +{ + const struct fts_tokenizer *tokenizer_class; + struct fts_tokenizer *tokenizer = NULL, *parent = NULL; + const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name; + const char *str, *error, *set_key; + unsigned int i; + int ret = 0; + + tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL); + str = mail_user_plugin_getenv(user, tokenizers_key); + if (str == NULL) { + str = mail_user_plugin_getenv(user, "fts_tokenizers"); + if (str == NULL) { + *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key); + return -1; + } + tokenizers_key = "fts_tokenizers"; + } + + tokenizers = t_strsplit_spaces(str, " "); + + for (i = 0; tokenizers[i] != NULL; i++) { + tokenizer_class = fts_tokenizer_find(tokenizers[i]); + if (tokenizer_class == NULL) { + *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'", + tokenizers_key, tokenizers[i]); + ret = -1; + break; + } + + tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_'); + set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name); + str = mail_user_plugin_getenv(user, set_key); + if (str == NULL) { + set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name); + str = mail_user_plugin_getenv(user, set_key); + } + + /* tell the tokenizers that we're tokenizing a search string + (instead of tokenizing indexed data) */ + if (search) + str = t_strconcat("search=yes ", str, NULL); + + if (fts_tokenizer_create(tokenizer_class, parent, + str_keyvalues_to_array(str), + &tokenizer, &error) < 0) { + *error_r = t_strdup_printf("%s: %s", set_key, error); + ret = -1; + break; + } + if (parent != NULL) + fts_tokenizer_unref(&parent); + parent = tokenizer; + } + if (ret < 0) { + if (parent != NULL) + fts_tokenizer_unref(&parent); + return -1; + } + *tokenizer_r = tokenizer; + return 0; +} + +static int +fts_user_language_init_tokenizers(struct mail_user *user, + struct fts_user_language *user_lang, + const char **error_r) +{ + if (fts_user_create_tokenizer(user, user_lang->lang, + &user_lang->index_tokenizer, FALSE, + error_r) < 0) + return -1; + + if (fts_user_create_tokenizer(user, user_lang->lang, + &user_lang->search_tokenizer, TRUE, + error_r) < 0) + return -1; + return 0; +} + +struct fts_user_language * +fts_user_language_find(struct mail_user *user, + const struct fts_language *lang) +{ + struct fts_user_language *user_lang; + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + array_foreach_elem(&fuser->languages, user_lang) { + if (strcmp(user_lang->lang->name, lang->name) == 0) + return user_lang; + } + return NULL; +} + +static int fts_user_language_create(struct mail_user *user, + struct fts_user *fuser, + const struct fts_language *lang, + const char **error_r) +{ + struct fts_user_language *user_lang; + + user_lang = p_new(user->pool, struct fts_user_language, 1); + user_lang->lang = lang; + array_push_back(&fuser->languages, &user_lang); + + if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) + return -1; + if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0) + return -1; + return 0; +} + +static int fts_user_languages_fill_all(struct mail_user *user, + struct fts_user *fuser, + const char **error_r) +{ + const struct fts_language *lang; + + array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) { + if (fts_user_language_create(user, fuser, lang, error_r) < 0) + return -1; + } + return 0; +} + +static int +fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + struct fts_user_language *user_lang; + const char *error; + + user_lang = p_new(user->pool, struct fts_user_language, 1); + user_lang->lang = &fts_language_data; + + if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) + return -1; + + if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL, + &user_lang->filter, &error) < 0) + i_unreached(); + i_assert(user_lang->filter != NULL); + + array_push_back(&fuser->languages, &user_lang); + + fuser->data_lang = user_lang; + return 0; +} + +struct fts_language_list *fts_user_get_language_list(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return fuser->lang_list; +} + +const ARRAY_TYPE(fts_user_language) * +fts_user_get_all_languages(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return &fuser->languages; +} + +struct fts_user_language *fts_user_get_data_lang(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return fuser->data_lang; +} + +bool fts_user_autoindex_exclude(struct mailbox *box) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(box->storage->user); + + return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box); +} + +static void fts_user_language_free(struct fts_user_language *user_lang) +{ + if (user_lang->filter != NULL) + fts_filter_unref(&user_lang->filter); + if (user_lang->index_tokenizer != NULL) + fts_tokenizer_unref(&user_lang->index_tokenizer); + if (user_lang->search_tokenizer != NULL) + fts_tokenizer_unref(&user_lang->search_tokenizer); +} + +static void fts_user_free(struct fts_user *fuser) +{ + struct fts_user_language *user_lang; + + if (fuser->lang_list != NULL) + fts_language_list_deinit(&fuser->lang_list); + + if (array_is_created(&fuser->languages)) { + array_foreach_elem(&fuser->languages, user_lang) + fts_user_language_free(user_lang); + } + mailbox_match_plugin_deinit(&fuser->autoindex_exclude); +} + +static int +fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + p_array_init(&fuser->languages, user->pool, 4); + + if (fts_user_init_languages(user, fuser, error_r) < 0 || + fts_user_init_data_language(user, fuser, error_r) < 0) + return -1; + if (fts_user_languages_fill_all(user, fuser, error_r) < 0) + return -1; + return 0; +} + +int fts_mail_user_init(struct mail_user *user, bool initialize_libfts, + const char **error_r) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + if (fuser != NULL) { + /* multiple fts plugins are loaded */ + fuser->refcount++; + return 0; + } + + fuser = p_new(user->pool, struct fts_user, 1); + fuser->refcount = 1; + if (initialize_libfts) { + if (fts_mail_user_init_libfts(user, fuser, error_r) < 0) { + fts_user_free(fuser); + return -1; + } + } + fuser->autoindex_exclude = + mailbox_match_plugin_init(user, "fts_autoindex_exclude"); + + MODULE_CONTEXT_SET(user, fts_user_module, fuser); + return 0; +} + +void fts_mail_user_deinit(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + if (fuser != NULL) { + i_assert(fuser->refcount > 0); + if (--fuser->refcount == 0) + fts_user_free(fuser); + } +} |