diff options
Diffstat (limited to 'src/libserver/symcache/symcache_internal.hxx')
-rw-r--r-- | src/libserver/symcache/symcache_internal.hxx | 652 |
1 files changed, 652 insertions, 0 deletions
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx new file mode 100644 index 0000000..255a4b1 --- /dev/null +++ b/src/libserver/symcache/symcache_internal.hxx @@ -0,0 +1,652 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Internal C++ structures and classes for symcache + */ + +#ifndef RSPAMD_SYMCACHE_INTERNAL_HXX +#define RSPAMD_SYMCACHE_INTERNAL_HXX +#pragma once + +#include <cmath> +#include <cstdlib> +#include <cstdint> +#include <utility> +#include <vector> +#include <string> +#include <string_view> +#include <memory> +#include <variant> + +#include "rspamd_symcache.h" +#include "contrib/libev/ev.h" +#include "contrib/ankerl/unordered_dense.h" +#include "contrib/expected/expected.hpp" +#include "cfg_file.h" + +#include "symcache_id_list.hxx" + +#define msg_err_cache(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "symcache", log_tag(), \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_err_cache_lambda(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "symcache", log_tag(), \ + log_func, \ + __VA_ARGS__) +#define msg_err_cache_task(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "symcache", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_warn_cache(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ + "symcache", log_tag(), \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_info_cache(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ + "symcache", log_tag(), \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_debug_cache(...) rspamd_conditional_debug_fast(NULL, NULL, \ + ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_debug_cache_lambda(...) rspamd_conditional_debug_fast(NULL, NULL, \ + ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \ + log_func, \ + __VA_ARGS__) +#define msg_debug_cache_task(...) rspamd_conditional_debug_fast(NULL, NULL, \ + ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_debug_cache_task_lambda(...) rspamd_conditional_debug_fast(NULL, NULL, \ + ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \ + log_func, \ + __VA_ARGS__) + +struct lua_State; + +namespace rspamd::symcache { + +/* Defined in symcache_impl.cxx */ +extern int rspamd_symcache_log_id; + +static const std::uint8_t symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0}; + +struct symcache_header { + std::uint8_t magic[8]; + unsigned int nitems; + std::uint8_t checksum[64]; + std::uint8_t unused[128]; +}; + +struct cache_item; +using cache_item_ptr = std::shared_ptr<cache_item>; + +/** + * This structure is intended to keep the current ordering for all symbols + * It is designed to be shared among all tasks and keep references to the real + * symbols. + * If some symbol has been added or removed to the symbol cache, it will not affect + * the current order, and it will only be regenerated for the subsequent tasks. + * This allows safe and no copy sharing and keeping track of all symbols in the + * cache runtime. + */ +struct order_generation { + /* All items ordered */ + std::vector<cache_item_ptr> d; + /* Mapping from symbol name to the position in the order array */ + ankerl::unordered_dense::map<std::string_view, unsigned int> by_symbol; + /* Mapping from symbol id to the position in the order array */ + ankerl::unordered_dense::map<unsigned int, unsigned int> by_cache_id; + /* It matches cache->generation_id; if not, a fresh ordering is required */ + unsigned int generation_id; + + explicit order_generation(std::size_t nelts, unsigned id) + : generation_id(id) + { + d.reserve(nelts); + by_symbol.reserve(nelts); + by_cache_id.reserve(nelts); + } + + auto size() const -> auto + { + return d.size(); + } +}; + +using order_generation_ptr = std::shared_ptr<order_generation>; + + +struct delayed_cache_dependency { + std::string from; + std::string to; + + delayed_cache_dependency(std::string_view _from, std::string_view _to) + : from(_from), to(_to) + { + } +}; + +struct delayed_cache_condition { + std::string sym; + int cbref; + lua_State *L; + +public: + delayed_cache_condition(std::string_view sym, int cbref, lua_State *L) + : sym(sym), cbref(cbref), L(L) + { + } +}; + +class delayed_symbol_elt { +private: + std::variant<std::string, rspamd_regexp_t *> content; + +public: + /* Disable copy */ + delayed_symbol_elt() = delete; + delayed_symbol_elt(const delayed_symbol_elt &) = delete; + delayed_symbol_elt &operator=(const delayed_symbol_elt &) = delete; + /* Enable move */ + delayed_symbol_elt(delayed_symbol_elt &&other) noexcept = default; + delayed_symbol_elt &operator=(delayed_symbol_elt &&other) noexcept = default; + + explicit delayed_symbol_elt(std::string_view elt) noexcept + { + if (!elt.empty() && elt[0] == '/') { + /* Possibly regexp */ + auto *re = rspamd_regexp_new_len(elt.data(), elt.size(), nullptr, nullptr); + + if (re != nullptr) { + std::get<rspamd_regexp_t *>(content) = re; + } + else { + std::get<std::string>(content) = elt; + } + } + else { + std::get<std::string>(content) = elt; + } + } + + ~delayed_symbol_elt() + { + if (std::holds_alternative<rspamd_regexp_t *>(content)) { + rspamd_regexp_unref(std::get<rspamd_regexp_t *>(content)); + } + } + + auto matches(std::string_view what) const -> bool + { + return std::visit([&](auto &elt) { + using T = typeof(elt); + if constexpr (std::is_same_v<T, rspamd_regexp_t *>) { + if (rspamd_regexp_match(elt, what.data(), what.size(), false)) { + return true; + } + } + else if constexpr (std::is_same_v<T, std::string>) { + return elt == what; + } + + return false; + }, + content); + } + + auto to_string_view() const -> std::string_view + { + return std::visit([&](auto &elt) { + using T = typeof(elt); + if constexpr (std::is_same_v<T, rspamd_regexp_t *>) { + return std::string_view{rspamd_regexp_get_pattern(elt)}; + } + else if constexpr (std::is_same_v<T, std::string>) { + return std::string_view{elt}; + } + + return std::string_view{}; + }, + content); + } +}; + +struct delayed_symbol_elt_equal { + using is_transparent = void; + auto operator()(const delayed_symbol_elt &a, const delayed_symbol_elt &b) const + { + return a.to_string_view() == b.to_string_view(); + } + auto operator()(const delayed_symbol_elt &a, const std::string_view &b) const + { + return a.to_string_view() == b; + } + auto operator()(const std::string_view &a, const delayed_symbol_elt &b) const + { + return a == b.to_string_view(); + } +}; + +struct delayed_symbol_elt_hash { + using is_transparent = void; + auto operator()(const delayed_symbol_elt &a) const + { + return ankerl::unordered_dense::hash<std::string_view>()(a.to_string_view()); + } + auto operator()(const std::string_view &a) const + { + return ankerl::unordered_dense::hash<std::string_view>()(a); + } +}; + +class symcache { +private: + using items_ptr_vec = std::vector<cache_item *>; + /* Map indexed by symbol name: all symbols must have unique names, so this map holds ownership */ + ankerl::unordered_dense::map<std::string_view, cache_item *> items_by_symbol; + ankerl::unordered_dense::map<int, cache_item_ptr> items_by_id; + + /* Items sorted into some order */ + order_generation_ptr items_by_order; + unsigned int cur_order_gen; + + /* Specific vectors for execution/iteration */ + items_ptr_vec connfilters; + items_ptr_vec prefilters; + items_ptr_vec filters; + items_ptr_vec postfilters; + items_ptr_vec composites; + items_ptr_vec idempotent; + items_ptr_vec classifiers; + items_ptr_vec virtual_symbols; + + /* These are stored within pointer to clean up after init */ + std::unique_ptr<std::vector<delayed_cache_dependency>> delayed_deps; + std::unique_ptr<std::vector<delayed_cache_condition>> delayed_conditions; + /* Delayed statically enabled or disabled symbols */ + using delayed_symbol_names = ankerl::unordered_dense::set<delayed_symbol_elt, + delayed_symbol_elt_hash, delayed_symbol_elt_equal>; + std::unique_ptr<delayed_symbol_names> disabled_symbols; + std::unique_ptr<delayed_symbol_names> enabled_symbols; + + rspamd_mempool_t *static_pool; + std::uint64_t cksum; + double total_weight; + std::size_t stats_symbols_count; + +private: + std::uint64_t total_hits; + + struct rspamd_config *cfg; + lua_State *L; + double reload_time; + double last_profile; + +private: + int peak_cb; + int cache_id; + +private: + /* Internal methods */ + auto load_items() -> bool; + auto resort() -> void; + auto get_item_specific_vector(const cache_item &) -> items_ptr_vec &; + /* Helper for g_hash_table_foreach */ + static auto metric_connect_cb(void *k, void *v, void *ud) -> void; + +public: + explicit symcache(struct rspamd_config *cfg) + : cfg(cfg) + { + /* XXX: do we need a special pool for symcache? I don't think so */ + static_pool = cfg->cfg_pool; + reload_time = cfg->cache_reload_time; + total_hits = 1; + total_weight = 1.0; + cksum = 0xdeadbabe; + peak_cb = -1; + cache_id = rspamd_random_uint64_fast(); + L = (lua_State *) cfg->lua_state; + delayed_conditions = std::make_unique<std::vector<delayed_cache_condition>>(); + delayed_deps = std::make_unique<std::vector<delayed_cache_dependency>>(); + } + + virtual ~symcache(); + + /** + * Saves items on disk (if possible) + * @return + */ + auto save_items() const -> bool; + + /** + * Get an item by ID + * @param id + * @param resolve_parent + * @return + */ + auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *; + auto get_item_by_id_mut(int id, bool resolve_parent) const -> cache_item *; + /** + * Get an item by it's name + * @param name + * @param resolve_parent + * @return + */ + auto get_item_by_name(std::string_view name, bool resolve_parent) const -> const cache_item *; + /** + * Get an item by it's name, mutable pointer + * @param name + * @param resolve_parent + * @return + */ + auto get_item_by_name_mut(std::string_view name, bool resolve_parent) const -> cache_item *; + + /** + * Add a direct dependency + * @param id_from + * @param to + * @param virtual_id_from + * @return + */ + auto add_dependency(int id_from, std::string_view to, int virtual_id_from) -> void; + + /** + * Add a delayed dependency between symbols that will be resolved on the init stage + * @param from + * @param to + */ + auto add_delayed_dependency(std::string_view from, std::string_view to) -> void + { + if (!delayed_deps) { + delayed_deps = std::make_unique<std::vector<delayed_cache_dependency>>(); + } + + delayed_deps->emplace_back(from, to); + } + + /** + * Adds a symbol to the list of the disabled symbols + * @param sym + * @return + */ + auto disable_symbol_delayed(std::string_view sym) -> bool + { + if (!disabled_symbols) { + disabled_symbols = std::make_unique<delayed_symbol_names>(); + } + + if (!disabled_symbols->contains(sym)) { + disabled_symbols->emplace(sym); + + return true; + } + + return false; + } + + /** + * Adds a symbol to the list of the enabled symbols + * @param sym + * @return + */ + auto enable_symbol_delayed(std::string_view sym) -> bool + { + if (!enabled_symbols) { + enabled_symbols = std::make_unique<delayed_symbol_names>(); + } + + if (!enabled_symbols->contains(sym)) { + enabled_symbols->emplace(sym); + + return true; + } + + return false; + } + + /** + * Initialises the symbols cache, must be called after all symbols are added + * and the config file is loaded + */ + auto init() -> bool; + + /** + * Log helper that returns cfg checksum + * @return + */ + auto log_tag() const -> const char * + { + return cfg->checksum; + } + + /** + * Helper to return a memory pool associated with the cache + * @return + */ + auto get_pool() const + { + return static_pool; + } + + /** + * A method to add a generic symbol with a callback to couple with C API + * @param name name of the symbol, unlike C API it must be "" for callback only (compat) symbols, in this case an automatic name is generated + * @param priority + * @param func + * @param user_data + * @param flags_and_type mix of flags and type in a messy C enum + * @return id of a new symbol or -1 in case of failure + */ + auto add_symbol_with_callback(std::string_view name, + int priority, + symbol_func_t func, + void *user_data, + int flags_and_type) -> int; + /** + * A method to add a generic virtual symbol with no function associated + * @param name must have some value, or a fatal error will strike you + * @param parent_id if this param is -1 then this symbol is associated with nothing + * @param flags_and_type mix of flags and type in a messy C enum + * @return id of a new symbol or -1 in case of failure + */ + auto add_virtual_symbol(std::string_view name, int parent_id, + int flags_and_type) -> int; + + /** + * Sets a lua callback to be called on peaks in execution time + * @param cbref + */ + auto set_peak_cb(int cbref) -> void; + + /** + * Add a delayed condition for a symbol that might not be registered yet + * @param sym + * @param cbref + */ + auto add_delayed_condition(std::string_view sym, int cbref) -> void; + + /** + * Returns number of symbols that needs to be checked in statistical algorithm + * @return + */ + auto get_stats_symbols_count() const + { + return stats_symbols_count; + } + + /** + * Returns a checksum for the cache + * @return + */ + auto get_cksum() const + { + return cksum; + } + + /** + * Validate symbols in the cache + * @param strict + * @return + */ + auto validate(bool strict) -> bool; + + /** + * Returns counters for the cache + * @return + */ + auto counters() const -> ucl_object_t *; + + /** + * Adjusts stats of the cache for the periodic counter + */ + auto periodic_resort(struct ev_loop *ev_loop, double cur_time, double last_resort) -> void; + + /** + * A simple helper to get the reload time + * @return + */ + auto get_reload_time() const + { + return reload_time; + }; + + /** + * Iterate over all symbols using a specific functor + * @tparam Functor + * @param f + */ + template<typename Functor> + auto symbols_foreach(Functor f) -> void + { + for (const auto &sym_it: items_by_symbol) { + f(sym_it.second); + } + } + + /** + * Iterate over all composites using a specific functor + * @tparam Functor + * @param f + */ + template<typename Functor> + auto composites_foreach(Functor f) -> void + { + for (const auto &sym_it: composites) { + f(sym_it); + } + } + + /** + * Iterate over all composites using a specific functor + * @tparam Functor + * @param f + */ + template<typename Functor> + auto connfilters_foreach(Functor f) -> bool + { + return std::all_of(std::begin(connfilters), std::end(connfilters), + [&](const auto &sym_it) { + return f(sym_it); + }); + } + template<typename Functor> + auto prefilters_foreach(Functor f) -> bool + { + return std::all_of(std::begin(prefilters), std::end(prefilters), + [&](const auto &sym_it) { + return f(sym_it); + }); + } + template<typename Functor> + auto postfilters_foreach(Functor f) -> bool + { + return std::all_of(std::begin(postfilters), std::end(postfilters), + [&](const auto &sym_it) { + return f(sym_it); + }); + } + template<typename Functor> + auto idempotent_foreach(Functor f) -> bool + { + return std::all_of(std::begin(idempotent), std::end(idempotent), + [&](const auto &sym_it) { + return f(sym_it); + }); + } + template<typename Functor> + auto filters_foreach(Functor f) -> bool + { + return std::all_of(std::begin(filters), std::end(filters), + [&](const auto &sym_it) { + return f(sym_it); + }); + } + + /** + * Resort cache if anything has been changed since last time + * @return + */ + auto maybe_resort() -> bool; + + /** + * Returns current set of items ordered for sharing ownership + * @return + */ + auto get_cache_order() const -> auto + { + return items_by_order; + } + + /** + * Get last profile timestamp + * @return + */ + auto get_last_profile() const -> auto + { + return last_profile; + } + + /** + * Sets last profile timestamp + * @param last_profile + * @return + */ + auto set_last_profile(double last_profile) + { + symcache::last_profile = last_profile; + } + + /** + * Process settings elt identified by id + * @param elt + */ + auto process_settings_elt(struct rspamd_config_settings_elt *elt) -> void; + + /** + * Returns maximum timeout that is requested by all rules + * @return + */ + auto get_max_timeout(std::vector<std::pair<double, const cache_item *>> &elts) const -> double; +}; + + +}// namespace rspamd::symcache + +#endif//RSPAMD_SYMCACHE_INTERNAL_HXX |