summaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /src/libserver
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/libserver/CMakeLists.txt52
-rw-r--r--src/libserver/async_session.c364
-rw-r--r--src/libserver/async_session.h121
-rw-r--r--src/libserver/backtrace.cxx61
-rw-r--r--src/libserver/cfg_file.h889
-rw-r--r--src/libserver/cfg_file_private.h41
-rw-r--r--src/libserver/cfg_rcl.cxx4110
-rw-r--r--src/libserver/cfg_rcl.h476
-rw-r--r--src/libserver/cfg_utils.cxx2955
-rw-r--r--src/libserver/composites/composites.cxx989
-rw-r--r--src/libserver/composites/composites.h64
-rw-r--r--src/libserver/composites/composites_internal.hxx112
-rw-r--r--src/libserver/composites/composites_manager.cxx330
-rw-r--r--src/libserver/css/CMakeLists.txt9
-rw-r--r--src/libserver/css/css.cxx227
-rw-r--r--src/libserver/css/css.hxx68
-rw-r--r--src/libserver/css/css_colors_list.hxx738
-rw-r--r--src/libserver/css/css_parser.cxx892
-rw-r--r--src/libserver/css/css_parser.hxx244
-rw-r--r--src/libserver/css/css_property.cxx69
-rw-r--r--src/libserver/css/css_property.hxx172
-rw-r--r--src/libserver/css/css_rule.cxx531
-rw-r--r--src/libserver/css/css_rule.hxx153
-rw-r--r--src/libserver/css/css_rule_parser.rl27
-rw-r--r--src/libserver/css/css_selector.cxx226
-rw-r--r--src/libserver/css/css_selector.hxx134
-rw-r--r--src/libserver/css/css_selector_parser.rl27
-rw-r--r--src/libserver/css/css_style.hxx66
-rw-r--r--src/libserver/css/css_syntax.rl110
-rw-r--r--src/libserver/css/css_tokeniser.cxx836
-rw-r--r--src/libserver/css/css_tokeniser.hxx215
-rw-r--r--src/libserver/css/css_util.cxx157
-rw-r--r--src/libserver/css/css_util.hxx37
-rw-r--r--src/libserver/css/css_value.cxx449
-rw-r--r--src/libserver/css/css_value.hxx174
-rw-r--r--src/libserver/css/parse_error.hxx61
-rw-r--r--src/libserver/dkim.c3588
-rw-r--r--src/libserver/dkim.h298
-rw-r--r--src/libserver/dns.c1124
-rw-r--r--src/libserver/dns.h110
-rw-r--r--src/libserver/dynamic_cfg.c743
-rw-r--r--src/libserver/dynamic_cfg.h81
-rw-r--r--src/libserver/fuzzy_backend/fuzzy_backend.c560
-rw-r--r--src/libserver/fuzzy_backend/fuzzy_backend.h131
-rw-r--r--src/libserver/fuzzy_backend/fuzzy_backend_redis.c1666
-rw-r--r--src/libserver/fuzzy_backend/fuzzy_backend_redis.h67
-rw-r--r--src/libserver/fuzzy_backend/fuzzy_backend_sqlite.c1029
-rw-r--r--src/libserver/fuzzy_backend/fuzzy_backend_sqlite.h107
-rw-r--r--src/libserver/fuzzy_wire.h154
-rw-r--r--src/libserver/html/html.cxx2393
-rw-r--r--src/libserver/html/html.h137
-rw-r--r--src/libserver/html/html.hxx146
-rw-r--r--src/libserver/html/html_block.hxx358
-rw-r--r--src/libserver/html/html_entities.cxx2644
-rw-r--r--src/libserver/html/html_entities.hxx31
-rw-r--r--src/libserver/html/html_tag.hxx159
-rw-r--r--src/libserver/html/html_tag_defs.hxx194
-rw-r--r--src/libserver/html/html_tags.h176
-rw-r--r--src/libserver/html/html_tests.cxx304
-rw-r--r--src/libserver/html/html_url.cxx496
-rw-r--r--src/libserver/html/html_url.hxx68
-rw-r--r--src/libserver/http/http_connection.c2649
-rw-r--r--src/libserver/http/http_connection.h320
-rw-r--r--src/libserver/http/http_context.c670
-rw-r--r--src/libserver/http/http_context.h122
-rw-r--r--src/libserver/http/http_message.c725
-rw-r--r--src/libserver/http/http_message.h254
-rw-r--r--src/libserver/http/http_private.h129
-rw-r--r--src/libserver/http/http_router.c559
-rw-r--r--src/libserver/http/http_router.h149
-rw-r--r--src/libserver/http/http_util.c295
-rw-r--r--src/libserver/http/http_util.h47
-rw-r--r--src/libserver/hyperscan_tools.cxx627
-rw-r--r--src/libserver/hyperscan_tools.h77
-rw-r--r--src/libserver/logger.h403
-rw-r--r--src/libserver/logger/logger.c1319
-rw-r--r--src/libserver/logger/logger_console.c211
-rw-r--r--src/libserver/logger/logger_file.c510
-rw-r--r--src/libserver/logger/logger_private.h218
-rw-r--r--src/libserver/logger/logger_syslog.c143
-rw-r--r--src/libserver/maps/map.c3195
-rw-r--r--src/libserver/maps/map.h168
-rw-r--r--src/libserver/maps/map_helpers.c1845
-rw-r--r--src/libserver/maps/map_helpers.h269
-rw-r--r--src/libserver/maps/map_private.h226
-rw-r--r--src/libserver/mempool_vars_internal.h47
-rw-r--r--src/libserver/milter.c2232
-rw-r--r--src/libserver/milter.h188
-rw-r--r--src/libserver/milter_internal.h176
-rw-r--r--src/libserver/monitored.c735
-rw-r--r--src/libserver/monitored.h161
-rw-r--r--src/libserver/protocol.c2185
-rw-r--r--src/libserver/protocol.h130
-rw-r--r--src/libserver/protocol_internal.h99
-rw-r--r--src/libserver/re_cache.c2712
-rw-r--r--src/libserver/re_cache.h212
-rw-r--r--src/libserver/redis_pool.cxx663
-rw-r--r--src/libserver/redis_pool.h91
-rw-r--r--src/libserver/roll_history.c432
-rw-r--r--src/libserver/roll_history.h98
-rw-r--r--src/libserver/rspamd_control.c1334
-rw-r--r--src/libserver/rspamd_control.h328
-rw-r--r--src/libserver/rspamd_symcache.h578
-rw-r--r--src/libserver/spf.c2799
-rw-r--r--src/libserver/spf.h159
-rw-r--r--src/libserver/ssl_util.c1133
-rw-r--r--src/libserver/ssl_util.h120
-rw-r--r--src/libserver/symcache/symcache_c.cxx715
-rw-r--r--src/libserver/symcache/symcache_id_list.hxx95
-rw-r--r--src/libserver/symcache/symcache_impl.cxx1316
-rw-r--r--src/libserver/symcache/symcache_internal.hxx652
-rw-r--r--src/libserver/symcache/symcache_item.cxx652
-rw-r--r--src/libserver/symcache/symcache_item.hxx561
-rw-r--r--src/libserver/symcache/symcache_periodic.hxx89
-rw-r--r--src/libserver/symcache/symcache_runtime.cxx823
-rw-r--r--src/libserver/symcache/symcache_runtime.hxx209
-rw-r--r--src/libserver/task.c1975
-rw-r--r--src/libserver/task.h392
-rw-r--r--src/libserver/url.c4365
-rw-r--r--src/libserver/url.h430
-rw-r--r--src/libserver/worker_util.c2313
-rw-r--r--src/libserver/worker_util.h334
122 files changed, 79613 insertions, 0 deletions
diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
new file mode 100644
index 0000000..dd17865
--- /dev/null
+++ b/src/libserver/CMakeLists.txt
@@ -0,0 +1,52 @@
+# Librspamdserver
+ADD_SUBDIRECTORY(css)
+SET(LIBRSPAMDSERVERSRC
+ ${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/composites/composites.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/composites/composites_manager.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/dkim.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/dns.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/async_session.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/fuzzy_backend/fuzzy_backend.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/fuzzy_backend/fuzzy_backend_sqlite.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/fuzzy_backend/fuzzy_backend_redis.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/milter.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/monitored.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/protocol.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/redis_pool.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/spf.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/ssl_util.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/symcache/symcache_impl.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/symcache/symcache_item.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/symcache/symcache_runtime.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/symcache/symcache_c.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/task.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/url.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/worker_util.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_file.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_syslog.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/logger/logger_console.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/http/http_util.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/http/http_message.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/http/http_connection.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/http/http_router.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/http/http_context.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/maps/map.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/maps/map_helpers.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/html/html_entities.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/html/html_tests.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/hyperscan_tools.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/backtrace.cxx
+ ${LIBCSSSRC})
+
+# Librspamd-server
+SET(RSPAMD_SERVER ${LIBRSPAMDSERVERSRC} PARENT_SCOPE)
+SET(LIBSERVER_DEPENDS "${LIBCSS_DEPENDS}" PARENT_SCOPE)
+SET(LIBSERVER_GENERATED "${LIBCSS_GENERATED}" PARENT_SCOPE)
diff --git a/src/libserver/async_session.c b/src/libserver/async_session.c
new file mode 100644
index 0000000..baaee62
--- /dev/null
+++ b/src/libserver/async_session.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "contrib/uthash/utlist.h"
+#include "contrib/libucl/khash.h"
+#include "async_session.h"
+#include "cryptobox.h"
+
+#define RSPAMD_SESSION_FLAG_DESTROYING (1 << 1)
+#define RSPAMD_SESSION_FLAG_CLEANUP (1 << 2)
+
+#define RSPAMD_SESSION_CAN_ADD_EVENT(s) (!((s)->flags & (RSPAMD_SESSION_FLAG_DESTROYING | RSPAMD_SESSION_FLAG_CLEANUP)))
+
+#define msg_err_session(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "events", session->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_session(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "events", session->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_session(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "events", session->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_session(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_events_log_id, "events", session->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(events)
+
+/* Average symbols count to optimize hash allocation */
+static struct rspamd_counter_data events_count;
+
+
+struct rspamd_async_event {
+ const gchar *subsystem;
+ const gchar *event_source;
+ event_finalizer_t fin;
+ void *user_data;
+};
+
+static inline bool
+rspamd_event_equal(const struct rspamd_async_event *ev1, const struct rspamd_async_event *ev2)
+{
+ return ev1->fin == ev2->fin && ev1->user_data == ev2->user_data;
+}
+
+static inline guint64
+rspamd_event_hash(const struct rspamd_async_event *ev)
+{
+ union _pointer_fp_thunk {
+ event_finalizer_t f;
+ gpointer p;
+ };
+ struct ev_storage {
+ union _pointer_fp_thunk p;
+ gpointer ud;
+ } st;
+
+ st.p.f = ev->fin;
+ st.ud = ev->user_data;
+
+ return rspamd_cryptobox_fast_hash(&st, sizeof(st), rspamd_hash_seed());
+}
+
+/* Define **SET** of events */
+KHASH_INIT(rspamd_events_hash,
+ struct rspamd_async_event *,
+ char,
+ false,
+ rspamd_event_hash,
+ rspamd_event_equal);
+
+struct rspamd_async_session {
+ session_finalizer_t fin;
+ event_finalizer_t restore;
+ event_finalizer_t cleanup;
+ khash_t(rspamd_events_hash) * events;
+ void *user_data;
+ rspamd_mempool_t *pool;
+ guint flags;
+};
+
+static void
+rspamd_session_dtor(gpointer d)
+{
+ struct rspamd_async_session *s = (struct rspamd_async_session *) d;
+
+ /* Events are usually empty at this point */
+ rspamd_set_counter_ema(&events_count, s->events->n_buckets, 0.5);
+ kh_destroy(rspamd_events_hash, s->events);
+}
+
+struct rspamd_async_session *
+rspamd_session_create(rspamd_mempool_t *pool,
+ session_finalizer_t fin,
+ event_finalizer_t restore,
+ event_finalizer_t cleanup,
+ void *user_data)
+{
+ struct rspamd_async_session *s;
+
+ s = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_async_session));
+ s->pool = pool;
+ s->fin = fin;
+ s->restore = restore;
+ s->cleanup = cleanup;
+ s->user_data = user_data;
+ s->events = kh_init(rspamd_events_hash);
+
+ kh_resize(rspamd_events_hash, s->events, MAX(4, events_count.mean));
+ rspamd_mempool_add_destructor(pool, rspamd_session_dtor, s);
+
+ return s;
+}
+
+struct rspamd_async_event *
+rspamd_session_add_event_full(struct rspamd_async_session *session,
+ event_finalizer_t fin,
+ gpointer user_data,
+ const gchar *subsystem,
+ const gchar *event_source)
+{
+ struct rspamd_async_event *new_event;
+ gint ret;
+
+ if (session == NULL) {
+ msg_err("session is NULL");
+ g_assert_not_reached();
+ }
+
+ if (!RSPAMD_SESSION_CAN_ADD_EVENT(session)) {
+ msg_debug_session("skip adding event subsystem: %s: "
+ "session is destroying/cleaning",
+ subsystem);
+
+ return NULL;
+ }
+
+ new_event = rspamd_mempool_alloc(session->pool,
+ sizeof(struct rspamd_async_event));
+ new_event->fin = fin;
+ new_event->user_data = user_data;
+ new_event->subsystem = subsystem;
+ new_event->event_source = event_source;
+
+ msg_debug_session("added event: %p, pending %d (+1) events, "
+ "subsystem: %s (%s)",
+ user_data,
+ kh_size(session->events),
+ subsystem,
+ event_source);
+
+ kh_put(rspamd_events_hash, session->events, new_event, &ret);
+ g_assert(ret > 0);
+
+ return new_event;
+}
+
+void rspamd_session_remove_event_full(struct rspamd_async_session *session,
+ event_finalizer_t fin,
+ void *ud,
+ const gchar *event_source)
+{
+ struct rspamd_async_event search_ev, *found_ev;
+ khiter_t k;
+
+ if (session == NULL) {
+ msg_err("session is NULL");
+ return;
+ }
+
+ if (!RSPAMD_SESSION_CAN_ADD_EVENT(session)) {
+ /* Session is already cleaned up, ignore this */
+ return;
+ }
+
+ /* Search for event */
+ search_ev.fin = fin;
+ search_ev.user_data = ud;
+ k = kh_get(rspamd_events_hash, session->events, &search_ev);
+ if (k == kh_end(session->events)) {
+
+ msg_err_session("cannot find event: %p(%p) from %s (%d total events)", fin, ud,
+ event_source, (int) kh_size(session->events));
+ kh_foreach_key(session->events, found_ev, {
+ msg_err_session("existing event %s (%s): %p(%p)",
+ found_ev->subsystem,
+ found_ev->event_source,
+ found_ev->fin,
+ found_ev->user_data);
+ });
+
+ g_assert_not_reached();
+ }
+
+ found_ev = kh_key(session->events, k);
+ msg_debug_session("removed event: %p, pending %d (-1) events, "
+ "subsystem: %s (%s), added at %s",
+ ud,
+ kh_size(session->events),
+ found_ev->subsystem,
+ event_source,
+ found_ev->event_source);
+ kh_del(rspamd_events_hash, session->events, k);
+
+ /* Remove event */
+ if (fin) {
+ fin(ud);
+ }
+
+ rspamd_session_pending(session);
+}
+
+gboolean
+rspamd_session_destroy(struct rspamd_async_session *session)
+{
+ if (session == NULL) {
+ msg_err("session is NULL");
+ return FALSE;
+ }
+
+ if (!rspamd_session_blocked(session)) {
+ session->flags |= RSPAMD_SESSION_FLAG_DESTROYING;
+ rspamd_session_cleanup(session, false);
+
+ if (session->cleanup != NULL) {
+ session->cleanup(session->user_data);
+ }
+ }
+
+ return TRUE;
+}
+
+void rspamd_session_cleanup(struct rspamd_async_session *session, bool forced_cleanup)
+{
+ struct rspamd_async_event *ev;
+
+ if (session == NULL) {
+ msg_err("session is NULL");
+ return;
+ }
+
+ session->flags |= RSPAMD_SESSION_FLAG_CLEANUP;
+ khash_t(rspamd_events_hash) *uncancellable_events = kh_init(rspamd_events_hash);
+
+ kh_foreach_key(session->events, ev, {
+ /* Call event's finalizer */
+ int ret;
+
+ if (ev->fin != NULL) {
+ if (forced_cleanup) {
+ msg_info_session("forced removed event on destroy: %p, subsystem: %s, scheduled from: %s",
+ ev->user_data,
+ ev->subsystem,
+ ev->event_source);
+ }
+ else {
+ msg_debug_session("removed event on destroy: %p, subsystem: %s",
+ ev->user_data,
+ ev->subsystem);
+ }
+ ev->fin(ev->user_data);
+ }
+ else {
+ if (forced_cleanup) {
+ msg_info_session("NOT forced removed event on destroy - uncancellable: "
+ "%p, subsystem: %s, scheduled from: %s",
+ ev->user_data,
+ ev->subsystem,
+ ev->event_source);
+ }
+ else {
+ msg_debug_session("NOT removed event on destroy - uncancellable: %p, subsystem: %s",
+ ev->user_data,
+ ev->subsystem);
+ }
+ /* Assume an event is uncancellable, move it to a new hash table */
+ kh_put(rspamd_events_hash, uncancellable_events, ev, &ret);
+ }
+ });
+
+ kh_destroy(rspamd_events_hash, session->events);
+ session->events = uncancellable_events;
+ if (forced_cleanup) {
+ msg_info_session("pending %d uncancellable events", kh_size(uncancellable_events));
+ }
+ else {
+ msg_debug_session("pending %d uncancellable events", kh_size(uncancellable_events));
+ }
+
+ session->flags &= ~RSPAMD_SESSION_FLAG_CLEANUP;
+}
+
+gboolean
+rspamd_session_pending(struct rspamd_async_session *session)
+{
+ gboolean ret = TRUE;
+
+ if (kh_size(session->events) == 0) {
+ if (session->fin != NULL) {
+ msg_debug_session("call fin handler, as no events are pending");
+
+ if (!session->fin(session->user_data)) {
+ /* Session finished incompletely, perform restoration */
+ msg_debug_session("restore incomplete session");
+ if (session->restore != NULL) {
+ session->restore(session->user_data);
+ }
+ }
+ else {
+ ret = FALSE;
+ }
+ }
+
+ ret = FALSE;
+ }
+
+ return ret;
+}
+
+guint rspamd_session_events_pending(struct rspamd_async_session *session)
+{
+ guint npending;
+
+ g_assert(session != NULL);
+
+ npending = kh_size(session->events);
+ msg_debug_session("pending %d events", npending);
+
+ return npending;
+}
+
+rspamd_mempool_t *
+rspamd_session_mempool(struct rspamd_async_session *session)
+{
+ g_assert(session != NULL);
+
+ return session->pool;
+}
+
+gboolean
+rspamd_session_blocked(struct rspamd_async_session *session)
+{
+ g_assert(session != NULL);
+
+ return !RSPAMD_SESSION_CAN_ADD_EVENT(session);
+} \ No newline at end of file
diff --git a/src/libserver/async_session.h b/src/libserver/async_session.h
new file mode 100644
index 0000000..4573545
--- /dev/null
+++ b/src/libserver/async_session.h
@@ -0,0 +1,121 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_ASYNC_SESSION_H
+#define RSPAMD_ASYNC_SESSION_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_async_event;
+struct rspamd_async_session;
+
+typedef void (*event_finalizer_t)(gpointer ud);
+
+typedef gboolean (*session_finalizer_t)(gpointer user_data);
+
+/**
+ * Make new async session
+ * @param pool pool to alloc memory from
+ * @param fin a callback called when no events are found in session
+ * @param restore a callback is called to restore processing of session
+ * @param cleanup a callback called when session is forcefully destroyed
+ * @param user_data abstract user data
+ * @return
+ */
+struct rspamd_async_session *rspamd_session_create(rspamd_mempool_t *pool,
+ session_finalizer_t fin, event_finalizer_t restore,
+ event_finalizer_t cleanup, gpointer user_data);
+
+/**
+ * Insert new event to the session
+ * @param session session object
+ * @param fin finalizer callback
+ * @param user_data abstract user_data
+ * @param forced unused
+ */
+struct rspamd_async_event *
+rspamd_session_add_event_full(struct rspamd_async_session *session,
+ event_finalizer_t fin,
+ gpointer user_data,
+ const gchar *subsystem,
+ const gchar *event_source);
+
+#define rspamd_session_add_event(session, fin, user_data, subsystem) \
+ rspamd_session_add_event_full(session, fin, user_data, subsystem, G_STRLOC)
+
+/**
+ * Remove normal event
+ * @param session session object
+ * @param fin final callback
+ * @param ud user data object
+ */
+void rspamd_session_remove_event_full(struct rspamd_async_session *session,
+ event_finalizer_t fin,
+ gpointer ud,
+ const gchar *event_source);
+
+#define rspamd_session_remove_event(session, fin, user_data) \
+ rspamd_session_remove_event_full(session, fin, user_data, G_STRLOC)
+
+/**
+ * Must be called at the end of session, it calls fin functions for all non-forced callbacks
+ * @return true if the whole session was destroyed and false if there are forced events
+ */
+gboolean rspamd_session_destroy(struct rspamd_async_session *session);
+
+/**
+ * Try to remove all events pending
+ */
+void rspamd_session_cleanup(struct rspamd_async_session *session, bool forced_cleanup);
+
+/**
+ * Returns mempool associated with async session
+ * @param session
+ * @return
+ */
+rspamd_mempool_t *rspamd_session_mempool(struct rspamd_async_session *session);
+
+/**
+ * Check session for events pending and call fin callback if no events are pending
+ * @param session session object
+ * @return TRUE if session has pending events
+ */
+gboolean rspamd_session_pending(struct rspamd_async_session *session);
+
+/**
+ * Returns number of events pending
+ * @param session
+ * @return
+ */
+guint rspamd_session_events_pending(struct rspamd_async_session *session);
+
+
+/**
+ * Returns TRUE if an async session is currently destroying
+ * @param s
+ * @return
+ */
+gboolean rspamd_session_blocked(struct rspamd_async_session *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*RSPAMD_ASYNC_SESSION_H*/
diff --git a/src/libserver/backtrace.cxx b/src/libserver/backtrace.cxx
new file mode 100644
index 0000000..6507b96
--- /dev/null
+++ b/src/libserver/backtrace.cxx
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+
+#ifdef BACKWARD_ENABLE
+
+#include "contrib/backward-cpp/backward.hpp"
+#include "fmt/core.h"
+#include "logger.h"
+
+namespace rspamd {
+
+void log_backtrace(void)
+{
+ using namespace backward;
+ StackTrace st;
+ st.load_here(128);
+
+ TraceResolver tr;
+ tr.load_stacktrace(st);
+
+ for (auto i = 0ul; i < st.size(); ++i) {
+ auto trace = tr.resolve(st[i]);
+ auto trace_line = fmt::format("#{}: [{}]: ", i, trace.addr);
+
+ if (!trace.source.filename.empty()) {
+ trace_line += fmt::format("{}:{} in {}", trace.source.filename, trace.source.line, trace.source.function);
+ }
+ else {
+ trace_line += fmt::format("{} in {}", trace.object_filename, trace.object_function);
+ }
+
+ msg_err("%s", trace_line.c_str());
+ }
+}
+
+}// namespace rspamd
+#endif
+
+extern "C" void rspamd_print_crash(void);
+
+void rspamd_print_crash(void)
+{
+#ifdef BACKWARD_ENABLE
+ rspamd::log_backtrace();
+#endif
+} \ No newline at end of file
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
new file mode 100644
index 0000000..4cb87d9
--- /dev/null
+++ b/src/libserver/cfg_file.h
@@ -0,0 +1,889 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CFG_FILE_H
+#define CFG_FILE_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "upstream.h"
+#include "rspamd_symcache.h"
+#include "cfg_rcl.h"
+#include "ucl.h"
+#include "regexp.h"
+#include "libserver/re_cache.h"
+#include "libutil/ref.h"
+#include "libutil/radix.h"
+#include "monitored.h"
+#include "redis_pool.h"
+
+#define DEFAULT_BIND_PORT 11333
+#define DEFAULT_CONTROL_PORT 11334
+
+/* Default metric name */
+#define DEFAULT_METRIC "default"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct expression;
+struct tokenizer;
+struct rspamd_stat_classifier;
+struct module_s;
+struct worker_s;
+struct rspamd_external_libs_ctx;
+struct rspamd_cryptobox_pubkey;
+struct rspamd_dns_resolver;
+
+/**
+ * Logging type
+ */
+enum rspamd_log_type {
+ RSPAMD_LOG_CONSOLE,
+ RSPAMD_LOG_SYSLOG,
+ RSPAMD_LOG_FILE
+};
+
+enum rspamd_log_cfg_flags {
+ RSPAMD_LOG_FLAG_DEFAULT = 0u,
+ RSPAMD_LOG_FLAG_SYSTEMD = (1u << 0u),
+ RSPAMD_LOG_FLAG_COLOR = (1u << 1u),
+ RSPAMD_LOG_FLAG_RE_CACHE = (1u << 2u),
+ RSPAMD_LOG_FLAG_USEC = (1u << 3u),
+ RSPAMD_LOG_FLAG_RSPAMADM = (1u << 4u),
+ RSPAMD_LOG_FLAG_ENFORCED = (1u << 5u),
+ RSPAMD_LOG_FLAG_SEVERITY = (1u << 6u),
+ RSPAMD_LOG_FLAG_JSON = (1u << 7u),
+};
+
+struct rspamd_worker_log_pipe {
+ gint fd;
+ gint type;
+ struct rspamd_worker_log_pipe *prev, *next;
+};
+
+/**
+ * script module list item
+ */
+struct script_module {
+ gchar *name; /**< name of module */
+ gchar *path; /**< path to module */
+ gchar *digest;
+};
+
+enum rspamd_symbol_group_flags {
+ RSPAMD_SYMBOL_GROUP_NORMAL = 0u,
+ RSPAMD_SYMBOL_GROUP_DISABLED = (1u << 0u),
+ RSPAMD_SYMBOL_GROUP_ONE_SHOT = (1u << 1u),
+ RSPAMD_SYMBOL_GROUP_UNGROUPED = (1u << 2u),
+ RSPAMD_SYMBOL_GROUP_PUBLIC = (1u << 3u),
+};
+
+/**
+ * Symbols group
+ */
+struct rspamd_symbol;
+struct rspamd_symbols_group {
+ gchar *name;
+ gchar *description;
+ GHashTable *symbols;
+ gdouble max_score;
+ guint flags;
+};
+
+enum rspamd_symbol_flags {
+ RSPAMD_SYMBOL_FLAG_NORMAL = 0,
+ RSPAMD_SYMBOL_FLAG_IGNORE_METRIC = (1 << 1),
+ RSPAMD_SYMBOL_FLAG_ONEPARAM = (1 << 2),
+ RSPAMD_SYMBOL_FLAG_UNGROUPED = (1 << 3),
+ RSPAMD_SYMBOL_FLAG_DISABLED = (1 << 4),
+ RSPAMD_SYMBOL_FLAG_UNSCORED = (1 << 5),
+};
+
+/**
+ * Symbol config definition
+ */
+struct rspamd_symbol {
+ gchar *name;
+ gchar *description;
+ gdouble *weight_ptr;
+ gdouble score;
+ guint priority;
+ struct rspamd_symbols_group *gr; /* Main group */
+ GPtrArray *groups; /* Other groups */
+ guint flags;
+ void *cache_item;
+ gint nshots;
+};
+
+/**
+ * Statfile config definition
+ */
+struct rspamd_statfile_config {
+ gchar *symbol; /**< symbol of statfile */
+ gchar *label; /**< label of this statfile */
+ ucl_object_t *opts; /**< other options */
+ gboolean is_spam; /**< spam flag */
+ struct rspamd_classifier_config *clcf; /**< parent pointer of classifier configuration */
+ gpointer data; /**< opaque data */
+};
+
+struct rspamd_tokenizer_config {
+ const ucl_object_t *opts; /**< other options */
+ const gchar *name; /**< name of tokenizer */
+};
+
+
+/* Classifier has all integer values (e.g. bayes) */
+#define RSPAMD_FLAG_CLASSIFIER_INTEGER (1 << 0)
+/*
+ * Set if backend for a classifier is intended to increment and not set values
+ * (e.g. redis)
+ */
+#define RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND (1 << 1)
+/*
+ * No backend required for classifier
+ */
+#define RSPAMD_FLAG_CLASSIFIER_NO_BACKEND (1 << 2)
+
+/**
+ * Classifier config definition
+ */
+struct rspamd_classifier_config {
+ GList *statfiles; /**< statfiles list */
+ GHashTable *labels; /**< statfiles with labels */
+ gchar *metric; /**< metric of this classifier */
+ gchar *classifier; /**< classifier interface */
+ struct rspamd_tokenizer_config *tokenizer; /**< tokenizer used for classifier */
+ const gchar *backend; /**< name of statfile's backend */
+ ucl_object_t *opts; /**< other options */
+ GList *learn_conditions; /**< list of learn condition callbacks */
+ GList *classify_conditions; /**< list of classify condition callbacks */
+ gchar *name; /**< unique name of classifier */
+ guint32 min_tokens; /**< minimal number of tokens to process classifier */
+ guint32 max_tokens; /**< maximum number of tokens */
+ guint min_token_hits; /**< minimum number of hits for a token to be considered */
+ gdouble min_prob_strength; /**< use only tokens with probability in [0.5 - MPS, 0.5 + MPS] */
+ guint min_learns; /**< minimum number of learns for each statfile */
+ guint flags;
+};
+
+struct rspamd_worker_bind_conf {
+ GPtrArray *addrs;
+ guint cnt;
+ gchar *name;
+ gchar *bind_line;
+ gboolean is_systemd;
+ struct rspamd_worker_bind_conf *next;
+};
+
+struct rspamd_worker_lua_script {
+ gint cbref;
+ struct rspamd_worker_lua_script *prev, *next;
+};
+
+/**
+ * Config params for rspamd worker
+ */
+struct rspamd_worker_conf {
+ struct worker_s *worker; /**< pointer to worker type */
+ GQuark type; /**< type of worker */
+ struct rspamd_worker_bind_conf *bind_conf; /**< bind configuration */
+ gint16 count; /**< number of workers */
+ GList *listen_socks; /**< listening sockets descriptors */
+ guint64 rlimit_nofile; /**< max files limit */
+ guint64 rlimit_maxcore; /**< maximum core file size */
+ GHashTable *params; /**< params for worker */
+ GQueue *active_workers; /**< linked list of spawned workers */
+ gpointer ctx; /**< worker's context */
+ ucl_object_t *options; /**< other worker's options */
+ struct rspamd_worker_lua_script *scripts; /**< registered lua scripts */
+ gboolean enabled;
+ ref_entry_t ref;
+};
+
+enum rspamd_log_format_type {
+ RSPAMD_LOG_STRING = 0,
+ RSPAMD_LOG_MID,
+ RSPAMD_LOG_QID,
+ RSPAMD_LOG_USER,
+ RSPAMD_LOG_ISSPAM,
+ RSPAMD_LOG_ACTION,
+ RSPAMD_LOG_SCORES,
+ RSPAMD_LOG_SYMBOLS,
+ RSPAMD_LOG_IP,
+ RSPAMD_LOG_LEN,
+ RSPAMD_LOG_DNS_REQ,
+ RSPAMD_LOG_SMTP_FROM,
+ RSPAMD_LOG_MIME_FROM,
+ RSPAMD_LOG_SMTP_RCPT,
+ RSPAMD_LOG_MIME_RCPT,
+ RSPAMD_LOG_SMTP_RCPTS,
+ RSPAMD_LOG_MIME_RCPTS,
+ RSPAMD_LOG_TIME_REAL,
+ RSPAMD_LOG_TIME_VIRTUAL,
+ RSPAMD_LOG_LUA,
+ RSPAMD_LOG_DIGEST,
+ RSPAMD_LOG_FILENAME,
+ RSPAMD_LOG_FORCED_ACTION,
+ RSPAMD_LOG_SETTINGS_ID,
+ RSPAMD_LOG_GROUPS,
+ RSPAMD_LOG_PUBLIC_GROUPS,
+ RSPAMD_LOG_MEMPOOL_SIZE,
+ RSPAMD_LOG_MEMPOOL_WASTE,
+};
+
+enum rspamd_log_format_flags {
+ RSPAMD_LOG_FMT_FLAG_DEFAULT = 0,
+ RSPAMD_LOG_FMT_FLAG_OPTIONAL = (1 << 0),
+ RSPAMD_LOG_FMT_FLAG_MIME_ALTERNATIVE = (1 << 1),
+ RSPAMD_LOG_FMT_FLAG_CONDITION = (1 << 2),
+ RSPAMD_LOG_FMT_FLAG_SYMBOLS_SCORES = (1 << 3),
+ RSPAMD_LOG_FMT_FLAG_SYMBOLS_PARAMS = (1 << 4)
+};
+
+struct rspamd_log_format {
+ enum rspamd_log_format_type type;
+ guint flags;
+ gsize len;
+ gpointer data;
+ struct rspamd_log_format *prev, *next;
+};
+
+/**
+ * Standard actions
+ */
+enum rspamd_action_type {
+ METRIC_ACTION_REJECT = 0,
+ METRIC_ACTION_SOFT_REJECT,
+ METRIC_ACTION_REWRITE_SUBJECT,
+ METRIC_ACTION_ADD_HEADER,
+ METRIC_ACTION_GREYLIST,
+ METRIC_ACTION_NOACTION,
+ METRIC_ACTION_MAX,
+ METRIC_ACTION_CUSTOM = 999,
+ METRIC_ACTION_DISCARD,
+ METRIC_ACTION_QUARANTINE
+};
+
+enum rspamd_action_flags {
+ RSPAMD_ACTION_NORMAL = 0u,
+ RSPAMD_ACTION_NO_THRESHOLD = (1u << 0u),
+ RSPAMD_ACTION_THRESHOLD_ONLY = (1u << 1u),
+ RSPAMD_ACTION_HAM = (1u << 2u),
+ RSPAMD_ACTION_MILTER = (1u << 3u),
+};
+
+
+struct rspamd_action;
+
+struct rspamd_config_cfg_lua_script {
+ gint cbref;
+ gint priority;
+ gchar *lua_src_pos;
+ struct rspamd_config_cfg_lua_script *prev, *next;
+};
+
+struct rspamd_config_post_init_script {
+ gint cbref;
+ struct rspamd_config_post_init_script *prev, *next;
+};
+
+struct rspamd_lang_detector;
+struct rspamd_rcl_sections_map;
+
+enum rspamd_config_settings_policy {
+ RSPAMD_SETTINGS_POLICY_DEFAULT = 0,
+ RSPAMD_SETTINGS_POLICY_IMPLICIT_ALLOW = 1,
+ RSPAMD_SETTINGS_POLICY_IMPLICIT_DENY = 2,
+};
+
+enum rspamd_gtube_patterns_policy {
+ RSPAMD_GTUBE_DISABLED = 0, /* Disabled */
+ RSPAMD_GTUBE_REJECT, /* Reject message with GTUBE pattern */
+ RSPAMD_GTUBE_ALL /* Check all GTUBE like patterns */
+};
+
+struct rspamd_config_settings_elt {
+ guint32 id;
+ enum rspamd_config_settings_policy policy;
+ const gchar *name;
+ ucl_object_t *symbols_enabled;
+ ucl_object_t *symbols_disabled;
+ struct rspamd_config_settings_elt *prev, *next;
+ ref_entry_t ref;
+};
+
+/**
+ * Structure that stores all config data
+ */
+struct rspamd_config {
+ gchar *rspamd_user; /**< user to run as */
+ gchar *rspamd_group; /**< group to run as */
+ rspamd_mempool_t *cfg_pool; /**< memory pool for config */
+ gchar *cfg_name; /**< name of config file */
+ gchar *pid_file; /**< name of pid file */
+ gchar *temp_dir; /**< dir for temp files */
+ gchar *control_socket_path; /**< path to the control socket */
+ const ucl_object_t *local_addrs; /**< tree of local addresses */
+#ifdef WITH_GPERF_TOOLS
+ gchar *profile_path;
+#endif
+ gdouble unknown_weight; /**< weight of unknown symbols */
+ gdouble grow_factor; /**< grow factor for metric */
+ GHashTable *symbols; /**< weights of symbols in metric */
+ const gchar *subject; /**< subject rewrite string */
+ GHashTable *groups; /**< groups of symbols */
+ void *actions; /**< all actions of the metric (opaque type) */
+
+ gboolean one_shot_mode; /**< rules add only one symbol */
+ gboolean check_text_attachements; /**< check text attachements as text */
+ gboolean check_all_filters; /**< check all filters */
+ gboolean allow_raw_input; /**< scan messages with invalid mime */
+ gboolean disable_hyperscan; /**< disable hyperscan usage */
+ gboolean vectorized_hyperscan; /**< use vectorized hyperscan matching */
+ gboolean enable_shutdown_workaround; /**< enable workaround for legacy SA clients (exim) */
+ gboolean ignore_received; /**< Ignore data from the first received header */
+ gboolean enable_sessions_cache; /**< Enable session cache for debug */
+ gboolean enable_experimental; /**< Enable experimental plugins */
+ gboolean disable_pcre_jit; /**< Disable pcre JIT */
+ gboolean own_lua_state; /**< True if we have created lua_state internally */
+ gboolean soft_reject_on_timeout; /**< If true emit soft reject on task timeout (if not reject) */
+ gboolean public_groups_only; /**< Output merely public groups everywhere */
+ enum rspamd_gtube_patterns_policy gtube_patterns_policy; /**< Enable test patterns */
+ gboolean enable_css_parser; /**< Enable css parsing in HTML */
+
+ gsize max_cores_size; /**< maximum size occupied by rspamd core files */
+ gsize max_cores_count; /**< maximum number of core files */
+ gchar *cores_dir; /**< directory for core files */
+ gsize max_message; /**< maximum size for messages */
+ gsize max_pic_size; /**< maximum size for a picture to process */
+ gsize images_cache_size; /**< size of LRU cache for DCT data from images */
+ gdouble task_timeout; /**< maximum message processing time */
+ gint default_max_shots; /**< default maximum count of symbols hits permitted (-1 for unlimited) */
+ gint32 heartbeats_loss_max; /**< number of heartbeats lost to consider worker's termination */
+ gdouble heartbeat_interval; /**< interval for heartbeats for workers */
+
+ enum rspamd_log_type log_type; /**< log type */
+ gint log_facility; /**< log facility in case of syslog */
+ gint log_level; /**< log level trigger */
+ gchar *log_file; /**< path to logfile in case of file logging */
+ gboolean log_buffered; /**< whether logging is buffered */
+ gboolean log_silent_workers; /**< silence info messages from workers */
+ guint32 log_buf_size; /**< length of log buffer */
+ const ucl_object_t *debug_ip_map; /**< turn on debugging for specified ip addresses */
+ gboolean log_urls; /**< whether we should log URLs */
+ GHashTable *debug_modules; /**< logging modules to debug */
+ struct rspamd_cryptobox_pubkey *log_encryption_key; /**< encryption key for logs */
+ guint log_flags; /**< logging flags */
+ guint log_error_elts; /**< number of elements in error logbuf */
+ guint log_error_elt_maxlen; /**< maximum size of error log element */
+ guint log_task_max_elts; /**< maximum number of elements in task logging */
+ struct rspamd_worker_log_pipe *log_pipes;
+
+ gboolean compat_messages; /**< use old messages in the protocol (array) */
+
+ GPtrArray *script_modules; /**< a list of script modules to load */
+ GHashTable *explicit_modules; /**< modules that should be always loaded */
+
+ GList *filters; /**< linked list of all filters */
+ GList *workers; /**< linked list of all workers params */
+ struct rspamd_rcl_sections_map *rcl_top_section; /**< top section for RCL config */
+ ucl_object_t *cfg_ucl_obj; /**< ucl object */
+ ucl_object_t *config_comments; /**< comments saved from the config */
+ ucl_object_t *doc_strings; /**< documentation strings for config options */
+ GPtrArray *c_modules; /**< list of C modules */
+ void *composites_manager; /**< hash of composite symbols indexed by its name */
+ GList *classifiers; /**< list of all classifiers defined */
+ GList *statfiles; /**< list of all statfiles in config file order */
+ GHashTable *classifiers_symbols; /**< hashtable indexed by symbol name of classifiers */
+ GHashTable *cfg_params; /**< all cfg params indexed by its name in this structure */
+ gchar *dynamic_conf; /**< path to dynamic configuration */
+ ucl_object_t *current_dynamic_conf; /**< currently loaded dynamic configuration */
+ gint clock_res; /**< resolution of clock used */
+
+ GList *maps; /**< maps active */
+ gdouble map_timeout; /**< maps watch timeout */
+ gdouble map_file_watch_multiplier; /**< multiplier for watch timeout when maps are files */
+ gchar *maps_cache_dir; /**< where to save HTTP cached data */
+
+ gdouble monitored_interval; /**< interval between monitored checks */
+ gboolean disable_monitored; /**< disable monitoring completely */
+ gboolean fips_mode; /**< turn on fips mode for openssl */
+
+ struct rspamd_symcache *cache; /**< symbols cache object */
+ gchar *cache_filename; /**< filename of cache file */
+ gdouble cache_reload_time; /**< how often cache reload should be performed */
+ gchar *checksum; /**< real checksum of config file */
+ gpointer lua_state; /**< pointer to lua state */
+ gpointer lua_thread_pool; /**< pointer to lua thread (coroutine) pool */
+
+ gchar *rrd_file; /**< rrd file to store statistics */
+ gchar *history_file; /**< file to save rolling history */
+ gchar *stats_file; /**< file to save stats */
+ gchar *tld_file; /**< file to load effective tld list from */
+ gchar *hs_cache_dir; /**< directory to save hyperscan databases */
+ gchar *events_backend; /**< string representation of the events backend used */
+
+ gdouble dns_timeout; /**< timeout in milliseconds for waiting for dns reply */
+ guint32 dns_retransmits; /**< maximum retransmits count */
+ guint32 dns_io_per_server; /**< number of sockets per DNS server */
+ const ucl_object_t *nameservers; /**< list of nameservers or NULL to parse resolv.conf */
+ guint32 dns_max_requests; /**< limit of DNS requests per task */
+ gboolean enable_dnssec; /**< enable dnssec stub resolver */
+
+ guint upstream_max_errors; /**< upstream max errors before shutting off */
+ gdouble upstream_error_time; /**< rate of upstream errors */
+ gdouble upstream_revive_time; /**< revive timeout for upstreams */
+ gdouble upstream_lazy_resolve_time; /**< lazy resolve time for upstreams */
+ struct upstream_ctx *ups_ctx; /**< upstream context */
+ struct rspamd_dns_resolver *dns_resolver; /**< dns resolver if loaded */
+
+ guint min_word_len; /**< minimum length of the word to be considered */
+ guint max_word_len; /**< maximum length of the word to be considered */
+ guint words_decay; /**< limit for words for starting adaptive ignoring */
+ guint history_rows; /**< number of history rows stored */
+ guint max_sessions_cache; /**< maximum number of sessions cache elts */
+ guint lua_gc_step; /**< lua gc step */
+ guint lua_gc_pause; /**< lua gc pause */
+ guint full_gc_iters; /**< iterations between full gc cycle */
+ guint max_lua_urls; /**< maximum number of urls to be passed to Lua */
+ guint max_urls; /**< maximum number of urls to be processed in general */
+ gint max_recipients; /**< maximum number of recipients to be processed */
+ guint max_blas_threads; /**< maximum threads for openblas when learning ANN */
+ guint max_opts_len; /**< maximum length for all options for a symbol */
+ gsize max_html_len; /**< maximum length of HTML document */
+
+ struct module_s **compiled_modules; /**< list of compiled C modules */
+ struct worker_s **compiled_workers; /**< list of compiled C modules */
+ struct rspamd_log_format *log_format; /**< parsed log format */
+ gchar *log_format_str; /**< raw log format string */
+
+ struct rspamd_external_libs_ctx *libs_ctx; /**< context for external libraries */
+ struct rspamd_monitored_ctx *monitored_ctx; /**< context for monitored resources */
+ void *redis_pool; /**< redis connection pool */
+
+ struct rspamd_re_cache *re_cache; /**< static regexp cache */
+
+ GHashTable *trusted_keys; /**< list of trusted public keys */
+
+ struct rspamd_config_cfg_lua_script *on_load_scripts; /**< list of scripts executed on workers load */
+ struct rspamd_config_cfg_lua_script *post_init_scripts; /**< list of scripts executed on config being fully loaded */
+ struct rspamd_config_cfg_lua_script *on_term_scripts; /**< list of callbacks called on worker's termination */
+ struct rspamd_config_cfg_lua_script *config_unload_scripts; /**< list of scripts executed on config unload */
+
+ gchar *ssl_ca_path; /**< path to CA certs */
+ gchar *ssl_ciphers; /**< set of preferred ciphers */
+ gchar *zstd_input_dictionary; /**< path to zstd input dictionary */
+ gchar *zstd_output_dictionary; /**< path to zstd output dictionary */
+ ucl_object_t *neighbours; /**< other servers in the cluster */
+
+ struct rspamd_config_settings_elt *setting_ids; /**< preprocessed settings ids */
+ struct rspamd_lang_detector *lang_det; /**< language detector */
+ struct rspamd_worker *cur_worker; /**< set dynamically by each worker */
+
+ ref_entry_t ref; /**< reference counter */
+};
+
+
+/**
+ * Parse bind credits
+ * @param cf config file to use
+ * @param str line that presents bind line
+ * @param type type of credits
+ * @return 1 if line was successfully parsed and 0 in case of error
+ */
+gboolean rspamd_parse_bind_line(struct rspamd_config *cfg,
+ struct rspamd_worker_conf *cf, const gchar *str);
+
+
+enum rspamd_config_init_flags {
+ RSPAMD_CONFIG_INIT_DEFAULT = 0u,
+ RSPAMD_CONFIG_INIT_SKIP_LUA = (1u << 0u),
+ RSPAMD_CONFIG_INIT_WIPE_LUA_MEM = (1u << 1u),
+};
+
+/**
+ * Init default values
+ * @param cfg config file
+ */
+struct rspamd_config *rspamd_config_new(enum rspamd_config_init_flags flags);
+
+/**
+ * Free memory used by config structure
+ * @param cfg config file
+ */
+void rspamd_config_free(struct rspamd_config *cfg);
+
+/**
+ * Gets module option with specified name
+ * @param cfg config file
+ * @param module_name name of module
+ * @param opt_name name of option to get
+ * @return module value or NULL if option does not defined
+ */
+const ucl_object_t *rspamd_config_get_module_opt(struct rspamd_config *cfg,
+ const gchar *module_name,
+ const gchar *opt_name) G_GNUC_WARN_UNUSED_RESULT;
+
+
+/**
+ * Parse flag
+ * @param str string representation of flag (eg. 'on')
+ * @return numeric value of flag (0 or 1)
+ */
+gint rspamd_config_parse_flag(const gchar *str, guint len);
+
+enum rspamd_post_load_options {
+ RSPAMD_CONFIG_INIT_URL = 1 << 0,
+ RSPAMD_CONFIG_INIT_LIBS = 1 << 1,
+ RSPAMD_CONFIG_INIT_SYMCACHE = 1 << 2,
+ RSPAMD_CONFIG_INIT_VALIDATE = 1 << 3,
+ RSPAMD_CONFIG_INIT_NO_TLD = 1 << 4,
+ RSPAMD_CONFIG_INIT_PRELOAD_MAPS = 1 << 5,
+ RSPAMD_CONFIG_INIT_POST_LOAD_LUA = 1 << 6,
+};
+
+#define RSPAMD_CONFIG_LOAD_ALL (RSPAMD_CONFIG_INIT_URL | \
+ RSPAMD_CONFIG_INIT_LIBS | \
+ RSPAMD_CONFIG_INIT_SYMCACHE | \
+ RSPAMD_CONFIG_INIT_VALIDATE | \
+ RSPAMD_CONFIG_INIT_PRELOAD_MAPS | \
+ RSPAMD_CONFIG_INIT_POST_LOAD_LUA)
+
+/**
+ * Do post load actions for config
+ * @param cfg config file
+ */
+gboolean rspamd_config_post_load(struct rspamd_config *cfg,
+ enum rspamd_post_load_options opts);
+
+/*
+ * Return a new classifier_config structure, setting default and non-conflicting attributes
+ */
+struct rspamd_classifier_config *rspamd_config_new_classifier(
+ struct rspamd_config *cfg,
+ struct rspamd_classifier_config *c);
+
+/*
+ * Return a new worker_conf structure, setting default and non-conflicting attributes
+ */
+struct rspamd_worker_conf *rspamd_config_new_worker(struct rspamd_config *cfg,
+ struct rspamd_worker_conf *c);
+
+/*
+ * Return a new metric structure, setting default and non-conflicting attributes
+ */
+void rspamd_config_init_metric(struct rspamd_config *cfg);
+
+/*
+ * Return new symbols group definition
+ */
+struct rspamd_symbols_group *rspamd_config_new_group(
+ struct rspamd_config *cfg,
+ const gchar *name);
+
+/*
+ * Return a new statfile structure, setting default and non-conflicting attributes
+ */
+struct rspamd_statfile_config *rspamd_config_new_statfile(
+ struct rspamd_config *cfg,
+ struct rspamd_statfile_config *c);
+
+/*
+ * Register symbols of classifiers inside metrics
+ */
+void rspamd_config_insert_classify_symbols(struct rspamd_config *cfg);
+
+/*
+ * Check statfiles inside a classifier
+ */
+gboolean rspamd_config_check_statfiles(struct rspamd_classifier_config *cf);
+
+/*
+ * Find classifier config by name
+ */
+struct rspamd_classifier_config *rspamd_config_find_classifier(
+ struct rspamd_config *cfg,
+ const gchar *name);
+
+void rspamd_ucl_add_conf_macros(struct ucl_parser *parser,
+ struct rspamd_config *cfg);
+
+void rspamd_ucl_add_conf_variables(struct ucl_parser *parser, GHashTable *vars);
+
+/**
+ * Initialize rspamd filtering system (lua and C filters)
+ * @param cfg
+ * @param reconfig
+ * @return
+ */
+gboolean rspamd_init_filters(struct rspamd_config *cfg, bool reconfig, bool strict);
+
+/**
+ * Add new symbol to the metric
+ * @param cfg
+ * @param metric metric's name (or NULL for the default metric)
+ * @param symbol symbol's name
+ * @param score symbol's score
+ * @param description optional description
+ * @param group optional group name
+ * @param one_shot TRUE if symbol can add its score once
+ * @param rewrite_existing TRUE if we need to rewrite the existing symbol
+ * @param priority use the following priority for a symbol
+ * @param nshots means maximum number of hits for a symbol in metric (-1 for unlimited)
+ * @return TRUE if symbol has been inserted or FALSE if symbol already exists with higher priority
+ */
+gboolean rspamd_config_add_symbol(struct rspamd_config *cfg,
+ const gchar *symbol,
+ gdouble score,
+ const gchar *description,
+ const gchar *group,
+ guint flags,
+ guint priority,
+ gint nshots);
+
+/**
+ * Adds new group for a symbol
+ * @param cfg
+ * @param symbol
+ * @param group
+ * @return
+ */
+gboolean rspamd_config_add_symbol_group(struct rspamd_config *cfg,
+ const gchar *symbol,
+ const gchar *group);
+
+/**
+ * Sets action score for a specified metric with the specified priority
+ * @param cfg config file
+ * @param metric metric name (or NULL for default metric)
+ * @param action_name symbolic name of action
+ * @param obj data to set for action
+ * @return TRUE if symbol has been inserted or FALSE if action already exists with higher priority
+ */
+gboolean rspamd_config_set_action_score(struct rspamd_config *cfg,
+ const gchar *action_name,
+ const ucl_object_t *obj);
+
+/**
+ * Check priority and maybe disable action completely
+ * @param cfg
+ * @param action_name
+ * @param priority
+ * @return
+ */
+gboolean rspamd_config_maybe_disable_action(struct rspamd_config *cfg,
+ const gchar *action_name,
+ guint priority);
+
+/**
+ * Checks if a specified C or lua module is enabled or disabled in the config.
+ * The logic of check is the following:
+ *
+ * - For C modules, we check `filters` line and enable module only if it is found there
+ * - For LUA modules we check the corresponding configuration section:
+ * - if section exists, then we check `enabled` key and check its value
+ * - if section is absent, we consider module as disabled
+ * - For both C and LUA modules we check if the group with the module name is disabled in the default metric
+ * @param cfg config file
+ * @param module_name module name
+ * @return TRUE if a module is enabled
+ */
+gboolean rspamd_config_is_module_enabled(struct rspamd_config *cfg,
+ const gchar *module_name);
+
+/**
+ * Verifies enabled/disabled combination in the specified object
+ * @param obj
+ * @return TRUE if there is no explicit disable in the object found
+ */
+gboolean rspamd_config_is_enabled_from_ucl(rspamd_mempool_t *pool,
+ const ucl_object_t *obj);
+
+/*
+ * Get action from a string
+ */
+gboolean rspamd_action_from_str(const gchar *data, enum rspamd_action_type *result);
+
+/*
+ * Return textual representation of action enumeration
+ */
+const gchar *rspamd_action_to_str(enum rspamd_action_type action);
+
+const gchar *rspamd_action_to_str_alt(enum rspamd_action_type action);
+
+/**
+ * Parse radix tree or radix map from ucl object
+ * @param cfg configuration object
+ * @param obj ucl object with parameter
+ * @param target target radix tree
+ * @param err error pointer
+ * @return
+ */
+struct rspamd_radix_map_helper;
+
+gboolean rspamd_config_radix_from_ucl(struct rspamd_config *cfg, const ucl_object_t *obj, const gchar *description,
+ struct rspamd_radix_map_helper **target, GError **err,
+ struct rspamd_worker *worker, const gchar *map_name);
+
+/**
+ * Adds new settings id to be preprocessed
+ * @param cfg
+ * @param name
+ * @param symbols_enabled (ownership is transferred to callee)
+ * @param symbols_disabled (ownership is transferred to callee)
+ */
+void rspamd_config_register_settings_id(struct rspamd_config *cfg,
+ const gchar *name,
+ ucl_object_t *symbols_enabled,
+ ucl_object_t *symbols_disabled,
+ enum rspamd_config_settings_policy policy);
+
+/**
+ * Convert settings name to settings id
+ * @param name
+ * @param namelen
+ * @return
+ */
+guint32 rspamd_config_name_to_id(const gchar *name, gsize namelen);
+
+/**
+ * Finds settings id element and obtain reference count (must be unrefed by caller)
+ * @param cfg
+ * @param id
+ * @return
+ */
+struct rspamd_config_settings_elt *rspamd_config_find_settings_id_ref(
+ struct rspamd_config *cfg,
+ guint32 id);
+
+/**
+ * Finds settings id element and obtain reference count (must be unrefed by callee)
+ * @param cfg
+ * @param id
+ * @return
+ */
+struct rspamd_config_settings_elt *rspamd_config_find_settings_name_ref(
+ struct rspamd_config *cfg,
+ const gchar *name, gsize namelen);
+
+/**
+ * Returns action object by name
+ * @param cfg
+ * @param name
+ * @return
+ */
+struct rspamd_action *rspamd_config_get_action(struct rspamd_config *cfg,
+ const gchar *name);
+
+struct rspamd_action *rspamd_config_get_action_by_type(struct rspamd_config *cfg,
+ enum rspamd_action_type type);
+
+/**
+ * Iterate over all actions
+ * @param cfg
+ * @param func
+ * @param data
+ */
+void rspamd_config_actions_foreach(struct rspamd_config *cfg,
+ void (*func)(struct rspamd_action *act, void *d),
+ void *data);
+/**
+ * Iterate over all actions with index
+ * @param cfg
+ * @param func
+ * @param data
+ */
+void rspamd_config_actions_foreach_enumerate(struct rspamd_config *cfg,
+ void (*func)(int idx, struct rspamd_action *act, void *d),
+ void *data);
+
+/**
+ * Returns number of actions defined in the config
+ * @param cfg
+ * @return
+ */
+gsize rspamd_config_actions_size(struct rspamd_config *cfg);
+
+int rspamd_config_ev_backend_get(struct rspamd_config *cfg);
+const gchar *rspamd_config_ev_backend_to_string(int ev_backend, gboolean *effective);
+
+struct rspamd_external_libs_ctx;
+
+/**
+ * Initialize rspamd libraries
+ */
+struct rspamd_external_libs_ctx *rspamd_init_libs(void);
+
+/**
+ * Reset and initialize decompressor
+ * @param ctx
+ */
+gboolean rspamd_libs_reset_decompression(struct rspamd_external_libs_ctx *ctx);
+
+/**
+ * Reset and initialize compressor
+ * @param ctx
+ */
+gboolean rspamd_libs_reset_compression(struct rspamd_external_libs_ctx *ctx);
+
+/**
+ * Destroy external libraries context
+ */
+void rspamd_deinit_libs(struct rspamd_external_libs_ctx *ctx);
+
+/**
+ * Returns TRUE if an address belongs to some local address
+ */
+gboolean rspamd_ip_is_local_cfg(struct rspamd_config *cfg,
+ const rspamd_inet_addr_t *addr);
+
+/**
+ * Configure libraries
+ */
+gboolean rspamd_config_libs(struct rspamd_external_libs_ctx *ctx,
+ struct rspamd_config *cfg);
+
+
+#define msg_err_config(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ cfg->cfg_pool->tag.tagname, cfg->checksum, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_err_config_forced(...) rspamd_default_log_function((gint) G_LOG_LEVEL_CRITICAL | (gint) RSPAMD_LOG_FORCED, \
+ cfg->cfg_pool->tag.tagname, cfg->checksum, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_config(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ cfg->cfg_pool->tag.tagname, cfg->checksum, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_config(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ cfg->cfg_pool->tag.tagname, cfg->checksum, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+extern guint rspamd_config_log_id;
+#define msg_debug_config(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_config_log_id, "config", cfg->checksum, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifdef CFG_FILE_H */
diff --git a/src/libserver/cfg_file_private.h b/src/libserver/cfg_file_private.h
new file mode 100644
index 0000000..8c9fc65
--- /dev/null
+++ b/src/libserver/cfg_file_private.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_CFG_FILE_PRIVATE_H
+#define RSPAMD_CFG_FILE_PRIVATE_H
+
+#include "cfg_file.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Action config definition
+ */
+struct rspamd_action {
+ enum rspamd_action_type action_type;
+ int flags; /* enum rspamd_action_flags */
+ guint priority;
+ gdouble threshold;
+ gchar *name;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx
new file mode 100644
index 0000000..3ac7560
--- /dev/null
+++ b/src/libserver/cfg_rcl.cxx
@@ -0,0 +1,4110 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lua/lua_common.h"
+#include "cfg_rcl.h"
+#include "rspamd.h"
+#include "cfg_file_private.h"
+#include "utlist.h"
+#include "cfg_file.h"
+#include "expression.h"
+#include "src/libserver/composites/composites.h"
+#include "libserver/worker_util.h"
+#include "unix-std.h"
+#include "cryptobox.h"
+#include "libutil/multipattern.h"
+#include "libmime/email_addr.h"
+#include "libmime/lang_detection.h"
+
+#include <string>
+#include <filesystem>
+#include <algorithm>// for std::transform
+#include <memory>
+#include "contrib/ankerl/unordered_dense.h"
+#include "fmt/core.h"
+#include "libutil/cxx/util.hxx"
+#include "libutil/cxx/file_util.hxx"
+#include "frozen/unordered_set.h"
+#include "frozen/string.h"
+
+#ifdef HAVE_SYSLOG_H
+#include <syslog.h>
+#endif
+
+#include <cmath>
+
+struct rspamd_rcl_default_handler_data {
+ struct rspamd_rcl_struct_parser pd;
+ std::string key;
+ rspamd_rcl_default_handler_t handler;
+};
+
+struct rspamd_rcl_sections_map;
+
+struct rspamd_rcl_section {
+ struct rspamd_rcl_sections_map *top{};
+ std::string name; /**< name of section */
+ std::optional<std::string> key_attr;
+ std::optional<std::string> default_key;
+ rspamd_rcl_handler_t handler{}; /**< handler of section attributes */
+ enum ucl_type type; /**< type of attribute */
+ bool required{}; /**< whether this param is required */
+ bool strict_type{}; /**< whether we need strict type */
+ mutable bool processed{}; /**< whether this section was processed */
+ ankerl::unordered_dense::map<std::string, std::shared_ptr<struct rspamd_rcl_section>> subsections;
+ ankerl::unordered_dense::map<std::string, struct rspamd_rcl_default_handler_data> default_parser; /**< generic parsing fields */
+ rspamd_rcl_section_fin_t fin{}; /** called at the end of section parsing */
+ gpointer fin_ud{};
+ ucl_object_t *doc_ref{}; /**< reference to the section's documentation */
+
+ virtual ~rspamd_rcl_section()
+ {
+ if (doc_ref) {
+ ucl_object_unref(doc_ref);
+ }
+ }
+};
+
+struct rspamd_worker_param_parser {
+ rspamd_rcl_default_handler_t handler; /**< handler function */
+ struct rspamd_rcl_struct_parser parser; /**< parser attributes */
+};
+
+struct rspamd_worker_cfg_parser {
+ struct pair_hash {
+ using is_avalanching = void;
+ template<class T1, class T2>
+ std::size_t operator()(const std::pair<T1, T2> &pair) const
+ {
+ return ankerl::unordered_dense::hash<T1>()(pair.first) ^ ankerl::unordered_dense::hash<T2>()(pair.second);
+ }
+ };
+ ankerl::unordered_dense::map<std::pair<std::string, gpointer>,
+ rspamd_worker_param_parser, pair_hash>
+ parsers; /**< parsers hash */
+ gint type; /**< workers quark */
+ gboolean (*def_obj_parser)(ucl_object_t *obj, gpointer ud); /**< default object parser */
+ gpointer def_ud;
+};
+
+struct rspamd_rcl_sections_map {
+ ankerl::unordered_dense::map<std::string, std::shared_ptr<struct rspamd_rcl_section>> sections;
+ std::vector<std::shared_ptr<struct rspamd_rcl_section>> sections_order;
+ ankerl::unordered_dense::map<int, struct rspamd_worker_cfg_parser> workers_parser;
+ ankerl::unordered_dense::set<std::string> lua_modules_seen;
+};
+
+static bool rspamd_rcl_process_section(struct rspamd_config *cfg,
+ const struct rspamd_rcl_section &sec,
+ gpointer ptr, const ucl_object_t *obj, rspamd_mempool_t *pool,
+ GError **err);
+static bool
+rspamd_rcl_section_parse_defaults(struct rspamd_config *cfg,
+ const struct rspamd_rcl_section &section,
+ rspamd_mempool_t *pool, const ucl_object_t *obj, gpointer ptr,
+ GError **err);
+
+/*
+ * Common section handlers
+ */
+static gboolean
+rspamd_rcl_logging_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud, struct rspamd_rcl_section *section,
+ GError **err)
+{
+ const ucl_object_t *val;
+ const gchar *facility = nullptr, *log_type = nullptr, *log_level = nullptr;
+ auto *cfg = (struct rspamd_config *) ud;
+
+ val = ucl_object_lookup(obj, "type");
+ if (val != nullptr && ucl_object_tostring_safe(val, &log_type)) {
+ if (g_ascii_strcasecmp(log_type, "file") == 0) {
+ /* Need to get filename */
+ val = ucl_object_lookup(obj, "filename");
+ if (val == nullptr || val->type != UCL_STRING) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ ENOENT,
+ "filename attribute must be specified for file logging type");
+ return FALSE;
+ }
+ cfg->log_type = RSPAMD_LOG_FILE;
+ cfg->log_file = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(val));
+ }
+ else if (g_ascii_strcasecmp(log_type, "syslog") == 0) {
+ /* Need to get facility */
+#ifdef HAVE_SYSLOG_H
+ cfg->log_facility = LOG_DAEMON;
+ cfg->log_type = RSPAMD_LOG_SYSLOG;
+ val = ucl_object_lookup(obj, "facility");
+ if (val != nullptr && ucl_object_tostring_safe(val, &facility)) {
+ if (g_ascii_strcasecmp(facility, "LOG_AUTH") == 0 ||
+ g_ascii_strcasecmp(facility, "auth") == 0) {
+ cfg->log_facility = LOG_AUTH;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_CRON") == 0 ||
+ g_ascii_strcasecmp(facility, "cron") == 0) {
+ cfg->log_facility = LOG_CRON;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_DAEMON") == 0 ||
+ g_ascii_strcasecmp(facility, "daemon") == 0) {
+ cfg->log_facility = LOG_DAEMON;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_MAIL") == 0 ||
+ g_ascii_strcasecmp(facility, "mail") == 0) {
+ cfg->log_facility = LOG_MAIL;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_USER") == 0 ||
+ g_ascii_strcasecmp(facility, "user") == 0) {
+ cfg->log_facility = LOG_USER;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL0") == 0 ||
+ g_ascii_strcasecmp(facility, "local0") == 0) {
+ cfg->log_facility = LOG_LOCAL0;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL1") == 0 ||
+ g_ascii_strcasecmp(facility, "local1") == 0) {
+ cfg->log_facility = LOG_LOCAL1;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL2") == 0 ||
+ g_ascii_strcasecmp(facility, "local2") == 0) {
+ cfg->log_facility = LOG_LOCAL2;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL3") == 0 ||
+ g_ascii_strcasecmp(facility, "local3") == 0) {
+ cfg->log_facility = LOG_LOCAL3;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL4") == 0 ||
+ g_ascii_strcasecmp(facility, "local4") == 0) {
+ cfg->log_facility = LOG_LOCAL4;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL5") == 0 ||
+ g_ascii_strcasecmp(facility, "local5") == 0) {
+ cfg->log_facility = LOG_LOCAL5;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL6") == 0 ||
+ g_ascii_strcasecmp(facility, "local6") == 0) {
+ cfg->log_facility = LOG_LOCAL6;
+ }
+ else if (g_ascii_strcasecmp(facility, "LOG_LOCAL7") == 0 ||
+ g_ascii_strcasecmp(facility, "local7") == 0) {
+ cfg->log_facility = LOG_LOCAL7;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "invalid log facility: %s",
+ facility);
+ return FALSE;
+ }
+ }
+#endif
+ }
+ else if (g_ascii_strcasecmp(log_type,
+ "stderr") == 0 ||
+ g_ascii_strcasecmp(log_type, "console") == 0) {
+ cfg->log_type = RSPAMD_LOG_CONSOLE;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "invalid log type: %s",
+ log_type);
+ return FALSE;
+ }
+ }
+ else {
+ /* No type specified */
+ msg_warn_config(
+ "logging type is not specified correctly, log output to the console");
+ }
+
+ /* Handle log level */
+ val = ucl_object_lookup(obj, "level");
+ if (val != nullptr && ucl_object_tostring_safe(val, &log_level)) {
+ if (g_ascii_strcasecmp(log_level, "error") == 0) {
+ cfg->log_level = G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL;
+ }
+ else if (g_ascii_strcasecmp(log_level, "warning") == 0) {
+ cfg->log_level = G_LOG_LEVEL_WARNING;
+ }
+ else if (g_ascii_strcasecmp(log_level, "info") == 0) {
+ cfg->log_level = G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE;
+ }
+ else if (g_ascii_strcasecmp(log_level, "message") == 0 ||
+ g_ascii_strcasecmp(log_level, "notice") == 0) {
+ cfg->log_level = G_LOG_LEVEL_MESSAGE;
+ }
+ else if (g_ascii_strcasecmp(log_level, "silent") == 0) {
+ cfg->log_level = G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO;
+ cfg->log_silent_workers = TRUE;
+ }
+ else if (g_ascii_strcasecmp(log_level, "debug") == 0) {
+ cfg->log_level = G_LOG_LEVEL_DEBUG;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "invalid log level: %s",
+ log_level);
+ return FALSE;
+ }
+ }
+
+ /* Handle flags */
+ val = ucl_object_lookup_any(obj, "color", "log_color", nullptr);
+ if (val && ucl_object_toboolean(val)) {
+ cfg->log_flags |= RSPAMD_LOG_FLAG_COLOR;
+ }
+
+ val = ucl_object_lookup_any(obj, "severity", "log_severity", nullptr);
+ if (val && ucl_object_toboolean(val)) {
+ cfg->log_flags |= RSPAMD_LOG_FLAG_SEVERITY;
+ }
+
+ val = ucl_object_lookup_any(obj, "systemd", "log_systemd", nullptr);
+ if (val && ucl_object_toboolean(val)) {
+ cfg->log_flags |= RSPAMD_LOG_FLAG_SYSTEMD;
+ }
+
+ val = ucl_object_lookup_any(obj, "json", "log_json", nullptr);
+ if (val && ucl_object_toboolean(val)) {
+ cfg->log_flags |= RSPAMD_LOG_FLAG_JSON;
+ }
+
+ val = ucl_object_lookup(obj, "log_re_cache");
+ if (val && ucl_object_toboolean(val)) {
+ cfg->log_flags |= RSPAMD_LOG_FLAG_RE_CACHE;
+ }
+
+ val = ucl_object_lookup_any(obj, "usec", "log_usec", nullptr);
+ if (val && ucl_object_toboolean(val)) {
+ cfg->log_flags |= RSPAMD_LOG_FLAG_USEC;
+ }
+
+ return rspamd_rcl_section_parse_defaults(cfg, *section, cfg->cfg_pool, obj,
+ (void *) cfg, err);
+}
+
+static gboolean
+rspamd_rcl_options_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ const ucl_object_t *dns, *upstream, *neighbours;
+ auto *cfg = (struct rspamd_config *) ud;
+
+ auto maybe_subsection = rspamd::find_map(section->subsections, "dns");
+
+ dns = ucl_object_lookup(obj, "dns");
+ if (maybe_subsection && dns != nullptr) {
+ if (!rspamd_rcl_section_parse_defaults(cfg,
+ *maybe_subsection.value().get(), cfg->cfg_pool, dns,
+ cfg, err)) {
+ return FALSE;
+ }
+ }
+
+ maybe_subsection = rspamd::find_map(section->subsections, "upstream");
+
+ upstream = ucl_object_lookup_any(obj, "upstream", "upstreams", nullptr);
+ if (maybe_subsection && upstream != nullptr) {
+ if (!rspamd_rcl_section_parse_defaults(cfg,
+ *maybe_subsection.value().get(), cfg->cfg_pool,
+ upstream, cfg, err)) {
+ return FALSE;
+ }
+ }
+
+ maybe_subsection = rspamd::find_map(section->subsections, "neighbours");
+
+ neighbours = ucl_object_lookup(obj, "neighbours");
+ if (maybe_subsection && neighbours != nullptr) {
+ const ucl_object_t *cur;
+
+ LL_FOREACH(neighbours, cur)
+ {
+ if (!rspamd_rcl_process_section(cfg, *maybe_subsection.value().get(), cfg, cur,
+ pool, err)) {
+ return FALSE;
+ }
+ }
+ }
+
+ const auto *gtube_patterns = ucl_object_lookup(obj, "gtube_patterns");
+ if (gtube_patterns != nullptr && ucl_object_type(gtube_patterns) == UCL_STRING) {
+ auto gtube_st = std::string{ucl_object_tostring(gtube_patterns)};
+ std::transform(gtube_st.begin(), gtube_st.end(), gtube_st.begin(), [](const auto c) -> int {
+ if (c <= 'Z' && c >= 'A')
+ return c - ('Z' - 'z');
+ return c;
+ });
+
+
+ if (gtube_st == "all") {
+ cfg->gtube_patterns_policy = RSPAMD_GTUBE_ALL;
+ }
+ else if (gtube_st == "reject") {
+ cfg->gtube_patterns_policy = RSPAMD_GTUBE_REJECT;
+ }
+ else if (gtube_st == "disabled" || gtube_st == "disable") {
+ cfg->gtube_patterns_policy = RSPAMD_GTUBE_DISABLED;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "invalid GTUBE patterns policy: %s",
+ gtube_st.c_str());
+ return FALSE;
+ }
+ }
+ else if (auto *enable_test_patterns = ucl_object_lookup(obj, "enable_test_patterns"); enable_test_patterns != nullptr) {
+ /* Legacy setting */
+ if (!!ucl_object_toboolean(enable_test_patterns)) {
+ cfg->gtube_patterns_policy = RSPAMD_GTUBE_ALL;
+ }
+ }
+
+ if (rspamd_rcl_section_parse_defaults(cfg,
+ *section, cfg->cfg_pool, obj,
+ cfg, err)) {
+ /* We need to init this early */
+ rspamd_multipattern_library_init(cfg->hs_cache_dir);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+struct rspamd_rcl_symbol_data {
+ struct rspamd_symbols_group *gr;
+ struct rspamd_config *cfg;
+};
+
+static gboolean
+rspamd_rcl_group_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+
+ g_assert(key != nullptr);
+
+ auto *gr = static_cast<rspamd_symbols_group *>(g_hash_table_lookup(cfg->groups, key));
+
+ if (gr == nullptr) {
+ gr = rspamd_config_new_group(cfg, key);
+ }
+
+ if (!rspamd_rcl_section_parse_defaults(cfg, *section, pool, obj,
+ gr, err)) {
+ return FALSE;
+ }
+
+ if (const auto *elt = ucl_object_lookup(obj, "one_shot"); elt != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "one_shot attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (ucl_object_toboolean(elt)) {
+ gr->flags |= RSPAMD_SYMBOL_GROUP_ONE_SHOT;
+ }
+ }
+
+ if (const auto *elt = ucl_object_lookup(obj, "disabled"); elt != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "disabled attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (ucl_object_toboolean(elt)) {
+ gr->flags |= RSPAMD_SYMBOL_GROUP_DISABLED;
+ }
+ }
+
+ if (const auto *elt = ucl_object_lookup(obj, "enabled"); elt != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "enabled attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (!ucl_object_toboolean(elt)) {
+ gr->flags |= RSPAMD_SYMBOL_GROUP_DISABLED;
+ }
+ }
+
+ if (const auto *elt = ucl_object_lookup(obj, "public"); elt != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "public attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (ucl_object_toboolean(elt)) {
+ gr->flags |= RSPAMD_SYMBOL_GROUP_PUBLIC;
+ }
+ }
+
+ if (const auto *elt = ucl_object_lookup(obj, "private"); elt != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "private attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (!ucl_object_toboolean(elt)) {
+ gr->flags |= RSPAMD_SYMBOL_GROUP_PUBLIC;
+ }
+ }
+
+
+ if (const auto *elt = ucl_object_lookup(obj, "description"); elt != nullptr) {
+ gr->description = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(elt));
+ }
+
+ struct rspamd_rcl_symbol_data sd = {
+ .gr = gr,
+ .cfg = cfg,
+ };
+
+ /* Handle symbols */
+ if (const auto *val = ucl_object_lookup(obj, "symbols"); val != nullptr && ucl_object_type(val) == UCL_OBJECT) {
+ auto subsection = rspamd::find_map(section->subsections, "symbols");
+
+ g_assert(subsection.has_value());
+ if (!rspamd_rcl_process_section(cfg, *subsection.value().get(), &sd, val,
+ pool, err)) {
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_rcl_symbol_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ auto *sd = static_cast<rspamd_rcl_symbol_data *>(ud);
+ struct rspamd_config *cfg;
+ const ucl_object_t *elt;
+ const gchar *description = nullptr;
+ gdouble score = NAN;
+ guint priority = 1, flags = 0;
+ gint nshots = 0;
+
+ g_assert(key != nullptr);
+ cfg = sd->cfg;
+
+ if ((elt = ucl_object_lookup(obj, "one_shot")) != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "one_shot attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (ucl_object_toboolean(elt)) {
+ nshots = 1;
+ }
+ }
+
+ if ((elt = ucl_object_lookup(obj, "any_shot")) != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "any_shot attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+ if (ucl_object_toboolean(elt)) {
+ nshots = -1;
+ }
+ }
+
+ if ((elt = ucl_object_lookup(obj, "one_param")) != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "one_param attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+
+ if (ucl_object_toboolean(elt)) {
+ flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM;
+ }
+ }
+
+ if ((elt = ucl_object_lookup(obj, "ignore")) != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "ignore attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+
+ if (ucl_object_toboolean(elt)) {
+ flags |= RSPAMD_SYMBOL_FLAG_IGNORE_METRIC;
+ }
+ }
+
+ if ((elt = ucl_object_lookup(obj, "enabled")) != nullptr) {
+ if (ucl_object_type(elt) != UCL_BOOLEAN) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "enabled attribute is not boolean for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+
+ if (!ucl_object_toboolean(elt)) {
+ flags |= RSPAMD_SYMBOL_FLAG_DISABLED;
+ }
+ }
+
+ if ((elt = ucl_object_lookup(obj, "nshots")) != nullptr) {
+ if (ucl_object_type(elt) != UCL_FLOAT && ucl_object_type(elt) != UCL_INT) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "nshots attribute is not numeric for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+
+ nshots = ucl_object_toint(elt);
+ }
+
+ elt = ucl_object_lookup_any(obj, "score", "weight", nullptr);
+ if (elt) {
+ if (ucl_object_type(elt) != UCL_FLOAT && ucl_object_type(elt) != UCL_INT) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "score attribute is not numeric for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+
+ score = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(obj, "priority");
+ if (elt) {
+ if (ucl_object_type(elt) != UCL_FLOAT && ucl_object_type(elt) != UCL_INT) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "priority attribute is not numeric for symbol: '%s'",
+ key);
+
+ return FALSE;
+ }
+
+ priority = ucl_object_toint(elt);
+ }
+ else {
+ priority = ucl_object_get_priority(obj) + 1;
+ }
+
+ elt = ucl_object_lookup(obj, "description");
+ if (elt) {
+ description = ucl_object_tostring(elt);
+ }
+
+ if (sd->gr) {
+ rspamd_config_add_symbol(cfg, key, score,
+ description, sd->gr->name, flags, priority, nshots);
+ }
+ else {
+ rspamd_config_add_symbol(cfg, key, score,
+ description, nullptr, flags, priority, nshots);
+ }
+
+ elt = ucl_object_lookup(obj, "groups");
+
+ if (elt) {
+ ucl_object_iter_t gr_it;
+ const ucl_object_t *cur_gr;
+
+ gr_it = ucl_object_iterate_new(elt);
+
+ while ((cur_gr = ucl_object_iterate_safe(gr_it, true)) != nullptr) {
+ rspamd_config_add_symbol_group(cfg, key,
+ ucl_object_tostring(cur_gr));
+ }
+
+ ucl_object_iterate_free(gr_it);
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_rcl_actions_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+ const ucl_object_t *cur;
+ ucl_object_iter_t it;
+
+ it = ucl_object_iterate_new(obj);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != nullptr) {
+ gint type = ucl_object_type(cur);
+
+ if (type == UCL_NULL) {
+ rspamd_config_maybe_disable_action(cfg, ucl_object_key(cur),
+ ucl_object_get_priority(cur));
+ }
+ else if (type == UCL_OBJECT || type == UCL_FLOAT || type == UCL_INT) {
+ /* Exceptions */
+ auto default_elt = false;
+
+ for (const auto &[name, def_elt]: section->default_parser) {
+ if (def_elt.key == ucl_object_key(cur)) {
+ default_elt = true;
+ break;
+ }
+ }
+
+ if (default_elt) {
+ continue;
+ }
+
+ /* Something non-default */
+ if (!rspamd_config_set_action_score(cfg,
+ ucl_object_key(cur),
+ cur)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "invalid action definition for: '%s'",
+ ucl_object_key(cur));
+ ucl_object_iterate_free(it);
+
+ return FALSE;
+ }
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ return rspamd_rcl_section_parse_defaults(cfg, *section, pool, obj, cfg, err);
+}
+constexpr const auto known_worker_attributes = frozen::make_unordered_set<frozen::string>({
+ "bind_socket",
+ "listen",
+ "bind",
+ "count",
+ "max_files",
+ "max_core",
+ "enabled",
+});
+static gboolean
+rspamd_rcl_worker_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+
+ g_assert(key != nullptr);
+ const auto *worker_type = key;
+
+ auto qtype = g_quark_try_string(worker_type);
+ if (qtype == 0) {
+ msg_err_config("unknown worker type: %s", worker_type);
+ return FALSE;
+ }
+
+ auto *wrk = rspamd_config_new_worker(cfg, nullptr);
+ wrk->options = ucl_object_copy(obj);
+ wrk->worker = rspamd_get_worker_by_type(cfg, qtype);
+
+ if (wrk->worker == nullptr) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "unknown worker type: %s",
+ worker_type);
+ return FALSE;
+ }
+
+ wrk->type = qtype;
+
+ if (wrk->worker->worker_init_func) {
+ wrk->ctx = wrk->worker->worker_init_func(cfg);
+ }
+
+ const auto *val = ucl_object_lookup_any(obj, "bind_socket", "listen", "bind", nullptr);
+ /* This name is more logical */
+ if (val != nullptr) {
+ auto it = ucl_object_iterate_new(val);
+ const ucl_object_t *cur;
+ const char *worker_bind = nullptr;
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != nullptr) {
+ if (!ucl_object_tostring_safe(cur, &worker_bind)) {
+ continue;
+ }
+ if (!rspamd_parse_bind_line(cfg, wrk, worker_bind)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot parse bind line: %s",
+ worker_bind);
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ }
+
+ if (!rspamd_rcl_section_parse_defaults(cfg, *section, cfg->cfg_pool, obj,
+ wrk, err)) {
+ return FALSE;
+ }
+
+ /* Parse other attributes */
+ auto maybe_wparser = rspamd::find_map(section->top->workers_parser, wrk->type);
+
+ if (maybe_wparser && obj->type == UCL_OBJECT) {
+ auto &wparser = maybe_wparser.value().get();
+ auto it = ucl_object_iterate_new(obj);
+ const ucl_object_t *cur;
+
+ while ((cur = ucl_object_iterate_full(it, UCL_ITERATE_EXPLICIT)) != nullptr) {
+ auto srch = std::make_pair(ucl_object_key(cur), (gpointer) wrk->ctx);
+ auto maybe_specific = rspamd::find_map(wparser.parsers, srch);
+
+ if (maybe_specific) {
+ auto &whandler = maybe_specific.value().get();
+ const ucl_object_t *cur_obj;
+
+ LL_FOREACH(cur, cur_obj)
+ {
+ if (!whandler.handler(cfg->cfg_pool,
+ cur_obj,
+ (void *) &whandler.parser,
+ section,
+ err)) {
+
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+
+ if (!(whandler.parser.flags & RSPAMD_CL_FLAG_MULTIPLE)) {
+ break;
+ }
+ }
+ }
+ else if (!(wrk->worker->flags & RSPAMD_WORKER_NO_STRICT_CONFIG) &&
+ known_worker_attributes.find(std::string_view{ucl_object_key(cur)}) == known_worker_attributes.end()) {
+ msg_warn_config("unknown worker attribute: %s; worker type: %s", ucl_object_key(cur), worker_type);
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ if (wparser.def_obj_parser != nullptr) {
+ auto *robj = ucl_object_ref(obj);
+
+ if (!wparser.def_obj_parser(robj, wparser.def_ud)) {
+ ucl_object_unref(robj);
+
+ return FALSE;
+ }
+
+ ucl_object_unref(robj);
+ }
+ }
+
+ cfg->workers = g_list_prepend(cfg->workers, wrk);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_rcl_lua_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ namespace fs = std::filesystem;
+ auto *cfg = static_cast<rspamd_config *>(ud);
+ auto lua_src = fs::path{ucl_object_tostring(obj)};
+ auto *L = RSPAMD_LUA_CFG_STATE(cfg);
+ std::error_code ec1;
+
+ auto lua_dir = fs::weakly_canonical(lua_src.parent_path(), ec1);
+ auto lua_file = lua_src.filename();
+
+ if (!ec1 && !lua_dir.empty() && !lua_file.empty()) {
+ auto cur_dir = fs::current_path(ec1);
+ if (!ec1 && !cur_dir.empty() && ::chdir(lua_dir.c_str()) != -1) {
+ /* Push traceback function */
+ lua_pushcfunction(L, &rspamd_lua_traceback);
+ auto err_idx = lua_gettop(L);
+
+ /* Load file */
+ if (luaL_loadfile(L, lua_file.c_str()) != 0) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot load lua file %s: %s",
+ lua_src.c_str(),
+ lua_tostring(L, -1));
+ if (::chdir(cur_dir.c_str()) == -1) {
+ msg_err_config("cannot chdir to %s: %s", cur_dir.c_str(),
+ strerror(errno));
+ }
+
+ return FALSE;
+ }
+
+ /* Now do it */
+ if (lua_pcall(L, 0, 0, err_idx) != 0) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot init lua file %s: %s",
+ lua_src.c_str(),
+ lua_tostring(L, -1));
+ lua_settop(L, 0);
+
+ if (::chdir(cur_dir.c_str()) == -1) {
+ msg_err_config("cannot chdir to %s: %s", cur_dir.c_str(),
+ strerror(errno));
+ }
+
+ return FALSE;
+ }
+
+ lua_pop(L, 1);
+ }
+ else {
+ g_set_error(err, CFG_RCL_ERROR, ENOENT, "cannot chdir to %s: %s",
+ lua_dir.c_str(), strerror(errno));
+ if (::chdir(cur_dir.c_str()) == -1) {
+ msg_err_config("cannot chdir back to %s: %s", cur_dir.c_str(), strerror(errno));
+ }
+
+ return FALSE;
+ }
+ if (::chdir(cur_dir.c_str()) == -1) {
+ msg_err_config("cannot chdir back to %s: %s", cur_dir.c_str(), strerror(errno));
+ }
+ }
+ else {
+
+ g_set_error(err, CFG_RCL_ERROR, ENOENT, "cannot find to %s: %s",
+ lua_src.c_str(), strerror(errno));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static int
+rspamd_lua_mod_sort_fn(gconstpointer a, gconstpointer b)
+{
+ auto *m1 = *(const script_module **) a;
+ auto *m2 = *(const script_module **) b;
+
+ return strcmp(m1->name, m2->name);
+}
+
+gboolean
+rspamd_rcl_add_lua_plugins_path(struct rspamd_rcl_sections_map *sections,
+ struct rspamd_config *cfg,
+ const gchar *path,
+ gboolean main_path,
+ GError **err)
+{
+ namespace fs = std::filesystem;
+ auto dir = fs::path{path};
+ std::error_code ec;
+
+ auto add_single_file = [&](const fs::path &fpath) -> bool {
+ auto fname = fpath.filename();
+ auto modname = fname.string();
+
+ if (fname.has_extension()) {
+ modname = modname.substr(0, modname.size() - fname.extension().native().size());
+ }
+ auto *cur_mod = rspamd_mempool_alloc_type(cfg->cfg_pool,
+ struct script_module);
+ cur_mod->path = rspamd_mempool_strdup(cfg->cfg_pool, fpath.c_str());
+ cur_mod->name = rspamd_mempool_strdup(cfg->cfg_pool, modname.c_str());
+
+ if (sections->lua_modules_seen.contains(modname)) {
+ msg_info_config("already seen module %s, skip %s",
+ cur_mod->name, cur_mod->path);
+ return false;
+ }
+
+ g_ptr_array_add(cfg->script_modules, cur_mod);
+ sections->lua_modules_seen.insert(fname.string());
+
+ return true;
+ };
+
+ if (fs::is_regular_file(dir, ec) && dir.has_extension() && dir.extension() == ".lua") {
+ add_single_file(dir);
+ }
+ else if (!fs::is_directory(dir, ec)) {
+ if (!fs::exists(dir) && !main_path) {
+ msg_debug_config("optional plugins path %s is absent, skip it", path);
+
+ return TRUE;
+ }
+
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ errno,
+ "invalid lua path spec %s, %s",
+ path,
+ ec.message().c_str());
+ return FALSE;
+ }
+ else {
+ /* Handle directory */
+ for (const auto &p: fs::recursive_directory_iterator(dir, ec)) {
+ auto fpath = p.path().string();
+ if (p.is_regular_file() && fpath.ends_with(".lua")) {
+ add_single_file(p.path());
+ }
+ }
+ }
+
+ g_ptr_array_sort(cfg->script_modules, rspamd_lua_mod_sort_fn);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_rcl_modules_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+ const char *data;
+
+ if (obj->type == UCL_OBJECT) {
+ const auto *val = ucl_object_lookup(obj, "path");
+
+ if (val) {
+ const auto *cur = val;
+ LL_FOREACH(val, cur)
+ {
+ if (ucl_object_tostring_safe(cur, &data)) {
+ if (!rspamd_rcl_add_lua_plugins_path(section->top,
+ cfg,
+ data,
+ TRUE,
+ err)) {
+ return FALSE;
+ }
+ }
+ }
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "path attribute is missing");
+
+ return FALSE;
+ }
+
+ val = ucl_object_lookup(obj, "fallback_path");
+
+ if (val) {
+ const auto *cur = val;
+ LL_FOREACH(val, cur)
+ {
+ if (ucl_object_tostring_safe(cur, &data)) {
+ if (!rspamd_rcl_add_lua_plugins_path(section->top,
+ cfg,
+ data,
+ FALSE,
+ err)) {
+
+ return FALSE;
+ }
+ }
+ }
+ }
+
+ val = ucl_object_lookup(obj, "try_path");
+
+ if (val) {
+ const auto *cur = val;
+ LL_FOREACH(val, cur)
+ {
+ if (ucl_object_tostring_safe(cur, &data)) {
+ if (!rspamd_rcl_add_lua_plugins_path(section->top,
+ cfg,
+ data,
+ FALSE,
+ err)) {
+
+ return FALSE;
+ }
+ }
+ }
+ }
+ }
+ else if (ucl_object_tostring_safe(obj, &data)) {
+ if (!rspamd_rcl_add_lua_plugins_path(section->top, cfg, data, TRUE, err)) {
+ return FALSE;
+ }
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "module parameter has wrong type (must be an object or a string)");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+struct statfile_parser_data {
+ struct rspamd_config *cfg;
+ struct rspamd_classifier_config *ccf;
+};
+
+static gboolean
+rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
+ const gchar *key, gpointer ud,
+ struct rspamd_rcl_section *section, GError **err)
+{
+ auto *stud = (struct statfile_parser_data *) ud;
+ GList *labels;
+
+ g_assert(key != nullptr);
+
+ auto *cfg = stud->cfg;
+ auto *ccf = stud->ccf;
+
+ auto *st = rspamd_config_new_statfile(cfg, nullptr);
+ st->symbol = rspamd_mempool_strdup(cfg->cfg_pool, key);
+
+ if (rspamd_rcl_section_parse_defaults(cfg, *section, pool, obj, st, err)) {
+ ccf->statfiles = rspamd_mempool_glist_prepend(pool, ccf->statfiles, st);
+
+ if (st->label != nullptr) {
+ labels = (GList *) g_hash_table_lookup(ccf->labels, st->label);
+ if (labels != nullptr) {
+ /* Must use append to preserve the head stored in the hash table */
+ labels = g_list_append(labels, st);
+ }
+ else {
+ g_hash_table_insert(ccf->labels, st->label,
+ g_list_prepend(nullptr, st));
+ }
+ }
+
+ if (st->symbol != nullptr) {
+ g_hash_table_insert(cfg->classifiers_symbols, st->symbol, st);
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "statfile must have a symbol defined");
+ return FALSE;
+ }
+
+ st->opts = (ucl_object_t *) obj;
+ st->clcf = ccf;
+
+ const auto *val = ucl_object_lookup(obj, "spam");
+ if (val == nullptr) {
+ msg_info_config(
+ "statfile %s has no explicit 'spam' setting, trying to guess by symbol",
+ st->symbol);
+ if (rspamd_substring_search_caseless(st->symbol,
+ strlen(st->symbol), "spam", 4) != -1) {
+ st->is_spam = TRUE;
+ }
+ else if (rspamd_substring_search_caseless(st->symbol,
+ strlen(st->symbol), "ham", 3) != -1) {
+ st->is_spam = FALSE;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot guess spam setting from %s",
+ st->symbol);
+ return FALSE;
+ }
+ msg_info_config("guessed that statfile with symbol %s is %s",
+ st->symbol,
+ st->is_spam ? "spam" : "ham");
+ }
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gboolean
+rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ const gchar *key,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+
+ g_assert(key != nullptr);
+ auto *ccf = rspamd_config_new_classifier(cfg, nullptr);
+ auto *tkcf = (rspamd_tokenizer_config *) nullptr;
+
+ ccf->classifier = rspamd_mempool_strdup(cfg->cfg_pool, key);
+
+ if (rspamd_rcl_section_parse_defaults(cfg, *section, cfg->cfg_pool, obj,
+ ccf, err)) {
+
+ auto stat_section = rspamd::find_map(section->subsections, "statfile");
+
+ if (ccf->classifier == nullptr) {
+ ccf->classifier = rspamd_mempool_strdup(cfg->cfg_pool, "bayes");
+ }
+
+ if (ccf->name == nullptr) {
+ ccf->name = ccf->classifier;
+ }
+
+ auto it = ucl_object_iterate_new(obj);
+ const auto *val = obj;
+ auto res = TRUE;
+
+ while ((val = ucl_object_iterate_safe(it, true)) != nullptr && res) {
+ const auto *st_key = ucl_object_key(val);
+
+ if (st_key != nullptr) {
+ if (g_ascii_strcasecmp(st_key, "statfile") == 0) {
+ const auto *cur = val;
+ LL_FOREACH(val, cur)
+ {
+ struct statfile_parser_data stud = {.cfg = cfg, .ccf = ccf};
+ res = rspamd_rcl_process_section(cfg, *stat_section.value().get(), &stud,
+ cur, cfg->cfg_pool, err);
+
+ if (!res) {
+ ucl_object_iterate_free(it);
+
+ return FALSE;
+ }
+ }
+ }
+ else if (g_ascii_strcasecmp(st_key, "tokenizer") == 0) {
+ tkcf = rspamd_mempool_alloc0_type(cfg->cfg_pool, rspamd_tokenizer_config);
+
+ if (ucl_object_type(val) == UCL_STRING) {
+ tkcf->name = ucl_object_tostring(val);
+ }
+ else if (ucl_object_type(val) == UCL_OBJECT) {
+ const auto *cur = ucl_object_lookup(val, "name");
+ if (cur != nullptr) {
+ tkcf->name = ucl_object_tostring(cur);
+ tkcf->opts = val;
+ }
+ else {
+ cur = ucl_object_lookup(val, "type");
+ if (cur != nullptr) {
+ tkcf->name = ucl_object_tostring(cur);
+ tkcf->opts = val;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ }
+ else {
+ msg_err_config("fatal configuration error, cannot parse statfile definition");
+ }
+
+ if (tkcf == nullptr) {
+ tkcf = rspamd_mempool_alloc0_type(cfg->cfg_pool, rspamd_tokenizer_config);
+ tkcf->name = nullptr;
+ }
+
+ ccf->tokenizer = tkcf;
+
+ /* Handle lua conditions */
+ const auto *val = ucl_object_lookup_any(obj, "learn_condition", nullptr);
+
+ if (val) {
+ const auto *cur = val;
+ LL_FOREACH(val, cur)
+ {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ const gchar *lua_script;
+ gsize slen;
+ gint ref_idx;
+
+ lua_script = ucl_object_tolstring(cur, &slen);
+ ref_idx = rspamd_lua_function_ref_from_str(RSPAMD_LUA_CFG_STATE(cfg),
+ lua_script, slen, "learn_condition", err);
+
+ if (ref_idx == LUA_NOREF) {
+ return FALSE;
+ }
+
+ rspamd_lua_add_ref_dtor(RSPAMD_LUA_CFG_STATE(cfg), cfg->cfg_pool, ref_idx);
+ ccf->learn_conditions = rspamd_mempool_glist_append(
+ cfg->cfg_pool,
+ ccf->learn_conditions,
+ GINT_TO_POINTER(ref_idx));
+ }
+ }
+ }
+
+ val = ucl_object_lookup_any(obj, "classify_condition", nullptr);
+
+ if (val) {
+ const auto *cur = val;
+ LL_FOREACH(val, cur)
+ {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ const gchar *lua_script;
+ gsize slen;
+ gint ref_idx;
+
+ lua_script = ucl_object_tolstring(cur, &slen);
+ ref_idx = rspamd_lua_function_ref_from_str(RSPAMD_LUA_CFG_STATE(cfg),
+ lua_script, slen, "classify_condition", err);
+
+ if (ref_idx == LUA_NOREF) {
+ return FALSE;
+ }
+
+ rspamd_lua_add_ref_dtor(RSPAMD_LUA_CFG_STATE(cfg), cfg->cfg_pool, ref_idx);
+ ccf->classify_conditions = rspamd_mempool_glist_append(
+ cfg->cfg_pool,
+ ccf->classify_conditions,
+ GINT_TO_POINTER(ref_idx));
+ }
+ }
+ }
+
+ ccf->opts = (ucl_object_t *) obj;
+ cfg->classifiers = g_list_prepend(cfg->classifiers, ccf);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_rcl_composite_handler(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ const gchar *key,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+ void *composite;
+ const gchar *composite_name;
+
+ g_assert(key != nullptr);
+
+ composite_name = key;
+
+ const auto *val = ucl_object_lookup(obj, "enabled");
+ if (val != nullptr && !ucl_object_toboolean(val)) {
+ msg_info_config("composite %s is disabled", composite_name);
+ return TRUE;
+ }
+
+ if ((composite = rspamd_composites_manager_add_from_ucl(cfg->composites_manager,
+ composite_name, obj)) != nullptr) {
+ rspamd_symcache_add_symbol(cfg->cache, composite_name, 0,
+ nullptr, composite, SYMBOL_TYPE_COMPOSITE, -1);
+ }
+
+ return composite != nullptr;
+}
+
+static gboolean
+rspamd_rcl_composites_handler(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ const gchar *key,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto success = TRUE;
+
+ auto it = ucl_object_iterate_new(obj);
+ const auto *cur = obj;
+
+ while ((cur = ucl_object_iterate_safe(it, true))) {
+ success = rspamd_rcl_composite_handler(pool, cur,
+ ucl_object_key(cur), ud, section, err);
+ if (!success) {
+ break;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ return success;
+}
+
+static gboolean
+rspamd_rcl_neighbours_handler(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ const gchar *key,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *cfg = static_cast<rspamd_config *>(ud);
+ auto has_port = FALSE, has_proto = FALSE;
+ const gchar *p;
+
+ if (key == nullptr) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "missing name for neighbour");
+ return FALSE;
+ }
+
+ const auto *hostval = ucl_object_lookup(obj, "host");
+
+ if (hostval == nullptr || ucl_object_type(hostval) != UCL_STRING) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "missing host for neighbour: %s", ucl_object_key(obj));
+ return FALSE;
+ }
+
+ auto *neigh = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(neigh, ucl_object_copy(hostval), "host", 0, false);
+
+ if ((p = strrchr(ucl_object_tostring(hostval), ':')) != nullptr) {
+ if (g_ascii_isdigit(p[1])) {
+ has_port = TRUE;
+ }
+ }
+
+ if (strstr(ucl_object_tostring(hostval), "://") != nullptr) {
+ has_proto = TRUE;
+ }
+
+ /* Now make url */
+ auto urlstr = std::string{};
+ const auto *pathval = ucl_object_lookup(obj, "path");
+
+ if (!has_proto) {
+ urlstr += "http://";
+ }
+
+ urlstr += ucl_object_tostring(hostval);
+
+ if (!has_port) {
+ urlstr += ":11334";
+ }
+
+ if (pathval == nullptr) {
+ urlstr += "/";
+ }
+ else {
+ urlstr += ucl_object_tostring(pathval);
+ }
+
+ ucl_object_insert_key(neigh,
+ ucl_object_fromlstring(urlstr.data(), urlstr.size()),
+ "url", 0, false);
+ ucl_object_insert_key(cfg->neighbours, neigh, key, 0, true);
+
+ return TRUE;
+}
+
+
+struct rspamd_rcl_section *
+rspamd_rcl_add_section(struct rspamd_rcl_sections_map **top,
+ struct rspamd_rcl_section *parent_section,
+ const gchar *name, const gchar *key_attr, rspamd_rcl_handler_t handler,
+ enum ucl_type type, gboolean required, gboolean strict_type)
+{
+ return rspamd_rcl_add_section_doc(top, parent_section, name, key_attr, handler,
+ type, required, strict_type, nullptr, nullptr);
+}
+
+struct rspamd_rcl_section *
+rspamd_rcl_add_section_doc(struct rspamd_rcl_sections_map **top,
+ struct rspamd_rcl_section *parent_section,
+ const gchar *name, const gchar *key_attr, rspamd_rcl_handler_t handler,
+ enum ucl_type type, gboolean required, gboolean strict_type,
+ ucl_object_t *doc_target,
+ const gchar *doc_string)
+{
+ if (top == nullptr) {
+ g_error("invalid arguments to rspamd_rcl_add_section");
+ return nullptr;
+ }
+ if (*top == nullptr) {
+ *top = new rspamd_rcl_sections_map;
+ }
+
+ auto fill_section = [&](struct rspamd_rcl_section *section) {
+ section->name = name;
+ if (key_attr) {
+ section->key_attr = std::string{key_attr};
+ }
+ section->handler = handler;
+ section->type = type;
+ section->strict_type = strict_type;
+
+ if (doc_target == nullptr) {
+ if (parent_section && parent_section->doc_ref) {
+ section->doc_ref = ucl_object_ref(rspamd_rcl_add_doc_obj(parent_section->doc_ref,
+ doc_string,
+ name,
+ type,
+ nullptr,
+ 0,
+ nullptr,
+ 0));
+ }
+ else {
+ section->doc_ref = nullptr;
+ }
+ }
+ else {
+ section->doc_ref = ucl_object_ref(rspamd_rcl_add_doc_obj(doc_target,
+ doc_string,
+ name,
+ type,
+ nullptr,
+ 0,
+ nullptr,
+ 0));
+ }
+ section->top = *top;
+ };
+
+ /* Select the appropriate container and insert section inside it */
+ if (parent_section) {
+ auto it = parent_section->subsections.insert(std::make_pair(std::string{name},
+ std::make_shared<rspamd_rcl_section>()));
+ if (!it.second) {
+ g_error("invalid arguments to rspamd_rcl_add_section");
+ return nullptr;
+ }
+
+ fill_section(it.first->second.get());
+ return it.first->second.get();
+ }
+ else {
+ auto it = (*top)->sections.insert(std::make_pair(std::string{name},
+ std::make_shared<rspamd_rcl_section>()));
+ if (!it.second) {
+ g_error("invalid arguments to rspamd_rcl_add_section");
+ return nullptr;
+ }
+
+ (*top)->sections_order.push_back(it.first->second);
+ fill_section(it.first->second.get());
+ return it.first->second.get();
+ }
+}
+
+struct rspamd_rcl_default_handler_data *
+rspamd_rcl_add_default_handler(struct rspamd_rcl_section *section,
+ const gchar *name,
+ rspamd_rcl_default_handler_t handler,
+ goffset offset,
+ gint flags,
+ const gchar *doc_string)
+{
+ auto it = section->default_parser.emplace(std::make_pair(std::string{name}, rspamd_rcl_default_handler_data{}));
+
+ auto &nhandler = it.first->second;
+ nhandler.key = name;
+ nhandler.handler = handler;
+ nhandler.pd.offset = offset;
+ nhandler.pd.flags = flags;
+
+ if (section->doc_ref != nullptr) {
+ rspamd_rcl_add_doc_obj(section->doc_ref,
+ doc_string,
+ name,
+ UCL_NULL,
+ handler,
+ flags,
+ nullptr,
+ 0);
+ }
+
+ return &nhandler;
+}
+
+struct rspamd_rcl_sections_map *
+rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
+{
+ auto *top = new rspamd_rcl_sections_map;
+ /*
+ * Important notice:
+ * the order of parsing is equal to order of this initialization, therefore
+ * it is possible to init some portions of config prior to others
+ */
+
+ /**
+ * Logging section
+ */
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "logging"))) {
+ auto *sub = rspamd_rcl_add_section_doc(&top, nullptr,
+ "logging", nullptr,
+ rspamd_rcl_logging_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Configure rspamd logging");
+ /* Default handlers */
+ rspamd_rcl_add_default_handler(sub,
+ "log_buffer",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, log_buf_size),
+ RSPAMD_CL_FLAG_INT_32,
+ "Size of log buffer in bytes (for file logging)");
+ rspamd_rcl_add_default_handler(sub,
+ "log_urls",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, log_urls),
+ 0,
+ "Write each URL found in a message to the log file");
+ rspamd_rcl_add_default_handler(sub,
+ "debug_ip",
+ rspamd_rcl_parse_struct_ucl,
+ G_STRUCT_OFFSET(struct rspamd_config, debug_ip_map),
+ 0,
+ "Enable debugging log for the specified IP addresses");
+ rspamd_rcl_add_default_handler(sub,
+ "debug_modules",
+ rspamd_rcl_parse_struct_string_list,
+ G_STRUCT_OFFSET(struct rspamd_config, debug_modules),
+ RSPAMD_CL_FLAG_STRING_LIST_HASH,
+ "Enable debugging for the specified modules");
+ rspamd_rcl_add_default_handler(sub,
+ "log_format",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, log_format_str),
+ 0,
+ "Specify format string for the task logging output "
+ "(https://rspamd.com/doc/configuration/logging.html "
+ "for details)");
+ rspamd_rcl_add_default_handler(sub,
+ "encryption_key",
+ rspamd_rcl_parse_struct_pubkey,
+ G_STRUCT_OFFSET(struct rspamd_config, log_encryption_key),
+ 0,
+ "Encrypt sensitive information in logs using this pubkey");
+ rspamd_rcl_add_default_handler(sub,
+ "error_elts",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, log_error_elts),
+ RSPAMD_CL_FLAG_UINT,
+ "Size of circular buffer for last errors (10 by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "error_maxlen",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, log_error_elt_maxlen),
+ RSPAMD_CL_FLAG_UINT,
+ "Size of each element in error log buffer (1000 by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "task_max_elts",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, log_task_max_elts),
+ RSPAMD_CL_FLAG_UINT,
+ "Maximum number of elements in task log entry (7 by default)");
+
+ /* Documentation only options, handled in log_handler to map flags */
+ rspamd_rcl_add_doc_by_path(cfg,
+ "logging",
+ "Enable colored output (for console logging)",
+ "log_color",
+ UCL_BOOLEAN,
+ nullptr,
+ 0,
+ nullptr,
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "logging",
+ "Enable severity logging output (e.g. [error] or [warning])",
+ "log_severity",
+ UCL_BOOLEAN,
+ nullptr,
+ 0,
+ nullptr,
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "logging",
+ "Enable systemd compatible logging",
+ "systemd",
+ UCL_BOOLEAN,
+ nullptr,
+ 0,
+ nullptr,
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "logging",
+ "Write statistics of regexp processing to log (useful for hyperscan)",
+ "log_re_cache",
+ UCL_BOOLEAN,
+ nullptr,
+ 0,
+ nullptr,
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "logging",
+ "Use microseconds resolution for timestamps",
+ "log_usec",
+ UCL_BOOLEAN,
+ nullptr,
+ 0,
+ nullptr,
+ 0);
+ }
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "options"))) {
+ /**
+ * Options section
+ */
+ auto *sub = rspamd_rcl_add_section_doc(&top, nullptr,
+ "options", nullptr,
+ rspamd_rcl_options_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Global rspamd options");
+ rspamd_rcl_add_default_handler(sub,
+ "cache_file",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, cache_filename),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to the cache file");
+ rspamd_rcl_add_default_handler(sub,
+ "cache_reload",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, cache_reload_time),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "How often cache reload should be performed");
+
+ /* Old DNS configuration */
+ rspamd_rcl_add_default_handler(sub,
+ "dns_nameserver",
+ rspamd_rcl_parse_struct_ucl,
+ G_STRUCT_OFFSET(struct rspamd_config, nameservers),
+ 0,
+ "Legacy option for DNS servers used");
+ rspamd_rcl_add_default_handler(sub,
+ "dns_timeout",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_timeout),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Legacy option for DNS request timeout");
+ rspamd_rcl_add_default_handler(sub,
+ "dns_retransmits",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_retransmits),
+ RSPAMD_CL_FLAG_INT_32,
+ "Legacy option for DNS retransmits count");
+ rspamd_rcl_add_default_handler(sub,
+ "dns_sockets",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_io_per_server),
+ RSPAMD_CL_FLAG_INT_32,
+ "Legacy option for DNS sockets per server count");
+ rspamd_rcl_add_default_handler(sub,
+ "dns_max_requests",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_max_requests),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum DNS requests per task (default: 64)");
+ rspamd_rcl_add_default_handler(sub,
+ "control_socket",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, control_socket_path),
+ 0,
+ "Path to the control socket");
+ rspamd_rcl_add_default_handler(sub,
+ "explicit_modules",
+ rspamd_rcl_parse_struct_string_list,
+ G_STRUCT_OFFSET(struct rspamd_config, explicit_modules),
+ RSPAMD_CL_FLAG_STRING_LIST_HASH,
+ "Always load these modules even if they are not configured explicitly");
+ rspamd_rcl_add_default_handler(sub,
+ "allow_raw_input",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, allow_raw_input),
+ 0,
+ "Allow non MIME input for rspamd");
+ rspamd_rcl_add_default_handler(sub,
+ "one_shot",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, one_shot_mode),
+ 0,
+ "Add all symbols only once per message");
+ rspamd_rcl_add_default_handler(sub,
+ "check_attachements",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, check_text_attachements),
+ 0,
+ "Treat text attachments as normal text parts");
+ rspamd_rcl_add_default_handler(sub,
+ "tempdir",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, temp_dir),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Directory for temporary files");
+ rspamd_rcl_add_default_handler(sub,
+ "pidfile",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, pid_file),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to the pid file");
+ rspamd_rcl_add_default_handler(sub,
+ "filters",
+ rspamd_rcl_parse_struct_string_list,
+ G_STRUCT_OFFSET(struct rspamd_config, filters),
+ 0,
+ "List of internal filters enabled");
+ rspamd_rcl_add_default_handler(sub,
+ "map_watch_interval",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, map_timeout),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Interval for checking maps");
+ rspamd_rcl_add_default_handler(sub,
+ "map_file_watch_multiplier",
+ rspamd_rcl_parse_struct_double,
+ G_STRUCT_OFFSET(struct rspamd_config, map_file_watch_multiplier),
+ 0,
+ "Multiplier for map watch interval when map is file");
+ rspamd_rcl_add_default_handler(sub,
+ "maps_cache_dir",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, maps_cache_dir),
+ 0,
+ "Directory to save maps cached data (default: $DBDIR)");
+ rspamd_rcl_add_default_handler(sub,
+ "monitoring_watch_interval",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, monitored_interval),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Interval for checking monitored instances");
+ rspamd_rcl_add_default_handler(sub,
+ "disable_monitoring",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, disable_monitored),
+ 0,
+ "Disable monitoring completely");
+ rspamd_rcl_add_default_handler(sub,
+ "fips_mode",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, fips_mode),
+ 0,
+ "Enable FIPS 140-2 mode in OpenSSL");
+ rspamd_rcl_add_default_handler(sub,
+ "dynamic_conf",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, dynamic_conf),
+ 0,
+ "Path to the dynamic configuration");
+ rspamd_rcl_add_default_handler(sub,
+ "rrd",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, rrd_file),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to RRD file");
+ rspamd_rcl_add_default_handler(sub,
+ "stats_file",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, stats_file),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to stats file");
+ rspamd_rcl_add_default_handler(sub,
+ "history_file",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, history_file),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to history file");
+ rspamd_rcl_add_default_handler(sub,
+ "check_all_filters",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, check_all_filters),
+ 0,
+ "Always check all filters");
+ rspamd_rcl_add_default_handler(sub,
+ "public_groups_only",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, public_groups_only),
+ 0,
+ "Output merely public groups everywhere");
+ rspamd_rcl_add_default_handler(sub,
+ "enable_css_parser",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_css_parser),
+ 0,
+ "Enable CSS parser (experimental)");
+ rspamd_rcl_add_default_handler(sub,
+ "enable_experimental",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_experimental),
+ 0,
+ "Enable experimental plugins");
+ rspamd_rcl_add_default_handler(sub,
+ "disable_pcre_jit",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, disable_pcre_jit),
+ 0,
+ "Disable PCRE JIT");
+ rspamd_rcl_add_default_handler(sub,
+ "min_word_len",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, min_word_len),
+ RSPAMD_CL_FLAG_UINT,
+ "Minimum length of the word to be considered in statistics/fuzzy");
+ rspamd_rcl_add_default_handler(sub,
+ "max_word_len",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_word_len),
+ RSPAMD_CL_FLAG_UINT,
+ "Maximum length of the word to be considered in statistics/fuzzy");
+ rspamd_rcl_add_default_handler(sub,
+ "max_html_len",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_word_len),
+ RSPAMD_CL_FLAG_INT_SIZE,
+ "Maximum length of the html part to be parsed");
+ rspamd_rcl_add_default_handler(sub,
+ "words_decay",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, words_decay),
+ RSPAMD_CL_FLAG_UINT,
+ "Start skipping words at this amount");
+ rspamd_rcl_add_default_handler(sub,
+ "url_tld",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, tld_file),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to the TLD file for urls detector");
+ rspamd_rcl_add_default_handler(sub,
+ "tld",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, tld_file),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to the TLD file for urls detector");
+ rspamd_rcl_add_default_handler(sub,
+ "hs_cache_dir",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, hs_cache_dir),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path directory where rspamd would save hyperscan cache");
+ rspamd_rcl_add_default_handler(sub,
+ "history_rows",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, history_rows),
+ RSPAMD_CL_FLAG_UINT,
+ "Number of records in the history file");
+ rspamd_rcl_add_default_handler(sub,
+ "disable_hyperscan",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, disable_hyperscan),
+ 0,
+ "Disable hyperscan optimizations for regular expressions");
+ rspamd_rcl_add_default_handler(sub,
+ "vectorized_hyperscan",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, vectorized_hyperscan),
+ 0,
+ "Use hyperscan in vectorized mode (obsoleted, do not use)");
+ rspamd_rcl_add_default_handler(sub,
+ "cores_dir",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, cores_dir),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to the directory where rspamd core files are intended to be dumped");
+ rspamd_rcl_add_default_handler(sub,
+ "max_cores_size",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_cores_size),
+ RSPAMD_CL_FLAG_INT_SIZE,
+ "Limit of joint size of all files in `cores_dir`");
+ rspamd_rcl_add_default_handler(sub,
+ "max_cores_count",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_cores_count),
+ RSPAMD_CL_FLAG_INT_SIZE,
+ "Limit of files count in `cores_dir`");
+ rspamd_rcl_add_default_handler(sub,
+ "local_addrs",
+ rspamd_rcl_parse_struct_ucl,
+ G_STRUCT_OFFSET(struct rspamd_config, local_addrs),
+ 0,
+ "Use the specified addresses as local ones");
+ rspamd_rcl_add_default_handler(sub,
+ "local_networks",
+ rspamd_rcl_parse_struct_ucl,
+ G_STRUCT_OFFSET(struct rspamd_config, local_addrs),
+ 0,
+ "Use the specified addresses as local ones (alias for `local_addrs`)");
+ rspamd_rcl_add_default_handler(sub,
+ "trusted_keys",
+ rspamd_rcl_parse_struct_string_list,
+ G_STRUCT_OFFSET(struct rspamd_config, trusted_keys),
+ RSPAMD_CL_FLAG_STRING_LIST_HASH,
+ "List of trusted public keys used for signatures in base32 encoding");
+ rspamd_rcl_add_default_handler(sub,
+ "enable_shutdown_workaround",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_shutdown_workaround),
+ 0,
+ "Enable workaround for legacy clients");
+ rspamd_rcl_add_default_handler(sub,
+ "ignore_received",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, ignore_received),
+ 0,
+ "Ignore data from the first received header");
+ rspamd_rcl_add_default_handler(sub,
+ "ssl_ca_path",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, ssl_ca_path),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Path to ssl CA file");
+ rspamd_rcl_add_default_handler(sub,
+ "ssl_ciphers",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, ssl_ciphers),
+ 0,
+ "List of ssl ciphers (e.g. HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_message",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_message),
+ RSPAMD_CL_FLAG_INT_SIZE,
+ "Maximum size of the message to be scanned (50Mb by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_pic",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_pic_size),
+ RSPAMD_CL_FLAG_INT_SIZE,
+ "Maximum size of the picture to be normalized (1Mb by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "images_cache",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_pic_size),
+ RSPAMD_CL_FLAG_INT_SIZE,
+ "Size of DCT data cache for images (256 elements by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "zstd_input_dictionary",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, zstd_input_dictionary),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Dictionary for zstd inbound protocol compression");
+ rspamd_rcl_add_default_handler(sub,
+ "zstd_output_dictionary",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, zstd_output_dictionary),
+ RSPAMD_CL_FLAG_STRING_PATH,
+ "Dictionary for outbound zstd compression");
+ rspamd_rcl_add_default_handler(sub,
+ "compat_messages",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, compat_messages),
+ 0,
+ "Use pre 1.4 style of messages in the protocol");
+ rspamd_rcl_add_default_handler(sub,
+ "max_shots",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, default_max_shots),
+ 0,
+ "Maximum number of hits per a single symbol (default: 100)");
+ rspamd_rcl_add_default_handler(sub,
+ "sessions_cache",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_sessions_cache),
+ 0,
+ "Enable sessions cache to debug dangling sessions");
+ rspamd_rcl_add_default_handler(sub,
+ "max_sessions_cache",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_sessions_cache),
+ 0,
+ "Maximum number of sessions in cache before warning (default: 100)");
+ rspamd_rcl_add_default_handler(sub,
+ "task_timeout",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, task_timeout),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Maximum time for checking a message");
+ rspamd_rcl_add_default_handler(sub,
+ "soft_reject_on_timeout",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, soft_reject_on_timeout),
+ 0,
+ "Emit soft reject if task timeout takes place");
+ rspamd_rcl_add_default_handler(sub,
+ "check_timeout",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, task_timeout),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Maximum time for checking a message (alias for task_timeout)");
+ rspamd_rcl_add_default_handler(sub,
+ "lua_gc_step",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, lua_gc_step),
+ RSPAMD_CL_FLAG_UINT,
+ "Lua garbage-collector step (default: 200)");
+ rspamd_rcl_add_default_handler(sub,
+ "lua_gc_pause",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, lua_gc_pause),
+ RSPAMD_CL_FLAG_UINT,
+ "Lua garbage-collector pause (default: 200)");
+ rspamd_rcl_add_default_handler(sub,
+ "full_gc_iters",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, full_gc_iters),
+ RSPAMD_CL_FLAG_UINT,
+ "Task scanned before memory gc is performed (default: 0 - disabled)");
+ rspamd_rcl_add_default_handler(sub,
+ "heartbeat_interval",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, heartbeat_interval),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Time between workers heartbeats");
+ rspamd_rcl_add_default_handler(sub,
+ "heartbeats_loss_max",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, heartbeats_loss_max),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of heartbeats to be lost before trying to "
+ "terminate a worker (default: 0 - disabled)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_lua_urls",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_lua_urls),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of URLs to pass to Lua to avoid DoS (default: 1024)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_urls",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_urls),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of URLs to process to avoid DoS (default: 10240)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_recipients",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_recipients),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of recipients to process to avoid DoS (default: 1024)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_blas_threads",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_blas_threads),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum number of Blas threads for learning neural networks (default: 1)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_opts_len",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, max_opts_len),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum size of all options for a single symbol (default: 4096)");
+ rspamd_rcl_add_default_handler(sub,
+ "events_backend",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, events_backend),
+ 0,
+ "Events backend to use: kqueue, epoll, select, poll or auto (default: auto)");
+
+ rspamd_rcl_add_doc_by_path(cfg,
+ "options",
+ "Swtich mode of gtube patterns: disable, reject, all",
+ "gtube_patterns",
+ UCL_STRING,
+ nullptr,
+ 0,
+ "reject",
+ 0);
+
+ /* Neighbours configuration */
+ rspamd_rcl_add_section_doc(&top, sub, "neighbours", "name",
+ rspamd_rcl_neighbours_handler,
+ UCL_OBJECT, FALSE, TRUE,
+ cfg->doc_strings,
+ "List of members of Rspamd cluster");
+
+ /* New DNS configuration */
+ auto *ssub = rspamd_rcl_add_section_doc(&top, sub, "dns", nullptr, nullptr,
+ UCL_OBJECT, FALSE, TRUE,
+ cfg->doc_strings,
+ "Options for DNS resolver");
+ rspamd_rcl_add_default_handler(ssub,
+ "nameserver",
+ rspamd_rcl_parse_struct_ucl,
+ G_STRUCT_OFFSET(struct rspamd_config, nameservers),
+ 0,
+ "List of DNS servers");
+ rspamd_rcl_add_default_handler(ssub,
+ "server",
+ rspamd_rcl_parse_struct_ucl,
+ G_STRUCT_OFFSET(struct rspamd_config, nameservers),
+ 0,
+ "List of DNS servers");
+ rspamd_rcl_add_default_handler(ssub,
+ "timeout",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_timeout),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "DNS request timeout");
+ rspamd_rcl_add_default_handler(ssub,
+ "retransmits",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_retransmits),
+ RSPAMD_CL_FLAG_INT_32,
+ "DNS request retransmits");
+ rspamd_rcl_add_default_handler(ssub,
+ "sockets",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_io_per_server),
+ RSPAMD_CL_FLAG_INT_32,
+ "Number of sockets per DNS server");
+ rspamd_rcl_add_default_handler(ssub,
+ "connections",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, dns_io_per_server),
+ RSPAMD_CL_FLAG_INT_32,
+ "Number of sockets per DNS server");
+ rspamd_rcl_add_default_handler(ssub,
+ "enable_dnssec",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_dnssec),
+ 0,
+ "Enable DNSSEC support in Rspamd");
+
+
+ /* New upstreams configuration */
+ ssub = rspamd_rcl_add_section_doc(&top, sub, "upstream", nullptr, nullptr,
+ UCL_OBJECT, FALSE, TRUE,
+ cfg->doc_strings,
+ "Upstreams configuration parameters");
+ rspamd_rcl_add_default_handler(ssub,
+ "max_errors",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, upstream_max_errors),
+ RSPAMD_CL_FLAG_UINT,
+ "Maximum number of errors during `error_time` to consider upstream down");
+ rspamd_rcl_add_default_handler(ssub,
+ "error_time",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, upstream_error_time),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Time frame to check errors");
+ rspamd_rcl_add_default_handler(ssub,
+ "revive_time",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, upstream_revive_time),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Time before attempting to recover upstream after an error");
+ rspamd_rcl_add_default_handler(ssub,
+ "lazy_resolve_time",
+ rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET(struct rspamd_config, upstream_lazy_resolve_time),
+ RSPAMD_CL_FLAG_TIME_FLOAT,
+ "Time to resolve upstreams addresses in lazy mode");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "actions"))) {
+ /**
+ * Symbols and actions sections
+ */
+ auto *sub = rspamd_rcl_add_section_doc(&top, nullptr,
+ "actions", nullptr,
+ rspamd_rcl_actions_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Actions configuration");
+ rspamd_rcl_add_default_handler(sub,
+ "unknown_weight",
+ rspamd_rcl_parse_struct_double,
+ G_STRUCT_OFFSET(struct rspamd_config, unknown_weight),
+ 0,
+ "Accept unknown symbols with the specified weight");
+ rspamd_rcl_add_default_handler(sub,
+ "grow_factor",
+ rspamd_rcl_parse_struct_double,
+ G_STRUCT_OFFSET(struct rspamd_config, grow_factor),
+ 0,
+ "Multiply the subsequent symbols by this number "
+ "(does not affect symbols with score less or "
+ "equal to zero)");
+ rspamd_rcl_add_default_handler(sub,
+ "subject",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, subject),
+ 0,
+ "Rewrite subject with this value");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "group"))) {
+ auto *sub = rspamd_rcl_add_section_doc(&top, nullptr,
+ "group", "name",
+ rspamd_rcl_group_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Symbol groups configuration");
+ rspamd_rcl_add_section_doc(&top, sub, "symbols", "name",
+ rspamd_rcl_symbol_handler,
+ UCL_OBJECT, FALSE, TRUE,
+ cfg->doc_strings,
+ "Symbols configuration");
+
+ /* Group part */
+ rspamd_rcl_add_default_handler(sub,
+ "max_score",
+ rspamd_rcl_parse_struct_double,
+ G_STRUCT_OFFSET(struct rspamd_symbols_group, max_score),
+ 0,
+ "Maximum score that could be reached by this symbols group");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "worker"))) {
+ /**
+ * Worker section
+ */
+ auto *sub = rspamd_rcl_add_section_doc(&top, nullptr, "worker", "type",
+ rspamd_rcl_worker_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Workers common options");
+ rspamd_rcl_add_default_handler(sub,
+ "count",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_worker_conf, count),
+ RSPAMD_CL_FLAG_INT_16,
+ "Number of workers to spawn");
+ rspamd_rcl_add_default_handler(sub,
+ "max_files",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_worker_conf, rlimit_nofile),
+ RSPAMD_CL_FLAG_INT_64,
+ "Maximum number of opened files per worker");
+ rspamd_rcl_add_default_handler(sub,
+ "max_core",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_worker_conf, rlimit_maxcore),
+ RSPAMD_CL_FLAG_INT_64,
+ "Max size of core file in bytes");
+ rspamd_rcl_add_default_handler(sub,
+ "enabled",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_worker_conf, enabled),
+ 0,
+ "Enable or disable a worker (true by default)");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "modules"))) {
+ /**
+ * Modules handler
+ */
+ rspamd_rcl_add_section_doc(&top, nullptr,
+ "modules", nullptr,
+ rspamd_rcl_modules_handler,
+ UCL_OBJECT,
+ FALSE,
+ FALSE,
+ cfg->doc_strings,
+ "Lua plugins to load");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "classifier"))) {
+ /**
+ * Classifiers handler
+ */
+ auto *sub = rspamd_rcl_add_section_doc(&top, nullptr,
+ "classifier", "type",
+ rspamd_rcl_classifier_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "CLassifier options");
+ /* Default classifier is 'bayes' for now */
+ sub->default_key = "bayes";
+
+ rspamd_rcl_add_default_handler(sub,
+ "min_tokens",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, min_tokens),
+ RSPAMD_CL_FLAG_INT_32,
+ "Minimum count of tokens (words) to be considered for statistics");
+ rspamd_rcl_add_default_handler(sub,
+ "min_token_hits",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, min_token_hits),
+ RSPAMD_CL_FLAG_UINT,
+ "Minimum number of hits for a token to be considered");
+ rspamd_rcl_add_default_handler(sub,
+ "min_prob_strength",
+ rspamd_rcl_parse_struct_double,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, min_token_hits),
+ 0,
+ "Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]");
+ rspamd_rcl_add_default_handler(sub,
+ "max_tokens",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, max_tokens),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of tokens (words) to be considered for statistics");
+ rspamd_rcl_add_default_handler(sub,
+ "min_learns",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, min_learns),
+ RSPAMD_CL_FLAG_UINT,
+ "Minimum number of learns for each statfile to use this classifier");
+ rspamd_rcl_add_default_handler(sub,
+ "backend",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, backend),
+ 0,
+ "Statfiles engine");
+ rspamd_rcl_add_default_handler(sub,
+ "name",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, name),
+ 0,
+ "Name of classifier");
+
+ /*
+ * Statfile defaults
+ */
+ auto *ssub = rspamd_rcl_add_section_doc(&top, sub,
+ "statfile", "symbol",
+ rspamd_rcl_statfile_handler,
+ UCL_OBJECT,
+ TRUE,
+ TRUE,
+ sub->doc_ref,
+ "Statfiles options");
+ rspamd_rcl_add_default_handler(ssub,
+ "label",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_statfile_config, label),
+ 0,
+ "Statfile unique label");
+ rspamd_rcl_add_default_handler(ssub,
+ "spam",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_statfile_config, is_spam),
+ 0,
+ "Sets if this statfile contains spam samples");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "composite"))) {
+ /**
+ * Composites handlers
+ */
+ rspamd_rcl_add_section_doc(&top, nullptr,
+ "composite", "name",
+ rspamd_rcl_composite_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Rspamd composite symbols");
+ rspamd_rcl_add_section_doc(&top, nullptr,
+ "composites", nullptr,
+ rspamd_rcl_composites_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Rspamd composite symbols");
+ }
+
+ if (!(skip_sections && g_hash_table_lookup(skip_sections, "lua"))) {
+ /**
+ * Lua handler
+ */
+ rspamd_rcl_add_section_doc(&top, nullptr,
+ "lua", nullptr,
+ rspamd_rcl_lua_handler,
+ UCL_STRING,
+ FALSE,
+ TRUE,
+ cfg->doc_strings,
+ "Lua files to load");
+ }
+
+ cfg->rcl_top_section = top;
+
+ return top;
+}
+
+static bool
+rspamd_rcl_process_section(struct rspamd_config *cfg,
+ const struct rspamd_rcl_section &sec,
+ gpointer ptr, const ucl_object_t *obj, rspamd_mempool_t *pool,
+ GError **err)
+{
+ ucl_object_iter_t it;
+ const ucl_object_t *cur;
+ auto is_nested = true;
+ const gchar *key = nullptr;
+
+ if (sec.processed) {
+ /* Section has been already processed */
+ return TRUE;
+ }
+
+ g_assert(obj != nullptr);
+ g_assert(sec.handler != nullptr);
+
+ if (sec.key_attr) {
+ it = ucl_object_iterate_new(obj);
+
+ while ((cur = ucl_object_iterate_full(it, UCL_ITERATE_EXPLICIT)) != nullptr) {
+ if (ucl_object_type(cur) != UCL_OBJECT) {
+ is_nested = false;
+ break;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ }
+ else {
+ is_nested = false;
+ }
+
+ if (is_nested) {
+ /* Just reiterate on all subobjects */
+ it = ucl_object_iterate_new(obj);
+
+ while ((cur = ucl_object_iterate_full(it, UCL_ITERATE_EXPLICIT)) != nullptr) {
+ if (!sec.handler(pool, cur, ucl_object_key(cur), ptr, const_cast<rspamd_rcl_section *>(&sec), err)) {
+ ucl_object_iterate_free(it);
+
+ return false;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ return true;
+ }
+ else {
+ if (sec.key_attr) {
+ /* First of all search for required attribute and use it as a key */
+ cur = ucl_object_lookup(obj, sec.key_attr.value().c_str());
+
+ if (cur == nullptr) {
+ if (!sec.default_key) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL, "required attribute "
+ "'%s' is missing for section '%s', current key: %s",
+ sec.key_attr.value().c_str(),
+ sec.name.c_str(),
+ ucl_object_key(obj));
+
+ return false;
+ }
+ else {
+ msg_info("using default key '%s' for mandatory field '%s' "
+ "for section '%s'",
+ sec.default_key.value().c_str(), sec.key_attr.value().c_str(),
+ sec.name.c_str());
+ key = sec.default_key.value().c_str();
+ }
+ }
+ else if (ucl_object_type(cur) != UCL_STRING) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL, "required attribute %s"
+ " is not a string for section %s",
+ sec.key_attr.value().c_str(), sec.name.c_str());
+
+ return false;
+ }
+ else {
+ key = ucl_object_tostring(cur);
+ }
+ }
+ }
+
+ return sec.handler(pool, obj, key, ptr, const_cast<rspamd_rcl_section *>(&sec), err);
+}
+
+gboolean
+rspamd_rcl_parse(struct rspamd_rcl_sections_map *top,
+ struct rspamd_config *cfg,
+ gpointer ptr, rspamd_mempool_t *pool,
+ const ucl_object_t *obj, GError **err)
+{
+ if (obj->type != UCL_OBJECT) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "top configuration must be an object");
+ return FALSE;
+ }
+
+ /* Iterate over known sections and ignore unknown ones */
+ for (const auto &sec_ptr: top->sections_order) {
+ if (sec_ptr->name == "*") {
+ /* Default section handler */
+ const auto *cur_obj = obj;
+ LL_FOREACH(obj, cur_obj)
+ {
+ if (!top->sections.contains(ucl_object_key(cur_obj))) {
+ if (sec_ptr->handler != nullptr) {
+ if (!rspamd_rcl_process_section(cfg, *sec_ptr, ptr, cur_obj,
+ pool, err)) {
+ return FALSE;
+ }
+ }
+ else {
+ rspamd_rcl_section_parse_defaults(cfg,
+ *sec_ptr,
+ pool,
+ cur_obj,
+ ptr,
+ err);
+ }
+ }
+ }
+ }
+ else {
+ const auto *found = ucl_object_lookup(obj, sec_ptr->name.c_str());
+ if (found == nullptr) {
+ if (sec_ptr->required) {
+ g_set_error(err, CFG_RCL_ERROR, ENOENT,
+ "required section %s is missing", sec_ptr->name.c_str());
+ return FALSE;
+ }
+ }
+ else {
+ /* Check type */
+ if (sec_ptr->strict_type) {
+ if (sec_ptr->type != found->type) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "object in section %s has invalid type", sec_ptr->name.c_str());
+ return FALSE;
+ }
+ }
+
+ const auto *cur_obj = found;
+ LL_FOREACH(found, cur_obj)
+ {
+ if (sec_ptr->handler != nullptr) {
+ if (!rspamd_rcl_process_section(cfg, *sec_ptr, ptr, cur_obj,
+ pool, err)) {
+ return FALSE;
+ }
+ }
+ else {
+ rspamd_rcl_section_parse_defaults(cfg, *sec_ptr,
+ pool,
+ cur_obj,
+ ptr,
+ err);
+ }
+ }
+ }
+ }
+ if (sec_ptr->fin) {
+ sec_ptr->fin(pool, sec_ptr->fin_ud);
+ }
+ }
+
+ return TRUE;
+}
+
+static bool
+rspamd_rcl_section_parse_defaults(struct rspamd_config *cfg,
+ const struct rspamd_rcl_section &section,
+ rspamd_mempool_t *pool, const ucl_object_t *obj, gpointer ptr,
+ GError **err)
+{
+
+ if (obj->type != UCL_OBJECT) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "default configuration must be an object for section %s "
+ "(actual type is %s)",
+ section.name.c_str(), ucl_object_type_to_string(ucl_object_type(obj)));
+ return FALSE;
+ }
+
+ for (const auto &cur: section.default_parser) {
+ const auto *found = ucl_object_lookup(obj, cur.first.c_str());
+ if (found != nullptr) {
+ auto new_pd = cur.second.pd;
+ new_pd.user_struct = ptr;
+ new_pd.cfg = cfg;
+ const auto *cur_obj = found;
+
+ LL_FOREACH(found, cur_obj)
+ {
+ if (!cur.second.handler(pool, cur_obj, &new_pd, const_cast<rspamd_rcl_section *>(&section), err)) {
+ return FALSE;
+ }
+
+ if (!(new_pd.flags & RSPAMD_CL_FLAG_MULTIPLE)) {
+ break;
+ }
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_string(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ const gsize num_str_len = 32;
+
+ auto target = (gchar **) (((gchar *) pd->user_struct) + pd->offset);
+ switch (obj->type) {
+ case UCL_STRING:
+ *target =
+ rspamd_mempool_strdup(pool, ucl_copy_value_trash(obj));
+ break;
+ case UCL_INT:
+ *target = (gchar *) rspamd_mempool_alloc(pool, num_str_len);
+ rspamd_snprintf(*target, num_str_len, "%L", obj->value.iv);
+ break;
+ case UCL_FLOAT:
+ *target = (gchar *) rspamd_mempool_alloc(pool, num_str_len);
+ rspamd_snprintf(*target, num_str_len, "%f", obj->value.dv);
+ break;
+ case UCL_BOOLEAN:
+ *target = (gchar *) rspamd_mempool_alloc(pool, num_str_len);
+ rspamd_snprintf(*target, num_str_len, "%s",
+ ((gboolean) obj->value.iv) ? "true" : "false");
+ break;
+ case UCL_NULL:
+ /* String is enforced to be null */
+ *target = nullptr;
+ break;
+ default:
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to string in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_integer(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ union {
+ gint *ip;
+ gint32 *i32p;
+ gint16 *i16p;
+ gint64 *i64p;
+ guint *up;
+ gsize *sp;
+ } target;
+ int64_t val;
+
+ if (pd->flags == RSPAMD_CL_FLAG_INT_32) {
+ target.i32p = (gint32 *) (((gchar *) pd->user_struct) + pd->offset);
+ if (!ucl_object_toint_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to integer in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ *target.i32p = val;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_INT_64) {
+ target.i64p = (gint64 *) (((gchar *) pd->user_struct) + pd->offset);
+ if (!ucl_object_toint_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to integer in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ *target.i64p = val;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_INT_SIZE) {
+ target.sp = (gsize *) (((gchar *) pd->user_struct) + pd->offset);
+ if (!ucl_object_toint_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to integer in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ *target.sp = val;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_INT_16) {
+ target.i16p = (gint16 *) (((gchar *) pd->user_struct) + pd->offset);
+ if (!ucl_object_toint_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to integer in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ *target.i16p = val;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_UINT) {
+ target.up = (guint *) (((gchar *) pd->user_struct) + pd->offset);
+ if (!ucl_object_toint_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to integer in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ *target.up = val;
+ }
+ else {
+ target.ip = (gint *) (((gchar *) pd->user_struct) + pd->offset);
+ if (!ucl_object_toint_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to integer in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ *target.ip = val;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_double(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ gdouble *target;
+
+ target = (gdouble *) (((gchar *) pd->user_struct) + pd->offset);
+
+ if (!ucl_object_todouble_safe(obj, target)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to double in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_time(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ union {
+ gint *psec;
+ guint32 *pu32;
+ gdouble *pdv;
+ struct timeval *ptv;
+ struct timespec *pts;
+ } target;
+ gdouble val;
+
+ if (!ucl_object_todouble_safe(obj, &val)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to double in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMEVAL) {
+ target.ptv =
+ (struct timeval *) (((gchar *) pd->user_struct) + pd->offset);
+ target.ptv->tv_sec = (glong) val;
+ target.ptv->tv_usec = (val - (glong) val) * 1000000;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMESPEC) {
+ target.pts =
+ (struct timespec *) (((gchar *) pd->user_struct) + pd->offset);
+ target.pts->tv_sec = (glong) val;
+ target.pts->tv_nsec = (val - (glong) val) * 1000000000000LL;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_TIME_FLOAT) {
+ target.pdv = (double *) (((gchar *) pd->user_struct) + pd->offset);
+ *target.pdv = val;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_TIME_INTEGER) {
+ target.psec = (gint *) (((gchar *) pd->user_struct) + pd->offset);
+ *target.psec = val * 1000;
+ }
+ else if (pd->flags == RSPAMD_CL_FLAG_TIME_UINT_32) {
+ target.pu32 = (guint32 *) (((gchar *) pd->user_struct) + pd->offset);
+ *target.pu32 = val * 1000;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to time in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_keypair(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ struct rspamd_cryptobox_keypair **target, *kp;
+
+ target = (struct rspamd_cryptobox_keypair **) (((gchar *) pd->user_struct) +
+ pd->offset);
+ if (obj->type == UCL_OBJECT) {
+ kp = rspamd_keypair_from_ucl(obj);
+
+ if (kp != nullptr) {
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) rspamd_keypair_unref, kp);
+ *target = kp;
+ }
+ else {
+ gchar *dump = (char *) ucl_object_emit(obj, UCL_EMIT_JSON_COMPACT);
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot load the keypair specified: %s; section: %s; value: %s",
+ ucl_object_key(obj), section->name.c_str(), dump);
+ free(dump);
+
+ return FALSE;
+ }
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "no sane pubkey or privkey found in the keypair: %s",
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_pubkey(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ struct rspamd_cryptobox_pubkey **target, *pk;
+ gsize len;
+ const gchar *str;
+ rspamd_cryptobox_keypair_type keypair_type = RSPAMD_KEYPAIR_KEX;
+ rspamd_cryptobox_mode keypair_mode = RSPAMD_CRYPTOBOX_MODE_25519;
+
+ if (pd->flags & RSPAMD_CL_FLAG_SIGNKEY) {
+ keypair_type = RSPAMD_KEYPAIR_SIGN;
+ }
+ if (pd->flags & RSPAMD_CL_FLAG_NISTKEY) {
+ keypair_mode = RSPAMD_CRYPTOBOX_MODE_NIST;
+ }
+
+ target = (struct rspamd_cryptobox_pubkey **) (((gchar *) pd->user_struct) +
+ pd->offset);
+ if (obj->type == UCL_STRING) {
+ str = ucl_object_tolstring(obj, &len);
+ pk = rspamd_pubkey_from_base32(str, len, keypair_type,
+ keypair_mode);
+
+ if (pk != nullptr) {
+ *target = pk;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot load the pubkey specified: %s",
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "no sane pubkey found in the element: %s",
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) rspamd_pubkey_unref, pk);
+
+ return TRUE;
+}
+
+static void
+rspamd_rcl_insert_string_list_item(gpointer *target, rspamd_mempool_t *pool,
+ std::string_view elt, gboolean is_hash)
+{
+ union {
+ GHashTable *hv;
+ GList *lv;
+ gpointer p;
+ } d;
+ gchar *val;
+
+ d.p = *target;
+
+ if (is_hash) {
+ if (d.hv == nullptr) {
+ d.hv = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) g_hash_table_unref, d.hv);
+ }
+
+ val = rspamd_mempool_strdup_len(pool, elt.data(), elt.size());
+ g_hash_table_insert(d.hv, val, val);
+ }
+ else {
+ val = rspamd_mempool_strdup_len(pool, elt.data(), elt.size());
+ d.lv = g_list_prepend(d.lv, val);
+ }
+
+ *target = d.p;
+}
+
+gboolean
+rspamd_rcl_parse_struct_string_list(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ constexpr const auto num_str_len = 32;
+ auto need_destructor = true;
+
+
+ auto is_hash = pd->flags & RSPAMD_CL_FLAG_STRING_LIST_HASH;
+ auto *target = (gpointer *) (((gchar *) pd->user_struct) + pd->offset);
+
+ if (!is_hash && *target != nullptr) {
+ need_destructor = FALSE;
+ }
+
+ auto iter = ucl_object_iterate_new(obj);
+ const auto *cur = obj;
+
+ while ((cur = ucl_object_iterate_safe(iter, true)) != nullptr) {
+ switch (cur->type) {
+ case UCL_STRING: {
+ rspamd::string_foreach_delim(ucl_object_tostring(cur), ", ", [&](const auto &elt) {
+ rspamd_rcl_insert_string_list_item(target, pool, elt, is_hash);
+ });
+
+ /* Go to the next object */
+ continue;
+ }
+ case UCL_INT: {
+ auto *val = (gchar *) rspamd_mempool_alloc(pool, num_str_len);
+ rspamd_snprintf(val, num_str_len, "%L", cur->value.iv);
+ rspamd_rcl_insert_string_list_item(target, pool, val, is_hash);
+ break;
+ }
+ case UCL_FLOAT: {
+ auto *val = (gchar *) rspamd_mempool_alloc(pool, num_str_len);
+ rspamd_snprintf(val, num_str_len, "%f", cur->value.dv);
+ rspamd_rcl_insert_string_list_item(target, pool, val, is_hash);
+ break;
+ }
+ case UCL_BOOLEAN: {
+ auto *val = (gchar *) rspamd_mempool_alloc(pool, num_str_len);
+ rspamd_snprintf(val, num_str_len, "%s",
+ ((gboolean) cur->value.iv) ? "true" : "false");
+ rspamd_rcl_insert_string_list_item(target, pool, val, is_hash);
+ break;
+ }
+ default:
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to a string list in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ ucl_object_iterate_free(iter);
+
+ return FALSE;
+ }
+ }
+
+ ucl_object_iterate_free(iter);
+
+#if 0
+ /* WTF: why don't we allow empty list here?? */
+ if (*target == nullptr) {
+ g_set_error (err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "non-empty array of strings is expected: %s, "
+ "got: %s, of length: %d",
+ ucl_object_key (obj), ucl_object_type_to_string (obj->type),
+ obj->len);
+ return FALSE;
+ }
+#endif
+
+ if (!is_hash && *target != nullptr) {
+ *target = g_list_reverse(*(GList **) target);
+
+ if (need_destructor) {
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) g_list_free,
+ *target);
+ }
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_ucl(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ const ucl_object_t **target;
+
+ target = (const ucl_object_t **) (((gchar *) pd->user_struct) + pd->offset);
+
+ *target = obj;
+
+ return TRUE;
+}
+
+
+gboolean
+rspamd_rcl_parse_struct_boolean(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ gboolean *target;
+
+ target = (gboolean *) (((gchar *) pd->user_struct) + pd->offset);
+
+ if (obj->type == UCL_BOOLEAN) {
+ *target = obj->value.iv;
+ }
+ else if (obj->type == UCL_INT) {
+ *target = obj->value.iv;
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to boolean in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ if (pd->flags & RSPAMD_CL_FLAG_BOOLEAN_INVERSE) {
+ *target = !*target;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_addr(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ rspamd_inet_addr_t **target;
+ const gchar *val;
+ gsize size;
+
+ target = (rspamd_inet_addr_t **) (((gchar *) pd->user_struct) + pd->offset);
+
+ if (ucl_object_type(obj) == UCL_STRING) {
+ val = ucl_object_tolstring(obj, &size);
+
+ if (!rspamd_parse_inet_address(target, val, size,
+ RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot parse inet address: %s", val);
+ return FALSE;
+ }
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot convert %s to inet address in option %s",
+ ucl_object_type_to_string(ucl_object_type(obj)),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_rcl_parse_struct_mime_addr(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *pd = (struct rspamd_rcl_struct_parser *) ud;
+ GPtrArray **target, *tmp_addr = nullptr;
+ const gchar *val;
+ ucl_object_iter_t it;
+ const ucl_object_t *cur;
+
+ target = (GPtrArray **) (((gchar *) pd->user_struct) + pd->offset);
+ it = ucl_object_iterate_new(obj);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != nullptr) {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ val = ucl_object_tostring(obj);
+ tmp_addr = rspamd_email_address_from_mime(pool, val,
+ strlen(val), tmp_addr, -1);
+ }
+ else {
+ g_set_error(err,
+ CFG_RCL_ERROR,
+ EINVAL,
+ "cannot get inet address from ucl object in %s",
+ ucl_object_key(obj));
+ ucl_object_iterate_free(it);
+
+ return FALSE;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ *target = tmp_addr;
+
+ return TRUE;
+}
+
+void rspamd_rcl_register_worker_option(struct rspamd_config *cfg,
+ GQuark type,
+ const gchar *name,
+ rspamd_rcl_default_handler_t handler,
+ gpointer target,
+ glong offset,
+ gint flags,
+ const gchar *doc_string)
+{
+ auto parser_it = cfg->rcl_top_section->workers_parser.try_emplace(type, rspamd_worker_cfg_parser{});
+ auto &parser = parser_it.first->second;
+ auto handler_it = parser.parsers.try_emplace(std::make_pair(std::string{name}, target), rspamd_worker_param_parser{});
+
+ if (!handler_it.second) {
+ msg_warn_config(
+ "handler for parameter %s is already registered for worker type %s",
+ name,
+ g_quark_to_string(type));
+ return;
+ }
+
+ auto &nhandler = handler_it.first->second;
+ nhandler.parser.flags = flags;
+ nhandler.parser.offset = offset;
+ nhandler.parser.user_struct = target;
+ nhandler.handler = handler;
+
+ const auto *doc_workers = ucl_object_lookup(cfg->doc_strings, "workers");
+
+ if (doc_workers == nullptr) {
+ auto *doc_obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(cfg->doc_strings, doc_obj, "workers", 0, false);
+ doc_workers = doc_obj;
+ }
+
+ const auto *doc_target = ucl_object_lookup(doc_workers, g_quark_to_string(type));
+
+ if (doc_target == nullptr) {
+ auto *doc_obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key((ucl_object_t *) doc_workers, doc_obj,
+ g_quark_to_string(type), 0, true);
+ doc_target = doc_obj;
+ }
+
+ rspamd_rcl_add_doc_obj((ucl_object_t *) doc_target,
+ doc_string,
+ name,
+ UCL_NULL,
+ handler,
+ flags,
+ nullptr,
+ 0);
+}
+
+/* Checksum functions */
+static int
+rspamd_rcl_emitter_append_c(unsigned char c, size_t nchars, void *ud)
+{
+ auto *hs = (rspamd_cryptobox_hash_state_t *) ud;
+ guint64 d[2];
+
+ d[0] = nchars;
+ d[1] = c;
+
+ rspamd_cryptobox_hash_update(hs, (const guchar *) d, sizeof(d));
+
+ return 0;
+}
+
+static int
+rspamd_rcl_emitter_append_len(unsigned const char *str, size_t len, void *ud)
+{
+ auto *hs = (rspamd_cryptobox_hash_state_t *) ud;
+
+ rspamd_cryptobox_hash_update(hs, str, len);
+
+ return 0;
+}
+static int
+rspamd_rcl_emitter_append_int(int64_t elt, void *ud)
+{
+ auto *hs = (rspamd_cryptobox_hash_state_t *) ud;
+
+ rspamd_cryptobox_hash_update(hs, (const guchar *) &elt, sizeof(elt));
+
+ return 0;
+}
+
+static int
+rspamd_rcl_emitter_append_double(double elt, void *ud)
+{
+ auto *hs = (rspamd_cryptobox_hash_state_t *) ud;
+
+ rspamd_cryptobox_hash_update(hs, (const guchar *) &elt, sizeof(elt));
+
+ return 0;
+}
+
+void rspamd_rcl_sections_free(struct rspamd_rcl_sections_map *sections)
+{
+ delete sections;
+}
+
+/**
+ * Calls for an external lua function to apply potential config transformations
+ * if needed. This function can change the cfg->rcl_obj.
+ *
+ * Example of transformation function:
+ *
+ * function(obj)
+ * if obj.something == 'foo' then
+ * obj.something = "bla"
+ * return true, obj
+ * end
+ *
+ * return false, nil
+ * end
+ *
+ * If function returns 'false' then rcl_obj is not touched. Otherwise,
+ * it is changed, then rcl_obj is imported from lua. Old config is dereferenced.
+ * @param cfg
+ */
+void rspamd_rcl_maybe_apply_lua_transform(struct rspamd_config *cfg)
+{
+ auto *L = RSPAMD_LUA_CFG_STATE(cfg);
+ static const char *transform_script = "lua_cfg_transform";
+
+ g_assert(L != nullptr);
+
+ if (!rspamd_lua_require_function(L, transform_script, nullptr)) {
+ /* No function defined */
+ msg_warn_config("cannot execute lua script %s: %s",
+ transform_script, lua_tostring(L, -1));
+
+ return;
+ }
+
+ lua_pushcfunction(L, &rspamd_lua_traceback);
+ auto err_idx = lua_gettop(L);
+
+ /* Push function */
+ lua_pushvalue(L, -2);
+
+ /* Push the existing config */
+ ucl_object_push_lua(L, cfg->cfg_ucl_obj, true);
+
+ if (auto ret = lua_pcall(L, 1, 2, err_idx); ret != 0) {
+ msg_err("call to rspamadm lua script failed (%d): %s", ret,
+ lua_tostring(L, -1));
+ lua_settop(L, 0);
+
+ return;
+ }
+
+ if (lua_toboolean(L, -2) && lua_type(L, -1) == LUA_TTABLE) {
+ ucl_object_t *old_cfg = cfg->cfg_ucl_obj;
+
+ msg_info_config("configuration has been transformed in Lua");
+ cfg->cfg_ucl_obj = ucl_object_lua_import(L, -1);
+ ucl_object_unref(old_cfg);
+ }
+
+ /* error function */
+ lua_settop(L, 0);
+}
+
+static bool
+rspamd_rcl_decrypt_handler(struct ucl_parser *parser,
+ const unsigned char *source, size_t source_len,
+ unsigned char **destination, size_t *dest_len,
+ void *user_data)
+{
+ GError *err = nullptr;
+ auto *kp = (struct rspamd_cryptobox_keypair *) user_data;
+
+ if (!rspamd_keypair_decrypt(kp, source, source_len,
+ destination, dest_len, &err)) {
+ msg_err("cannot decrypt file: %e", err);
+ g_error_free(err);
+
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+rspamd_rcl_jinja_handler(struct ucl_parser *parser,
+ const unsigned char *source, size_t source_len,
+ unsigned char **destination, size_t *dest_len,
+ void *user_data)
+{
+ auto *cfg = (struct rspamd_config *) user_data;
+ auto *L = RSPAMD_LUA_CFG_STATE(cfg);
+
+ lua_pushcfunction(L, &rspamd_lua_traceback);
+ auto err_idx = lua_gettop(L);
+
+ /* Obtain function */
+ if (!rspamd_lua_require_function(L, "lua_util", "jinja_template")) {
+ msg_err_config("cannot require lua_util.jinja_template");
+ lua_settop(L, err_idx - 1);
+
+ return false;
+ }
+
+ lua_pushlstring(L, (const char *) source, source_len);
+ lua_getglobal(L, "rspamd_env");
+ lua_pushboolean(L, false);
+
+ if (lua_pcall(L, 3, 1, err_idx) != 0) {
+ msg_err_config("cannot call lua jinja_template script: %s",
+ lua_tostring(L, -1));
+ lua_settop(L, err_idx - 1);
+
+ return false;
+ }
+
+ if (lua_type(L, -1) == LUA_TSTRING) {
+ const char *ndata;
+ gsize nsize;
+
+ ndata = lua_tolstring(L, -1, &nsize);
+ *destination = (unsigned char *) UCL_ALLOC(nsize);
+ memcpy(*destination, ndata, nsize);
+ *dest_len = nsize;
+ }
+ else {
+ msg_err_config("invalid return type when templating jinja %s",
+ lua_typename(L, lua_type(L, -1)));
+ lua_settop(L, err_idx - 1);
+
+ return false;
+ }
+
+ lua_settop(L, err_idx - 1);
+
+ return true;
+}
+
+static void
+rspamd_rcl_decrypt_free(unsigned char *data, size_t len, void *user_data)
+{
+ g_free(data);
+}
+
+void rspamd_config_calculate_cksum(struct rspamd_config *cfg)
+{
+ rspamd_cryptobox_hash_state_t hs;
+ unsigned char cksumbuf[rspamd_cryptobox_HASHBYTES];
+ struct ucl_emitter_functions f;
+
+ /* Calculate checksum */
+ rspamd_cryptobox_hash_init(&hs, nullptr, 0);
+ f.ucl_emitter_append_character = rspamd_rcl_emitter_append_c;
+ f.ucl_emitter_append_double = rspamd_rcl_emitter_append_double;
+ f.ucl_emitter_append_int = rspamd_rcl_emitter_append_int;
+ f.ucl_emitter_append_len = rspamd_rcl_emitter_append_len;
+ f.ucl_emitter_free_func = nullptr;
+ f.ud = &hs;
+ ucl_object_emit_full(cfg->cfg_ucl_obj, UCL_EMIT_MSGPACK,
+ &f, cfg->config_comments);
+ rspamd_cryptobox_hash_final(&hs, cksumbuf);
+ cfg->checksum = rspamd_encode_base32(cksumbuf, sizeof(cksumbuf), RSPAMD_BASE32_DEFAULT);
+ /* Also change the tag of cfg pool to be equal to the checksum */
+ rspamd_strlcpy(cfg->cfg_pool->tag.uid, cfg->checksum,
+ MIN(sizeof(cfg->cfg_pool->tag.uid), strlen(cfg->checksum)));
+}
+
+gboolean
+rspamd_config_parse_ucl(struct rspamd_config *cfg,
+ const gchar *filename,
+ GHashTable *vars,
+ ucl_include_trace_func_t inc_trace,
+ void *trace_data,
+ gboolean skip_jinja,
+ GError **err)
+{
+ struct rspamd_cryptobox_keypair *decrypt_keypair = nullptr;
+ auto cfg_file_maybe = rspamd::util::raii_mmaped_file::mmap_shared(filename, O_RDONLY, PROT_READ, 0);
+
+ if (!cfg_file_maybe) {
+ g_set_error(err, cfg_rcl_error_quark(), errno,
+ "cannot open %s: %*s", filename, (int) cfg_file_maybe.error().error_message.size(),
+ cfg_file_maybe.error().error_message.data());
+ return FALSE;
+ }
+
+ auto &cfg_file = cfg_file_maybe.value();
+
+ /* Try to load keyfile if available */
+ rspamd::util::raii_file::open(fmt::format("{}.key", filename), O_RDONLY).map([&](const auto &keyfile) {
+ auto *kp_parser = ucl_parser_new(0);
+ if (ucl_parser_add_fd(kp_parser, keyfile.get_fd())) {
+ auto *kp_obj = ucl_parser_get_object(kp_parser);
+
+ g_assert(kp_obj != nullptr);
+ decrypt_keypair = rspamd_keypair_from_ucl(kp_obj);
+
+ if (decrypt_keypair == nullptr) {
+ msg_err_config_forced("cannot load keypair from %s.key: invalid keypair",
+ filename);
+ }
+ else {
+ /* Add decryption support to UCL */
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) rspamd_keypair_unref,
+ decrypt_keypair);
+ }
+
+ ucl_object_unref(kp_obj);
+ }
+ else {
+ msg_err_config_forced("cannot load keypair from %s.key: %s",
+ filename, ucl_parser_get_error(kp_parser));
+ }
+ ucl_parser_free(kp_parser);
+ });
+
+ auto parser = std::shared_ptr<ucl_parser>(ucl_parser_new(UCL_PARSER_SAVE_COMMENTS), ucl_parser_free);
+ rspamd_ucl_add_conf_variables(parser.get(), vars);
+ rspamd_ucl_add_conf_macros(parser.get(), cfg);
+ ucl_parser_set_filevars(parser.get(), filename, true);
+
+ if (inc_trace) {
+ ucl_parser_set_include_tracer(parser.get(), inc_trace, trace_data);
+ }
+
+ if (decrypt_keypair) {
+ auto *decrypt_handler = rspamd_mempool_alloc0_type(cfg->cfg_pool,
+ struct ucl_parser_special_handler);
+ decrypt_handler->user_data = decrypt_keypair;
+ decrypt_handler->magic = encrypted_magic;
+ decrypt_handler->magic_len = sizeof(encrypted_magic);
+ decrypt_handler->handler = rspamd_rcl_decrypt_handler;
+ decrypt_handler->free_function = rspamd_rcl_decrypt_free;
+
+ ucl_parser_add_special_handler(parser.get(), decrypt_handler);
+ }
+
+ if (!skip_jinja) {
+ auto *jinja_handler = rspamd_mempool_alloc0_type(cfg->cfg_pool,
+ struct ucl_parser_special_handler);
+ jinja_handler->user_data = cfg;
+ jinja_handler->flags = UCL_SPECIAL_HANDLER_PREPROCESS_ALL;
+ jinja_handler->handler = rspamd_rcl_jinja_handler;
+
+ ucl_parser_add_special_handler(parser.get(), jinja_handler);
+ }
+
+ if (!ucl_parser_add_chunk(parser.get(), (unsigned char *) cfg_file.get_map(), cfg_file.get_size())) {
+ g_set_error(err, cfg_rcl_error_quark(), errno,
+ "ucl parser error: %s", ucl_parser_get_error(parser.get()));
+
+ return FALSE;
+ }
+
+ cfg->cfg_ucl_obj = ucl_parser_get_object(parser.get());
+ cfg->config_comments = ucl_object_ref(ucl_parser_get_comments(parser.get()));
+
+ return TRUE;
+}
+
+gboolean
+rspamd_config_read(struct rspamd_config *cfg,
+ const gchar *filename,
+ rspamd_rcl_section_fin_t logger_fin,
+ gpointer logger_ud,
+ GHashTable *vars,
+ gboolean skip_jinja,
+ gchar **lua_env)
+{
+ GError *err = nullptr;
+
+ rspamd_lua_set_path(RSPAMD_LUA_CFG_STATE(cfg), nullptr, vars);
+
+ if (!rspamd_lua_set_env(RSPAMD_LUA_CFG_STATE(cfg), vars, lua_env, &err)) {
+ msg_err_config_forced("failed to set up environment: %e", err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+
+ if (!rspamd_config_parse_ucl(cfg, filename, vars, nullptr, nullptr, skip_jinja, &err)) {
+ msg_err_config_forced("failed to load config: %e", err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+
+ auto *top = rspamd_rcl_config_init(cfg, nullptr);
+ cfg->rcl_top_section = top;
+ /* Add new paths if defined in options */
+ rspamd_lua_set_path(RSPAMD_LUA_CFG_STATE(cfg), cfg->cfg_ucl_obj, vars);
+ rspamd_lua_set_globals(cfg, RSPAMD_LUA_CFG_STATE(cfg));
+ rspamd_mempool_add_destructor(cfg->cfg_pool, (rspamd_mempool_destruct_t) rspamd_rcl_sections_free, top);
+ err = nullptr;
+
+ /* Pre-init logging if possible */
+ if (logger_fin != nullptr) {
+ auto logging_section_maybe = rspamd::find_map(top->sections, "logging");
+
+ if (logging_section_maybe) {
+ const auto *logger_obj = ucl_object_lookup_any(cfg->cfg_ucl_obj, "logging",
+ "logger", nullptr);
+
+ if (logger_obj == nullptr) {
+ logger_fin(cfg->cfg_pool, logger_ud);
+ }
+ else {
+ if (!rspamd_rcl_process_section(cfg, *logging_section_maybe.value().get().get(), cfg,
+ logger_obj, cfg->cfg_pool, &err)) {
+ msg_err_config_forced("cannot init logger: %e", err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+ else {
+ logger_fin(cfg->cfg_pool, logger_ud);
+ }
+
+ /* Init lua logging */
+ lua_pushcfunction(RSPAMD_LUA_CFG_STATE(cfg), &rspamd_lua_traceback);
+ auto err_idx = lua_gettop(RSPAMD_LUA_CFG_STATE(cfg));
+
+ /* Obtain function */
+ if (!rspamd_lua_require_function(RSPAMD_LUA_CFG_STATE(cfg), "lua_util",
+ "init_debug_logging")) {
+ msg_err_config("cannot require lua_util.init_debug_logging");
+ lua_settop(RSPAMD_LUA_CFG_STATE(cfg), err_idx - 1);
+
+ return FALSE;
+ }
+
+ void *pcfg = lua_newuserdata(RSPAMD_LUA_CFG_STATE(cfg), sizeof(void *));
+ memcpy(pcfg, &cfg, sizeof(void *));
+ rspamd_lua_setclass(RSPAMD_LUA_CFG_STATE(cfg), "rspamd{config}", -1);
+
+ if (lua_pcall(RSPAMD_LUA_CFG_STATE(cfg), 1, 0, err_idx) != 0) {
+ msg_err_config("cannot call lua init_debug_logging script: %s",
+ lua_tostring(RSPAMD_LUA_CFG_STATE(cfg), -1));
+ lua_settop(RSPAMD_LUA_CFG_STATE(cfg), err_idx - 1);
+
+ return FALSE;
+ }
+
+ lua_settop(RSPAMD_LUA_CFG_STATE(cfg), err_idx - 1);
+ }
+ }
+ }
+
+ /* Transform config if needed */
+ rspamd_rcl_maybe_apply_lua_transform(cfg);
+ rspamd_config_calculate_cksum(cfg);
+
+ if (!rspamd_rcl_parse(top, cfg, cfg, cfg->cfg_pool, cfg->cfg_ucl_obj, &err)) {
+ msg_err_config("rcl parse error: %e", err);
+
+ if (err) {
+ g_error_free(err);
+ }
+
+ return FALSE;
+ }
+
+ cfg->lang_det = rspamd_language_detector_init(cfg);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) rspamd_language_detector_unref,
+ cfg->lang_det);
+
+ return TRUE;
+}
+
+static void
+rspamd_rcl_doc_obj_from_handler(ucl_object_t *doc_obj,
+ rspamd_rcl_default_handler_t handler,
+ gint flags)
+{
+ auto has_example = ucl_object_lookup(doc_obj, "example") != nullptr;
+ auto has_type = ucl_object_lookup(doc_obj, "type") != nullptr;
+
+ if (handler == rspamd_rcl_parse_struct_string) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj, ucl_object_fromstring("string"),
+ "type", 0, false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_integer) {
+ auto *type = "int";
+
+ if (flags & RSPAMD_CL_FLAG_INT_16) {
+ type = "int16";
+ }
+ else if (flags & RSPAMD_CL_FLAG_INT_32) {
+ type = "int32";
+ }
+ else if (flags & RSPAMD_CL_FLAG_INT_64) {
+ type = "int64";
+ }
+ else if (flags & RSPAMD_CL_FLAG_INT_SIZE) {
+ type = "size";
+ }
+ else if (flags & RSPAMD_CL_FLAG_UINT) {
+ type = "uint";
+ }
+
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj, ucl_object_fromstring(type),
+ "type", 0, false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_double) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj, ucl_object_fromstring("double"),
+ "type", 0, false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_time) {
+ auto *type = "time";
+
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj, ucl_object_fromstring(type),
+ "type", 0, false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_string_list) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj, ucl_object_fromstring("string list"),
+ "type", 0, false);
+ }
+ if (!has_example) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring_common("param = \"str1, str2, str3\" OR "
+ "param = [\"str1\", \"str2\", \"str3\"]",
+ 0, static_cast<ucl_string_flags>(0)),
+ "example",
+ 0,
+ false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_boolean) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring("bool"),
+ "type",
+ 0,
+ false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_keypair) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring("keypair"),
+ "type",
+ 0,
+ false);
+ }
+ if (!has_example) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring("keypair { "
+ "pubkey = <base32_string>;"
+ " privkey = <base32_string>; "
+ "}"),
+ "example",
+ 0,
+ false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_addr) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring("socket address"),
+ "type",
+ 0,
+ false);
+ }
+ }
+ else if (handler == rspamd_rcl_parse_struct_mime_addr) {
+ if (!has_type) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring("email address"),
+ "type",
+ 0,
+ false);
+ }
+ }
+}
+
+ucl_object_t *
+rspamd_rcl_add_doc_obj(ucl_object_t *doc_target,
+ const char *doc_string,
+ const char *doc_name,
+ ucl_type_t type,
+ rspamd_rcl_default_handler_t handler,
+ gint flags,
+ const char *default_value,
+ gboolean required)
+{
+ ucl_object_t *doc_obj;
+
+ if (doc_target == nullptr || doc_name == nullptr) {
+ return nullptr;
+ }
+
+ doc_obj = ucl_object_typed_new(UCL_OBJECT);
+
+ /* Insert doc string itself */
+ if (doc_string) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring_common(doc_string, 0, static_cast<ucl_string_flags>(0)),
+ "data", 0, false);
+ }
+ else {
+ ucl_object_insert_key(doc_obj, ucl_object_fromstring("undocumented"),
+ "data", 0, false);
+ }
+
+ if (type != UCL_NULL) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring(ucl_object_type_to_string(type)),
+ "type", 0, false);
+ }
+
+ rspamd_rcl_doc_obj_from_handler(doc_obj, handler, flags);
+
+ ucl_object_insert_key(doc_obj,
+ ucl_object_frombool(required),
+ "required", 0, false);
+
+ if (default_value) {
+ ucl_object_insert_key(doc_obj,
+ ucl_object_fromstring_common(default_value, 0, static_cast<ucl_string_flags>(0)),
+ "default", 0, false);
+ }
+
+ ucl_object_insert_key(doc_target, doc_obj, doc_name, 0, true);
+
+ return doc_obj;
+}
+
+ucl_object_t *
+rspamd_rcl_add_doc_by_path(struct rspamd_config *cfg,
+ const gchar *doc_path,
+ const char *doc_string,
+ const char *doc_name,
+ ucl_type_t type,
+ rspamd_rcl_default_handler_t handler,
+ gint flags,
+ const char *default_value,
+ gboolean required)
+{
+ const auto *cur = cfg->doc_strings;
+
+ if (doc_path == nullptr) {
+ /* Assume top object */
+ return rspamd_rcl_add_doc_obj(cfg->doc_strings,
+ doc_string,
+ doc_name,
+ type,
+ handler,
+ flags,
+ default_value,
+ required);
+ }
+ else {
+ const auto *found = ucl_object_lookup_path(cfg->doc_strings, doc_path);
+
+ if (found != nullptr) {
+ return rspamd_rcl_add_doc_obj((ucl_object_t *) found,
+ doc_string,
+ doc_name,
+ type,
+ handler,
+ flags,
+ default_value,
+ required);
+ }
+
+ /* Otherwise we need to insert all components of the path */
+ rspamd::string_foreach_delim(doc_path, ".", [&](const std::string_view &elt) {
+ if (ucl_object_type(cur) != UCL_OBJECT) {
+ msg_err_config("Bad path while lookup for '%s' at %*s",
+ doc_path, (int) elt.size(), elt.data());
+ }
+ const auto *found = ucl_object_lookup_len(cur, elt.data(), elt.size());
+ if (found == nullptr) {
+ auto *obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key((ucl_object_t *) cur,
+ obj,
+ elt.data(),
+ elt.size(),
+ true);
+ cur = obj;
+ }
+ else {
+ cur = found;
+ }
+ });
+ }
+
+ return rspamd_rcl_add_doc_obj(ucl_object_ref(cur),
+ doc_string,
+ doc_name,
+ type,
+ handler,
+ flags,
+ default_value,
+ required);
+}
+
+static void
+rspamd_rcl_add_doc_from_comments(struct rspamd_config *cfg,
+ ucl_object_t *top_doc, const ucl_object_t *obj,
+ const ucl_object_t *comments, gboolean is_top)
+{
+ ucl_object_iter_t it = nullptr;
+ const ucl_object_t *cur, *cmt;
+ ucl_object_t *cur_doc;
+
+ if (ucl_object_type(obj) == UCL_OBJECT) {
+ while ((cur = ucl_object_iterate(obj, &it, true)) != nullptr) {
+ cur_doc = nullptr;
+
+ if ((cmt = ucl_comments_find(comments, cur)) != nullptr) {
+ cur_doc = rspamd_rcl_add_doc_obj(top_doc,
+ ucl_object_tostring(cmt), ucl_object_key(cur),
+ ucl_object_type(cur), nullptr, 0, nullptr, FALSE);
+ }
+
+ if (ucl_object_type(cur) == UCL_OBJECT) {
+ if (cur_doc) {
+ rspamd_rcl_add_doc_from_comments(cfg, cur_doc, cur,
+ comments,
+ FALSE);
+ }
+ else {
+ rspamd_rcl_add_doc_from_comments(cfg, top_doc, cur,
+ comments,
+ FALSE);
+ }
+ }
+ }
+ }
+ else if (!is_top) {
+ if ((cmt = ucl_comments_find(comments, obj)) != nullptr) {
+ rspamd_rcl_add_doc_obj(top_doc,
+ ucl_object_tostring(cmt), ucl_object_key(obj),
+ ucl_object_type(obj), nullptr, 0, nullptr, FALSE);
+ }
+ }
+}
+
+ucl_object_t *
+rspamd_rcl_add_doc_by_example(struct rspamd_config *cfg,
+ const gchar *root_path,
+ const gchar *doc_string,
+ const gchar *doc_name,
+ const gchar *example_data, gsize example_len)
+{
+ auto parser = std::shared_ptr<ucl_parser>(ucl_parser_new(UCL_PARSER_NO_FILEVARS | UCL_PARSER_SAVE_COMMENTS), ucl_parser_free);
+
+ if (!ucl_parser_add_chunk(parser.get(), reinterpret_cast<const unsigned char *>(example_data), example_len)) {
+ msg_err_config("cannot parse example: %s",
+ ucl_parser_get_error(parser.get()));
+
+ return nullptr;
+ }
+
+ auto *top = ucl_parser_get_object(parser.get());
+ const auto *comments = ucl_parser_get_comments(parser.get());
+
+ /* Add top object */
+ auto *top_doc = rspamd_rcl_add_doc_by_path(cfg, root_path, doc_string,
+ doc_name, ucl_object_type(top), nullptr, 0, nullptr, FALSE);
+ ucl_object_insert_key(top_doc,
+ ucl_object_fromstring_common(example_data, example_len, static_cast<ucl_string_flags>(0)),
+ "example", 0, false);
+
+ rspamd_rcl_add_doc_from_comments(cfg, top_doc, top, comments, TRUE);
+
+ return top_doc;
+}
diff --git a/src/libserver/cfg_rcl.h b/src/libserver/cfg_rcl.h
new file mode 100644
index 0000000..766c55e
--- /dev/null
+++ b/src/libserver/cfg_rcl.h
@@ -0,0 +1,476 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef CFG_RCL_H_
+#define CFG_RCL_H_
+
+#include "config.h"
+#include "cfg_file.h"
+#include "ucl.h"
+#include "mem_pool.h"
+
+#define CFG_RCL_ERROR cfg_rcl_error_quark()
+static inline GQuark
+cfg_rcl_error_quark(void)
+{
+ return g_quark_from_static_string("cfg-rcl-error-quark");
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_rcl_section;
+struct rspamd_rcl_sections_map;
+struct rspamd_config;
+struct rspamd_rcl_default_handler_data;
+
+enum rspamd_rcl_flag {
+ RSPAMD_CL_FLAG_TIME_FLOAT = 0x1 << 0,
+ RSPAMD_CL_FLAG_TIME_TIMEVAL = 0x1 << 1,
+ RSPAMD_CL_FLAG_TIME_TIMESPEC = 0x1 << 2,
+ RSPAMD_CL_FLAG_TIME_INTEGER = 0x1 << 3,
+ RSPAMD_CL_FLAG_TIME_UINT_32 = 0x1 << 4,
+ RSPAMD_CL_FLAG_INT_16 = 0x1 << 5,
+ RSPAMD_CL_FLAG_INT_32 = 0x1 << 6,
+ RSPAMD_CL_FLAG_INT_64 = 0x1 << 7,
+ RSPAMD_CL_FLAG_UINT = 0x1 << 8,
+ RSPAMD_CL_FLAG_INT_SIZE = 0x1 << 9,
+ RSPAMD_CL_FLAG_STRING_PATH = 0x1 << 10,
+ RSPAMD_CL_FLAG_BOOLEAN_INVERSE = 0x1 << 11,
+ RSPAMD_CL_FLAG_STRING_LIST_HASH = 0x1 << 12,
+ RSPAMD_CL_FLAG_MULTIPLE = 0x1 << 13,
+ RSPAMD_CL_FLAG_SIGNKEY = 0x1 << 14,
+ RSPAMD_CL_FLAG_NISTKEY = 0x1 << 15,
+};
+
+struct rspamd_rcl_struct_parser {
+ struct rspamd_config *cfg;
+ gpointer user_struct;
+ goffset offset;
+ int flags; /* enum rspamd_rcl_flag */
+};
+
+
+/**
+ * Common handler type
+ * @param cfg configuration
+ * @param obj object to parse
+ * @param ud user data (depends on section)
+ * @param err error object
+ * @return TRUE if a section has been parsed
+ */
+typedef gboolean (*rspamd_rcl_handler_t)(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ const gchar *key,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+typedef gboolean (*rspamd_rcl_default_handler_t)(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * A handler type that is called at the end of section parsing
+ * @param cfg configuration
+ * @param ud user data
+ */
+typedef void (*rspamd_rcl_section_fin_t)(rspamd_mempool_t *pool, gpointer ud);
+
+/**
+ * Add a default handler for a section
+ * @param section section pointer
+ * @param name name of param
+ * @param handler handler of param
+ * @param offset offset in a structure
+ * @param flags flags for the parser
+ * @return newly created structure
+ */
+struct rspamd_rcl_default_handler_data *rspamd_rcl_add_default_handler(
+ struct rspamd_rcl_section *section,
+ const gchar *name,
+ rspamd_rcl_default_handler_t handler,
+ goffset offset,
+ gint flags,
+ const gchar *doc_string);
+
+/**
+ * Add new section to the configuration
+ * @param top top section
+ * @param name the name of the section
+ * @param key_attr name of the attribute that should be used as key attribute
+ * @param handler handler function for all attributes
+ * @param type type of object handled by a handler
+ * @param required whether at least one of these sections is required
+ * @param strict_type turn on strict check for types for this section
+ * @return newly created structure
+ */
+struct rspamd_rcl_section *rspamd_rcl_add_section(
+ struct rspamd_rcl_sections_map **top,
+ struct rspamd_rcl_section *parent_section,
+ const gchar *name,
+ const gchar *key_attr,
+ rspamd_rcl_handler_t handler,
+ enum ucl_type type,
+ gboolean required,
+ gboolean strict_type);
+
+struct rspamd_rcl_section *rspamd_rcl_add_section_doc(
+ struct rspamd_rcl_sections_map **top,
+ struct rspamd_rcl_section *parent_section,
+ const gchar *name, const gchar *key_attr,
+ rspamd_rcl_handler_t handler,
+ enum ucl_type type, gboolean required,
+ gboolean strict_type,
+ ucl_object_t *doc_target,
+ const gchar *doc_string);
+
+/**
+ * Init common sections known to rspamd
+ * @return top section
+ */
+struct rspamd_rcl_sections_map *rspamd_rcl_config_init(struct rspamd_config *cfg,
+ GHashTable *skip_sections);
+
+/**
+ * Parse configuration
+ * @param top top section
+ * @param cfg rspamd configuration
+ * @param ptr pointer to the target
+ * @param pool pool object
+ * @param obj ucl object to parse
+ * @param err error pointer
+ * @return
+ */
+gboolean rspamd_rcl_parse(struct rspamd_rcl_sections_map *top,
+ struct rspamd_config *cfg,
+ gpointer ptr, rspamd_mempool_t *pool,
+ const ucl_object_t *obj, GError **err);
+
+/**
+ * Here is a section of common handlers that accepts rcl_struct_parser
+ * which itself contains a struct pointer and the offset of a member in a
+ * specific structure
+ */
+
+/**
+ * Parse a string field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a string value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_string(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse an integer field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_integer(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+
+/**
+ * Parse a float field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_double(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a time field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_time(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a string list field of a structure presented by a GList* object
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_string_list(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a boolean field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_boolean(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a keypair field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_keypair(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a pubkey field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_pubkey(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a inet addr field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_addr(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a gmime inet address field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_mime_addr(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+/**
+ * Parse a raw ucl object
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_ucl(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err);
+
+
+/**
+ * Utility functions
+ */
+
+/**
+ * Register new parser for a worker type of an option with the specified name
+ * @param cfg config structure
+ * @param type type of worker (GQuark)
+ * @param name name of option
+ * @param handler handler of option
+ * @param target opaque target structure, note it **MUST** be worker ctx due to some reasons I don't really remember
+ * @param offset offset inside a structure
+ */
+void rspamd_rcl_register_worker_option(struct rspamd_config *cfg,
+ GQuark type,
+ const gchar *name,
+ rspamd_rcl_default_handler_t handler,
+ gpointer target,
+ glong offset,
+ gint flags,
+ const gchar *doc_string);
+
+/**
+ * Adds new documentation object to the configuration
+ * @param doc_target target object where to insert documentation (top object is used if this is NULL)
+ * @param doc_object documentation object to insert
+ */
+ucl_object_t *rspamd_rcl_add_doc_obj(ucl_object_t *doc_target,
+ const char *doc_string,
+ const char *doc_name,
+ ucl_type_t type,
+ rspamd_rcl_default_handler_t handler,
+ gint flags,
+ const char *default_value,
+ gboolean required);
+
+/**
+ * Adds new documentation option specified by path `doc_path` that should be
+ * split by dots
+ */
+ucl_object_t *rspamd_rcl_add_doc_by_path(struct rspamd_config *cfg,
+ const gchar *doc_path,
+ const char *doc_string,
+ const char *doc_name,
+ ucl_type_t type,
+ rspamd_rcl_default_handler_t handler,
+ gint flags,
+ const char *default_value,
+ gboolean required);
+
+
+/**
+ * Parses example and adds documentation according to the example:
+ *
+ * ```
+ * section {
+ * param1 = value; # explanation
+ * param2 = value; # explanation
+ * }
+ * ```
+ *
+ * will produce the following documentation strings:
+ * section ->
+ * section.param1 : explanation
+ * section.param2 : explanation
+ *
+ * @param cfg
+ * @param root_path
+ * @param example_data
+ * @param example_len
+ * @return
+ */
+ucl_object_t *rspamd_rcl_add_doc_by_example(struct rspamd_config *cfg,
+ const gchar *root_path,
+ const gchar *doc_string,
+ const gchar *doc_name,
+ const gchar *example_data, gsize example_len);
+
+/**
+ * Add lua modules path
+ * @param cfg
+ * @param path
+ * @param err
+ * @return
+ */
+gboolean rspamd_rcl_add_lua_plugins_path(struct rspamd_rcl_sections_map *sections,
+ struct rspamd_config *cfg,
+ const gchar *path,
+ gboolean main_path,
+ GError **err);
+
+
+/**
+ * Calls for an external lua function to apply potential config transformations
+ * if needed. This function can change the cfg->rcl_obj.
+ *
+ * Example of transformation function:
+ *
+ * function(obj)
+ * if obj.something == 'foo' then
+ * obj.something = "bla"
+ * return true, obj
+ * end
+ *
+ * return false, nil
+ * end
+ *
+ * If function returns 'false' then rcl_obj is not touched. Otherwise,
+ * it is changed, then rcl_obj is imported from lua. Old config is dereferenced.
+ * @param cfg
+ */
+void rspamd_rcl_maybe_apply_lua_transform(struct rspamd_config *cfg);
+void rspamd_rcl_sections_free(struct rspamd_rcl_sections_map *sections);
+
+void rspamd_config_calculate_cksum(struct rspamd_config *cfg);
+
+/*
+ * Read configuration file
+ */
+gboolean rspamd_config_parse_ucl(struct rspamd_config *cfg,
+ const gchar *filename,
+ GHashTable *vars,
+ ucl_include_trace_func_t inc_trace,
+ void *trace_data,
+ gboolean skip_jinja,
+ GError **err);
+gboolean rspamd_config_read(struct rspamd_config *cfg,
+ const gchar *filename,
+ rspamd_rcl_section_fin_t logger_fin,
+ gpointer logger_ud,
+ GHashTable *vars,
+ gboolean skip_jinja,
+ gchar **lua_env);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CFG_RCL_H_ */
diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx
new file mode 100644
index 0000000..3a94b47
--- /dev/null
+++ b/src/libserver/cfg_utils.cxx
@@ -0,0 +1,2955 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+
+#include "lua/lua_common.h"
+#include "lua/lua_thread_pool.h"
+
+#include "cfg_file.h"
+#include "rspamd.h"
+#include "cfg_file_private.h"
+
+#include "maps/map.h"
+#include "maps/map_helpers.h"
+#include "maps/map_private.h"
+#include "dynamic_cfg.h"
+#include "utlist.h"
+#include "stat_api.h"
+#include "unix-std.h"
+#include "libutil/multipattern.h"
+#include "monitored.h"
+#include "ref.h"
+#include "cryptobox.h"
+#include "ssl_util.h"
+#include "contrib/libottery/ottery.h"
+#include "contrib/fastutf8/fastutf8.h"
+
+#ifdef SYS_ZSTD
+#include "zstd.h"
+#else
+#define ZSTD_STATIC_LINKING_ONLY
+#include "contrib/zstd/zstd.h"
+#endif
+
+#ifdef HAVE_OPENSSL
+#include <openssl/rand.h>
+#include <openssl/err.h>
+#include <openssl/evp.h>
+#include <openssl/ssl.h>
+#include <openssl/conf.h>
+#endif
+#ifdef HAVE_LOCALE_H
+#include <locale.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#include <math.h>
+#include "libserver/composites/composites.h"
+
+#include "blas-config.h"
+
+#include <string>
+#include <string_view>
+#include <vector>
+#include "fmt/core.h"
+#include "cxx/util.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include "contrib/ankerl/unordered_dense.h"
+
+#define DEFAULT_SCORE 10.0
+
+#define DEFAULT_RLIMIT_NOFILE 2048
+#define DEFAULT_RLIMIT_MAXCORE 0
+#define DEFAULT_MAP_TIMEOUT 60.0 * 5
+#define DEFAULT_MAP_FILE_WATCH_MULTIPLIER 1
+#define DEFAULT_MIN_WORD 0
+#define DEFAULT_MAX_WORD 40
+#define DEFAULT_WORDS_DECAY 600
+#define DEFAULT_MAX_MESSAGE (50 * 1024 * 1024)
+#define DEFAULT_MAX_PIC (1 * 1024 * 1024)
+#define DEFAULT_MAX_SHOTS 100
+#define DEFAULT_MAX_SESSIONS 100
+#define DEFAULT_MAX_WORKERS 4
+#define DEFAULT_MAX_HTML_SIZE DEFAULT_MAX_MESSAGE / 5 /* 10 Mb */
+/* Timeout for task processing */
+#define DEFAULT_TASK_TIMEOUT 8.0
+#define DEFAULT_LUA_GC_STEP 200
+#define DEFAULT_LUA_GC_PAUSE 200
+#define DEFAULT_GC_MAXITERS 0
+
+struct rspamd_ucl_map_cbdata {
+ struct rspamd_config *cfg;
+ std::string buf;
+
+ explicit rspamd_ucl_map_cbdata(struct rspamd_config *cfg)
+ : cfg(cfg)
+ {
+ }
+};
+static gchar *rspamd_ucl_read_cb(gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+static void rspamd_ucl_fin_cb(struct map_cb_data *data, void **target);
+static void rspamd_ucl_dtor_cb(struct map_cb_data *data);
+
+guint rspamd_config_log_id = (guint) -1;
+RSPAMD_CONSTRUCTOR(rspamd_config_log_init)
+{
+ rspamd_config_log_id = rspamd_logger_add_debug_module("config");
+}
+
+struct rspamd_actions_list {
+ using action_ptr = std::shared_ptr<rspamd_action>;
+ std::vector<action_ptr> actions;
+ ankerl::unordered_dense::map<std::string_view, action_ptr> actions_by_name;
+
+ explicit rspamd_actions_list()
+ {
+ actions.reserve(METRIC_ACTION_MAX + 2);
+ actions_by_name.reserve(METRIC_ACTION_MAX + 2);
+ }
+
+ void add_action(action_ptr action)
+ {
+ actions.push_back(action);
+ actions_by_name[action->name] = action;
+ sort();
+ }
+
+ void sort()
+ {
+ std::sort(actions.begin(), actions.end(), [](const action_ptr &a1, const action_ptr &a2) -> bool {
+ if (!isnan(a1->threshold) && !isnan(a2->threshold)) {
+ return a1->threshold < a2->threshold;
+ }
+
+ if (isnan(a1->threshold) && isnan(a2->threshold)) {
+ return false;
+ }
+ else if (isnan(a1->threshold)) {
+ return true;
+ }
+
+ return false;
+ });
+ }
+
+ void clear()
+ {
+ actions.clear();
+ actions_by_name.clear();
+ }
+};
+
+#define RSPAMD_CFG_ACTIONS(cfg) (reinterpret_cast<rspamd_actions_list *>((cfg)->actions))
+
+gboolean
+rspamd_parse_bind_line(struct rspamd_config *cfg,
+ struct rspamd_worker_conf *cf,
+ const gchar *str)
+{
+ struct rspamd_worker_bind_conf *cnf;
+ const gchar *fdname;
+ gboolean ret = TRUE;
+
+ if (str == nullptr) {
+ return FALSE;
+ }
+
+ cnf = rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_worker_bind_conf);
+
+ cnf->cnt = 1024;
+ cnf->bind_line = rspamd_mempool_strdup(cfg->cfg_pool, str);
+
+ auto bind_line = std::string_view{cnf->bind_line};
+
+ if (bind_line.starts_with("systemd:")) {
+ /* The actual socket will be passed by systemd environment */
+ fdname = str + sizeof("systemd:") - 1;
+ cnf->is_systemd = TRUE;
+ cnf->addrs = g_ptr_array_new_full(1, nullptr);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ rspamd_ptr_array_free_hard, cnf->addrs);
+
+ if (fdname[0]) {
+ g_ptr_array_add(cnf->addrs, rspamd_mempool_strdup(cfg->cfg_pool, fdname));
+ cnf->cnt = cnf->addrs->len;
+ cnf->name = rspamd_mempool_strdup(cfg->cfg_pool, str);
+ LL_PREPEND(cf->bind_conf, cnf);
+ }
+ else {
+ msg_err_config("cannot parse bind line: %s", str);
+ ret = FALSE;
+ }
+ }
+ else {
+ if (rspamd_parse_host_port_priority(str, &cnf->addrs,
+ nullptr, &cnf->name, DEFAULT_BIND_PORT, TRUE, cfg->cfg_pool) == RSPAMD_PARSE_ADDR_FAIL) {
+ msg_err_config("cannot parse bind line: %s", str);
+ ret = FALSE;
+ }
+ else {
+ cnf->cnt = cnf->addrs->len;
+ LL_PREPEND(cf->bind_conf, cnf);
+ }
+ }
+
+ return ret;
+}
+
+struct rspamd_config *
+rspamd_config_new(enum rspamd_config_init_flags flags)
+{
+ struct rspamd_config *cfg;
+ rspamd_mempool_t *pool;
+
+ pool = rspamd_mempool_new(8 * 1024 * 1024, "cfg", 0);
+ cfg = rspamd_mempool_alloc0_type(pool, struct rspamd_config);
+ /* Allocate larger pool for cfg */
+ cfg->cfg_pool = pool;
+ cfg->dns_timeout = 1.0;
+ cfg->dns_retransmits = 5;
+ /* 16 sockets per DNS server */
+ cfg->dns_io_per_server = 16;
+ cfg->unknown_weight = NAN;
+
+ cfg->actions = (void *) new rspamd_actions_list();
+
+ /* Add all internal actions to keep compatibility */
+ for (int i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i++) {
+
+ auto &&action = std::make_shared<rspamd_action>();
+ action->threshold = NAN;
+ action->name = rspamd_mempool_strdup(cfg->cfg_pool,
+ rspamd_action_to_str(static_cast<rspamd_action_type>(i)));
+ action->action_type = static_cast<rspamd_action_type>(i);
+
+ if (i == METRIC_ACTION_SOFT_REJECT) {
+ action->flags |= RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM;
+ }
+ else if (i == METRIC_ACTION_GREYLIST) {
+ action->flags |= RSPAMD_ACTION_THRESHOLD_ONLY | RSPAMD_ACTION_HAM;
+ }
+ else if (i == METRIC_ACTION_NOACTION) {
+ action->flags |= RSPAMD_ACTION_HAM;
+ }
+
+ RSPAMD_CFG_ACTIONS(cfg)->add_action(std::move(action));
+ }
+
+ /* Disable timeout */
+ cfg->task_timeout = DEFAULT_TASK_TIMEOUT;
+
+
+ rspamd_config_init_metric(cfg);
+ cfg->composites_manager = rspamd_composites_manager_create(cfg);
+ cfg->classifiers_symbols = g_hash_table_new(rspamd_str_hash,
+ rspamd_str_equal);
+ cfg->cfg_params = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ cfg->debug_modules = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ cfg->explicit_modules = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ cfg->trusted_keys = g_hash_table_new(rspamd_str_hash,
+ rspamd_str_equal);
+
+ cfg->map_timeout = DEFAULT_MAP_TIMEOUT;
+ cfg->map_file_watch_multiplier = DEFAULT_MAP_FILE_WATCH_MULTIPLIER;
+
+ cfg->log_level = G_LOG_LEVEL_WARNING;
+ cfg->log_flags = RSPAMD_LOG_FLAG_DEFAULT;
+
+ cfg->check_text_attachements = TRUE;
+
+ cfg->dns_max_requests = 64;
+ cfg->history_rows = 200;
+ cfg->log_error_elts = 10;
+ cfg->log_error_elt_maxlen = 1000;
+ cfg->log_task_max_elts = 7;
+ cfg->cache_reload_time = 30.0;
+ cfg->max_lua_urls = 1024;
+ cfg->max_urls = cfg->max_lua_urls * 10;
+ cfg->max_recipients = 1024;
+ cfg->max_blas_threads = 1;
+ cfg->max_opts_len = 4096;
+ cfg->gtube_patterns_policy = RSPAMD_GTUBE_REJECT;
+
+ /* Default log line */
+ cfg->log_format_str = rspamd_mempool_strdup(cfg->cfg_pool,
+ "id: <$mid>,$if_qid{ qid: <$>,}$if_ip{ ip: $,}"
+ "$if_user{ user: $,}$if_smtp_from{ from: <$>,} (default: $is_spam "
+ "($action): [$scores] [$symbols_scores_params]), len: $len, time: $time_real, "
+ "dns req: $dns_req, digest: <$digest>"
+ "$if_smtp_rcpts{ rcpts: <$>, }$if_mime_rcpt{ mime_rcpt: <$>, }");
+ /* Allow non-mime input by default */
+ cfg->allow_raw_input = TRUE;
+ /* Default maximum words processed */
+ cfg->words_decay = DEFAULT_WORDS_DECAY;
+ cfg->min_word_len = DEFAULT_MIN_WORD;
+ cfg->max_word_len = DEFAULT_MAX_WORD;
+ cfg->max_html_len = DEFAULT_MAX_HTML_SIZE;
+
+ /* GC limits */
+ cfg->lua_gc_pause = DEFAULT_LUA_GC_PAUSE;
+ cfg->lua_gc_step = DEFAULT_LUA_GC_STEP;
+ cfg->full_gc_iters = DEFAULT_GC_MAXITERS;
+
+ /* Default hyperscan cache */
+ cfg->hs_cache_dir = rspamd_mempool_strdup(cfg->cfg_pool, RSPAMD_DBDIR "/");
+
+ if (!(flags & RSPAMD_CONFIG_INIT_SKIP_LUA)) {
+ cfg->lua_state = (void *) rspamd_lua_init(flags & RSPAMD_CONFIG_INIT_WIPE_LUA_MEM);
+ cfg->own_lua_state = TRUE;
+ cfg->lua_thread_pool = (void *) lua_thread_pool_new(RSPAMD_LUA_CFG_STATE(cfg));
+ }
+
+ cfg->cache = rspamd_symcache_new(cfg);
+ cfg->ups_ctx = rspamd_upstreams_library_init();
+ cfg->re_cache = rspamd_re_cache_new();
+ cfg->doc_strings = ucl_object_typed_new(UCL_OBJECT);
+ /*
+ * Unless exim is fixed
+ */
+ cfg->enable_shutdown_workaround = TRUE;
+
+ cfg->ssl_ciphers = rspamd_mempool_strdup(cfg->cfg_pool, "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4");
+ cfg->max_message = DEFAULT_MAX_MESSAGE;
+ cfg->max_pic_size = DEFAULT_MAX_PIC;
+ cfg->images_cache_size = 256;
+ cfg->monitored_ctx = rspamd_monitored_ctx_init();
+ cfg->neighbours = ucl_object_typed_new(UCL_OBJECT);
+ cfg->redis_pool = rspamd_redis_pool_init();
+ cfg->default_max_shots = DEFAULT_MAX_SHOTS;
+ cfg->max_sessions_cache = DEFAULT_MAX_SESSIONS;
+ cfg->maps_cache_dir = rspamd_mempool_strdup(cfg->cfg_pool, RSPAMD_DBDIR);
+ cfg->c_modules = g_ptr_array_new();
+ cfg->heartbeat_interval = 10.0;
+
+ cfg->enable_css_parser = true;
+ cfg->script_modules = g_ptr_array_new();
+
+ REF_INIT_RETAIN(cfg, rspamd_config_free);
+
+ return cfg;
+}
+
+void rspamd_config_free(struct rspamd_config *cfg)
+{
+ struct rspamd_config_cfg_lua_script *sc, *sctmp;
+ struct rspamd_config_settings_elt *set, *stmp;
+ struct rspamd_worker_log_pipe *lp, *ltmp;
+
+ rspamd_lua_run_config_unload(RSPAMD_LUA_CFG_STATE(cfg), cfg);
+
+ /* Scripts part */
+ DL_FOREACH_SAFE(cfg->on_term_scripts, sc, sctmp)
+ {
+ luaL_unref(RSPAMD_LUA_CFG_STATE(cfg), LUA_REGISTRYINDEX, sc->cbref);
+ }
+
+ DL_FOREACH_SAFE(cfg->on_load_scripts, sc, sctmp)
+ {
+ luaL_unref(RSPAMD_LUA_CFG_STATE(cfg), LUA_REGISTRYINDEX, sc->cbref);
+ }
+
+ DL_FOREACH_SAFE(cfg->post_init_scripts, sc, sctmp)
+ {
+ luaL_unref(RSPAMD_LUA_CFG_STATE(cfg), LUA_REGISTRYINDEX, sc->cbref);
+ }
+
+ DL_FOREACH_SAFE(cfg->config_unload_scripts, sc, sctmp)
+ {
+ luaL_unref(RSPAMD_LUA_CFG_STATE(cfg), LUA_REGISTRYINDEX, sc->cbref);
+ }
+
+ DL_FOREACH_SAFE(cfg->setting_ids, set, stmp)
+ {
+ REF_RELEASE(set);
+ }
+
+ rspamd_map_remove_all(cfg);
+ rspamd_mempool_destructors_enforce(cfg->cfg_pool);
+
+ g_list_free(cfg->classifiers);
+ g_list_free(cfg->workers);
+ rspamd_symcache_destroy(cfg->cache);
+ ucl_object_unref(cfg->cfg_ucl_obj);
+ ucl_object_unref(cfg->config_comments);
+ ucl_object_unref(cfg->doc_strings);
+ ucl_object_unref(cfg->neighbours);
+ g_hash_table_remove_all(cfg->cfg_params);
+ g_hash_table_unref(cfg->cfg_params);
+ g_hash_table_unref(cfg->classifiers_symbols);
+ g_hash_table_unref(cfg->debug_modules);
+ g_hash_table_unref(cfg->explicit_modules);
+ g_hash_table_unref(cfg->trusted_keys);
+
+ rspamd_re_cache_unref(cfg->re_cache);
+ g_ptr_array_free(cfg->c_modules, TRUE);
+ g_ptr_array_free(cfg->script_modules, TRUE);
+
+ if (cfg->monitored_ctx) {
+ rspamd_monitored_ctx_destroy(cfg->monitored_ctx);
+ }
+
+ if (RSPAMD_LUA_CFG_STATE(cfg) && cfg->own_lua_state) {
+ lua_thread_pool_free((struct lua_thread_pool *) cfg->lua_thread_pool);
+ rspamd_lua_close(RSPAMD_LUA_CFG_STATE(cfg));
+ }
+
+ if (cfg->redis_pool) {
+ rspamd_redis_pool_destroy(cfg->redis_pool);
+ }
+
+ rspamd_upstreams_library_unref(cfg->ups_ctx);
+ delete RSPAMD_CFG_ACTIONS(cfg);
+
+ rspamd_mempool_destructors_enforce(cfg->cfg_pool);
+
+ if (cfg->checksum) {
+ g_free(cfg->checksum);
+ }
+
+ REF_RELEASE(cfg->libs_ctx);
+
+ DL_FOREACH_SAFE(cfg->log_pipes, lp, ltmp)
+ {
+ close(lp->fd);
+ g_free(lp);
+ }
+
+ rspamd_mempool_delete(cfg->cfg_pool);
+}
+
+const ucl_object_t *
+rspamd_config_get_module_opt(struct rspamd_config *cfg,
+ const gchar *module_name,
+ const gchar *opt_name)
+{
+ const ucl_object_t *res = nullptr, *sec;
+
+ sec = ucl_obj_get_key(cfg->cfg_ucl_obj, module_name);
+ if (sec != nullptr) {
+ res = ucl_obj_get_key(sec, opt_name);
+ }
+
+ return res;
+}
+
+gint rspamd_config_parse_flag(const gchar *str, guint len)
+{
+ gint c;
+
+ if (!str || !*str) {
+ return -1;
+ }
+
+ if (len == 0) {
+ len = strlen(str);
+ }
+
+ switch (len) {
+ case 1:
+ c = g_ascii_tolower(*str);
+ if (c == 'y' || c == '1') {
+ return 1;
+ }
+ else if (c == 'n' || c == '0') {
+ return 0;
+ }
+ break;
+ case 2:
+ if (g_ascii_strncasecmp(str, "no", len) == 0) {
+ return 0;
+ }
+ else if (g_ascii_strncasecmp(str, "on", len) == 0) {
+ return 1;
+ }
+ break;
+ case 3:
+ if (g_ascii_strncasecmp(str, "yes", len) == 0) {
+ return 1;
+ }
+ else if (g_ascii_strncasecmp(str, "off", len) == 0) {
+ return 0;
+ }
+ break;
+ case 4:
+ if (g_ascii_strncasecmp(str, "true", len) == 0) {
+ return 1;
+ }
+ break;
+ case 5:
+ if (g_ascii_strncasecmp(str, "false", len) == 0) {
+ return 0;
+ }
+ break;
+ }
+
+ return -1;
+}
+
+// A mapping between names and log format types + flags
+constexpr const auto config_vars = frozen::make_unordered_map<frozen::string, std::pair<rspamd_log_format_type, int>>({
+ {"mid", {RSPAMD_LOG_MID, 0}},
+ {"qid", {RSPAMD_LOG_QID, 0}},
+ {"user", {RSPAMD_LOG_USER, 0}},
+ {"ip", {RSPAMD_LOG_IP, 0}},
+ {"len", {RSPAMD_LOG_LEN, 0}},
+ {"dns_req", {RSPAMD_LOG_DNS_REQ, 0}},
+ {"smtp_from", {RSPAMD_LOG_SMTP_FROM, 0}},
+ {"mime_from", {RSPAMD_LOG_MIME_FROM, 0}},
+ {"smtp_rcpt", {RSPAMD_LOG_SMTP_RCPT, 0}},
+ {"mime_rcpt", {RSPAMD_LOG_MIME_RCPT, 0}},
+ {"smtp_rcpts", {RSPAMD_LOG_SMTP_RCPTS, 0}},
+ {"mime_rcpts", {RSPAMD_LOG_MIME_RCPTS, 0}},
+ {"time_real", {RSPAMD_LOG_TIME_REAL, 0}},
+ {"time_virtual", {RSPAMD_LOG_TIME_VIRTUAL, 0}},
+ {"lua", {RSPAMD_LOG_LUA, 0}},
+ {"digest", {RSPAMD_LOG_DIGEST, 0}},
+ {"checksum", {RSPAMD_LOG_DIGEST, 0}},
+ {"filename", {RSPAMD_LOG_FILENAME, 0}},
+ {"forced_action", {RSPAMD_LOG_FORCED_ACTION, 0}},
+ {"settings_id", {RSPAMD_LOG_SETTINGS_ID, 0}},
+ {"mempool_size", {RSPAMD_LOG_MEMPOOL_SIZE, 0}},
+ {"mempool_waste", {RSPAMD_LOG_MEMPOOL_WASTE, 0}},
+ {"action", {RSPAMD_LOG_ACTION, 0}},
+ {"scores", {RSPAMD_LOG_SCORES, 0}},
+ {"symbols", {RSPAMD_LOG_SYMBOLS, 0}},
+ {"symbols_scores", {RSPAMD_LOG_SYMBOLS, RSPAMD_LOG_FMT_FLAG_SYMBOLS_SCORES}},
+ {"symbols_params", {RSPAMD_LOG_SYMBOLS, RSPAMD_LOG_FMT_FLAG_SYMBOLS_PARAMS}},
+ {"symbols_scores_params", {RSPAMD_LOG_SYMBOLS, RSPAMD_LOG_FMT_FLAG_SYMBOLS_PARAMS | RSPAMD_LOG_FMT_FLAG_SYMBOLS_SCORES}},
+ {"groups", {RSPAMD_LOG_GROUPS, 0}},
+ {"public_groups", {RSPAMD_LOG_PUBLIC_GROUPS, 0}},
+ {"is_spam", {RSPAMD_LOG_ISSPAM, 0}},
+});
+
+static gboolean
+rspamd_config_process_var(struct rspamd_config *cfg, const rspamd_ftok_t *var,
+ const rspamd_ftok_t *content)
+{
+ g_assert(var != nullptr);
+
+ auto flags = 0;
+ auto lc_var = std::string{var->begin, var->len};
+ std::transform(lc_var.begin(), lc_var.end(), lc_var.begin(), g_ascii_tolower);
+ auto tok = std::string_view{lc_var};
+
+ if (var->len > 3 && tok.starts_with("if_")) {
+ flags |= RSPAMD_LOG_FMT_FLAG_CONDITION;
+ tok = tok.substr(3);
+ }
+
+ auto maybe_fmt_var = rspamd::find_map(config_vars, tok);
+
+ if (maybe_fmt_var) {
+ auto &fmt_var = maybe_fmt_var.value().get();
+ auto *log_format = rspamd_mempool_alloc0_type(cfg->cfg_pool, rspamd_log_format);
+
+ log_format->type = fmt_var.first;
+ log_format->flags = fmt_var.second | flags;
+
+ if (log_format->type != RSPAMD_LOG_LUA) {
+ if (content && content->len > 0) {
+ log_format->data = rspamd_mempool_alloc0(cfg->cfg_pool,
+ sizeof(rspamd_ftok_t));
+ memcpy(log_format->data, content, sizeof(*content));
+ log_format->len = sizeof(*content);
+ }
+ }
+ else {
+ /* Load lua code and ensure that we have function ref returned */
+ if (!content || content->len == 0) {
+ msg_err_config("lua variable needs content: %T", &tok);
+ return FALSE;
+ }
+
+ if (luaL_loadbuffer(RSPAMD_LUA_CFG_STATE(cfg), content->begin, content->len,
+ "lua log variable") != 0) {
+ msg_err_config("error loading lua code: '%T': %s", content,
+ lua_tostring(RSPAMD_LUA_CFG_STATE(cfg), -1));
+ return FALSE;
+ }
+ if (lua_pcall(RSPAMD_LUA_CFG_STATE(cfg), 0, 1, 0) != 0) {
+ msg_err_config("error executing lua code: '%T': %s", content,
+ lua_tostring(RSPAMD_LUA_CFG_STATE(cfg), -1));
+ lua_pop(RSPAMD_LUA_CFG_STATE(cfg), 1);
+
+ return FALSE;
+ }
+
+ if (lua_type(RSPAMD_LUA_CFG_STATE(cfg), -1) != LUA_TFUNCTION) {
+ msg_err_config("lua variable should return function: %T", content);
+ lua_pop(RSPAMD_LUA_CFG_STATE(cfg), 1);
+ return FALSE;
+ }
+
+ auto id = luaL_ref(RSPAMD_LUA_CFG_STATE(cfg), LUA_REGISTRYINDEX);
+ log_format->data = GINT_TO_POINTER(id);
+ log_format->len = 0;
+ }
+
+ DL_APPEND(cfg->log_format, log_format);
+ }
+ else {
+ std::string known_formats;
+
+ for (const auto &v: config_vars) {
+ known_formats += std::string_view{v.first.data(), v.first.size()};
+ known_formats += ", ";
+ }
+
+ if (known_formats.size() > 2) {
+ // Remove last comma
+ known_formats.resize(known_formats.size() - 2);
+ }
+ msg_err_config("unknown log variable: %T, known vars are: \"%s\"", var, known_formats.c_str());
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_config_parse_log_format(struct rspamd_config *cfg)
+{
+ const gchar *p, *c, *end, *s;
+ gchar *d;
+ struct rspamd_log_format *lf = nullptr;
+ rspamd_ftok_t var, var_content;
+ enum {
+ parse_str,
+ parse_dollar,
+ parse_var_name,
+ parse_var_content,
+ } state = parse_str;
+ gint braces = 0;
+
+ g_assert(cfg != nullptr);
+ c = cfg->log_format_str;
+
+ if (c == nullptr) {
+ return FALSE;
+ }
+
+ p = c;
+ end = p + strlen(p);
+
+ while (p < end) {
+ switch (state) {
+ case parse_str:
+ if (*p == '$') {
+ state = parse_dollar;
+ }
+ else {
+ p++;
+ }
+ break;
+ case parse_dollar:
+ if (p > c) {
+ /* We have string element that we need to store */
+ lf = rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_log_format);
+ lf->type = RSPAMD_LOG_STRING;
+ lf->data = rspamd_mempool_alloc(cfg->cfg_pool, p - c + 1);
+ /* Filter \r\n from the destination */
+ s = c;
+ d = (char *) lf->data;
+
+ while (s < p) {
+ if (*s != '\r' && *s != '\n') {
+ *d++ = *s++;
+ }
+ else {
+ *d++ = ' ';
+ s++;
+ }
+ }
+ *d = '\0';
+
+ lf->len = d - (char *) lf->data;
+ DL_APPEND(cfg->log_format, lf);
+ lf = nullptr;
+ }
+ p++;
+ c = p;
+ state = parse_var_name;
+ break;
+ case parse_var_name:
+ if (*p == '{') {
+ var.begin = c;
+ var.len = p - c;
+ p++;
+ c = p;
+ state = parse_var_content;
+ braces = 1;
+ }
+ else if (*p != '_' && *p != '-' && !g_ascii_isalnum(*p)) {
+ /* Variable with no content */
+ var.begin = c;
+ var.len = p - c;
+ c = p;
+
+ if (!rspamd_config_process_var(cfg, &var, nullptr)) {
+ return FALSE;
+ }
+
+ state = parse_str;
+ }
+ else {
+ p++;
+ }
+ break;
+ case parse_var_content:
+ if (*p == '}' && --braces == 0) {
+ var_content.begin = c;
+ var_content.len = p - c;
+ p++;
+ c = p;
+
+ if (!rspamd_config_process_var(cfg, &var, &var_content)) {
+ return FALSE;
+ }
+
+ state = parse_str;
+ }
+ else if (*p == '{') {
+ braces++;
+ p++;
+ }
+ else {
+ p++;
+ }
+ break;
+ }
+ }
+
+ /* Last state */
+ switch (state) {
+ case parse_str:
+ if (p > c) {
+ /* We have string element that we need to store */
+ lf = rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_log_format);
+ lf->type = RSPAMD_LOG_STRING;
+ lf->data = rspamd_mempool_alloc(cfg->cfg_pool, p - c + 1);
+ /* Filter \r\n from the destination */
+ s = c;
+ d = (char *) lf->data;
+
+ while (s < p) {
+ if (*s != '\r' && *s != '\n') {
+ *d++ = *s++;
+ }
+ else {
+ *d++ = ' ';
+ s++;
+ }
+ }
+ *d = '\0';
+
+ lf->len = d - (char *) lf->data;
+ DL_APPEND(cfg->log_format, lf);
+ lf = nullptr;
+ }
+ break;
+
+ case parse_var_name:
+ var.begin = c;
+ var.len = p - c;
+
+ if (!rspamd_config_process_var(cfg, &var, nullptr)) {
+ return FALSE;
+ }
+ break;
+ case parse_dollar:
+ case parse_var_content:
+ msg_err_config("cannot parse log format %s: incomplete string",
+ cfg->log_format_str);
+ return FALSE;
+ break;
+ }
+
+ return TRUE;
+}
+
+static void
+rspamd_urls_config_dtor(gpointer _unused)
+{
+ rspamd_url_deinit();
+}
+
+static void
+rspamd_adjust_clocks_resolution(struct rspamd_config *cfg)
+{
+#ifdef HAVE_CLOCK_GETTIME
+ struct timespec ts;
+#endif
+
+#ifdef HAVE_CLOCK_GETTIME
+#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
+ clock_getres(CLOCK_PROCESS_CPUTIME_ID, &ts);
+#elif defined(HAVE_CLOCK_VIRTUAL)
+ clock_getres(CLOCK_VIRTUAL, &ts);
+#else
+ clock_getres(CLOCK_REALTIME, &ts);
+#endif
+ cfg->clock_res = log10(1000000. / ts.tv_nsec);
+ if (cfg->clock_res < 0) {
+ cfg->clock_res = 0;
+ }
+ if (cfg->clock_res > 3) {
+ cfg->clock_res = 3;
+ }
+#else
+ /* For gettimeofday */
+ cfg->clock_res = 1;
+#endif
+}
+
+/*
+ * Perform post load actions
+ */
+gboolean
+rspamd_config_post_load(struct rspamd_config *cfg,
+ enum rspamd_post_load_options opts)
+{
+
+ auto ret = TRUE;
+
+ rspamd_adjust_clocks_resolution(cfg);
+ rspamd_logger_configure_modules(cfg->debug_modules);
+
+ if (cfg->one_shot_mode) {
+ msg_info_config("enabling one shot mode (was %d max shots)",
+ cfg->default_max_shots);
+ cfg->default_max_shots = 1;
+ }
+
+#if defined(WITH_HYPERSCAN) && !defined(__aarch64__) && !defined(__powerpc64__)
+ if (!cfg->disable_hyperscan) {
+ if (!(cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
+ msg_warn_config("CPU doesn't have SSSE3 instructions set "
+ "required for hyperscan, disable it");
+ cfg->disable_hyperscan = TRUE;
+ }
+ }
+#endif
+
+ rspamd_regexp_library_init(cfg);
+ rspamd_multipattern_library_init(cfg->hs_cache_dir);
+
+ if (opts & RSPAMD_CONFIG_INIT_URL) {
+ if (cfg->tld_file == nullptr) {
+ /* Try to guess tld file */
+ auto fpath = fmt::format("{0}{1}{2}", RSPAMD_SHAREDIR,
+ G_DIR_SEPARATOR, "effective_tld_names.dat");
+
+ if (access(fpath.c_str(), R_OK) != -1) {
+ msg_debug_config("url_tld option is not specified but %s is available,"
+ " therefore this file is assumed as TLD file for URL"
+ " extraction",
+ fpath.c_str());
+ cfg->tld_file = rspamd_mempool_strdup(cfg->cfg_pool, fpath.c_str());
+ }
+ else {
+ if (opts & RSPAMD_CONFIG_INIT_VALIDATE) {
+ msg_err_config("no url_tld option has been specified");
+ ret = FALSE;
+ }
+ }
+ }
+ else {
+ if (access(cfg->tld_file, R_OK) == -1) {
+ if (opts & RSPAMD_CONFIG_INIT_VALIDATE) {
+ ret = FALSE;
+ msg_err_config("cannot access tld file %s: %s", cfg->tld_file,
+ strerror(errno));
+ }
+ else {
+ msg_debug_config("cannot access tld file %s: %s", cfg->tld_file,
+ strerror(errno));
+ cfg->tld_file = nullptr;
+ }
+ }
+ }
+
+ if (opts & RSPAMD_CONFIG_INIT_NO_TLD) {
+ rspamd_url_init(nullptr);
+ }
+ else {
+ rspamd_url_init(cfg->tld_file);
+ }
+
+ rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_urls_config_dtor,
+ nullptr);
+ }
+
+ init_dynamic_config(cfg);
+ /* Insert classifiers symbols */
+ rspamd_config_insert_classify_symbols(cfg);
+
+ /* Parse format string that we have */
+ if (!rspamd_config_parse_log_format(cfg)) {
+ msg_err_config("cannot parse log format, task logging will not be available");
+ if (opts & RSPAMD_CONFIG_INIT_VALIDATE) {
+ ret = FALSE;
+ }
+ }
+
+ if (opts & RSPAMD_CONFIG_INIT_SYMCACHE) {
+ /* Init config cache */
+ ret = rspamd_symcache_init(cfg->cache) && ret;
+
+ /* Init re cache */
+ rspamd_re_cache_init(cfg->re_cache, cfg);
+
+ /* Try load Hypersan */
+ auto hs_ret = rspamd_re_cache_load_hyperscan(cfg->re_cache,
+ cfg->hs_cache_dir ? cfg->hs_cache_dir : RSPAMD_DBDIR "/",
+ true);
+
+ if (hs_ret == RSPAMD_HYPERSCAN_LOAD_ERROR) {
+ msg_debug_config("cannot load hyperscan database, disable it");
+ }
+ }
+
+ if (opts & RSPAMD_CONFIG_INIT_LIBS) {
+ /* Config other libraries */
+ ret = rspamd_config_libs(cfg->libs_ctx, cfg) && ret;
+
+ if (!ret) {
+ msg_err_config("cannot configure libraries, fatal error");
+ return FALSE;
+ }
+ }
+
+ /* Validate cache */
+ if (opts & RSPAMD_CONFIG_INIT_VALIDATE) {
+ /* Check for actions sanity */
+ auto seen_controller = FALSE;
+
+ auto *cur = cfg->workers;
+ while (cur) {
+ auto *wcf = (struct rspamd_worker_conf *) cur->data;
+
+ if (wcf->type == g_quark_from_static_string("controller")) {
+ seen_controller = TRUE;
+ break;
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ if (!seen_controller) {
+ msg_warn_config("controller worker is unconfigured: learning,"
+ " periodic scripts, maps watching and many other"
+ " Rspamd features will be broken");
+ }
+
+ ret = rspamd_symcache_validate(cfg->cache, cfg, FALSE) && ret;
+ }
+
+ if (opts & RSPAMD_CONFIG_INIT_POST_LOAD_LUA) {
+ rspamd_lua_run_config_post_init(RSPAMD_LUA_CFG_STATE(cfg), cfg);
+ }
+
+ if (opts & RSPAMD_CONFIG_INIT_PRELOAD_MAPS) {
+ rspamd_map_preload(cfg);
+ }
+
+ return ret;
+}
+
+struct rspamd_classifier_config *
+rspamd_config_new_classifier(struct rspamd_config *cfg,
+ struct rspamd_classifier_config *c)
+{
+ if (c == nullptr) {
+ c =
+ rspamd_mempool_alloc0_type(cfg->cfg_pool,
+ struct rspamd_classifier_config);
+ c->min_prob_strength = 0.05;
+ c->min_token_hits = 2;
+ }
+
+ if (c->labels == nullptr) {
+ c->labels = g_hash_table_new_full(rspamd_str_hash,
+ rspamd_str_equal,
+ nullptr,
+ (GDestroyNotify) g_list_free);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) g_hash_table_destroy,
+ c->labels);
+ }
+
+ return c;
+}
+
+struct rspamd_statfile_config *
+rspamd_config_new_statfile(struct rspamd_config *cfg,
+ struct rspamd_statfile_config *c)
+{
+ if (c == nullptr) {
+ c =
+ rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_statfile_config);
+ }
+
+ return c;
+}
+
+void rspamd_config_init_metric(struct rspamd_config *cfg)
+{
+ cfg->grow_factor = 1.0;
+ cfg->symbols = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ cfg->groups = g_hash_table_new(rspamd_strcase_hash, rspamd_strcase_equal);
+
+ cfg->subject = SPAM_SUBJECT;
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) g_hash_table_unref,
+ cfg->symbols);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) g_hash_table_unref,
+ cfg->groups);
+}
+
+struct rspamd_symbols_group *
+rspamd_config_new_group(struct rspamd_config *cfg, const gchar *name)
+{
+ struct rspamd_symbols_group *gr;
+
+ gr = rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_symbols_group);
+ gr->symbols = g_hash_table_new(rspamd_strcase_hash,
+ rspamd_strcase_equal);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) g_hash_table_unref, gr->symbols);
+ gr->name = rspamd_mempool_strdup(cfg->cfg_pool, name);
+
+ if (strcmp(gr->name, "ungrouped") == 0) {
+ gr->flags |= RSPAMD_SYMBOL_GROUP_UNGROUPED;
+ }
+
+ g_hash_table_insert(cfg->groups, gr->name, gr);
+
+ return gr;
+}
+
+static void
+rspamd_worker_conf_dtor(struct rspamd_worker_conf *wcf)
+{
+ if (wcf) {
+ ucl_object_unref(wcf->options);
+ g_queue_free(wcf->active_workers);
+ g_hash_table_unref(wcf->params);
+ g_free(wcf);
+ }
+}
+
+static void
+rspamd_worker_conf_cfg_fin(gpointer d)
+{
+ auto *wcf = (struct rspamd_worker_conf *) d;
+
+ REF_RELEASE(wcf);
+}
+
+struct rspamd_worker_conf *
+rspamd_config_new_worker(struct rspamd_config *cfg,
+ struct rspamd_worker_conf *c)
+{
+ if (c == nullptr) {
+ c = g_new0(struct rspamd_worker_conf, 1);
+ c->params = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ c->active_workers = g_queue_new();
+#ifdef HAVE_SC_NPROCESSORS_ONLN
+ auto nproc = sysconf(_SC_NPROCESSORS_ONLN);
+ c->count = MIN(DEFAULT_MAX_WORKERS, MAX(1, nproc - 2));
+#else
+ c->count = DEFAULT_MAX_WORKERS;
+#endif
+ c->rlimit_nofile = 0;
+ c->rlimit_maxcore = 0;
+ c->enabled = TRUE;
+
+ REF_INIT_RETAIN(c, rspamd_worker_conf_dtor);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ rspamd_worker_conf_cfg_fin, c);
+ }
+
+ return c;
+}
+
+
+static bool
+rspamd_include_map_handler(const guchar *data, gsize len,
+ const ucl_object_t *args, void *ud)
+{
+ auto *cfg = (struct rspamd_config *) ud;
+
+ auto ftok = rspamd_ftok_t{.len = len + 1, .begin = (char *) data};
+ auto *map_line = rspamd_mempool_ftokdup(cfg->cfg_pool, &ftok);
+
+ auto *cbdata = new rspamd_ucl_map_cbdata{cfg};
+ auto **pcbdata = new rspamd_ucl_map_cbdata *(cbdata);
+
+ return rspamd_map_add(cfg,
+ map_line,
+ "ucl include",
+ rspamd_ucl_read_cb,
+ rspamd_ucl_fin_cb,
+ rspamd_ucl_dtor_cb,
+ (void **) pcbdata,
+ nullptr, RSPAMD_MAP_DEFAULT) != nullptr;
+}
+
+/*
+ * Variables:
+ * $CONFDIR - configuration directory
+ * $LOCAL_CONFDIR - local configuration directory
+ * $RUNDIR - local states directory
+ * $DBDIR - databases dir
+ * $LOGDIR - logs dir
+ * $PLUGINSDIR - plugins dir
+ * $PREFIX - installation prefix
+ * $VERSION - rspamd version
+ */
+
+#define RSPAMD_CONFDIR_MACRO "CONFDIR"
+#define RSPAMD_LOCAL_CONFDIR_MACRO "LOCAL_CONFDIR"
+#define RSPAMD_RUNDIR_MACRO "RUNDIR"
+#define RSPAMD_DBDIR_MACRO "DBDIR"
+#define RSPAMD_LOGDIR_MACRO "LOGDIR"
+#define RSPAMD_PLUGINSDIR_MACRO "PLUGINSDIR"
+#define RSPAMD_SHAREDIR_MACRO "SHAREDIR"
+#define RSPAMD_RULESDIR_MACRO "RULESDIR"
+#define RSPAMD_WWWDIR_MACRO "WWWDIR"
+#define RSPAMD_PREFIX_MACRO "PREFIX"
+#define RSPAMD_VERSION_MACRO "VERSION"
+#define RSPAMD_VERSION_MAJOR_MACRO "VERSION_MAJOR"
+#define RSPAMD_VERSION_MINOR_MACRO "VERSION_MINOR"
+#define RSPAMD_BRANCH_VERSION_MACRO "BRANCH_VERSION"
+#define RSPAMD_HOSTNAME_MACRO "HOSTNAME"
+
+void rspamd_ucl_add_conf_variables(struct ucl_parser *parser, GHashTable *vars)
+{
+ GHashTableIter it;
+ gpointer k, v;
+
+ ucl_parser_register_variable(parser,
+ RSPAMD_CONFDIR_MACRO,
+ RSPAMD_CONFDIR);
+ ucl_parser_register_variable(parser,
+ RSPAMD_LOCAL_CONFDIR_MACRO,
+ RSPAMD_LOCAL_CONFDIR);
+ ucl_parser_register_variable(parser, RSPAMD_RUNDIR_MACRO,
+ RSPAMD_RUNDIR);
+ ucl_parser_register_variable(parser, RSPAMD_DBDIR_MACRO,
+ RSPAMD_DBDIR);
+ ucl_parser_register_variable(parser, RSPAMD_LOGDIR_MACRO,
+ RSPAMD_LOGDIR);
+ ucl_parser_register_variable(parser,
+ RSPAMD_PLUGINSDIR_MACRO,
+ RSPAMD_PLUGINSDIR);
+ ucl_parser_register_variable(parser,
+ RSPAMD_SHAREDIR_MACRO,
+ RSPAMD_SHAREDIR);
+ ucl_parser_register_variable(parser,
+ RSPAMD_RULESDIR_MACRO,
+ RSPAMD_RULESDIR);
+ ucl_parser_register_variable(parser, RSPAMD_WWWDIR_MACRO,
+ RSPAMD_WWWDIR);
+ ucl_parser_register_variable(parser, RSPAMD_PREFIX_MACRO,
+ RSPAMD_PREFIX);
+ ucl_parser_register_variable(parser, RSPAMD_VERSION_MACRO, RVERSION);
+ ucl_parser_register_variable(parser, RSPAMD_VERSION_MAJOR_MACRO,
+ RSPAMD_VERSION_MAJOR);
+ ucl_parser_register_variable(parser, RSPAMD_VERSION_MINOR_MACRO,
+ RSPAMD_VERSION_MINOR);
+ ucl_parser_register_variable(parser, RSPAMD_BRANCH_VERSION_MACRO,
+ RSPAMD_VERSION_BRANCH);
+
+ auto hostlen = sysconf(_SC_HOST_NAME_MAX);
+
+ if (hostlen <= 0) {
+ hostlen = 256;
+ }
+ else {
+ hostlen++;
+ }
+
+ auto hostbuf = std::string{};
+ hostbuf.resize(hostlen);
+
+ if (gethostname(hostbuf.data(), hostlen) != 0) {
+ hostbuf = "unknown";
+ }
+
+ /* UCL copies variables, so it is safe to pass an ephemeral buffer here */
+ ucl_parser_register_variable(parser, RSPAMD_HOSTNAME_MACRO,
+ hostbuf.c_str());
+
+ if (vars != nullptr) {
+ g_hash_table_iter_init(&it, vars);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ ucl_parser_register_variable(parser, (const char *) k, (const char *) v);
+ }
+ }
+}
+
+void rspamd_ucl_add_conf_macros(struct ucl_parser *parser,
+ struct rspamd_config *cfg)
+{
+ ucl_parser_register_macro(parser,
+ "include_map",
+ rspamd_include_map_handler,
+ cfg);
+}
+
+static void
+symbols_classifiers_callback(gpointer key, gpointer value, gpointer ud)
+{
+ auto *cfg = (struct rspamd_config *) ud;
+
+ /* Actually, statistics should act like any ordinary symbol */
+ rspamd_symcache_add_symbol(cfg->cache, (const char *) key, 0, nullptr, nullptr,
+ SYMBOL_TYPE_CLASSIFIER | SYMBOL_TYPE_NOSTAT, -1);
+}
+
+void rspamd_config_insert_classify_symbols(struct rspamd_config *cfg)
+{
+ g_hash_table_foreach(cfg->classifiers_symbols,
+ symbols_classifiers_callback,
+ cfg);
+}
+
+struct rspamd_classifier_config *
+rspamd_config_find_classifier(struct rspamd_config *cfg, const gchar *name)
+{
+ if (name == nullptr) {
+ return nullptr;
+ }
+
+ auto *cur = cfg->classifiers;
+ while (cur) {
+ auto *cf = (struct rspamd_classifier_config *) cur->data;
+
+ if (g_ascii_strcasecmp(cf->name, name) == 0) {
+ return cf;
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ return nullptr;
+}
+
+gboolean
+rspamd_config_check_statfiles(struct rspamd_classifier_config *cf)
+{
+ gboolean has_other = FALSE, res = FALSE, cur_class = FALSE;
+
+ /* First check classes directly */
+ auto *cur = cf->statfiles;
+ while (cur) {
+ auto *st = (struct rspamd_statfile_config *) cur->data;
+ if (!has_other) {
+ cur_class = st->is_spam;
+ has_other = TRUE;
+ }
+ else {
+ if (cur_class != st->is_spam) {
+ return TRUE;
+ }
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ if (!has_other) {
+ /* We have only one statfile */
+ return FALSE;
+ }
+ /* We have not detected any statfile that has different class, so turn on heuristic based on symbol's name */
+ has_other = FALSE;
+ cur = cf->statfiles;
+ while (cur) {
+ auto *st = (struct rspamd_statfile_config *) cur->data;
+ if (rspamd_substring_search_caseless(st->symbol,
+ strlen(st->symbol), "spam", 4) != -1) {
+ st->is_spam = TRUE;
+ }
+ else if (rspamd_substring_search_caseless(st->symbol,
+ strlen(st->symbol), "ham", 3) != -1) {
+ st->is_spam = FALSE;
+ }
+
+ if (!has_other) {
+ cur_class = st->is_spam;
+ has_other = TRUE;
+ }
+ else {
+ if (cur_class != st->is_spam) {
+ res = TRUE;
+ }
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ return res;
+}
+
+static gchar *
+rspamd_ucl_read_cb(gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ auto *cbdata = (struct rspamd_ucl_map_cbdata *) data->cur_data;
+ auto *prev = (struct rspamd_ucl_map_cbdata *) data->prev_data;
+
+ if (cbdata == nullptr) {
+ cbdata = new rspamd_ucl_map_cbdata{prev->cfg};
+ data->cur_data = cbdata;
+ }
+ cbdata->buf.append(chunk, len);
+
+ /* Say not to copy any part of this buffer */
+ return nullptr;
+}
+
+static void
+rspamd_ucl_fin_cb(struct map_cb_data *data, void **target)
+{
+ auto *cbdata = (struct rspamd_ucl_map_cbdata *) data->cur_data;
+ auto *prev = (struct rspamd_ucl_map_cbdata *) data->prev_data;
+ auto *cfg = data->map->cfg;
+
+ if (cbdata == nullptr) {
+ msg_err_config("map fin error: new data is nullptr");
+ return;
+ }
+
+ /* New data available */
+ auto *parser = ucl_parser_new(0);
+ if (!ucl_parser_add_chunk(parser, (unsigned char *) cbdata->buf.data(),
+ cbdata->buf.size())) {
+ msg_err_config("cannot parse map %s: %s",
+ data->map->name,
+ ucl_parser_get_error(parser));
+ ucl_parser_free(parser);
+ }
+ else {
+ auto *obj = ucl_parser_get_object(parser);
+ ucl_object_iter_t it = nullptr;
+
+ for (auto *cur = ucl_object_iterate(obj, &it, true); cur != nullptr; cur = ucl_object_iterate(obj, &it, true)) {
+ ucl_object_replace_key(cbdata->cfg->cfg_ucl_obj, (ucl_object_t *) cur,
+ cur->key, cur->keylen, false);
+ }
+
+ ucl_parser_free(parser);
+ ucl_object_unref(obj);
+ }
+
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ delete prev;
+}
+
+static void
+rspamd_ucl_dtor_cb(struct map_cb_data *data)
+{
+ auto *cbdata = (struct rspamd_ucl_map_cbdata *) data->cur_data;
+
+ delete cbdata;
+}
+
+gboolean
+rspamd_check_module(struct rspamd_config *cfg, module_t *mod)
+{
+ gboolean ret = TRUE;
+
+ if (mod != nullptr) {
+ if (mod->module_version != RSPAMD_CUR_MODULE_VERSION) {
+ msg_err_config("module %s has incorrect version %xd (%xd expected)",
+ mod->name, (gint) mod->module_version, RSPAMD_CUR_MODULE_VERSION);
+ ret = FALSE;
+ }
+ if (ret && mod->rspamd_version != RSPAMD_VERSION_NUM) {
+ msg_err_config("module %s has incorrect rspamd version %xL (%xL expected)",
+ mod->name, mod->rspamd_version, RSPAMD_VERSION_NUM);
+ ret = FALSE;
+ }
+ if (ret && strcmp(mod->rspamd_features, RSPAMD_FEATURES) != 0) {
+ msg_err_config("module %s has incorrect rspamd features '%s' ('%s' expected)",
+ mod->name, mod->rspamd_features, RSPAMD_FEATURES);
+ ret = FALSE;
+ }
+ }
+ else {
+ ret = FALSE;
+ }
+
+ return ret;
+}
+
+gboolean
+rspamd_check_worker(struct rspamd_config *cfg, worker_t *wrk)
+{
+ gboolean ret = TRUE;
+
+ if (wrk != nullptr) {
+ if (wrk->worker_version != RSPAMD_CUR_WORKER_VERSION) {
+ msg_err_config("worker %s has incorrect version %xd (%xd expected)",
+ wrk->name, wrk->worker_version, RSPAMD_CUR_WORKER_VERSION);
+ ret = FALSE;
+ }
+ if (ret && wrk->rspamd_version != RSPAMD_VERSION_NUM) {
+ msg_err_config("worker %s has incorrect rspamd version %xL (%xL expected)",
+ wrk->name, wrk->rspamd_version, RSPAMD_VERSION_NUM);
+ ret = FALSE;
+ }
+ if (ret && strcmp(wrk->rspamd_features, RSPAMD_FEATURES) != 0) {
+ msg_err_config("worker %s has incorrect rspamd features '%s' ('%s' expected)",
+ wrk->name, wrk->rspamd_features, RSPAMD_FEATURES);
+ ret = FALSE;
+ }
+ }
+ else {
+ ret = FALSE;
+ }
+
+ return ret;
+}
+
+gboolean
+rspamd_init_filters(struct rspamd_config *cfg, bool reconfig, bool strict)
+{
+ GList *cur;
+ module_t *mod, **pmod;
+ guint i = 0;
+ struct module_ctx *mod_ctx, *cur_ctx;
+ gboolean ret = TRUE;
+
+ /* Init all compiled modules */
+
+ for (pmod = cfg->compiled_modules; pmod != nullptr && *pmod != nullptr; pmod++) {
+ mod = *pmod;
+ if (rspamd_check_module(cfg, mod)) {
+ if (mod->module_init_func(cfg, &mod_ctx) == 0) {
+ g_assert(mod_ctx != nullptr);
+ g_ptr_array_add(cfg->c_modules, mod_ctx);
+ mod_ctx->mod = mod;
+ mod->ctx_offset = i++;
+ }
+ }
+ }
+
+ /* Now check what's enabled */
+ cur = g_list_first(cfg->filters);
+
+ while (cur) {
+ /* Perform modules configuring */
+ mod_ctx = nullptr;
+ PTR_ARRAY_FOREACH(cfg->c_modules, i, cur_ctx)
+ {
+ if (g_ascii_strcasecmp(cur_ctx->mod->name,
+ (const gchar *) cur->data) == 0) {
+ mod_ctx = cur_ctx;
+ break;
+ }
+ }
+
+ if (mod_ctx) {
+ mod = mod_ctx->mod;
+ mod_ctx->enabled = rspamd_config_is_module_enabled(cfg, mod->name);
+
+ if (reconfig) {
+ if (!mod->module_reconfig_func(cfg)) {
+ msg_err_config("reconfig of %s failed!", mod->name);
+ }
+ else {
+ msg_info_config("reconfig of %s", mod->name);
+ }
+ }
+ else {
+ if (!mod->module_config_func(cfg, strict)) {
+ msg_err_config("config of %s failed", mod->name);
+ ret = FALSE;
+
+ if (strict) {
+ return FALSE;
+ }
+ }
+ }
+ }
+
+ if (mod_ctx == nullptr) {
+ msg_warn_config("requested unknown module %s", cur->data);
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ ret = rspamd_init_lua_filters(cfg, 0, strict) && ret;
+
+ return ret;
+}
+
+static void
+rspamd_config_new_symbol(struct rspamd_config *cfg, const gchar *symbol,
+ gdouble score, const gchar *description, const gchar *group,
+ guint flags, guint priority, gint nshots)
+{
+ struct rspamd_symbols_group *sym_group;
+ struct rspamd_symbol *sym_def;
+ double *score_ptr;
+
+ sym_def =
+ rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_symbol);
+ score_ptr = rspamd_mempool_alloc_type(cfg->cfg_pool, double);
+
+ if (isnan(score)) {
+ /* In fact, it could be defined later */
+ msg_debug_config("score is not defined for symbol %s, set it to zero",
+ symbol);
+ score = 0.0;
+ /* Also set priority to 0 to allow override by anything */
+ sym_def->priority = 0;
+ flags |= RSPAMD_SYMBOL_FLAG_UNSCORED;
+ }
+ else {
+ sym_def->priority = priority;
+ }
+
+ *score_ptr = score;
+ sym_def->score = score;
+ sym_def->weight_ptr = score_ptr;
+ sym_def->name = rspamd_mempool_strdup(cfg->cfg_pool, symbol);
+ sym_def->flags = flags;
+ sym_def->nshots = nshots != 0 ? nshots : cfg->default_max_shots;
+ sym_def->groups = g_ptr_array_sized_new(1);
+ rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard,
+ sym_def->groups);
+
+ if (description) {
+ sym_def->description = rspamd_mempool_strdup(cfg->cfg_pool, description);
+ }
+
+ msg_debug_config("registered symbol %s with weight %.2f in and group %s",
+ sym_def->name, score, group);
+
+ g_hash_table_insert(cfg->symbols, sym_def->name, sym_def);
+
+ /* Search for symbol group */
+ if (group == nullptr) {
+ group = "ungrouped";
+ sym_def->flags |= RSPAMD_SYMBOL_FLAG_UNGROUPED;
+ }
+ else {
+ if (strcmp(group, "ungrouped") == 0) {
+ sym_def->flags |= RSPAMD_SYMBOL_FLAG_UNGROUPED;
+ }
+ }
+
+ sym_group = reinterpret_cast<rspamd_symbols_group *>(g_hash_table_lookup(cfg->groups, group));
+ if (sym_group == nullptr) {
+ /* Create new group */
+ sym_group = rspamd_config_new_group(cfg, group);
+ }
+
+ sym_def->gr = sym_group;
+ g_hash_table_insert(sym_group->symbols, sym_def->name, sym_def);
+
+ if (!(sym_def->flags & RSPAMD_SYMBOL_FLAG_UNGROUPED)) {
+ g_ptr_array_add(sym_def->groups, sym_group);
+ }
+}
+
+
+gboolean
+rspamd_config_add_symbol(struct rspamd_config *cfg,
+ const gchar *symbol,
+ gdouble score,
+ const gchar *description,
+ const gchar *group,
+ guint flags,
+ guint priority,
+ gint nshots)
+{
+ struct rspamd_symbol *sym_def;
+ struct rspamd_symbols_group *sym_group;
+ guint i;
+
+ g_assert(cfg != nullptr);
+ g_assert(symbol != nullptr);
+
+ sym_def = reinterpret_cast<rspamd_symbol *>(g_hash_table_lookup(cfg->symbols, symbol));
+
+ if (sym_def != nullptr) {
+ if (group != nullptr) {
+ gboolean has_group = FALSE;
+
+ PTR_ARRAY_FOREACH(sym_def->groups, i, sym_group)
+ {
+ if (g_ascii_strcasecmp(sym_group->name, group) == 0) {
+ /* Group is already here */
+ has_group = TRUE;
+ break;
+ }
+ }
+
+ if (!has_group) {
+ /* Non-empty group has a priority over non-grouped one */
+ sym_group = reinterpret_cast<rspamd_symbols_group *>(g_hash_table_lookup(cfg->groups, group));
+
+ if (sym_group == nullptr) {
+ /* Create new group */
+ sym_group = rspamd_config_new_group(cfg, group);
+ }
+
+ if ((!sym_def->gr) || (sym_def->flags & RSPAMD_SYMBOL_FLAG_UNGROUPED)) {
+ sym_def->gr = sym_group;
+ sym_def->flags &= ~RSPAMD_SYMBOL_FLAG_UNGROUPED;
+ }
+
+ g_hash_table_insert(sym_group->symbols, sym_def->name, sym_def);
+ sym_def->flags &= ~(RSPAMD_SYMBOL_FLAG_UNGROUPED);
+ g_ptr_array_add(sym_def->groups, sym_group);
+ }
+ }
+
+ if (sym_def->priority > priority &&
+ (isnan(score) || !(sym_def->flags & RSPAMD_SYMBOL_FLAG_UNSCORED))) {
+ msg_debug_config("symbol %s has been already registered with "
+ "priority %ud, do not override (new priority: %ud)",
+ symbol,
+ sym_def->priority,
+ priority);
+ /* But we can still add description */
+ if (!sym_def->description && description) {
+ sym_def->description = rspamd_mempool_strdup(cfg->cfg_pool,
+ description);
+ }
+
+ /* Or nshots in case of non-default setting */
+ if (nshots != 0 && sym_def->nshots == cfg->default_max_shots) {
+ sym_def->nshots = nshots;
+ }
+
+ return FALSE;
+ }
+ else {
+
+ if (!isnan(score)) {
+ msg_debug_config("symbol %s has been already registered with "
+ "priority %ud, override it with new priority: %ud, "
+ "old score: %.2f, new score: %.2f",
+ symbol,
+ sym_def->priority,
+ priority,
+ sym_def->score,
+ score);
+
+ *sym_def->weight_ptr = score;
+ sym_def->score = score;
+ sym_def->priority = priority;
+ sym_def->flags &= ~RSPAMD_SYMBOL_FLAG_UNSCORED;
+ }
+
+ sym_def->flags = flags;
+
+ if (nshots != 0) {
+ sym_def->nshots = nshots;
+ }
+ else {
+ /* Do not reset unless we have exactly lower priority */
+ if (sym_def->priority < priority) {
+ sym_def->nshots = cfg->default_max_shots;
+ }
+ }
+
+ if (description) {
+ sym_def->description = rspamd_mempool_strdup(cfg->cfg_pool,
+ description);
+ }
+
+
+ /* We also check group information in this case */
+ if (group != nullptr && sym_def->gr != nullptr &&
+ strcmp(group, sym_def->gr->name) != 0) {
+
+ sym_group = reinterpret_cast<rspamd_symbols_group *>(g_hash_table_lookup(cfg->groups, group));
+
+ if (sym_group == nullptr) {
+ /* Create new group */
+ sym_group = rspamd_config_new_group(cfg, group);
+ }
+
+ if (!(sym_group->flags & RSPAMD_SYMBOL_GROUP_UNGROUPED)) {
+ msg_debug_config("move symbol %s from group %s to %s",
+ sym_def->name, sym_def->gr->name, group);
+ g_hash_table_remove(sym_def->gr->symbols, sym_def->name);
+ sym_def->gr = sym_group;
+ g_hash_table_insert(sym_group->symbols, sym_def->name, sym_def);
+ }
+ }
+
+ return TRUE;
+ }
+ }
+
+ /* This is called merely when we have an undefined symbol */
+ rspamd_config_new_symbol(cfg, symbol, score, description,
+ group, flags, priority, nshots);
+
+ return TRUE;
+}
+
+gboolean
+rspamd_config_add_symbol_group(struct rspamd_config *cfg,
+ const gchar *symbol,
+ const gchar *group)
+{
+ struct rspamd_symbol *sym_def;
+ struct rspamd_symbols_group *sym_group;
+ guint i;
+
+ g_assert(cfg != nullptr);
+ g_assert(symbol != nullptr);
+ g_assert(group != nullptr);
+
+ sym_def = reinterpret_cast<rspamd_symbol *>(g_hash_table_lookup(cfg->symbols, symbol));
+
+ if (sym_def != nullptr) {
+ gboolean has_group = FALSE;
+
+ PTR_ARRAY_FOREACH(sym_def->groups, i, sym_group)
+ {
+ if (g_ascii_strcasecmp(sym_group->name, group) == 0) {
+ /* Group is already here */
+ has_group = TRUE;
+ break;
+ }
+ }
+
+ if (!has_group) {
+ /* Non-empty group has a priority over non-grouped one */
+ sym_group = reinterpret_cast<rspamd_symbols_group *>(g_hash_table_lookup(cfg->groups, group));
+
+ if (sym_group == nullptr) {
+ /* Create new group */
+ sym_group = rspamd_config_new_group(cfg, group);
+ }
+
+ if (!sym_def->gr) {
+ sym_def->gr = sym_group;
+ }
+
+ g_hash_table_insert(sym_group->symbols, sym_def->name, sym_def);
+ sym_def->flags &= ~(RSPAMD_SYMBOL_FLAG_UNGROUPED);
+ g_ptr_array_add(sym_def->groups, sym_group);
+
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_config_is_enabled_from_ucl(rspamd_mempool_t *pool,
+ const ucl_object_t *obj)
+{
+
+ const ucl_object_t *enabled;
+
+ enabled = ucl_object_lookup(obj, "enabled");
+
+ if (enabled) {
+ if (ucl_object_type(enabled) == UCL_BOOLEAN) {
+ return ucl_object_toboolean(enabled);
+ }
+ else if (ucl_object_type(enabled) == UCL_STRING) {
+ gint ret = rspamd_config_parse_flag(ucl_object_tostring(enabled), 0);
+
+ if (ret == 0) {
+ return FALSE;
+ }
+ else if (ret == -1) {
+
+ msg_info_pool_check("wrong value for the `enabled` key");
+ return FALSE;
+ }
+ /* Default return is TRUE here */
+ }
+ }
+
+
+ const ucl_object_t *disabled;
+
+ disabled = ucl_object_lookup(obj, "disabled");
+
+ if (disabled) {
+ if (ucl_object_type(disabled) == UCL_BOOLEAN) {
+ return !ucl_object_toboolean(disabled);
+ }
+ else if (ucl_object_type(disabled) == UCL_STRING) {
+ gint ret = rspamd_config_parse_flag(ucl_object_tostring(disabled), 0);
+
+ if (ret == 0) {
+ return TRUE;
+ }
+ else if (ret == -1) {
+
+ msg_info_pool_check("wrong value for the `disabled` key");
+ return FALSE;
+ }
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_config_is_module_enabled(struct rspamd_config *cfg,
+ const gchar *module_name)
+{
+ gboolean is_c = FALSE, enabled;
+ const ucl_object_t *conf;
+ GList *cur;
+ struct rspamd_symbols_group *gr;
+ lua_State *L = RSPAMD_LUA_CFG_STATE(cfg);
+ struct module_ctx *cur_ctx;
+ guint i;
+
+ PTR_ARRAY_FOREACH(cfg->c_modules, i, cur_ctx)
+ {
+ if (g_ascii_strcasecmp(cur_ctx->mod->name, module_name) == 0) {
+ is_c = TRUE;
+ break;
+ }
+ }
+
+ if (g_hash_table_lookup(cfg->explicit_modules, module_name) != nullptr) {
+ /* Always load module */
+ rspamd_plugins_table_push_elt(L, "enabled", module_name);
+
+ return TRUE;
+ }
+
+ if (is_c) {
+ gboolean found = FALSE;
+
+ cur = g_list_first(cfg->filters);
+
+ while (cur) {
+ if (strcmp((char *) cur->data, module_name) == 0) {
+ found = TRUE;
+ break;
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ if (!found) {
+ msg_info_config("internal module %s is disable in `filters` line",
+ module_name);
+ rspamd_plugins_table_push_elt(L,
+ "disabled_explicitly", module_name);
+
+ return FALSE;
+ }
+ }
+
+ conf = ucl_object_lookup(cfg->cfg_ucl_obj, module_name);
+
+ if (conf == nullptr) {
+ rspamd_plugins_table_push_elt(L, "disabled_unconfigured", module_name);
+
+ msg_info_config("%s module %s is enabled but has not been configured",
+ is_c ? "internal" : "lua", module_name);
+
+ if (!is_c) {
+ msg_info_config("%s disabling unconfigured lua module", module_name);
+ return FALSE;
+ }
+ }
+ else {
+ enabled = rspamd_config_is_enabled_from_ucl(cfg->cfg_pool, conf);
+
+ if (!enabled) {
+ rspamd_plugins_table_push_elt(L,
+ "disabled_explicitly", module_name);
+
+ msg_info_config(
+ "%s module %s is disabled in the configuration",
+ is_c ? "internal" : "lua", module_name);
+ return FALSE;
+ }
+ }
+
+ /* Now we check symbols group */
+ gr = reinterpret_cast<rspamd_symbols_group *>(g_hash_table_lookup(cfg->groups, module_name));
+
+ if (gr) {
+ if (gr->flags & RSPAMD_SYMBOL_GROUP_DISABLED) {
+ rspamd_plugins_table_push_elt(L,
+ "disabled_explicitly", module_name);
+ msg_info_config("%s module %s is disabled in the configuration as "
+ "its group has been disabled",
+ is_c ? "internal" : "lua", module_name);
+
+ return FALSE;
+ }
+ }
+
+ rspamd_plugins_table_push_elt(L, "enabled", module_name);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_config_action_from_ucl(struct rspamd_config *cfg,
+ struct rspamd_action *act,
+ const ucl_object_t *obj,
+ guint priority)
+{
+ auto threshold = NAN;
+ int flags = 0;
+
+ auto obj_type = ucl_object_type(obj);
+
+ if (obj_type == UCL_OBJECT) {
+ obj_type = ucl_object_type(obj);
+
+ const auto *elt = ucl_object_lookup_any(obj, "score", "threshold", nullptr);
+
+ if (elt) {
+ threshold = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(obj, "flags");
+
+ if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+ const ucl_object_t *cur;
+ ucl_object_iter_t it = nullptr;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != nullptr) {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ const gchar *fl_str = ucl_object_tostring(cur);
+
+ if (g_ascii_strcasecmp(fl_str, "no_threshold") == 0) {
+ flags |= RSPAMD_ACTION_NO_THRESHOLD;
+ }
+ else if (g_ascii_strcasecmp(fl_str, "threshold_only") == 0) {
+ flags |= RSPAMD_ACTION_THRESHOLD_ONLY;
+ }
+ else if (g_ascii_strcasecmp(fl_str, "ham") == 0) {
+ flags |= RSPAMD_ACTION_HAM;
+ }
+ else {
+ msg_warn_config("unknown action flag: %s", fl_str);
+ }
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "milter");
+
+ if (elt) {
+ const gchar *milter_action = ucl_object_tostring(elt);
+
+ if (strcmp(milter_action, "discard") == 0) {
+ flags |= RSPAMD_ACTION_MILTER;
+ act->action_type = METRIC_ACTION_DISCARD;
+ }
+ else if (strcmp(milter_action, "quarantine") == 0) {
+ flags |= RSPAMD_ACTION_MILTER;
+ act->action_type = METRIC_ACTION_QUARANTINE;
+ }
+ else {
+ msg_warn_config("unknown milter action: %s", milter_action);
+ }
+ }
+ }
+ else if (obj_type == UCL_FLOAT || obj_type == UCL_INT) {
+ threshold = ucl_object_todouble(obj);
+ }
+
+ /* TODO: add lua references support */
+
+ if (isnan(threshold) && !(flags & RSPAMD_ACTION_NO_THRESHOLD)) {
+ msg_err_config("action %s has no threshold being set and it is not"
+ " a no threshold action",
+ act->name);
+
+ return FALSE;
+ }
+
+ act->threshold = threshold;
+ act->flags = flags;
+
+ enum rspamd_action_type std_act;
+
+ if (!(flags & RSPAMD_ACTION_MILTER)) {
+ if (rspamd_action_from_str(act->name, &std_act)) {
+ act->action_type = std_act;
+ }
+ else {
+ act->action_type = METRIC_ACTION_CUSTOM;
+ }
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_config_set_action_score(struct rspamd_config *cfg,
+ const gchar *action_name,
+ const ucl_object_t *obj)
+{
+ enum rspamd_action_type std_act;
+ const ucl_object_t *elt;
+ guint priority = ucl_object_get_priority(obj), obj_type;
+
+ g_assert(cfg != nullptr);
+ g_assert(action_name != nullptr);
+
+ obj_type = ucl_object_type(obj);
+
+ if (obj_type == UCL_OBJECT) {
+ elt = ucl_object_lookup(obj, "priority");
+
+ if (elt) {
+ priority = ucl_object_toint(elt);
+ }
+ }
+
+ /* Here are dragons:
+ * We have `canonical` name for actions, such as `soft reject` and
+ * configuration names for actions (used to be more convenient), such
+ * as `soft_reject`. Unfortunately, we must have heuristic for this
+ * variance of names.
+ */
+
+ if (rspamd_action_from_str(action_name, &std_act)) {
+ action_name = rspamd_action_to_str(std_act);
+ }
+
+ auto actions = RSPAMD_CFG_ACTIONS(cfg);
+ auto existing_act_it = actions->actions_by_name.find(action_name);
+
+ if (existing_act_it != actions->actions_by_name.end()) {
+ auto *act = existing_act_it->second.get();
+ /* Existing element */
+ if (act->priority <= priority) {
+ /* We can replace data */
+ auto old_pri = act->priority;
+ auto old_thr = act->threshold;
+
+ if (rspamd_config_action_from_ucl(cfg, act, obj, priority)) {
+ msg_info_config("action %s has been already registered with "
+ "priority %ud, override it with new priority: %ud, "
+ "old threshold: %.2f, new threshold: %.2f",
+ action_name,
+ old_pri,
+ priority,
+ old_thr,
+ act->threshold);
+ actions->sort();
+ }
+ else {
+ return FALSE;
+ }
+ }
+ else {
+ msg_info_config("action %s has been already registered with "
+ "priority %ud, do not override (new priority: %ud)",
+ action_name,
+ act->priority,
+ priority);
+ }
+ }
+ else {
+ /* Add new element */
+ auto act = std::make_shared<rspamd_action>();
+ act->name = rspamd_mempool_strdup(cfg->cfg_pool, action_name);
+
+ if (rspamd_config_action_from_ucl(cfg, act.get(), obj, priority)) {
+ actions->add_action(std::move(act));
+ }
+ else {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_config_maybe_disable_action(struct rspamd_config *cfg,
+ const gchar *action_name,
+ guint priority)
+{
+ auto actions = RSPAMD_CFG_ACTIONS(cfg);
+ auto maybe_act = rspamd::find_map(actions->actions_by_name, action_name);
+
+ if (maybe_act) {
+ auto *act = maybe_act.value().get().get();
+ if (priority >= act->priority) {
+ msg_info_config("disable action %s; old priority: %ud, new priority: %ud",
+ action_name,
+ act->priority,
+ priority);
+
+ act->threshold = NAN;
+ act->priority = priority;
+ act->flags |= RSPAMD_ACTION_NO_THRESHOLD;
+
+ return TRUE;
+ }
+ else {
+ msg_info_config("action %s has been already registered with "
+ "priority %ud, cannot disable it with new priority: %ud",
+ action_name,
+ act->priority,
+ priority);
+ }
+ }
+
+ return FALSE;
+}
+
+struct rspamd_action *
+rspamd_config_get_action(struct rspamd_config *cfg, const gchar *name)
+{
+ auto actions = RSPAMD_CFG_ACTIONS(cfg);
+ auto maybe_act = rspamd::find_map(actions->actions_by_name, name);
+
+ if (maybe_act) {
+ return maybe_act.value().get().get();
+ }
+
+ return nullptr;
+}
+
+struct rspamd_action *
+rspamd_config_get_action_by_type(struct rspamd_config *cfg,
+ enum rspamd_action_type type)
+{
+ for (const auto &act: RSPAMD_CFG_ACTIONS(cfg)->actions) {
+ if (act->action_type == type) {
+ return act.get();
+ }
+ }
+
+ return nullptr;
+}
+
+void rspamd_config_actions_foreach(struct rspamd_config *cfg,
+ void (*func)(struct rspamd_action *act, void *d),
+ void *data)
+{
+ for (const auto &act: RSPAMD_CFG_ACTIONS(cfg)->actions) {
+ func(act.get(), data);
+ }
+}
+
+void rspamd_config_actions_foreach_enumerate(struct rspamd_config *cfg,
+ void (*func)(int idx, struct rspamd_action *act, void *d),
+ void *data)
+{
+ for (const auto &[idx, act]: rspamd::enumerate(RSPAMD_CFG_ACTIONS(cfg)->actions)) {
+ func(idx, act.get(), data);
+ }
+}
+
+gsize rspamd_config_actions_size(struct rspamd_config *cfg)
+{
+ return RSPAMD_CFG_ACTIONS(cfg)->actions.size();
+}
+
+gboolean
+rspamd_config_radix_from_ucl(struct rspamd_config *cfg, const ucl_object_t *obj, const gchar *description,
+ struct rspamd_radix_map_helper **target, GError **err,
+ struct rspamd_worker *worker, const gchar *map_name)
+{
+ ucl_type_t type;
+ ucl_object_iter_t it = nullptr;
+ const ucl_object_t *cur, *cur_elt;
+ const gchar *str;
+
+ /* Cleanup */
+ *target = nullptr;
+
+ LL_FOREACH(obj, cur_elt)
+ {
+ type = ucl_object_type(cur_elt);
+
+ switch (type) {
+ case UCL_STRING:
+ /* Either map or a list of IPs */
+ str = ucl_object_tostring(cur_elt);
+
+ if (rspamd_map_is_map(str)) {
+ if (rspamd_map_add_from_ucl(cfg, cur_elt,
+ description,
+ rspamd_radix_read,
+ rspamd_radix_fin,
+ rspamd_radix_dtor,
+ (void **) target,
+ worker, RSPAMD_MAP_DEFAULT) == nullptr) {
+ g_set_error(err,
+ g_quark_from_static_string("rspamd-config"),
+ EINVAL, "bad map definition %s for %s", str,
+ ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+ }
+ else {
+ /* Just a list */
+ if (!*target) {
+ *target = rspamd_map_helper_new_radix(
+ rspamd_map_add_fake(cfg, description, map_name));
+ }
+
+ rspamd_map_helper_insert_radix_resolve(*target, str, "");
+ }
+ break;
+ case UCL_OBJECT:
+ /* Should be a map description */
+ if (rspamd_map_add_from_ucl(cfg, cur_elt,
+ description,
+ rspamd_radix_read,
+ rspamd_radix_fin,
+ rspamd_radix_dtor,
+ (void **) target,
+ worker, RSPAMD_MAP_DEFAULT) == nullptr) {
+ g_set_error(err,
+ g_quark_from_static_string("rspamd-config"),
+ EINVAL, "bad map object for %s", ucl_object_key(obj));
+ return FALSE;
+ }
+
+ return TRUE;
+ break;
+ case UCL_ARRAY:
+ /* List of IP addresses */
+ it = ucl_object_iterate_new(cur_elt);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != nullptr) {
+
+
+ if (ucl_object_type(cur) == UCL_STRING) {
+ str = ucl_object_tostring(cur);
+ if (!*target) {
+ *target = rspamd_map_helper_new_radix(
+ rspamd_map_add_fake(cfg, description, map_name));
+ }
+
+ rspamd_map_helper_insert_radix_resolve(*target, str, "");
+ }
+ else {
+ g_set_error(err,
+ g_quark_from_static_string("rspamd-config"),
+ EINVAL, "bad element inside array object for %s: expected string, got: %s",
+ ucl_object_key(obj), ucl_object_type_to_string(ucl_object_type(cur)));
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ break;
+ default:
+ g_set_error(err, g_quark_from_static_string("rspamd-config"),
+ EINVAL, "bad map type %s for %s",
+ ucl_object_type_to_string(type),
+ ucl_object_key(obj));
+ return FALSE;
+ }
+ }
+
+ /* Destroy on cfg cleanup */
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) rspamd_map_helper_destroy_radix,
+ *target);
+
+ return TRUE;
+}
+
+constexpr const auto action_types = frozen::make_unordered_map<frozen::string, enum rspamd_action_type>({
+ {"reject", METRIC_ACTION_REJECT},
+ {"greylist", METRIC_ACTION_GREYLIST},
+ {"add header", METRIC_ACTION_ADD_HEADER},
+ {"add_header", METRIC_ACTION_ADD_HEADER},
+ {"rewrite subject", METRIC_ACTION_REWRITE_SUBJECT},
+ {"rewrite_subject", METRIC_ACTION_REWRITE_SUBJECT},
+ {"soft reject", METRIC_ACTION_SOFT_REJECT},
+ {"soft_reject", METRIC_ACTION_SOFT_REJECT},
+ {"no action", METRIC_ACTION_NOACTION},
+ {"no_action", METRIC_ACTION_NOACTION},
+ {"accept", METRIC_ACTION_NOACTION},
+ {"quarantine", METRIC_ACTION_QUARANTINE},
+ {"discard", METRIC_ACTION_DISCARD},
+
+});
+
+gboolean
+rspamd_action_from_str(const gchar *data, enum rspamd_action_type *result)
+{
+ auto maybe_action = rspamd::find_map(action_types, std::string_view{data});
+
+ if (maybe_action) {
+ *result = maybe_action.value().get();
+ return true;
+ }
+ else {
+ return false;
+ }
+}
+
+const gchar *
+rspamd_action_to_str(enum rspamd_action_type action)
+{
+ switch (action) {
+ case METRIC_ACTION_REJECT:
+ return "reject";
+ case METRIC_ACTION_SOFT_REJECT:
+ return "soft reject";
+ case METRIC_ACTION_REWRITE_SUBJECT:
+ return "rewrite subject";
+ case METRIC_ACTION_ADD_HEADER:
+ return "add header";
+ case METRIC_ACTION_GREYLIST:
+ return "greylist";
+ case METRIC_ACTION_NOACTION:
+ return "no action";
+ case METRIC_ACTION_MAX:
+ return "invalid max action";
+ case METRIC_ACTION_CUSTOM:
+ return "custom";
+ case METRIC_ACTION_DISCARD:
+ return "discard";
+ case METRIC_ACTION_QUARANTINE:
+ return "quarantine";
+ }
+
+ return "unknown action";
+}
+
+const gchar *
+rspamd_action_to_str_alt(enum rspamd_action_type action)
+{
+ switch (action) {
+ case METRIC_ACTION_REJECT:
+ return "reject";
+ case METRIC_ACTION_SOFT_REJECT:
+ return "soft_reject";
+ case METRIC_ACTION_REWRITE_SUBJECT:
+ return "rewrite_subject";
+ case METRIC_ACTION_ADD_HEADER:
+ return "add_header";
+ case METRIC_ACTION_GREYLIST:
+ return "greylist";
+ case METRIC_ACTION_NOACTION:
+ return "no action";
+ case METRIC_ACTION_MAX:
+ return "invalid max action";
+ case METRIC_ACTION_CUSTOM:
+ return "custom";
+ case METRIC_ACTION_DISCARD:
+ return "discard";
+ case METRIC_ACTION_QUARANTINE:
+ return "quarantine";
+ }
+
+ return "unknown action";
+}
+
+static void
+rspamd_config_settings_elt_dtor(struct rspamd_config_settings_elt *e)
+{
+ if (e->symbols_enabled) {
+ ucl_object_unref(e->symbols_enabled);
+ }
+ if (e->symbols_disabled) {
+ ucl_object_unref(e->symbols_disabled);
+ }
+}
+
+guint32
+rspamd_config_name_to_id(const gchar *name, gsize namelen)
+{
+ guint64 h;
+
+ h = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64,
+ name, namelen, 0x0);
+ /* Take the lower part of hash as LE number */
+ return ((guint32) GUINT64_TO_LE(h));
+}
+
+struct rspamd_config_settings_elt *
+rspamd_config_find_settings_id_ref(struct rspamd_config *cfg,
+ guint32 id)
+{
+ struct rspamd_config_settings_elt *cur;
+
+ DL_FOREACH(cfg->setting_ids, cur)
+ {
+ if (cur->id == id) {
+ REF_RETAIN(cur);
+ return cur;
+ }
+ }
+
+ return nullptr;
+}
+
+struct rspamd_config_settings_elt *rspamd_config_find_settings_name_ref(
+ struct rspamd_config *cfg,
+ const gchar *name, gsize namelen)
+{
+ guint32 id;
+
+ id = rspamd_config_name_to_id(name, namelen);
+
+ return rspamd_config_find_settings_id_ref(cfg, id);
+}
+
+void rspamd_config_register_settings_id(struct rspamd_config *cfg,
+ const gchar *name,
+ ucl_object_t *symbols_enabled,
+ ucl_object_t *symbols_disabled,
+ enum rspamd_config_settings_policy policy)
+{
+ struct rspamd_config_settings_elt *elt;
+ guint32 id;
+
+ id = rspamd_config_name_to_id(name, strlen(name));
+ elt = rspamd_config_find_settings_id_ref(cfg, id);
+
+ if (elt) {
+ /* Need to replace */
+ struct rspamd_config_settings_elt *nelt;
+
+ DL_DELETE(cfg->setting_ids, elt);
+
+ nelt = rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_config_settings_elt);
+
+ nelt->id = id;
+ nelt->name = rspamd_mempool_strdup(cfg->cfg_pool, name);
+
+ if (symbols_enabled) {
+ nelt->symbols_enabled = ucl_object_ref(symbols_enabled);
+ }
+
+ if (symbols_disabled) {
+ nelt->symbols_disabled = ucl_object_ref(symbols_disabled);
+ }
+
+ nelt->policy = policy;
+
+ REF_INIT_RETAIN(nelt, rspamd_config_settings_elt_dtor);
+ msg_warn_config("replace settings id %ud (%s)", id, name);
+ rspamd_symcache_process_settings_elt(cfg->cache, elt);
+ DL_APPEND(cfg->setting_ids, nelt);
+
+ /*
+ * Need to unref old element twice as there are two reference holders:
+ * 1. Config structure as we call REF_INIT_RETAIN
+ * 2. rspamd_config_find_settings_id_ref also increases refcount
+ */
+ REF_RELEASE(elt);
+ REF_RELEASE(elt);
+ }
+ else {
+ elt = rspamd_mempool_alloc0_type(cfg->cfg_pool, struct rspamd_config_settings_elt);
+
+ elt->id = id;
+ elt->name = rspamd_mempool_strdup(cfg->cfg_pool, name);
+
+ if (symbols_enabled) {
+ elt->symbols_enabled = ucl_object_ref(symbols_enabled);
+ }
+
+ if (symbols_disabled) {
+ elt->symbols_disabled = ucl_object_ref(symbols_disabled);
+ }
+
+ elt->policy = policy;
+
+ msg_info_config("register new settings id %ud (%s)", id, name);
+ REF_INIT_RETAIN(elt, rspamd_config_settings_elt_dtor);
+ rspamd_symcache_process_settings_elt(cfg->cache, elt);
+ DL_APPEND(cfg->setting_ids, elt);
+ }
+}
+
+int rspamd_config_ev_backend_get(struct rspamd_config *cfg)
+{
+#define AUTO_BACKEND (ev_supported_backends() & ~EVBACKEND_IOURING)
+ if (cfg == nullptr || cfg->events_backend == nullptr) {
+ return AUTO_BACKEND;
+ }
+
+ if (strcmp(cfg->events_backend, "auto") == 0) {
+ return AUTO_BACKEND;
+ }
+ else if (strcmp(cfg->events_backend, "epoll") == 0) {
+ if (ev_supported_backends() & EVBACKEND_EPOLL) {
+ return EVBACKEND_EPOLL;
+ }
+ else {
+ msg_warn_config("unsupported events_backend: %s; defaulting to auto",
+ cfg->events_backend);
+ return AUTO_BACKEND;
+ }
+ }
+ else if (strcmp(cfg->events_backend, "iouring") == 0) {
+ if (ev_supported_backends() & EVBACKEND_IOURING) {
+ return EVBACKEND_IOURING;
+ }
+ else {
+ msg_warn_config("unsupported events_backend: %s; defaulting to auto",
+ cfg->events_backend);
+ return AUTO_BACKEND;
+ }
+ }
+ else if (strcmp(cfg->events_backend, "kqueue") == 0) {
+ if (ev_supported_backends() & EVBACKEND_KQUEUE) {
+ return EVBACKEND_KQUEUE;
+ }
+ else {
+ msg_warn_config("unsupported events_backend: %s; defaulting to auto",
+ cfg->events_backend);
+ return AUTO_BACKEND;
+ }
+ }
+ else if (strcmp(cfg->events_backend, "poll") == 0) {
+ return EVBACKEND_POLL;
+ }
+ else if (strcmp(cfg->events_backend, "select") == 0) {
+ return EVBACKEND_SELECT;
+ }
+ else {
+ msg_warn_config("unknown events_backend: %s; defaulting to auto",
+ cfg->events_backend);
+ }
+
+ return AUTO_BACKEND;
+}
+
+const gchar *
+rspamd_config_ev_backend_to_string(int ev_backend, gboolean *effective)
+{
+#define SET_EFFECTIVE(b) \
+ do { \
+ if ((effective) != nullptr) *(effective) = b; \
+ } while (0)
+
+ if ((ev_backend & EVBACKEND_ALL) == EVBACKEND_ALL) {
+ SET_EFFECTIVE(TRUE);
+ return "auto";
+ }
+
+ if (ev_backend & EVBACKEND_IOURING) {
+ SET_EFFECTIVE(TRUE);
+ return "epoll+io_uring";
+ }
+ if (ev_backend & EVBACKEND_LINUXAIO) {
+ SET_EFFECTIVE(TRUE);
+ return "epoll+aio";
+ }
+ if (ev_backend & EVBACKEND_IOURING) {
+ SET_EFFECTIVE(TRUE);
+ return "epoll+io_uring";
+ }
+ if (ev_backend & EVBACKEND_LINUXAIO) {
+ SET_EFFECTIVE(TRUE);
+ return "epoll+aio";
+ }
+ if (ev_backend & EVBACKEND_EPOLL) {
+ SET_EFFECTIVE(TRUE);
+ return "epoll";
+ }
+ if (ev_backend & EVBACKEND_KQUEUE) {
+ SET_EFFECTIVE(TRUE);
+ return "kqueue";
+ }
+ if (ev_backend & EVBACKEND_POLL) {
+ SET_EFFECTIVE(FALSE);
+ return "poll";
+ }
+ if (ev_backend & EVBACKEND_SELECT) {
+ SET_EFFECTIVE(FALSE);
+ return "select";
+ }
+
+ SET_EFFECTIVE(FALSE);
+ return "unknown";
+#undef SET_EFFECTIVE
+}
+
+struct rspamd_external_libs_ctx *
+rspamd_init_libs(void)
+{
+ struct rlimit rlim;
+ struct ottery_config *ottery_cfg;
+
+ auto *ctx = g_new0(struct rspamd_external_libs_ctx, 1);
+ ctx->crypto_ctx = rspamd_cryptobox_init();
+ ottery_cfg = (struct ottery_config *) g_malloc0(ottery_get_sizeof_config());
+ ottery_config_init(ottery_cfg);
+ ctx->ottery_cfg = ottery_cfg;
+
+ rspamd_openssl_maybe_init();
+
+ /* Check if we have rdrand */
+ if ((ctx->crypto_ctx->cpu_config & CPUID_RDRAND) == 0) {
+ ottery_config_disable_entropy_sources(ottery_cfg,
+ OTTERY_ENTROPY_SRC_RDRAND);
+ }
+
+ g_assert(ottery_init(ottery_cfg) == 0);
+#if OPENSSL_VERSION_NUMBER >= 0x1000104fL && OPENSSL_VERSION_NUMBER < 0x30000000L && !defined(LIBRESSL_VERSION_NUMBER)
+ RAND_set_rand_engine(nullptr);
+#endif
+
+ /* Configure utf8 library */
+ guint utf8_flags = 0;
+
+ if ((ctx->crypto_ctx->cpu_config & CPUID_SSE41)) {
+ utf8_flags |= RSPAMD_FAST_UTF8_FLAG_SSE41;
+ }
+ if ((ctx->crypto_ctx->cpu_config & CPUID_AVX2)) {
+ utf8_flags |= RSPAMD_FAST_UTF8_FLAG_AVX2;
+ }
+
+ rspamd_fast_utf8_library_init(utf8_flags);
+
+#ifdef HAVE_LOCALE_H
+ if (getenv("LANG") == nullptr) {
+ setlocale(LC_ALL, "C");
+ setlocale(LC_CTYPE, "C");
+ setlocale(LC_MESSAGES, "C");
+ setlocale(LC_TIME, "C");
+ }
+ else {
+ /* Just set the default locale */
+ setlocale(LC_ALL, "");
+ /* But for some issues we still want C locale */
+ setlocale(LC_NUMERIC, "C");
+ }
+#endif
+
+ ctx->ssl_ctx = rspamd_init_ssl_ctx();
+ ctx->ssl_ctx_noverify = rspamd_init_ssl_ctx_noverify();
+ rspamd_random_seed_fast();
+
+ /* Set stack size for pcre */
+ getrlimit(RLIMIT_STACK, &rlim);
+ rlim.rlim_cur = 100 * 1024 * 1024;
+ rlim.rlim_max = rlim.rlim_cur;
+ setrlimit(RLIMIT_STACK, &rlim);
+
+ ctx->local_addrs = rspamd_inet_library_init();
+ REF_INIT_RETAIN(ctx, rspamd_deinit_libs);
+
+ return ctx;
+}
+
+static struct zstd_dictionary *
+rspamd_open_zstd_dictionary(const char *path)
+{
+ struct zstd_dictionary *dict;
+
+ dict = g_new0(zstd_dictionary, 1);
+ dict->dict = rspamd_file_xmap(path, PROT_READ, &dict->size, TRUE);
+
+ if (dict->dict == nullptr) {
+ g_free(dict);
+
+ return nullptr;
+ }
+
+ dict->id = -1;
+
+ if (dict->id == 0) {
+ g_free(dict);
+
+ return nullptr;
+ }
+
+ return dict;
+}
+
+static void
+rspamd_free_zstd_dictionary(struct zstd_dictionary *dict)
+{
+ if (dict) {
+ munmap(dict->dict, dict->size);
+ g_free(dict);
+ }
+}
+
+#ifdef HAVE_OPENBLAS_SET_NUM_THREADS
+extern "C" void openblas_set_num_threads(int num_threads);
+#endif
+#ifdef HAVE_BLI_THREAD_SET_NUM_THREADS
+extern "C" void bli_thread_set_num_threads(int num_threads);
+#endif
+
+gboolean
+rspamd_config_libs(struct rspamd_external_libs_ctx *ctx,
+ struct rspamd_config *cfg)
+{
+ size_t r;
+ gboolean ret = TRUE;
+
+ g_assert(cfg != nullptr);
+
+ if (ctx != nullptr) {
+ if (cfg->local_addrs) {
+ GError *err = nullptr;
+ ret = rspamd_config_radix_from_ucl(cfg, cfg->local_addrs,
+ "Local addresses",
+ (struct rspamd_radix_map_helper **) ctx->local_addrs,
+ &err,
+ nullptr, "local addresses");
+
+ if (!ret) {
+ msg_err_config("cannot load local addresses: %e", err);
+ g_error_free(err);
+
+ return ret;
+ }
+ }
+
+ rspamd_free_zstd_dictionary(ctx->in_dict);
+ rspamd_free_zstd_dictionary(ctx->out_dict);
+
+ if (ctx->out_zstream) {
+ ZSTD_freeCStream((ZSTD_CCtx *) ctx->out_zstream);
+ ctx->out_zstream = nullptr;
+ }
+
+ if (ctx->in_zstream) {
+ ZSTD_freeDStream((ZSTD_DCtx *) ctx->in_zstream);
+ ctx->in_zstream = nullptr;
+ }
+
+ if (cfg->zstd_input_dictionary) {
+ ctx->in_dict = rspamd_open_zstd_dictionary(
+ cfg->zstd_input_dictionary);
+
+ if (ctx->in_dict == nullptr) {
+ msg_err_config("cannot open zstd dictionary in %s",
+ cfg->zstd_input_dictionary);
+ }
+ }
+ if (cfg->zstd_output_dictionary) {
+ ctx->out_dict = rspamd_open_zstd_dictionary(
+ cfg->zstd_output_dictionary);
+
+ if (ctx->out_dict == nullptr) {
+ msg_err_config("cannot open zstd dictionary in %s",
+ cfg->zstd_output_dictionary);
+ }
+ }
+
+ if (cfg->fips_mode) {
+#ifdef HAVE_FIPS_MODE
+ int mode = FIPS_mode();
+ unsigned long err = (unsigned long) -1;
+
+ /* Toggle FIPS mode */
+ if (mode == 0) {
+#if defined(OPENSSL_VERSION_MAJOR) && (OPENSSL_VERSION_MAJOR >= 3)
+ if (EVP_set_default_properties(nullptr, "fips=yes") != 1) {
+#else
+ if (FIPS_mode_set(1) != 1) {
+#endif
+ err = ERR_get_error();
+ }
+ }
+ else {
+ msg_info_config("OpenSSL FIPS mode is already enabled");
+ }
+
+ if (err != (unsigned long) -1) {
+#if defined(OPENSSL_VERSION_MAJOR) && (OPENSSL_VERSION_MAJOR >= 3)
+ msg_err_config("EVP_set_default_properties failed: %s",
+#else
+ msg_err_config("FIPS_mode_set failed: %s",
+#endif
+ ERR_error_string(err, nullptr));
+ ret = FALSE;
+ }
+ else {
+ msg_info_config("OpenSSL FIPS mode is enabled");
+ }
+#else
+ msg_warn_config("SSL FIPS mode is enabled but not supported by OpenSSL library!");
+#endif
+ }
+
+ rspamd_ssl_ctx_config(cfg, ctx->ssl_ctx);
+ rspamd_ssl_ctx_config(cfg, ctx->ssl_ctx_noverify);
+
+ /* Init decompression */
+ ctx->in_zstream = ZSTD_createDStream();
+ r = ZSTD_initDStream((ZSTD_DCtx *) ctx->in_zstream);
+
+ if (ZSTD_isError(r)) {
+ msg_err("cannot init decompression stream: %s",
+ ZSTD_getErrorName(r));
+ ZSTD_freeDStream((ZSTD_DCtx *) ctx->in_zstream);
+ ctx->in_zstream = nullptr;
+ }
+
+ /* Init compression */
+ ctx->out_zstream = ZSTD_createCStream();
+ r = ZSTD_initCStream((ZSTD_CCtx *) ctx->out_zstream, 1);
+
+ if (ZSTD_isError(r)) {
+ msg_err("cannot init compression stream: %s",
+ ZSTD_getErrorName(r));
+ ZSTD_freeCStream((ZSTD_CCtx *) ctx->out_zstream);
+ ctx->out_zstream = nullptr;
+ }
+#ifdef HAVE_OPENBLAS_SET_NUM_THREADS
+ openblas_set_num_threads(cfg->max_blas_threads);
+#endif
+#ifdef HAVE_BLI_THREAD_SET_NUM_THREADS
+ bli_thread_set_num_threads(cfg->max_blas_threads);
+#endif
+ }
+
+ return ret;
+}
+
+gboolean
+rspamd_libs_reset_decompression(struct rspamd_external_libs_ctx *ctx)
+{
+ gsize r;
+
+ if (ctx->in_zstream == nullptr) {
+ return FALSE;
+ }
+ else {
+ r = ZSTD_DCtx_reset((ZSTD_DCtx *) ctx->in_zstream, ZSTD_reset_session_only);
+
+ if (ZSTD_isError(r)) {
+ msg_err("cannot init decompression stream: %s",
+ ZSTD_getErrorName(r));
+ ZSTD_freeDStream((ZSTD_DCtx *) ctx->in_zstream);
+ ctx->in_zstream = nullptr;
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_libs_reset_compression(struct rspamd_external_libs_ctx *ctx)
+{
+ gsize r;
+
+ if (ctx->out_zstream == nullptr) {
+ return FALSE;
+ }
+ else {
+ /* Dictionary will be reused automatically if specified */
+ r = ZSTD_CCtx_reset((ZSTD_CCtx *) ctx->out_zstream, ZSTD_reset_session_only);
+ if (!ZSTD_isError(r)) {
+ r = ZSTD_CCtx_setPledgedSrcSize((ZSTD_CCtx *) ctx->out_zstream, ZSTD_CONTENTSIZE_UNKNOWN);
+ }
+
+ if (ZSTD_isError(r)) {
+ msg_err("cannot init compression stream: %s",
+ ZSTD_getErrorName(r));
+ ZSTD_freeCStream((ZSTD_CCtx *) ctx->out_zstream);
+ ctx->out_zstream = nullptr;
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+void rspamd_deinit_libs(struct rspamd_external_libs_ctx *ctx)
+{
+ if (ctx != nullptr) {
+ g_free(ctx->ottery_cfg);
+
+#ifdef HAVE_OPENSSL
+ EVP_cleanup();
+ ERR_free_strings();
+ rspamd_ssl_ctx_free(ctx->ssl_ctx);
+ rspamd_ssl_ctx_free(ctx->ssl_ctx_noverify);
+#endif
+ rspamd_inet_library_destroy();
+ rspamd_free_zstd_dictionary(ctx->in_dict);
+ rspamd_free_zstd_dictionary(ctx->out_dict);
+
+ if (ctx->out_zstream) {
+ ZSTD_freeCStream((ZSTD_CCtx *) ctx->out_zstream);
+ }
+
+ if (ctx->in_zstream) {
+ ZSTD_freeDStream((ZSTD_DCtx *) ctx->in_zstream);
+ }
+
+ rspamd_cryptobox_deinit(ctx->crypto_ctx);
+
+ g_free(ctx);
+ }
+}
+
+gboolean
+rspamd_ip_is_local_cfg(struct rspamd_config *cfg,
+ const rspamd_inet_addr_t *addr)
+{
+ struct rspamd_radix_map_helper *local_addrs = nullptr;
+
+ if (cfg && cfg->libs_ctx) {
+ local_addrs = *(struct rspamd_radix_map_helper **) cfg->libs_ctx->local_addrs;
+ }
+
+ if (rspamd_inet_address_is_local(addr)) {
+ return TRUE;
+ }
+
+ if (local_addrs) {
+ if (rspamd_match_radix_map_addr(local_addrs, addr) != nullptr) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx
new file mode 100644
index 0000000..aa231a3
--- /dev/null
+++ b/src/libserver/composites/composites.cxx
@@ -0,0 +1,989 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "logger.h"
+#include "expression.h"
+#include "task.h"
+#include "utlist.h"
+#include "scan_result.h"
+#include "composites.h"
+
+#include <cmath>
+#include <vector>
+#include <variant>
+#include "libutil/cxx/util.hxx"
+#include "contrib/ankerl/unordered_dense.h"
+
+#include "composites_internal.hxx"
+
+#define msg_err_composites(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "composites", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_composites(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "composites", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_composites(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "composites", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#define msg_debug_composites(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \
+ rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(composites)
+
+
+namespace rspamd::composites {
+static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line, gsize len,
+ rspamd_mempool_t *pool,
+ gpointer ud, GError **err);
+static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
+static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
+static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
+static void composites_foreach_callback(gpointer key, gpointer value, void *data);
+
+const struct rspamd_atom_subr composite_expr_subr = {
+ .parse = rspamd::composites::rspamd_composite_expr_parse,
+ .process = rspamd::composites::rspamd_composite_expr_process,
+ .priority = rspamd::composites::rspamd_composite_expr_priority,
+ .destroy = rspamd::composites::rspamd_composite_expr_destroy};
+}// namespace rspamd::composites
+
+namespace rspamd::composites {
+
+static constexpr const double epsilon = 0.00001;
+
+struct symbol_remove_data {
+ const char *sym;
+ struct rspamd_composite *comp;
+ GNode *parent;
+ std::uint8_t action;
+};
+
+struct composites_data {
+ struct rspamd_task *task;
+ struct rspamd_composite *composite;
+ struct rspamd_scan_result *metric_res;
+ ankerl::unordered_dense::map<std::string_view,
+ std::vector<symbol_remove_data>>
+ symbols_to_remove;
+ std::vector<bool> checked;
+
+ explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres)
+ : task(task), composite(nullptr), metric_res(mres)
+ {
+ checked.resize(rspamd_composites_manager_nelts(task->cfg->composites_manager) * 2,
+ false);
+ }
+};
+
+struct rspamd_composite_option_match {
+ rspamd_regexp_t *re;
+ std::string match;
+
+ explicit rspamd_composite_option_match(const char *start, std::size_t len) noexcept
+ : re(nullptr), match(start, len)
+ {
+ }
+
+ explicit rspamd_composite_option_match(rspamd_regexp_t *re) noexcept
+ : re(rspamd_regexp_ref(re))
+ {
+ }
+
+ rspamd_composite_option_match(const rspamd_composite_option_match &other) noexcept
+ {
+ if (other.re) {
+ re = rspamd_regexp_ref(other.re);
+ }
+ else {
+ match = other.match;
+ re = nullptr;
+ }
+ }
+ rspamd_composite_option_match &operator=(const rspamd_composite_option_match &other) noexcept
+ {
+ if (other.re) {
+ if (re) {
+ rspamd_regexp_unref(re);
+ }
+ re = rspamd_regexp_ref(other.re);
+ }
+ else {
+ if (re) {
+ rspamd_regexp_unref(re);
+ }
+ re = nullptr;
+ match = other.match;
+ }
+
+ return *this;
+ }
+
+ rspamd_composite_option_match(rspamd_composite_option_match &&other) noexcept
+ {
+ if (other.re) {
+ re = other.re;
+ other.re = nullptr;
+ }
+ else {
+ re = nullptr;
+ match = std::move(other.match);
+ }
+ }
+ rspamd_composite_option_match &operator=(rspamd_composite_option_match &&other) noexcept
+ {
+ if (other.re) {
+ if (re) {
+ rspamd_regexp_unref(re);
+ }
+ re = other.re;
+ other.re = nullptr;
+ }
+ else {
+ if (re) {
+ rspamd_regexp_unref(re);
+ }
+ re = nullptr;
+ match = std::move(other.match);
+ }
+
+ return *this;
+ }
+
+ ~rspamd_composite_option_match()
+ {
+ if (re) {
+ rspamd_regexp_unref(re);
+ }
+ }
+
+ auto match_opt(const std::string_view &data) const -> bool
+ {
+ if (re) {
+ return rspamd_regexp_search(re,
+ data.data(), data.size(),
+ nullptr, nullptr, false, nullptr);
+ }
+ else {
+ return data == match;
+ }
+ }
+
+ auto get_pat() const -> std::string_view
+ {
+ if (re) {
+ return std::string_view(rspamd_regexp_get_pattern(re));
+ }
+ else {
+ return match;
+ }
+ }
+};
+
+enum class rspamd_composite_atom_type {
+ ATOM_UNKNOWN,
+ ATOM_COMPOSITE,
+ ATOM_PLAIN
+};
+
+struct rspamd_composite_atom {
+ std::string symbol;
+ std::string_view norm_symbol;
+ rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN;
+ const struct rspamd_composite *ncomp; /* underlying composite */
+ std::vector<rspamd_composite_option_match> opts;
+};
+
+enum rspamd_composite_action : std::uint8_t {
+ RSPAMD_COMPOSITE_UNTOUCH = 0,
+ RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1u << 0),
+ RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1u << 1),
+ RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
+};
+
+static GQuark
+rspamd_composites_quark(void)
+{
+ return g_quark_from_static_string("composites");
+}
+
+static auto
+rspamd_composite_atom_dtor(void *ptr)
+{
+ auto *atom = reinterpret_cast<rspamd_composite_atom *>(ptr);
+
+ delete atom;
+}
+
+static rspamd_expression_atom_t *
+rspamd_composite_expr_parse(const gchar *line, gsize len,
+ rspamd_mempool_t *pool,
+ gpointer ud, GError **err)
+{
+ gsize clen = 0;
+ const gchar *p, *end;
+ enum composite_expr_state {
+ comp_state_read_symbol = 0,
+ comp_state_read_obrace,
+ comp_state_read_option,
+ comp_state_read_regexp,
+ comp_state_read_regexp_end,
+ comp_state_read_comma,
+ comp_state_read_ebrace,
+ comp_state_read_end
+ } state = comp_state_read_symbol;
+
+ end = line + len;
+ p = line;
+
+ /* Find length of the atom using a reduced state machine */
+ while (p < end) {
+ if (state == comp_state_read_end) {
+ break;
+ }
+
+ switch (state) {
+ case comp_state_read_symbol:
+ clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
+ p += clen;
+
+ if (*p == '[') {
+ state = comp_state_read_obrace;
+ }
+ else {
+ state = comp_state_read_end;
+ }
+ break;
+ case comp_state_read_obrace:
+ p++;
+
+ if (*p == '/') {
+ p++;
+ state = comp_state_read_regexp;
+ }
+ else {
+ state = comp_state_read_option;
+ }
+ break;
+ case comp_state_read_regexp:
+ if (*p == '\\' && p + 1 < end) {
+ /* Escaping */
+ p++;
+ }
+ else if (*p == '/') {
+ /* End of regexp, possible flags */
+ state = comp_state_read_regexp_end;
+ }
+ p++;
+ break;
+ case comp_state_read_option:
+ case comp_state_read_regexp_end:
+ if (*p == ',') {
+ p++;
+ state = comp_state_read_comma;
+ }
+ else if (*p == ']') {
+ state = comp_state_read_ebrace;
+ }
+ else {
+ p++;
+ }
+ break;
+ case comp_state_read_comma:
+ if (!g_ascii_isspace(*p)) {
+ if (*p == '/') {
+ state = comp_state_read_regexp;
+ }
+ else if (*p == ']') {
+ state = comp_state_read_ebrace;
+ }
+ else {
+ state = comp_state_read_option;
+ }
+ }
+ else {
+ /* Skip spaces after comma */
+ p++;
+ }
+ break;
+ case comp_state_read_ebrace:
+ p++;
+ state = comp_state_read_end;
+ break;
+ case comp_state_read_end:
+ g_assert_not_reached();
+ }
+ }
+
+ if (state != comp_state_read_end) {
+ g_set_error(err, rspamd_composites_quark(), 100, "invalid composite: %s;"
+ "parser stopped in state %d",
+ line, state);
+ return NULL;
+ }
+
+ clen = p - line;
+ p = line;
+ state = comp_state_read_symbol;
+
+ auto *atom = new rspamd_composite_atom;
+ auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t);
+ res->len = clen;
+ res->str = line;
+
+ /* Full state machine to fill a composite atom */
+ const gchar *opt_start = nullptr;
+
+ while (p < end) {
+ if (state == comp_state_read_end) {
+ break;
+ }
+
+ switch (state) {
+ case comp_state_read_symbol: {
+ clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
+ p += clen;
+
+ if (*p == '[') {
+ state = comp_state_read_obrace;
+ }
+ else {
+ state = comp_state_read_end;
+ }
+
+ atom->symbol = std::string{line, clen};
+ auto norm_start = std::find_if(atom->symbol.begin(), atom->symbol.end(),
+ [](char c) { return g_ascii_isalnum(c); });
+ if (norm_start == atom->symbol.end()) {
+ msg_err_pool("invalid composite atom: %s", atom->symbol.c_str());
+ }
+ atom->norm_symbol = make_string_view_from_it(norm_start, atom->symbol.end());
+ break;
+ }
+ case comp_state_read_obrace:
+ p++;
+
+ if (*p == '/') {
+ opt_start = p;
+ p++; /* Starting slash */
+ state = comp_state_read_regexp;
+ }
+ else {
+ state = comp_state_read_option;
+ opt_start = p;
+ }
+
+ break;
+ case comp_state_read_regexp:
+ if (*p == '\\' && p + 1 < end) {
+ /* Escaping */
+ p++;
+ }
+ else if (*p == '/') {
+ /* End of regexp, possible flags */
+ state = comp_state_read_regexp_end;
+ }
+ p++;
+ break;
+ case comp_state_read_option:
+ if (*p == ',' || *p == ']') {
+ /* Plain match, copy option to ensure string_view validity */
+ gint opt_len = p - opt_start;
+ auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1);
+ rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
+ opt_buf = g_strstrip(opt_buf);
+ atom->opts.emplace_back(opt_buf, strlen(opt_buf));
+
+ if (*p == ',') {
+ p++;
+ state = comp_state_read_comma;
+ }
+ else {
+ state = comp_state_read_ebrace;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case comp_state_read_regexp_end:
+ if (*p == ',' || *p == ']') {
+ auto opt_len = p - opt_start;
+ rspamd_regexp_t *re;
+ GError *re_err = nullptr;
+
+ re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err);
+
+ if (re == nullptr) {
+ msg_err_pool("cannot create regexp from string %*s: %e",
+ opt_len, opt_start, re_err);
+
+ g_error_free(re_err);
+ }
+ else {
+ atom->opts.emplace_back(re);
+ rspamd_regexp_unref(re);
+ }
+
+ if (*p == ',') {
+ p++;
+ state = comp_state_read_comma;
+ }
+ else {
+ state = comp_state_read_ebrace;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case comp_state_read_comma:
+ if (!g_ascii_isspace(*p)) {
+ if (*p == '/') {
+ state = comp_state_read_regexp;
+ opt_start = p;
+ }
+ else if (*p == ']') {
+ state = comp_state_read_ebrace;
+ }
+ else {
+ opt_start = p;
+ state = comp_state_read_option;
+ }
+ }
+ else {
+ /* Skip spaces after comma */
+ p++;
+ }
+ break;
+ case comp_state_read_ebrace:
+ p++;
+ state = comp_state_read_end;
+ break;
+ case comp_state_read_end:
+ g_assert_not_reached();
+ }
+ }
+
+ res->data = atom;
+
+ return res;
+}
+
+static auto
+process_symbol_removal(rspamd_expression_atom_t *atom,
+ struct composites_data *cd,
+ struct rspamd_symbol_result *ms,
+ const std::string &beg) -> void
+{
+ struct rspamd_task *task = cd->task;
+
+ if (ms == nullptr) {
+ return;
+ }
+
+ /*
+ * At this point we know that we need to do something about this symbol,
+ * however, we don't know whether we need to delete it unfortunately,
+ * that depends on the later decisions when the complete expression is
+ * evaluated.
+ */
+ auto rd_it = cd->symbols_to_remove.find(ms->name);
+
+ auto fill_removal_structure = [&](symbol_remove_data &nrd) {
+ nrd.sym = ms->name;
+
+ /* By default remove symbols */
+ switch (cd->composite->policy) {
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
+ default:
+ nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT);
+ break;
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
+ nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+ break;
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
+ nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
+ break;
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE:
+ nrd.action = 0;
+ break;
+ }
+
+ for (auto t: beg) {
+ if (t == '~') {
+ nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+ }
+ else if (t == '-') {
+ nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT |
+ RSPAMD_COMPOSITE_REMOVE_SYMBOL);
+ }
+ else if (t == '^') {
+ nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
+ }
+ else {
+ break;
+ }
+ }
+
+ nrd.comp = cd->composite;
+ nrd.parent = atom->parent;
+ };
+
+ if (rd_it != cd->symbols_to_remove.end()) {
+ fill_removal_structure(rd_it->second.emplace_back());
+ msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s",
+ cd->metric_res->name,
+ ms->name, rd_it->second.back().action,
+ cd->composite->sym.c_str());
+ }
+ else {
+ std::vector<symbol_remove_data> nrd;
+ fill_removal_structure(nrd.emplace_back());
+ msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s",
+ cd->metric_res->name,
+ ms->name, nrd.front().action,
+ cd->composite->sym.c_str());
+ cd->symbols_to_remove[ms->name] = std::move(nrd);
+ }
+}
+
+static auto
+process_single_symbol(struct composites_data *cd,
+ std::string_view sym,
+ struct rspamd_symbol_result **pms,
+ struct rspamd_composite_atom *atom) -> double
+{
+ struct rspamd_symbol_result *ms = nullptr;
+ gdouble rc = 0;
+ struct rspamd_task *task = cd->task;
+
+ if ((ms = rspamd_task_find_symbol_result(cd->task, sym.data(), cd->metric_res)) == nullptr) {
+ msg_debug_composites("not found symbol %s in composite %s", sym.data(),
+ cd->composite->sym.c_str());
+
+ if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) {
+ const struct rspamd_composite *ncomp;
+
+ if ((ncomp = COMPOSITE_MANAGER_FROM_PTR(task->cfg->composites_manager)->find(sym)) != NULL) {
+ atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE;
+ atom->ncomp = ncomp;
+ }
+ else {
+ atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN;
+ }
+ }
+
+ if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) {
+ msg_debug_composites("symbol %s for composite %s is another composite",
+ sym.data(), cd->composite->sym.c_str());
+
+ if (!cd->checked[atom->ncomp->id * 2]) {
+ msg_debug_composites("composite dependency %s for %s is not checked",
+ sym.data(), cd->composite->sym.c_str());
+ /* Set checked for this symbol to avoid cyclic references */
+ cd->checked[cd->composite->id * 2] = true;
+ auto *saved = cd->composite; /* Save the current composite */
+ composites_foreach_callback((gpointer) atom->ncomp->sym.c_str(),
+ (gpointer) atom->ncomp, (gpointer) cd);
+ /* Restore state */
+ cd->composite = saved;
+ cd->checked[cd->composite->id * 2] = false;
+
+ ms = rspamd_task_find_symbol_result(cd->task, sym.data(),
+ cd->metric_res);
+ }
+ else {
+ /*
+ * XXX: in case of cyclic references this would return 0
+ */
+ if (cd->checked[atom->ncomp->id * 2 + 1]) {
+ ms = rspamd_task_find_symbol_result(cd->task, sym.data(),
+ cd->metric_res);
+ }
+ }
+ }
+ }
+
+ if (ms) {
+ msg_debug_composites("found symbol %s in composite %s, weight: %.3f",
+ sym.data(), cd->composite->sym.c_str(), ms->score);
+
+ /* Now check options */
+ for (const auto &cur_opt: atom->opts) {
+ struct rspamd_symbol_option *opt;
+ auto found = false;
+
+ DL_FOREACH(ms->opts_head, opt)
+ {
+ if (cur_opt.match_opt({opt->option, opt->optlen})) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ auto pat = cur_opt.get_pat();
+ msg_debug_composites("symbol %s in composite %s misses required option %*s",
+ sym.data(),
+ cd->composite->sym.c_str(),
+ (int) pat.size(), pat.data());
+ ms = nullptr;
+
+ break;
+ }
+ }
+
+ if (ms) {
+ if (ms->score == 0) {
+ rc = epsilon * 16.0; /* Distinguish from 0 */
+ }
+ else {
+ rc = ms->score;
+ }
+ }
+ }
+
+ *pms = ms;
+ return rc;
+}
+
+static auto
+rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double
+{
+ struct composites_data *cd = (struct composites_data *) ud;
+ struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data;
+
+ struct rspamd_symbol_result *ms = NULL;
+ struct rspamd_task *task = cd->task;
+ gdouble rc = 0;
+
+ if (cd->checked[cd->composite->id * 2]) {
+ /* We have already checked this composite, so just return its value */
+ if (cd->checked[cd->composite->id * 2 + 1]) {
+ ms = rspamd_task_find_symbol_result(cd->task,
+ comp_atom->norm_symbol.data(),
+ cd->metric_res);
+ }
+
+ if (ms) {
+ if (ms->score == 0) {
+ rc = epsilon; /* Distinguish from 0 */
+ }
+ else {
+ /* Treat negative and positive scores equally... */
+ rc = fabs(ms->score);
+ }
+ }
+
+ msg_debug_composites("composite %s is already checked, result: %.2f",
+ cd->composite->sym.c_str(), rc);
+
+ return rc;
+ }
+
+ /* Note: sym is zero terminated as it is a view on std::string */
+ auto sym = comp_atom->norm_symbol;
+ auto group_process_functor = [&](auto cond, int sub_start) -> double {
+ auto max = 0.;
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_symbols_group *gr;
+
+ gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups,
+ sym.substr(sub_start).data());
+
+ if (gr != nullptr) {
+ g_hash_table_iter_init(&it, gr->symbols);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ auto *sdef = (rspamd_symbol *) v;
+
+ if (cond(sdef->score)) {
+ rc = process_single_symbol(cd,
+ std::string_view(sdef->name),
+ &ms,
+ comp_atom);
+
+ if (fabs(rc) > epsilon) {
+ process_symbol_removal(atom,
+ cd,
+ ms,
+ comp_atom->symbol);
+
+ if (fabs(rc) > max) {
+ max = fabs(rc);
+ }
+ }
+ }
+ }
+ }
+
+ return max;
+ };
+
+ if (sym.size() > 2) {
+ if (sym.substr(0, 2) == "g:") {
+ rc = group_process_functor([](auto _) { return true; }, 2);
+ }
+ else if (sym.substr(0, 3) == "g+:") {
+ /* Group, positive symbols only */
+ rc = group_process_functor([](auto sc) { return sc > 0.; }, 3);
+ }
+ else if (sym.substr(0, 3) == "g-:") {
+ rc = group_process_functor([](auto sc) { return sc < 0.; }, 3);
+ }
+ else {
+ rc = process_single_symbol(cd, sym, &ms, comp_atom);
+
+ if (fabs(rc) > epsilon) {
+ process_symbol_removal(atom,
+ cd,
+ ms,
+ comp_atom->symbol);
+ }
+ }
+ }
+ else {
+ rc = process_single_symbol(cd, sym, &ms, comp_atom);
+
+ if (fabs(rc) > epsilon) {
+ process_symbol_removal(atom,
+ cd,
+ ms,
+ comp_atom->symbol);
+ }
+ }
+
+ msg_debug_composites("%s: result for atom %s in composite %s is %.4f",
+ cd->metric_res->name,
+ comp_atom->norm_symbol.data(),
+ cd->composite->sym.c_str(), rc);
+
+ return rc;
+}
+
+/*
+ * We don't have preferences for composites
+ */
+static gint
+rspamd_composite_expr_priority(rspamd_expression_atom_t *atom)
+{
+ return 0;
+}
+
+static void
+rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom)
+{
+ rspamd_composite_atom_dtor(atom->data);
+}
+
+static void
+composites_foreach_callback(gpointer key, gpointer value, void *data)
+{
+ auto *cd = (struct composites_data *) data;
+ auto *comp = (struct rspamd_composite *) value;
+ auto *str_key = (const gchar *) key;
+ struct rspamd_task *task;
+ gdouble rc;
+
+ cd->composite = comp;
+ task = cd->task;
+
+ msg_debug_composites("process composite %s", str_key);
+
+ if (!cd->checked[cd->composite->id * 2]) {
+ if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache,
+ str_key)) {
+ msg_debug_composites("composite %s is checked in symcache but not "
+ "in composites bitfield",
+ cd->composite->sym.c_str());
+ cd->checked[comp->id * 2] = true;
+ cd->checked[comp->id * 2 + 1] = false;
+ }
+ else {
+ if (rspamd_task_find_symbol_result(cd->task, str_key,
+ cd->metric_res) != nullptr) {
+ /* Already set, no need to check */
+ msg_debug_composites("composite %s is already in metric "
+ "in composites bitfield",
+ cd->composite->sym.c_str());
+ cd->checked[comp->id * 2] = true;
+ cd->checked[comp->id * 2 + 1] = true;
+
+ return;
+ }
+
+ msg_debug_composites("%s: start processing composite %s",
+ cd->metric_res->name,
+ cd->composite->sym.c_str());
+
+ rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
+ cd);
+
+ /* Checked bit */
+ cd->checked[comp->id * 2] = true;
+
+ msg_debug_composites("%s: final result for composite %s is %.4f",
+ cd->metric_res->name,
+ cd->composite->sym.c_str(), rc);
+
+ /* Result bit */
+ if (fabs(rc) > epsilon) {
+ cd->checked[comp->id * 2 + 1] = true;
+ rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL,
+ RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res);
+ }
+ else {
+ cd->checked[comp->id * 2 + 1] = false;
+ }
+ }
+ }
+}
+
+
+static auto
+remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void
+{
+ struct rspamd_task *task = cd.task;
+ gboolean skip = FALSE,
+ has_valid_op = FALSE,
+ want_remove_score = TRUE,
+ want_remove_symbol = TRUE,
+ want_forced = FALSE;
+ const gchar *disable_score_reason = "no policy",
+ *disable_symbol_reason = "no policy";
+
+ task = cd.task;
+
+ for (const auto &cur: rd) {
+ if (!cd.checked[cur.comp->id * 2 + 1]) {
+ continue;
+ }
+ /*
+ * First of all exclude all elements with any parent that is negation:
+ * !A || B -> here we can have both !A and B matched, but we do *NOT*
+ * want to remove symbol in that case
+ */
+ auto *par = cur.parent;
+ skip = FALSE;
+
+ while (par) {
+ if (rspamd_expression_node_is_op(par, OP_NOT)) {
+ skip = TRUE;
+ break;
+ }
+
+ par = par->parent;
+ }
+
+ if (skip) {
+ continue;
+ }
+
+ has_valid_op = TRUE;
+ /*
+ * Now we can try to remove symbols/scores
+ *
+ * We apply the following logic here:
+ * - if no composites would like to save score then we remove score
+ * - if no composites would like to save symbol then we remove symbol
+ */
+ if (!want_forced) {
+ if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
+ want_remove_symbol = FALSE;
+ disable_symbol_reason = cur.comp->sym.c_str();
+ }
+
+ if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
+ want_remove_score = FALSE;
+ disable_score_reason = cur.comp->sym.c_str();
+ }
+
+ if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
+ want_forced = TRUE;
+ disable_symbol_reason = cur.comp->sym.c_str();
+ disable_score_reason = cur.comp->sym.c_str();
+ }
+ }
+ }
+
+ auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res);
+
+ if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
+
+ if (want_remove_score || want_forced) {
+ msg_debug_composites("%s: %s remove symbol weight for %s (was %.2f), "
+ "score removal affected by %s, symbol removal affected by %s",
+ cd.metric_res->name,
+ (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
+ disable_score_reason, disable_symbol_reason);
+ cd.metric_res->score -= ms->score;
+ ms->score = 0.0;
+ }
+
+ if (want_remove_symbol || want_forced) {
+ ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED;
+ msg_debug_composites("%s: %s remove symbol %s (score %.2f), "
+ "score removal affected by %s, symbol removal affected by %s",
+ cd.metric_res->name,
+ (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
+ disable_score_reason, disable_symbol_reason);
+ }
+ }
+}
+
+static void
+composites_metric_callback(struct rspamd_task *task)
+{
+ std::vector<composites_data> comp_data_vec;
+ struct rspamd_scan_result *mres;
+
+ comp_data_vec.reserve(1);
+
+ DL_FOREACH(task->result, mres)
+ {
+ auto &cd = comp_data_vec.emplace_back(task, mres);
+
+ /* Process metric result */
+ rspamd_symcache_composites_foreach(task,
+ task->cfg->cache,
+ composites_foreach_callback,
+ &cd);
+ }
+
+ for (const auto &cd: comp_data_vec) {
+ /* Remove symbols that are in composites */
+ for (const auto &srd_it: cd.symbols_to_remove) {
+ remove_symbols(cd, srd_it.second);
+ }
+ }
+}
+
+}// namespace rspamd::composites
+
+
+void rspamd_composites_process_task(struct rspamd_task *task)
+{
+ if (task->result && !RSPAMD_TASK_IS_SKIPPED(task)) {
+ rspamd::composites::composites_metric_callback(task);
+ }
+}
diff --git a/src/libserver/composites/composites.h b/src/libserver/composites/composites.h
new file mode 100644
index 0000000..5d58029
--- /dev/null
+++ b/src/libserver/composites/composites.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_COMPOSITES_H_
+#define SRC_LIBSERVER_COMPOSITES_H_
+
+#include "config.h"
+#include "contrib/libucl/ucl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_config;
+
+/**
+ * Process all results and form composite metrics from existent metrics as it is defined in config
+ * @param task worker's task that present message from user
+ */
+void rspamd_composites_process_task(struct rspamd_task *task);
+
+/**
+ * Creates a composites manager
+ * @param cfg
+ * @return
+ */
+void *rspamd_composites_manager_create(struct rspamd_config *cfg);
+/**
+ * Returns number of elements in a composite manager
+ * @return
+ */
+gsize rspamd_composites_manager_nelts(void *);
+/**
+ * Adds a composite from config
+ * @return
+ */
+void *rspamd_composites_manager_add_from_ucl(void *, const char *, const ucl_object_t *);
+void *rspamd_composites_manager_add_from_ucl_silent(void *, const char *, const ucl_object_t *);
+
+/**
+ * Adds a composite from config
+ * @return
+ */
+void *rspamd_composites_manager_add_from_string(void *, const char *, const char *);
+void *rspamd_composites_manager_add_from_string_silent(void *, const char *, const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_COMPOSITES_H_ */
diff --git a/src/libserver/composites/composites_internal.hxx b/src/libserver/composites/composites_internal.hxx
new file mode 100644
index 0000000..038e217
--- /dev/null
+++ b/src/libserver/composites/composites_internal.hxx
@@ -0,0 +1,112 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_COMPOSITES_INTERNAL_HXX
+#define RSPAMD_COMPOSITES_INTERNAL_HXX
+#pragma once
+
+#include <string>
+#include "libutil/expression.h"
+#include "libutil/cxx/hash_util.hxx"
+#include "libserver/cfg_file.h"
+
+namespace rspamd::composites {
+
+/**
+ * Subr for composite expressions
+ */
+extern const struct rspamd_atom_subr composite_expr_subr;
+
+enum class rspamd_composite_policy {
+ RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
+ RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
+ RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT,
+ RSPAMD_COMPOSITE_POLICY_LEAVE,
+ RSPAMD_COMPOSITE_POLICY_UNKNOWN
+};
+
+/**
+ * Static composites structure
+ */
+struct rspamd_composite {
+ std::string str_expr;
+ std::string sym;
+ struct rspamd_expression *expr;
+ gint id;
+ rspamd_composite_policy policy;
+};
+
+#define COMPOSITE_MANAGER_FROM_PTR(ptr) (reinterpret_cast<rspamd::composites::composites_manager *>(ptr))
+
+class composites_manager {
+public:
+ composites_manager(struct rspamd_config *_cfg)
+ : cfg(_cfg)
+ {
+ rspamd_mempool_add_destructor(_cfg->cfg_pool, composites_manager_dtor, this);
+ }
+
+ auto size(void) const -> std::size_t
+ {
+ return all_composites.size();
+ }
+
+ auto find(std::string_view name) const -> const rspamd_composite *
+ {
+ auto found = composites.find(std::string(name));
+
+ if (found != composites.end()) {
+ return found->second.get();
+ }
+
+ return nullptr;
+ }
+
+ auto add_composite(std::string_view, const ucl_object_t *, bool silent_duplicate) -> rspamd_composite *;
+ auto add_composite(std::string_view name, std::string_view expression, bool silent_duplicate, double score = NAN) -> rspamd_composite *;
+
+private:
+ ~composites_manager() = default;
+ static void composites_manager_dtor(void *ptr)
+ {
+ delete COMPOSITE_MANAGER_FROM_PTR(ptr);
+ }
+
+ auto new_composite(std::string_view composite_name, rspamd_expression *expr,
+ std::string_view composite_expression) -> auto
+ {
+ auto &composite = all_composites.emplace_back(std::make_shared<rspamd_composite>());
+ composite->expr = expr;
+ composite->id = all_composites.size() - 1;
+ composite->str_expr = composite_expression;
+ composite->sym = composite_name;
+
+ composites[composite->sym] = composite;
+
+ return composite;
+ }
+
+ ankerl::unordered_dense::map<std::string,
+ std::shared_ptr<rspamd_composite>, rspamd::smart_str_hash, rspamd::smart_str_equal>
+ composites;
+ /* Store all composites here, even if we have duplicates */
+ std::vector<std::shared_ptr<rspamd_composite>> all_composites;
+ struct rspamd_config *cfg;
+};
+
+}// namespace rspamd::composites
+
+#endif//RSPAMD_COMPOSITES_INTERNAL_HXX
diff --git a/src/libserver/composites/composites_manager.cxx b/src/libserver/composites/composites_manager.cxx
new file mode 100644
index 0000000..1ee5c40
--- /dev/null
+++ b/src/libserver/composites/composites_manager.cxx
@@ -0,0 +1,330 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+#include <cmath>
+#include "contrib/ankerl/unordered_dense.h"
+
+#include "composites.h"
+#include "composites_internal.hxx"
+#include "libserver/cfg_file.h"
+#include "libserver/logger.h"
+#include "libserver/maps/map.h"
+#include "libutil/cxx/util.hxx"
+
+namespace rspamd::composites {
+
+static auto
+composite_policy_from_str(const std::string_view &inp) -> enum rspamd_composite_policy {
+ const static ankerl::unordered_dense::map<std::string_view,
+ enum rspamd_composite_policy>
+ names{
+ {"remove", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL},
+ {"remove_all", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL},
+ {"default", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL},
+ {"remove_symbol", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL},
+ {"remove_weight", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT},
+ {"leave", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE},
+ {"remove_none", rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE},
+ };
+
+ auto found = names.find(inp);
+ if (found != names.end()){
+ return found->second;}
+
+return rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_UNKNOWN;
+}// namespace rspamd::composites
+
+auto composites_manager::add_composite(std::string_view composite_name, const ucl_object_t *obj, bool silent_duplicate) -> rspamd_composite *
+{
+
+ const auto *val = ucl_object_lookup(obj, "enabled");
+ if (val != nullptr && !ucl_object_toboolean(val)) {
+ msg_info_config("composite %s is disabled", composite_name.data());
+ return nullptr;
+ }
+
+ if (composites.contains(composite_name)) {
+ if (silent_duplicate) {
+ msg_debug_config("composite %s is redefined", composite_name.data());
+ return nullptr;
+ }
+ else {
+ msg_warn_config("composite %s is redefined", composite_name.data());
+ }
+ }
+
+ const char *composite_expression = nullptr;
+ val = ucl_object_lookup(obj, "expression");
+
+ if (val == NULL || !ucl_object_tostring_safe(val, &composite_expression)) {
+ msg_err_config("composite must have an expression defined in %s",
+ composite_name.data());
+ return nullptr;
+ }
+
+ GError *err = nullptr;
+ rspamd_expression *expr = nullptr;
+
+ if (!rspamd_parse_expression(composite_expression, 0, &composite_expr_subr,
+ NULL, cfg->cfg_pool, &err, &expr)) {
+ msg_err_config("cannot parse composite expression for %s: %e",
+ composite_name.data(), err);
+
+ if (err) {
+ g_error_free(err);
+ }
+
+ return nullptr;
+ }
+
+ const auto &composite = new_composite(composite_name, expr, composite_expression);
+
+ auto score = std::isnan(cfg->unknown_weight) ? 0.0 : cfg->unknown_weight;
+ val = ucl_object_lookup(obj, "score");
+
+ if (val != nullptr) {
+ ucl_object_todouble_safe(val, &score);
+ }
+
+ /* Also set score in the metric */
+ const auto *group = "composite";
+ val = ucl_object_lookup(obj, "group");
+ if (val != nullptr) {
+ group = ucl_object_tostring(val);
+ }
+
+ const auto *description = composite_expression;
+ val = ucl_object_lookup(obj, "description");
+ if (val != nullptr) {
+ description = ucl_object_tostring(val);
+ }
+
+ rspamd_config_add_symbol(cfg, composite_name.data(), score,
+ description, group,
+ 0,
+ ucl_object_get_priority(obj), /* No +1 as it is default... */
+ 1);
+
+ const auto *elt = ucl_object_lookup(obj, "groups");
+ if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+ const ucl_object_t *cur_gr;
+ auto *gr_it = ucl_object_iterate_new(elt);
+
+ while ((cur_gr = ucl_object_iterate_safe(gr_it, true)) != nullptr) {
+ rspamd_config_add_symbol_group(cfg, composite_name.data(),
+ ucl_object_tostring(cur_gr));
+ }
+
+ ucl_object_iterate_free(gr_it);
+ }
+
+ val = ucl_object_lookup(obj, "policy");
+ if (val) {
+ composite->policy = composite_policy_from_str(ucl_object_tostring(val));
+
+ if (composite->policy == rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_UNKNOWN) {
+ msg_err_config("composite %s has incorrect policy", composite_name.data());
+ return nullptr;
+ }
+ }
+
+ return composite.get();
+}
+
+auto composites_manager::add_composite(std::string_view composite_name,
+ std::string_view composite_expression,
+ bool silent_duplicate, double score) -> rspamd_composite *
+{
+ GError *err = nullptr;
+ rspamd_expression *expr = nullptr;
+
+ if (composites.contains(composite_name)) {
+ /* Duplicate composite - refuse to add */
+ if (silent_duplicate) {
+ msg_debug_config("composite %s is redefined", composite_name.data());
+ return nullptr;
+ }
+ else {
+ msg_warn_config("composite %s is redefined", composite_name.data());
+ }
+ }
+
+ if (!rspamd_parse_expression(composite_expression.data(),
+ composite_expression.size(), &composite_expr_subr,
+ nullptr, cfg->cfg_pool, &err, &expr)) {
+ msg_err_config("cannot parse composite expression for %s: %e",
+ composite_name.data(), err);
+
+ if (err) {
+ g_error_free(err);
+ }
+
+ return nullptr;
+ }
+
+ auto final_score = std::isnan(score) ? (std::isnan(cfg->unknown_weight) ? 0.0 : cfg->unknown_weight) : score;
+ rspamd_config_add_symbol(cfg, composite_name.data(), final_score,
+ composite_name.data(), "composite",
+ 0,
+ 0,
+ 1);
+
+ return new_composite(composite_name, expr, composite_expression).get();
+}
+
+struct map_cbdata {
+ composites_manager *cm;
+ struct rspamd_config *cfg;
+ std::string buf;
+
+ explicit map_cbdata(struct rspamd_config *cfg)
+ : cfg(cfg)
+ {
+ cm = COMPOSITE_MANAGER_FROM_PTR(cfg->composites_manager);
+ }
+
+ static char *map_read(char *chunk, int len,
+ struct map_cb_data *data,
+ gboolean _final)
+ {
+
+ if (data->cur_data == nullptr) {
+ data->cur_data = data->prev_data;
+ reinterpret_cast<map_cbdata *>(data->cur_data)->buf.clear();
+ }
+
+ auto *cbd = reinterpret_cast<map_cbdata *>(data->cur_data);
+
+ cbd->buf.append(chunk, len);
+ return nullptr;
+ }
+
+ static void
+ map_fin(struct map_cb_data *data, void **target)
+ {
+ auto *cbd = reinterpret_cast<map_cbdata *>(data->cur_data);
+
+ if (data->errored) {
+ if (cbd) {
+ cbd->buf.clear();
+ }
+ }
+ else if (cbd != nullptr) {
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ rspamd::string_foreach_line(cbd->buf, [&](std::string_view line) {
+ auto [name_and_score, expr] = rspamd::string_split_on(line, ' ');
+ auto [name, score] = rspamd::string_split_on(name_and_score, ':');
+
+ if (!score.empty()) {
+ /* I wish it was supported properly */
+ //auto conv_res = std::from_chars(value->data(), value->size(), num);
+ char numbuf[128], *endptr = nullptr;
+ rspamd_strlcpy(numbuf, score.data(), MIN(score.size(), sizeof(numbuf)));
+ auto num = g_ascii_strtod(numbuf, &endptr);
+
+ if (fabs(num) >= G_MAXFLOAT || std::isnan(num)) {
+ msg_err("invalid score for %*s", (int) name_and_score.size(), name_and_score.data());
+ return;
+ }
+
+ auto ret = cbd->cm->add_composite(name, expr, true, num);
+
+ if (ret == nullptr) {
+ msg_err("cannot add composite %*s", (int) name_and_score.size(), name_and_score.data());
+ return;
+ }
+ }
+ else {
+ msg_err("missing score for %*s", (int) name_and_score.size(), name_and_score.data());
+ return;
+ }
+ });
+ }
+ else {
+ msg_err("no data read for composites map");
+ }
+ }
+
+ static void
+ map_dtor(struct map_cb_data *data)
+ {
+ auto *cbd = reinterpret_cast<map_cbdata *>(data->cur_data);
+ delete cbd;
+ }
+};
+}
+
+
+void *
+rspamd_composites_manager_create(struct rspamd_config *cfg)
+{
+ auto *cm = new rspamd::composites::composites_manager(cfg);
+
+ return reinterpret_cast<void *>(cm);
+}
+
+
+gsize rspamd_composites_manager_nelts(void *ptr)
+{
+ return COMPOSITE_MANAGER_FROM_PTR(ptr)->size();
+}
+
+void *
+rspamd_composites_manager_add_from_ucl(void *cm, const char *sym, const ucl_object_t *obj)
+{
+ return reinterpret_cast<void *>(COMPOSITE_MANAGER_FROM_PTR(cm)->add_composite(sym, obj, false));
+}
+
+void *
+rspamd_composites_manager_add_from_string(void *cm, const char *sym, const char *expr)
+{
+ return reinterpret_cast<void *>(COMPOSITE_MANAGER_FROM_PTR(cm)->add_composite(sym, expr, false));
+}
+
+void *
+rspamd_composites_manager_add_from_ucl_silent(void *cm, const char *sym, const ucl_object_t *obj)
+{
+ return reinterpret_cast<void *>(COMPOSITE_MANAGER_FROM_PTR(cm)->add_composite(sym, obj, true));
+}
+
+void *
+rspamd_composites_manager_add_from_string_silent(void *cm, const char *sym, const char *expr)
+{
+ return reinterpret_cast<void *>(COMPOSITE_MANAGER_FROM_PTR(cm)->add_composite(sym, expr, true));
+}
+
+
+bool rspamd_composites_add_map_handlers(const ucl_object_t *obj, struct rspamd_config *cfg)
+{
+ auto **pcbdata = rspamd_mempool_alloc_type(cfg->cfg_pool, rspamd::composites::map_cbdata *);
+ auto *cbdata = new rspamd::composites::map_cbdata{cfg};
+ *pcbdata = cbdata;
+
+ if (struct rspamd_map * m; (m = rspamd_map_add_from_ucl(cfg, obj, "composites map",
+ rspamd::composites::map_cbdata::map_read, rspamd::composites::map_cbdata::map_fin,
+ rspamd::composites::map_cbdata::map_dtor, (void **) pcbdata,
+ nullptr, RSPAMD_MAP_DEFAULT)) == nullptr) {
+ msg_err_config("cannot load composites map from %s", ucl_object_key(obj));
+ return false;
+ }
+
+ return true;
+} \ No newline at end of file
diff --git a/src/libserver/css/CMakeLists.txt b/src/libserver/css/CMakeLists.txt
new file mode 100644
index 0000000..c0c9d51
--- /dev/null
+++ b/src/libserver/css/CMakeLists.txt
@@ -0,0 +1,9 @@
+SET(LIBCSSSRC "${CMAKE_CURRENT_SOURCE_DIR}/css.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_property.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_value.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_selector.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_tokeniser.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_util.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_rule.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_parser.cxx"
+ PARENT_SCOPE) \ No newline at end of file
diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx
new file mode 100644
index 0000000..1b369ed
--- /dev/null
+++ b/src/libserver/css/css.cxx
@@ -0,0 +1,227 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css.hxx"
+#include "contrib/ankerl/unordered_dense.h"
+#include "css_parser.hxx"
+#include "libserver/html/html_tag.hxx"
+#include "libserver/html/html_block.hxx"
+
+/* Keep unit tests implementation here (it'll possibly be moved outside one day) */
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#define DOCTEST_CONFIG_IMPLEMENT
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+INIT_LOG_MODULE_PUBLIC(css);
+
+class css_style_sheet::impl {
+public:
+ using sel_shared_hash = smart_ptr_hash<css_selector>;
+ using sel_shared_eq = smart_ptr_equal<css_selector>;
+ using selector_ptr = std::unique_ptr<css_selector>;
+ using selectors_hash = ankerl::unordered_dense::map<selector_ptr, css_declarations_block_ptr,
+ sel_shared_hash, sel_shared_eq>;
+ using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>;
+ selectors_hash tags_selector;
+ selectors_hash class_selectors;
+ selectors_hash id_selectors;
+ std::optional<universal_selector_t> universal_selector;
+};
+
+css_style_sheet::css_style_sheet(rspamd_mempool_t *pool)
+ : pool(pool), pimpl(new impl)
+{
+}
+css_style_sheet::~css_style_sheet()
+{
+}
+
+auto css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector,
+ css_declarations_block_ptr decls) -> void
+{
+ impl::selectors_hash *target_hash = nullptr;
+
+ switch (selector->type) {
+ case css_selector::selector_type::SELECTOR_ALL:
+ if (pimpl->universal_selector) {
+ /* Another universal selector */
+ msg_debug_css("redefined universal selector, merging rules");
+ pimpl->universal_selector->second->merge_block(*decls);
+ }
+ else {
+ msg_debug_css("added universal selector");
+ pimpl->universal_selector = std::make_pair(std::move(selector),
+ decls);
+ }
+ break;
+ case css_selector::selector_type::SELECTOR_CLASS:
+ target_hash = &pimpl->class_selectors;
+ break;
+ case css_selector::selector_type::SELECTOR_ID:
+ target_hash = &pimpl->id_selectors;
+ break;
+ case css_selector::selector_type::SELECTOR_TAG:
+ target_hash = &pimpl->tags_selector;
+ break;
+ }
+
+ if (target_hash) {
+ auto found_it = target_hash->find(selector);
+
+ if (found_it == target_hash->end()) {
+ /* Easy case, new element */
+ target_hash->insert({std::move(selector), decls});
+ }
+ else {
+ /* The problem with merging is actually in how to handle selectors chains
+ * For example, we have 2 selectors:
+ * 1. class id tag -> meaning that we first match class, then we ensure that
+ * id is also the same and finally we check the tag
+ * 2. tag class id -> it means that we check first tag, then class and then id
+ * So we have somehow equal path in the xpath terms.
+ * I suppose now, that we merely check parent stuff and handle duplicates
+ * merging when finally resolving paths.
+ */
+ auto sel_str = selector->to_string().value_or("unknown");
+ msg_debug_css("found duplicate selector: %*s", (int) sel_str.size(),
+ sel_str.data());
+ found_it->second->merge_block(*decls);
+ }
+ }
+}
+
+auto css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block *
+{
+ std::optional<std::string_view> id_comp, class_comp;
+ rspamd::html::html_block *res = nullptr;
+
+ if (!tag) {
+ return nullptr;
+ }
+
+ /* First, find id in a tag and a class */
+ for (const auto &param: tag->components) {
+ if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) {
+ id_comp = param.value;
+ }
+ else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
+ class_comp = param.value;
+ }
+ }
+
+ /* ID part */
+ if (id_comp && !pimpl->id_selectors.empty()) {
+ auto found_id_sel = pimpl->id_selectors.find(css_selector{id_comp.value()});
+
+ if (found_id_sel != pimpl->id_selectors.end()) {
+ const auto &decl = *(found_id_sel->second);
+ res = decl.compile_to_block(pool);
+ }
+ }
+
+ /* Class part */
+ if (class_comp && !pimpl->class_selectors.empty()) {
+ auto sv_split = [](auto strv, std::string_view delims = " ") -> std::vector<std::string_view> {
+ std::vector<decltype(strv)> ret;
+ std::size_t start = 0;
+
+ while (start < strv.size()) {
+ const auto last = strv.find_first_of(delims, start);
+ if (start != last) {
+ ret.emplace_back(strv.substr(start, last - start));
+ }
+
+ if (last == std::string_view::npos) {
+ break;
+ }
+
+ start = last + 1;
+ }
+
+ return ret;
+ };
+
+ auto elts = sv_split(class_comp.value());
+
+ for (const auto &e: elts) {
+ auto found_class_sel = pimpl->class_selectors.find(
+ css_selector{e, css_selector::selector_type::SELECTOR_CLASS});
+
+ if (found_class_sel != pimpl->class_selectors.end()) {
+ const auto &decl = *(found_class_sel->second);
+ auto *tmp = decl.compile_to_block(pool);
+
+ if (res == nullptr) {
+ res = tmp;
+ }
+ else {
+ res->propagate_block(*tmp);
+ }
+ }
+ }
+ }
+
+ /* Tags part */
+ if (!pimpl->tags_selector.empty()) {
+ auto found_tag_sel = pimpl->tags_selector.find(
+ css_selector{static_cast<tag_id_t>(tag->id)});
+
+ if (found_tag_sel != pimpl->tags_selector.end()) {
+ const auto &decl = *(found_tag_sel->second);
+ auto *tmp = decl.compile_to_block(pool);
+
+ if (res == nullptr) {
+ res = tmp;
+ }
+ else {
+ res->propagate_block(*tmp);
+ }
+ }
+ }
+
+ /* Finally, universal selector */
+ if (pimpl->universal_selector) {
+ auto *tmp = pimpl->universal_selector->second->compile_to_block(pool);
+
+ if (res == nullptr) {
+ res = tmp;
+ }
+ else {
+ res->propagate_block(*tmp);
+ }
+ }
+
+ return res;
+}
+
+auto css_parse_style(rspamd_mempool_t *pool,
+ std::string_view input,
+ std::shared_ptr<css_style_sheet> &&existing)
+ -> css_return_pair
+{
+ auto parse_res = rspamd::css::parse_css(pool, input,
+ std::forward<std::shared_ptr<css_style_sheet>>(existing));
+
+ if (parse_res.has_value()) {
+ return std::make_pair(parse_res.value(), css_parse_error());
+ }
+
+ return std::make_pair(nullptr, parse_res.error());
+}
+
+}// namespace rspamd::css \ No newline at end of file
diff --git a/src/libserver/css/css.hxx b/src/libserver/css/css.hxx
new file mode 100644
index 0000000..f0f8120
--- /dev/null
+++ b/src/libserver/css/css.hxx
@@ -0,0 +1,68 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef RSPAMD_CSS_HXX
+#define RSPAMD_CSS_HXX
+
+#include <string>
+#include <memory>
+#include "logger.h"
+#include "css_rule.hxx"
+#include "css_selector.hxx"
+
+namespace rspamd::html {
+/* Forward declaration */
+struct html_tag;
+struct html_block;
+}// namespace rspamd::html
+
+namespace rspamd::css {
+
+extern int rspamd_css_log_id;
+
+#define msg_debug_css(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_css_log_id, "css", pool->tag.uid, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+#define msg_err_css(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "css", pool->tag.uid, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+
+class css_style_sheet {
+public:
+ css_style_sheet(rspamd_mempool_t *pool);
+ ~css_style_sheet(); /* must be declared separately due to pimpl */
+ auto add_selector_rule(std::unique_ptr<css_selector> &&selector,
+ css_declarations_block_ptr decls) -> void;
+
+ auto check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block *;
+
+private:
+ class impl;
+ rspamd_mempool_t *pool;
+ std::unique_ptr<impl> pimpl;
+};
+
+using css_return_pair = std::pair<std::shared_ptr<css_style_sheet>, css_parse_error>;
+auto css_parse_style(rspamd_mempool_t *pool,
+ std::string_view input,
+ std::shared_ptr<css_style_sheet> &&existing) -> css_return_pair;
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_H \ No newline at end of file
diff --git a/src/libserver/css/css_colors_list.hxx b/src/libserver/css/css_colors_list.hxx
new file mode 100644
index 0000000..6dfe54f
--- /dev/null
+++ b/src/libserver/css/css_colors_list.hxx
@@ -0,0 +1,738 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_CSS_COLORS_LIST_HXX
+#define RSPAMD_CSS_COLORS_LIST_HXX
+
+#pragma once
+
+#include <string_view>
+#include "contrib/ankerl/unordered_dense.h"
+#include "css_value.hxx"
+
+namespace rspamd::css {
+
+/*
+ * List of all colors, intended to use with hashes/sets
+ * TODO: think about frozen structs when we can deal with 700 values without
+ * compiler limits...
+ */
+static const ankerl::unordered_dense::map<std::string_view, css_color> css_colors_map{
+ {"aliceblue", {240, 248, 255}},
+ {"antiquewhite", {250, 235, 215}},
+ {"antiquewhite1", {255, 239, 219}},
+ {"antiquewhite2", {238, 223, 204}},
+ {"antiquewhite3", {205, 192, 176}},
+ {"antiquewhite4", {139, 131, 120}},
+ {"aqua", {0, 255, 255}},
+ {"aquamarine", {127, 255, 212}},
+ {"aquamarine1", {127, 255, 212}},
+ {"aquamarine2", {118, 238, 198}},
+ {"aquamarine3", {102, 205, 170}},
+ {"aquamarine4", {69, 139, 116}},
+ {"azure", {240, 255, 255}},
+ {"azure1", {240, 255, 255}},
+ {"azure2", {224, 238, 238}},
+ {"azure3", {193, 205, 205}},
+ {"azure4", {131, 139, 139}},
+ {"beige", {245, 245, 220}},
+ {"bisque", {255, 228, 196}},
+ {"bisque1", {255, 228, 196}},
+ {"bisque2", {238, 213, 183}},
+ {"bisque3", {205, 183, 158}},
+ {"bisque4", {139, 125, 107}},
+ {"black", {0, 0, 0}},
+ {"blanchedalmond", {255, 235, 205}},
+ {"blue", {0, 0, 255}},
+ {"blue1", {0, 0, 255}},
+ {"blue2", {0, 0, 238}},
+ {"blue3", {0, 0, 205}},
+ {"blue4", {0, 0, 139}},
+ {"blueviolet", {138, 43, 226}},
+ {"brown", {165, 42, 42}},
+ {"brown1", {255, 64, 64}},
+ {"brown2", {238, 59, 59}},
+ {"brown3", {205, 51, 51}},
+ {"brown4", {139, 35, 35}},
+ {"burlywood", {222, 184, 135}},
+ {"burlywood1", {255, 211, 155}},
+ {"burlywood2", {238, 197, 145}},
+ {"burlywood3", {205, 170, 125}},
+ {"burlywood4", {139, 115, 85}},
+ {"cadetblue", {95, 158, 160}},
+ {"cadetblue1", {152, 245, 255}},
+ {"cadetblue2", {142, 229, 238}},
+ {"cadetblue3", {122, 197, 205}},
+ {"cadetblue4", {83, 134, 139}},
+ {"chartreuse", {127, 255, 0}},
+ {"chartreuse1", {127, 255, 0}},
+ {"chartreuse2", {118, 238, 0}},
+ {"chartreuse3", {102, 205, 0}},
+ {"chartreuse4", {69, 139, 0}},
+ {"chocolate", {210, 105, 30}},
+ {"chocolate1", {255, 127, 36}},
+ {"chocolate2", {238, 118, 33}},
+ {"chocolate3", {205, 102, 29}},
+ {"chocolate4", {139, 69, 19}},
+ {"coral", {255, 127, 80}},
+ {"coral1", {255, 114, 86}},
+ {"coral2", {238, 106, 80}},
+ {"coral3", {205, 91, 69}},
+ {"coral4", {139, 62, 47}},
+ {"cornflowerblue", {100, 149, 237}},
+ {"cornsilk", {255, 248, 220}},
+ {"cornsilk1", {255, 248, 220}},
+ {"cornsilk2", {238, 232, 205}},
+ {"cornsilk3", {205, 200, 177}},
+ {"cornsilk4", {139, 136, 120}},
+ {"crimson", {220, 20, 60}},
+ {"cyan", {0, 255, 255}},
+ {"cyan1", {0, 255, 255}},
+ {"cyan2", {0, 238, 238}},
+ {"cyan3", {0, 205, 205}},
+ {"cyan4", {0, 139, 139}},
+ {"darkblue", {0, 0, 139}},
+ {"darkcyan", {0, 139, 139}},
+ {"darkgoldenrod", {184, 134, 11}},
+ {"darkgoldenrod1", {255, 185, 15}},
+ {"darkgoldenrod2", {238, 173, 14}},
+ {"darkgoldenrod3", {205, 149, 12}},
+ {"darkgoldenrod4", {139, 101, 8}},
+ {"darkgray", {169, 169, 169}},
+ {"darkgreen", {0, 100, 0}},
+ {"darkgrey", {169, 169, 169}},
+ {"darkkhaki", {189, 183, 107}},
+ {"darkmagenta", {139, 0, 139}},
+ {"darkolivegreen", {85, 107, 47}},
+ {"darkolivegreen1", {202, 255, 112}},
+ {"darkolivegreen2", {188, 238, 104}},
+ {"darkolivegreen3", {162, 205, 90}},
+ {"darkolivegreen4", {110, 139, 61}},
+ {"darkorange", {255, 140, 0}},
+ {"darkorange1", {255, 127, 0}},
+ {"darkorange2", {238, 118, 0}},
+ {"darkorange3", {205, 102, 0}},
+ {"darkorange4", {139, 69, 0}},
+ {"darkorchid", {153, 50, 204}},
+ {"darkorchid1", {191, 62, 255}},
+ {"darkorchid2", {178, 58, 238}},
+ {"darkorchid3", {154, 50, 205}},
+ {"darkorchid4", {104, 34, 139}},
+ {"darkred", {139, 0, 0}},
+ {"darksalmon", {233, 150, 122}},
+ {"darkseagreen", {143, 188, 143}},
+ {"darkseagreen1", {193, 255, 193}},
+ {"darkseagreen2", {180, 238, 180}},
+ {"darkseagreen3", {155, 205, 155}},
+ {"darkseagreen4", {105, 139, 105}},
+ {"darkslateblue", {72, 61, 139}},
+ {"darkslategray", {47, 79, 79}},
+ {"darkslategray1", {151, 255, 255}},
+ {"darkslategray2", {141, 238, 238}},
+ {"darkslategray3", {121, 205, 205}},
+ {"darkslategray4", {82, 139, 139}},
+ {"darkslategrey", {47, 79, 79}},
+ {"darkturquoise", {0, 206, 209}},
+ {"darkviolet", {148, 0, 211}},
+ {"deeppink", {255, 20, 147}},
+ {"deeppink1", {255, 20, 147}},
+ {"deeppink2", {238, 18, 137}},
+ {"deeppink3", {205, 16, 118}},
+ {"deeppink4", {139, 10, 80}},
+ {"deepskyblue", {0, 191, 255}},
+ {"deepskyblue1", {0, 191, 255}},
+ {"deepskyblue2", {0, 178, 238}},
+ {"deepskyblue3", {0, 154, 205}},
+ {"deepskyblue4", {0, 104, 139}},
+ {"dimgray", {105, 105, 105}},
+ {"dimgrey", {105, 105, 105}},
+ {"dodgerblue", {30, 144, 255}},
+ {"dodgerblue1", {30, 144, 255}},
+ {"dodgerblue2", {28, 134, 238}},
+ {"dodgerblue3", {24, 116, 205}},
+ {"dodgerblue4", {16, 78, 139}},
+ {"firebrick", {178, 34, 34}},
+ {"firebrick1", {255, 48, 48}},
+ {"firebrick2", {238, 44, 44}},
+ {"firebrick3", {205, 38, 38}},
+ {"firebrick4", {139, 26, 26}},
+ {"floralwhite", {255, 250, 240}},
+ {"forestgreen", {34, 139, 34}},
+ {"fuchsia", {255, 0, 255}},
+ {"gainsboro", {220, 220, 220}},
+ {"ghostwhite", {248, 248, 255}},
+ {"gold", {255, 215, 0}},
+ {"gold1", {255, 215, 0}},
+ {"gold2", {238, 201, 0}},
+ {"gold3", {205, 173, 0}},
+ {"gold4", {139, 117, 0}},
+ {"goldenrod", {218, 165, 32}},
+ {"goldenrod1", {255, 193, 37}},
+ {"goldenrod2", {238, 180, 34}},
+ {"goldenrod3", {205, 155, 29}},
+ {"goldenrod4", {139, 105, 20}},
+ {"gray", {190, 190, 190}},
+ {"gray0", {0, 0, 0}},
+ {"gray1", {3, 3, 3}},
+ {"gray10", {26, 26, 26}},
+ {"gray100", {255, 255, 255}},
+ {"gray11", {28, 28, 28}},
+ {"gray12", {31, 31, 31}},
+ {"gray13", {33, 33, 33}},
+ {"gray14", {36, 36, 36}},
+ {"gray15", {38, 38, 38}},
+ {"gray16", {41, 41, 41}},
+ {"gray17", {43, 43, 43}},
+ {"gray18", {46, 46, 46}},
+ {"gray19", {48, 48, 48}},
+ {"gray2", {5, 5, 5}},
+ {"gray20", {51, 51, 51}},
+ {"gray21", {54, 54, 54}},
+ {"gray22", {56, 56, 56}},
+ {"gray23", {59, 59, 59}},
+ {"gray24", {61, 61, 61}},
+ {"gray25", {64, 64, 64}},
+ {"gray26", {66, 66, 66}},
+ {"gray27", {69, 69, 69}},
+ {"gray28", {71, 71, 71}},
+ {"gray29", {74, 74, 74}},
+ {"gray3", {8, 8, 8}},
+ {"gray30", {77, 77, 77}},
+ {"gray31", {79, 79, 79}},
+ {"gray32", {82, 82, 82}},
+ {"gray33", {84, 84, 84}},
+ {"gray34", {87, 87, 87}},
+ {"gray35", {89, 89, 89}},
+ {"gray36", {92, 92, 92}},
+ {"gray37", {94, 94, 94}},
+ {"gray38", {97, 97, 97}},
+ {"gray39", {99, 99, 99}},
+ {"gray4", {10, 10, 10}},
+ {"gray40", {102, 102, 102}},
+ {"gray41", {105, 105, 105}},
+ {"gray42", {107, 107, 107}},
+ {"gray43", {110, 110, 110}},
+ {"gray44", {112, 112, 112}},
+ {"gray45", {115, 115, 115}},
+ {"gray46", {117, 117, 117}},
+ {"gray47", {120, 120, 120}},
+ {"gray48", {122, 122, 122}},
+ {"gray49", {125, 125, 125}},
+ {"gray5", {13, 13, 13}},
+ {"gray50", {127, 127, 127}},
+ {"gray51", {130, 130, 130}},
+ {"gray52", {133, 133, 133}},
+ {"gray53", {135, 135, 135}},
+ {"gray54", {138, 138, 138}},
+ {"gray55", {140, 140, 140}},
+ {"gray56", {143, 143, 143}},
+ {"gray57", {145, 145, 145}},
+ {"gray58", {148, 148, 148}},
+ {"gray59", {150, 150, 150}},
+ {"gray6", {15, 15, 15}},
+ {"gray60", {153, 153, 153}},
+ {"gray61", {156, 156, 156}},
+ {"gray62", {158, 158, 158}},
+ {"gray63", {161, 161, 161}},
+ {"gray64", {163, 163, 163}},
+ {"gray65", {166, 166, 166}},
+ {"gray66", {168, 168, 168}},
+ {"gray67", {171, 171, 171}},
+ {"gray68", {173, 173, 173}},
+ {"gray69", {176, 176, 176}},
+ {"gray7", {18, 18, 18}},
+ {"gray70", {179, 179, 179}},
+ {"gray71", {181, 181, 181}},
+ {"gray72", {184, 184, 184}},
+ {"gray73", {186, 186, 186}},
+ {"gray74", {189, 189, 189}},
+ {"gray75", {191, 191, 191}},
+ {"gray76", {194, 194, 194}},
+ {"gray77", {196, 196, 196}},
+ {"gray78", {199, 199, 199}},
+ {"gray79", {201, 201, 201}},
+ {"gray8", {20, 20, 20}},
+ {"gray80", {204, 204, 204}},
+ {"gray81", {207, 207, 207}},
+ {"gray82", {209, 209, 209}},
+ {"gray83", {212, 212, 212}},
+ {"gray84", {214, 214, 214}},
+ {"gray85", {217, 217, 217}},
+ {"gray86", {219, 219, 219}},
+ {"gray87", {222, 222, 222}},
+ {"gray88", {224, 224, 224}},
+ {"gray89", {227, 227, 227}},
+ {"gray9", {23, 23, 23}},
+ {"gray90", {229, 229, 229}},
+ {"gray91", {232, 232, 232}},
+ {"gray92", {235, 235, 235}},
+ {"gray93", {237, 237, 237}},
+ {"gray94", {240, 240, 240}},
+ {"gray95", {242, 242, 242}},
+ {"gray96", {245, 245, 245}},
+ {"gray97", {247, 247, 247}},
+ {"gray98", {250, 250, 250}},
+ {"gray99", {252, 252, 252}},
+ {"green", {0, 255, 0}},
+ {"green1", {0, 255, 0}},
+ {"green2", {0, 238, 0}},
+ {"green3", {0, 205, 0}},
+ {"green4", {0, 139, 0}},
+ {"greenyellow", {173, 255, 47}},
+ {"grey", {190, 190, 190}},
+ {"grey0", {0, 0, 0}},
+ {"grey1", {3, 3, 3}},
+ {"grey10", {26, 26, 26}},
+ {"grey100", {255, 255, 255}},
+ {"grey11", {28, 28, 28}},
+ {"grey12", {31, 31, 31}},
+ {"grey13", {33, 33, 33}},
+ {"grey14", {36, 36, 36}},
+ {"grey15", {38, 38, 38}},
+ {"grey16", {41, 41, 41}},
+ {"grey17", {43, 43, 43}},
+ {"grey18", {46, 46, 46}},
+ {"grey19", {48, 48, 48}},
+ {"grey2", {5, 5, 5}},
+ {"grey20", {51, 51, 51}},
+ {"grey21", {54, 54, 54}},
+ {"grey22", {56, 56, 56}},
+ {"grey23", {59, 59, 59}},
+ {"grey24", {61, 61, 61}},
+ {"grey25", {64, 64, 64}},
+ {"grey26", {66, 66, 66}},
+ {"grey27", {69, 69, 69}},
+ {"grey28", {71, 71, 71}},
+ {"grey29", {74, 74, 74}},
+ {"grey3", {8, 8, 8}},
+ {"grey30", {77, 77, 77}},
+ {"grey31", {79, 79, 79}},
+ {"grey32", {82, 82, 82}},
+ {"grey33", {84, 84, 84}},
+ {"grey34", {87, 87, 87}},
+ {"grey35", {89, 89, 89}},
+ {"grey36", {92, 92, 92}},
+ {"grey37", {94, 94, 94}},
+ {"grey38", {97, 97, 97}},
+ {"grey39", {99, 99, 99}},
+ {"grey4", {10, 10, 10}},
+ {"grey40", {102, 102, 102}},
+ {"grey41", {105, 105, 105}},
+ {"grey42", {107, 107, 107}},
+ {"grey43", {110, 110, 110}},
+ {"grey44", {112, 112, 112}},
+ {"grey45", {115, 115, 115}},
+ {"grey46", {117, 117, 117}},
+ {"grey47", {120, 120, 120}},
+ {"grey48", {122, 122, 122}},
+ {"grey49", {125, 125, 125}},
+ {"grey5", {13, 13, 13}},
+ {"grey50", {127, 127, 127}},
+ {"grey51", {130, 130, 130}},
+ {"grey52", {133, 133, 133}},
+ {"grey53", {135, 135, 135}},
+ {"grey54", {138, 138, 138}},
+ {"grey55", {140, 140, 140}},
+ {"grey56", {143, 143, 143}},
+ {"grey57", {145, 145, 145}},
+ {"grey58", {148, 148, 148}},
+ {"grey59", {150, 150, 150}},
+ {"grey6", {15, 15, 15}},
+ {"grey60", {153, 153, 153}},
+ {"grey61", {156, 156, 156}},
+ {"grey62", {158, 158, 158}},
+ {"grey63", {161, 161, 161}},
+ {"grey64", {163, 163, 163}},
+ {"grey65", {166, 166, 166}},
+ {"grey66", {168, 168, 168}},
+ {"grey67", {171, 171, 171}},
+ {"grey68", {173, 173, 173}},
+ {"grey69", {176, 176, 176}},
+ {"grey7", {18, 18, 18}},
+ {"grey70", {179, 179, 179}},
+ {"grey71", {181, 181, 181}},
+ {"grey72", {184, 184, 184}},
+ {"grey73", {186, 186, 186}},
+ {"grey74", {189, 189, 189}},
+ {"grey75", {191, 191, 191}},
+ {"grey76", {194, 194, 194}},
+ {"grey77", {196, 196, 196}},
+ {"grey78", {199, 199, 199}},
+ {"grey79", {201, 201, 201}},
+ {"grey8", {20, 20, 20}},
+ {"grey80", {204, 204, 204}},
+ {"grey81", {207, 207, 207}},
+ {"grey82", {209, 209, 209}},
+ {"grey83", {212, 212, 212}},
+ {"grey84", {214, 214, 214}},
+ {"grey85", {217, 217, 217}},
+ {"grey86", {219, 219, 219}},
+ {"grey87", {222, 222, 222}},
+ {"grey88", {224, 224, 224}},
+ {"grey89", {227, 227, 227}},
+ {"grey9", {23, 23, 23}},
+ {"grey90", {229, 229, 229}},
+ {"grey91", {232, 232, 232}},
+ {"grey92", {235, 235, 235}},
+ {"grey93", {237, 237, 237}},
+ {"grey94", {240, 240, 240}},
+ {"grey95", {242, 242, 242}},
+ {"grey96", {245, 245, 245}},
+ {"grey97", {247, 247, 247}},
+ {"grey98", {250, 250, 250}},
+ {"grey99", {252, 252, 252}},
+ {"honeydew", {240, 255, 240}},
+ {"honeydew1", {240, 255, 240}},
+ {"honeydew2", {224, 238, 224}},
+ {"honeydew3", {193, 205, 193}},
+ {"honeydew4", {131, 139, 131}},
+ {"hotpink", {255, 105, 180}},
+ {"hotpink1", {255, 110, 180}},
+ {"hotpink2", {238, 106, 167}},
+ {"hotpink3", {205, 96, 144}},
+ {"hotpink4", {139, 58, 98}},
+ {"indianred", {205, 92, 92}},
+ {"indianred1", {255, 106, 106}},
+ {"indianred2", {238, 99, 99}},
+ {"indianred3", {205, 85, 85}},
+ {"indianred4", {139, 58, 58}},
+ {"indigo", {75, 0, 130}},
+ {"ivory", {255, 255, 240}},
+ {"ivory1", {255, 255, 240}},
+ {"ivory2", {238, 238, 224}},
+ {"ivory3", {205, 205, 193}},
+ {"ivory4", {139, 139, 131}},
+ {"khaki", {240, 230, 140}},
+ {"khaki1", {255, 246, 143}},
+ {"khaki2", {238, 230, 133}},
+ {"khaki3", {205, 198, 115}},
+ {"khaki4", {139, 134, 78}},
+ {"lavender", {230, 230, 250}},
+ {"lavenderblush", {255, 240, 245}},
+ {"lavenderblush1", {255, 240, 245}},
+ {"lavenderblush2", {238, 224, 229}},
+ {"lavenderblush3", {205, 193, 197}},
+ {"lavenderblush4", {139, 131, 134}},
+ {"lawngreen", {124, 252, 0}},
+ {"lemonchiffon", {255, 250, 205}},
+ {"lemonchiffon1", {255, 250, 205}},
+ {"lemonchiffon2", {238, 233, 191}},
+ {"lemonchiffon3", {205, 201, 165}},
+ {"lemonchiffon4", {139, 137, 112}},
+ {"lightblue", {173, 216, 230}},
+ {"lightblue1", {191, 239, 255}},
+ {"lightblue2", {178, 223, 238}},
+ {"lightblue3", {154, 192, 205}},
+ {"lightblue4", {104, 131, 139}},
+ {"lightcoral", {240, 128, 128}},
+ {"lightcyan", {224, 255, 255}},
+ {"lightcyan1", {224, 255, 255}},
+ {"lightcyan2", {209, 238, 238}},
+ {"lightcyan3", {180, 205, 205}},
+ {"lightcyan4", {122, 139, 139}},
+ {"lightgoldenrod", {238, 221, 130}},
+ {"lightgoldenrod1", {255, 236, 139}},
+ {"lightgoldenrod2", {238, 220, 130}},
+ {"lightgoldenrod3", {205, 190, 112}},
+ {"lightgoldenrod4", {139, 129, 76}},
+ {"lightgoldenrodyellow", {250, 250, 210}},
+ {"lightgray", {211, 211, 211}},
+ {"lightgreen", {144, 238, 144}},
+ {"lightgrey", {211, 211, 211}},
+ {"lightpink", {255, 182, 193}},
+ {"lightpink1", {255, 174, 185}},
+ {"lightpink2", {238, 162, 173}},
+ {"lightpink3", {205, 140, 149}},
+ {"lightpink4", {139, 95, 101}},
+ {"lightsalmon", {255, 160, 122}},
+ {"lightsalmon1", {255, 160, 122}},
+ {"lightsalmon2", {238, 149, 114}},
+ {"lightsalmon3", {205, 129, 98}},
+ {"lightsalmon4", {139, 87, 66}},
+ {"lightseagreen", {32, 178, 170}},
+ {"lightskyblue", {135, 206, 250}},
+ {"lightskyblue1", {176, 226, 255}},
+ {"lightskyblue2", {164, 211, 238}},
+ {"lightskyblue3", {141, 182, 205}},
+ {"lightskyblue4", {96, 123, 139}},
+ {"lightslateblue", {132, 112, 255}},
+ {"lightslategray", {119, 136, 153}},
+ {"lightslategrey", {119, 136, 153}},
+ {"lightsteelblue", {176, 196, 222}},
+ {"lightsteelblue1", {202, 225, 255}},
+ {"lightsteelblue2", {188, 210, 238}},
+ {"lightsteelblue3", {162, 181, 205}},
+ {"lightsteelblue4", {110, 123, 139}},
+ {"lightyellow", {255, 255, 224}},
+ {"lightyellow1", {255, 255, 224}},
+ {"lightyellow2", {238, 238, 209}},
+ {"lightyellow3", {205, 205, 180}},
+ {"lightyellow4", {139, 139, 122}},
+ {"lime", {0, 255, 0}},
+ {"limegreen", {50, 205, 50}},
+ {"linen", {250, 240, 230}},
+ {"magenta", {255, 0, 255}},
+ {"magenta1", {255, 0, 255}},
+ {"magenta2", {238, 0, 238}},
+ {"magenta3", {205, 0, 205}},
+ {"magenta4", {139, 0, 139}},
+ {"maroon", {176, 48, 96}},
+ {"maroon1", {255, 52, 179}},
+ {"maroon2", {238, 48, 167}},
+ {"maroon3", {205, 41, 144}},
+ {"maroon4", {139, 28, 98}},
+ {"mediumaquamarine", {102, 205, 170}},
+ {"mediumblue", {0, 0, 205}},
+ {"mediumorchid", {186, 85, 211}},
+ {"mediumorchid1", {224, 102, 255}},
+ {"mediumorchid2", {209, 95, 238}},
+ {"mediumorchid3", {180, 82, 205}},
+ {"mediumorchid4", {122, 55, 139}},
+ {"mediumpurple", {147, 112, 219}},
+ {"mediumpurple1", {171, 130, 255}},
+ {"mediumpurple2", {159, 121, 238}},
+ {"mediumpurple3", {137, 104, 205}},
+ {"mediumpurple4", {93, 71, 139}},
+ {"mediumseagreen", {60, 179, 113}},
+ {"mediumslateblue", {123, 104, 238}},
+ {"mediumspringgreen", {0, 250, 154}},
+ {"mediumturquoise", {72, 209, 204}},
+ {"mediumvioletred", {199, 21, 133}},
+ {"midnightblue", {25, 25, 112}},
+ {"mintcream", {245, 255, 250}},
+ {"mistyrose", {255, 228, 225}},
+ {"mistyrose1", {255, 228, 225}},
+ {"mistyrose2", {238, 213, 210}},
+ {"mistyrose3", {205, 183, 181}},
+ {"mistyrose4", {139, 125, 123}},
+ {"moccasin", {255, 228, 181}},
+ {"navajowhite", {255, 222, 173}},
+ {"navajowhite1", {255, 222, 173}},
+ {"navajowhite2", {238, 207, 161}},
+ {"navajowhite3", {205, 179, 139}},
+ {"navajowhite4", {139, 121, 94}},
+ {"navy", {0, 0, 128}},
+ {"navyblue", {0, 0, 128}},
+ {"oldlace", {253, 245, 230}},
+ {"olive", {128, 128, 0}},
+ {"olivedrab", {107, 142, 35}},
+ {"olivedrab1", {192, 255, 62}},
+ {"olivedrab2", {179, 238, 58}},
+ {"olivedrab3", {154, 205, 50}},
+ {"olivedrab4", {105, 139, 34}},
+ {"orange", {255, 165, 0}},
+ {"orange1", {255, 165, 0}},
+ {"orange2", {238, 154, 0}},
+ {"orange3", {205, 133, 0}},
+ {"orange4", {139, 90, 0}},
+ {"orangered", {255, 69, 0}},
+ {"orangered1", {255, 69, 0}},
+ {"orangered2", {238, 64, 0}},
+ {"orangered3", {205, 55, 0}},
+ {"orangered4", {139, 37, 0}},
+ {"orchid", {218, 112, 214}},
+ {"orchid1", {255, 131, 250}},
+ {"orchid2", {238, 122, 233}},
+ {"orchid3", {205, 105, 201}},
+ {"orchid4", {139, 71, 137}},
+ {"palegoldenrod", {238, 232, 170}},
+ {"palegreen", {152, 251, 152}},
+ {"palegreen1", {154, 255, 154}},
+ {"palegreen2", {144, 238, 144}},
+ {"palegreen3", {124, 205, 124}},
+ {"palegreen4", {84, 139, 84}},
+ {"paleturquoise", {175, 238, 238}},
+ {"paleturquoise1", {187, 255, 255}},
+ {"paleturquoise2", {174, 238, 238}},
+ {"paleturquoise3", {150, 205, 205}},
+ {"paleturquoise4", {102, 139, 139}},
+ {"palevioletred", {219, 112, 147}},
+ {"palevioletred1", {255, 130, 171}},
+ {"palevioletred2", {238, 121, 159}},
+ {"palevioletred3", {205, 104, 137}},
+ {"palevioletred4", {139, 71, 93}},
+ {"papayawhip", {255, 239, 213}},
+ {"peachpuff", {255, 218, 185}},
+ {"peachpuff1", {255, 218, 185}},
+ {"peachpuff2", {238, 203, 173}},
+ {"peachpuff3", {205, 175, 149}},
+ {"peachpuff4", {139, 119, 101}},
+ {"peru", {205, 133, 63}},
+ {"pink", {255, 192, 203}},
+ {"pink1", {255, 181, 197}},
+ {"pink2", {238, 169, 184}},
+ {"pink3", {205, 145, 158}},
+ {"pink4", {139, 99, 108}},
+ {"plum", {221, 160, 221}},
+ {"plum1", {255, 187, 255}},
+ {"plum2", {238, 174, 238}},
+ {"plum3", {205, 150, 205}},
+ {"plum4", {139, 102, 139}},
+ {"powderblue", {176, 224, 230}},
+ {"purple", {160, 32, 240}},
+ {"purple1", {155, 48, 255}},
+ {"purple2", {145, 44, 238}},
+ {"purple3", {125, 38, 205}},
+ {"purple4", {85, 26, 139}},
+ {"rebeccapurple", {102, 51, 153}},
+ {"red", {255, 0, 0}},
+ {"red1", {255, 0, 0}},
+ {"red2", {238, 0, 0}},
+ {"red3", {205, 0, 0}},
+ {"red4", {139, 0, 0}},
+ {"rosybrown", {188, 143, 143}},
+ {"rosybrown1", {255, 193, 193}},
+ {"rosybrown2", {238, 180, 180}},
+ {"rosybrown3", {205, 155, 155}},
+ {"rosybrown4", {139, 105, 105}},
+ {"royalblue", {65, 105, 225}},
+ {"royalblue1", {72, 118, 255}},
+ {"royalblue2", {67, 110, 238}},
+ {"royalblue3", {58, 95, 205}},
+ {"royalblue4", {39, 64, 139}},
+ {"saddlebrown", {139, 69, 19}},
+ {"salmon", {250, 128, 114}},
+ {"salmon1", {255, 140, 105}},
+ {"salmon2", {238, 130, 98}},
+ {"salmon3", {205, 112, 84}},
+ {"salmon4", {139, 76, 57}},
+ {"sandybrown", {244, 164, 96}},
+ {"seagreen", {46, 139, 87}},
+ {"seagreen1", {84, 255, 159}},
+ {"seagreen2", {78, 238, 148}},
+ {"seagreen3", {67, 205, 128}},
+ {"seagreen4", {46, 139, 87}},
+ {"seashell", {255, 245, 238}},
+ {"seashell1", {255, 245, 238}},
+ {"seashell2", {238, 229, 222}},
+ {"seashell3", {205, 197, 191}},
+ {"seashell4", {139, 134, 130}},
+ {"sienna", {160, 82, 45}},
+ {"sienna1", {255, 130, 71}},
+ {"sienna2", {238, 121, 66}},
+ {"sienna3", {205, 104, 57}},
+ {"sienna4", {139, 71, 38}},
+ {"silver", {192, 192, 192}},
+ {"skyblue", {135, 206, 235}},
+ {"skyblue1", {135, 206, 255}},
+ {"skyblue2", {126, 192, 238}},
+ {"skyblue3", {108, 166, 205}},
+ {"skyblue4", {74, 112, 139}},
+ {"slateblue", {106, 90, 205}},
+ {"slateblue1", {131, 111, 255}},
+ {"slateblue2", {122, 103, 238}},
+ {"slateblue3", {105, 89, 205}},
+ {"slateblue4", {71, 60, 139}},
+ {"slategray", {112, 128, 144}},
+ {"slategray1", {198, 226, 255}},
+ {"slategray2", {185, 211, 238}},
+ {"slategray3", {159, 182, 205}},
+ {"slategray4", {108, 123, 139}},
+ {"slategrey", {112, 128, 144}},
+ {"snow", {255, 250, 250}},
+ {"snow1", {255, 250, 250}},
+ {"snow2", {238, 233, 233}},
+ {"snow3", {205, 201, 201}},
+ {"snow4", {139, 137, 137}},
+ {"springgreen", {0, 255, 127}},
+ {"springgreen1", {0, 255, 127}},
+ {"springgreen2", {0, 238, 118}},
+ {"springgreen3", {0, 205, 102}},
+ {"springgreen4", {0, 139, 69}},
+ {"steelblue", {70, 130, 180}},
+ {"steelblue1", {99, 184, 255}},
+ {"steelblue2", {92, 172, 238}},
+ {"steelblue3", {79, 148, 205}},
+ {"steelblue4", {54, 100, 139}},
+ {"tan", {210, 180, 140}},
+ {"tan1", {255, 165, 79}},
+ {"tan2", {238, 154, 73}},
+ {"tan3", {205, 133, 63}},
+ {"tan4", {139, 90, 43}},
+ {"teal", {0, 128, 128}},
+ {"thistle", {216, 191, 216}},
+ {"thistle1", {255, 225, 255}},
+ {"thistle2", {238, 210, 238}},
+ {"thistle3", {205, 181, 205}},
+ {"thistle4", {139, 123, 139}},
+ {"tomato", {255, 99, 71}},
+ {"tomato1", {255, 99, 71}},
+ {"tomato2", {238, 92, 66}},
+ {"tomato3", {205, 79, 57}},
+ {"tomato4", {139, 54, 38}},
+ {"turquoise", {64, 224, 208}},
+ {"turquoise1", {0, 245, 255}},
+ {"turquoise2", {0, 229, 238}},
+ {"turquoise3", {0, 197, 205}},
+ {"turquoise4", {0, 134, 139}},
+ {"violet", {238, 130, 238}},
+ {"violetred", {208, 32, 144}},
+ {"violetred1", {255, 62, 150}},
+ {"violetred2", {238, 58, 140}},
+ {"violetred3", {205, 50, 120}},
+ {"violetred4", {139, 34, 82}},
+ {"webgray", {128, 128, 128}},
+ {"webgreen", {0, 128, 0}},
+ {"webgrey", {128, 128, 128}},
+ {"webmaroon", {128, 0, 0}},
+ {"webpurple", {128, 0, 128}},
+ {"wheat", {245, 222, 179}},
+ {"wheat1", {255, 231, 186}},
+ {"wheat2", {238, 216, 174}},
+ {"wheat3", {205, 186, 150}},
+ {"wheat4", {139, 126, 102}},
+ {"white", {255, 255, 255}},
+ {"whitesmoke", {245, 245, 245}},
+ {"x11gray", {190, 190, 190}},
+ {"x11green", {0, 255, 0}},
+ {"x11grey", {190, 190, 190}},
+ {"x11maroon", {176, 48, 96}},
+ {"x11purple", {160, 32, 240}},
+ {"yellow", {255, 255, 0}},
+ {"yellow1", {255, 255, 0}},
+ {"yellow2", {238, 238, 0}},
+ {"yellow3", {205, 205, 0}},
+ {"yellow4", {139, 139, 0}},
+ {"yellowgreen", {154, 205, 50}},
+ {"activeborder", {180, 180, 180}},
+ {"activecaption", {153, 180, 209}},
+ {"appworkspace", {171, 171, 171}},
+ {"background", {0, 0, 0}},
+ {"buttonhighlight", {255, 255, 255}},
+ {"buttonshadow", {160, 160, 160}},
+ {"captiontext", {0, 0, 0}},
+ {"inactiveborder", {244, 247, 252}},
+ {"inactivecaption", {191, 205, 219}},
+ {"inactivecaptiontext", {0, 0, 0}},
+ {"infobackground", {255, 255, 225}},
+ {"infotext", {0, 0, 0}},
+ {"menu", {240, 240, 240}},
+ {"menutext", {0, 0, 0}},
+ {"scrollbar", {200, 200, 200}},
+ {"threeddarkshadow", {0, 0, 0}},
+ {"threedface", {0, 0, 0}},
+ {"threedhighlight", {0, 0, 0}},
+ {"threedlightshadow", {0, 0, 0}},
+ {"threedshadow", {0, 0, 0}},
+ {"transparent", {0, 0, 0, 0}},
+ {"window", {255, 255, 255}},
+ {"windowframe", {100, 100, 100}},
+ {"windowtext", {0, 0, 0}},
+};
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_COLORS_LIST_HXX
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
new file mode 100644
index 0000000..aed035a
--- /dev/null
+++ b/src/libserver/css/css_parser.cxx
@@ -0,0 +1,892 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_parser.hxx"
+#include "css_tokeniser.hxx"
+#include "css_selector.hxx"
+#include "css_rule.hxx"
+#include "css_util.hxx"
+#include "css.hxx"
+#include "fmt/core.h"
+
+#include <vector>
+#include <unicode/utf8.h>
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+const css_consumed_block css_parser_eof_block{};
+
+auto css_consumed_block::attach_block(consumed_block_ptr &&block) -> bool
+{
+ if (std::holds_alternative<std::monostate>(content)) {
+ /* Switch from monostate */
+ content = std::vector<consumed_block_ptr>();
+ }
+ else if (!std::holds_alternative<std::vector<consumed_block_ptr>>(content)) {
+ /* A single component, cannot attach a block ! */
+ return false;
+ }
+
+ auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content);
+ value_vec.push_back(std::move(block));
+
+ return true;
+}
+
+auto css_consumed_block::add_function_argument(consumed_block_ptr &&block) -> bool
+{
+ if (!std::holds_alternative<css_function_block>(content)) {
+ return false;
+ }
+
+ auto &&func_bloc = std::get<css_function_block>(content);
+ func_bloc.args.push_back(std::move(block));
+
+ return true;
+}
+
+auto css_consumed_block::token_type_str(void) const -> const char *
+{
+ const auto *ret = "";
+
+ switch (tag) {
+ case parser_tag_type::css_top_block:
+ ret = "top";
+ break;
+ case parser_tag_type::css_qualified_rule:
+ ret = "qualified rule";
+ break;
+ case parser_tag_type::css_at_rule:
+ ret = "at rule";
+ break;
+ case parser_tag_type::css_simple_block:
+ ret = "simple block";
+ break;
+ case parser_tag_type::css_function:
+ ret = "function";
+ break;
+ case parser_tag_type::css_function_arg:
+ ret = "function arg";
+ break;
+ case parser_tag_type::css_component:
+ ret = "component";
+ break;
+ case parser_tag_type::css_eof_block:
+ ret = "eof";
+ break;
+ }
+
+ return ret;
+}
+
+auto css_consumed_block::debug_str(void) -> std::string
+{
+ std::string ret = fmt::format(R"("type": "{}", "value": )", token_type_str());
+
+ std::visit([&](auto &arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
+ /* Array of blocks */
+ ret += "[";
+ for (const auto &block: arg) {
+ ret += "{";
+ ret += block->debug_str();
+ ret += "}, ";
+ }
+
+ if (*(--ret.end()) == ' ') {
+ ret.pop_back();
+ ret.pop_back(); /* Last ',' */
+ }
+ ret += "]";
+ }
+ else if constexpr (std::is_same_v<T, std::monostate>) {
+ /* Empty block */
+ ret += R"("empty")";
+ }
+ else if constexpr (std::is_same_v<T, css_function_block>) {
+ ret += R"({ "content": {"token": )";
+ ret += "\"" + arg.function.debug_token_str() + "\", ";
+ ret += R"("arguments": [)";
+
+ for (const auto &block: arg.args) {
+ ret += "{";
+ ret += block->debug_str();
+ ret += "}, ";
+ }
+ if (*(--ret.end()) == ' ') {
+ ret.pop_back();
+ ret.pop_back(); /* Last ',' */
+ }
+ ret += "]}}";
+ }
+ else {
+ /* Single element block */
+ ret += "\"" + arg.debug_token_str() + "\"";
+ }
+ },
+ content);
+
+ return ret;
+}
+
+class css_parser {
+public:
+ css_parser(void) = delete; /* Require mempool to be set for logging */
+ explicit css_parser(rspamd_mempool_t *pool)
+ : pool(pool)
+ {
+ style_object.reset();
+ error.type = css_parse_error_type::PARSE_ERROR_NO_ERROR;
+ }
+
+ /*
+ * This constructor captures existing via unique_ptr, but it does not
+ * destruct it on errors (we assume that it is owned somewhere else)
+ */
+ explicit css_parser(std::shared_ptr<css_style_sheet> &&existing, rspamd_mempool_t *pool)
+ : style_object(existing), pool(pool)
+ {
+ error.type = css_parse_error_type::PARSE_ERROR_NO_ERROR;
+ }
+
+ /*
+ * Process input css blocks
+ */
+ std::unique_ptr<css_consumed_block> consume_css_blocks(const std::string_view &sv);
+ /*
+ * Process a single css rule
+ */
+ std::unique_ptr<css_consumed_block> consume_css_rule(const std::string_view &sv);
+ std::optional<css_parse_error> consume_input(const std::string_view &sv);
+
+ auto get_object_maybe(void) -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>
+ {
+ if (style_object) {
+ return style_object;
+ }
+
+ return tl::make_unexpected(error);
+ }
+
+ /* Helper parser methods */
+ static bool need_unescape(const std::string_view &sv);
+
+private:
+ std::shared_ptr<css_style_sheet> style_object;
+ std::unique_ptr<css_tokeniser> tokeniser;
+
+ css_parse_error error;
+ rspamd_mempool_t *pool;
+
+ int rec_level = 0;
+ const int max_rec = 20;
+ bool eof = false;
+
+ /* Consumers */
+ auto component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+ css_parser_token::token_type expected_end,
+ bool consume_current) -> bool;
+ auto qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto at_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+};
+
+/*
+ * Find if we need to unescape css
+ */
+bool css_parser::need_unescape(const std::string_view &sv)
+{
+ bool in_quote = false;
+ char quote_char, prev_c = 0;
+
+ for (const auto c: sv) {
+ if (!in_quote) {
+ if (c == '"' || c == '\'') {
+ in_quote = true;
+ quote_char = c;
+ }
+ else if (c == '\\') {
+ return true;
+ }
+ }
+ else {
+ if (c == quote_char) {
+ if (prev_c != '\\') {
+ in_quote = false;
+ }
+ }
+ prev_c = c;
+ }
+ }
+
+ return false;
+}
+
+auto css_parser::function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ auto ret = true, want_more = true;
+
+ msg_debug_css("consume function block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing function value");
+ return false;
+ }
+
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::ebrace_token:
+ ret = true;
+ want_more = false;
+ break;
+ case css_parser_token::token_type::comma_token:
+ case css_parser_token::token_type::delim_token:
+ case css_parser_token::token_type::obrace_token:
+ break;
+ default:
+ /* Attach everything to the function block */
+ top->add_function_argument(std::make_unique<css_consumed_block>(
+ css::css_consumed_block::parser_tag_type::css_function_arg,
+ std::move(next_token)));
+ break;
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+ css_parser_token::token_type expected_end,
+ bool consume_current) -> bool
+{
+ auto ret = true;
+ std::unique_ptr<css_consumed_block> block;
+
+ msg_debug_css("consume simple block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (!consume_current && ++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing simple block value");
+ return false;
+ }
+
+ if (!consume_current) {
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ }
+
+
+ while (ret && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ if (next_token.type == expected_end) {
+ break;
+ }
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(consume_current ? top : block);
+ break;
+ }
+ }
+
+ if (!consume_current && ret) {
+ msg_debug_css("attached node 'simple block' rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+
+ if (!consume_current) {
+ --rec_level;
+ }
+
+ return ret;
+}
+
+auto css_parser::qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ msg_debug_css("consume qualified block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing qualified rule value");
+ return false;
+ }
+
+ auto ret = true, want_more = true;
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_qualified_rule);
+
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::cdo_token:
+ case css_parser_token::token_type::cdc_token:
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ /* Ignore */
+ ret = true;
+ }
+ else {
+ }
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token, false);
+ want_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(block);
+ break;
+ };
+ }
+
+ if (ret) {
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ msg_debug_css("attached node qualified rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::at_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ msg_debug_css("consume at-rule block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing at keyword");
+ return false;
+ }
+
+ auto ret = true, want_more = true;
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_at_rule);
+
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::cdo_token:
+ case css_parser_token::token_type::cdc_token:
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ /* Ignore */
+ ret = true;
+ }
+ else {
+ }
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token, false);
+ want_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::semicolon_token:
+ want_more = false;
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(block);
+ break;
+ };
+ }
+
+ if (ret) {
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ msg_debug_css("attached node qualified rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ auto ret = true, need_more = true;
+ std::unique_ptr<css_consumed_block> block;
+
+ msg_debug_css("consume component block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing component value");
+ return false;
+ }
+
+ while (ret && need_more && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token,
+ true);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::obrace_token:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ebrace_token,
+ true);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::osqbrace_token:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::esqbrace_token,
+ true);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::function_token: {
+ need_more = false;
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_function,
+ std::move(next_token));
+
+ /* Consume the rest */
+ ret = function_consumer(block);
+ break;
+ }
+ default:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_component,
+ std::move(next_token));
+ need_more = false;
+ break;
+ }
+ }
+
+ if (ret && block) {
+ msg_debug_css("attached node component rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::consume_css_blocks(const std::string_view &sv) -> std::unique_ptr<css_consumed_block>
+{
+ tokeniser = std::make_unique<css_tokeniser>(pool, sv);
+ auto ret = true;
+
+ auto consumed_blocks =
+ std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_top_block);
+
+ while (!eof && ret) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::at_keyword_token:
+ tokeniser->pushback_token(next_token);
+ ret = at_rule_consumer(consumed_blocks);
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = qualified_rule_consumer(consumed_blocks);
+ break;
+ }
+ }
+
+ tokeniser.reset(nullptr); /* No longer needed */
+
+ return consumed_blocks;
+}
+
+auto css_parser::consume_css_rule(const std::string_view &sv) -> std::unique_ptr<css_consumed_block>
+{
+ tokeniser = std::make_unique<css_tokeniser>(pool, sv);
+ auto ret = true;
+
+ auto rule_block =
+ std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_simple_block);
+
+ while (!eof && ret) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(rule_block);
+ break;
+ }
+ }
+
+ tokeniser.reset(nullptr); /* No longer needed */
+
+ return rule_block;
+}
+
+std::optional<css_parse_error>
+css_parser::consume_input(const std::string_view &sv)
+{
+ auto &&consumed_blocks = consume_css_blocks(sv);
+ const auto &rules = consumed_blocks->get_blocks_or_empty();
+
+ if (rules.empty()) {
+ if (error.type == css_parse_error_type::PARSE_ERROR_NO_ERROR) {
+ return css_parse_error(css_parse_error_type::PARSE_ERROR_EMPTY,
+ "no css rules consumed");
+ }
+ else {
+ return error;
+ }
+ }
+
+ if (!style_object) {
+ style_object = std::make_shared<css_style_sheet>(pool);
+ }
+
+ for (auto &&rule: rules) {
+ /*
+ * For now, we do not need any of the at rules, so we can safely ignore them
+ */
+ auto &&children = rule->get_blocks_or_empty();
+
+ if (children.size() > 1 &&
+ children[0]->tag == css_consumed_block::parser_tag_type::css_component) {
+ auto simple_block = std::find_if(children.begin(), children.end(),
+ [](auto &bl) {
+ return bl->tag == css_consumed_block::parser_tag_type::css_simple_block;
+ });
+
+ if (simple_block != children.end()) {
+ /*
+ * We have a component and a simple block,
+ * so we can parse a selector and then extract
+ * declarations from a simple block
+ */
+
+ /* First, tag all components as preamble */
+ auto selector_it = children.cbegin();
+
+ auto selector_token_functor = [&selector_it, &simple_block](void)
+ -> const css_consumed_block & {
+ for (;;) {
+ if (selector_it == simple_block) {
+ return css_parser_eof_block;
+ }
+
+ const auto &ret = (*selector_it);
+
+ ++selector_it;
+
+ return *ret;
+ }
+ };
+
+ auto selectors_vec = process_selector_tokens(pool, selector_token_functor);
+
+ if (selectors_vec.size() > 0) {
+ msg_debug_css("processed %d selectors", (int) selectors_vec.size());
+ auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin();
+ auto decls_end = (*simple_block)->get_blocks_or_empty().cend();
+ auto declaration_token_functor = [&decls_it, &decls_end](void)
+ -> const css_consumed_block & {
+ for (;;) {
+ if (decls_it == decls_end) {
+ return css_parser_eof_block;
+ }
+
+ const auto &ret = (*decls_it);
+
+ ++decls_it;
+
+ return *ret;
+ }
+ };
+
+ auto declarations_vec = process_declaration_tokens(pool,
+ declaration_token_functor);
+
+ if (declarations_vec && !declarations_vec->get_rules().empty()) {
+ msg_debug_css("processed %d rules",
+ (int) declarations_vec->get_rules().size());
+
+ for (auto &&selector: selectors_vec) {
+ style_object->add_selector_rule(std::move(selector),
+ declarations_vec);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ auto debug_str = consumed_blocks->debug_str();
+ msg_debug_css("consumed css: {%*s}", (int) debug_str.size(), debug_str.data());
+
+ return std::nullopt;
+}
+
+auto get_selectors_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor
+{
+ css_parser parser(pool);
+
+ auto &&consumed_blocks = parser.consume_css_blocks(st);
+ const auto &rules = consumed_blocks->get_blocks_or_empty();
+
+ auto rules_it = rules.begin();
+ auto &&children = (*rules_it)->get_blocks_or_empty();
+ auto cur = children.begin();
+ auto last = children.end();
+
+ /*
+ * We use move only wrapper to state the fact that the cosumed blocks
+ * are moved into the closure, not copied.
+ * It prevents us from thinking about copies of the blocks and
+ * functors.
+ * Mutable lambda is required to copy iterators inside of the closure,
+ * as, again, it is C++ where lifetime of the objects must be explicitly
+ * transferred. On the other hand, we could move all stuff inside and remove
+ * mutable.
+ */
+ return [cur, consumed_blocks = std::move(consumed_blocks), last](void) mutable
+ -> const css_consumed_block & {
+ if (cur != last) {
+ const auto &ret = (*cur);
+
+ ++cur;
+
+ return *ret;
+ }
+
+ return css_parser_eof_block;
+ };
+}
+
+auto get_rules_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor
+{
+ css_parser parser(pool);
+
+ auto &&consumed_blocks = parser.consume_css_rule(st);
+ const auto &rules = consumed_blocks->get_blocks_or_empty();
+
+ auto cur = rules.begin();
+ auto last = rules.end();
+
+ return [cur, consumed_blocks = std::move(consumed_blocks), last](void) mutable
+ -> const css_consumed_block & {
+ if (cur != last) {
+ const auto &ret = (*cur);
+
+ ++cur;
+
+ return *ret;
+ }
+
+ return css_parser_eof_block;
+ };
+}
+
+
+/*
+ * Wrapper for the parser
+ */
+auto parse_css(rspamd_mempool_t *pool, const std::string_view &st,
+ std::shared_ptr<css_style_sheet> &&other)
+ -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>
+{
+ css_parser parser(std::forward<std::shared_ptr<css_style_sheet>>(other), pool);
+ std::string_view processed_input;
+
+ if (css_parser::need_unescape(st)) {
+ processed_input = rspamd::css::unescape_css(pool, st);
+ }
+ else {
+ /* Lowercase inplace */
+ auto *nspace = rspamd_mempool_alloc_buffer(pool, st.size());
+ rspamd_str_copy_lc(st.data(), nspace, st.size());
+ processed_input = std::string_view{nspace, st.size()};
+ }
+
+ auto maybe_error = parser.consume_input(processed_input);
+ if (!maybe_error) {
+ return parser.get_object_maybe();
+ }
+
+ return tl::make_unexpected(maybe_error.value());
+}
+
+auto parse_css_declaration(rspamd_mempool_t *pool, const std::string_view &st)
+ -> rspamd::html::html_block *
+{
+ std::string_view processed_input;
+
+ if (css_parser::need_unescape(st)) {
+ processed_input = rspamd::css::unescape_css(pool, st);
+ }
+ else {
+ auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, st.size()));
+ auto nlen = rspamd_str_copy_lc(st.data(), nspace, st.size());
+ processed_input = std::string_view{nspace, nlen};
+ }
+ auto &&res = process_declaration_tokens(pool,
+ get_rules_parser_functor(pool, processed_input));
+
+ if (res) {
+ return res->compile_to_block(pool);
+ }
+
+ return nullptr;
+}
+
+TEST_SUITE("css")
+{
+ TEST_CASE("parse colors")
+ {
+ const std::vector<const char *> cases{
+ "P { CoLoR: rgb(100%, 50%, 0%); opacity: -1; width: 1em; display: none; } /* very transparent solid orange тест */",
+ "p { color: rgb(100%, 50%, 0%); opacity: 2; display: inline; } /* very transparent solid orange */",
+ "p { color: rgb(100%, 50%, 0%); opacity: 0.5; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 1; width: 99%; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 10%; width: 99%; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 10%; width: 100px; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 10% } /* very transparent solid orange */\n",
+ "* { color: hsl(0, 100%, 50%) !important } /* red */\n",
+ "* { color: hsl(120, 100%, 50%) important } /* lime */\n",
+ "* { color: hsl(120, 100%, 25%) } /* dark green */\n",
+ "* { color: hsl(120, 100%, 75%) } /* light green */\n",
+ "* { color: hsl(120, 75%, 75%) } /* pastel green, and so on */\n",
+ "em { color: #f00 } /* #rgb */\n",
+ "em { color: #ff0000 } /* #rrggbb */\n",
+ "em { color: rgb(255,0,0) }\n",
+ "em { color: rgb(100%, 0%, 0%) }\n",
+ "body {color: black; background: white }\n",
+ "h1 { color: maroon }\n",
+ "h2 { color: olive }\n",
+ "em { color: rgb(255,0,0) } /* integer range 0 - 255 */\n",
+ "em { color: rgb(300,0,0) } /* clipped to rgb(255,0,0) */\n",
+ "em { color: rgb(255,-10,0) } /* clipped to rgb(255,0,0) */\n",
+ "em { color: rgb(110%, 0%, 0%) } /* clipped to rgb(100%,0%,0%) */\n",
+ "em { color: rgb(255,0,0) } /* integer range 0 - 255 */\n",
+ "em { color: rgba(255,0,0,1) /* the same, with explicit opacity of 1 */\n",
+ "em { color: rgb(100%,0%,0%) } /* float range 0.0% - 100.0% */\n",
+ "em { color: rgba(100%,0%,0%,1) } /* the same, with explicit opacity of 1 */\n",
+ "p { color: rgba(0,0,255,0.5) } /* semi-transparent solid blue */\n",
+ "p { color: rgba(100%, 50%, 0%, 0.1) } /* very transparent solid orange */",
+ ".chat-icon[_ng-cnj-c0]::before{content:url(group-2.63e87cd21fbf8c966dd.svg);width:60px;height:60px;display:block}",
+ "tt{color:#1e3482}",
+ "tt{unicode-range: u+0049-u+004a,u+0020;}",
+ "@import url(https://fonts.googleapis.com/css?family=arial:300,400,7000;",
+ "tt{color:black;\v}",
+ "tt{color:black;\f}",
+ };
+
+ rspamd_mempool_t *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "css", 0);
+ for (const auto &c: cases) {
+ SUBCASE((std::string("parse css: ") + c).c_str())
+ {
+ CHECK(parse_css(pool, c, nullptr).value().get() != nullptr);
+ }
+ }
+
+ /* We now merge all styles together */
+ SUBCASE("merged css parse")
+ {
+ std::shared_ptr<css_style_sheet> merged;
+ for (const auto &c: cases) {
+ auto ret = parse_css(pool, c, std::move(merged));
+ merged.swap(ret.value());
+ }
+
+ CHECK(merged.get() != nullptr);
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+}
+}// namespace rspamd::css
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
new file mode 100644
index 0000000..d5a9671
--- /dev/null
+++ b/src/libserver/css/css_parser.hxx
@@ -0,0 +1,244 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_PARSER_HXX
+#define RSPAMD_CSS_PARSER_HXX
+
+#include <variant>
+#include <vector>
+#include <memory>
+#include <string>
+
+#include "function2/function2.hpp"
+#include "css_tokeniser.hxx"
+#include "parse_error.hxx"
+#include "contrib/expected/expected.hpp"
+#include "logger.h"
+
+/* Forward declaration */
+namespace rspamd::html {
+struct html_block;
+}
+
+namespace rspamd::css {
+
+/*
+ * Represents a consumed token by a parser
+ */
+class css_consumed_block {
+public:
+ enum class parser_tag_type : std::uint8_t {
+ css_top_block = 0,
+ css_qualified_rule,
+ css_at_rule,
+ css_simple_block,
+ css_function,
+ css_function_arg,
+ css_component,
+ css_eof_block,
+ };
+ using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
+
+ struct css_function_block {
+ css_parser_token function;
+ std::vector<consumed_block_ptr> args;
+
+ css_function_block(css_parser_token &&tok)
+ : function(std::forward<css_parser_token>(tok))
+ {
+ }
+
+ auto as_string() const -> std::string_view
+ {
+ return function.get_string_or_default("");
+ }
+
+ static auto empty_function() -> const css_function_block &
+ {
+ static const css_function_block invalid(
+ css_parser_token(css_parser_token::token_type::eof_token,
+ css_parser_token_placeholder()));
+ return invalid;
+ }
+ };
+
+ css_consumed_block()
+ : tag(parser_tag_type::css_eof_block)
+ {
+ }
+ css_consumed_block(parser_tag_type tag)
+ : tag(tag)
+ {
+ if (tag == parser_tag_type::css_top_block ||
+ tag == parser_tag_type::css_qualified_rule ||
+ tag == parser_tag_type::css_simple_block) {
+ /* Pre-allocate content for known vector blocks */
+ std::vector<consumed_block_ptr> vec;
+ vec.reserve(4);
+ content = std::move(vec);
+ }
+ }
+ /* Construct a block from a single lexer token (for trivial blocks) */
+ explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok)
+ : tag(tag)
+ {
+ if (tag == parser_tag_type::css_function) {
+ content = css_function_block{std::move(tok)};
+ }
+ else {
+ content = std::move(tok);
+ }
+ }
+
+ /* Attach a new block to the compound block, consuming block inside */
+ auto attach_block(consumed_block_ptr &&block) -> bool;
+ /* Attach a new argument to the compound function block, consuming block inside */
+ auto add_function_argument(consumed_block_ptr &&block) -> bool;
+
+ auto assign_token(css_parser_token &&tok) -> void
+ {
+ content = std::move(tok);
+ }
+
+ /* Empty blocks used to avoid type checks in loops */
+ const inline static std::vector<consumed_block_ptr> empty_block_vec{};
+
+ auto is_blocks_vec() const -> bool
+ {
+ return (std::holds_alternative<std::vector<consumed_block_ptr>>(content));
+ }
+
+ auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr> &
+ {
+ if (is_blocks_vec()) {
+ return std::get<std::vector<consumed_block_ptr>>(content);
+ }
+
+ return empty_block_vec;
+ }
+
+ auto is_token() const -> bool
+ {
+ return (std::holds_alternative<css_parser_token>(content));
+ }
+
+ auto get_token_or_empty() const -> const css_parser_token &
+ {
+ if (is_token()) {
+ return std::get<css_parser_token>(content);
+ }
+
+ return css_parser_eof_token();
+ }
+
+ auto is_function() const -> bool
+ {
+ return (std::holds_alternative<css_function_block>(content));
+ }
+
+ auto get_function_or_invalid() const -> const css_function_block &
+ {
+ if (is_function()) {
+ return std::get<css_function_block>(content);
+ }
+
+ return css_function_block::empty_function();
+ }
+
+ auto size() const -> std::size_t
+ {
+ auto ret = 0;
+
+ std::visit([&](auto &arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
+ /* Array of blocks */
+ ret = arg.size();
+ }
+ else if constexpr (std::is_same_v<T, std::monostate>) {
+ /* Empty block */
+ ret = 0;
+ }
+ else {
+ /* Single element block */
+ ret = 1;
+ }
+ },
+ content);
+
+ return ret;
+ }
+
+ auto is_eof() -> bool
+ {
+ return tag == parser_tag_type::css_eof_block;
+ }
+
+ /* Debug methods */
+ auto token_type_str(void) const -> const char *;
+ auto debug_str(void) -> std::string;
+
+public:
+ parser_tag_type tag;
+
+private:
+ std::variant<std::monostate,
+ std::vector<consumed_block_ptr>,
+ css_parser_token,
+ css_function_block>
+ content;
+};
+
+extern const css_consumed_block css_parser_eof_block;
+
+using blocks_gen_functor = fu2::unique_function<const css_consumed_block &(void)>;
+
+class css_style_sheet;
+/*
+ * Update the existing stylesheet with another stylesheet
+ */
+auto parse_css(rspamd_mempool_t *pool, const std::string_view &st,
+ std::shared_ptr<css_style_sheet> &&other)
+ -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>;
+
+/*
+ * Creates a functor to consume css selectors sequence
+ */
+auto get_selectors_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor;
+
+/*
+ * Creates a functor to process a rule definition (e.g. from embedded style tag for
+ * an element)
+ */
+auto get_rules_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor;
+
+/**
+ * Parses a css declaration (e.g. embedded css and returns a completed html block)
+ * @param pool
+ * @param st
+ * @return
+ */
+auto parse_css_declaration(rspamd_mempool_t *pool, const std::string_view &st)
+ -> rspamd::html::html_block *;
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_PARSER_HXX
diff --git a/src/libserver/css/css_property.cxx b/src/libserver/css/css_property.cxx
new file mode 100644
index 0000000..1557109
--- /dev/null
+++ b/src/libserver/css/css_property.cxx
@@ -0,0 +1,69 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_property.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include "libutil/cxx/util.hxx"
+
+namespace rspamd::css {
+
+constexpr const auto prop_names_map = frozen::make_unordered_map<frozen::string, css_property_type>({
+ {"font", css_property_type::PROPERTY_FONT},
+ {"font-color", css_property_type::PROPERTY_FONT_COLOR},
+ {"font-size", css_property_type::PROPERTY_FONT_SIZE},
+ {"color", css_property_type::PROPERTY_COLOR},
+ {"bgcolor", css_property_type::PROPERTY_BGCOLOR},
+ {"background-color", css_property_type::PROPERTY_BGCOLOR},
+ {"background", css_property_type::PROPERTY_BACKGROUND},
+ {"height", css_property_type::PROPERTY_HEIGHT},
+ {"width", css_property_type::PROPERTY_WIDTH},
+ {"display", css_property_type::PROPERTY_DISPLAY},
+ {"visibility", css_property_type::PROPERTY_VISIBILITY},
+ {"opacity", css_property_type::PROPERTY_OPACITY},
+});
+
+/* Ensure that we have all cases listed */
+static_assert(prop_names_map.size() >= static_cast<int>(css_property_type::PROPERTY_NYI));
+
+auto token_string_to_property(const std::string_view &inp)
+ -> css_property_type
+{
+
+ css_property_type ret = css_property_type::PROPERTY_NYI;
+
+ auto known_type = find_map(prop_names_map, inp);
+
+ if (known_type) {
+ ret = known_type.value().get();
+ }
+
+ return ret;
+}
+
+auto css_property::from_token(const css_parser_token &tok)
+ -> tl::expected<css_property, css_parse_error>
+{
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ auto sv = tok.get_string_or_default("");
+
+ return css_property{token_string_to_property(sv), css_property_flag::FLAG_NORMAL};
+ }
+
+ return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
+}
+
+}// namespace rspamd::css
diff --git a/src/libserver/css/css_property.hxx b/src/libserver/css/css_property.hxx
new file mode 100644
index 0000000..9661222
--- /dev/null
+++ b/src/libserver/css/css_property.hxx
@@ -0,0 +1,172 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef RSPAMD_CSS_PROPERTY_HXX
+#define RSPAMD_CSS_PROPERTY_HXX
+
+#include <string>
+#include "css_tokeniser.hxx"
+#include "parse_error.hxx"
+#include "contrib/expected/expected.hpp"
+
+namespace rspamd::css {
+
+/*
+ * To be extended with properties that are interesting from the email
+ * point of view
+ */
+enum class css_property_type : std::uint16_t {
+ PROPERTY_FONT = 0,
+ PROPERTY_FONT_COLOR,
+ PROPERTY_FONT_SIZE,
+ PROPERTY_COLOR,
+ PROPERTY_BGCOLOR,
+ PROPERTY_BACKGROUND,
+ PROPERTY_HEIGHT,
+ PROPERTY_WIDTH,
+ PROPERTY_DISPLAY,
+ PROPERTY_VISIBILITY,
+ PROPERTY_OPACITY,
+ PROPERTY_NYI,
+};
+
+enum class css_property_flag : std::uint16_t {
+ FLAG_NORMAL,
+ FLAG_IMPORTANT,
+ FLAG_NOT_IMPORTANT
+};
+
+struct alignas(int) css_property {
+ css_property_type type;
+ css_property_flag flag;
+
+ css_property(css_property_type t, css_property_flag fl = css_property_flag::FLAG_NORMAL)
+ : type(t), flag(fl)
+ {
+ }
+ static tl::expected<css_property, css_parse_error> from_token(
+ const css_parser_token &tok);
+
+ constexpr auto to_string(void) const -> const char *
+ {
+ const char *ret = "nyi";
+
+ switch (type) {
+ case css_property_type::PROPERTY_FONT:
+ ret = "font";
+ break;
+ case css_property_type::PROPERTY_FONT_COLOR:
+ ret = "font-color";
+ break;
+ case css_property_type::PROPERTY_FONT_SIZE:
+ ret = "font-size";
+ break;
+ case css_property_type::PROPERTY_COLOR:
+ ret = "color";
+ break;
+ case css_property_type::PROPERTY_BGCOLOR:
+ ret = "bgcolor";
+ break;
+ case css_property_type::PROPERTY_BACKGROUND:
+ ret = "background";
+ break;
+ case css_property_type::PROPERTY_HEIGHT:
+ ret = "height";
+ break;
+ case css_property_type::PROPERTY_WIDTH:
+ ret = "width";
+ break;
+ case css_property_type::PROPERTY_DISPLAY:
+ ret = "display";
+ break;
+ case css_property_type::PROPERTY_VISIBILITY:
+ ret = "visibility";
+ break;
+ case css_property_type::PROPERTY_OPACITY:
+ ret = "opacity";
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+ }
+
+ /* Helpers to define which values are valid for which properties */
+ auto is_color(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_COLOR ||
+ type == css_property_type::PROPERTY_BACKGROUND ||
+ type == css_property_type::PROPERTY_BGCOLOR ||
+ type == css_property_type::PROPERTY_FONT_COLOR ||
+ type == css_property_type::PROPERTY_FONT;
+ }
+ auto is_dimension(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_HEIGHT ||
+ type == css_property_type::PROPERTY_WIDTH ||
+ type == css_property_type::PROPERTY_FONT_SIZE ||
+ type == css_property_type::PROPERTY_FONT;
+ }
+
+ auto is_normal_number(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_OPACITY;
+ }
+
+ auto is_display(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_DISPLAY;
+ }
+
+ auto is_visibility(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_VISIBILITY;
+ }
+
+ auto operator==(const css_property &other) const
+ {
+ return type == other.type;
+ }
+};
+
+
+}// namespace rspamd::css
+
+/* Make properties hashable */
+namespace std {
+template<>
+class hash<rspamd::css::css_property> {
+public:
+ using is_avalanching = void;
+ /* Mix bits to provide slightly better distribution but being constexpr */
+ constexpr size_t operator()(const rspamd::css::css_property &prop) const
+ {
+ std::size_t key = 0xdeadbeef ^ static_cast<std::size_t>(prop.type);
+ key = (~key) + (key << 21);
+ key = key ^ (key >> 24);
+ key = (key + (key << 3)) + (key << 8);
+ key = key ^ (key >> 14);
+ key = (key + (key << 2)) + (key << 4);
+ key = key ^ (key >> 28);
+ key = key + (key << 31);
+ return key;
+ }
+};
+}// namespace std
+
+#endif//RSPAMD_CSS_PROPERTY_HXX \ No newline at end of file
diff --git a/src/libserver/css/css_rule.cxx b/src/libserver/css/css_rule.cxx
new file mode 100644
index 0000000..4e33ac7
--- /dev/null
+++ b/src/libserver/css/css_rule.cxx
@@ -0,0 +1,531 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_rule.hxx"
+#include "css.hxx"
+#include "libserver/html/html_block.hxx"
+#include <limits>
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+/* Class methods */
+void css_rule::override_values(const css_rule &other)
+{
+ int bits = 0;
+ /* Ensure that our bitset is large enough */
+ static_assert(1 << std::variant_size_v<decltype(css_value::value)> <
+ std::numeric_limits<int>::max());
+
+ for (const auto &v: values) {
+ bits |= static_cast<int>(1 << v.value.index());
+ }
+
+ for (const auto &ov: other.values) {
+ if (isset(&bits, static_cast<int>(1 << ov.value.index()))) {
+ /* We need to override the existing value */
+ /*
+ * The algorithm is not very efficient,
+ * so we need to sort the values first and have a O(N) algorithm
+ * On the other hand, values vectors are usually limited to the
+ * number of elements about less then 10, so this O(N^2) algorithm
+ * is probably ok here
+ */
+ for (auto &v: values) {
+ if (v.value.index() == ov.value.index()) {
+ v = ov;
+ }
+ }
+ }
+ }
+
+ /* Copy only not set values */
+ std::copy_if(other.values.begin(), other.values.end(), std::back_inserter(values),
+ [&bits](const auto &elt) -> bool {
+ return (bits & (1 << static_cast<int>(elt.value.index()))) == 0;
+ });
+}
+
+void css_rule::merge_values(const css_rule &other)
+{
+ unsigned int bits = 0;
+
+ for (const auto &v: values) {
+ bits |= 1 << v.value.index();
+ }
+
+ /* Copy only not set values */
+ std::copy_if(other.values.begin(), other.values.end(), std::back_inserter(values),
+ [&bits](const auto &elt) -> bool {
+ return (bits & (1 << elt.value.index())) == 0;
+ });
+}
+
+auto css_declarations_block::add_rule(rule_shared_ptr rule) -> bool
+{
+ auto it = rules.find(rule);
+ auto &&remote_prop = rule->get_prop();
+ auto ret = true;
+
+ if (rule->get_values().size() == 0) {
+ /* Ignore rules with no values */
+ return false;
+ }
+
+ if (it != rules.end()) {
+ auto &&local_rule = *it;
+ auto &&local_prop = local_rule->get_prop();
+
+ if (local_prop.flag == css_property_flag::FLAG_IMPORTANT) {
+ if (remote_prop.flag == css_property_flag::FLAG_IMPORTANT) {
+ local_rule->override_values(*rule);
+ }
+ else {
+ /* Override remote not important over local important */
+ local_rule->merge_values(*rule);
+ }
+ }
+ else if (local_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) {
+ if (remote_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) {
+ local_rule->override_values(*rule);
+ }
+ else {
+ /* Override local not important over important */
+ local_rule->merge_values(*rule);
+ }
+ }
+ else {
+ if (remote_prop.flag == css_property_flag::FLAG_IMPORTANT) {
+ /* Override with remote */
+ local_rule->override_values(*rule);
+ }
+ else if (remote_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) {
+ /* Ignore remote not important over local normal */
+ ret = false;
+ }
+ else {
+ /* Merge both */
+ local_rule->merge_values(*rule);
+ }
+ }
+ }
+ else {
+ rules.insert(std::move(rule));
+ }
+
+ return ret;
+}
+
+}// namespace rspamd::css
+
+namespace rspamd::css {
+
+/* Static functions */
+
+static auto
+allowed_property_value(const css_property &prop, const css_consumed_block &parser_block)
+ -> std::optional<css_value>
+{
+ if (prop.is_color()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::hash_token) {
+ return css_value::maybe_color_from_hex(tok.get_string_or_default(""));
+ }
+ else if (tok.type == css_parser_token::token_type::ident_token) {
+ auto &&ret = css_value::maybe_color_from_string(tok.get_string_or_default(""));
+
+ return ret;
+ }
+ }
+ else if (parser_block.is_function()) {
+ const auto &func = parser_block.get_function_or_invalid();
+
+ auto &&ret = css_value::maybe_color_from_function(func);
+ return ret;
+ }
+ }
+ if (prop.is_dimension()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ return css_value::maybe_dimension_from_number(tok);
+ }
+ }
+ }
+ if (prop.is_display()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ return css_value::maybe_display_from_string(tok.get_string_or_default(""));
+ }
+ }
+ }
+ if (prop.is_visibility()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ return css_value::maybe_display_from_string(tok.get_string_or_default(""));
+ }
+ }
+ }
+ if (prop.is_normal_number()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ return css_value{tok.get_normal_number_or_default(0)};
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_block_functor)
+ -> css_declarations_block_ptr
+{
+ css_declarations_block_ptr ret;
+ bool can_continue = true;
+ css_property cur_property{css_property_type::PROPERTY_NYI,
+ css_property_flag::FLAG_NORMAL};
+ static const css_property bad_property{css_property_type::PROPERTY_NYI,
+ css_property_flag::FLAG_NORMAL};
+ std::shared_ptr<css_rule> cur_rule;
+
+ enum {
+ parse_property,
+ parse_value,
+ ignore_value, /* For unknown properties */
+ } state = parse_property;
+
+ auto seen_not = false;
+ ret = std::make_shared<css_declarations_block>();
+
+ while (can_continue) {
+ const auto &next_tok = next_block_functor();
+
+ switch (next_tok.tag) {
+ case css_consumed_block::parser_tag_type::css_component:
+ /* Component can be a property or a compound list of values */
+ if (state == parse_property) {
+ cur_property = css_property::from_token(next_tok.get_token_or_empty())
+ .value_or(bad_property);
+
+ if (cur_property.type == css_property_type::PROPERTY_NYI) {
+ state = ignore_value;
+ /* Ignore everything till ; */
+ continue;
+ }
+
+ msg_debug_css("got css property: %s", cur_property.to_string());
+
+ /* We now expect colon block */
+ const auto &expect_colon_block = next_block_functor();
+
+ if (expect_colon_block.tag != css_consumed_block::parser_tag_type::css_component) {
+ state = ignore_value; /* Ignore up to the next rule */
+ }
+ else {
+ const auto &expect_colon_tok = expect_colon_block.get_token_or_empty();
+
+ if (expect_colon_tok.type != css_parser_token::token_type::colon_token) {
+ msg_debug_css("invalid rule, no colon after property");
+ state = ignore_value; /* Ignore up to the next rule */
+ }
+ else {
+ state = parse_value;
+ cur_rule = std::make_shared<css_rule>(cur_property);
+ }
+ }
+ }
+ else if (state == parse_value) {
+ /* Check semicolon */
+ if (next_tok.is_token()) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (parser_tok.type == css_parser_token::token_type::semicolon_token && cur_rule) {
+ ret->add_rule(std::move(cur_rule));
+ state = parse_property;
+ seen_not = false;
+ continue;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::delim_token) {
+ if (parser_tok.get_string_or_default("") == "!") {
+ /* Probably something like !important */
+ seen_not = true;
+ }
+ }
+ else if (parser_tok.type == css_parser_token::token_type::ident_token) {
+ if (parser_tok.get_string_or_default("") == "important") {
+ if (seen_not) {
+ msg_debug_css("add !important flag to property %s",
+ cur_property.to_string());
+ cur_property.flag = css_property_flag::FLAG_NOT_IMPORTANT;
+ }
+ else {
+ msg_debug_css("add important flag to property %s",
+ cur_property.to_string());
+ cur_property.flag = css_property_flag::FLAG_IMPORTANT;
+ }
+
+ seen_not = false;
+
+ continue;
+ }
+ else {
+ seen_not = false;
+ }
+ }
+ }
+
+ auto maybe_value = allowed_property_value(cur_property, next_tok);
+
+ if (maybe_value) {
+ msg_debug_css("added value %s to the property %s",
+ maybe_value.value().debug_str().c_str(),
+ cur_property.to_string());
+ cur_rule->add_value(maybe_value.value());
+ }
+ }
+ else {
+ /* Ignore all till ; */
+ if (next_tok.is_token()) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ state = parse_property;
+ }
+ }
+ }
+ break;
+ case css_consumed_block::parser_tag_type::css_function:
+ if (state == parse_value) {
+ auto maybe_value = allowed_property_value(cur_property, next_tok);
+
+ if (maybe_value && cur_rule) {
+ msg_debug_css("added value %s to the property %s",
+ maybe_value.value().debug_str().c_str(),
+ cur_property.to_string());
+ cur_rule->add_value(maybe_value.value());
+ }
+ }
+ break;
+ case css_consumed_block::parser_tag_type::css_eof_block:
+ if (state == parse_value) {
+ ret->add_rule(std::move(cur_rule));
+ }
+ can_continue = false;
+ break;
+ default:
+ can_continue = false;
+ break;
+ }
+ }
+
+ return ret; /* copy elision */
+}
+
+auto css_declarations_block::merge_block(const css_declarations_block &other, merge_type how) -> void
+{
+ const auto &other_rules = other.get_rules();
+
+
+ for (auto &rule: other_rules) {
+ auto &&found_it = rules.find(rule);
+
+ if (found_it != rules.end()) {
+ /* Duplicate, need to merge */
+ switch (how) {
+ case merge_type::merge_override:
+ /* Override */
+ (*found_it)->override_values(*rule);
+ break;
+ case merge_type::merge_duplicate:
+ /* Merge values */
+ add_rule(rule);
+ break;
+ case merge_type::merge_parent:
+ /* Do not merge parent rule if more specific local one is presented */
+ break;
+ }
+ }
+ else {
+ /* New property, just insert */
+ rules.insert(rule);
+ }
+ }
+}
+
+auto css_declarations_block::compile_to_block(rspamd_mempool_t *pool) const -> rspamd::html::html_block *
+{
+ auto *block = rspamd_mempool_alloc0_type(pool, rspamd::html::html_block);
+ auto opacity = -1;
+ const css_rule *font_rule = nullptr, *background_rule = nullptr;
+
+ for (const auto &rule: rules) {
+ auto prop = rule->get_prop().type;
+ const auto &vals = rule->get_values();
+
+ if (vals.empty()) {
+ continue;
+ }
+
+ switch (prop) {
+ case css_property_type::PROPERTY_VISIBILITY:
+ case css_property_type::PROPERTY_DISPLAY: {
+ auto disp = vals.back().to_display().value_or(css_display_value::DISPLAY_INLINE);
+ block->set_display(disp);
+ break;
+ }
+ case css_property_type::PROPERTY_FONT_SIZE: {
+ auto fs = vals.back().to_dimension();
+ if (fs) {
+ block->set_font_size(fs.value().dim, fs.value().is_percent);
+ }
+ }
+ case css_property_type::PROPERTY_OPACITY: {
+ opacity = vals.back().to_number().value_or(opacity);
+ break;
+ }
+ case css_property_type::PROPERTY_FONT_COLOR:
+ case css_property_type::PROPERTY_COLOR: {
+ auto color = vals.back().to_color();
+ if (color) {
+ block->set_fgcolor(color.value());
+ }
+ break;
+ }
+ case css_property_type::PROPERTY_BGCOLOR: {
+ auto color = vals.back().to_color();
+ if (color) {
+ block->set_bgcolor(color.value());
+ }
+ break;
+ }
+ case css_property_type::PROPERTY_HEIGHT: {
+ auto w = vals.back().to_dimension();
+ if (w) {
+ block->set_width(w.value().dim, w.value().is_percent);
+ }
+ break;
+ }
+ case css_property_type::PROPERTY_WIDTH: {
+ auto h = vals.back().to_dimension();
+ if (h) {
+ block->set_width(h.value().dim, h.value().is_percent);
+ }
+ break;
+ }
+ /* Optional attributes */
+ case css_property_type::PROPERTY_FONT:
+ font_rule = rule.get();
+ break;
+ case css_property_type::PROPERTY_BACKGROUND:
+ background_rule = rule.get();
+ break;
+ default:
+ /* Do nothing for now */
+ break;
+ }
+ }
+
+ /* Optional properties */
+ if (!(block->fg_color_mask) && font_rule) {
+ auto &vals = font_rule->get_values();
+
+ for (const auto &val: vals) {
+ auto maybe_color = val.to_color();
+
+ if (maybe_color) {
+ block->set_fgcolor(maybe_color.value());
+ }
+ }
+ }
+
+ if (!(block->font_mask) && font_rule) {
+ auto &vals = font_rule->get_values();
+
+ for (const auto &val: vals) {
+ auto maybe_dim = val.to_dimension();
+
+ if (maybe_dim) {
+ block->set_font_size(maybe_dim.value().dim, maybe_dim.value().is_percent);
+ }
+ }
+ }
+
+ if (!(block->bg_color_mask) && background_rule) {
+ auto &vals = background_rule->get_values();
+
+ for (const auto &val: vals) {
+ auto maybe_color = val.to_color();
+
+ if (maybe_color) {
+ block->set_bgcolor(maybe_color.value());
+ }
+ }
+ }
+
+ return block;
+}
+
+void css_rule::add_value(const css_value &value)
+{
+ values.push_back(value);
+}
+
+
+TEST_SUITE("css")
+{
+ TEST_CASE("simple css rules")
+ {
+ const std::vector<std::pair<const char *, std::vector<css_property>>> cases{
+ {"font-size:12.0pt;line-height:115%",
+ {css_property(css_property_type::PROPERTY_FONT_SIZE)}},
+ {"font-size:12.0pt;display:none",
+ {css_property(css_property_type::PROPERTY_FONT_SIZE),
+ css_property(css_property_type::PROPERTY_DISPLAY)}}};
+
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "css", 0);
+
+ for (const auto &c: cases) {
+ auto res = process_declaration_tokens(pool,
+ get_rules_parser_functor(pool, c.first));
+
+ CHECK(res.get() != nullptr);
+
+ for (auto i = 0; i < c.second.size(); i++) {
+ CHECK(res->has_property(c.second[i]));
+ }
+ }
+ }
+}
+
+}// namespace rspamd::css \ No newline at end of file
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
new file mode 100644
index 0000000..114b83e
--- /dev/null
+++ b/src/libserver/css/css_rule.hxx
@@ -0,0 +1,153 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef RSPAMD_CSS_RULE_HXX
+#define RSPAMD_CSS_RULE_HXX
+
+#include "css_value.hxx"
+#include "css_property.hxx"
+#include "css_parser.hxx"
+#include "contrib/ankerl/unordered_dense.h"
+#include "libutil/cxx/util.hxx"
+#include "libutil/cxx/hash_util.hxx"
+#include <vector>
+#include <memory>
+
+namespace rspamd::html {
+/* Forward declaration */
+struct html_block;
+}// namespace rspamd::html
+
+namespace rspamd::css {
+
+class css_rule {
+ css_property prop;
+ using css_values_vec = std::vector<css_value>;
+ css_values_vec values;
+
+public:
+ /* We must create css rule explicitly from a property and values */
+ css_rule() = delete;
+
+ css_rule(const css_rule &other) = delete;
+
+ /* Constructors */
+ css_rule(css_rule &&other) noexcept = default;
+
+ explicit css_rule(css_property &&prop, css_values_vec &&values) noexcept
+ : prop(prop), values(std::forward<css_values_vec>(values))
+ {
+ }
+
+ explicit css_rule(const css_property &prop) noexcept
+ : prop(prop), values{}
+ {
+ }
+
+ /* Methods */
+ /* Comparison is special, as we care merely about property, not the values */
+ auto operator==(const css_rule &other) const
+ {
+ return prop == other.prop;
+ }
+
+ constexpr const css_values_vec &get_values(void) const
+ {
+ return values;
+ }
+ constexpr const css_property &get_prop(void) const
+ {
+ return prop;
+ }
+
+ /* Import values from another rules according to the importance */
+ void override_values(const css_rule &other);
+ void merge_values(const css_rule &other);
+ void add_value(const css_value &value);
+};
+
+}// namespace rspamd::css
+
+/* Make rules hashable by property */
+namespace std {
+template<>
+class hash<rspamd::css::css_rule> {
+public:
+ using is_avalanching = void;
+ constexpr auto operator()(const rspamd::css::css_rule &rule) const -> auto
+ {
+ return hash<rspamd::css::css_property>()(rule.get_prop());
+ }
+};
+
+}// namespace std
+
+namespace rspamd::css {
+
+/**
+ * Class that is designed to hold css declaration (a set of rules)
+ */
+class css_declarations_block {
+public:
+ using rule_shared_ptr = std::shared_ptr<css_rule>;
+ using rule_shared_hash = smart_ptr_hash<css_rule>;
+ using rule_shared_eq = smart_ptr_equal<css_rule>;
+ enum class merge_type {
+ merge_duplicate,
+ merge_parent,
+ merge_override
+ };
+
+ css_declarations_block() = default;
+ auto add_rule(rule_shared_ptr rule) -> bool;
+ auto merge_block(const css_declarations_block &other,
+ merge_type how = merge_type::merge_duplicate) -> void;
+ auto get_rules(void) const -> const auto &
+ {
+ return rules;
+ }
+
+ /**
+ * Returns if a declaration block has some property
+ * @param prop
+ * @return
+ */
+ auto has_property(const css_property &prop) const -> bool
+ {
+ return (rules.find(css_rule{prop}) != rules.end());
+ }
+
+ /**
+ * Compile CSS declaration to the html block
+ * @param pool used to carry memory required for html_block
+ * @return html block structure
+ */
+ auto compile_to_block(rspamd_mempool_t *pool) const -> rspamd::html::html_block *;
+
+private:
+ ankerl::unordered_dense::set<rule_shared_ptr, rule_shared_hash, rule_shared_eq> rules;
+};
+
+using css_declarations_block_ptr = std::shared_ptr<css_declarations_block>;
+
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_token_functor)
+ -> css_declarations_block_ptr;
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_RULE_HXX \ No newline at end of file
diff --git a/src/libserver/css/css_rule_parser.rl b/src/libserver/css/css_rule_parser.rl
new file mode 100644
index 0000000..e3b1876
--- /dev/null
+++ b/src/libserver/css/css_rule_parser.rl
@@ -0,0 +1,27 @@
+%%{
+ machine css_parser;
+ alphtype unsigned char;
+ include css_syntax "css_syntax.rl";
+
+ main := declaration;
+}%%
+
+%% write data;
+
+#include <cstddef>
+
+namespace rspamd::css {
+
+int
+foo (const unsigned char *data, std::size_t len)
+{
+ const unsigned char *p = data, *pe = data + len, *eof;
+ int cs;
+
+ %% write init;
+ %% write exec;
+
+ return cs;
+}
+
+} \ No newline at end of file
diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx
new file mode 100644
index 0000000..a62ffff
--- /dev/null
+++ b/src/libserver/css/css_selector.cxx
@@ -0,0 +1,226 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_selector.hxx"
+#include "css.hxx"
+#include "libserver/html/html.hxx"
+#include "fmt/core.h"
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+auto process_selector_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_token_functor)
+ -> selectors_vec
+{
+ selectors_vec ret;
+ bool can_continue = true;
+ enum class selector_process_state {
+ selector_parse_start = 0,
+ selector_expect_ident,
+ selector_ident_consumed,
+ selector_ignore_attribute,
+ selector_ignore_function,
+ selector_ignore_combination
+ } state = selector_process_state::selector_parse_start;
+ std::unique_ptr<css_selector> cur_selector;
+
+
+ while (can_continue) {
+ const auto &next_tok = next_token_functor();
+
+ if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (state == selector_process_state::selector_parse_start) {
+ /*
+ * At the beginning of the parsing we can expect either
+ * delim or an ident, everything else is discarded for now
+ */
+ msg_debug_css("start consume selector");
+
+ switch (parser_tok.type) {
+ case css_parser_token::token_type::delim_token: {
+ auto delim_c = parser_tok.get_delim();
+
+ if (delim_c == '.') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_CLASS);
+ state = selector_process_state::selector_expect_ident;
+ }
+ else if (delim_c == '#') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ID);
+ state = selector_process_state::selector_expect_ident;
+ }
+ else if (delim_c == '*') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ALL);
+ state = selector_process_state::selector_ident_consumed;
+ }
+ break;
+ }
+ case css_parser_token::token_type::ident_token: {
+ auto tag_id = html::html_tag_by_name(parser_tok.get_string_or_default(""));
+
+ if (tag_id) {
+ cur_selector = std::make_unique<css_selector>(tag_id.value());
+ }
+ state = selector_process_state::selector_ident_consumed;
+ break;
+ }
+ case css_parser_token::token_type::hash_token:
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ID);
+ cur_selector->value =
+ parser_tok.get_string_or_default("");
+ state = selector_process_state::selector_ident_consumed;
+ break;
+ default:
+ msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
+ next_tok.token_type_str());
+ can_continue = false;
+ break;
+ }
+ }
+ else if (state == selector_process_state::selector_expect_ident) {
+ /*
+ * We got something like a selector start, so we expect
+ * a plain ident
+ */
+ if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
+ cur_selector->value = parser_tok.get_string_or_default("");
+ state = selector_process_state::selector_ident_consumed;
+ }
+ else {
+ msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
+ next_tok.token_type_str());
+ can_continue = false;
+ }
+ }
+ else if (state == selector_process_state::selector_ident_consumed) {
+ if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
+ /* Got full selector, attach it to the vector and go further */
+ msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+ ret.push_back(std::move(cur_selector));
+ state = selector_process_state::selector_parse_start;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ /* TODO: implement adjustments */
+ state = selector_process_state::selector_ignore_function;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
+ /* TODO: implement attributes checks */
+ state = selector_process_state::selector_ignore_attribute;
+ }
+ else {
+ /* TODO: implement selectors combinations */
+ state = selector_process_state::selector_ignore_combination;
+ }
+ }
+ else {
+ /* Ignore state; ignore all till ',' token or eof token */
+ if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
+ /* Got full selector, attach it to the vector and go further */
+ ret.push_back(std::move(cur_selector));
+ state = selector_process_state::selector_parse_start;
+ }
+ else {
+ auto debug_str = parser_tok.get_string_or_default("");
+ msg_debug_css("ignore token %*s", (int) debug_str.size(),
+ debug_str.data());
+ }
+ }
+ }
+ else {
+ /* End of parsing */
+ if (state == selector_process_state::selector_ident_consumed && cur_selector) {
+ msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+ ret.push_back(std::move(cur_selector));
+ }
+ else {
+ msg_debug_css("not attached selector, state: %d", static_cast<int>(state));
+ }
+ can_continue = false;
+ }
+ }
+
+ return ret; /* copy elision */
+}
+
+auto css_selector::debug_str() const -> std::string
+{
+ std::string ret;
+
+ if (type == selector_type::SELECTOR_ID) {
+ ret += "#";
+ }
+ else if (type == selector_type::SELECTOR_CLASS) {
+ ret += ".";
+ }
+ else if (type == selector_type::SELECTOR_ALL) {
+ ret = "*";
+
+ return ret;
+ }
+
+ std::visit([&](auto arg) -> void {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, tag_id_t>) {
+ ret += fmt::format("tag: {}", static_cast<int>(arg));
+ }
+ else {
+ ret += arg;
+ }
+ },
+ value);
+
+ return ret;
+}
+
+TEST_SUITE("css")
+{
+ TEST_CASE("simple css selectors")
+ {
+ const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{
+ {"em", {css_selector::selector_type::SELECTOR_TAG}},
+ {"*", {css_selector::selector_type::SELECTOR_ALL}},
+ {".class", {css_selector::selector_type::SELECTOR_CLASS}},
+ {"#id", {css_selector::selector_type::SELECTOR_ID}},
+ {"em,.class,#id", {css_selector::selector_type::SELECTOR_TAG, css_selector::selector_type::SELECTOR_CLASS, css_selector::selector_type::SELECTOR_ID}},
+ };
+
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "css", 0);
+
+ for (const auto &c: cases) {
+ auto res = process_selector_tokens(pool,
+ get_selectors_parser_functor(pool, c.first));
+
+ CHECK(c.second.size() == res.size());
+
+ for (auto i = 0; i < c.second.size(); i++) {
+ CHECK(res[i]->type == c.second[i]);
+ }
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+}
+
+}// namespace rspamd::css
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
new file mode 100644
index 0000000..65b185a
--- /dev/null
+++ b/src/libserver/css/css_selector.hxx
@@ -0,0 +1,134 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_SELECTOR_HXX
+#define RSPAMD_CSS_SELECTOR_HXX
+
+#include <variant>
+#include <string>
+#include <optional>
+#include <vector>
+#include <memory>
+
+#include "function2/function2.hpp"
+#include "parse_error.hxx"
+#include "css_parser.hxx"
+#include "libserver/html/html_tags.h"
+#include "libcryptobox/cryptobox.h"
+
+namespace rspamd::css {
+
+/*
+ * Holds a value for css selector, internal is handled by variant
+ */
+struct css_selector {
+ enum class selector_type {
+ SELECTOR_TAG, /* e.g. tr, for this value we use tag_id_t */
+ SELECTOR_CLASS, /* generic class, e.g. .class */
+ SELECTOR_ID, /* e.g. #id */
+ SELECTOR_ALL /* * selector */
+ };
+
+ selector_type type;
+ std::variant<tag_id_t, std::string_view> value;
+
+ /* Conditions for the css selector */
+ /* Dependency on attributes */
+ struct css_attribute_condition {
+ std::string_view attribute;
+ std::string_view op = "";
+ std::string_view value = "";
+ };
+
+ /* General dependency chain */
+ using css_selector_ptr = std::unique_ptr<css_selector>;
+ using css_selector_dep = std::variant<css_attribute_condition, css_selector_ptr>;
+ std::vector<css_selector_dep> dependencies;
+
+ auto to_tag(void) const -> std::optional<tag_id_t>
+ {
+ if (type == selector_type::SELECTOR_TAG) {
+ return std::get<tag_id_t>(value);
+ }
+ return std::nullopt;
+ }
+
+ auto to_string(void) const -> std::optional<const std::string_view>
+ {
+ if (type != selector_type::SELECTOR_TAG) {
+ return std::string_view(std::get<std::string_view>(value));
+ }
+ return std::nullopt;
+ };
+
+ explicit css_selector(selector_type t)
+ : type(t)
+ {
+ }
+ explicit css_selector(tag_id_t t)
+ : type(selector_type::SELECTOR_TAG)
+ {
+ value = t;
+ }
+ explicit css_selector(const std::string_view &st, selector_type t = selector_type::SELECTOR_ID)
+ : type(t)
+ {
+ value = st;
+ }
+
+ auto operator==(const css_selector &other) const -> bool
+ {
+ return type == other.type && value == other.value;
+ }
+
+ auto debug_str(void) const -> std::string;
+};
+
+
+using selectors_vec = std::vector<std::unique_ptr<css_selector>>;
+
+/*
+ * Consume selectors token and split them to the list of selectors
+ */
+auto process_selector_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_token_functor)
+ -> selectors_vec;
+
+}// namespace rspamd::css
+
+/* Selectors hashing */
+namespace std {
+template<>
+class hash<rspamd::css::css_selector> {
+public:
+ using is_avalanching = void;
+ auto operator()(const rspamd::css::css_selector &sel) const -> std::size_t
+ {
+ if (sel.type == rspamd::css::css_selector::selector_type::SELECTOR_TAG) {
+ return static_cast<std::size_t>(std::get<tag_id_t>(sel.value));
+ }
+ else {
+ const auto &sv = std::get<std::string_view>(sel.value);
+
+ return rspamd_cryptobox_fast_hash(sv.data(), sv.size(), 0xdeadbabe);
+ }
+ }
+};
+}// namespace std
+
+#endif//RSPAMD_CSS_SELECTOR_HXX
diff --git a/src/libserver/css/css_selector_parser.rl b/src/libserver/css/css_selector_parser.rl
new file mode 100644
index 0000000..f5ae936
--- /dev/null
+++ b/src/libserver/css/css_selector_parser.rl
@@ -0,0 +1,27 @@
+%%{
+ machine css_parser;
+ alphtype unsigned char;
+ include css_syntax "css_syntax.rl";
+
+ main := selectors_group;
+}%%
+
+%% write data;
+
+#include <cstddef>
+
+namespace rspamd::css {
+
+int
+parse_css_selector (const unsigned char *data, std::size_t len)
+{
+ const unsigned char *p = data, *pe = data + len, *eof;
+ int cs;
+
+ %% write init;
+ %% write exec;
+
+ return cs;
+}
+
+} \ No newline at end of file
diff --git a/src/libserver/css/css_style.hxx b/src/libserver/css/css_style.hxx
new file mode 100644
index 0000000..429e58f
--- /dev/null
+++ b/src/libserver/css/css_style.hxx
@@ -0,0 +1,66 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_STYLE_HXX
+#define RSPAMD_CSS_STYLE_HXX
+
+#include <memory>
+#include <vector>
+#include "css_rule.hxx"
+#include "css_selector.hxx"
+
+namespace rspamd::css {
+
+/*
+ * Full CSS style representation
+ */
+class css_style {
+public:
+ /* Make class trivial */
+ css_style(const css_style &other) = default;
+
+ css_style(const std::shared_ptr<css_style> &_parent)
+ : parent(_parent)
+ {
+ propagate_from_parent();
+ }
+ css_style(const std::shared_ptr<css_style> &_parent,
+ const std::vector<std::shared_ptr<css_selector>> &_selectors)
+ : parent(_parent)
+ {
+ selectors.reserve(_selectors.size());
+
+ for (const auto &sel_ptr: _selectors) {
+ selectors.emplace_back(sel_ptr);
+ }
+
+ propagate_from_parent();
+ }
+
+private:
+ std::vector<std::weak_ptr<css_selector>> selectors;
+ std::weak_ptr<css_style> parent;
+ std::vector<css_rule> rules;
+
+private:
+ void propagate_from_parent(void); /* Construct full style using parent */
+};
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_STYLE_HXX
diff --git a/src/libserver/css/css_syntax.rl b/src/libserver/css/css_syntax.rl
new file mode 100644
index 0000000..93da44b
--- /dev/null
+++ b/src/libserver/css/css_syntax.rl
@@ -0,0 +1,110 @@
+%%{
+ # CSS3 EBNF derived
+ machine css_syntax;
+
+ # Primitive Atoms
+ COMMENT = (
+ '/*' ( any )* :>> '*/'
+ );
+ QUOTED_STRING = ('"' ( [^"\\] | /\\./ )* "'");
+ BARE_URL_CHARS = ((0x21
+ | 0x23..0x26
+ | 0x2A..0xFF)+);
+ BARE_URL = BARE_URL_CHARS;
+ URL = 'url(' ( QUOTED_STRING | space* BARE_URL space* ) ')';
+ nonascii = [^0x00-0x7F];
+ nmstart = ([_a-zA-Z] | nonascii);
+ nmchar = ([_a-zA-Z0-9] | 0x2D | nonascii);
+ name = nmchar+;
+ num = ([0-9]+ | ([0-9]* '.' [0-9]+));
+ CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
+ IDENT = ([\-]? nmstart nmchar*);
+ ATTR = 'attr(' IDENT ')';
+
+ DIMENSION = '-'? num space? ( 'ch' | 'cm' | 'em' | 'ex' | 'fr' | 'in' | 'mm' | 'pc' | 'pt' | 'px' | 'Q' | 'rem' | 'vh' | 'vmax' | 'vmin' | 'vw' | 'dpi' );
+ NUMBER = '-'? num;
+ HASH = '#' name;
+ HEX = '#' [0-9a-fA-F]{1,6};
+ PERCENTAGE = '-'? num '%';
+ INCLUDES = '~=';
+ DASHMATCH = '|=';
+ PREFIXMATCH = '^=';
+ SUFFIXMATCH = '$=';
+ SUBSTRINGMATCH = '*=';
+ PLUS = '+';
+ GREATER = '>';
+ COMMA = ',';
+ TILDE = '~';
+ S = space;
+
+ # Property name
+ property = ( QUOTED_STRING | IDENT );
+
+ # Values
+ important = space* '!' space* 'important';
+ expression = ( ( '+' | PERCENTAGE | URL | ATTR | HEX | '-' | DIMENSION | NUMBER | QUOTED_STRING | IDENT | ',') S* )+;
+ functional_pseudo = (IDENT - ('attr'|'url')) '(' space* expression? ')';
+ value = ( URL | ATTR | PLUS | HEX | PERCENTAGE | '-' | DIMENSION | NUMBER | QUOTED_STRING | IDENT | functional_pseudo);
+ values = value (space value | '/' value )* ( space* ',' space* value (space value | '/' value )* )* important?;
+
+ # Declaration definition
+ declaration = (property space? ':' (property ':')* space? values);
+
+ # Selectors
+ class = '.' IDENT;
+ element_name = IDENT;
+ namespace_prefix = ( IDENT | '*' )? '|';
+ type_selector = namespace_prefix? element_name;
+ universal = namespace_prefix? '*';
+ attrib = '[' space* namespace_prefix? IDENT space* ( ( PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | '=' | INCLUDES | DASHMATCH ) space* ( IDENT | QUOTED_STRING ) space* )? ']';
+ pseudo = ':' ':'? ( IDENT | functional_pseudo );
+ atrule = '@' IDENT;
+ mediaquery_selector = '(' declaration ')';
+ negation_arg = type_selector
+ | universal
+ | HASH
+ | class
+ | attrib
+ | pseudo;
+ negation = 'NOT'|'not' space* negation_arg space* ')';
+ # Haha, so simple...
+ # there should be also mediaquery_selector but it makes grammar too large, so rip it off
+ simple_selector_sequence = ( type_selector | universal ) ( HASH | class | attrib | pseudo | negation | atrule )*
+ | ( HASH | class | attrib | pseudo | negation | atrule )+;
+ combinator = space* PLUS space*
+ | space* GREATER space*
+ | space* TILDE space*
+ | space+;
+ # Combine simple stuff and obtain just... an ordinary selector, bingo
+ selector = simple_selector_sequence ( combinator simple_selector_sequence )*;
+ # Multiple beasts
+ selectors_group = selector ( COMMENT? ',' space* selector )*;
+
+ # Rules
+ # This is mostly used stuff
+ rule = selectors_group space? "{" space*
+ (COMMENT? space* declaration ( space? ";" space? declaration?)* ";"? space?)* COMMENT* space* '}';
+ query_declaration = rule;
+
+ # Areas used in css
+ arearule = '@'('bottom-left'|'bottom-right'|'top-left'|'top-right');
+ areaquery = arearule space? '{' space* (COMMENT? space* declaration ( S? ';' S? declaration?)* ';'? space?)* COMMENT* space* '}';
+ # Printed media stuff, useless but we have to parse it :(
+ printcssrule = '@media print';
+ pagearea = ':'('left'|'right');
+ pagerule = '@page' space? pagearea?;
+ pagequery = pagerule space? '{' space* (areaquery| (COMMENT? space* declaration ( space? ';' space? declaration?)* ';'? S?)*) COMMENT* space* '}';
+ printcssquery = printcssrule S? '{' ( S? COMMENT* S? (pagequery| COMMENT|query_declaration) S*)* S? '}';
+ # Something that defines media
+ conditions = ('and'|'screen'|'or'|'only'|'not'|'amzn-mobi'|'amzn-kf8'|'amzn-mobi7'|',');
+ mediarule = '@media' space conditions ( space? conditions| space? mediaquery_selector )*;
+ mediaquery = mediarule space? '{' ( space? COMMENT* query_declaration)* S? '}';
+
+ simple_atrule = ("@charset"|"@namespace") space+ QUOTED_STRING space* ";";
+
+ import_rule = "@import" space+ ( QUOTED_STRING | URL ) space* ";";
+
+ # Final css definition
+ css_style = space* ( ( rule | simple_atrule | import_rule | mediaquery | printcssquery | COMMENT) space* )*;
+
+}%% \ No newline at end of file
diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx
new file mode 100644
index 0000000..6d3f41e
--- /dev/null
+++ b/src/libserver/css/css_tokeniser.cxx
@@ -0,0 +1,836 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_tokeniser.hxx"
+#include "css_util.hxx"
+#include "css.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include <string>
+#include <cmath>
+
+namespace rspamd::css {
+
+/* Helpers to create tokens */
+
+/*
+ * This helper is intended to create tokens either with a tag and value
+ * or with just a tag.
+ */
+template<css_parser_token::token_type T, class Arg>
+auto make_token(const Arg &arg) -> css_parser_token;
+
+template<>
+auto make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::string_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::ident_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::ident_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::function_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::function_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::url_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::url_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::whitespace_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::delim_token, char>(const char &c)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::delim_token, c};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::number_token, float>(const float &d)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::number_token, d};
+}
+
+/*
+ * Generic tokens with no value (non-terminals)
+ */
+template<css_parser_token::token_type T>
+auto make_token(void) -> css_parser_token
+{
+ return css_parser_token{T, css_parser_token_placeholder()};
+}
+
+static constexpr inline auto is_plain_ident_start(char c) -> bool
+{
+ if ((c & 0x80) || g_ascii_isalpha(c) || c == '_') {
+ return true;
+ }
+
+ return false;
+};
+
+static constexpr inline auto is_plain_ident(char c) -> bool
+{
+ if (is_plain_ident_start(c) || c == '-' || g_ascii_isdigit(c)) {
+ return true;
+ }
+
+ return false;
+};
+
+struct css_dimension_data {
+ css_parser_token::dim_type dtype;
+ double mult;
+};
+
+/*
+ * Maps from css dimensions to the multipliers that look reasonable in email
+ */
+constexpr const auto max_dims = static_cast<int>(css_parser_token::dim_type::dim_max);
+constexpr frozen::unordered_map<frozen::string, css_dimension_data, max_dims> dimensions_map{
+ {"px", {css_parser_token::dim_type::dim_px, 1.0}},
+ /* EM/REM are 16 px, so multiply and round */
+ {"em", {css_parser_token::dim_type::dim_em, 16.0}},
+ {"rem", {css_parser_token::dim_type::dim_rem, 16.0}},
+ /*
+ * Represents the x-height of the element's font.
+ * On fonts with the "x" letter, this is generally the height
+ * of lowercase letters in the font; 1ex = 0.5em in many fonts.
+ */
+ {"ex", {css_parser_token::dim_type::dim_ex, 8.0}},
+ {"wv", {css_parser_token::dim_type::dim_wv, 8.0}},
+ {"wh", {css_parser_token::dim_type::dim_wh, 6.0}},
+ {"vmax", {css_parser_token::dim_type::dim_vmax, 8.0}},
+ {"vmin", {css_parser_token::dim_type::dim_vmin, 6.0}},
+ /* One point. 1pt = 1/72nd of 1in */
+ {"pt", {css_parser_token::dim_type::dim_pt, 96.0 / 72.0}},
+ /* 96px/2.54 */
+ {"cm", {css_parser_token::dim_type::dim_cm, 96.0 / 2.54}},
+ {"mm", {css_parser_token::dim_type::dim_mm, 9.60 / 2.54}},
+ {"in", {css_parser_token::dim_type::dim_in, 96.0}},
+ /* 1pc = 12pt = 1/6th of 1in. */
+ {"pc", {css_parser_token::dim_type::dim_pc, 96.0 / 6.0}}};
+
+auto css_parser_token::adjust_dim(const css_parser_token &dim_token) -> bool
+{
+ if (!std::holds_alternative<float>(value) ||
+ !std::holds_alternative<std::string_view>(dim_token.value)) {
+ /* Invalid tokens */
+ return false;
+ }
+
+ auto num = std::get<float>(value);
+ auto sv = std::get<std::string_view>(dim_token.value);
+
+ auto dim_found = find_map(dimensions_map, sv);
+
+ if (dim_found) {
+ auto dim_elt = dim_found.value().get();
+ dimension_type = dim_elt.dtype;
+ flags |= css_parser_token::number_dimension;
+ num *= dim_elt.mult;
+ }
+ else {
+ flags |= css_parser_token::flag_bad_dimension;
+
+ return false;
+ }
+
+ value = num;
+
+ return true;
+}
+
+
+/*
+ * Consume functions: return a token and advance lexer offset
+ */
+auto css_tokeniser::consume_ident(bool allow_number) -> struct css_parser_token {
+ auto i = offset;
+ auto need_escape = false;
+ auto allow_middle_minus = false;
+
+ auto maybe_escape_sv = [&](auto cur_pos, auto tok_type) -> auto {
+ if (need_escape) {
+ auto escaped = rspamd::css::unescape_css(pool, {&input[offset],
+ cur_pos - offset});
+ offset = cur_pos;
+
+ return css_parser_token{tok_type, escaped};
+ }
+
+ auto result = std::string_view{&input[offset], cur_pos - offset};
+ offset = cur_pos;
+
+ return css_parser_token{tok_type, result};
+ };
+
+ /* Ident token can start from `-` or `--` */
+ if (input[i] == '-') {
+ i++;
+
+ if (i < input.size() && input[i] == '-') {
+ i++;
+ allow_middle_minus = true;
+ }
+ }
+
+ while (i < input.size()) {
+ auto c = input[i];
+
+ auto is_plain_c = (allow_number || allow_middle_minus) ? is_plain_ident(c) : is_plain_ident_start(c);
+ if (!is_plain_c) {
+ if (c == '\\' && i + 1 < input.size()) {
+ /* Escape token */
+ need_escape = true;
+ auto nhex = 0;
+
+ /* Need to find an escape end */
+ do {
+ c = input[++i];
+ if (g_ascii_isxdigit(c)) {
+ nhex++;
+
+ if (nhex > 6) {
+ /* End of the escape */
+ break;
+ }
+ }
+ else if (nhex > 0 && c == ' ') {
+ /* \[hex]{1,6} */
+ i++; /* Skip one space */
+ break;
+ }
+ else {
+ /* Single \ + char */
+ break;
+ }
+ } while (i < input.size());
+ }
+ else if (c == '(') {
+ /* Function or url token */
+ auto j = i + 1;
+
+ while (j < input.size() && g_ascii_isspace(input[j])) {
+ j++;
+ }
+
+ if (input.size() - offset > 3 && input.substr(offset, 3) == "url") {
+ if (j < input.size() && (input[j] == '"' || input[j] == '\'')) {
+ /* Function token */
+ auto ret = maybe_escape_sv(i,
+ css_parser_token::token_type::function_token);
+ return ret;
+ }
+ else {
+ /* Consume URL token */
+ while (j < input.size() && input[j] != ')') {
+ j++;
+ }
+
+ if (j < input.size() && input[j] == ')') {
+ /* Valid url token */
+ auto ret = maybe_escape_sv(j + 1,
+ css_parser_token::token_type::url_token);
+ return ret;
+ }
+ else {
+ /* Incomplete url token */
+ auto ret = maybe_escape_sv(j,
+ css_parser_token::token_type::url_token);
+
+ ret.flags |= css_parser_token::flag_bad_string;
+ return ret;
+ }
+ }
+ }
+ else {
+ auto ret = maybe_escape_sv(i,
+ css_parser_token::token_type::function_token);
+ return ret;
+ }
+ }
+ else if (c == '-' && allow_middle_minus) {
+ i++;
+ continue;
+ }
+ else {
+ break; /* Not an ident token */
+ }
+ } /* !plain ident */
+ else {
+ allow_middle_minus = true;
+ }
+
+ i++;
+ }
+
+ return maybe_escape_sv(i, css_parser_token::token_type::ident_token);
+}
+
+auto
+css_tokeniser::consume_number() -> struct css_parser_token {
+ auto i = offset;
+ auto seen_dot = false, seen_exp = false;
+
+ if (input[i] == '-' || input[i] == '+') {
+ i++;
+ }
+ if (input[i] == '.' && i < input.size()) {
+ seen_dot = true;
+ i++;
+ }
+
+ while (i < input.size()) {
+ auto c = input[i];
+
+ if (!g_ascii_isdigit(c)) {
+ if (c == '.') {
+ if (!seen_dot) {
+ seen_dot = true;
+ }
+ else {
+ break;
+ }
+ }
+ else if (c == 'e' || c == 'E') {
+ if (!seen_exp) {
+ seen_exp = true;
+ seen_dot = true; /* dots are not allowed after e */
+
+ if (i + 1 < input.size()) {
+ auto next_c = input[i + 1];
+ if (next_c == '+' || next_c == '-') {
+ i++;
+ }
+ else if (!g_ascii_isdigit(next_c)) {
+ /* Not an exponent */
+ break;
+ }
+ }
+ else {
+ /* Not an exponent */
+ break;
+ }
+ }
+ else {
+ break;
+ }
+ }
+ else {
+ break;
+ }
+ }
+
+ i++;
+ }
+
+ if (i > offset) {
+ /* I wish it was supported properly */
+ //auto conv_res = std::from_chars(&input[offset], &input[i], num);
+ char numbuf[128], *endptr = nullptr;
+ rspamd_strlcpy(numbuf, &input[offset], MIN(i - offset + 1, sizeof(numbuf)));
+ auto num = g_ascii_strtod(numbuf, &endptr);
+ offset = i;
+
+ if (fabs(num) >= G_MAXFLOAT || std::isnan(num)) {
+ msg_debug_css("invalid number: %s", numbuf);
+ return make_token<css_parser_token::token_type::delim_token>(input[i - 1]);
+ }
+ else {
+
+ auto ret = make_token<css_parser_token::token_type::number_token>(static_cast<float>(num));
+
+ if (i < input.size()) {
+ if (input[i] == '%') {
+ ret.flags |= css_parser_token::number_percent;
+ i++;
+
+ offset = i;
+ }
+ else if (is_plain_ident_start(input[i])) {
+ auto dim_token = consume_ident();
+
+ if (dim_token.type == css_parser_token::token_type::ident_token) {
+ if (!ret.adjust_dim(dim_token)) {
+ auto sv = std::get<std::string_view>(dim_token.value);
+ msg_debug_css("cannot apply dimension from the token %*s; number value = %.1f",
+ (int) sv.size(), sv.begin(), num);
+ /* Unconsume ident */
+ offset = i;
+ }
+ }
+ else {
+ /* We have no option but to uncosume ident token in this case */
+ msg_debug_css("got invalid ident like token after number, unconsume it");
+ }
+ }
+ else {
+ /* Plain number, nothing to do */
+ }
+ }
+
+ return ret;
+ }
+ }
+ else {
+ msg_err_css("internal error: invalid number, empty token");
+ i++;
+ }
+
+ offset = i;
+ /* Should not happen */
+ return make_token<css_parser_token::token_type::delim_token>(input[i - 1]);
+}
+
+/*
+ * Main routine to produce lexer tokens
+ */
+auto
+css_tokeniser::next_token(void) -> struct css_parser_token {
+ /* Check pushback queue */
+ if (!backlog.empty()) {
+ auto tok = backlog.front();
+ backlog.pop_front();
+
+ return tok;
+ }
+ /* Helpers */
+
+ /*
+ * This lambda eats comment handling nested comments;
+ * offset is set to the next character after a comment (or eof)
+ * Nothing is returned
+ */
+ auto consume_comment = [this]() {
+ auto i = offset;
+ auto nested = 0;
+
+ if (input.empty()) {
+ /* Nothing to consume */
+ return;
+ }
+
+ /* We handle nested comments just because they can exist... */
+ while (i < input.size() - 1) {
+ auto c = input[i];
+ if (c == '*' && input[i + 1] == '/') {
+ if (nested == 0) {
+ offset = i + 2;
+ return;
+ }
+ else {
+ nested--;
+ i += 2;
+ continue;
+ }
+ }
+ else if (c == '/' && input[i + 1] == '*') {
+ nested++;
+ i += 2;
+ continue;
+ }
+
+ i++;
+ }
+
+ offset = i;
+ };
+
+ /*
+ * Consume quoted string, returns a string_view over a string, offset
+ * is set one character after the string. Css unescaping is done automatically
+ * Accepts a quote char to find end of string
+ */
+ auto consume_string = [this](auto quote_char) -> auto {
+ auto i = offset;
+ bool need_unescape = false;
+
+ while (i < input.size()) {
+ auto c = input[i];
+
+ if (c == '\\') {
+ if (i + 1 < input.size()) {
+ need_unescape = true;
+ }
+ else {
+ /* \ at the end -> ignore */
+ }
+ }
+ else if (c == quote_char) {
+ /* End of string */
+ std::string_view res{&input[offset], i - offset};
+
+ if (need_unescape) {
+ res = rspamd::css::unescape_css(pool, res);
+ }
+
+ offset = i + 1;
+
+ return res;
+ }
+ else if (c == '\n') {
+ /* Should be a error, but we ignore it for now */
+ }
+
+ i++;
+ }
+
+ /* EOF with no quote character, consider it fine */
+ std::string_view res{&input[offset], i - offset};
+
+ if (need_unescape) {
+ res = rspamd::css::unescape_css(pool, res);
+ }
+
+ offset = i;
+
+ return res;
+ };
+
+ /* Main tokenisation loop */
+ for (auto i = offset; i < input.size(); ++i) {
+ auto c = input[i];
+
+ switch (c) {
+ case '/':
+ if (i + 1 < input.size() && input[i + 1] == '*') {
+ offset = i + 2;
+ consume_comment(); /* Consume comment and go forward */
+ return next_token(); /* Tail call */
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f': {
+ /* Consume as much space as we can */
+ while (i < input.size() && g_ascii_isspace(input[i])) {
+ i++;
+ }
+
+ auto ret = make_token<css_parser_token::token_type::whitespace_token>(
+ std::string_view(&input[offset], i - offset));
+ offset = i;
+ return ret;
+ }
+ case '"':
+ case '\'':
+ offset = i + 1;
+ if (offset < input.size()) {
+ return make_token<css_parser_token::token_type::string_token>(consume_string(c));
+ }
+ else {
+ /* Unpaired quote at the end of the rule */
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ case '(':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::obrace_token>();
+ case ')':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::ebrace_token>();
+ case '[':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::osqbrace_token>();
+ case ']':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::esqbrace_token>();
+ case '{':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::ocurlbrace_token>();
+ case '}':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::ecurlbrace_token>();
+ case ',':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::comma_token>();
+ case ';':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::semicolon_token>();
+ case ':':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::colon_token>();
+ case '<':
+ /* Maybe an xml like comment */
+ if (i + 3 < input.size() && input[i + 1] == '!' && input[i + 2] == '-' && input[i + 3] == '-') {
+ offset += 3;
+
+ return make_token<css_parser_token::token_type::cdo_token>();
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case '-':
+ if (i + 1 < input.size()) {
+ auto next_c = input[i + 1];
+
+ if (g_ascii_isdigit(next_c)) {
+ /* negative number */
+ return consume_number();
+ }
+ else if (next_c == '-') {
+ if (i + 2 < input.size() && input[i + 2] == '>') {
+ /* XML like comment */
+ offset += 3;
+
+ return make_token<css_parser_token::token_type::cdc_token>();
+ }
+ }
+ }
+ /* No other options, a delimiter - */
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+
+ break;
+ case '+':
+ case '.':
+ /* Maybe number */
+ if (i + 1 < input.size()) {
+ auto next_c = input[i + 1];
+
+ if (g_ascii_isdigit(next_c)) {
+ /* Numeric token */
+ return consume_number();
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ }
+ /* No other options, a delimiter - */
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+
+ break;
+ case '\\':
+ if (i + 1 < input.size()) {
+ if (input[i + 1] == '\n' || input[i + 1] == '\r') {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ else {
+ /* Valid escape, assume ident */
+ return consume_ident();
+ }
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case '@':
+ if (i + 3 < input.size()) {
+ if (is_plain_ident_start(input[i + 1]) &&
+ is_plain_ident(input[i + 2]) && is_plain_ident(input[i + 3])) {
+ offset = i + 1;
+ auto ident_token = consume_ident();
+
+ if (ident_token.type == css_parser_token::token_type::ident_token) {
+ /* Update type */
+ ident_token.type = css_parser_token::token_type::at_keyword_token;
+ }
+
+ return ident_token;
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case '#':
+ /* TODO: make it more conformant */
+ if (i + 2 < input.size()) {
+ auto next_c = input[i + 1], next_next_c = input[i + 2];
+ if ((is_plain_ident(next_c) || next_c == '-') &&
+ (is_plain_ident(next_next_c) || next_next_c == '-')) {
+ offset = i + 1;
+ /* We consume indent, but we allow numbers there */
+ auto ident_token = consume_ident(true);
+
+ if (ident_token.type == css_parser_token::token_type::ident_token) {
+ /* Update type */
+ ident_token.type = css_parser_token::token_type::hash_token;
+ }
+
+ return ident_token;
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ default:
+ /* Generic parsing code */
+
+ if (g_ascii_isdigit(c)) {
+ return consume_number();
+ }
+ else if (is_plain_ident_start(c)) {
+ return consume_ident();
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ }
+ }
+
+ return make_token<css_parser_token::token_type::eof_token>();
+}
+
+constexpr auto
+css_parser_token::get_token_type() -> const char *
+{
+ const char *ret = "unknown";
+
+ switch (type) {
+ case token_type::whitespace_token:
+ ret = "whitespace";
+ break;
+ case token_type::ident_token:
+ ret = "ident";
+ break;
+ case token_type::function_token:
+ ret = "function";
+ break;
+ case token_type::at_keyword_token:
+ ret = "atkeyword";
+ break;
+ case token_type::hash_token:
+ ret = "hash";
+ break;
+ case token_type::string_token:
+ ret = "string";
+ break;
+ case token_type::number_token:
+ ret = "number";
+ break;
+ case token_type::url_token:
+ ret = "url";
+ break;
+ case token_type::cdo_token: /* xml open comment */
+ ret = "cdo";
+ break;
+ case token_type::cdc_token: /* xml close comment */
+ ret = "cdc";
+ break;
+ case token_type::delim_token:
+ ret = "delim";
+ break;
+ case token_type::obrace_token: /* ( */
+ ret = "obrace";
+ break;
+ case token_type::ebrace_token: /* ) */
+ ret = "ebrace";
+ break;
+ case token_type::osqbrace_token: /* [ */
+ ret = "osqbrace";
+ break;
+ case token_type::esqbrace_token: /* ] */
+ ret = "esqbrace";
+ break;
+ case token_type::ocurlbrace_token: /* { */
+ ret = "ocurlbrace";
+ break;
+ case token_type::ecurlbrace_token: /* } */
+ ret = "ecurlbrace";
+ break;
+ case token_type::comma_token:
+ ret = "comma";
+ break;
+ case token_type::colon_token:
+ ret = "colon";
+ break;
+ case token_type::semicolon_token:
+ ret = "semicolon";
+ break;
+ case token_type::eof_token:
+ ret = "eof";
+ break;
+ }
+
+ return ret;
+}
+
+
+auto css_parser_token::debug_token_str() -> std::string
+{
+ const auto *token_type_str = get_token_type();
+ std::string ret = token_type_str;
+
+ std::visit([&](auto arg) -> auto {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::string_view> || std::is_same_v<T, char>) {
+ ret += "; value=";
+ ret += arg;
+ }
+ else if constexpr (std::is_same_v<T, double>) {
+ ret += "; value=";
+ ret += std::to_string(arg);
+ }
+ },
+ value);
+
+ if ((flags & (~number_dimension)) != default_flags) {
+ ret += "; flags=" + std::to_string(flags);
+ }
+
+ if (flags & number_dimension) {
+ ret += "; dim=" + std::to_string(static_cast<int>(dimension_type));
+ }
+
+ return ret; /* Copy elision */
+}
+
+}// namespace rspamd::css \ No newline at end of file
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
new file mode 100644
index 0000000..aa6a1a7
--- /dev/null
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -0,0 +1,215 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_TOKENISER_HXX
+#define RSPAMD_CSS_TOKENISER_HXX
+
+#include <string_view>
+#include <utility>
+#include <variant>
+#include <list>
+#include <functional>
+#include <cstdint>
+#include "mem_pool.h"
+
+namespace rspamd::css {
+
+struct css_parser_token_placeholder {}; /* For empty tokens */
+
+struct css_parser_token {
+
+ enum class token_type : std::uint8_t {
+ whitespace_token,
+ ident_token,
+ function_token,
+ at_keyword_token,
+ hash_token,
+ string_token,
+ number_token,
+ url_token,
+ cdo_token, /* xml open comment */
+ cdc_token, /* xml close comment */
+ delim_token,
+ obrace_token, /* ( */
+ ebrace_token, /* ) */
+ osqbrace_token, /* [ */
+ esqbrace_token, /* ] */
+ ocurlbrace_token, /* { */
+ ecurlbrace_token, /* } */
+ comma_token,
+ colon_token,
+ semicolon_token,
+ eof_token,
+ };
+
+ enum class dim_type : std::uint8_t {
+ dim_px = 0,
+ dim_em,
+ dim_rem,
+ dim_ex,
+ dim_wv,
+ dim_wh,
+ dim_vmax,
+ dim_vmin,
+ dim_pt,
+ dim_cm,
+ dim_mm,
+ dim_in,
+ dim_pc,
+ dim_max,
+ };
+
+ static const std::uint8_t default_flags = 0;
+ static const std::uint8_t flag_bad_string = (1u << 0u);
+ static const std::uint8_t number_dimension = (1u << 1u);
+ static const std::uint8_t number_percent = (1u << 2u);
+ static const std::uint8_t flag_bad_dimension = (1u << 3u);
+
+ using value_type = std::variant<std::string_view, /* For strings and string like tokens */
+ char, /* For delimiters (might need to move to unicode point) */
+ float, /* For numeric stuff */
+ css_parser_token_placeholder /* For general no token stuff */
+ >;
+
+ /* Typed storage */
+ value_type value;
+
+ int lineno;
+
+ token_type type;
+ std::uint8_t flags = default_flags;
+ dim_type dimension_type;
+
+ css_parser_token() = delete;
+ explicit css_parser_token(token_type type, const value_type &value)
+ : value(value), type(type)
+ {
+ }
+ css_parser_token(css_parser_token &&other) = default;
+ css_parser_token(const css_parser_token &token) = default;
+ auto operator=(css_parser_token &&other) -> css_parser_token & = default;
+ auto adjust_dim(const css_parser_token &dim_token) -> bool;
+
+ auto get_string_or_default(const std::string_view &def) const -> std::string_view
+ {
+ if (std::holds_alternative<std::string_view>(value)) {
+ return std::get<std::string_view>(value);
+ }
+ else if (std::holds_alternative<char>(value)) {
+ return std::string_view(&std::get<char>(value), 1);
+ }
+
+ return def;
+ }
+
+ auto get_delim() const -> char
+ {
+ if (std::holds_alternative<char>(value)) {
+ return std::get<char>(value);
+ }
+
+ return (char) -1;
+ }
+
+ auto get_number_or_default(float def) const -> float
+ {
+ if (std::holds_alternative<float>(value)) {
+ auto dbl = std::get<float>(value);
+
+ if (flags & css_parser_token::number_percent) {
+ dbl /= 100.0;
+ }
+
+ return dbl;
+ }
+
+ return def;
+ }
+
+ auto get_normal_number_or_default(float def) const -> float
+ {
+ if (std::holds_alternative<float>(value)) {
+ auto dbl = std::get<float>(value);
+
+ if (flags & css_parser_token::number_percent) {
+ dbl /= 100.0;
+ }
+
+ if (dbl < 0) {
+ return 0.0;
+ }
+ else if (dbl > 1.0) {
+ return 1.0;
+ }
+
+ return dbl;
+ }
+
+ return def;
+ }
+
+ /* Debugging routines */
+ constexpr auto get_token_type() -> const char *;
+ /* This function might be slow */
+ auto debug_token_str() -> std::string;
+};
+
+static auto css_parser_eof_token(void) -> const css_parser_token &
+{
+ static css_parser_token eof_tok{
+ css_parser_token::token_type::eof_token,
+ css_parser_token_placeholder()};
+
+ return eof_tok;
+}
+
+/* Ensure that parser tokens are simple enough */
+/*
+ * compiler must implement P0602 "variant and optional should propagate copy/move triviality"
+ * This is broken on gcc < 8!
+ */
+static_assert(std::is_trivially_copyable_v<css_parser_token>);
+
+class css_tokeniser {
+public:
+ css_tokeniser() = delete;
+ css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv)
+ : input(sv), offset(0), pool(pool)
+ {
+ }
+
+ auto next_token(void) -> struct css_parser_token;
+ auto pushback_token(const struct css_parser_token &t) const -> void
+ {
+ backlog.push_back(t);
+ }
+
+private:
+ std::string_view input;
+ std::size_t offset;
+ rspamd_mempool_t *pool;
+ mutable std::list<css_parser_token> backlog;
+
+ auto consume_number() -> struct css_parser_token;
+ auto consume_ident(bool allow_number = false) -> struct css_parser_token;
+};
+
+}// namespace rspamd::css
+
+
+#endif//RSPAMD_CSS_TOKENISER_HXX
diff --git a/src/libserver/css/css_util.cxx b/src/libserver/css/css_util.cxx
new file mode 100644
index 0000000..07f8722
--- /dev/null
+++ b/src/libserver/css/css_util.cxx
@@ -0,0 +1,157 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_util.hxx"
+#include "css.hxx"
+#include <unicode/utf8.h>
+
+namespace rspamd::css {
+
+std::string_view unescape_css(rspamd_mempool_t *pool,
+ const std::string_view &sv)
+{
+ auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, sv.length()));
+ auto *d = nspace;
+ auto nleft = sv.length();
+
+ enum {
+ normal = 0,
+ quoted,
+ escape,
+ skip_spaces,
+ } state = normal;
+
+ char quote_char, prev_c = 0;
+ auto escape_offset = 0, i = 0;
+
+#define MAYBE_CONSUME_CHAR(c) \
+ do { \
+ if ((c) == '"' || (c) == '\'') { \
+ state = quoted; \
+ quote_char = (c); \
+ nleft--; \
+ *d++ = (c); \
+ } \
+ else if ((c) == '\\') { \
+ escape_offset = i; \
+ state = escape; \
+ } \
+ else { \
+ state = normal; \
+ nleft--; \
+ *d++ = g_ascii_tolower(c); \
+ } \
+ } while (0)
+
+ for (const auto c: sv) {
+ if (nleft == 0) {
+ msg_err_css("cannot unescape css: truncated buffer of size %d",
+ (int) sv.length());
+ break;
+ }
+ switch (state) {
+ case normal:
+ MAYBE_CONSUME_CHAR(c);
+ break;
+ case quoted:
+ if (c == quote_char) {
+ if (prev_c != '\\') {
+ state = normal;
+ }
+ }
+ prev_c = c;
+ nleft--;
+ *d++ = c;
+ break;
+ case escape:
+ if (!g_ascii_isxdigit(c)) {
+ if (i > escape_offset + 1) {
+ /* Try to decode an escape */
+ const auto *escape_start = &sv[escape_offset + 1];
+ unsigned long val;
+
+ if (!rspamd_xstrtoul(escape_start, i - escape_offset - 1, &val)) {
+ msg_debug_css("invalid broken escape found at pos %d",
+ escape_offset);
+ }
+ else {
+ if (val < 0x80) {
+ /* Trivial case: ascii character */
+ *d++ = (unsigned char) g_ascii_tolower(val);
+ nleft--;
+ }
+ else {
+ UChar32 uc = val;
+ auto off = 0;
+ UTF8_APPEND_CHAR_SAFE((uint8_t *) d, off,
+ sv.length(), u_tolower(uc));
+ d += off;
+ nleft -= off;
+ }
+ }
+ }
+ else {
+ /* Empty escape, ignore it */
+ msg_debug_css("invalid empty escape found at pos %d",
+ escape_offset);
+ }
+
+ if (nleft <= 0) {
+ msg_err_css("cannot unescape css: truncated buffer of size %d",
+ (int) sv.length());
+ }
+ else {
+ /* Escape is done, advance forward */
+ if (g_ascii_isspace(c)) {
+ state = skip_spaces;
+ }
+ else {
+ MAYBE_CONSUME_CHAR(c);
+ }
+ }
+ }
+ break;
+ case skip_spaces:
+ if (!g_ascii_isspace(c)) {
+ MAYBE_CONSUME_CHAR(c);
+ }
+ /* Ignore spaces */
+ break;
+ }
+
+ i++;
+ }
+
+ return std::string_view{nspace, sv.size() - nleft};
+}
+
+}// namespace rspamd::css
+
+/* C API */
+const gchar *rspamd_css_unescape(rspamd_mempool_t *pool,
+ const guchar *begin,
+ gsize len,
+ gsize *outlen)
+{
+ auto sv = rspamd::css::unescape_css(pool, {(const char *) begin, len});
+ const auto *v = sv.begin();
+
+ if (outlen) {
+ *outlen = sv.size();
+ }
+
+ return v;
+} \ No newline at end of file
diff --git a/src/libserver/css/css_util.hxx b/src/libserver/css/css_util.hxx
new file mode 100644
index 0000000..4837a46
--- /dev/null
+++ b/src/libserver/css/css_util.hxx
@@ -0,0 +1,37 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_UTIL_HXX
+#define RSPAMD_CSS_UTIL_HXX
+
+#include <string_view>
+#include "mem_pool.h"
+
+namespace rspamd::css {
+
+/*
+ * Unescape css escapes
+ * \20AC : must be followed by a space if the next character is one of a-f, A-F, 0-9
+ * \0020AC : must be 6 digits long, no space needed (but can be included)
+ */
+std::string_view unescape_css(rspamd_mempool_t *pool,
+ const std::string_view &sv);
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_UTIL_HXX
diff --git a/src/libserver/css/css_value.cxx b/src/libserver/css/css_value.cxx
new file mode 100644
index 0000000..2546e01
--- /dev/null
+++ b/src/libserver/css/css_value.cxx
@@ -0,0 +1,449 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_value.hxx"
+#include "css_colors_list.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include "libutil/util.h"
+#include "contrib/ankerl/unordered_dense.h"
+#include "fmt/core.h"
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+/* Helper for unit test stringification */
+namespace doctest {
+template<>
+struct StringMaker<rspamd::css::css_color> {
+ static String convert(const rspamd::css::css_color &value)
+ {
+ return fmt::format("r={};g={};b={};alpha={}",
+ value.r, value.g, value.b, value.alpha)
+ .c_str();
+ }
+};
+
+}// namespace doctest
+
+namespace rspamd::css {
+
+auto css_value::maybe_color_from_string(const std::string_view &input)
+ -> std::optional<css_value>
+{
+
+ if (input.size() > 1 && input.front() == '#') {
+ return css_value::maybe_color_from_hex(input.substr(1));
+ }
+ else {
+ auto found_it = css_colors_map.find(input);
+
+ if (found_it != css_colors_map.end()) {
+ return css_value{found_it->second};
+ }
+ }
+
+ return std::nullopt;
+}
+
+constexpr static inline auto hexpair_decode(char c1, char c2) -> std::uint8_t
+{
+ std::uint8_t ret = 0;
+
+ if (c1 >= '0' && c1 <= '9') ret = c1 - '0';
+ else if (c1 >= 'A' && c1 <= 'F')
+ ret = c1 - 'A' + 10;
+ else if (c1 >= 'a' && c1 <= 'f')
+ ret = c1 - 'a' + 10;
+
+ ret *= 16;
+
+ if (c2 >= '0' && c2 <= '9') ret += c2 - '0';
+ else if (c2 >= 'A' && c2 <= 'F')
+ ret += c2 - 'A' + 10;
+ else if (c2 >= 'a' && c2 <= 'f')
+ ret += c2 - 'a' + 10;
+
+ return ret;
+}
+
+auto css_value::maybe_color_from_hex(const std::string_view &input)
+ -> std::optional<css_value>
+{
+ if (input.length() == 6) {
+ /* Plain RGB */
+ css_color col(hexpair_decode(input[0], input[1]),
+ hexpair_decode(input[2], input[3]),
+ hexpair_decode(input[4], input[5]));
+ return css_value(col);
+ }
+ else if (input.length() == 3) {
+ /* Rgb as 3 hex digests */
+ css_color col(hexpair_decode(input[0], input[0]),
+ hexpair_decode(input[1], input[1]),
+ hexpair_decode(input[2], input[2]));
+ return css_value(col);
+ }
+ else if (input.length() == 8) {
+ /* RGBA */
+ css_color col(hexpair_decode(input[0], input[1]),
+ hexpair_decode(input[2], input[3]),
+ hexpair_decode(input[4], input[5]),
+ hexpair_decode(input[6], input[7]));
+ return css_value(col);
+ }
+
+ return std::nullopt;
+}
+
+constexpr static inline auto rgb_color_component_convert(const css_parser_token &tok)
+ -> std::uint8_t
+{
+ std::uint8_t ret = 0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ auto dbl = std::get<float>(tok.value);
+
+ if (tok.flags & css_parser_token::number_percent) {
+ if (dbl > 100) {
+ dbl = 100;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+ ret = (std::uint8_t)(dbl / 100.0 * 255.0);
+ }
+ else {
+ if (dbl > 255) {
+ dbl = 255;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+
+ ret = (std::uint8_t)(dbl);
+ }
+ }
+
+ return ret;
+}
+
+constexpr static inline auto alpha_component_convert(const css_parser_token &tok)
+ -> std::uint8_t
+{
+ double ret = 1.0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ auto dbl = std::get<float>(tok.value);
+
+ if (tok.flags & css_parser_token::number_percent) {
+ if (dbl > 100) {
+ dbl = 100;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+ ret = (dbl / 100.0);
+ }
+ else {
+ if (dbl > 1.0) {
+ dbl = 1.0;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+
+ ret = dbl;
+ }
+ }
+
+ return (std::uint8_t)(ret * 255.0);
+}
+
+constexpr static inline auto h_component_convert(const css_parser_token &tok)
+ -> double
+{
+ double ret = 0.0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ auto dbl = std::get<float>(tok.value);
+
+ if (tok.flags & css_parser_token::number_percent) {
+ if (dbl > 100) {
+ dbl = 100;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+ ret = (dbl / 100.0);
+ }
+ else {
+ dbl = ((((int) dbl % 360) + 360) % 360); /* Deal with rotations */
+ ret = dbl / 360.0; /* Normalize to 0..1 */
+ }
+ }
+
+ return ret;
+}
+
+constexpr static inline auto sl_component_convert(const css_parser_token &tok)
+ -> double
+{
+ double ret = 0.0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ ret = tok.get_normal_number_or_default(ret);
+ }
+
+ return ret;
+}
+
+static inline auto hsl_to_rgb(double h, double s, double l)
+ -> css_color
+{
+ css_color ret;
+
+ constexpr auto hue2rgb = [](auto p, auto q, auto t) -> auto {
+ if (t < 0.0) {
+ t += 1.0;
+ }
+ if (t > 1.0) {
+ t -= 1.0;
+ }
+ if (t * 6. < 1.0) {
+ return p + (q - p) * 6.0 * t;
+ }
+ if (t * 2. < 1) {
+ return q;
+ }
+ if (t * 3. < 2.) {
+ return p + (q - p) * (2.0 / 3.0 - t) * 6.0;
+ }
+ return p;
+ };
+
+ if (s == 0) {
+ /* Achromatic */
+ ret.r = l;
+ ret.g = l;
+ ret.b = l;
+ }
+ else {
+ auto q = l <= 0.5 ? l * (1.0 + s) : l + s - l * s;
+ auto p = 2.0 * l - q;
+ ret.r = (std::uint8_t)(hue2rgb(p, q, h + 1.0 / 3.0) * 255);
+ ret.g = (std::uint8_t)(hue2rgb(p, q, h) * 255);
+ ret.b = (std::uint8_t)(hue2rgb(p, q, h - 1.0 / 3.0) * 255);
+ }
+
+ ret.alpha = 255;
+
+ return ret;
+}
+
+auto css_value::maybe_color_from_function(const css_consumed_block::css_function_block &func)
+ -> std::optional<css_value>
+{
+
+ if (func.as_string() == "rgb" && func.args.size() == 3) {
+ css_color col{rgb_color_component_convert(func.args[0]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[1]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[2]->get_token_or_empty())};
+
+ return css_value(col);
+ }
+ else if (func.as_string() == "rgba" && func.args.size() == 4) {
+ css_color col{rgb_color_component_convert(func.args[0]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[1]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[2]->get_token_or_empty()),
+ alpha_component_convert(func.args[3]->get_token_or_empty())};
+
+ return css_value(col);
+ }
+ else if (func.as_string() == "hsl" && func.args.size() == 3) {
+ auto h = h_component_convert(func.args[0]->get_token_or_empty());
+ auto s = sl_component_convert(func.args[1]->get_token_or_empty());
+ auto l = sl_component_convert(func.args[2]->get_token_or_empty());
+
+ auto col = hsl_to_rgb(h, s, l);
+
+ return css_value(col);
+ }
+ else if (func.as_string() == "hsla" && func.args.size() == 4) {
+ auto h = h_component_convert(func.args[0]->get_token_or_empty());
+ auto s = sl_component_convert(func.args[1]->get_token_or_empty());
+ auto l = sl_component_convert(func.args[2]->get_token_or_empty());
+
+ auto col = hsl_to_rgb(h, s, l);
+ col.alpha = alpha_component_convert(func.args[3]->get_token_or_empty());
+
+ return css_value(col);
+ }
+
+ return std::nullopt;
+}
+
+auto css_value::maybe_dimension_from_number(const css_parser_token &tok)
+ -> std::optional<css_value>
+{
+ if (std::holds_alternative<float>(tok.value)) {
+ auto dbl = std::get<float>(tok.value);
+ css_dimension dim;
+
+ dim.dim = dbl;
+
+ if (tok.flags & css_parser_token::number_percent) {
+ dim.is_percent = true;
+ }
+ else {
+ dim.is_percent = false;
+ }
+
+ return css_value{dim};
+ }
+
+ return std::nullopt;
+}
+
+constexpr const auto display_names_map = frozen::make_unordered_map<frozen::string, css_display_value>({
+ {"hidden", css_display_value::DISPLAY_HIDDEN},
+ {"none", css_display_value::DISPLAY_HIDDEN},
+ {"inline", css_display_value::DISPLAY_INLINE},
+ {"block", css_display_value::DISPLAY_BLOCK},
+ {"content", css_display_value::DISPLAY_INLINE},
+ {"flex", css_display_value::DISPLAY_BLOCK},
+ {"grid", css_display_value::DISPLAY_BLOCK},
+ {"inline-block", css_display_value::DISPLAY_INLINE},
+ {"inline-flex", css_display_value::DISPLAY_INLINE},
+ {"inline-grid", css_display_value::DISPLAY_INLINE},
+ {"inline-table", css_display_value::DISPLAY_INLINE},
+ {"list-item", css_display_value::DISPLAY_BLOCK},
+ {"run-in", css_display_value::DISPLAY_INLINE},
+ {"table", css_display_value::DISPLAY_BLOCK},
+ {"table-caption", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-column-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-header-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-footer-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-row-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-cell", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-column", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-row", css_display_value::DISPLAY_TABLE_ROW},
+ {"initial", css_display_value::DISPLAY_INLINE},
+});
+
+auto css_value::maybe_display_from_string(const std::string_view &input)
+ -> std::optional<css_value>
+{
+ auto f = display_names_map.find(input);
+
+ if (f != display_names_map.end()) {
+ return css_value{f->second};
+ }
+
+ return std::nullopt;
+}
+
+
+auto css_value::debug_str() const -> std::string
+{
+ std::string ret;
+
+ std::visit([&](const auto &arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, css_color>) {
+ ret += fmt::format("color: r={};g={};b={};alpha={}",
+ arg.r, arg.g, arg.b, arg.alpha);
+ }
+ else if constexpr (std::is_same_v<T, double>) {
+ ret += "size: " + std::to_string(arg);
+ }
+ else if constexpr (std::is_same_v<T, css_dimension>) {
+ ret += "dimension: " + std::to_string(arg.dim);
+ if (arg.is_percent) {
+ ret += "%";
+ }
+ }
+ else if constexpr (std::is_same_v<T, css_display_value>) {
+ ret += "display: ";
+ switch (arg) {
+ case css_display_value::DISPLAY_HIDDEN:
+ ret += "hidden";
+ break;
+ case css_display_value::DISPLAY_BLOCK:
+ ret += "block";
+ break;
+ case css_display_value::DISPLAY_INLINE:
+ ret += "inline";
+ break;
+ case css_display_value::DISPLAY_TABLE_ROW:
+ ret += "table_row";
+ break;
+ }
+ }
+ else if constexpr (std::is_integral_v<T>) {
+ ret += "integral: " + std::to_string(static_cast<int>(arg));
+ }
+ else {
+ ret += "nyi";
+ }
+ },
+ value);
+
+ return ret;
+}
+
+TEST_SUITE("css"){
+ TEST_CASE("css hex colors"){
+ const std::pair<const char *, css_color> hex_tests[] = {
+ {"000", css_color(0, 0, 0)},
+ {"000000", css_color(0, 0, 0)},
+ {"f00", css_color(255, 0, 0)},
+ {"FEDCBA", css_color(254, 220, 186)},
+ {"234", css_color(34, 51, 68)},
+ };
+
+for (const auto &p: hex_tests) {
+ SUBCASE((std::string("parse hex color: ") + p.first).c_str())
+ {
+ auto col_parsed = css_value::maybe_color_from_hex(p.first);
+ //CHECK_UNARY(col_parsed);
+ //CHECK_UNARY(col_parsed.value().to_color());
+ auto final_col = col_parsed.value().to_color().value();
+ CHECK(final_col == p.second);
+ }
+}
+}// namespace rspamd::css
+TEST_CASE("css colors strings")
+{
+ auto passed = 0;
+ for (const auto &p: css_colors_map) {
+ /* Match some of the colors selected randomly */
+ if (rspamd_random_double_fast() > 0.9) {
+ auto col_parsed = css_value::maybe_color_from_string(p.first);
+ auto final_col = col_parsed.value().to_color().value();
+ CHECK_MESSAGE(final_col == p.second, p.first.data());
+ passed++;
+
+ if (passed > 20) {
+ break;
+ }
+ }
+ }
+}
+}
+;
+}
diff --git a/src/libserver/css/css_value.hxx b/src/libserver/css/css_value.hxx
new file mode 100644
index 0000000..1d57421
--- /dev/null
+++ b/src/libserver/css/css_value.hxx
@@ -0,0 +1,174 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_VALUE_HXX
+#define RSPAMD_CSS_VALUE_HXX
+
+#include <string>
+#include <variant>
+#include <optional>
+#include <vector>
+#include <iosfwd>
+#include "parse_error.hxx"
+#include "css_parser.hxx"
+#include "contrib/expected/expected.hpp"
+
+namespace rspamd::css {
+
+struct alignas(int) css_color {
+ std::uint8_t r;
+ std::uint8_t g;
+ std::uint8_t b;
+
+ std::uint8_t alpha;
+
+ css_color(std::uint8_t _r, std::uint8_t _g, std::uint8_t _b, std::uint8_t _alpha = 255)
+ : r(_r), g(_g), b(_b), alpha(_alpha)
+ {
+ }
+ css_color() = default;
+ constexpr auto to_number() const -> std::uint32_t
+ {
+ return (std::uint32_t) alpha << 24 |
+ (std::uint32_t) r << 16 |
+ (std::uint32_t) g << 8 |
+ (std::uint32_t) b << 0;
+ }
+
+ constexpr auto to_rgb() const -> std::uint32_t
+ {
+ return (std::uint32_t) r << 16 |
+ (std::uint32_t) g << 8 |
+ (std::uint32_t) b << 0;
+ }
+ friend bool operator==(const css_color &l, const css_color &r)
+ {
+ return (memcmp(&l, &r, sizeof(css_color)) == 0);
+ }
+
+ static auto white() -> css_color
+ {
+ return css_color{255, 255, 255};
+ }
+ static auto black() -> css_color
+ {
+ return css_color{0, 0, 0};
+ }
+};
+
+struct css_dimension {
+ float dim;
+ bool is_percent;
+};
+
+/*
+ * Simple enum class for display stuff
+ */
+enum class css_display_value : std::uint8_t {
+ DISPLAY_INLINE,
+ DISPLAY_BLOCK,
+ DISPLAY_TABLE_ROW,
+ DISPLAY_HIDDEN
+};
+
+/*
+ * Value handler, uses std::variant instead of polymorphic classes for now
+ * for simplicity
+ */
+struct css_value {
+ std::variant<css_color,
+ float,
+ css_display_value,
+ css_dimension,
+ std::monostate>
+ value;
+
+ css_value()
+ {
+ }
+ css_value(const css_color &color)
+ : value(color)
+ {
+ }
+ css_value(float num)
+ : value(num)
+ {
+ }
+ css_value(css_dimension dim)
+ : value(dim)
+ {
+ }
+ css_value(css_display_value d)
+ : value(d)
+ {
+ }
+
+ auto to_color(void) const -> std::optional<css_color>
+ {
+ return extract_value_maybe<css_color>();
+ }
+
+ auto to_number(void) const -> std::optional<float>
+ {
+ return extract_value_maybe<float>();
+ }
+
+ auto to_dimension(void) const -> std::optional<css_dimension>
+ {
+ return extract_value_maybe<css_dimension>();
+ }
+
+ auto to_display(void) const -> std::optional<css_display_value>
+ {
+ return extract_value_maybe<css_display_value>();
+ }
+
+ auto is_valid(void) const -> bool
+ {
+ return !(std::holds_alternative<std::monostate>(value));
+ }
+
+ auto debug_str() const -> std::string;
+
+ static auto maybe_color_from_string(const std::string_view &input)
+ -> std::optional<css_value>;
+ static auto maybe_color_from_hex(const std::string_view &input)
+ -> std::optional<css_value>;
+ static auto maybe_color_from_function(const css_consumed_block::css_function_block &func)
+ -> std::optional<css_value>;
+ static auto maybe_dimension_from_number(const css_parser_token &tok)
+ -> std::optional<css_value>;
+ static auto maybe_display_from_string(const std::string_view &input)
+ -> std::optional<css_value>;
+
+private:
+ template<typename T>
+ auto extract_value_maybe(void) const -> std::optional<T>
+ {
+ if (std::holds_alternative<T>(value)) {
+ return std::get<T>(value);
+ }
+
+ return std::nullopt;
+ }
+};
+
+}// namespace rspamd::css
+
+
+#endif//RSPAMD_CSS_VALUE_HXX
diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx
new file mode 100644
index 0000000..22b76f0
--- /dev/null
+++ b/src/libserver/css/parse_error.hxx
@@ -0,0 +1,61 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_PARSE_ERROR_HXX
+#define RSPAMD_PARSE_ERROR_HXX
+
+#include <string>
+#include <optional>
+
+namespace rspamd::css {
+
+/*
+ * Generic parser errors
+ */
+enum class css_parse_error_type {
+ PARSE_ERROR_UNKNOWN_OPTION,
+ PARSE_ERROR_INVALID_SYNTAX,
+ PARSE_ERROR_BAD_NESTING,
+ PARSE_ERROR_NYI,
+ PARSE_ERROR_UNKNOWN_ERROR,
+ /* All above is treated as fatal error in parsing */
+ PARSE_ERROR_NO_ERROR,
+ PARSE_ERROR_EMPTY,
+};
+
+struct css_parse_error {
+ css_parse_error_type type = css_parse_error_type::PARSE_ERROR_UNKNOWN_ERROR;
+ std::optional<std::string> description;
+
+ explicit css_parse_error(css_parse_error_type type, const std::string &description)
+ : type(type), description(description)
+ {
+ }
+ explicit css_parse_error(css_parse_error_type type = css_parse_error_type::PARSE_ERROR_NO_ERROR)
+ : type(type)
+ {
+ }
+
+ constexpr auto is_fatal(void) const -> bool
+ {
+ return type < css_parse_error_type::PARSE_ERROR_NO_ERROR;
+ }
+};
+
+}// namespace rspamd::css
+#endif//RSPAMD_PARSE_ERROR_HXX
diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c
new file mode 100644
index 0000000..4318e87
--- /dev/null
+++ b/src/libserver/dkim.c
@@ -0,0 +1,3588 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "message.h"
+#include "dkim.h"
+#include "dns.h"
+#include "utlist.h"
+#include "unix-std.h"
+#include "mempool_vars_internal.h"
+
+#include <openssl/evp.h>
+#include <openssl/rsa.h>
+#include <openssl/engine.h>
+
+/* special DNS tokens */
+#define DKIM_DNSKEYNAME "_domainkey"
+
+/* ed25519 key lengths */
+#define ED25519_B64_BYTES 45
+#define ED25519_BYTES 32
+
+/* Canonization methods */
+#define DKIM_CANON_UNKNOWN (-1) /* unknown method */
+#define DKIM_CANON_SIMPLE 0 /* as specified in DKIM spec */
+#define DKIM_CANON_RELAXED 1 /* as specified in DKIM spec */
+
+#define DKIM_CANON_DEFAULT DKIM_CANON_SIMPLE
+
+#define RSPAMD_SHORT_BH_LEN 8
+
+/* Params */
+enum rspamd_dkim_param_type {
+ DKIM_PARAM_UNKNOWN = -1,
+ DKIM_PARAM_SIGNATURE = 0,
+ DKIM_PARAM_SIGNALG,
+ DKIM_PARAM_DOMAIN,
+ DKIM_PARAM_CANONALG,
+ DKIM_PARAM_QUERYMETHOD,
+ DKIM_PARAM_SELECTOR,
+ DKIM_PARAM_HDRLIST,
+ DKIM_PARAM_VERSION,
+ DKIM_PARAM_IDENTITY,
+ DKIM_PARAM_TIMESTAMP,
+ DKIM_PARAM_EXPIRATION,
+ DKIM_PARAM_COPIEDHDRS,
+ DKIM_PARAM_BODYHASH,
+ DKIM_PARAM_BODYLENGTH,
+ DKIM_PARAM_IDX,
+ DKIM_PARAM_CV,
+ DKIM_PARAM_IGNORE
+};
+
+#define RSPAMD_DKIM_MAX_ARC_IDX 10
+
+#define msg_err_dkim(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "dkim", ctx->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_dkim(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "dkim", ctx->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_dkim(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "dkim", ctx->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_dkim(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_dkim_log_id, "dkim", ctx->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_dkim_taskless(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_dkim_log_id, "dkim", "", \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(dkim)
+
+#define RSPAMD_DKIM_FLAG_OVERSIGN (1u << 0u)
+#define RSPAMD_DKIM_FLAG_OVERSIGN_EXISTING (1u << 1u)
+
+union rspamd_dkim_header_stat {
+ struct _st {
+ guint16 count;
+ guint16 flags;
+ } s;
+ guint32 n;
+};
+
+struct rspamd_dkim_common_ctx {
+ rspamd_mempool_t *pool;
+ guint64 sig_hash;
+ gsize len;
+ GPtrArray *hlist;
+ GHashTable *htable; /* header -> count mapping */
+ EVP_MD_CTX *headers_hash;
+ EVP_MD_CTX *body_hash;
+ enum rspamd_dkim_type type;
+ guint idx;
+ gint header_canon_type;
+ gint body_canon_type;
+ guint body_canonicalised;
+ guint headers_canonicalised;
+ gboolean is_sign;
+};
+
+enum rspamd_arc_seal_cv {
+ RSPAMD_ARC_UNKNOWN = 0,
+ RSPAMD_ARC_NONE,
+ RSPAMD_ARC_INVALID,
+ RSPAMD_ARC_FAIL,
+ RSPAMD_ARC_PASS
+};
+
+
+struct rspamd_dkim_context_s {
+ struct rspamd_dkim_common_ctx common;
+ rspamd_mempool_t *pool;
+ struct rspamd_dns_resolver *resolver;
+ gsize blen;
+ gsize bhlen;
+ gint sig_alg;
+ guint ver;
+ time_t timestamp;
+ time_t expiration;
+ gchar *domain;
+ gchar *selector;
+ gint8 *b;
+ gchar *short_b;
+ gint8 *bh;
+ gchar *dns_key;
+ enum rspamd_arc_seal_cv cv;
+ const gchar *dkim_header;
+};
+
+#define RSPAMD_DKIM_KEY_ID_LEN 16
+
+struct rspamd_dkim_key_s {
+ guint8 *keydata;
+ guint8 *raw_key;
+ gsize keylen;
+ gsize decoded_len;
+ gchar key_id[RSPAMD_DKIM_KEY_ID_LEN];
+ union {
+ RSA *key_rsa;
+ EC_KEY *key_ecdsa;
+ guchar *key_eddsa;
+ } key;
+ BIO *key_bio;
+ EVP_PKEY *key_evp;
+ time_t mtime;
+ guint ttl;
+ enum rspamd_dkim_key_type type;
+ ref_entry_t ref;
+};
+
+struct rspamd_dkim_sign_context_s {
+ struct rspamd_dkim_common_ctx common;
+ rspamd_dkim_sign_key_t *key;
+};
+
+struct rspamd_dkim_header {
+ const gchar *name;
+ gint count;
+};
+
+/* Parser of dkim params */
+typedef gboolean (*dkim_parse_param_f)(rspamd_dkim_context_t *ctx,
+ const gchar *param, gsize len, GError **err);
+
+static gboolean rspamd_dkim_parse_signature(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_signalg(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_domain(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_canonalg(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_ignore(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_selector(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_hdrlist(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_version(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_timestamp(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_expiration(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_bodyhash(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_bodylength(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_idx(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+static gboolean rspamd_dkim_parse_cv(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err);
+
+
+static const dkim_parse_param_f parser_funcs[] = {
+ [DKIM_PARAM_SIGNATURE] = rspamd_dkim_parse_signature,
+ [DKIM_PARAM_SIGNALG] = rspamd_dkim_parse_signalg,
+ [DKIM_PARAM_DOMAIN] = rspamd_dkim_parse_domain,
+ [DKIM_PARAM_CANONALG] = rspamd_dkim_parse_canonalg,
+ [DKIM_PARAM_QUERYMETHOD] = rspamd_dkim_parse_ignore,
+ [DKIM_PARAM_SELECTOR] = rspamd_dkim_parse_selector,
+ [DKIM_PARAM_HDRLIST] = rspamd_dkim_parse_hdrlist,
+ [DKIM_PARAM_VERSION] = rspamd_dkim_parse_version,
+ [DKIM_PARAM_IDENTITY] = rspamd_dkim_parse_ignore,
+ [DKIM_PARAM_TIMESTAMP] = rspamd_dkim_parse_timestamp,
+ [DKIM_PARAM_EXPIRATION] = rspamd_dkim_parse_expiration,
+ [DKIM_PARAM_COPIEDHDRS] = rspamd_dkim_parse_ignore,
+ [DKIM_PARAM_BODYHASH] = rspamd_dkim_parse_bodyhash,
+ [DKIM_PARAM_BODYLENGTH] = rspamd_dkim_parse_bodylength,
+ [DKIM_PARAM_IDX] = rspamd_dkim_parse_idx,
+ [DKIM_PARAM_CV] = rspamd_dkim_parse_cv,
+ [DKIM_PARAM_IGNORE] = rspamd_dkim_parse_ignore,
+};
+
+#define DKIM_ERROR dkim_error_quark()
+GQuark
+dkim_error_quark(void)
+{
+ return g_quark_from_static_string("dkim-error-quark");
+}
+
+/* Parsers implementation */
+static gboolean
+rspamd_dkim_parse_signature(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ ctx->b = rspamd_mempool_alloc0(ctx->pool, len);
+ ctx->short_b = rspamd_mempool_alloc0(ctx->pool, RSPAMD_SHORT_BH_LEN + 1);
+ rspamd_strlcpy(ctx->short_b, param, MIN(len, RSPAMD_SHORT_BH_LEN + 1));
+ (void) rspamd_cryptobox_base64_decode(param, len, ctx->b, &ctx->blen);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_signalg(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ /* XXX: ugly size comparison, improve this code style some day */
+ if (len == 8) {
+ if (memcmp(param, "rsa-sha1", len) == 0) {
+ ctx->sig_alg = DKIM_SIGN_RSASHA1;
+ return TRUE;
+ }
+ }
+ else if (len == 10) {
+ if (memcmp(param, "rsa-sha256", len) == 0) {
+ ctx->sig_alg = DKIM_SIGN_RSASHA256;
+ return TRUE;
+ }
+ else if (memcmp(param, "rsa-sha512", len) == 0) {
+ ctx->sig_alg = DKIM_SIGN_RSASHA512;
+ return TRUE;
+ }
+ }
+ else if (len == 15) {
+ if (memcmp(param, "ecdsa256-sha256", len) == 0) {
+ ctx->sig_alg = DKIM_SIGN_ECDSASHA256;
+ return TRUE;
+ }
+ else if (memcmp(param, "ecdsa256-sha512", len) == 0) {
+ ctx->sig_alg = DKIM_SIGN_ECDSASHA512;
+ return TRUE;
+ }
+ }
+ else if (len == 14) {
+ if (memcmp(param, "ed25519-sha256", len) == 0) {
+ ctx->sig_alg = DKIM_SIGN_EDDSASHA256;
+ return TRUE;
+ }
+ }
+
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_A,
+ "invalid dkim sign algorithm");
+ return FALSE;
+}
+
+static gboolean
+rspamd_dkim_parse_domain(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ if (!rspamd_str_has_8bit(param, len)) {
+ ctx->domain = rspamd_mempool_alloc(ctx->pool, len + 1);
+ rspamd_strlcpy(ctx->domain, param, len + 1);
+ }
+ else {
+ ctx->domain = rspamd_dns_resolver_idna_convert_utf8(ctx->resolver,
+ ctx->pool, param, len, NULL);
+
+ if (!ctx->domain) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_H,
+ "invalid dkim domain tag %.*s: idna failed",
+ (int) len, param);
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_canonalg(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ const gchar *p, *slash = NULL, *end = param + len;
+ gsize sl = 0;
+
+ p = param;
+ while (p != end) {
+ if (*p == '/') {
+ slash = p;
+ break;
+ }
+ p++;
+ sl++;
+ }
+
+ if (slash == NULL) {
+ /* Only check header */
+ if (len == 6 && memcmp(param, "simple", len) == 0) {
+ ctx->common.header_canon_type = DKIM_CANON_SIMPLE;
+ return TRUE;
+ }
+ else if (len == 7 && memcmp(param, "relaxed", len) == 0) {
+ ctx->common.header_canon_type = DKIM_CANON_RELAXED;
+ return TRUE;
+ }
+ }
+ else {
+ /* First check header */
+ if (sl == 6 && memcmp(param, "simple", sl) == 0) {
+ ctx->common.header_canon_type = DKIM_CANON_SIMPLE;
+ }
+ else if (sl == 7 && memcmp(param, "relaxed", sl) == 0) {
+ ctx->common.header_canon_type = DKIM_CANON_RELAXED;
+ }
+ else {
+ goto err;
+ }
+ /* Check body */
+ len -= sl + 1;
+ slash++;
+ if (len == 6 && memcmp(slash, "simple", len) == 0) {
+ ctx->common.body_canon_type = DKIM_CANON_SIMPLE;
+ return TRUE;
+ }
+ else if (len == 7 && memcmp(slash, "relaxed", len) == 0) {
+ ctx->common.body_canon_type = DKIM_CANON_RELAXED;
+ return TRUE;
+ }
+ }
+
+err:
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_A,
+ "invalid dkim canonization algorithm");
+ return FALSE;
+}
+
+static gboolean
+rspamd_dkim_parse_ignore(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ /* Just ignore unused params */
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_selector(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+
+ if (!rspamd_str_has_8bit(param, len)) {
+ ctx->selector = rspamd_mempool_alloc(ctx->pool, len + 1);
+ rspamd_strlcpy(ctx->selector, param, len + 1);
+ }
+ else {
+ ctx->selector = rspamd_dns_resolver_idna_convert_utf8(ctx->resolver,
+ ctx->pool, param, len, NULL);
+
+ if (!ctx->selector) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_H,
+ "invalid dkim selector tag %.*s: idna failed",
+ (int) len, param);
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static void
+rspamd_dkim_hlist_free(void *ud)
+{
+ GPtrArray *a = ud;
+
+ g_ptr_array_free(a, TRUE);
+}
+
+static gboolean
+rspamd_dkim_parse_hdrlist_common(struct rspamd_dkim_common_ctx *ctx,
+ const gchar *param,
+ gsize len,
+ gboolean sign,
+ GError **err)
+{
+ const gchar *c, *p, *end = param + len;
+ gchar *h;
+ gboolean from_found = FALSE, oversign, existing;
+ guint count = 0;
+ struct rspamd_dkim_header *new;
+ gpointer found;
+ union rspamd_dkim_header_stat u;
+
+ p = param;
+ while (p <= end) {
+ if ((p == end || *p == ':')) {
+ count++;
+ }
+ p++;
+ }
+
+ if (count > 0) {
+ ctx->hlist = g_ptr_array_sized_new(count);
+ }
+ else {
+ return FALSE;
+ }
+
+ c = param;
+ p = param;
+ ctx->htable = g_hash_table_new(rspamd_strcase_hash, rspamd_strcase_equal);
+
+ while (p <= end) {
+ if ((p == end || *p == ':') && p - c > 0) {
+ oversign = FALSE;
+ existing = FALSE;
+ h = rspamd_mempool_alloc(ctx->pool, p - c + 1);
+ rspamd_strlcpy(h, c, p - c + 1);
+
+ g_strstrip(h);
+
+ if (sign) {
+ if (rspamd_lc_cmp(h, "(o)", 3) == 0) {
+ oversign = TRUE;
+ h += 3;
+ msg_debug_dkim("oversign header: %s", h);
+ }
+ else if (rspamd_lc_cmp(h, "(x)", 3) == 0) {
+ oversign = TRUE;
+ existing = TRUE;
+ h += 3;
+ msg_debug_dkim("oversign existing header: %s", h);
+ }
+ }
+
+ /* Check mandatory from */
+ if (!from_found && g_ascii_strcasecmp(h, "from") == 0) {
+ from_found = TRUE;
+ }
+
+ new = rspamd_mempool_alloc(ctx->pool,
+ sizeof(struct rspamd_dkim_header));
+ new->name = h;
+ new->count = 0;
+ u.n = 0;
+
+ g_ptr_array_add(ctx->hlist, new);
+ found = g_hash_table_lookup(ctx->htable, h);
+
+ if (oversign) {
+ if (found) {
+ msg_err_dkim("specified oversigned header more than once: %s",
+ h);
+ }
+
+ u.s.flags |= RSPAMD_DKIM_FLAG_OVERSIGN;
+
+ if (existing) {
+ u.s.flags |= RSPAMD_DKIM_FLAG_OVERSIGN_EXISTING;
+ }
+
+ u.s.count = 0;
+ }
+ else {
+ if (found != NULL) {
+ u.n = GPOINTER_TO_UINT(found);
+ new->count = u.s.count;
+ u.s.count++;
+ }
+ else {
+ /* Insert new header order to the list */
+ u.s.count = new->count + 1;
+ }
+ }
+
+ g_hash_table_insert(ctx->htable, h, GUINT_TO_POINTER(u.n));
+
+ c = p + 1;
+ p++;
+ }
+ else {
+ p++;
+ }
+ }
+
+ if (!ctx->hlist) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_H,
+ "invalid dkim header list");
+ return FALSE;
+ }
+ else {
+ if (!from_found) {
+ g_ptr_array_free(ctx->hlist, TRUE);
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_H,
+ "invalid dkim header list, from header is missing");
+ return FALSE;
+ }
+
+ rspamd_mempool_add_destructor(ctx->pool,
+ (rspamd_mempool_destruct_t) rspamd_dkim_hlist_free,
+ ctx->hlist);
+ rspamd_mempool_add_destructor(ctx->pool,
+ (rspamd_mempool_destruct_t) g_hash_table_unref,
+ ctx->htable);
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_hdrlist(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ return rspamd_dkim_parse_hdrlist_common(&ctx->common, param, len, FALSE, err);
+}
+
+static gboolean
+rspamd_dkim_parse_version(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ if (len != 1 || *param != '1') {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_VERSION,
+ "invalid dkim version");
+ return FALSE;
+ }
+
+ ctx->ver = 1;
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_timestamp(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ gulong val;
+
+ if (!rspamd_strtoul(param, len, &val)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "invalid dkim timestamp");
+ return FALSE;
+ }
+ ctx->timestamp = val;
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_expiration(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ gulong val;
+
+ if (!rspamd_strtoul(param, len, &val)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "invalid dkim expiration");
+ return FALSE;
+ }
+ ctx->expiration = val;
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_bodyhash(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ ctx->bh = rspamd_mempool_alloc0(ctx->pool, len);
+ (void) rspamd_cryptobox_base64_decode(param, len, ctx->bh, &ctx->bhlen);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_bodylength(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ gulong val;
+
+ if (!rspamd_strtoul(param, len, &val)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_L,
+ "invalid dkim body length");
+ return FALSE;
+ }
+ ctx->common.len = val;
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_idx(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+ gulong val;
+
+ if (!rspamd_strtoul(param, len, &val)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_L,
+ "invalid ARC idx");
+ return FALSE;
+ }
+ ctx->common.idx = val;
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_dkim_parse_cv(rspamd_dkim_context_t *ctx,
+ const gchar *param,
+ gsize len,
+ GError **err)
+{
+
+ /* Only check header */
+ if (len == 4 && memcmp(param, "fail", len) == 0) {
+ ctx->cv = RSPAMD_ARC_FAIL;
+ return TRUE;
+ }
+ else if (len == 4 && memcmp(param, "pass", len) == 0) {
+ ctx->cv = RSPAMD_ARC_PASS;
+ return TRUE;
+ }
+ else if (len == 4 && memcmp(param, "none", len) == 0) {
+ ctx->cv = RSPAMD_ARC_NONE;
+ return TRUE;
+ }
+ else if (len == 7 && memcmp(param, "invalid", len) == 0) {
+ ctx->cv = RSPAMD_ARC_INVALID;
+ return TRUE;
+ }
+
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "invalid arc seal verification result");
+
+ return FALSE;
+}
+
+
+static void
+rspamd_dkim_add_arc_seal_headers(rspamd_mempool_t *pool,
+ struct rspamd_dkim_common_ctx *ctx)
+{
+ struct rspamd_dkim_header *hdr;
+ gint count = ctx->idx, i;
+
+ ctx->hlist = g_ptr_array_sized_new(count * 3 - 1);
+
+ for (i = 0; i < count; i++) {
+ /* Authentication results */
+ hdr = rspamd_mempool_alloc(pool, sizeof(*hdr));
+ hdr->name = RSPAMD_DKIM_ARC_AUTHHEADER;
+ hdr->count = -(i + 1);
+ g_ptr_array_add(ctx->hlist, hdr);
+
+ /* Arc signature */
+ hdr = rspamd_mempool_alloc(pool, sizeof(*hdr));
+ hdr->name = RSPAMD_DKIM_ARC_SIGNHEADER;
+ hdr->count = -(i + 1);
+ g_ptr_array_add(ctx->hlist, hdr);
+
+ /* Arc seal (except last one) */
+ if (i != count - 1) {
+ hdr = rspamd_mempool_alloc(pool, sizeof(*hdr));
+ hdr->name = RSPAMD_DKIM_ARC_SEALHEADER;
+ hdr->count = -(i + 1);
+ g_ptr_array_add(ctx->hlist, hdr);
+ }
+ }
+
+ rspamd_mempool_add_destructor(ctx->pool,
+ (rspamd_mempool_destruct_t) rspamd_dkim_hlist_free,
+ ctx->hlist);
+}
+
+/**
+ * Create new dkim context from signature
+ * @param sig message's signature
+ * @param pool pool to allocate memory from
+ * @param err pointer to error object
+ * @return new context or NULL
+ */
+rspamd_dkim_context_t *
+rspamd_create_dkim_context(const gchar *sig,
+ rspamd_mempool_t *pool,
+ struct rspamd_dns_resolver *resolver,
+ guint time_jitter,
+ enum rspamd_dkim_type type,
+ GError **err)
+{
+ const gchar *p, *c, *tag = NULL, *end;
+ gint taglen;
+ gint param = DKIM_PARAM_UNKNOWN;
+ const EVP_MD *md_alg;
+ time_t now;
+ rspamd_dkim_context_t *ctx;
+ enum {
+ DKIM_STATE_TAG = 0,
+ DKIM_STATE_AFTER_TAG,
+ DKIM_STATE_VALUE,
+ DKIM_STATE_SKIP_SPACES = 99,
+ DKIM_STATE_ERROR = 100
+ } state,
+ next_state;
+
+
+ if (sig == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_B,
+ "empty signature");
+ return NULL;
+ }
+
+ ctx = rspamd_mempool_alloc0(pool, sizeof(rspamd_dkim_context_t));
+ ctx->pool = pool;
+ ctx->resolver = resolver;
+
+ if (type == RSPAMD_DKIM_ARC_SEAL) {
+ ctx->common.header_canon_type = DKIM_CANON_RELAXED;
+ ctx->common.body_canon_type = DKIM_CANON_RELAXED;
+ }
+ else {
+ ctx->common.header_canon_type = DKIM_CANON_DEFAULT;
+ ctx->common.body_canon_type = DKIM_CANON_DEFAULT;
+ }
+
+ ctx->sig_alg = DKIM_SIGN_UNKNOWN;
+ ctx->common.pool = pool;
+ ctx->common.type = type;
+ /* A simple state machine of parsing tags */
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_TAG;
+ taglen = 0;
+ p = sig;
+ c = sig;
+ end = p + strlen(p);
+ ctx->common.sig_hash = rspamd_cryptobox_fast_hash(sig, end - sig,
+ rspamd_hash_seed());
+
+ msg_debug_dkim("create dkim context sig = %L", ctx->common.sig_hash);
+
+ while (p <= end) {
+ switch (state) {
+ case DKIM_STATE_TAG:
+ if (g_ascii_isspace(*p)) {
+ taglen = (int) (p - c);
+ while (*p && g_ascii_isspace(*p)) {
+ /* Skip spaces before '=' sign */
+ p++;
+ }
+ if (*p != '=') {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "invalid dkim param");
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_AFTER_TAG;
+ param = DKIM_PARAM_UNKNOWN;
+ p++;
+ tag = c;
+ }
+ }
+ else if (*p == '=') {
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_AFTER_TAG;
+ param = DKIM_PARAM_UNKNOWN;
+ p++;
+ tag = c;
+ }
+ else {
+ taglen++;
+
+ if (taglen > G_MAXINT8) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "too long dkim tag");
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ p++;
+ }
+ }
+ break;
+ case DKIM_STATE_AFTER_TAG:
+ /* We got tag at tag and len at taglen */
+ switch (taglen) {
+ case 0:
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "zero length dkim param");
+ state = DKIM_STATE_ERROR;
+ break;
+ case 1:
+ /* 1 character tags */
+ switch (*tag) {
+ case 'v':
+ if (type == RSPAMD_DKIM_NORMAL) {
+ param = DKIM_PARAM_VERSION;
+ }
+ else {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "invalid ARC v param");
+ state = DKIM_STATE_ERROR;
+ break;
+ }
+ break;
+ case 'a':
+ param = DKIM_PARAM_SIGNALG;
+ break;
+ case 'b':
+ param = DKIM_PARAM_SIGNATURE;
+ break;
+ case 'c':
+ param = DKIM_PARAM_CANONALG;
+ break;
+ case 'd':
+ param = DKIM_PARAM_DOMAIN;
+ break;
+ case 'h':
+ if (type == RSPAMD_DKIM_ARC_SEAL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "ARC seal must NOT have h= tag");
+ state = DKIM_STATE_ERROR;
+ break;
+ }
+ else {
+ param = DKIM_PARAM_HDRLIST;
+ }
+ break;
+ case 'i':
+ if (type == RSPAMD_DKIM_NORMAL) {
+ param = DKIM_PARAM_IDENTITY;
+ }
+ else {
+ param = DKIM_PARAM_IDX;
+ }
+ break;
+ case 'l':
+ param = DKIM_PARAM_BODYLENGTH;
+ break;
+ case 'q':
+ param = DKIM_PARAM_QUERYMETHOD;
+ break;
+ case 's':
+ param = DKIM_PARAM_SELECTOR;
+ break;
+ case 't':
+ param = DKIM_PARAM_TIMESTAMP;
+ break;
+ case 'x':
+ param = DKIM_PARAM_EXPIRATION;
+ break;
+ case 'z':
+ param = DKIM_PARAM_COPIEDHDRS;
+ break;
+ case 'r':
+ param = DKIM_PARAM_IGNORE;
+ break;
+ default:
+ param = DKIM_PARAM_UNKNOWN;
+ msg_debug_dkim("unknown DKIM param %c, ignoring it", *tag);
+ break;
+ }
+ break;
+ case 2:
+ /* Two characters tags, e.g. `bh` */
+ if (tag[0] == 'b' && tag[1] == 'h') {
+ if (type == RSPAMD_DKIM_ARC_SEAL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "ARC seal must NOT have bh= tag");
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ param = DKIM_PARAM_BODYHASH;
+ }
+ }
+ else if (tag[0] == 'c' && tag[1] == 'v') {
+ if (type != RSPAMD_DKIM_ARC_SEAL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "cv tag is valid for ARC-Seal only");
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ param = DKIM_PARAM_CV;
+ }
+ }
+ else {
+ param = DKIM_PARAM_UNKNOWN;
+ msg_debug_dkim("unknown DKIM param %*s, ignoring it", taglen, tag);
+ }
+ break;
+ default:
+ /* Long and unknown (yet) DKIM tag */
+ param = DKIM_PARAM_UNKNOWN;
+ msg_debug_dkim("unknown DKIM param %*s, ignoring it", taglen, tag);
+ break;
+ }
+
+ if (state != DKIM_STATE_ERROR) {
+ /* Skip spaces */
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_VALUE;
+ }
+ break;
+ case DKIM_STATE_VALUE:
+ if (*p == ';') {
+ if (p - c == 0 || c > p) {
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ /* Cut trailing spaces for value */
+ gint tlen = p - c;
+ const gchar *tmp = p - 1;
+
+ while (tlen > 0) {
+ if (!g_ascii_isspace(*tmp)) {
+ break;
+ }
+ tlen--;
+ tmp--;
+ }
+
+ if (param != DKIM_PARAM_UNKNOWN) {
+ if (!parser_funcs[param](ctx, c, tlen, err)) {
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_TAG;
+ p++;
+ taglen = 0;
+ }
+ }
+ else {
+ /* Unknown param has been ignored */
+ msg_debug_dkim("ignored unknown tag parameter value: %*s = %*s",
+ taglen, tag, tlen, c);
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_TAG;
+ p++;
+ taglen = 0;
+ }
+ }
+ }
+ else if (p == end) {
+ /* Last parameter with no `;` character */
+ gint tlen = p - c;
+ const gchar *tmp = p - 1;
+
+ while (tlen > 0) {
+ if (!g_ascii_isspace(*tmp)) {
+ break;
+ }
+ tlen--;
+ tmp--;
+ }
+
+ if (param != DKIM_PARAM_UNKNOWN) {
+ if (!parser_funcs[param](ctx, c, tlen, err)) {
+ state = DKIM_STATE_ERROR;
+ }
+ }
+ else {
+ msg_debug_dkim("ignored unknown tag parameter value: %*s: %*s",
+ taglen, tag, tlen, c);
+ }
+
+ if (state == DKIM_STATE_ERROR) {
+ /*
+ * We need to return from here as state machine won't
+ * do any more steps after p == end
+ */
+ if (err) {
+ msg_info_dkim("dkim parse failed: %e", *err);
+ }
+
+ return NULL;
+ }
+ /* Finish processing */
+ p++;
+ }
+ else {
+ p++;
+ }
+ break;
+ case DKIM_STATE_SKIP_SPACES:
+ if (g_ascii_isspace(*p)) {
+ p++;
+ }
+ else {
+ c = p;
+ state = next_state;
+ }
+ break;
+ case DKIM_STATE_ERROR:
+ if (err && *err) {
+ msg_info_dkim("dkim parse failed: %s", (*err)->message);
+ return NULL;
+ }
+ else {
+ msg_info_dkim("dkim parse failed: unknown error when parsing %c tag",
+ tag ? *tag : '?');
+ return NULL;
+ }
+ break;
+ }
+ }
+
+ if (type == RSPAMD_DKIM_ARC_SEAL) {
+ rspamd_dkim_add_arc_seal_headers(pool, &ctx->common);
+ }
+
+ /* Now check validity of signature */
+ if (ctx->b == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_B,
+ "b parameter missing");
+ return NULL;
+ }
+ if (ctx->common.type != RSPAMD_DKIM_ARC_SEAL && ctx->bh == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_BH,
+ "bh parameter missing");
+ return NULL;
+ }
+ if (ctx->domain == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_D,
+ "domain parameter missing");
+ return NULL;
+ }
+ if (ctx->selector == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_S,
+ "selector parameter missing");
+ return NULL;
+ }
+ if (ctx->common.type == RSPAMD_DKIM_NORMAL && ctx->ver == 0) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_V,
+ "v parameter missing");
+ return NULL;
+ }
+ if (ctx->common.hlist == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_H,
+ "h parameter missing");
+ return NULL;
+ }
+ if (ctx->sig_alg == DKIM_SIGN_UNKNOWN) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EMPTY_S,
+ "s parameter missing");
+ return NULL;
+ }
+
+ if (type != RSPAMD_DKIM_ARC_SEAL) {
+ if (ctx->sig_alg == DKIM_SIGN_RSASHA1) {
+ /* Check bh length */
+ if (ctx->bhlen != (guint) EVP_MD_size(EVP_sha1())) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_BADSIG,
+ "signature has incorrect length: %zu",
+ ctx->bhlen);
+ return NULL;
+ }
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA256 ||
+ ctx->sig_alg == DKIM_SIGN_ECDSASHA256) {
+ if (ctx->bhlen !=
+ (guint) EVP_MD_size(EVP_sha256())) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_BADSIG,
+ "signature has incorrect length: %zu",
+ ctx->bhlen);
+ return NULL;
+ }
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA512 ||
+ ctx->sig_alg == DKIM_SIGN_ECDSASHA512) {
+ if (ctx->bhlen !=
+ (guint) EVP_MD_size(EVP_sha512())) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_BADSIG,
+ "signature has incorrect length: %zu",
+ ctx->bhlen);
+ return NULL;
+ }
+ }
+ }
+
+ /* Check expiration */
+ now = time(NULL);
+ if (ctx->timestamp && now < ctx->timestamp && ctx->timestamp - now > (gint) time_jitter) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_FUTURE,
+ "signature was made in future, ignoring");
+ return NULL;
+ }
+ if (ctx->expiration && ctx->expiration < now) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_EXPIRED,
+ "signature has expired");
+ return NULL;
+ }
+
+ if (ctx->common.type != RSPAMD_DKIM_NORMAL && (ctx->common.idx == 0 ||
+ ctx->common.idx > RSPAMD_DKIM_MAX_ARC_IDX)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "i parameter missing or invalid for ARC");
+ return NULL;
+ }
+
+ if (ctx->common.type == RSPAMD_DKIM_ARC_SEAL) {
+ if (ctx->cv == RSPAMD_ARC_UNKNOWN) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_UNKNOWN,
+ "cv parameter missing or invalid for ARC");
+ return NULL;
+ }
+ }
+
+ /* Now create dns key to request further */
+ gsize dnslen = strlen(ctx->domain) + strlen(ctx->selector) +
+ sizeof(DKIM_DNSKEYNAME) + 2;
+ ctx->dns_key = rspamd_mempool_alloc(ctx->pool, dnslen);
+ rspamd_snprintf(ctx->dns_key,
+ dnslen,
+ "%s.%s.%s",
+ ctx->selector,
+ DKIM_DNSKEYNAME,
+ ctx->domain);
+
+ /* Create checksums for further operations */
+ if (ctx->sig_alg == DKIM_SIGN_RSASHA1) {
+ md_alg = EVP_sha1();
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA256 ||
+ ctx->sig_alg == DKIM_SIGN_ECDSASHA256 ||
+ ctx->sig_alg == DKIM_SIGN_EDDSASHA256) {
+ md_alg = EVP_sha256();
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA512 ||
+ ctx->sig_alg == DKIM_SIGN_ECDSASHA512) {
+ md_alg = EVP_sha512();
+ }
+ else {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_BADSIG,
+ "signature has unsupported signature algorithm");
+
+ return NULL;
+ }
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ ctx->common.body_hash = EVP_MD_CTX_create();
+ EVP_DigestInit_ex(ctx->common.body_hash, md_alg, NULL);
+ ctx->common.headers_hash = EVP_MD_CTX_create();
+ EVP_DigestInit_ex(ctx->common.headers_hash, md_alg, NULL);
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_destroy, ctx->common.body_hash);
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_destroy, ctx->common.headers_hash);
+#else
+ ctx->common.body_hash = EVP_MD_CTX_new();
+ EVP_DigestInit_ex(ctx->common.body_hash, md_alg, NULL);
+ ctx->common.headers_hash = EVP_MD_CTX_new();
+ EVP_DigestInit_ex(ctx->common.headers_hash, md_alg, NULL);
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_free, ctx->common.body_hash);
+ rspamd_mempool_add_destructor(pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_free, ctx->common.headers_hash);
+#endif
+ ctx->dkim_header = sig;
+
+ return ctx;
+}
+
+struct rspamd_dkim_key_cbdata {
+ rspamd_dkim_context_t *ctx;
+ dkim_key_handler_f handler;
+ gpointer ud;
+};
+
+rspamd_dkim_key_t *
+rspamd_dkim_make_key(const gchar *keydata,
+ guint keylen, enum rspamd_dkim_key_type type, GError **err)
+{
+ rspamd_dkim_key_t *key = NULL;
+
+ if (keylen < 3) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "DKIM key is too short to be valid");
+ return NULL;
+ }
+
+ key = g_malloc0(sizeof(rspamd_dkim_key_t));
+ REF_INIT_RETAIN(key, rspamd_dkim_key_free);
+ key->keydata = g_malloc0(keylen + 1);
+ key->raw_key = g_malloc(keylen);
+ key->decoded_len = keylen;
+ key->type = type;
+
+ /* Copy key skipping all spaces and newlines */
+ const char *h = keydata;
+ guint8 *t = key->raw_key;
+
+ while (h - keydata < keylen) {
+ if (!g_ascii_isspace(*h)) {
+ *t++ = *h++;
+ }
+ else {
+ h++;
+ }
+ }
+
+ key->keylen = t - key->raw_key;
+
+ if (!rspamd_cryptobox_base64_decode(key->raw_key, key->keylen, key->keydata,
+ &key->decoded_len)) {
+ REF_RELEASE(key);
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "DKIM key is not a valid base64 string");
+
+ return NULL;
+ }
+
+ /* Calculate ID -> md5 */
+ EVP_MD_CTX *mdctx = EVP_MD_CTX_create();
+
+#ifdef EVP_MD_CTX_FLAG_NON_FIPS_ALLOW
+ EVP_MD_CTX_set_flags(mdctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
+#endif
+
+ if (EVP_DigestInit_ex(mdctx, EVP_md5(), NULL) == 1) {
+ guint dlen = sizeof(key->key_id);
+
+ EVP_DigestUpdate(mdctx, key->keydata, key->decoded_len);
+ EVP_DigestFinal_ex(mdctx, key->key_id, &dlen);
+ }
+
+ EVP_MD_CTX_destroy(mdctx);
+
+ if (key->type == RSPAMD_DKIM_KEY_EDDSA) {
+ key->key.key_eddsa = key->keydata;
+
+ if (key->decoded_len != rspamd_cryptobox_pk_sig_bytes(
+ RSPAMD_CRYPTOBOX_MODE_25519)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "DKIM key is has invalid length %d for eddsa; expected %d",
+ (gint) key->decoded_len,
+ rspamd_cryptobox_pk_sig_bytes(RSPAMD_CRYPTOBOX_MODE_25519));
+ REF_RELEASE(key);
+
+ return NULL;
+ }
+ }
+ else {
+ key->key_bio = BIO_new_mem_buf(key->keydata, key->decoded_len);
+
+ if (key->key_bio == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "cannot make ssl bio from key");
+ REF_RELEASE(key);
+
+ return NULL;
+ }
+
+ key->key_evp = d2i_PUBKEY_bio(key->key_bio, NULL);
+
+ if (key->key_evp == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "cannot extract pubkey from bio");
+ REF_RELEASE(key);
+
+ return NULL;
+ }
+
+ if (type == RSPAMD_DKIM_KEY_RSA) {
+ key->key.key_rsa = EVP_PKEY_get1_RSA(key->key_evp);
+
+ if (key->key.key_rsa == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "cannot extract rsa key from evp key");
+ REF_RELEASE(key);
+
+ return NULL;
+ }
+ }
+ else {
+ key->key.key_ecdsa = EVP_PKEY_get1_EC_KEY(key->key_evp);
+
+ if (key->key.key_ecdsa == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "cannot extract ecdsa key from evp key");
+ REF_RELEASE(key);
+
+ return NULL;
+ }
+ }
+ }
+
+ return key;
+}
+
+const guchar *
+rspamd_dkim_key_id(rspamd_dkim_key_t *key)
+{
+ if (key) {
+ return key->key_id;
+ }
+
+ return NULL;
+}
+
+/**
+ * Free DKIM key
+ * @param key
+ */
+void rspamd_dkim_key_free(rspamd_dkim_key_t *key)
+{
+ if (key->key_evp) {
+ EVP_PKEY_free(key->key_evp);
+ }
+
+ if (key->type == RSPAMD_DKIM_KEY_RSA) {
+ if (key->key.key_rsa) {
+ RSA_free(key->key.key_rsa);
+ }
+ }
+ else if (key->type == RSPAMD_DKIM_KEY_ECDSA) {
+ if (key->key.key_ecdsa) {
+ EC_KEY_free(key->key.key_ecdsa);
+ }
+ }
+ /* Nothing in case of eddsa key */
+ if (key->key_bio) {
+ BIO_free(key->key_bio);
+ }
+
+ g_free(key->raw_key);
+ g_free(key->keydata);
+ g_free(key);
+}
+
+void rspamd_dkim_sign_key_free(rspamd_dkim_sign_key_t *key)
+{
+ if (key->key_evp) {
+ EVP_PKEY_free(key->key_evp);
+ }
+ if (key->type == RSPAMD_DKIM_KEY_RSA) {
+ if (key->key.key_rsa) {
+ RSA_free(key->key.key_rsa);
+ }
+ }
+ if (key->key_bio) {
+ BIO_free(key->key_bio);
+ }
+
+ if (key->type == RSPAMD_DKIM_KEY_EDDSA) {
+ rspamd_explicit_memzero(key->key.key_eddsa, key->keylen);
+ g_free(key->keydata);
+ }
+
+ g_free(key);
+}
+
+rspamd_dkim_key_t *
+rspamd_dkim_parse_key(const gchar *txt, gsize *keylen, GError **err)
+{
+ const gchar *c, *p, *end, *key = NULL, *alg = "rsa";
+ enum {
+ read_tag = 0,
+ read_tag_before_eqsign,
+ read_eqsign,
+ read_p_tag,
+ read_k_tag,
+ ignore_value,
+ skip_spaces,
+ } state = read_tag,
+ next_state;
+ gchar tag = '\0';
+ gsize klen = 0, alglen = 0;
+
+ c = txt;
+ p = txt;
+ end = txt + strlen(txt);
+
+ while (p < end) {
+ switch (state) {
+ case read_tag:
+ if (*p == '=') {
+ state = read_eqsign;
+ }
+ else if (g_ascii_isspace(*p)) {
+ state = skip_spaces;
+
+ if (tag != '\0') {
+ /* We had tag letter */
+ next_state = read_tag_before_eqsign;
+ }
+ else {
+ /* We had no tag letter, so we ignore empty tag */
+ next_state = read_tag;
+ }
+ }
+ else {
+ tag = *p;
+ }
+ p++;
+ break;
+ case read_tag_before_eqsign:
+ /* Input: spaces before eqsign
+ * Output: either read a next tag (previous had no value), or read value
+ * p is moved forward
+ */
+ if (*p == '=') {
+ state = read_eqsign;
+ }
+ else {
+ tag = *p;
+ state = read_tag;
+ }
+ p++;
+ break;
+ case read_eqsign:
+ /* Always switch to skip spaces state and do not advance p */
+ state = skip_spaces;
+
+ if (tag == 'p') {
+ next_state = read_p_tag;
+ }
+ else if (tag == 'k') {
+ next_state = read_k_tag;
+ }
+ else {
+ /* Unknown tag, ignore */
+ next_state = ignore_value;
+ tag = '\0';
+ }
+ break;
+ case read_p_tag:
+ if (*p == ';') {
+ klen = p - c;
+ key = c;
+ state = read_tag;
+ tag = '\0';
+ p++;
+ }
+ else {
+ p++;
+ }
+ break;
+ case read_k_tag:
+ if (*p == ';') {
+ alglen = p - c;
+ alg = c;
+ state = read_tag;
+ tag = '\0';
+ p++;
+ }
+ else if (g_ascii_isspace(*p)) {
+ alglen = p - c;
+ alg = c;
+ state = skip_spaces;
+ next_state = read_tag;
+ tag = '\0';
+ }
+ else {
+ p++;
+ }
+ break;
+ case ignore_value:
+ if (*p == ';') {
+ state = read_tag;
+ tag = '\0';
+ p++;
+ }
+ else if (g_ascii_isspace(*p)) {
+ state = skip_spaces;
+ next_state = read_tag;
+ tag = '\0';
+ }
+ else {
+ p++;
+ }
+ break;
+ case skip_spaces:
+ /* Skip spaces and switch to the next state if needed */
+ if (g_ascii_isspace(*p)) {
+ p++;
+ }
+ else {
+ c = p;
+ state = next_state;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Leftover */
+ switch (state) {
+ case read_p_tag:
+ klen = p - c;
+ key = c;
+ break;
+ case read_k_tag:
+ alglen = p - c;
+ alg = c;
+ break;
+ default:
+ break;
+ }
+
+ if (klen == 0 || key == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "key is missing");
+
+ return NULL;
+ }
+
+ if (alglen == 0 || alg == NULL) {
+ alg = "rsa"; /* Implicit */
+ alglen = 3;
+ }
+
+ if (keylen) {
+ *keylen = klen;
+ }
+
+ if (alglen == 8 && rspamd_lc_cmp(alg, "ecdsa256", alglen) == 0) {
+ return rspamd_dkim_make_key(key, klen,
+ RSPAMD_DKIM_KEY_ECDSA, err);
+ }
+ else if (alglen == 7 && rspamd_lc_cmp(alg, "ed25519", alglen) == 0) {
+ return rspamd_dkim_make_key(key, klen,
+ RSPAMD_DKIM_KEY_EDDSA, err);
+ }
+ else {
+ /* We assume RSA default in all cases */
+ return rspamd_dkim_make_key(key, klen,
+ RSPAMD_DKIM_KEY_RSA, err);
+ }
+
+ g_assert_not_reached();
+
+ return NULL;
+}
+
+/* Get TXT request data and parse it */
+static void
+rspamd_dkim_dns_cb(struct rdns_reply *reply, gpointer arg)
+{
+ struct rspamd_dkim_key_cbdata *cbdata = arg;
+ rspamd_dkim_key_t *key = NULL;
+ GError *err = NULL;
+ struct rdns_reply_entry *elt;
+ gsize keylen = 0;
+
+ if (reply->code != RDNS_RC_NOERROR) {
+ gint err_code = DKIM_SIGERROR_NOKEY;
+ if (reply->code == RDNS_RC_NOREC) {
+ err_code = DKIM_SIGERROR_NOREC;
+ }
+ else if (reply->code == RDNS_RC_NXDOMAIN) {
+ err_code = DKIM_SIGERROR_NOREC;
+ }
+ g_set_error(&err,
+ DKIM_ERROR,
+ err_code,
+ "dns request to %s failed: %s",
+ cbdata->ctx->dns_key,
+ rdns_strerror(reply->code));
+ cbdata->handler(NULL, 0, cbdata->ctx, cbdata->ud, err);
+ }
+ else {
+ LL_FOREACH(reply->entries, elt)
+ {
+ if (elt->type == RDNS_REQUEST_TXT) {
+ if (err != NULL) {
+ /* Free error as it is insignificant */
+ g_error_free(err);
+ err = NULL;
+ }
+ key = rspamd_dkim_parse_key(elt->content.txt.data,
+ &keylen,
+ &err);
+ if (key) {
+ key->ttl = elt->ttl;
+ break;
+ }
+ }
+ }
+ cbdata->handler(key, keylen, cbdata->ctx, cbdata->ud, err);
+ }
+}
+
+/**
+ * Make DNS request for specified context and obtain and parse key
+ * @param ctx dkim context from signature
+ * @param resolver dns resolver object
+ * @param s async session to make request
+ * @return
+ */
+gboolean
+rspamd_get_dkim_key(rspamd_dkim_context_t *ctx,
+ struct rspamd_task *task,
+ dkim_key_handler_f handler,
+ gpointer ud)
+{
+ struct rspamd_dkim_key_cbdata *cbdata;
+
+ g_return_val_if_fail(ctx != NULL, FALSE);
+ g_return_val_if_fail(ctx->dns_key != NULL, FALSE);
+
+ cbdata =
+ rspamd_mempool_alloc(ctx->pool,
+ sizeof(struct rspamd_dkim_key_cbdata));
+ cbdata->ctx = ctx;
+ cbdata->handler = handler;
+ cbdata->ud = ud;
+
+ return rspamd_dns_resolver_request_task_forced(task,
+ rspamd_dkim_dns_cb,
+ cbdata,
+ RDNS_REQUEST_TXT,
+ ctx->dns_key);
+}
+
+static gboolean
+rspamd_dkim_relaxed_body_step(struct rspamd_dkim_common_ctx *ctx, EVP_MD_CTX *ck,
+ const gchar **start, guint size,
+ gssize *remain)
+{
+ const gchar *h;
+ gchar *t;
+ guint len, inlen;
+ gssize octets_remain;
+ gboolean got_sp, ret = TRUE;
+ gchar buf[1024];
+
+ len = size;
+ inlen = sizeof(buf) - 1;
+ h = *start;
+ t = buf;
+ got_sp = FALSE;
+ octets_remain = *remain;
+
+ while (len > 0 && inlen > 0 && (octets_remain > 0)) {
+
+ if (*h == '\r' || *h == '\n') {
+ if (got_sp) {
+ /* Ignore spaces at the end of line */
+ t--;
+ }
+ *t++ = '\r';
+ *t++ = '\n';
+
+ if (len > 1 && (*h == '\r' && h[1] == '\n')) {
+ h += 2;
+ len -= 2;
+ octets_remain -= 2;
+ }
+ else {
+ h++;
+ len--;
+ if (octets_remain >= 2) {
+ octets_remain -= 2; /* Input has just \n or \r so we actually add more octets */
+ }
+ else {
+ octets_remain--;
+ break;
+ }
+ }
+ break;
+ }
+ else if (g_ascii_isspace(*h)) {
+ if (got_sp) {
+ /* Ignore multiply spaces */
+ h++;
+ len--;
+ continue;
+ }
+ else {
+ *t++ = ' ';
+ h++;
+ inlen--;
+ len--;
+ octets_remain--;
+ got_sp = TRUE;
+ continue;
+ }
+ }
+ else {
+ got_sp = FALSE;
+ }
+
+ *t++ = *h++;
+ inlen--;
+ len--;
+ octets_remain--;
+ }
+
+ if (octets_remain < 0) {
+ /* Absurdic l tag value, but we still need to rewind the t pointer back */
+ while (t > buf && octets_remain < 0) {
+ t--;
+ octets_remain++;
+ }
+
+ ret = FALSE;
+ }
+
+ *start = h;
+
+ if (t - buf > 0) {
+ gsize cklen = t - buf;
+
+ EVP_DigestUpdate(ck, buf, cklen);
+ ctx->body_canonicalised += cklen;
+ msg_debug_dkim("relaxed update signature with body buffer "
+ "(%z size, %z -> %z remain)",
+ cklen, *remain, octets_remain);
+ *remain = octets_remain;
+ }
+
+ return ret && ((len > 0) && (octets_remain > 0));
+}
+
+static gboolean
+rspamd_dkim_simple_body_step(struct rspamd_dkim_common_ctx *ctx,
+ EVP_MD_CTX *ck, const gchar **start, guint size,
+ gssize *remain)
+{
+ const gchar *h;
+ gchar *t;
+ guint len, inlen;
+ gssize octets_remain;
+ gchar buf[1024];
+
+ len = size;
+ inlen = sizeof(buf) - 1;
+ h = *start;
+ t = &buf[0];
+ octets_remain = *remain;
+
+ while (len > 0 && inlen > 0 && (octets_remain != 0)) {
+ if (*h == '\r' || *h == '\n') {
+ *t++ = '\r';
+ *t++ = '\n';
+
+ if (len > 1 && (*h == '\r' && h[1] == '\n')) {
+ h += 2;
+ len -= 2;
+
+ if (octets_remain >= 2) {
+ octets_remain -= 2; /* Input has just \n or \r so we actually add more octets */
+ }
+ else {
+ octets_remain--;
+ }
+ }
+ else {
+ h++;
+ len--;
+
+ if (octets_remain >= 2) {
+ octets_remain -= 2; /* Input has just \n or \r so we actually add more octets */
+ }
+ else {
+ octets_remain--;
+ }
+ }
+ break;
+ }
+
+ *t++ = *h++;
+ octets_remain--;
+ inlen--;
+ len--;
+ }
+
+ *start = h;
+
+ if (t - buf > 0) {
+ gsize cklen = t - buf;
+
+ EVP_DigestUpdate(ck, buf, cklen);
+ ctx->body_canonicalised += cklen;
+ msg_debug_dkim("simple update signature with body buffer "
+ "(%z size, %z -> %z remain)",
+ cklen, *remain, octets_remain);
+ *remain = octets_remain;
+ }
+
+ return ((len != 0) && (octets_remain != 0));
+}
+
+static const gchar *
+rspamd_dkim_skip_empty_lines(const gchar *start, const gchar *end,
+ guint type, gboolean sign, gboolean *need_crlf)
+{
+ const gchar *p = end - 1, *t;
+ enum {
+ init = 0,
+ init_2,
+ got_cr,
+ got_lf,
+ got_crlf,
+ test_spaces,
+ } state = init;
+ guint skip = 0;
+
+ while (p >= start) {
+ switch (state) {
+ case init:
+ if (*p == '\r') {
+ state = got_cr;
+ }
+ else if (*p == '\n') {
+ state = got_lf;
+ }
+ else if (type == DKIM_CANON_RELAXED && *p == ' ') {
+ skip = 0;
+ state = test_spaces;
+ }
+ else {
+ if (sign || type != DKIM_CANON_RELAXED) {
+ *need_crlf = TRUE;
+ }
+
+ goto end;
+ }
+ break;
+ case init_2:
+ if (*p == '\r') {
+ state = got_cr;
+ }
+ else if (*p == '\n') {
+ state = got_lf;
+ }
+ else if (type == DKIM_CANON_RELAXED && (*p == ' ' || *p == '\t')) {
+ skip = 0;
+ state = test_spaces;
+ }
+ else {
+ goto end;
+ }
+ break;
+ case got_cr:
+ if (p >= start + 1) {
+ if (*(p - 1) == '\r') {
+ p--;
+ state = got_cr;
+ }
+ else if (*(p - 1) == '\n') {
+ if ((*p - 2) == '\r') {
+ /* \r\n\r -> we know about one line */
+ p -= 1;
+ state = got_crlf;
+ }
+ else {
+ /* \n\r -> we know about one line */
+ p -= 1;
+ state = got_lf;
+ }
+ }
+ else if (type == DKIM_CANON_RELAXED && (*(p - 1) == ' ' ||
+ *(p - 1) == '\t')) {
+ skip = 1;
+ state = test_spaces;
+ }
+ else {
+ goto end;
+ }
+ }
+ else {
+ if (g_ascii_isspace(*(p - 1))) {
+ if (type == DKIM_CANON_RELAXED) {
+ p -= 1;
+ }
+ }
+ goto end;
+ }
+ break;
+ case got_lf:
+ if (p >= start + 1) {
+ if (*(p - 1) == '\r') {
+ state = got_crlf;
+ }
+ else if (*(p - 1) == '\n') {
+ /* We know about one line */
+ p--;
+ state = got_lf;
+ }
+ else if (type == DKIM_CANON_RELAXED && (*(p - 1) == ' ' ||
+ *(p - 1) == '\t')) {
+ skip = 1;
+ state = test_spaces;
+ }
+ else {
+ goto end;
+ }
+ }
+ else {
+ if (g_ascii_isspace(*(p - 1))) {
+ if (type == DKIM_CANON_RELAXED) {
+ p -= 1;
+ }
+ }
+ goto end;
+ }
+ break;
+ case got_crlf:
+ if (p >= start + 2) {
+ if (*(p - 2) == '\r') {
+ p -= 2;
+ state = got_cr;
+ }
+ else if (*(p - 2) == '\n') {
+ p -= 2;
+ state = got_lf;
+ }
+ else if (type == DKIM_CANON_RELAXED && (*(p - 2) == ' ' ||
+ *(p - 2) == '\t')) {
+ skip = 2;
+ state = test_spaces;
+ }
+ else {
+ goto end;
+ }
+ }
+ else {
+ if (g_ascii_isspace(*(p - 2))) {
+ if (type == DKIM_CANON_RELAXED) {
+ p -= 2;
+ }
+ }
+ goto end;
+ }
+ break;
+ case test_spaces:
+ t = p - skip;
+
+ while (t >= start + 2 && (*t == ' ' || *t == '\t')) {
+ t--;
+ }
+
+ if (*t == '\r') {
+ p = t;
+ state = got_cr;
+ }
+ else if (*t == '\n') {
+ p = t;
+ state = got_lf;
+ }
+ else {
+ goto end;
+ }
+ break;
+ }
+ }
+
+end:
+ return p;
+}
+
+static gboolean
+rspamd_dkim_canonize_body(struct rspamd_dkim_common_ctx *ctx,
+ const gchar *start,
+ const gchar *end,
+ gboolean sign)
+{
+ const gchar *p;
+ gssize remain = ctx->len ? ctx->len : G_MAXSSIZE;
+ guint total_len = end - start;
+ gboolean need_crlf = FALSE;
+
+ if (start == NULL) {
+ /* Empty body */
+ if (ctx->body_canon_type == DKIM_CANON_SIMPLE) {
+ EVP_DigestUpdate(ctx->body_hash, CRLF, sizeof(CRLF) - 1);
+ ctx->body_canonicalised += sizeof(CRLF) - 1;
+ }
+ else {
+ EVP_DigestUpdate(ctx->body_hash, "", 0);
+ }
+ }
+ else {
+ /* Strip extra ending CRLF */
+ p = rspamd_dkim_skip_empty_lines(start, end, ctx->body_canon_type,
+ sign, &need_crlf);
+ end = p + 1;
+
+ if (end == start) {
+ /* Empty body */
+ if (ctx->body_canon_type == DKIM_CANON_SIMPLE) {
+ EVP_DigestUpdate(ctx->body_hash, CRLF, sizeof(CRLF) - 1);
+ ctx->body_canonicalised += sizeof(CRLF) - 1;
+ }
+ else {
+ EVP_DigestUpdate(ctx->body_hash, "", 0);
+ }
+ }
+ else {
+ if (ctx->body_canon_type == DKIM_CANON_SIMPLE) {
+ /* Simple canonization */
+ while (rspamd_dkim_simple_body_step(ctx, ctx->body_hash,
+ &start, end - start, &remain))
+ ;
+
+ /*
+ * If we have l= tag then we cannot add crlf...
+ */
+ if (need_crlf) {
+ /* l is evil... */
+ if (ctx->len == 0) {
+ remain = 2;
+ }
+ else {
+ if (ctx->len <= total_len) {
+ /* We don't have enough l to add \r\n */
+ remain = 0;
+ }
+ else {
+ if (ctx->len - total_len >= 2) {
+ remain = 2;
+ }
+ else {
+ remain = ctx->len - total_len;
+ }
+ }
+ }
+
+ start = "\r\n";
+ end = start + 2;
+
+ rspamd_dkim_simple_body_step(ctx, ctx->body_hash,
+ &start, end - start, &remain);
+ }
+ }
+ else {
+ while (rspamd_dkim_relaxed_body_step(ctx, ctx->body_hash,
+ &start, end - start, &remain))
+ ;
+ if (need_crlf) {
+ start = "\r\n";
+ end = start + 2;
+ remain = 2;
+ rspamd_dkim_relaxed_body_step(ctx, ctx->body_hash,
+ &start, end - start, &remain);
+ }
+ }
+ }
+ return TRUE;
+ }
+
+ /* TODO: Implement relaxed algorithm */
+ return FALSE;
+}
+
+/* Update hash converting all CR and LF to CRLF */
+static void
+rspamd_dkim_hash_update(EVP_MD_CTX *ck, const gchar *begin, gsize len)
+{
+ const gchar *p, *c, *end;
+
+ end = begin + len;
+ p = begin;
+ c = p;
+
+ while (p < end) {
+ if (*p == '\r') {
+ EVP_DigestUpdate(ck, c, p - c);
+ EVP_DigestUpdate(ck, CRLF, sizeof(CRLF) - 1);
+ p++;
+
+ if (p < end && *p == '\n') {
+ p++;
+ }
+ c = p;
+ }
+ else if (*p == '\n') {
+ EVP_DigestUpdate(ck, c, p - c);
+ EVP_DigestUpdate(ck, CRLF, sizeof(CRLF) - 1);
+ p++;
+ c = p;
+ }
+ else {
+ p++;
+ }
+ }
+
+ if (p > c) {
+ EVP_DigestUpdate(ck, c, p - c);
+ }
+}
+
+/* Update hash by signature value (ignoring b= tag) */
+static void
+rspamd_dkim_signature_update(struct rspamd_dkim_common_ctx *ctx,
+ const gchar *begin,
+ guint len)
+{
+ const gchar *p, *c, *end;
+ gboolean tag, skip;
+
+ end = begin + len;
+ p = begin;
+ c = begin;
+ tag = TRUE;
+ skip = FALSE;
+
+ while (p < end) {
+ if (tag && p[0] == 'b' && p[1] == '=') {
+ /* Add to signature */
+ msg_debug_dkim("initial update hash with signature part: %*s",
+ (gint) (p - c + 2),
+ c);
+ ctx->headers_canonicalised += p - c + 2;
+ rspamd_dkim_hash_update(ctx->headers_hash, c, p - c + 2);
+ skip = TRUE;
+ }
+ else if (skip && (*p == ';' || p == end - 1)) {
+ skip = FALSE;
+ c = p;
+ }
+ else if (!tag && *p == ';') {
+ tag = TRUE;
+ }
+ else if (tag && *p == '=') {
+ tag = FALSE;
+ }
+ p++;
+ }
+
+ p--;
+ /* Skip \r\n at the end */
+ while ((*p == '\r' || *p == '\n') && p >= c) {
+ p--;
+ }
+
+ if (p - c + 1 > 0) {
+ msg_debug_dkim("final update hash with signature part: %*s",
+ (gint) (p - c + 1), c);
+ ctx->headers_canonicalised += p - c + 1;
+ rspamd_dkim_hash_update(ctx->headers_hash, c, p - c + 1);
+ }
+}
+
+goffset
+rspamd_dkim_canonize_header_relaxed_str(const gchar *hname,
+ const gchar *hvalue,
+ gchar *out,
+ gsize outlen)
+{
+ gchar *t;
+ const guchar *h;
+ gboolean got_sp;
+
+ /* Name part */
+ t = out;
+ h = hname;
+
+ while (*h && t - out < outlen) {
+ *t++ = lc_map[*h++];
+ }
+
+ if (t - out >= outlen) {
+ return -1;
+ }
+
+ *t++ = ':';
+
+ /* Value part */
+ h = hvalue;
+ /* Skip spaces at the beginning */
+ while (g_ascii_isspace(*h)) {
+ h++;
+ }
+
+ got_sp = FALSE;
+
+ while (*h && (t - out < outlen)) {
+ if (g_ascii_isspace(*h)) {
+ if (got_sp) {
+ h++;
+ continue;
+ }
+ else {
+ got_sp = TRUE;
+ *t++ = ' ';
+ h++;
+ continue;
+ }
+ }
+ else {
+ got_sp = FALSE;
+ }
+
+ *t++ = *h++;
+ }
+
+ if (g_ascii_isspace(*(t - 1))) {
+ t--;
+ }
+
+ if (t - out >= outlen - 2) {
+ return -1;
+ }
+
+ *t++ = '\r';
+ *t++ = '\n';
+ *t = '\0';
+
+ return t - out;
+}
+
+static gboolean
+rspamd_dkim_canonize_header_relaxed(struct rspamd_dkim_common_ctx *ctx,
+ const gchar *header,
+ const gchar *header_name,
+ gboolean is_sign,
+ guint count,
+ bool is_seal)
+{
+ static gchar st_buf[8192];
+ gchar *buf;
+ guint inlen;
+ goffset r;
+ gboolean allocated = FALSE;
+
+ inlen = strlen(header) + strlen(header_name) + sizeof(":" CRLF);
+
+ if (inlen > sizeof(st_buf)) {
+ buf = g_malloc(inlen);
+ allocated = TRUE;
+ }
+ else {
+ /* Faster */
+ buf = st_buf;
+ }
+
+ r = rspamd_dkim_canonize_header_relaxed_str(header_name, header, buf, inlen);
+
+ g_assert(r != -1);
+
+ if (!is_sign) {
+ msg_debug_dkim("update %s with header (idx=%d): %s",
+ is_seal ? "seal" : "signature", count, buf);
+ EVP_DigestUpdate(ctx->headers_hash, buf, r);
+ }
+ else {
+ rspamd_dkim_signature_update(ctx, buf, r);
+ }
+
+ if (allocated) {
+ g_free(buf);
+ }
+
+ return TRUE;
+}
+
+
+static gboolean
+rspamd_dkim_canonize_header(struct rspamd_dkim_common_ctx *ctx,
+ struct rspamd_task *task,
+ const gchar *header_name,
+ gint count,
+ const gchar *dkim_header,
+ const gchar *dkim_domain)
+{
+ struct rspamd_mime_header *rh, *cur, *sel = NULL;
+ gint hdr_cnt = 0;
+ bool use_idx = false, is_sign = ctx->is_sign;
+
+ /*
+ * TODO:
+ * Temporary hack to prevent linked list being misused until refactored
+ */
+ const guint max_list_iters = 1000;
+
+ if (count < 0) {
+ use_idx = true;
+ count = -(count); /* use i= in header content as it is arc stuff */
+ }
+
+ if (dkim_header == NULL) {
+ rh = rspamd_message_get_header_array(task, header_name,
+ is_sign);
+
+ if (rh) {
+ /* Check uniqueness of the header but we count from the bottom to top */
+ if (!use_idx) {
+ for (cur = rh->prev;; cur = cur->prev) {
+ if (hdr_cnt == count) {
+ sel = cur;
+ }
+
+ hdr_cnt++;
+
+ if (cur == rh || hdr_cnt >= max_list_iters) {
+ /* Cycle */
+ break;
+ }
+ }
+
+ if ((rh->flags & RSPAMD_HEADER_UNIQUE) && hdr_cnt > 1) {
+ guint64 random_cookie = ottery_rand_uint64();
+
+ msg_warn_dkim("header %s is intended to be unique by"
+ " email standards, but we have %d headers of this"
+ " type, artificially break DKIM check",
+ header_name,
+ hdr_cnt);
+ rspamd_dkim_hash_update(ctx->headers_hash,
+ (const gchar *) &random_cookie,
+ sizeof(random_cookie));
+ ctx->headers_canonicalised += sizeof(random_cookie);
+
+ return FALSE;
+ }
+
+ if (hdr_cnt <= count) {
+ /*
+ * If DKIM has less headers requested than there are in a
+ * message, then it's fine, it allows adding extra headers
+ */
+ return TRUE;
+ }
+ }
+ else {
+ /*
+ * This branch is used for ARC headers, and it orders them based on
+ * i=<number> string and not their real order in the list of headers
+ */
+ gchar idx_buf[16];
+ gint id_len, i;
+
+ id_len = rspamd_snprintf(idx_buf, sizeof(idx_buf), "i=%d;",
+ count);
+
+ for (cur = rh->prev, i = 0; i < max_list_iters; cur = cur->prev, i++) {
+ if (cur->decoded &&
+ rspamd_substring_search(cur->decoded, strlen(cur->decoded),
+ idx_buf, id_len) != -1) {
+ sel = cur;
+ break;
+ }
+
+ if (cur == rh) {
+ /* Cycle */
+ break;
+ }
+ }
+
+ if (sel == NULL) {
+ return FALSE;
+ }
+ }
+
+ /* Selected header must be non-null if previous condition is false */
+ g_assert(sel != NULL);
+
+ if (ctx->header_canon_type == DKIM_CANON_SIMPLE) {
+ rspamd_dkim_hash_update(ctx->headers_hash, sel->raw_value,
+ sel->raw_len);
+ ctx->headers_canonicalised += sel->raw_len;
+ msg_debug_dkim("update %s with header (idx=%d): %*s",
+ (use_idx ? "seal" : "signature"),
+ count, (gint) sel->raw_len, sel->raw_value);
+ }
+ else {
+ if (is_sign && (sel->flags & RSPAMD_HEADER_FROM)) {
+ /* Special handling of the From handling when rewrite is done */
+ gboolean has_rewrite = FALSE;
+ guint i;
+ struct rspamd_email_address *addr;
+
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, from_mime), i, addr)
+ {
+ if ((addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL) && !(addr->flags & RSPAMD_EMAIL_ADDR_ALIASED)) {
+ has_rewrite = TRUE;
+ }
+ }
+
+ if (has_rewrite) {
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, from_mime), i, addr)
+ {
+ if (!(addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL)) {
+ if (!rspamd_dkim_canonize_header_relaxed(ctx, addr->raw,
+ header_name, FALSE, i, use_idx)) {
+ return FALSE;
+ }
+
+ return TRUE;
+ }
+ }
+ }
+ }
+
+ if (!rspamd_dkim_canonize_header_relaxed(ctx, sel->value,
+ header_name, FALSE, count, use_idx)) {
+ return FALSE;
+ }
+ }
+ }
+ }
+ else {
+ /* For signature check just use the saved dkim header */
+ if (ctx->header_canon_type == DKIM_CANON_SIMPLE) {
+ /* We need to find our own signature and use it */
+ rh = rspamd_message_get_header_array(task, header_name, is_sign);
+
+ if (rh) {
+ /* We need to find our own signature */
+ if (!dkim_domain) {
+ msg_err_dkim("cannot verify dkim as we have no dkim domain!");
+ return FALSE;
+ }
+
+ gboolean found = FALSE;
+
+ DL_FOREACH(rh, cur)
+ {
+ guint64 th = rspamd_cryptobox_fast_hash(cur->decoded,
+ strlen(cur->decoded), rspamd_hash_seed());
+
+ if (th == ctx->sig_hash) {
+ rspamd_dkim_signature_update(ctx, cur->raw_value,
+ cur->raw_len);
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ msg_err_dkim("BUGON: cannot verify dkim as we have lost our signature"
+ " during simple canonicalisation, expected hash=%L",
+ ctx->sig_hash);
+ return FALSE;
+ }
+ }
+ else {
+ return FALSE;
+ }
+ }
+ else {
+ if (!rspamd_dkim_canonize_header_relaxed(ctx,
+ dkim_header,
+ header_name,
+ TRUE, 0, use_idx)) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+struct rspamd_dkim_cached_hash {
+ guchar *digest_normal;
+ guchar *digest_cr;
+ guchar *digest_crlf;
+ gchar *type;
+};
+
+static struct rspamd_dkim_cached_hash *
+rspamd_dkim_check_bh_cached(struct rspamd_dkim_common_ctx *ctx,
+ struct rspamd_task *task, gsize bhlen, gboolean is_sign)
+{
+ gchar typebuf[64];
+ struct rspamd_dkim_cached_hash *res;
+
+ rspamd_snprintf(typebuf, sizeof(typebuf),
+ RSPAMD_MEMPOOL_DKIM_BH_CACHE "%z_%s_%d_%z",
+ bhlen,
+ ctx->body_canon_type == DKIM_CANON_RELAXED ? "1" : "0",
+ !!is_sign,
+ ctx->len);
+
+ res = rspamd_mempool_get_variable(task->task_pool,
+ typebuf);
+
+ if (!res) {
+ res = rspamd_mempool_alloc0(task->task_pool, sizeof(*res));
+ res->type = rspamd_mempool_strdup(task->task_pool, typebuf);
+ rspamd_mempool_set_variable(task->task_pool,
+ res->type, res, NULL);
+ }
+
+ return res;
+}
+
+static const char *
+rspamd_dkim_type_to_string(enum rspamd_dkim_type t)
+{
+ switch (t) {
+ case RSPAMD_DKIM_NORMAL:
+ return "dkim";
+ case RSPAMD_DKIM_ARC_SIG:
+ return "arc_sig";
+ case RSPAMD_DKIM_ARC_SEAL:
+ default:
+ return "arc_seal";
+ }
+}
+
+/**
+ * Check task for dkim context using dkim key
+ * @param ctx dkim verify context
+ * @param key dkim key (from cache or from dns request)
+ * @param task task to check
+ * @return
+ */
+struct rspamd_dkim_check_result *
+rspamd_dkim_check(rspamd_dkim_context_t *ctx,
+ rspamd_dkim_key_t *key,
+ struct rspamd_task *task)
+{
+ const gchar *body_end, *body_start;
+ guchar raw_digest[EVP_MAX_MD_SIZE];
+ struct rspamd_dkim_cached_hash *cached_bh = NULL;
+ EVP_MD_CTX *cpy_ctx = NULL;
+ gsize dlen = 0;
+ struct rspamd_dkim_check_result *res;
+ guint i;
+ struct rspamd_dkim_header *dh;
+ gint nid;
+
+ g_return_val_if_fail(ctx != NULL, NULL);
+ g_return_val_if_fail(key != NULL, NULL);
+ g_return_val_if_fail(task->msg.len > 0, NULL);
+
+ /* First of all find place of body */
+ body_end = task->msg.begin + task->msg.len;
+
+ body_start = MESSAGE_FIELD(task, raw_headers_content).body_start;
+
+ res = rspamd_mempool_alloc0(task->task_pool, sizeof(*res));
+ res->ctx = ctx;
+ res->selector = ctx->selector;
+ res->domain = ctx->domain;
+ res->fail_reason = NULL;
+ res->short_b = ctx->short_b;
+ res->rcode = DKIM_CONTINUE;
+
+ if (!body_start) {
+ res->rcode = DKIM_ERROR;
+ return res;
+ }
+
+ if (ctx->common.type != RSPAMD_DKIM_ARC_SEAL) {
+ dlen = EVP_MD_CTX_size(ctx->common.body_hash);
+ cached_bh = rspamd_dkim_check_bh_cached(&ctx->common, task,
+ dlen, FALSE);
+
+ if (!cached_bh->digest_normal) {
+ /* Start canonization of body part */
+ if (!rspamd_dkim_canonize_body(&ctx->common, body_start, body_end,
+ FALSE)) {
+ res->rcode = DKIM_RECORD_ERROR;
+ return res;
+ }
+ }
+ }
+
+ /* Now canonize headers */
+ for (i = 0; i < ctx->common.hlist->len; i++) {
+ dh = g_ptr_array_index(ctx->common.hlist, i);
+ rspamd_dkim_canonize_header(&ctx->common, task, dh->name, dh->count,
+ NULL, NULL);
+ }
+
+ /* Canonize dkim signature */
+ switch (ctx->common.type) {
+ case RSPAMD_DKIM_NORMAL:
+ rspamd_dkim_canonize_header(&ctx->common, task, RSPAMD_DKIM_SIGNHEADER, 0,
+ ctx->dkim_header, ctx->domain);
+ break;
+ case RSPAMD_DKIM_ARC_SIG:
+ rspamd_dkim_canonize_header(&ctx->common, task, RSPAMD_DKIM_ARC_SIGNHEADER, 0,
+ ctx->dkim_header, ctx->domain);
+ break;
+ case RSPAMD_DKIM_ARC_SEAL:
+ rspamd_dkim_canonize_header(&ctx->common, task, RSPAMD_DKIM_ARC_SEALHEADER, 0,
+ ctx->dkim_header, ctx->domain);
+ break;
+ }
+
+
+ /* Use cached BH for all but arc seal, if it is not NULL we are not in arc seal mode */
+ if (cached_bh != NULL) {
+ if (!cached_bh->digest_normal) {
+ /* Copy md_ctx to deal with broken CRLF at the end */
+ cpy_ctx = EVP_MD_CTX_create();
+ EVP_MD_CTX_copy(cpy_ctx, ctx->common.body_hash);
+ EVP_DigestFinal_ex(cpy_ctx, raw_digest, NULL);
+
+ cached_bh->digest_normal = rspamd_mempool_alloc(task->task_pool,
+ sizeof(raw_digest));
+ memcpy(cached_bh->digest_normal, raw_digest, sizeof(raw_digest));
+ }
+
+ /* Check bh field */
+ if (memcmp(ctx->bh, cached_bh->digest_normal, ctx->bhlen) != 0) {
+ msg_debug_dkim(
+ "bh value mismatch: %*xs versus %*xs, try add LF; try adding CRLF",
+ (gint) dlen, ctx->bh,
+ (gint) dlen, raw_digest);
+
+ if (cpy_ctx) {
+ /* Try add CRLF */
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ EVP_MD_CTX_cleanup(cpy_ctx);
+#else
+ EVP_MD_CTX_reset(cpy_ctx);
+#endif
+ EVP_MD_CTX_copy(cpy_ctx, ctx->common.body_hash);
+ EVP_DigestUpdate(cpy_ctx, "\r\n", 2);
+ EVP_DigestFinal_ex(cpy_ctx, raw_digest, NULL);
+ cached_bh->digest_crlf = rspamd_mempool_alloc(task->task_pool,
+ sizeof(raw_digest));
+ memcpy(cached_bh->digest_crlf, raw_digest, sizeof(raw_digest));
+
+ if (memcmp(ctx->bh, raw_digest, ctx->bhlen) != 0) {
+ msg_debug_dkim(
+ "bh value mismatch after added CRLF: %*xs versus %*xs, try add LF",
+ (gint) dlen, ctx->bh,
+ (gint) dlen, raw_digest);
+
+ /* Try add LF */
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ EVP_MD_CTX_cleanup(cpy_ctx);
+#else
+ EVP_MD_CTX_reset(cpy_ctx);
+#endif
+ EVP_MD_CTX_copy(cpy_ctx, ctx->common.body_hash);
+ EVP_DigestUpdate(cpy_ctx, "\n", 1);
+ EVP_DigestFinal_ex(cpy_ctx, raw_digest, NULL);
+ cached_bh->digest_cr = rspamd_mempool_alloc(task->task_pool,
+ sizeof(raw_digest));
+ memcpy(cached_bh->digest_cr, raw_digest, sizeof(raw_digest));
+
+ if (memcmp(ctx->bh, raw_digest, ctx->bhlen) != 0) {
+ msg_debug_dkim("bh value mismatch after added LF: %*xs versus %*xs",
+ (gint) dlen, ctx->bh,
+ (gint) dlen, raw_digest);
+ res->fail_reason = "body hash did not verify";
+ res->rcode = DKIM_REJECT;
+ }
+ }
+ }
+ else if (cached_bh->digest_crlf) {
+ if (memcmp(ctx->bh, cached_bh->digest_crlf, ctx->bhlen) != 0) {
+ msg_debug_dkim("bh value mismatch after added CRLF: %*xs versus %*xs",
+ (gint) dlen, ctx->bh,
+ (gint) dlen, cached_bh->digest_crlf);
+
+ if (cached_bh->digest_cr) {
+ if (memcmp(ctx->bh, cached_bh->digest_cr, ctx->bhlen) != 0) {
+ msg_debug_dkim(
+ "bh value mismatch after added LF: %*xs versus %*xs",
+ (gint) dlen, ctx->bh,
+ (gint) dlen, cached_bh->digest_cr);
+
+ res->fail_reason = "body hash did not verify";
+ res->rcode = DKIM_REJECT;
+ }
+ }
+ else {
+
+ res->fail_reason = "body hash did not verify";
+ res->rcode = DKIM_REJECT;
+ }
+ }
+ }
+ else {
+ msg_debug_dkim(
+ "bh value mismatch: %*xs versus %*xs",
+ (gint) dlen, ctx->bh,
+ (gint) dlen, cached_bh->digest_normal);
+ res->fail_reason = "body hash did not verify";
+ res->rcode = DKIM_REJECT;
+ }
+ }
+
+ if (cpy_ctx) {
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ EVP_MD_CTX_cleanup(cpy_ctx);
+#else
+ EVP_MD_CTX_reset(cpy_ctx);
+#endif
+ EVP_MD_CTX_destroy(cpy_ctx);
+ }
+
+ if (res->rcode == DKIM_REJECT) {
+ msg_info_dkim(
+ "%s: bh value mismatch: got %*Bs, expected %*Bs; "
+ "body length %d->%d; d=%s; s=%s",
+ rspamd_dkim_type_to_string(ctx->common.type),
+ (gint) dlen, cached_bh->digest_normal,
+ (gint) dlen, ctx->bh,
+ (gint) (body_end - body_start), ctx->common.body_canonicalised,
+ ctx->domain, ctx->selector);
+
+ return res;
+ }
+ }
+
+ dlen = EVP_MD_CTX_size(ctx->common.headers_hash);
+ EVP_DigestFinal_ex(ctx->common.headers_hash, raw_digest, NULL);
+ /* Check headers signature */
+
+ if (ctx->sig_alg == DKIM_SIGN_RSASHA1) {
+ nid = NID_sha1;
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA256 ||
+ ctx->sig_alg == DKIM_SIGN_ECDSASHA256 ||
+ ctx->sig_alg == DKIM_SIGN_EDDSASHA256) {
+ nid = NID_sha256;
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA512 ||
+ ctx->sig_alg == DKIM_SIGN_ECDSASHA512) {
+ nid = NID_sha512;
+ }
+ else {
+ /* Not reached */
+ nid = NID_sha1;
+ }
+
+ switch (key->type) {
+ case RSPAMD_DKIM_KEY_RSA:
+ if (RSA_verify(nid, raw_digest, dlen, ctx->b, ctx->blen,
+ key->key.key_rsa) != 1) {
+ msg_debug_dkim("headers rsa verify failed");
+ ERR_clear_error();
+ res->rcode = DKIM_REJECT;
+ res->fail_reason = "headers rsa verify failed";
+
+ msg_info_dkim(
+ "%s: headers RSA verification failure; "
+ "body length %d->%d; headers length %d; d=%s; s=%s; key_md5=%*xs; orig header: %s",
+ rspamd_dkim_type_to_string(ctx->common.type),
+ (gint) (body_end - body_start), ctx->common.body_canonicalised,
+ ctx->common.headers_canonicalised,
+ ctx->domain, ctx->selector,
+ RSPAMD_DKIM_KEY_ID_LEN, rspamd_dkim_key_id(key),
+ ctx->dkim_header);
+ }
+ break;
+ case RSPAMD_DKIM_KEY_ECDSA:
+ if (ECDSA_verify(nid, raw_digest, dlen, ctx->b, ctx->blen,
+ key->key.key_ecdsa) != 1) {
+ msg_info_dkim(
+ "%s: headers ECDSA verification failure; "
+ "body length %d->%d; headers length %d; d=%s; s=%s; key_md5=%*xs; orig header: %s",
+ rspamd_dkim_type_to_string(ctx->common.type),
+ (gint) (body_end - body_start), ctx->common.body_canonicalised,
+ ctx->common.headers_canonicalised,
+ ctx->domain, ctx->selector,
+ RSPAMD_DKIM_KEY_ID_LEN, rspamd_dkim_key_id(key),
+ ctx->dkim_header);
+ msg_debug_dkim("headers ecdsa verify failed");
+ ERR_clear_error();
+ res->rcode = DKIM_REJECT;
+ res->fail_reason = "headers ecdsa verify failed";
+ }
+ break;
+ case RSPAMD_DKIM_KEY_EDDSA:
+ if (!rspamd_cryptobox_verify(ctx->b, ctx->blen, raw_digest, dlen,
+ key->key.key_eddsa, RSPAMD_CRYPTOBOX_MODE_25519)) {
+ msg_info_dkim(
+ "%s: headers EDDSA verification failure; "
+ "body length %d->%d; headers length %d; d=%s; s=%s; key_md5=%*xs; orig header: %s",
+ rspamd_dkim_type_to_string(ctx->common.type),
+ (gint) (body_end - body_start), ctx->common.body_canonicalised,
+ ctx->common.headers_canonicalised,
+ ctx->domain, ctx->selector,
+ RSPAMD_DKIM_KEY_ID_LEN, rspamd_dkim_key_id(key),
+ ctx->dkim_header);
+ msg_debug_dkim("headers eddsa verify failed");
+ res->rcode = DKIM_REJECT;
+ res->fail_reason = "headers eddsa verify failed";
+ }
+ break;
+ }
+
+
+ if (ctx->common.type == RSPAMD_DKIM_ARC_SEAL && res->rcode == DKIM_CONTINUE) {
+ switch (ctx->cv) {
+ case RSPAMD_ARC_INVALID:
+ msg_info_dkim("arc seal is invalid i=%d", ctx->common.idx);
+ res->rcode = DKIM_PERM_ERROR;
+ res->fail_reason = "arc seal is invalid";
+ break;
+ case RSPAMD_ARC_FAIL:
+ msg_info_dkim("arc seal failed i=%d", ctx->common.idx);
+ res->rcode = DKIM_REJECT;
+ res->fail_reason = "arc seal failed";
+ break;
+ default:
+ break;
+ }
+ }
+
+ return res;
+}
+
+struct rspamd_dkim_check_result *
+rspamd_dkim_create_result(rspamd_dkim_context_t *ctx,
+ enum rspamd_dkim_check_rcode rcode,
+ struct rspamd_task *task)
+{
+ struct rspamd_dkim_check_result *res;
+
+ res = rspamd_mempool_alloc0(task->task_pool, sizeof(*res));
+ res->ctx = ctx;
+ res->selector = ctx->selector;
+ res->domain = ctx->domain;
+ res->fail_reason = NULL;
+ res->short_b = ctx->short_b;
+ res->rcode = rcode;
+
+ return res;
+}
+
+rspamd_dkim_key_t *
+rspamd_dkim_key_ref(rspamd_dkim_key_t *k)
+{
+ REF_RETAIN(k);
+
+ return k;
+}
+
+void rspamd_dkim_key_unref(rspamd_dkim_key_t *k)
+{
+ REF_RELEASE(k);
+}
+
+rspamd_dkim_sign_key_t *
+rspamd_dkim_sign_key_ref(rspamd_dkim_sign_key_t *k)
+{
+ REF_RETAIN(k);
+
+ return k;
+}
+
+void rspamd_dkim_sign_key_unref(rspamd_dkim_sign_key_t *k)
+{
+ REF_RELEASE(k);
+}
+
+const gchar *
+rspamd_dkim_get_domain(rspamd_dkim_context_t *ctx)
+{
+ if (ctx) {
+ return ctx->domain;
+ }
+
+ return NULL;
+}
+
+const gchar *
+rspamd_dkim_get_selector(rspamd_dkim_context_t *ctx)
+{
+ if (ctx) {
+ return ctx->selector;
+ }
+
+ return NULL;
+}
+
+guint rspamd_dkim_key_get_ttl(rspamd_dkim_key_t *k)
+{
+ if (k) {
+ return k->ttl;
+ }
+
+ return 0;
+}
+
+const gchar *
+rspamd_dkim_get_dns_key(rspamd_dkim_context_t *ctx)
+{
+ if (ctx) {
+ return ctx->dns_key;
+ }
+
+ return NULL;
+}
+
+#define PEM_SIG "-----BEGIN"
+
+rspamd_dkim_sign_key_t *
+rspamd_dkim_sign_key_load(const gchar *key, gsize len,
+ enum rspamd_dkim_key_format type,
+ GError **err)
+{
+ guchar *map = NULL, *tmp = NULL;
+ gsize maplen;
+ rspamd_dkim_sign_key_t *nkey;
+ time_t mtime = time(NULL);
+
+ if (type < 0 || type > RSPAMD_DKIM_KEY_UNKNOWN || len == 0 || key == NULL) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "invalid key type to load: %d", type);
+ return NULL;
+ }
+
+ nkey = g_malloc0(sizeof(*nkey));
+ nkey->mtime = mtime;
+
+ msg_debug_dkim_taskless("got public key with length %z and type %d",
+ len, type);
+
+ /* Load key file if needed */
+ if (type == RSPAMD_DKIM_KEY_FILE) {
+ struct stat st;
+
+ if (stat(key, &st) != 0) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "cannot stat key file: '%s' %s", key, strerror(errno));
+ g_free(nkey);
+
+ return NULL;
+ }
+
+ nkey->mtime = st.st_mtime;
+ map = rspamd_file_xmap(key, PROT_READ, &maplen, TRUE);
+
+ if (map == NULL) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "cannot map key file: '%s' %s", key, strerror(errno));
+ g_free(nkey);
+
+ return NULL;
+ }
+
+ key = map;
+ len = maplen;
+
+ if (maplen > sizeof(PEM_SIG) &&
+ strncmp(map, PEM_SIG, sizeof(PEM_SIG) - 1) == 0) {
+ type = RSPAMD_DKIM_KEY_PEM;
+ }
+ else if (rspamd_cryptobox_base64_is_valid(map, maplen)) {
+ type = RSPAMD_DKIM_KEY_BASE64;
+ }
+ else {
+ type = RSPAMD_DKIM_KEY_RAW;
+ }
+ }
+
+ if (type == RSPAMD_DKIM_KEY_UNKNOWN) {
+ if (len > sizeof(PEM_SIG) &&
+ memcmp(key, PEM_SIG, sizeof(PEM_SIG) - 1) == 0) {
+ type = RSPAMD_DKIM_KEY_PEM;
+ }
+ else {
+ type = RSPAMD_DKIM_KEY_RAW;
+ }
+ }
+
+ if (type == RSPAMD_DKIM_KEY_BASE64) {
+ type = RSPAMD_DKIM_KEY_RAW;
+ tmp = g_malloc(len);
+ rspamd_cryptobox_base64_decode(key, len, tmp, &len);
+ key = tmp;
+ }
+
+ if (type == RSPAMD_DKIM_KEY_RAW && (len == 32 ||
+ len == rspamd_cryptobox_sk_sig_bytes(RSPAMD_CRYPTOBOX_MODE_25519))) {
+ if (len == 32) {
+ /* Seeded key, need scalarmult */
+ unsigned char pk[32];
+ nkey->type = RSPAMD_DKIM_KEY_EDDSA;
+ nkey->key.key_eddsa = g_malloc(
+ rspamd_cryptobox_sk_sig_bytes(RSPAMD_CRYPTOBOX_MODE_25519));
+ crypto_sign_ed25519_seed_keypair(pk, nkey->key.key_eddsa, key);
+ nkey->keylen = rspamd_cryptobox_sk_sig_bytes(RSPAMD_CRYPTOBOX_MODE_25519);
+ }
+ else {
+ /* Full ed25519 key */
+ unsigned klen = rspamd_cryptobox_sk_sig_bytes(RSPAMD_CRYPTOBOX_MODE_25519);
+ nkey->type = RSPAMD_DKIM_KEY_EDDSA;
+ nkey->key.key_eddsa = g_malloc(klen);
+ memcpy(nkey->key.key_eddsa, key, klen);
+ nkey->keylen = klen;
+ }
+ }
+ else {
+ nkey->key_bio = BIO_new_mem_buf(key, len);
+
+ if (type == RSPAMD_DKIM_KEY_RAW) {
+ if (d2i_PrivateKey_bio(nkey->key_bio, &nkey->key_evp) == NULL) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "cannot parse raw private key: %s",
+ ERR_error_string(ERR_get_error(), NULL));
+
+ rspamd_dkim_sign_key_free(nkey);
+ nkey = NULL;
+
+ goto end;
+ }
+ }
+ else {
+ if (!PEM_read_bio_PrivateKey(nkey->key_bio, &nkey->key_evp, NULL, NULL)) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "cannot parse pem private key: %s",
+ ERR_error_string(ERR_get_error(), NULL));
+ rspamd_dkim_sign_key_free(nkey);
+ nkey = NULL;
+
+ goto end;
+ }
+ }
+ nkey->key.key_rsa = EVP_PKEY_get1_RSA(nkey->key_evp);
+ if (nkey->key.key_rsa == NULL) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "cannot extract rsa key from evp key");
+ rspamd_dkim_sign_key_free(nkey);
+ nkey = NULL;
+
+ goto end;
+ }
+ nkey->type = RSPAMD_DKIM_KEY_RSA;
+ }
+
+ REF_INIT_RETAIN(nkey, rspamd_dkim_sign_key_free);
+
+end:
+
+ if (map != NULL) {
+ munmap(map, maplen);
+ }
+
+ if (tmp != NULL) {
+ rspamd_explicit_memzero(tmp, len);
+ g_free(tmp);
+ }
+
+ return nkey;
+}
+
+#undef PEM_SIG
+
+gboolean
+rspamd_dkim_sign_key_maybe_invalidate(rspamd_dkim_sign_key_t *key, time_t mtime)
+{
+ if (mtime > key->mtime) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+rspamd_dkim_sign_context_t *
+rspamd_create_dkim_sign_context(struct rspamd_task *task,
+ rspamd_dkim_sign_key_t *priv_key,
+ gint headers_canon,
+ gint body_canon,
+ const gchar *headers,
+ enum rspamd_dkim_type type,
+ GError **err)
+{
+ rspamd_dkim_sign_context_t *nctx;
+
+ if (headers_canon != DKIM_CANON_SIMPLE && headers_canon != DKIM_CANON_RELAXED) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_HC,
+ "bad headers canonicalisation");
+
+ return NULL;
+ }
+ if (body_canon != DKIM_CANON_SIMPLE && body_canon != DKIM_CANON_RELAXED) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_INVALID_BC,
+ "bad body canonicalisation");
+
+ return NULL;
+ }
+
+ if (!priv_key || (!priv_key->key.key_rsa && !priv_key->key.key_eddsa)) {
+ g_set_error(err,
+ DKIM_ERROR,
+ DKIM_SIGERROR_KEYFAIL,
+ "bad key to sign");
+
+ return NULL;
+ }
+
+ nctx = rspamd_mempool_alloc0(task->task_pool, sizeof(*nctx));
+ nctx->common.pool = task->task_pool;
+ nctx->common.header_canon_type = headers_canon;
+ nctx->common.body_canon_type = body_canon;
+ nctx->common.type = type;
+ nctx->common.is_sign = TRUE;
+
+ if (type != RSPAMD_DKIM_ARC_SEAL) {
+ if (!rspamd_dkim_parse_hdrlist_common(&nctx->common, headers,
+ strlen(headers), TRUE,
+ err)) {
+ return NULL;
+ }
+ }
+ else {
+ rspamd_dkim_add_arc_seal_headers(task->task_pool, &nctx->common);
+ }
+
+ nctx->key = rspamd_dkim_sign_key_ref(priv_key);
+
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) rspamd_dkim_sign_key_unref, priv_key);
+
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ nctx->common.body_hash = EVP_MD_CTX_create();
+ EVP_DigestInit_ex(nctx->common.body_hash, EVP_sha256(), NULL);
+ nctx->common.headers_hash = EVP_MD_CTX_create();
+ EVP_DigestInit_ex(nctx->common.headers_hash, EVP_sha256(), NULL);
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_destroy, nctx->common.body_hash);
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_destroy, nctx->common.headers_hash);
+#else
+ nctx->common.body_hash = EVP_MD_CTX_new();
+ EVP_DigestInit_ex(nctx->common.body_hash, EVP_sha256(), NULL);
+ nctx->common.headers_hash = EVP_MD_CTX_new();
+ EVP_DigestInit_ex(nctx->common.headers_hash, EVP_sha256(), NULL);
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_free, nctx->common.body_hash);
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) EVP_MD_CTX_free, nctx->common.headers_hash);
+#endif
+
+ return nctx;
+}
+
+
+GString *
+rspamd_dkim_sign(struct rspamd_task *task, const gchar *selector,
+ const gchar *domain, time_t expire, gsize len, guint idx,
+ const gchar *arc_cv, rspamd_dkim_sign_context_t *ctx)
+{
+ GString *hdr;
+ struct rspamd_dkim_header *dh;
+ const gchar *body_end, *body_start, *hname;
+ guchar raw_digest[EVP_MAX_MD_SIZE];
+ struct rspamd_dkim_cached_hash *cached_bh = NULL;
+ gsize dlen = 0;
+ guint i, j;
+ gchar *b64_data;
+ guchar *sig_buf;
+ guint sig_len;
+ guint headers_len = 0, cur_len = 0;
+ union rspamd_dkim_header_stat hstat;
+
+ g_assert(ctx != NULL);
+
+ /* First of all find place of body */
+ body_end = task->msg.begin + task->msg.len;
+ body_start = MESSAGE_FIELD(task, raw_headers_content).body_start;
+
+ if (len > 0) {
+ ctx->common.len = len;
+ }
+
+ if (!body_start) {
+ return NULL;
+ }
+
+ /* Start canonization of body part */
+ if (ctx->common.type != RSPAMD_DKIM_ARC_SEAL) {
+ dlen = EVP_MD_CTX_size(ctx->common.body_hash);
+ cached_bh = rspamd_dkim_check_bh_cached(&ctx->common, task,
+ dlen, TRUE);
+
+ if (!cached_bh->digest_normal) {
+ /* Start canonization of body part */
+ if (!rspamd_dkim_canonize_body(&ctx->common, body_start, body_end,
+ TRUE)) {
+ return NULL;
+ }
+ }
+ }
+
+ hdr = g_string_sized_new(255);
+
+ if (ctx->common.type == RSPAMD_DKIM_NORMAL) {
+ rspamd_printf_gstring(hdr, "v=1; a=%s; c=%s/%s; d=%s; s=%s; ",
+ ctx->key->type == RSPAMD_DKIM_KEY_RSA ? "rsa-sha256" : "ed25519-sha256",
+ ctx->common.header_canon_type == DKIM_CANON_RELAXED ? "relaxed" : "simple",
+ ctx->common.body_canon_type == DKIM_CANON_RELAXED ? "relaxed" : "simple",
+ domain, selector);
+ }
+ else if (ctx->common.type == RSPAMD_DKIM_ARC_SIG) {
+ rspamd_printf_gstring(hdr, "i=%d; a=%s; c=%s/%s; d=%s; s=%s; ",
+ idx,
+ ctx->key->type == RSPAMD_DKIM_KEY_RSA ? "rsa-sha256" : "ed25519-sha256",
+ ctx->common.header_canon_type == DKIM_CANON_RELAXED ? "relaxed" : "simple",
+ ctx->common.body_canon_type == DKIM_CANON_RELAXED ? "relaxed" : "simple",
+ domain, selector);
+ }
+ else {
+ g_assert(arc_cv != NULL);
+ rspamd_printf_gstring(hdr, "i=%d; a=%s; d=%s; s=%s; cv=%s; ",
+ idx,
+ ctx->key->type == RSPAMD_DKIM_KEY_RSA ? "rsa-sha256" : "ed25519-sha256",
+ domain,
+ selector,
+ arc_cv);
+ }
+
+ if (expire > 0) {
+ rspamd_printf_gstring(hdr, "x=%t; ", expire);
+ }
+
+ if (ctx->common.type != RSPAMD_DKIM_ARC_SEAL) {
+ if (len > 0) {
+ rspamd_printf_gstring(hdr, "l=%z; ", len);
+ }
+ }
+
+ rspamd_printf_gstring(hdr, "t=%t; h=", time(NULL));
+
+ /* Now canonize headers */
+ for (i = 0; i < ctx->common.hlist->len; i++) {
+ struct rspamd_mime_header *rh, *cur;
+
+ dh = g_ptr_array_index(ctx->common.hlist, i);
+
+ /* We allow oversigning if dh->count > number of headers with this name */
+ hstat.n = GPOINTER_TO_UINT(g_hash_table_lookup(ctx->common.htable, dh->name));
+
+ if (hstat.s.flags & RSPAMD_DKIM_FLAG_OVERSIGN) {
+ /* Do oversigning */
+ guint count = 0;
+
+ rh = rspamd_message_get_header_array(task, dh->name, FALSE);
+
+ if (rh) {
+ DL_FOREACH(rh, cur)
+ {
+ /* Sign all existing headers */
+ rspamd_dkim_canonize_header(&ctx->common, task, dh->name,
+ count,
+ NULL, NULL);
+ count++;
+ }
+ }
+
+ /* Now add one more entry to oversign */
+ if (count > 0 || !(hstat.s.flags & RSPAMD_DKIM_FLAG_OVERSIGN_EXISTING)) {
+ cur_len = (strlen(dh->name) + 1) * (count + 1);
+ headers_len += cur_len;
+
+ if (headers_len > 70 && i > 0 && i < ctx->common.hlist->len - 1) {
+ rspamd_printf_gstring(hdr, " ");
+ headers_len = cur_len;
+ }
+
+ for (j = 0; j < count + 1; j++) {
+ rspamd_printf_gstring(hdr, "%s:", dh->name);
+ }
+ }
+ }
+ else {
+ rh = rspamd_message_get_header_array(task, dh->name, FALSE);
+
+ if (rh) {
+ if (hstat.s.count > 0) {
+
+ cur_len = (strlen(dh->name) + 1) * (hstat.s.count);
+ headers_len += cur_len;
+ if (headers_len > 70 && i > 0 && i < ctx->common.hlist->len - 1) {
+ rspamd_printf_gstring(hdr, " ");
+ headers_len = cur_len;
+ }
+
+ for (j = 0; j < hstat.s.count; j++) {
+ rspamd_printf_gstring(hdr, "%s:", dh->name);
+ }
+ }
+
+
+ rspamd_dkim_canonize_header(&ctx->common, task,
+ dh->name, dh->count,
+ NULL, NULL);
+ }
+ }
+
+ g_hash_table_remove(ctx->common.htable, dh->name);
+ }
+
+ /* Replace the last ':' with ';' */
+ hdr->str[hdr->len - 1] = ';';
+
+ if (ctx->common.type != RSPAMD_DKIM_ARC_SEAL) {
+ if (!cached_bh->digest_normal) {
+ EVP_DigestFinal_ex(ctx->common.body_hash, raw_digest, NULL);
+ cached_bh->digest_normal = rspamd_mempool_alloc(task->task_pool,
+ sizeof(raw_digest));
+ memcpy(cached_bh->digest_normal, raw_digest, sizeof(raw_digest));
+ }
+
+
+ b64_data = rspamd_encode_base64(cached_bh->digest_normal, dlen, 0, NULL);
+ rspamd_printf_gstring(hdr, " bh=%s; b=", b64_data);
+ g_free(b64_data);
+ }
+ else {
+ rspamd_printf_gstring(hdr, " b=");
+ }
+
+ switch (ctx->common.type) {
+ case RSPAMD_DKIM_NORMAL:
+ default:
+ hname = RSPAMD_DKIM_SIGNHEADER;
+ break;
+ case RSPAMD_DKIM_ARC_SIG:
+ hname = RSPAMD_DKIM_ARC_SIGNHEADER;
+ break;
+ case RSPAMD_DKIM_ARC_SEAL:
+ hname = RSPAMD_DKIM_ARC_SEALHEADER;
+ break;
+ }
+
+ if (ctx->common.header_canon_type == DKIM_CANON_RELAXED) {
+ if (!rspamd_dkim_canonize_header_relaxed(&ctx->common,
+ hdr->str,
+ hname,
+ TRUE,
+ 0,
+ ctx->common.type == RSPAMD_DKIM_ARC_SEAL)) {
+
+ g_string_free(hdr, TRUE);
+ return NULL;
+ }
+ }
+ else {
+ /* Will likely have issues with folding */
+ rspamd_dkim_hash_update(ctx->common.headers_hash, hdr->str,
+ hdr->len);
+ ctx->common.headers_canonicalised += hdr->len;
+ msg_debug_task("update signature with header: %*s",
+ (gint) hdr->len, hdr->str);
+ }
+
+ dlen = EVP_MD_CTX_size(ctx->common.headers_hash);
+ EVP_DigestFinal_ex(ctx->common.headers_hash, raw_digest, NULL);
+ if (ctx->key->type == RSPAMD_DKIM_KEY_RSA) {
+ sig_len = RSA_size(ctx->key->key.key_rsa);
+ sig_buf = g_alloca(sig_len);
+
+ if (RSA_sign(NID_sha256, raw_digest, dlen, sig_buf, &sig_len,
+ ctx->key->key.key_rsa) != 1) {
+ g_string_free(hdr, TRUE);
+ msg_err_task("rsa sign error: %s",
+ ERR_error_string(ERR_get_error(), NULL));
+
+ return NULL;
+ }
+ }
+ else if (ctx->key->type == RSPAMD_DKIM_KEY_EDDSA) {
+ sig_len = rspamd_cryptobox_signature_bytes(RSPAMD_CRYPTOBOX_MODE_25519);
+ sig_buf = g_alloca(sig_len);
+
+ rspamd_cryptobox_sign(sig_buf, NULL, raw_digest, dlen,
+ ctx->key->key.key_eddsa, RSPAMD_CRYPTOBOX_MODE_25519);
+ }
+ else {
+ g_string_free(hdr, TRUE);
+ msg_err_task("unsupported key type for signing");
+
+ return NULL;
+ }
+
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
+ b64_data = rspamd_encode_base64_fold(sig_buf, sig_len, 70, NULL,
+ RSPAMD_TASK_NEWLINES_LF);
+ }
+ else {
+ b64_data = rspamd_encode_base64_fold(sig_buf, sig_len, 70, NULL,
+ MESSAGE_FIELD(task, nlines_type));
+ }
+
+ rspamd_printf_gstring(hdr, "%s", b64_data);
+ g_free(b64_data);
+
+ return hdr;
+}
+
+gboolean
+rspamd_dkim_match_keys(rspamd_dkim_key_t *pk,
+ rspamd_dkim_sign_key_t *sk,
+ GError **err)
+{
+ if (pk == NULL || sk == NULL) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "missing public or private key");
+ return FALSE;
+ }
+ if (pk->type != sk->type) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYFAIL,
+ "public and private key types do not match");
+ return FALSE;
+ }
+
+ if (pk->type == RSPAMD_DKIM_KEY_EDDSA) {
+ if (memcmp(sk->key.key_eddsa + 32, pk->key.key_eddsa, 32) != 0) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYHASHMISMATCH,
+ "pubkey does not match private key");
+ return FALSE;
+ }
+ }
+ else if (EVP_PKEY_cmp(pk->key_evp, sk->key_evp) != 1) {
+ g_set_error(err, dkim_error_quark(), DKIM_SIGERROR_KEYHASHMISMATCH,
+ "pubkey does not match private key");
+ return FALSE;
+ }
+
+ return TRUE;
+}
diff --git a/src/libserver/dkim.h b/src/libserver/dkim.h
new file mode 100644
index 0000000..50703da
--- /dev/null
+++ b/src/libserver/dkim.h
@@ -0,0 +1,298 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DKIM_H_
+#define DKIM_H_
+
+#include "config.h"
+#include "contrib/libev/ev.h"
+#include "dns.h"
+#include "ref.h"
+
+
+/* Main types and definitions */
+
+#define RSPAMD_DKIM_SIGNHEADER "DKIM-Signature"
+#define RSPAMD_DKIM_ARC_SIGNHEADER "ARC-Message-Signature"
+#define RSPAMD_DKIM_ARC_AUTHHEADER "ARC-Authentication-Results"
+#define RSPAMD_DKIM_ARC_SEALHEADER "ARC-Seal"
+/* DKIM signature header */
+
+
+/* Errors (from OpenDKIM) */
+
+#define DKIM_SIGERROR_UNKNOWN (-1) /* unknown error */
+#define DKIM_SIGERROR_VERSION 1 /* unsupported version */
+#define DKIM_SIGERROR_EXPIRED 3 /* signature expired */
+#define DKIM_SIGERROR_FUTURE 4 /* signature in the future */
+#define DKIM_SIGERROR_NOREC 6 /* No record */
+#define DKIM_SIGERROR_INVALID_HC 7 /* c= invalid (header) */
+#define DKIM_SIGERROR_INVALID_BC 8 /* c= invalid (body) */
+#define DKIM_SIGERROR_INVALID_A 10 /* a= invalid */
+#define DKIM_SIGERROR_INVALID_L 12 /* l= invalid */
+#define DKIM_SIGERROR_EMPTY_D 16 /* d= empty */
+#define DKIM_SIGERROR_EMPTY_S 18 /* s= empty */
+#define DKIM_SIGERROR_EMPTY_B 20 /* b= empty */
+#define DKIM_SIGERROR_NOKEY 22 /* no key found in DNS */
+#define DKIM_SIGERROR_KEYFAIL 24 /* DNS query failed */
+#define DKIM_SIGERROR_EMPTY_BH 26 /* bh= empty */
+#define DKIM_SIGERROR_BADSIG 28 /* signature mismatch */
+#define DKIM_SIGERROR_EMPTY_H 31 /* h= empty */
+#define DKIM_SIGERROR_INVALID_H 32 /* h= missing req'd entries */
+#define DKIM_SIGERROR_KEYHASHMISMATCH 37 /* sig-key hash mismatch */
+#define DKIM_SIGERROR_EMPTY_V 45 /* v= tag empty */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Check results */
+enum rspamd_dkim_check_rcode {
+ DKIM_CONTINUE = 0,
+ DKIM_REJECT,
+ DKIM_TRYAGAIN,
+ DKIM_NOTFOUND,
+ DKIM_RECORD_ERROR,
+ DKIM_PERM_ERROR,
+};
+
+#define DKIM_CANON_SIMPLE 0 /* as specified in DKIM spec */
+#define DKIM_CANON_RELAXED 1 /* as specified in DKIM spec */
+
+struct rspamd_dkim_context_s;
+typedef struct rspamd_dkim_context_s rspamd_dkim_context_t;
+
+struct rspamd_dkim_sign_context_s;
+typedef struct rspamd_dkim_sign_context_s rspamd_dkim_sign_context_t;
+
+struct rspamd_dkim_key_s;
+typedef struct rspamd_dkim_key_s rspamd_dkim_key_t;
+typedef struct rspamd_dkim_key_s rspamd_dkim_sign_key_t;
+
+struct rspamd_task;
+
+enum rspamd_dkim_key_format {
+ RSPAMD_DKIM_KEY_FILE = 0,
+ RSPAMD_DKIM_KEY_PEM,
+ RSPAMD_DKIM_KEY_BASE64,
+ RSPAMD_DKIM_KEY_RAW,
+ RSPAMD_DKIM_KEY_UNKNOWN
+};
+
+enum rspamd_dkim_type {
+ RSPAMD_DKIM_NORMAL,
+ RSPAMD_DKIM_ARC_SIG,
+ RSPAMD_DKIM_ARC_SEAL
+};
+
+/* Signature methods */
+enum rspamd_sign_type {
+ DKIM_SIGN_UNKNOWN = -2,
+ DKIM_SIGN_RSASHA1 = 0,
+ DKIM_SIGN_RSASHA256,
+ DKIM_SIGN_RSASHA512,
+ DKIM_SIGN_ECDSASHA256,
+ DKIM_SIGN_ECDSASHA512,
+ DKIM_SIGN_EDDSASHA256,
+};
+
+enum rspamd_dkim_key_type {
+ RSPAMD_DKIM_KEY_RSA = 0,
+ RSPAMD_DKIM_KEY_ECDSA,
+ RSPAMD_DKIM_KEY_EDDSA
+};
+
+struct rspamd_dkim_check_result {
+ enum rspamd_dkim_check_rcode rcode;
+ rspamd_dkim_context_t *ctx;
+ /* Processed parts */
+ const gchar *selector;
+ const gchar *domain;
+ const gchar *short_b;
+ const gchar *fail_reason;
+};
+
+
+/* Err MUST be freed if it is not NULL, key is allocated by slice allocator */
+typedef void (*dkim_key_handler_f)(rspamd_dkim_key_t *key, gsize keylen,
+ rspamd_dkim_context_t *ctx, gpointer ud, GError *err);
+
+/**
+ * Create new dkim context from signature
+ * @param sig message's signature
+ * @param pool pool to allocate memory from
+ * @param time_jitter jitter in seconds to allow time diff while checking
+ * @param err pointer to error object
+ * @return new context or NULL
+ */
+rspamd_dkim_context_t *rspamd_create_dkim_context(const gchar *sig,
+ rspamd_mempool_t *pool,
+ struct rspamd_dns_resolver *resolver,
+ guint time_jitter,
+ enum rspamd_dkim_type type,
+ GError **err);
+
+/**
+ * Create new dkim context for making a signature
+ * @param task
+ * @param priv_key
+ * @param err
+ * @return
+ */
+rspamd_dkim_sign_context_t *rspamd_create_dkim_sign_context(struct rspamd_task *task,
+ rspamd_dkim_sign_key_t *priv_key,
+ gint headers_canon,
+ gint body_canon,
+ const gchar *dkim_headers,
+ enum rspamd_dkim_type type,
+ GError **err);
+
+/**
+ * Load dkim key
+ * @param path
+ * @param err
+ * @return
+ */
+rspamd_dkim_sign_key_t *rspamd_dkim_sign_key_load(const gchar *what, gsize len,
+ enum rspamd_dkim_key_format type,
+ GError **err);
+
+/**
+ * Invalidate modified sign key
+ * @param key
+ * @return
+*/
+gboolean rspamd_dkim_sign_key_maybe_invalidate(rspamd_dkim_sign_key_t *key,
+ time_t mtime);
+
+/**
+ * Make DNS request for specified context and obtain and parse key
+ * @param ctx dkim context from signature
+ * @param resolver dns resolver object
+ * @param s async session to make request
+ * @return
+ */
+gboolean rspamd_get_dkim_key(rspamd_dkim_context_t *ctx,
+ struct rspamd_task *task,
+ dkim_key_handler_f handler,
+ gpointer ud);
+
+/**
+ * Check task for dkim context using dkim key
+ * @param ctx dkim verify context
+ * @param key dkim key (from cache or from dns request)
+ * @param task task to check
+ * @return
+ */
+struct rspamd_dkim_check_result *rspamd_dkim_check(rspamd_dkim_context_t *ctx,
+ rspamd_dkim_key_t *key,
+ struct rspamd_task *task);
+
+struct rspamd_dkim_check_result *
+rspamd_dkim_create_result(rspamd_dkim_context_t *ctx,
+ enum rspamd_dkim_check_rcode rcode,
+ struct rspamd_task *task);
+
+GString *rspamd_dkim_sign(struct rspamd_task *task,
+ const gchar *selector,
+ const gchar *domain,
+ time_t expire,
+ gsize len,
+ guint idx,
+ const gchar *arc_cv,
+ rspamd_dkim_sign_context_t *ctx);
+
+rspamd_dkim_key_t *rspamd_dkim_key_ref(rspamd_dkim_key_t *k);
+
+void rspamd_dkim_key_unref(rspamd_dkim_key_t *k);
+
+rspamd_dkim_sign_key_t *rspamd_dkim_sign_key_ref(rspamd_dkim_sign_key_t *k);
+
+void rspamd_dkim_sign_key_unref(rspamd_dkim_sign_key_t *k);
+
+const gchar *rspamd_dkim_get_domain(rspamd_dkim_context_t *ctx);
+
+const gchar *rspamd_dkim_get_selector(rspamd_dkim_context_t *ctx);
+
+const gchar *rspamd_dkim_get_dns_key(rspamd_dkim_context_t *ctx);
+
+guint rspamd_dkim_key_get_ttl(rspamd_dkim_key_t *k);
+
+/**
+ * Create DKIM public key from a raw data
+ * @param keydata
+ * @param keylen
+ * @param type
+ * @param err
+ * @return
+ */
+rspamd_dkim_key_t *rspamd_dkim_make_key(const gchar *keydata, guint keylen,
+ enum rspamd_dkim_key_type type,
+ GError **err);
+
+#define RSPAMD_DKIM_KEY_ID_LEN 16
+/**
+ * Returns key id for dkim key (raw md5 of RSPAMD_DKIM_KEY_ID_LEN)
+ * NOT ZERO TERMINATED, use RSPAMD_DKIM_KEY_ID_LEN for length
+ * @param key
+ * @return
+ */
+const guchar *rspamd_dkim_key_id(rspamd_dkim_key_t *key);
+
+/**
+ * Parse DKIM public key from a TXT record
+ * @param txt
+ * @param keylen
+ * @param err
+ * @return
+ */
+rspamd_dkim_key_t *rspamd_dkim_parse_key(const gchar *txt, gsize *keylen,
+ GError **err);
+
+/**
+ * Canonicalise header using relaxed algorithm
+ * @param hname
+ * @param hvalue
+ * @param out
+ * @param outlen
+ * @return
+ */
+goffset rspamd_dkim_canonize_header_relaxed_str(const gchar *hname,
+ const gchar *hvalue,
+ gchar *out,
+ gsize outlen);
+
+/**
+ * Checks public and private keys for match
+ * @param pk
+ * @param sk
+ * @param err
+ * @return
+ */
+gboolean rspamd_dkim_match_keys(rspamd_dkim_key_t *pk,
+ rspamd_dkim_sign_key_t *sk,
+ GError **err);
+
+/**
+ * Free DKIM key
+ * @param key
+ */
+void rspamd_dkim_key_free(rspamd_dkim_key_t *key);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DKIM_H_ */
diff --git a/src/libserver/dns.c b/src/libserver/dns.c
new file mode 100644
index 0000000..be2d5a3
--- /dev/null
+++ b/src/libserver/dns.c
@@ -0,0 +1,1124 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "contrib/librdns/rdns.h"
+#include "config.h"
+#include "dns.h"
+#include "rspamd.h"
+#include "utlist.h"
+#include "contrib/libev/ev.h"
+#include "contrib/librdns/rdns.h"
+#include "contrib/librdns/dns_private.h"
+#include "contrib/librdns/rdns_ev.h"
+#include "unix-std.h"
+
+#include <unicode/uidna.h>
+
+static const gchar *M = "rspamd dns";
+
+static struct rdns_upstream_elt *rspamd_dns_select_upstream(const char *name,
+ size_t len, void *ups_data);
+static struct rdns_upstream_elt *rspamd_dns_select_upstream_retransmit(
+ const char *name,
+ size_t len,
+ struct rdns_upstream_elt *prev_elt,
+ void *ups_data);
+static void rspamd_dns_upstream_ok(struct rdns_upstream_elt *elt,
+ void *ups_data);
+static void rspamd_dns_upstream_fail(struct rdns_upstream_elt *elt,
+ void *ups_data, const gchar *reason);
+static unsigned int rspamd_dns_upstream_count(void *ups_data);
+
+static struct rdns_upstream_context rspamd_ups_ctx = {
+ .select = rspamd_dns_select_upstream,
+ .select_retransmit = rspamd_dns_select_upstream_retransmit,
+ .ok = rspamd_dns_upstream_ok,
+ .fail = rspamd_dns_upstream_fail,
+ .count = rspamd_dns_upstream_count,
+ .data = NULL};
+
+struct rspamd_dns_request_ud {
+ struct rspamd_async_session *session;
+ dns_callback_type cb;
+ gpointer ud;
+ rspamd_mempool_t *pool;
+ struct rspamd_task *task;
+ struct rspamd_symcache_dynamic_item *item;
+ struct rdns_request *req;
+ struct rdns_reply *reply;
+};
+
+struct rspamd_dns_fail_cache_entry {
+ const char *name;
+ gint32 namelen;
+ enum rdns_request_type type;
+};
+
+static const gint8 ascii_dns_table[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ /* HYPHEN-MINUS..FULL STOP */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1,
+ /* 0..9 digits */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1,
+ /* LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z */
+ -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* _ */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1,
+ /* LATIN SMALL LETTER A..LATIN SMALL LETTER Z */
+ -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
+
+static guint
+rspamd_dns_fail_hash(gconstpointer ptr)
+{
+ struct rspamd_dns_fail_cache_entry *elt =
+ (struct rspamd_dns_fail_cache_entry *) ptr;
+
+ /* We don't care about type when doing hashing */
+ return rspamd_cryptobox_fast_hash(elt->name, elt->namelen,
+ rspamd_hash_seed());
+}
+
+static gboolean
+rspamd_dns_fail_equal(gconstpointer p1, gconstpointer p2)
+{
+ struct rspamd_dns_fail_cache_entry *e1 = (struct rspamd_dns_fail_cache_entry *) p1,
+ *e2 = (struct rspamd_dns_fail_cache_entry *) p2;
+
+ if (e1->type == e2->type && e1->namelen == e2->namelen) {
+ return memcmp(e1->name, e2->name, e1->namelen) == 0;
+ }
+
+ return FALSE;
+}
+
+static void
+rspamd_dns_fin_cb(gpointer arg)
+{
+ struct rspamd_dns_request_ud *reqdata = (struct rspamd_dns_request_ud *) arg;
+
+ if (reqdata->item) {
+ rspamd_symcache_set_cur_item(reqdata->task, reqdata->item);
+ }
+
+ if (reqdata->reply) {
+ reqdata->cb(reqdata->reply, reqdata->ud);
+ }
+ else {
+ struct rdns_reply fake_reply;
+
+ memset(&fake_reply, 0, sizeof(fake_reply));
+ fake_reply.code = RDNS_RC_TIMEOUT;
+ fake_reply.request = reqdata->req;
+ fake_reply.resolver = reqdata->req->resolver;
+ fake_reply.requested_name = reqdata->req->requested_names[0].name;
+
+ reqdata->cb(&fake_reply, reqdata->ud);
+ }
+
+ rdns_request_release(reqdata->req);
+
+ if (reqdata->item) {
+ rspamd_symcache_item_async_dec_check(reqdata->task,
+ reqdata->item, M);
+ }
+
+ if (reqdata->pool == NULL) {
+ g_free(reqdata);
+ }
+}
+
+static void
+rspamd_dns_callback(struct rdns_reply *reply, gpointer ud)
+{
+ struct rspamd_dns_request_ud *reqdata = ud;
+
+ reqdata->reply = reply;
+
+
+ if (reqdata->session) {
+ if (reply->code == RDNS_RC_SERVFAIL &&
+ reqdata->task &&
+ reqdata->task->resolver->fails_cache) {
+
+ /* Add to cache... */
+ const gchar *name = reqdata->req->requested_names[0].name;
+ gchar *target;
+ gsize namelen;
+ struct rspamd_dns_fail_cache_entry *nentry;
+
+ /* Allocate in a single entry to allow further free in a single call */
+ namelen = strlen(name);
+ nentry = g_malloc(sizeof(nentry) + namelen + 1);
+ target = ((gchar *) nentry) + sizeof(nentry);
+ rspamd_strlcpy(target, name, namelen + 1);
+ nentry->type = reqdata->req->requested_names[0].type;
+ nentry->name = target;
+ nentry->namelen = namelen;
+
+ /* Rdns request is retained there */
+ rspamd_lru_hash_insert(reqdata->task->resolver->fails_cache,
+ nentry, rdns_request_retain(reply->request),
+ reqdata->task->task_timestamp,
+ reqdata->task->resolver->fails_cache_time);
+ }
+
+ /*
+ * Ref event to avoid double unref by
+ * event removing
+ */
+ rdns_request_retain(reply->request);
+ rspamd_session_remove_event(reqdata->session,
+ rspamd_dns_fin_cb, reqdata);
+ }
+ else {
+ reqdata->cb(reply, reqdata->ud);
+
+ if (reqdata->pool == NULL) {
+ g_free(reqdata);
+ }
+ }
+}
+
+struct rspamd_dns_request_ud *
+rspamd_dns_resolver_request(struct rspamd_dns_resolver *resolver,
+ struct rspamd_async_session *session,
+ rspamd_mempool_t *pool,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name)
+{
+ struct rdns_request *req;
+ struct rspamd_dns_request_ud *reqdata = NULL;
+ guint nlen = strlen(name);
+ gchar *real_name = NULL;
+
+ g_assert(resolver != NULL);
+
+ if (resolver->r == NULL) {
+ return NULL;
+ }
+
+ if (nlen == 0 || nlen > DNS_D_MAXNAME) {
+ return NULL;
+ }
+
+ if (session && rspamd_session_blocked(session)) {
+ return NULL;
+ }
+
+ if (rspamd_str_has_8bit(name, nlen)) {
+ /* Convert to idna using libicu as it follows all the standards */
+ real_name = rspamd_dns_resolver_idna_convert_utf8(resolver, pool,
+ name, nlen, &nlen);
+
+ if (real_name == NULL) {
+ return NULL;
+ }
+
+ name = real_name;
+ }
+
+ /* Name is now in ASCII only */
+ for (gsize i = 0; i < nlen; i++) {
+ if (ascii_dns_table[((unsigned int) name[i]) & 0x7F] == -1) {
+ /* Invalid DNS name requested */
+
+ if (!pool) {
+ g_free(real_name);
+ }
+
+ return NULL;
+ }
+ }
+
+ if (pool != NULL) {
+ reqdata =
+ rspamd_mempool_alloc0(pool, sizeof(struct rspamd_dns_request_ud));
+ }
+ else {
+ reqdata = g_malloc0(sizeof(struct rspamd_dns_request_ud));
+ }
+
+ reqdata->pool = pool;
+ reqdata->session = session;
+ reqdata->cb = cb;
+ reqdata->ud = ud;
+
+ req = rdns_make_request_full(resolver->r, rspamd_dns_callback, reqdata,
+ resolver->request_timeout, resolver->max_retransmits, 1, name,
+ type);
+ reqdata->req = req;
+
+ if (session) {
+ if (req != NULL) {
+ rspamd_session_add_event(session,
+ (event_finalizer_t) rspamd_dns_fin_cb,
+ reqdata,
+ M);
+ }
+ }
+
+ if (req == NULL) {
+ if (pool == NULL) {
+ g_free(reqdata);
+ g_free(real_name);
+ }
+
+ return NULL;
+ }
+
+ if (real_name && pool == NULL) {
+ g_free(real_name);
+ }
+
+ return reqdata;
+}
+
+struct rspamd_dns_cached_delayed_cbdata {
+ struct rspamd_task *task;
+ dns_callback_type cb;
+ gpointer ud;
+ ev_timer tm;
+ struct rdns_request *req;
+};
+
+static void
+rspamd_fail_cache_cb(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_dns_cached_delayed_cbdata *cbd =
+ (struct rspamd_dns_cached_delayed_cbdata *) w->data;
+ struct rdns_reply fake_reply;
+
+ ev_timer_stop(EV_A_ w);
+ memset(&fake_reply, 0, sizeof(fake_reply));
+ fake_reply.code = RDNS_RC_SERVFAIL;
+ fake_reply.request = cbd->req;
+ fake_reply.resolver = cbd->req->resolver;
+ fake_reply.requested_name = cbd->req->requested_names[0].name;
+ cbd->cb(&fake_reply, cbd->ud);
+ rdns_request_release(cbd->req);
+}
+
+static gboolean
+make_dns_request_task_common(struct rspamd_task *task,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name,
+ gboolean forced)
+{
+ struct rspamd_dns_request_ud *reqdata;
+
+ if (!forced && task->dns_requests >= task->cfg->dns_max_requests) {
+ return FALSE;
+ }
+
+ if (task->resolver->fails_cache) {
+ /* Search in failures cache */
+ struct rspamd_dns_fail_cache_entry search;
+ struct rdns_request *req;
+
+ search.name = name;
+ search.namelen = strlen(name);
+ search.type = type;
+
+ if ((req = rspamd_lru_hash_lookup(task->resolver->fails_cache,
+ &search, task->task_timestamp)) != NULL) {
+ /*
+ * We need to reply with SERVFAIL again to the API, so add a special
+ * timer, uh-oh, and fire it
+ */
+ struct rspamd_dns_cached_delayed_cbdata *cbd =
+ rspamd_mempool_alloc0(task->task_pool, sizeof(*cbd));
+
+ ev_timer_init(&cbd->tm, rspamd_fail_cache_cb, 0.0, 0.0);
+ cbd->task = task;
+ cbd->cb = cb;
+ cbd->ud = ud;
+ cbd->req = rdns_request_retain(req);
+ cbd->tm.data = cbd;
+
+ return TRUE;
+ }
+ }
+
+ reqdata = rspamd_dns_resolver_request(
+ task->resolver, task->s, task->task_pool, cb, ud,
+ type, name);
+
+ if (reqdata) {
+ task->dns_requests++;
+
+ reqdata->task = task;
+ reqdata->item = rspamd_symcache_get_cur_item(task);
+
+ if (reqdata->item) {
+ /* We are inside some session */
+ rspamd_symcache_item_async_inc(task, reqdata->item, M);
+ }
+
+ if (!forced && task->dns_requests >= task->cfg->dns_max_requests) {
+ msg_info_task("stop resolving on reaching %ud requests",
+ task->dns_requests);
+ }
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_dns_resolver_request_task(struct rspamd_task *task,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name)
+{
+ return make_dns_request_task_common(task, cb, ud, type, name, FALSE);
+}
+
+gboolean
+rspamd_dns_resolver_request_task_forced(struct rspamd_task *task,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name)
+{
+ return make_dns_request_task_common(task, cb, ud, type, name, TRUE);
+}
+
+static void rspamd_rnds_log_bridge(
+ void *log_data,
+ enum rdns_log_level level,
+ const char *function,
+ const char *format,
+ va_list args)
+{
+ rspamd_logger_t *logger = log_data;
+
+ rspamd_common_logv(logger, (GLogLevelFlags) level, "rdns", NULL,
+ function, format, args);
+}
+
+static void
+rspamd_dns_server_init(struct upstream *up, guint idx, gpointer ud)
+{
+ struct rspamd_dns_resolver *r = ud;
+ rspamd_inet_addr_t *addr;
+ void *serv;
+ struct rdns_upstream_elt *elt;
+
+ addr = rspamd_upstream_addr_next(up);
+
+ if (r->cfg) {
+ serv = rdns_resolver_add_server(r->r, rspamd_inet_address_to_string(addr),
+ rspamd_inet_address_get_port(addr), 0, r->cfg->dns_io_per_server);
+
+ elt = rspamd_mempool_alloc0(r->cfg->cfg_pool, sizeof(*elt));
+ elt->server = serv;
+ elt->lib_data = up;
+
+ rspamd_upstream_set_data(up, elt);
+ }
+ else {
+ serv = rdns_resolver_add_server(r->r, rspamd_inet_address_to_string(addr),
+ rspamd_inet_address_get_port(addr), 0, 8);
+ }
+
+ g_assert(serv != NULL);
+}
+
+static void
+rspamd_dns_server_reorder(struct upstream *up, guint idx, gpointer ud)
+{
+ struct rspamd_dns_resolver *r = ud;
+
+ rspamd_upstream_set_weight(up, rspamd_upstreams_count(r->ups) - idx + 1);
+}
+
+static bool
+rspamd_dns_resolv_conf_on_server(struct rdns_resolver *resolver,
+ const char *name, unsigned int port,
+ int priority, unsigned int io_cnt, void *ud)
+{
+ struct rspamd_dns_resolver *dns_resolver = ud;
+ struct rspamd_config *cfg;
+ rspamd_inet_addr_t *addr;
+ gint test_fd;
+
+ cfg = dns_resolver->cfg;
+
+ msg_info_config("parsed nameserver %s from resolv.conf", name);
+
+ /* Try to open a connection */
+ if (!rspamd_parse_inet_address(&addr, name, strlen(name),
+ RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+ msg_warn_config("cannot parse nameserver address %s", name);
+
+ return FALSE;
+ }
+
+ rspamd_inet_address_set_port(addr, port);
+ test_fd = rspamd_inet_address_connect(addr, SOCK_DGRAM, TRUE);
+
+ if (test_fd == -1 && (errno != EINTR || errno != ECONNREFUSED || errno != ECONNRESET)) {
+ msg_info_config("cannot open connection to nameserver at address %s: %s",
+ name, strerror(errno));
+ rspamd_inet_address_free(addr);
+
+ return FALSE;
+ }
+
+ rspamd_inet_address_free(addr);
+ close(test_fd);
+
+ return rspamd_upstreams_add_upstream(dns_resolver->ups, name, port,
+ RSPAMD_UPSTREAM_PARSE_NAMESERVER,
+ NULL);
+}
+
+static void
+rspamd_process_fake_reply(struct rspamd_config *cfg,
+ struct rspamd_dns_resolver *dns_resolver,
+ const ucl_object_t *cur_arr)
+{
+ const ucl_object_t *cur;
+ ucl_object_iter_t it;
+
+ it = ucl_object_iterate_new(cur_arr);
+
+ while ((cur = ucl_object_iterate_safe(it, true))) {
+ const ucl_object_t *type_obj, *name_obj, *code_obj, *replies_obj;
+ enum rdns_request_type rtype = RDNS_REQUEST_A;
+ enum dns_rcode rcode = RDNS_RC_NOERROR;
+ struct rdns_reply_entry *replies = NULL;
+ const gchar *name = NULL;
+
+ if (ucl_object_type(cur) != UCL_OBJECT) {
+ continue;
+ }
+
+ name_obj = ucl_object_lookup(cur, "name");
+ if (name_obj == NULL ||
+ (name = ucl_object_tostring(name_obj)) == NULL) {
+ msg_err_config("no name for fake dns reply");
+ continue;
+ }
+
+ type_obj = ucl_object_lookup(cur, "type");
+ if (type_obj) {
+ rtype = rdns_type_fromstr(ucl_object_tostring(type_obj));
+
+ if (rtype == RDNS_REQUEST_INVALID) {
+ msg_err_config("invalid type for %s: %s", name,
+ ucl_object_tostring(type_obj));
+ continue;
+ }
+ }
+
+ code_obj = ucl_object_lookup_any(cur, "code", "rcode", NULL);
+ if (code_obj) {
+ rcode = rdns_rcode_fromstr(ucl_object_tostring(code_obj));
+
+ if (rcode == RDNS_RC_INVALID) {
+ msg_err_config("invalid rcode for %s: %s", name,
+ ucl_object_tostring(code_obj));
+ continue;
+ }
+ }
+
+ if (rcode == RDNS_RC_NOERROR) {
+ /* We want replies to be set for this rcode */
+ replies_obj = ucl_object_lookup(cur, "replies");
+
+ if (replies_obj == NULL || ucl_object_type(replies_obj) != UCL_ARRAY) {
+ msg_err_config("invalid replies for fake DNS record %s", name);
+ continue;
+ }
+
+ ucl_object_iter_t rep_it;
+ const ucl_object_t *rep_obj;
+
+ rep_it = ucl_object_iterate_new(replies_obj);
+
+ while ((rep_obj = ucl_object_iterate_safe(rep_it, true))) {
+ const gchar *str_rep = ucl_object_tostring(rep_obj);
+ struct rdns_reply_entry *rep;
+ gchar **svec;
+
+ if (str_rep == NULL) {
+ msg_err_config("invalid reply element for fake DNS record %s",
+ name);
+ continue;
+ }
+
+ rep = calloc(1, sizeof(*rep));
+ g_assert(rep != NULL);
+
+ rep->type = rtype;
+ rep->ttl = 0;
+
+ switch (rtype) {
+ case RDNS_REQUEST_A:
+ if (inet_pton(AF_INET, str_rep, &rep->content.a.addr) != 1) {
+ msg_err_config("invalid A reply element for fake "
+ "DNS record %s: %s",
+ name, str_rep);
+ free(rep);
+ }
+ else {
+ DL_APPEND(replies, rep);
+ }
+ break;
+ case RDNS_REQUEST_NS:
+ rep->content.ns.name = strdup(str_rep);
+ DL_APPEND(replies, rep);
+ break;
+ case RDNS_REQUEST_PTR:
+ rep->content.ptr.name = strdup(str_rep);
+ DL_APPEND(replies, rep);
+ break;
+ case RDNS_REQUEST_MX:
+ svec = g_strsplit_set(str_rep, " :", -1);
+
+ if (svec && svec[0] && svec[1]) {
+ rep->content.mx.priority = strtoul(svec[0], NULL, 10);
+ rep->content.mx.name = strdup(svec[1]);
+ DL_APPEND(replies, rep);
+ }
+ else {
+ msg_err_config("invalid MX reply element for fake "
+ "DNS record %s: %s",
+ name, str_rep);
+ free(rep);
+ }
+
+ g_strfreev(svec);
+ break;
+ case RDNS_REQUEST_TXT:
+ rep->content.txt.data = strdup(str_rep);
+ DL_APPEND(replies, rep);
+ break;
+ case RDNS_REQUEST_SOA:
+ svec = g_strsplit_set(str_rep, " :", -1);
+
+ /* 7 elements */
+ if (svec && svec[0] && svec[1] && svec[2] &&
+ svec[3] && svec[4] && svec[5] && svec[6]) {
+ rep->content.soa.mname = strdup(svec[0]);
+ rep->content.soa.admin = strdup(svec[1]);
+ rep->content.soa.serial = strtoul(svec[2], NULL, 10);
+ rep->content.soa.refresh = strtol(svec[3], NULL, 10);
+ rep->content.soa.retry = strtol(svec[4], NULL, 10);
+ rep->content.soa.expire = strtol(svec[5], NULL, 10);
+ rep->content.soa.minimum = strtoul(svec[6], NULL, 10);
+ DL_APPEND(replies, rep);
+ }
+ else {
+ msg_err_config("invalid MX reply element for fake "
+ "DNS record %s: %s",
+ name, str_rep);
+ free(rep);
+ }
+
+ g_strfreev(svec);
+ break;
+ case RDNS_REQUEST_AAAA:
+ if (inet_pton(AF_INET6, str_rep, &rep->content.aaa.addr) != 1) {
+ msg_err_config("invalid AAAA reply element for fake "
+ "DNS record %s: %s",
+ name, str_rep);
+ free(rep);
+ }
+ else {
+ DL_APPEND(replies, rep);
+ }
+ break;
+ case RDNS_REQUEST_SRV:
+ default:
+ msg_err_config("invalid or unsupported reply element "
+ "for fake DNS record %s(%s): %s",
+ name, rdns_str_from_type(rtype), str_rep);
+ free(rep);
+ break;
+ }
+ }
+
+ ucl_object_iterate_free(rep_it);
+
+ if (replies) {
+ struct rdns_reply_entry *tmp_entry;
+ guint i = 0;
+ DL_COUNT(replies, tmp_entry, i);
+
+ msg_info_config("added fake record: %s(%s); %d replies", name,
+ rdns_str_from_type(rtype), i);
+ rdns_resolver_set_fake_reply(dns_resolver->r,
+ name, rtype, rcode, replies);
+ }
+ else {
+ msg_warn_config("record %s has no replies, not adding",
+ name);
+ }
+ }
+ else {
+ /* This entry returns some non valid code, no replies are possible */
+ replies_obj = ucl_object_lookup(cur, "replies");
+
+ if (replies_obj) {
+ msg_warn_config("replies are set for non-successful return "
+ "code for %s(%s), they will be ignored",
+ name, rdns_str_from_type(rtype));
+ }
+
+ rdns_resolver_set_fake_reply(dns_resolver->r,
+ name, rtype, rcode, NULL);
+ }
+ }
+
+ ucl_object_iterate_free(it);
+}
+
+static bool
+rspamd_dns_read_hosts_file(struct rspamd_config *cfg,
+ struct rspamd_dns_resolver *dns_resolver,
+ const gchar *fname)
+{
+ gchar *linebuf = NULL;
+ gsize buflen = 0;
+ gssize r;
+ FILE *fp;
+ guint nadded = 0;
+
+ fp = fopen(fname, "r");
+
+ if (fp == NULL) {
+ /* Hack to reduce noise */
+ if (strcmp(fname, "/etc/hosts") == 0) {
+ msg_info_config("cannot open hosts file %s: %s", fname,
+ strerror(errno));
+ }
+ else {
+ msg_err_config("cannot open hosts file %s: %s", fname,
+ strerror(errno));
+ }
+
+ return false;
+ }
+
+ while ((r = getline(&linebuf, &buflen, fp)) > 0) {
+ if (linebuf[0] == '#' || g_ascii_isspace(linebuf[0])) {
+ /* Skip comment or empty line */
+ continue;
+ }
+
+ g_strchomp(linebuf);
+
+ gchar **elts = g_strsplit_set(linebuf, " \t\v", -1);
+ rspamd_inet_addr_t *addr;
+
+ if (!rspamd_parse_inet_address(&addr, elts[0], strlen(elts[0]),
+ RSPAMD_INET_ADDRESS_PARSE_REMOTE | RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
+ msg_warn_config("bad hosts file line: %s; cannot parse address", linebuf);
+ }
+ else {
+ /* Add all FQDN + aliases if any */
+ gchar **cur_name = &elts[1];
+
+ while (*cur_name) {
+ if (strlen(*cur_name) == 0) {
+ cur_name++;
+ continue;
+ }
+
+ if (*cur_name[0] == '#') {
+ /* Start of the comment */
+ break;
+ }
+
+ struct rdns_reply_entry *rep;
+ rep = calloc(1, sizeof(*rep));
+ g_assert(rep != NULL);
+
+ rep->ttl = 0;
+
+ if (rspamd_inet_address_get_af(addr) == AF_INET) {
+ socklen_t unused;
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)
+ rspamd_inet_address_get_sa(addr, &unused);
+ rep->type = RDNS_REQUEST_A;
+ memcpy(&rep->content.a.addr, &sin->sin_addr,
+ sizeof(rep->content.a.addr));
+ }
+ else {
+ socklen_t unused;
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)
+ rspamd_inet_address_get_sa(addr, &unused);
+ rep->type = RDNS_REQUEST_AAAA;
+ memcpy(&rep->content.aaa.addr, &sin6->sin6_addr,
+ sizeof(rep->content.aaa.addr));
+ }
+
+ rep->next = NULL;
+ rep->prev = rep;
+ rdns_resolver_set_fake_reply(dns_resolver->r,
+ *cur_name, rep->type, RDNS_RC_NOERROR, rep);
+ msg_debug_config("added fake record %s -> %s from hosts file %s",
+ *cur_name, rspamd_inet_address_to_string(addr), fname);
+ cur_name++;
+ nadded++;
+ }
+
+ rspamd_inet_address_free(addr);
+ }
+
+ g_strfreev(elts);
+ }
+
+ if (linebuf) {
+ free(linebuf);
+ }
+
+ msg_info_config("processed host file %s; %d records added", fname, nadded);
+ fclose(fp);
+
+ return true;
+}
+
+static void
+rspamd_dns_resolver_config_ucl(struct rspamd_config *cfg,
+ struct rspamd_dns_resolver *dns_resolver,
+ const ucl_object_t *dns_section)
+{
+ const ucl_object_t *fake_replies, *fails_cache_size, *fails_cache_time,
+ *hosts;
+ static const ev_tstamp default_fails_cache_time = 10.0;
+
+ /* Process fake replies */
+ fake_replies = ucl_object_lookup_any(dns_section, "fake_records",
+ "fake_replies", NULL);
+
+ if (fake_replies && ucl_object_type(fake_replies) == UCL_ARRAY) {
+ const ucl_object_t *cur_arr;
+
+ DL_FOREACH(fake_replies, cur_arr)
+ {
+ rspamd_process_fake_reply(cfg, dns_resolver, cur_arr);
+ }
+ }
+
+ hosts = ucl_object_lookup(dns_section, "hosts");
+
+ if (hosts == NULL) {
+ /* Read normal `/etc/hosts` file */
+ rspamd_dns_read_hosts_file(cfg, dns_resolver, "/etc/hosts");
+ }
+ else if (ucl_object_type(hosts) == UCL_NULL) {
+ /* Do nothing, hosts are explicitly disabled */
+ }
+ else if (ucl_object_type(hosts) == UCL_STRING) {
+ if (!rspamd_dns_read_hosts_file(cfg, dns_resolver, ucl_object_tostring(hosts))) {
+ msg_err_config("cannot read hosts file %s", ucl_object_tostring(hosts));
+ }
+ }
+ else if (ucl_object_type(hosts) == UCL_ARRAY) {
+ const ucl_object_t *cur;
+ ucl_object_iter_t it = NULL;
+
+ while ((cur = ucl_object_iterate(hosts, &it, true)) != NULL) {
+ if (!rspamd_dns_read_hosts_file(cfg, dns_resolver, ucl_object_tostring(cur))) {
+ msg_err_config("cannot read hosts file %s", ucl_object_tostring(cur));
+ }
+ }
+ }
+ else {
+ msg_err_config("invalid type for hosts parameter: %s",
+ ucl_object_type_to_string(ucl_object_type(hosts)));
+ }
+
+ fails_cache_size = ucl_object_lookup(dns_section, "fails_cache_size");
+ if (fails_cache_size && ucl_object_type(fails_cache_size) == UCL_INT) {
+
+ dns_resolver->fails_cache_time = default_fails_cache_time;
+ fails_cache_time = ucl_object_lookup(dns_section, "fails_cache_time");
+
+ if (fails_cache_time) {
+ dns_resolver->fails_cache_time = ucl_object_todouble(fails_cache_time);
+ }
+
+ dns_resolver->fails_cache = rspamd_lru_hash_new_full(
+ ucl_object_toint(fails_cache_size),
+ g_free, (GDestroyNotify) rdns_request_release,
+ rspamd_dns_fail_hash, rspamd_dns_fail_equal);
+ }
+}
+
+struct rspamd_dns_resolver *
+rspamd_dns_resolver_init(rspamd_logger_t *logger,
+ struct ev_loop *ev_base,
+ struct rspamd_config *cfg)
+{
+ struct rspamd_dns_resolver *dns_resolver;
+
+ dns_resolver = g_malloc0(sizeof(struct rspamd_dns_resolver));
+ dns_resolver->event_loop = ev_base;
+
+ if (cfg != NULL) {
+ dns_resolver->request_timeout = cfg->dns_timeout;
+ dns_resolver->max_retransmits = cfg->dns_retransmits;
+ }
+ else {
+ dns_resolver->request_timeout = 1;
+ dns_resolver->max_retransmits = 2;
+ }
+
+ /* IDN translation is performed in Rspamd now */
+ dns_resolver->r = rdns_resolver_new(RDNS_RESOLVER_NOIDN);
+
+ UErrorCode uc_err = U_ZERO_ERROR;
+
+ dns_resolver->uidna = uidna_openUTS46(UIDNA_DEFAULT, &uc_err);
+ g_assert(!U_FAILURE(uc_err));
+ rdns_bind_libev(dns_resolver->r, dns_resolver->event_loop);
+
+ if (cfg != NULL) {
+ rdns_resolver_set_log_level(dns_resolver->r, cfg->log_level);
+ dns_resolver->cfg = cfg;
+ rdns_resolver_set_dnssec(dns_resolver->r, cfg->enable_dnssec);
+
+ if (cfg->nameservers == NULL) {
+ /* Parse resolv.conf */
+ dns_resolver->ups = rspamd_upstreams_create(cfg->ups_ctx);
+ rspamd_upstreams_set_flags(dns_resolver->ups,
+ RSPAMD_UPSTREAM_FLAG_NORESOLVE);
+ rspamd_upstreams_set_rotation(dns_resolver->ups,
+ RSPAMD_UPSTREAM_MASTER_SLAVE);
+
+ if (!rdns_resolver_parse_resolv_conf_cb(dns_resolver->r,
+ "/etc/resolv.conf",
+ rspamd_dns_resolv_conf_on_server,
+ dns_resolver)) {
+ msg_err("cannot parse resolv.conf and no nameservers defined, "
+ "so no ways to resolve addresses");
+ rdns_resolver_release(dns_resolver->r);
+ dns_resolver->r = NULL;
+
+ return dns_resolver;
+ }
+
+ /* Use normal resolv.conf rules */
+ rspamd_upstreams_foreach(dns_resolver->ups, rspamd_dns_server_reorder,
+ dns_resolver);
+ }
+ else {
+ dns_resolver->ups = rspamd_upstreams_create(cfg->ups_ctx);
+ rspamd_upstreams_set_flags(dns_resolver->ups,
+ RSPAMD_UPSTREAM_FLAG_NORESOLVE);
+
+ if (!rspamd_upstreams_from_ucl(dns_resolver->ups, cfg->nameservers,
+ 53, dns_resolver)) {
+ msg_err_config("cannot parse DNS nameservers definitions");
+ rdns_resolver_release(dns_resolver->r);
+ dns_resolver->r = NULL;
+
+ return dns_resolver;
+ }
+ }
+
+ rspamd_upstreams_foreach(dns_resolver->ups, rspamd_dns_server_init,
+ dns_resolver);
+ rdns_resolver_set_upstream_lib(dns_resolver->r, &rspamd_ups_ctx,
+ dns_resolver->ups);
+ cfg->dns_resolver = dns_resolver;
+
+ if (cfg->cfg_ucl_obj) {
+ /* Configure additional options */
+ const ucl_object_t *opts_section, *dns_section, *tmp;
+
+ opts_section = ucl_object_lookup(cfg->cfg_ucl_obj, "options");
+
+ if (opts_section) {
+ /* TODO: implement a more simple merge logic */
+ DL_FOREACH(opts_section, tmp)
+ {
+ dns_section = ucl_object_lookup(opts_section, "dns");
+
+ if (dns_section) {
+ rspamd_dns_resolver_config_ucl(cfg, dns_resolver,
+ dns_section);
+ }
+ }
+ }
+ }
+ }
+
+ rdns_resolver_set_logger(dns_resolver->r, rspamd_rnds_log_bridge, logger);
+ rdns_resolver_init(dns_resolver->r);
+
+ return dns_resolver;
+}
+
+void rspamd_dns_resolver_deinit(struct rspamd_dns_resolver *resolver)
+{
+ if (resolver) {
+ if (resolver->r) {
+ rdns_resolver_release(resolver->r);
+ }
+
+ if (resolver->ups) {
+ rspamd_upstreams_destroy(resolver->ups);
+ }
+
+ if (resolver->fails_cache) {
+ rspamd_lru_hash_destroy(resolver->fails_cache);
+ }
+
+ uidna_close(resolver->uidna);
+
+ g_free(resolver);
+ }
+}
+
+
+static struct rdns_upstream_elt *
+rspamd_dns_select_upstream(const char *name,
+ size_t len, void *ups_data)
+{
+ struct upstream_list *ups = ups_data;
+ struct upstream *up;
+
+ up = rspamd_upstream_get(ups, RSPAMD_UPSTREAM_ROUND_ROBIN, name, len);
+
+ if (up) {
+ msg_debug("select %s", rspamd_upstream_name(up));
+
+ return rspamd_upstream_get_data(up);
+ }
+
+ return NULL;
+}
+
+static struct rdns_upstream_elt *
+rspamd_dns_select_upstream_retransmit(
+ const char *name,
+ size_t len,
+ struct rdns_upstream_elt *prev_elt,
+ void *ups_data)
+{
+ struct upstream_list *ups = ups_data;
+ struct upstream *up;
+
+ if (prev_elt) {
+ up = rspamd_upstream_get_except(ups, (struct upstream *) prev_elt->lib_data,
+ RSPAMD_UPSTREAM_MASTER_SLAVE, name, len);
+ }
+ else {
+ up = rspamd_upstream_get_forced(ups, RSPAMD_UPSTREAM_RANDOM, name, len);
+ }
+
+ if (up) {
+ msg_debug("select forced %s", rspamd_upstream_name(up));
+
+ return rspamd_upstream_get_data(up);
+ }
+
+ return NULL;
+}
+
+static void
+rspamd_dns_upstream_ok(struct rdns_upstream_elt *elt,
+ void *ups_data)
+{
+ struct upstream *up = elt->lib_data;
+
+ rspamd_upstream_ok(up);
+}
+
+static void
+rspamd_dns_upstream_fail(struct rdns_upstream_elt *elt,
+ void *ups_data, const gchar *reason)
+{
+ struct upstream *up = elt->lib_data;
+
+ rspamd_upstream_fail(up, FALSE, reason);
+}
+
+static unsigned int
+rspamd_dns_upstream_count(void *ups_data)
+{
+ struct upstream_list *ups = ups_data;
+
+ return rspamd_upstreams_alive(ups);
+}
+
+gchar *
+rspamd_dns_resolver_idna_convert_utf8(struct rspamd_dns_resolver *resolver,
+ rspamd_mempool_t *pool,
+ const char *name,
+ gint namelen,
+ guint *outlen)
+{
+ if (resolver == NULL || resolver->uidna == NULL || name == NULL || namelen > DNS_D_MAXNAME) {
+ return NULL;
+ }
+
+ guint dest_len;
+ UErrorCode uc_err = U_ZERO_ERROR;
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+ /* Calculate length required */
+ dest_len = uidna_nameToASCII_UTF8(resolver->uidna, name, namelen,
+ NULL, 0, &info, &uc_err);
+
+ if (uc_err == U_BUFFER_OVERFLOW_ERROR) {
+ gchar *dest;
+
+ if (pool) {
+ dest = rspamd_mempool_alloc(pool, dest_len + 1);
+ }
+ else {
+ dest = g_malloc(dest_len + 1);
+ }
+
+ uc_err = U_ZERO_ERROR;
+
+ dest_len = uidna_nameToASCII_UTF8(resolver->uidna, name, namelen,
+ dest, dest_len + 1, &info, &uc_err);
+
+ if (U_FAILURE(uc_err)) {
+
+ if (!pool) {
+ g_free(dest);
+ }
+
+ return NULL;
+ }
+
+ dest[dest_len] = '\0';
+
+ if (outlen) {
+ *outlen = dest_len;
+ }
+
+ return dest;
+ }
+
+ return NULL;
+} \ No newline at end of file
diff --git a/src/libserver/dns.h b/src/libserver/dns.h
new file mode 100644
index 0000000..acf8d09
--- /dev/null
+++ b/src/libserver/dns.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_DNS_H
+#define RSPAMD_DNS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "async_session.h"
+#include "logger.h"
+#include "rdns.h"
+#include "upstream.h"
+#include "libutil/hash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_config;
+struct rspamd_task;
+struct event_loop;
+
+struct rspamd_dns_resolver {
+ struct rdns_resolver *r;
+ struct ev_loop *event_loop;
+ rspamd_lru_hash_t *fails_cache;
+ void *uidna;
+ double fails_cache_time;
+ struct upstream_list *ups;
+ struct rspamd_config *cfg;
+ gdouble request_timeout;
+ guint max_retransmits;
+};
+
+/* Rspamd DNS API */
+
+/**
+ * Init DNS resolver, params are obtained from a config file or system file /etc/resolv.conf
+ */
+struct rspamd_dns_resolver *rspamd_dns_resolver_init(rspamd_logger_t *logger,
+ struct ev_loop *ev_base,
+ struct rspamd_config *cfg);
+
+void rspamd_dns_resolver_deinit(struct rspamd_dns_resolver *resolver);
+
+struct rspamd_dns_request_ud;
+
+/**
+ * Make a DNS request
+ * @param resolver resolver object
+ * @param session async session to register event
+ * @param pool memory pool for storage
+ * @param cb callback to call on resolve completing
+ * @param ud user data for callback
+ * @param type request type
+ * @param ... string or ip address based on a request type
+ * @return TRUE if request was sent.
+ */
+struct rspamd_dns_request_ud *rspamd_dns_resolver_request(struct rspamd_dns_resolver *resolver,
+ struct rspamd_async_session *session,
+ rspamd_mempool_t *pool,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name);
+
+gboolean rspamd_dns_resolver_request_task(struct rspamd_task *task,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name);
+
+gboolean rspamd_dns_resolver_request_task_forced(struct rspamd_task *task,
+ dns_callback_type cb,
+ gpointer ud,
+ enum rdns_request_type type,
+ const char *name);
+
+/**
+ * Converts a name into idna from UTF8
+ * @param resolver resolver (must be initialised)
+ * @param pool optional memory pool (can be NULL, then you need to g_free) the result
+ * @param name input name
+ * @param namelen length of input (-1 for zero terminated)
+ * @return encoded string
+ */
+gchar *rspamd_dns_resolver_idna_convert_utf8(struct rspamd_dns_resolver *resolver,
+ rspamd_mempool_t *pool,
+ const char *name,
+ gint namelen,
+ guint *outlen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c
new file mode 100644
index 0000000..cd5cc4e
--- /dev/null
+++ b/src/libserver/dynamic_cfg.c
@@ -0,0 +1,743 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "libserver/maps/map.h"
+#include "scan_result.h"
+#include "dynamic_cfg.h"
+#include "unix-std.h"
+#include "lua/lua_common.h"
+
+#include <math.h>
+
+struct config_json_buf {
+ GString *buf;
+ struct rspamd_config *cfg;
+};
+
+/**
+ * Apply configuration to the specified configuration
+ * @param conf_metrics
+ * @param cfg
+ */
+static void
+apply_dynamic_conf(const ucl_object_t *top, struct rspamd_config *cfg)
+{
+ enum rspamd_action_type test_act;
+ const ucl_object_t *cur_elt, *cur_nm, *it_val;
+ ucl_object_iter_t it = NULL;
+ const gchar *name;
+ gdouble nscore;
+ static const guint priority = 3;
+
+ while ((cur_elt = ucl_object_iterate(top, &it, true))) {
+ if (ucl_object_type(cur_elt) != UCL_OBJECT) {
+ msg_err("loaded json array element is not an object");
+ continue;
+ }
+
+ cur_nm = ucl_object_lookup(cur_elt, "metric");
+ if (!cur_nm || ucl_object_type(cur_nm) != UCL_STRING) {
+ msg_err(
+ "loaded json metric object element has no 'metric' attribute");
+ continue;
+ }
+
+ cur_nm = ucl_object_lookup(cur_elt, "symbols");
+ /* Parse symbols */
+ if (cur_nm && ucl_object_type(cur_nm) == UCL_ARRAY) {
+ ucl_object_iter_t nit = NULL;
+
+ while ((it_val = ucl_object_iterate(cur_nm, &nit, true))) {
+ if (ucl_object_lookup(it_val, "name") &&
+ ucl_object_lookup(it_val, "value")) {
+ const ucl_object_t *n =
+ ucl_object_lookup(it_val, "name");
+ const ucl_object_t *v =
+ ucl_object_lookup(it_val, "value");
+
+ nscore = ucl_object_todouble(v);
+
+ /*
+ * We use priority = 3 here
+ */
+ rspamd_config_add_symbol(cfg,
+ ucl_object_tostring(n), nscore, NULL, NULL,
+ 0, priority, cfg->default_max_shots);
+ }
+ else {
+ msg_info(
+ "json symbol object has no mandatory 'name' and 'value' attributes");
+ }
+ }
+ }
+ else {
+ ucl_object_t *arr;
+
+ arr = ucl_object_typed_new(UCL_ARRAY);
+ ucl_object_insert_key((ucl_object_t *) cur_elt, arr, "symbols",
+ sizeof("symbols") - 1, false);
+ }
+ cur_nm = ucl_object_lookup(cur_elt, "actions");
+ /* Parse actions */
+ if (cur_nm && ucl_object_type(cur_nm) == UCL_ARRAY) {
+ ucl_object_iter_t nit = NULL;
+
+ while ((it_val = ucl_object_iterate(cur_nm, &nit, true))) {
+ const ucl_object_t *n = ucl_object_lookup(it_val, "name");
+ const ucl_object_t *v = ucl_object_lookup(it_val, "value");
+
+ if (n != NULL && v != NULL) {
+ name = ucl_object_tostring(n);
+
+ if (!name || !rspamd_action_from_str(name, &test_act)) {
+ msg_err("unknown action: %s",
+ ucl_object_tostring(ucl_object_lookup(it_val,
+ "name")));
+ continue;
+ }
+
+
+ if (ucl_object_type(v) == UCL_NULL) {
+ nscore = NAN;
+ }
+ else {
+ nscore = ucl_object_todouble(v);
+ }
+
+ ucl_object_t *obj_tbl = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(obj_tbl, ucl_object_fromdouble(nscore),
+ "score", 0, false);
+ ucl_object_insert_key(obj_tbl, ucl_object_fromdouble(priority),
+ "priority", 0, false);
+ rspamd_config_set_action_score(cfg, name, obj_tbl);
+ ucl_object_unref(obj_tbl);
+ }
+ else {
+ msg_info(
+ "json action object has no mandatory 'name' and 'value' attributes");
+ }
+ }
+ }
+ else {
+ ucl_object_t *arr;
+
+ arr = ucl_object_typed_new(UCL_ARRAY);
+ ucl_object_insert_key((ucl_object_t *) cur_elt, arr, "actions",
+ sizeof("actions") - 1, false);
+ }
+ }
+}
+
+/* Callbacks for reading json dynamic rules */
+static gchar *
+json_config_read_cb(gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct config_json_buf *jb, *pd;
+
+ pd = data->prev_data;
+
+ g_assert(pd != NULL);
+
+ if (data->cur_data == NULL) {
+ jb = g_malloc0(sizeof(*jb));
+ jb->cfg = pd->cfg;
+ data->cur_data = jb;
+ }
+ else {
+ jb = data->cur_data;
+ }
+
+ if (jb->buf == NULL) {
+ /* Allocate memory for buffer */
+ jb->buf = g_string_sized_new(MAX(len, BUFSIZ));
+ }
+
+ g_string_append_len(jb->buf, chunk, len);
+
+ return NULL;
+}
+
+static void
+json_config_fin_cb(struct map_cb_data *data, void **target)
+{
+ struct config_json_buf *jb;
+ ucl_object_t *top;
+ struct ucl_parser *parser;
+
+ /* Now parse json */
+ if (data->cur_data) {
+ jb = data->cur_data;
+ }
+ else {
+ return;
+ }
+
+ if (jb->buf == NULL) {
+ msg_err("no data read");
+
+ return;
+ }
+
+ parser = ucl_parser_new(0);
+
+ if (!ucl_parser_add_chunk(parser, jb->buf->str, jb->buf->len)) {
+ msg_err("cannot load json data: parse error %s",
+ ucl_parser_get_error(parser));
+ ucl_parser_free(parser);
+ return;
+ }
+
+ top = ucl_parser_get_object(parser);
+ ucl_parser_free(parser);
+
+ if (ucl_object_type(top) != UCL_ARRAY) {
+ ucl_object_unref(top);
+ msg_err("loaded json is not an array");
+ return;
+ }
+
+ ucl_object_unref(jb->cfg->current_dynamic_conf);
+ apply_dynamic_conf(top, jb->cfg);
+ jb->cfg->current_dynamic_conf = top;
+
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ if (data->prev_data) {
+ jb = data->prev_data;
+ /* Clean prev data */
+ if (jb->buf) {
+ g_string_free(jb->buf, TRUE);
+ }
+
+ g_free(jb);
+ }
+}
+
+static void
+json_config_dtor_cb(struct map_cb_data *data)
+{
+ struct config_json_buf *jb;
+
+ if (data->cur_data) {
+ jb = data->cur_data;
+ /* Clean prev data */
+ if (jb->buf) {
+ g_string_free(jb->buf, TRUE);
+ }
+
+ if (jb->cfg && jb->cfg->current_dynamic_conf) {
+ ucl_object_unref(jb->cfg->current_dynamic_conf);
+ }
+
+ g_free(jb);
+ }
+}
+
+/**
+ * Init dynamic configuration using map logic and specific configuration
+ * @param cfg config file
+ */
+void init_dynamic_config(struct rspamd_config *cfg)
+{
+ struct config_json_buf *jb, **pjb;
+
+ if (cfg->dynamic_conf == NULL) {
+ /* No dynamic conf has been specified, so do not try to load it */
+ return;
+ }
+
+ /* Now try to add map with json data */
+ jb = g_malloc(sizeof(struct config_json_buf));
+ pjb = g_malloc(sizeof(struct config_json_buf *));
+ jb->buf = NULL;
+ jb->cfg = cfg;
+ *pjb = jb;
+ cfg->current_dynamic_conf = ucl_object_typed_new(UCL_ARRAY);
+ rspamd_mempool_add_destructor(cfg->cfg_pool,
+ (rspamd_mempool_destruct_t) g_free,
+ pjb);
+
+ if (!rspamd_map_add(cfg,
+ cfg->dynamic_conf,
+ "Dynamic configuration map",
+ json_config_read_cb,
+ json_config_fin_cb,
+ json_config_dtor_cb,
+ (void **) pjb, NULL, RSPAMD_MAP_DEFAULT)) {
+ msg_err("cannot add map for configuration %s", cfg->dynamic_conf);
+ }
+}
+
+/**
+ * Dump dynamic configuration to the disk
+ * @param cfg
+ * @return
+ */
+gboolean
+dump_dynamic_config(struct rspamd_config *cfg)
+{
+ struct stat st;
+ gchar *dir, pathbuf[PATH_MAX];
+ gint fd;
+
+ if (cfg->dynamic_conf == NULL || cfg->current_dynamic_conf == NULL) {
+ /* No dynamic conf has been specified, so do not try to dump it */
+ msg_err("cannot save dynamic conf as it is not specified");
+ return FALSE;
+ }
+
+ dir = g_path_get_dirname(cfg->dynamic_conf);
+ if (dir == NULL) {
+ msg_err("invalid path: %s", cfg->dynamic_conf);
+ return FALSE;
+ }
+
+ if (stat(cfg->dynamic_conf, &st) == -1) {
+ msg_debug("%s is unavailable: %s", cfg->dynamic_conf,
+ strerror(errno));
+ st.st_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+ }
+ if (access(dir, W_OK | R_OK) == -1) {
+ msg_warn("%s is inaccessible: %s", dir, strerror(errno));
+ g_free(dir);
+ return FALSE;
+ }
+ rspamd_snprintf(pathbuf,
+ sizeof(pathbuf),
+ "%s%crconf-XXXXXX",
+ dir,
+ G_DIR_SEPARATOR);
+ g_free(dir);
+#ifdef HAVE_MKSTEMP
+ /* Umask is set before */
+ fd = mkstemp(pathbuf);
+#else
+ fd = g_mkstemp_full(pathbuf, O_RDWR, S_IWUSR | S_IRUSR);
+#endif
+ if (fd == -1) {
+ msg_err("mkstemp error: %s", strerror(errno));
+
+ return FALSE;
+ }
+
+ struct ucl_emitter_functions *emitter_functions;
+ FILE *fp;
+
+ fp = fdopen(fd, "w");
+ emitter_functions = ucl_object_emit_file_funcs(fp);
+
+ if (!ucl_object_emit_full(cfg->current_dynamic_conf, UCL_EMIT_JSON,
+ emitter_functions, NULL)) {
+ msg_err("cannot emit ucl object: %s", strerror(errno));
+ ucl_object_emit_funcs_free(emitter_functions);
+ fclose(fp);
+ return FALSE;
+ }
+
+ (void) unlink(cfg->dynamic_conf);
+
+ /* Rename old config */
+ if (rename(pathbuf, cfg->dynamic_conf) == -1) {
+ msg_err("rename error: %s", strerror(errno));
+ fclose(fp);
+ ucl_object_emit_funcs_free(emitter_functions);
+ unlink(pathbuf);
+
+ return FALSE;
+ }
+ /* Set permissions */
+
+ if (chmod(cfg->dynamic_conf, st.st_mode) == -1) {
+ msg_warn("chmod failed: %s", strerror(errno));
+ }
+
+ fclose(fp);
+ ucl_object_emit_funcs_free(emitter_functions);
+
+ return TRUE;
+}
+
+static ucl_object_t *
+new_dynamic_metric(const gchar *metric_name, ucl_object_t *top)
+{
+ ucl_object_t *metric;
+
+ metric = ucl_object_typed_new(UCL_OBJECT);
+
+ ucl_object_insert_key(metric, ucl_object_fromstring(metric_name),
+ "metric", sizeof("metric") - 1, true);
+ ucl_object_insert_key(metric, ucl_object_typed_new(UCL_ARRAY),
+ "actions", sizeof("actions") - 1, false);
+ ucl_object_insert_key(metric, ucl_object_typed_new(UCL_ARRAY),
+ "symbols", sizeof("symbols") - 1, false);
+
+ ucl_array_append(top, metric);
+
+ return metric;
+}
+
+static ucl_object_t *
+dynamic_metric_find_elt(const ucl_object_t *arr, const gchar *name)
+{
+ ucl_object_iter_t it = NULL;
+ const ucl_object_t *cur, *n;
+
+ it = ucl_object_iterate_new(arr);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != NULL) {
+ if (cur->type == UCL_OBJECT) {
+ n = ucl_object_lookup(cur, "name");
+ if (n && n->type == UCL_STRING &&
+ strcmp(name, ucl_object_tostring(n)) == 0) {
+ ucl_object_iterate_free(it);
+
+ return (ucl_object_t *) ucl_object_lookup(cur, "value");
+ }
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ return NULL;
+}
+
+static ucl_object_t *
+dynamic_metric_find_metric(const ucl_object_t *arr, const gchar *metric)
+{
+ ucl_object_iter_t it = NULL;
+ const ucl_object_t *cur, *n;
+
+ it = ucl_object_iterate_new(arr);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != NULL) {
+ if (cur->type == UCL_OBJECT) {
+ n = ucl_object_lookup(cur, "metric");
+ if (n && n->type == UCL_STRING &&
+ strcmp(metric, ucl_object_tostring(n)) == 0) {
+ ucl_object_iterate_free(it);
+
+ return (ucl_object_t *) cur;
+ }
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ return NULL;
+}
+
+static ucl_object_t *
+new_dynamic_elt(ucl_object_t *arr, const gchar *name, gdouble value)
+{
+ ucl_object_t *n;
+
+ n = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(n, ucl_object_fromstring(name), "name",
+ sizeof("name") - 1, false);
+ ucl_object_insert_key(n, ucl_object_fromdouble(value), "value",
+ sizeof("value") - 1, false);
+
+ ucl_array_append(arr, n);
+
+ return n;
+}
+
+static gint
+rspamd_maybe_add_lua_dynsym(struct rspamd_config *cfg,
+ const gchar *sym,
+ gdouble score)
+{
+ lua_State *L = cfg->lua_state;
+ gint ret = -1;
+ struct rspamd_config **pcfg;
+
+ lua_getglobal(L, "rspamd_plugins");
+ if (lua_type(L, -1) == LUA_TTABLE) {
+ lua_pushstring(L, "dynamic_conf");
+ lua_gettable(L, -2);
+
+ if (lua_type(L, -1) == LUA_TTABLE) {
+ lua_pushstring(L, "add_symbol");
+ lua_gettable(L, -2);
+
+ if (lua_type(L, -1) == LUA_TFUNCTION) {
+ pcfg = lua_newuserdata(L, sizeof(*pcfg));
+ *pcfg = cfg;
+ rspamd_lua_setclass(L, "rspamd{config}", -1);
+ lua_pushstring(L, sym);
+ lua_pushnumber(L, score);
+
+ if (lua_pcall(L, 3, 1, 0) != 0) {
+ msg_err_config("cannot execute add_symbol script: %s",
+ lua_tostring(L, -1));
+ }
+ else {
+ ret = lua_toboolean(L, -1);
+ }
+
+ lua_pop(L, 1);
+ }
+ else {
+ lua_pop(L, 1);
+ }
+ }
+
+ lua_pop(L, 1);
+ }
+
+ lua_pop(L, 1);
+
+ return ret;
+}
+
+static gint
+rspamd_maybe_add_lua_dynact(struct rspamd_config *cfg,
+ const gchar *action,
+ gdouble score)
+{
+ lua_State *L = cfg->lua_state;
+ gint ret = -1;
+ struct rspamd_config **pcfg;
+
+ lua_getglobal(L, "rspamd_plugins");
+ if (lua_type(L, -1) == LUA_TTABLE) {
+ lua_pushstring(L, "dynamic_conf");
+ lua_gettable(L, -2);
+
+ if (lua_type(L, -1) == LUA_TTABLE) {
+ lua_pushstring(L, "add_action");
+ lua_gettable(L, -2);
+
+ if (lua_type(L, -1) == LUA_TFUNCTION) {
+ pcfg = lua_newuserdata(L, sizeof(*pcfg));
+ *pcfg = cfg;
+ rspamd_lua_setclass(L, "rspamd{config}", -1);
+ lua_pushstring(L, action);
+ lua_pushnumber(L, score);
+
+ if (lua_pcall(L, 3, 1, 0) != 0) {
+ msg_err_config("cannot execute add_action script: %s",
+ lua_tostring(L, -1));
+ }
+ else {
+ ret = lua_toboolean(L, -1);
+ }
+
+ lua_pop(L, 1);
+ }
+ else {
+ lua_pop(L, 1);
+ }
+ }
+
+ lua_pop(L, 1);
+ }
+
+ lua_pop(L, 1);
+
+ return ret;
+}
+
+/**
+ * Add symbol for specified metric
+ * @param cfg config file object
+ * @param metric metric's name
+ * @param symbol symbol's name
+ * @param value value of symbol
+ * @return
+ */
+gboolean
+add_dynamic_symbol(struct rspamd_config *cfg,
+ const gchar *metric_name,
+ const gchar *symbol,
+ gdouble value)
+{
+ ucl_object_t *metric, *syms;
+ gint ret;
+
+ if ((ret = rspamd_maybe_add_lua_dynsym(cfg, symbol, value)) != -1) {
+ return ret == 0 ? FALSE : TRUE;
+ }
+
+ if (cfg->dynamic_conf == NULL) {
+ msg_info("dynamic conf is disabled");
+ return FALSE;
+ }
+
+ metric = dynamic_metric_find_metric(cfg->current_dynamic_conf,
+ metric_name);
+ if (metric == NULL) {
+ metric = new_dynamic_metric(metric_name, cfg->current_dynamic_conf);
+ }
+
+ syms = (ucl_object_t *) ucl_object_lookup(metric, "symbols");
+ if (syms != NULL) {
+ ucl_object_t *sym;
+
+ sym = dynamic_metric_find_elt(syms, symbol);
+ if (sym) {
+ sym->value.dv = value;
+ }
+ else {
+ new_dynamic_elt(syms, symbol, value);
+ }
+ }
+
+ apply_dynamic_conf(cfg->current_dynamic_conf, cfg);
+
+ return TRUE;
+}
+
+gboolean
+remove_dynamic_symbol(struct rspamd_config *cfg,
+ const gchar *metric_name,
+ const gchar *symbol)
+{
+ ucl_object_t *metric, *syms;
+ gboolean ret = FALSE;
+
+ if (cfg->dynamic_conf == NULL) {
+ msg_info("dynamic conf is disabled");
+ return FALSE;
+ }
+
+ metric = dynamic_metric_find_metric(cfg->current_dynamic_conf,
+ metric_name);
+ if (metric == NULL) {
+ return FALSE;
+ }
+
+ syms = (ucl_object_t *) ucl_object_lookup(metric, "symbols");
+ if (syms != NULL) {
+ ucl_object_t *sym;
+
+ sym = dynamic_metric_find_elt(syms, symbol);
+
+ if (sym) {
+ ret = ucl_array_delete((ucl_object_t *) syms, sym) != NULL;
+
+ if (ret) {
+ ucl_object_unref(sym);
+ }
+ }
+ }
+
+ if (ret) {
+ apply_dynamic_conf(cfg->current_dynamic_conf, cfg);
+ }
+
+ return ret;
+}
+
+
+/**
+ * Add action for specified metric
+ * @param cfg config file object
+ * @param metric metric's name
+ * @param action action's name
+ * @param value value of symbol
+ * @return
+ */
+gboolean
+add_dynamic_action(struct rspamd_config *cfg,
+ const gchar *metric_name,
+ guint action,
+ gdouble value)
+{
+ ucl_object_t *metric, *acts;
+ const gchar *action_name = rspamd_action_to_str(action);
+ gint ret;
+
+ if ((ret = rspamd_maybe_add_lua_dynact(cfg, action_name, value)) != -1) {
+ return ret == 0 ? FALSE : TRUE;
+ }
+
+ if (cfg->dynamic_conf == NULL) {
+ msg_info("dynamic conf is disabled");
+ return FALSE;
+ }
+
+ metric = dynamic_metric_find_metric(cfg->current_dynamic_conf,
+ metric_name);
+ if (metric == NULL) {
+ metric = new_dynamic_metric(metric_name, cfg->current_dynamic_conf);
+ }
+
+ acts = (ucl_object_t *) ucl_object_lookup(metric, "actions");
+ if (acts != NULL) {
+ ucl_object_t *act;
+
+ act = dynamic_metric_find_elt(acts, action_name);
+ if (act) {
+ act->value.dv = value;
+ }
+ else {
+ new_dynamic_elt(acts, action_name, value);
+ }
+ }
+
+ apply_dynamic_conf(cfg->current_dynamic_conf, cfg);
+
+ return TRUE;
+}
+
+gboolean
+remove_dynamic_action(struct rspamd_config *cfg,
+ const gchar *metric_name,
+ guint action)
+{
+ ucl_object_t *metric, *acts;
+ const gchar *action_name = rspamd_action_to_str(action);
+ gboolean ret = FALSE;
+
+ if (cfg->dynamic_conf == NULL) {
+ msg_info("dynamic conf is disabled");
+ return FALSE;
+ }
+
+ metric = dynamic_metric_find_metric(cfg->current_dynamic_conf,
+ metric_name);
+ if (metric == NULL) {
+ return FALSE;
+ }
+
+ acts = (ucl_object_t *) ucl_object_lookup(metric, "actions");
+
+ if (acts != NULL) {
+ ucl_object_t *act;
+
+ act = dynamic_metric_find_elt(acts, action_name);
+
+ if (act) {
+ ret = ucl_array_delete(acts, act) != NULL;
+ }
+ if (ret) {
+ ucl_object_unref(act);
+ }
+ }
+
+ if (ret) {
+ apply_dynamic_conf(cfg->current_dynamic_conf, cfg);
+ }
+
+ return ret;
+}
diff --git a/src/libserver/dynamic_cfg.h b/src/libserver/dynamic_cfg.h
new file mode 100644
index 0000000..bb386ca
--- /dev/null
+++ b/src/libserver/dynamic_cfg.h
@@ -0,0 +1,81 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DYNAMIC_CFG_H_
+#define DYNAMIC_CFG_H_
+
+#include "config.h"
+#include "cfg_file.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Init dynamic configuration using map logic and specific configuration
+ * @param cfg config file
+ */
+void init_dynamic_config(struct rspamd_config *cfg);
+
+/**
+ * Dump dynamic configuration to the disk
+ * @param cfg
+ * @return
+ */
+gboolean dump_dynamic_config(struct rspamd_config *cfg);
+
+/**
+ * Add symbol for specified metric
+ * @param cfg config file object
+ * @param metric metric's name
+ * @param symbol symbol's name
+ * @param value value of symbol
+ * @return
+ */
+gboolean add_dynamic_symbol(struct rspamd_config *cfg,
+ const gchar *metric,
+ const gchar *symbol,
+ gdouble value);
+
+gboolean remove_dynamic_symbol(struct rspamd_config *cfg,
+ const gchar *metric,
+ const gchar *symbol);
+
+/**
+ * Add action for specified metric
+ * @param cfg config file object
+ * @param metric metric's name
+ * @param action action's name
+ * @param value value of symbol
+ * @return
+ */
+gboolean add_dynamic_action(struct rspamd_config *cfg,
+ const gchar *metric,
+ guint action,
+ gdouble value);
+
+/**
+ * Removes dynamic action
+ */
+gboolean remove_dynamic_action(struct rspamd_config *cfg,
+ const gchar *metric,
+ guint action);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DYNAMIC_CFG_H_ */
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend.c b/src/libserver/fuzzy_backend/fuzzy_backend.c
new file mode 100644
index 0000000..9099f38
--- /dev/null
+++ b/src/libserver/fuzzy_backend/fuzzy_backend.c
@@ -0,0 +1,560 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "fuzzy_backend.h"
+#include "fuzzy_backend_sqlite.h"
+#include "fuzzy_backend_redis.h"
+#include "cfg_file.h"
+#include "fuzzy_wire.h"
+
+#define DEFAULT_EXPIRE 172800L
+
+enum rspamd_fuzzy_backend_type {
+ RSPAMD_FUZZY_BACKEND_SQLITE = 0,
+ RSPAMD_FUZZY_BACKEND_REDIS = 1,
+};
+
+static void *rspamd_fuzzy_backend_init_sqlite(struct rspamd_fuzzy_backend *bk,
+ const ucl_object_t *obj, struct rspamd_config *cfg, GError **err);
+static void rspamd_fuzzy_backend_check_sqlite(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud,
+ void *subr_ud);
+static void rspamd_fuzzy_backend_update_sqlite(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src,
+ rspamd_fuzzy_update_cb cb, void *ud,
+ void *subr_ud);
+static void rspamd_fuzzy_backend_count_sqlite(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud,
+ void *subr_ud);
+static void rspamd_fuzzy_backend_version_sqlite(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud,
+ void *subr_ud);
+static const gchar *rspamd_fuzzy_backend_id_sqlite(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud);
+static void rspamd_fuzzy_backend_expire_sqlite(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud);
+static void rspamd_fuzzy_backend_close_sqlite(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud);
+
+struct rspamd_fuzzy_backend_subr {
+ void *(*init)(struct rspamd_fuzzy_backend *bk, const ucl_object_t *obj,
+ struct rspamd_config *cfg,
+ GError **err);
+ void (*check)(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud,
+ void *subr_ud);
+ void (*update)(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src,
+ rspamd_fuzzy_update_cb cb, void *ud,
+ void *subr_ud);
+ void (*count)(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud,
+ void *subr_ud);
+ void (*version)(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud,
+ void *subr_ud);
+ const gchar *(*id)(struct rspamd_fuzzy_backend *bk, void *subr_ud);
+ void (*periodic)(struct rspamd_fuzzy_backend *bk, void *subr_ud);
+ void (*close)(struct rspamd_fuzzy_backend *bk, void *subr_ud);
+};
+
+static const struct rspamd_fuzzy_backend_subr fuzzy_subrs[] = {
+ [RSPAMD_FUZZY_BACKEND_SQLITE] = {
+ .init = rspamd_fuzzy_backend_init_sqlite,
+ .check = rspamd_fuzzy_backend_check_sqlite,
+ .update = rspamd_fuzzy_backend_update_sqlite,
+ .count = rspamd_fuzzy_backend_count_sqlite,
+ .version = rspamd_fuzzy_backend_version_sqlite,
+ .id = rspamd_fuzzy_backend_id_sqlite,
+ .periodic = rspamd_fuzzy_backend_expire_sqlite,
+ .close = rspamd_fuzzy_backend_close_sqlite,
+ },
+ [RSPAMD_FUZZY_BACKEND_REDIS] = {
+ .init = rspamd_fuzzy_backend_init_redis,
+ .check = rspamd_fuzzy_backend_check_redis,
+ .update = rspamd_fuzzy_backend_update_redis,
+ .count = rspamd_fuzzy_backend_count_redis,
+ .version = rspamd_fuzzy_backend_version_redis,
+ .id = rspamd_fuzzy_backend_id_redis,
+ .periodic = rspamd_fuzzy_backend_expire_redis,
+ .close = rspamd_fuzzy_backend_close_redis,
+ }};
+
+struct rspamd_fuzzy_backend {
+ enum rspamd_fuzzy_backend_type type;
+ gdouble expire;
+ gdouble sync;
+ struct ev_loop *event_loop;
+ rspamd_fuzzy_periodic_cb periodic_cb;
+ void *periodic_ud;
+ const struct rspamd_fuzzy_backend_subr *subr;
+ void *subr_ud;
+ ev_timer periodic_event;
+};
+
+static GQuark
+rspamd_fuzzy_backend_quark(void)
+{
+ return g_quark_from_static_string("fuzzy-backend");
+}
+
+static void *
+rspamd_fuzzy_backend_init_sqlite(struct rspamd_fuzzy_backend *bk,
+ const ucl_object_t *obj, struct rspamd_config *cfg, GError **err)
+{
+ const ucl_object_t *elt;
+
+ elt = ucl_object_lookup_any(obj, "hashfile", "hash_file", "file",
+ "database", NULL);
+
+ if (elt == NULL || ucl_object_type(elt) != UCL_STRING) {
+ g_set_error(err, rspamd_fuzzy_backend_quark(),
+ EINVAL, "missing sqlite3 path");
+ return NULL;
+ }
+
+ return rspamd_fuzzy_backend_sqlite_open(ucl_object_tostring(elt),
+ FALSE, err);
+}
+
+static void
+rspamd_fuzzy_backend_check_sqlite(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+ struct rspamd_fuzzy_reply rep;
+
+ rep = rspamd_fuzzy_backend_sqlite_check(sq, cmd, bk->expire);
+
+ if (cb) {
+ cb(&rep, ud);
+ }
+}
+
+static void
+rspamd_fuzzy_backend_update_sqlite(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src,
+ rspamd_fuzzy_update_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+ gboolean success = FALSE;
+ guint i;
+ struct fuzzy_peer_cmd *io_cmd;
+ struct rspamd_fuzzy_cmd *cmd;
+ gpointer ptr;
+ guint nupdates = 0, nadded = 0, ndeleted = 0, nextended = 0, nignored = 0;
+
+ if (rspamd_fuzzy_backend_sqlite_prepare_update(sq, src)) {
+ for (i = 0; i < updates->len; i++) {
+ io_cmd = &g_array_index(updates, struct fuzzy_peer_cmd, i);
+
+ if (io_cmd->is_shingle) {
+ cmd = &io_cmd->cmd.shingle.basic;
+ ptr = &io_cmd->cmd.shingle;
+ }
+ else {
+ cmd = &io_cmd->cmd.normal;
+ ptr = &io_cmd->cmd.normal;
+ }
+
+ if (cmd->cmd == FUZZY_WRITE) {
+ rspamd_fuzzy_backend_sqlite_add(sq, ptr);
+ nadded++;
+ nupdates++;
+ }
+ else if (cmd->cmd == FUZZY_DEL) {
+ rspamd_fuzzy_backend_sqlite_del(sq, ptr);
+ ndeleted++;
+ nupdates++;
+ }
+ else {
+ if (cmd->cmd == FUZZY_REFRESH) {
+ nextended++;
+ }
+ else {
+ nignored++;
+ }
+ }
+ }
+
+ if (rspamd_fuzzy_backend_sqlite_finish_update(sq, src,
+ nupdates > 0)) {
+ success = TRUE;
+ }
+ }
+
+ if (cb) {
+ cb(success, nadded, ndeleted, nextended, nignored, ud);
+ }
+}
+
+static void
+rspamd_fuzzy_backend_count_sqlite(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+ guint64 nhashes;
+
+ nhashes = rspamd_fuzzy_backend_sqlite_count(sq);
+
+ if (cb) {
+ cb(nhashes, ud);
+ }
+}
+
+static void
+rspamd_fuzzy_backend_version_sqlite(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+ guint64 rev;
+
+ rev = rspamd_fuzzy_backend_sqlite_version(sq, src);
+
+ if (cb) {
+ cb(rev, ud);
+ }
+}
+
+static const gchar *
+rspamd_fuzzy_backend_id_sqlite(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+
+ return rspamd_fuzzy_sqlite_backend_id(sq);
+}
+static void
+rspamd_fuzzy_backend_expire_sqlite(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+
+ rspamd_fuzzy_backend_sqlite_sync(sq, bk->expire, TRUE);
+}
+
+static void
+rspamd_fuzzy_backend_close_sqlite(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
+
+ rspamd_fuzzy_backend_sqlite_close(sq);
+}
+
+
+struct rspamd_fuzzy_backend *
+rspamd_fuzzy_backend_create(struct ev_loop *ev_base,
+ const ucl_object_t *config,
+ struct rspamd_config *cfg,
+ GError **err)
+{
+ struct rspamd_fuzzy_backend *bk;
+ enum rspamd_fuzzy_backend_type type = RSPAMD_FUZZY_BACKEND_SQLITE;
+ const ucl_object_t *elt;
+ gdouble expire = DEFAULT_EXPIRE;
+
+ if (config != NULL) {
+ elt = ucl_object_lookup(config, "backend");
+
+ if (elt != NULL && ucl_object_type(elt) == UCL_STRING) {
+ if (strcmp(ucl_object_tostring(elt), "sqlite") == 0) {
+ type = RSPAMD_FUZZY_BACKEND_SQLITE;
+ }
+ else if (strcmp(ucl_object_tostring(elt), "redis") == 0) {
+ type = RSPAMD_FUZZY_BACKEND_REDIS;
+ }
+ else {
+ g_set_error(err, rspamd_fuzzy_backend_quark(),
+ EINVAL, "invalid backend type: %s",
+ ucl_object_tostring(elt));
+ return NULL;
+ }
+ }
+
+ elt = ucl_object_lookup(config, "expire");
+
+ if (elt != NULL) {
+ expire = ucl_object_todouble(elt);
+ }
+ }
+
+ bk = g_malloc0(sizeof(*bk));
+ bk->event_loop = ev_base;
+ bk->expire = expire;
+ bk->type = type;
+ bk->subr = &fuzzy_subrs[type];
+
+ if ((bk->subr_ud = bk->subr->init(bk, config, cfg, err)) == NULL) {
+ g_free(bk);
+
+ return NULL;
+ }
+
+ return bk;
+}
+
+
+void rspamd_fuzzy_backend_check(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud)
+{
+ g_assert(bk != NULL);
+
+ bk->subr->check(bk, cmd, cb, ud, bk->subr_ud);
+}
+
+static guint
+rspamd_fuzzy_digest_hash(gconstpointer key)
+{
+ guint ret;
+
+ /* Distributed uniformly already */
+ memcpy(&ret, key, sizeof(ret));
+
+ return ret;
+}
+
+static gboolean
+rspamd_fuzzy_digest_equal(gconstpointer v, gconstpointer v2)
+{
+ return memcmp(v, v2, rspamd_cryptobox_HASHBYTES) == 0;
+}
+
+static void
+rspamd_fuzzy_backend_deduplicate_queue(GArray *updates)
+{
+ GHashTable *seen = g_hash_table_new(rspamd_fuzzy_digest_hash,
+ rspamd_fuzzy_digest_equal);
+ struct fuzzy_peer_cmd *io_cmd, *found;
+ struct rspamd_fuzzy_cmd *cmd;
+ guchar *digest;
+ guint i;
+
+ for (i = 0; i < updates->len; i++) {
+ io_cmd = &g_array_index(updates, struct fuzzy_peer_cmd, i);
+
+ if (io_cmd->is_shingle) {
+ cmd = &io_cmd->cmd.shingle.basic;
+ }
+ else {
+ cmd = &io_cmd->cmd.normal;
+ }
+
+ digest = cmd->digest;
+
+ found = g_hash_table_lookup(seen, digest);
+
+ if (found == NULL) {
+ /* Add to the seen list, if not a duplicate (huh?) */
+ if (cmd->cmd != FUZZY_DUP) {
+ g_hash_table_insert(seen, digest, io_cmd);
+ }
+ }
+ else {
+ if (found->cmd.normal.flag != cmd->flag) {
+ /* TODO: deal with flags better at some point */
+ continue;
+ }
+
+ /* Apply heuristic */
+ switch (cmd->cmd) {
+ case FUZZY_WRITE:
+ if (found->cmd.normal.cmd == FUZZY_WRITE) {
+ /* Already seen */
+ found->cmd.normal.value += cmd->value;
+ cmd->cmd = FUZZY_DUP; /* Ignore this one */
+ }
+ else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
+ /* Seen refresh command, remove it as write has higher priority */
+ g_hash_table_replace(seen, digest, io_cmd);
+ found->cmd.normal.cmd = FUZZY_DUP;
+ }
+ else if (found->cmd.normal.cmd == FUZZY_DEL) {
+ /* Request delete + add, weird, but ignore add */
+ cmd->cmd = FUZZY_DUP; /* Ignore this one */
+ }
+ break;
+ case FUZZY_REFRESH:
+ if (found->cmd.normal.cmd == FUZZY_WRITE) {
+ /* No need to expire, handled by addition */
+ cmd->cmd = FUZZY_DUP; /* Ignore this one */
+ }
+ else if (found->cmd.normal.cmd == FUZZY_DEL) {
+ /* Request delete + expire, ignore expire */
+ cmd->cmd = FUZZY_DUP; /* Ignore this one */
+ }
+ else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
+ /* Already handled */
+ cmd->cmd = FUZZY_DUP; /* Ignore this one */
+ }
+ break;
+ case FUZZY_DEL:
+ /* Delete has priority over all other commands */
+ g_hash_table_replace(seen, digest, io_cmd);
+ found->cmd.normal.cmd = FUZZY_DUP;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ g_hash_table_unref(seen);
+}
+
+void rspamd_fuzzy_backend_process_updates(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src, rspamd_fuzzy_update_cb cb,
+ void *ud)
+{
+ g_assert(bk != NULL);
+ g_assert(updates != NULL);
+
+ if (updates) {
+ rspamd_fuzzy_backend_deduplicate_queue(updates);
+ bk->subr->update(bk, updates, src, cb, ud, bk->subr_ud);
+ }
+ else if (cb) {
+ cb(TRUE, 0, 0, 0, 0, ud);
+ }
+}
+
+
+void rspamd_fuzzy_backend_count(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud)
+{
+ g_assert(bk != NULL);
+
+ bk->subr->count(bk, cb, ud, bk->subr_ud);
+}
+
+
+void rspamd_fuzzy_backend_version(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud)
+{
+ g_assert(bk != NULL);
+
+ bk->subr->version(bk, src, cb, ud, bk->subr_ud);
+}
+
+const gchar *
+rspamd_fuzzy_backend_id(struct rspamd_fuzzy_backend *bk)
+{
+ g_assert(bk != NULL);
+
+ if (bk->subr->id) {
+ return bk->subr->id(bk, bk->subr_ud);
+ }
+
+ return NULL;
+}
+
+static inline void
+rspamd_fuzzy_backend_periodic_sync(struct rspamd_fuzzy_backend *bk)
+{
+ if (bk->periodic_cb) {
+ if (bk->periodic_cb(bk->periodic_ud)) {
+ if (bk->subr->periodic) {
+ bk->subr->periodic(bk, bk->subr_ud);
+ }
+ }
+ }
+ else {
+ if (bk->subr->periodic) {
+ bk->subr->periodic(bk, bk->subr_ud);
+ }
+ }
+}
+
+static void
+rspamd_fuzzy_backend_periodic_cb(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_fuzzy_backend *bk = (struct rspamd_fuzzy_backend *) w->data;
+ gdouble jittered;
+
+ jittered = rspamd_time_jitter(bk->sync, bk->sync / 2.0);
+ w->repeat = jittered;
+ rspamd_fuzzy_backend_periodic_sync(bk);
+ ev_timer_again(EV_A_ w);
+}
+
+void rspamd_fuzzy_backend_start_update(struct rspamd_fuzzy_backend *bk,
+ gdouble timeout,
+ rspamd_fuzzy_periodic_cb cb,
+ void *ud)
+{
+ gdouble jittered;
+
+ g_assert(bk != NULL);
+
+ if (bk->subr->periodic) {
+ if (bk->sync > 0.0) {
+ ev_timer_stop(bk->event_loop, &bk->periodic_event);
+ }
+
+ if (cb) {
+ bk->periodic_cb = cb;
+ bk->periodic_ud = ud;
+ }
+
+ rspamd_fuzzy_backend_periodic_sync(bk);
+ bk->sync = timeout;
+ jittered = rspamd_time_jitter(timeout, timeout / 2.0);
+
+ bk->periodic_event.data = bk;
+ ev_timer_init(&bk->periodic_event, rspamd_fuzzy_backend_periodic_cb,
+ jittered, 0.0);
+ ev_timer_start(bk->event_loop, &bk->periodic_event);
+ }
+}
+
+void rspamd_fuzzy_backend_close(struct rspamd_fuzzy_backend *bk)
+{
+ g_assert(bk != NULL);
+
+ if (bk->sync > 0.0) {
+ rspamd_fuzzy_backend_periodic_sync(bk);
+ ev_timer_stop(bk->event_loop, &bk->periodic_event);
+ }
+
+ bk->subr->close(bk, bk->subr_ud);
+
+ g_free(bk);
+}
+
+struct ev_loop *
+rspamd_fuzzy_backend_event_base(struct rspamd_fuzzy_backend *backend)
+{
+ return backend->event_loop;
+}
+
+gdouble
+rspamd_fuzzy_backend_get_expire(struct rspamd_fuzzy_backend *backend)
+{
+ return backend->expire;
+}
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend.h b/src/libserver/fuzzy_backend/fuzzy_backend.h
new file mode 100644
index 0000000..a1b74bc
--- /dev/null
+++ b/src/libserver/fuzzy_backend/fuzzy_backend.h
@@ -0,0 +1,131 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_FUZZY_BACKEND_H_
+#define SRC_LIBSERVER_FUZZY_BACKEND_H_
+
+#include "config.h"
+#include "contrib/libev/ev.h"
+#include "fuzzy_wire.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_fuzzy_backend;
+struct rspamd_config;
+
+/*
+ * Callbacks for fuzzy methods
+ */
+typedef void (*rspamd_fuzzy_check_cb)(struct rspamd_fuzzy_reply *rep, void *ud);
+
+typedef void (*rspamd_fuzzy_update_cb)(gboolean success,
+ guint nadded,
+ guint ndeleted,
+ guint nextended,
+ guint nignored,
+ void *ud);
+
+typedef void (*rspamd_fuzzy_version_cb)(guint64 rev, void *ud);
+
+typedef void (*rspamd_fuzzy_count_cb)(guint64 count, void *ud);
+
+typedef gboolean (*rspamd_fuzzy_periodic_cb)(void *ud);
+
+/**
+ * Open fuzzy backend
+ * @param ev_base
+ * @param config
+ * @param err
+ * @return
+ */
+struct rspamd_fuzzy_backend *rspamd_fuzzy_backend_create(struct ev_loop *ev_base,
+ const ucl_object_t *config,
+ struct rspamd_config *cfg,
+ GError **err);
+
+
+/**
+ * Check a specific hash in storage
+ * @param cmd
+ * @param cb
+ * @param ud
+ */
+void rspamd_fuzzy_backend_check(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud);
+
+/**
+ * Process updates for a specific queue
+ * @param bk
+ * @param updates queue of struct fuzzy_peer_cmd
+ * @param src
+ */
+void rspamd_fuzzy_backend_process_updates(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src, rspamd_fuzzy_update_cb cb,
+ void *ud);
+
+/**
+ * Gets number of hashes from the backend
+ * @param bk
+ * @param cb
+ * @param ud
+ */
+void rspamd_fuzzy_backend_count(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud);
+
+/**
+ * Returns number of revision for a specific source
+ * @param bk
+ * @param src
+ * @param cb
+ * @param ud
+ */
+void rspamd_fuzzy_backend_version(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud);
+
+/**
+ * Returns unique id for backend
+ * @param backend
+ * @return
+ */
+const gchar *rspamd_fuzzy_backend_id(struct rspamd_fuzzy_backend *backend);
+
+/**
+ * Starts expire process for the backend
+ * @param backend
+ */
+void rspamd_fuzzy_backend_start_update(struct rspamd_fuzzy_backend *backend,
+ gdouble timeout,
+ rspamd_fuzzy_periodic_cb cb,
+ void *ud);
+
+struct ev_loop *rspamd_fuzzy_backend_event_base(struct rspamd_fuzzy_backend *backend);
+
+gdouble rspamd_fuzzy_backend_get_expire(struct rspamd_fuzzy_backend *backend);
+
+/**
+ * Closes backend
+ * @param backend
+ */
+void rspamd_fuzzy_backend_close(struct rspamd_fuzzy_backend *backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_FUZZY_BACKEND_H_ */
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
new file mode 100644
index 0000000..7ab7ca6
--- /dev/null
+++ b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
@@ -0,0 +1,1666 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "ref.h"
+#include "fuzzy_backend.h"
+#include "fuzzy_backend_redis.h"
+#include "redis_pool.h"
+#include "cryptobox.h"
+#include "str_util.h"
+#include "upstream.h"
+#include "contrib/hiredis/hiredis.h"
+#include "contrib/hiredis/async.h"
+#include "lua/lua_common.h"
+
+#define REDIS_DEFAULT_PORT 6379
+#define REDIS_DEFAULT_OBJECT "fuzzy"
+#define REDIS_DEFAULT_TIMEOUT 2.0
+
+#define msg_err_redis_session(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "fuzzy_redis", session->backend->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_redis_session(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "fuzzy_redis", session->backend->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_redis_session(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "fuzzy_redis", session->backend->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_redis_session(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_fuzzy_redis_log_id, "fuzzy_redis", session->backend->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(fuzzy_redis)
+
+struct rspamd_fuzzy_backend_redis {
+ lua_State *L;
+ const gchar *redis_object;
+ const gchar *username;
+ const gchar *password;
+ const gchar *dbname;
+ gchar *id;
+ struct rspamd_redis_pool *pool;
+ gdouble timeout;
+ gint conf_ref;
+ bool terminated;
+ ref_entry_t ref;
+};
+
+enum rspamd_fuzzy_redis_command {
+ RSPAMD_FUZZY_REDIS_COMMAND_COUNT,
+ RSPAMD_FUZZY_REDIS_COMMAND_VERSION,
+ RSPAMD_FUZZY_REDIS_COMMAND_UPDATES,
+ RSPAMD_FUZZY_REDIS_COMMAND_CHECK
+};
+
+struct rspamd_fuzzy_redis_session {
+ struct rspamd_fuzzy_backend_redis *backend;
+ redisAsyncContext *ctx;
+ ev_timer timeout;
+ const struct rspamd_fuzzy_cmd *cmd;
+ struct ev_loop *event_loop;
+ float prob;
+ gboolean shingles_checked;
+
+ enum rspamd_fuzzy_redis_command command;
+ guint nargs;
+
+ guint nadded;
+ guint ndeleted;
+ guint nextended;
+ guint nignored;
+
+ union {
+ rspamd_fuzzy_check_cb cb_check;
+ rspamd_fuzzy_update_cb cb_update;
+ rspamd_fuzzy_version_cb cb_version;
+ rspamd_fuzzy_count_cb cb_count;
+ } callback;
+ void *cbdata;
+
+ gchar **argv;
+ gsize *argv_lens;
+ struct upstream *up;
+ guchar found_digest[rspamd_cryptobox_HASHBYTES];
+};
+
+static inline struct upstream_list *
+rspamd_redis_get_servers(struct rspamd_fuzzy_backend_redis *ctx,
+ const gchar *what)
+{
+ lua_State *L = ctx->L;
+ struct upstream_list *res = NULL;
+
+ lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->conf_ref);
+ lua_pushstring(L, what);
+ lua_gettable(L, -2);
+
+ if (lua_type(L, -1) == LUA_TUSERDATA) {
+ res = *((struct upstream_list **) lua_touserdata(L, -1));
+ }
+ else {
+ struct lua_logger_trace tr;
+ gchar outbuf[8192];
+
+ memset(&tr, 0, sizeof(tr));
+ lua_logger_out_type(L, -2, outbuf, sizeof(outbuf) - 1, &tr,
+ LUA_ESCAPE_UNPRINTABLE);
+
+ msg_err("cannot get %s upstreams for Redis fuzzy storage %s; table content: %s",
+ what, ctx->id, outbuf);
+ }
+
+ lua_settop(L, 0);
+
+ return res;
+}
+
+static inline void
+rspamd_fuzzy_redis_session_free_args(struct rspamd_fuzzy_redis_session *session)
+{
+ guint i;
+
+ if (session->argv) {
+ for (i = 0; i < session->nargs; i++) {
+ g_free(session->argv[i]);
+ }
+
+ g_free(session->argv);
+ g_free(session->argv_lens);
+ }
+}
+static void
+rspamd_fuzzy_redis_session_dtor(struct rspamd_fuzzy_redis_session *session,
+ gboolean is_fatal)
+{
+ redisAsyncContext *ac;
+
+
+ if (session->ctx) {
+ ac = session->ctx;
+ session->ctx = NULL;
+ rspamd_redis_pool_release_connection(session->backend->pool,
+ ac,
+ is_fatal ? RSPAMD_REDIS_RELEASE_FATAL : RSPAMD_REDIS_RELEASE_DEFAULT);
+ }
+
+ ev_timer_stop(session->event_loop, &session->timeout);
+ rspamd_fuzzy_redis_session_free_args(session);
+
+ REF_RELEASE(session->backend);
+ rspamd_upstream_unref(session->up);
+ g_free(session);
+}
+
+static void
+rspamd_fuzzy_backend_redis_dtor(struct rspamd_fuzzy_backend_redis *backend)
+{
+ if (!backend->terminated && backend->conf_ref != -1) {
+ luaL_unref(backend->L, LUA_REGISTRYINDEX, backend->conf_ref);
+ }
+
+ if (backend->id) {
+ g_free(backend->id);
+ }
+
+ g_free(backend);
+}
+
+void *
+rspamd_fuzzy_backend_init_redis(struct rspamd_fuzzy_backend *bk,
+ const ucl_object_t *obj, struct rspamd_config *cfg, GError **err)
+{
+ struct rspamd_fuzzy_backend_redis *backend;
+ const ucl_object_t *elt;
+ gboolean ret = FALSE;
+ guchar id_hash[rspamd_cryptobox_HASHBYTES];
+ rspamd_cryptobox_hash_state_t st;
+ lua_State *L = (lua_State *) cfg->lua_state;
+ gint conf_ref = -1;
+
+ backend = g_malloc0(sizeof(*backend));
+
+ backend->timeout = REDIS_DEFAULT_TIMEOUT;
+ backend->redis_object = REDIS_DEFAULT_OBJECT;
+ backend->L = L;
+
+ ret = rspamd_lua_try_load_redis(L, obj, cfg, &conf_ref);
+
+ /* Now try global redis settings */
+ if (!ret) {
+ elt = ucl_object_lookup(cfg->cfg_ucl_obj, "redis");
+
+ if (elt) {
+ const ucl_object_t *specific_obj;
+
+ specific_obj = ucl_object_lookup_any(elt, "fuzzy", "fuzzy_storage",
+ NULL);
+
+ if (specific_obj) {
+ ret = rspamd_lua_try_load_redis(L, specific_obj, cfg, &conf_ref);
+ }
+ else {
+ ret = rspamd_lua_try_load_redis(L, elt, cfg, &conf_ref);
+ }
+ }
+ }
+
+ if (!ret) {
+ msg_err_config("cannot init redis backend for fuzzy storage");
+ g_free(backend);
+
+ return NULL;
+ }
+
+ elt = ucl_object_lookup(obj, "prefix");
+ if (elt == NULL || ucl_object_type(elt) != UCL_STRING) {
+ backend->redis_object = REDIS_DEFAULT_OBJECT;
+ }
+ else {
+ backend->redis_object = ucl_object_tostring(elt);
+ }
+
+ backend->conf_ref = conf_ref;
+
+ /* Check some common table values */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, conf_ref);
+
+ lua_pushstring(L, "timeout");
+ lua_gettable(L, -2);
+ if (lua_type(L, -1) == LUA_TNUMBER) {
+ backend->timeout = lua_tonumber(L, -1);
+ }
+ lua_pop(L, 1);
+
+ lua_pushstring(L, "db");
+ lua_gettable(L, -2);
+ if (lua_type(L, -1) == LUA_TSTRING) {
+ backend->dbname = rspamd_mempool_strdup(cfg->cfg_pool,
+ lua_tostring(L, -1));
+ }
+ lua_pop(L, 1);
+
+ lua_pushstring(L, "username");
+ lua_gettable(L, -2);
+ if (lua_type(L, -1) == LUA_TSTRING) {
+ backend->username = rspamd_mempool_strdup(cfg->cfg_pool,
+ lua_tostring(L, -1));
+ }
+ lua_pop(L, 1);
+
+ lua_pushstring(L, "password");
+ lua_gettable(L, -2);
+ if (lua_type(L, -1) == LUA_TSTRING) {
+ backend->password = rspamd_mempool_strdup(cfg->cfg_pool,
+ lua_tostring(L, -1));
+ }
+ lua_pop(L, 1);
+
+ lua_settop(L, 0);
+
+ REF_INIT_RETAIN(backend, rspamd_fuzzy_backend_redis_dtor);
+ backend->pool = cfg->redis_pool;
+ rspamd_cryptobox_hash_init(&st, NULL, 0);
+ rspamd_cryptobox_hash_update(&st, backend->redis_object,
+ strlen(backend->redis_object));
+
+ if (backend->dbname) {
+ rspamd_cryptobox_hash_update(&st, backend->dbname,
+ strlen(backend->dbname));
+ }
+
+ if (backend->username) {
+ rspamd_cryptobox_hash_update(&st, backend->username,
+ strlen(backend->username));
+ }
+
+ if (backend->password) {
+ rspamd_cryptobox_hash_update(&st, backend->password,
+ strlen(backend->password));
+ }
+
+ rspamd_cryptobox_hash_final(&st, id_hash);
+ backend->id = rspamd_encode_base32(id_hash, sizeof(id_hash), RSPAMD_BASE32_DEFAULT);
+
+ return backend;
+}
+
+static void
+rspamd_fuzzy_redis_timeout(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_fuzzy_redis_session *session =
+ (struct rspamd_fuzzy_redis_session *) w->data;
+ redisAsyncContext *ac;
+ static char errstr[128];
+
+ if (session->ctx) {
+ ac = session->ctx;
+ session->ctx = NULL;
+ ac->err = REDIS_ERR_IO;
+ /* Should be safe as in hiredis it is char[128] */
+ rspamd_snprintf(errstr, sizeof(errstr), "%s", strerror(ETIMEDOUT));
+ ac->errstr = errstr;
+
+ /* This will cause session closing */
+ rspamd_redis_pool_release_connection(session->backend->pool,
+ ac, RSPAMD_REDIS_RELEASE_FATAL);
+ }
+}
+
+static void rspamd_fuzzy_redis_check_callback(redisAsyncContext *c, gpointer r,
+ gpointer priv);
+
+struct _rspamd_fuzzy_shingles_helper {
+ guchar digest[64];
+ guint found;
+};
+
+static gint
+rspamd_fuzzy_backend_redis_shingles_cmp(const void *a, const void *b)
+{
+ const struct _rspamd_fuzzy_shingles_helper *sha = a,
+ *shb = b;
+
+ return memcmp(sha->digest, shb->digest, sizeof(sha->digest));
+}
+
+static void
+rspamd_fuzzy_redis_shingles_callback(redisAsyncContext *c, gpointer r,
+ gpointer priv)
+{
+ struct rspamd_fuzzy_redis_session *session = priv;
+ redisReply *reply = r, *cur;
+ struct rspamd_fuzzy_reply rep;
+ GString *key;
+ struct _rspamd_fuzzy_shingles_helper *shingles, *prev = NULL, *sel = NULL;
+ guint i, found = 0, max_found = 0, cur_found = 0;
+
+ ev_timer_stop(session->event_loop, &session->timeout);
+ memset(&rep, 0, sizeof(rep));
+
+ if (c->err == 0 && reply != NULL) {
+ rspamd_upstream_ok(session->up);
+
+ if (reply->type == REDIS_REPLY_ARRAY &&
+ reply->elements == RSPAMD_SHINGLE_SIZE) {
+ shingles = g_alloca(sizeof(struct _rspamd_fuzzy_shingles_helper) *
+ RSPAMD_SHINGLE_SIZE);
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ cur = reply->element[i];
+
+ if (cur->type == REDIS_REPLY_STRING) {
+ shingles[i].found = 1;
+ memcpy(shingles[i].digest, cur->str, MIN(64, cur->len));
+ found++;
+ }
+ else {
+ memset(shingles[i].digest, 0, sizeof(shingles[i].digest));
+ shingles[i].found = 0;
+ }
+ }
+
+ if (found > RSPAMD_SHINGLE_SIZE / 2) {
+ /* Now sort to find the most frequent element */
+ qsort(shingles, RSPAMD_SHINGLE_SIZE,
+ sizeof(struct _rspamd_fuzzy_shingles_helper),
+ rspamd_fuzzy_backend_redis_shingles_cmp);
+
+ prev = &shingles[0];
+
+ for (i = 1; i < RSPAMD_SHINGLE_SIZE; i++) {
+ if (!shingles[i].found) {
+ continue;
+ }
+
+ if (memcmp(shingles[i].digest, prev->digest, 64) == 0) {
+ cur_found++;
+
+ if (cur_found > max_found) {
+ max_found = cur_found;
+ sel = &shingles[i];
+ }
+ }
+ else {
+ cur_found = 1;
+ prev = &shingles[i];
+ }
+ }
+
+ if (max_found > RSPAMD_SHINGLE_SIZE / 2) {
+ session->prob = ((float) max_found) / RSPAMD_SHINGLE_SIZE;
+ rep.v1.prob = session->prob;
+
+ g_assert(sel != NULL);
+
+ /* Prepare new check command */
+ rspamd_fuzzy_redis_session_free_args(session);
+ session->nargs = 5;
+ session->argv = g_malloc(sizeof(gchar *) * session->nargs);
+ session->argv_lens = g_malloc(sizeof(gsize) * session->nargs);
+
+ key = g_string_new(session->backend->redis_object);
+ g_string_append_len(key, sel->digest, sizeof(sel->digest));
+ session->argv[0] = g_strdup("HMGET");
+ session->argv_lens[0] = 5;
+ session->argv[1] = key->str;
+ session->argv_lens[1] = key->len;
+ session->argv[2] = g_strdup("V");
+ session->argv_lens[2] = 1;
+ session->argv[3] = g_strdup("F");
+ session->argv_lens[3] = 1;
+ session->argv[4] = g_strdup("C");
+ session->argv_lens[4] = 1;
+ g_string_free(key, FALSE); /* Do not free underlying array */
+ memcpy(session->found_digest, sel->digest,
+ sizeof(session->cmd->digest));
+
+ g_assert(session->ctx != NULL);
+ if (redisAsyncCommandArgv(session->ctx,
+ rspamd_fuzzy_redis_check_callback,
+ session, session->nargs,
+ (const gchar **) session->argv,
+ session->argv_lens) != REDIS_OK) {
+
+ if (session->callback.cb_check) {
+ memset(&rep, 0, sizeof(rep));
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+ }
+ else {
+ /* Add timeout */
+ session->timeout.data = session;
+ ev_now_update_if_cheap((struct ev_loop *) session->event_loop);
+ ev_timer_init(&session->timeout,
+ rspamd_fuzzy_redis_timeout,
+ session->backend->timeout, 0.0);
+ ev_timer_start(session->event_loop, &session->timeout);
+ }
+
+ return;
+ }
+ }
+ }
+ else if (reply->type == REDIS_REPLY_ERROR) {
+ msg_err_redis_session("fuzzy backend redis error: \"%s\"",
+ reply->str);
+ }
+
+ if (session->callback.cb_check) {
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+ }
+ else {
+ if (session->callback.cb_check) {
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+
+ if (c->errstr) {
+ msg_err_redis_session("error getting shingles: %s", c->errstr);
+ rspamd_upstream_fail(session->up, FALSE, c->errstr);
+ }
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, FALSE);
+}
+
+static void
+rspamd_fuzzy_backend_check_shingles(struct rspamd_fuzzy_redis_session *session)
+{
+ struct rspamd_fuzzy_reply rep;
+ const struct rspamd_fuzzy_shingle_cmd *shcmd;
+ GString *key;
+ guint i, init_len;
+
+ rspamd_fuzzy_redis_session_free_args(session);
+ /* First of all check digest */
+ session->nargs = RSPAMD_SHINGLE_SIZE + 1;
+ session->argv = g_malloc(sizeof(gchar *) * session->nargs);
+ session->argv_lens = g_malloc(sizeof(gsize) * session->nargs);
+ shcmd = (const struct rspamd_fuzzy_shingle_cmd *) session->cmd;
+
+ session->argv[0] = g_strdup("MGET");
+ session->argv_lens[0] = 4;
+ init_len = strlen(session->backend->redis_object);
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+
+ key = g_string_sized_new(init_len + 2 + 2 + sizeof("18446744073709551616"));
+ rspamd_printf_gstring(key, "%s_%d_%uL", session->backend->redis_object,
+ i, shcmd->sgl.hashes[i]);
+ session->argv[i + 1] = key->str;
+ session->argv_lens[i + 1] = key->len;
+ g_string_free(key, FALSE); /* Do not free underlying array */
+ }
+
+ session->shingles_checked = TRUE;
+
+ g_assert(session->ctx != NULL);
+
+ if (redisAsyncCommandArgv(session->ctx, rspamd_fuzzy_redis_shingles_callback,
+ session, session->nargs,
+ (const gchar **) session->argv, session->argv_lens) != REDIS_OK) {
+ msg_err("cannot execute redis command on %s: %s",
+ rspamd_inet_address_to_string_pretty(rspamd_upstream_addr_cur(session->up)),
+ session->ctx->errstr);
+
+ if (session->callback.cb_check) {
+ memset(&rep, 0, sizeof(rep));
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+ }
+ else {
+ /* Add timeout */
+ session->timeout.data = session;
+ ev_now_update_if_cheap((struct ev_loop *) session->event_loop);
+ ev_timer_init(&session->timeout,
+ rspamd_fuzzy_redis_timeout,
+ session->backend->timeout, 0.0);
+ ev_timer_start(session->event_loop, &session->timeout);
+ }
+}
+
+static void
+rspamd_fuzzy_redis_check_callback(redisAsyncContext *c, gpointer r,
+ gpointer priv)
+{
+ struct rspamd_fuzzy_redis_session *session = priv;
+ redisReply *reply = r, *cur;
+ struct rspamd_fuzzy_reply rep;
+ gulong value;
+ guint found_elts = 0;
+
+ ev_timer_stop(session->event_loop, &session->timeout);
+ memset(&rep, 0, sizeof(rep));
+
+ if (c->err == 0 && reply != NULL) {
+ rspamd_upstream_ok(session->up);
+
+ if (reply->type == REDIS_REPLY_ARRAY && reply->elements >= 2) {
+ cur = reply->element[0];
+
+ if (cur->type == REDIS_REPLY_STRING) {
+ value = strtoul(cur->str, NULL, 10);
+ rep.v1.value = value;
+ found_elts++;
+ }
+
+ cur = reply->element[1];
+
+ if (cur->type == REDIS_REPLY_STRING) {
+ value = strtoul(cur->str, NULL, 10);
+ rep.v1.flag = value;
+ found_elts++;
+ }
+
+ if (found_elts >= 2) {
+ rep.v1.prob = session->prob;
+ memcpy(rep.digest, session->found_digest, sizeof(rep.digest));
+ }
+
+ rep.ts = 0;
+
+ if (reply->elements > 2) {
+ cur = reply->element[2];
+
+ if (cur->type == REDIS_REPLY_STRING) {
+ rep.ts = strtoul(cur->str, NULL, 10);
+ }
+ }
+ }
+ else if (reply->type == REDIS_REPLY_ERROR) {
+ msg_err_redis_session("fuzzy backend redis error: \"%s\"",
+ reply->str);
+ }
+
+ if (found_elts < 2) {
+ if (session->cmd->shingles_count > 0 && !session->shingles_checked) {
+ /* We also need to check all shingles here */
+ rspamd_fuzzy_backend_check_shingles(session);
+ /* Do not free session */
+ return;
+ }
+ else {
+ if (session->callback.cb_check) {
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+ }
+ }
+ else {
+ if (session->callback.cb_check) {
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+ }
+ }
+ else {
+ if (session->callback.cb_check) {
+ session->callback.cb_check(&rep, session->cbdata);
+ }
+
+ if (c->errstr) {
+ msg_err_redis_session("error getting hashes on %s: %s",
+ rspamd_inet_address_to_string_pretty(rspamd_upstream_addr_cur(session->up)),
+ c->errstr);
+ rspamd_upstream_fail(session->up, FALSE, c->errstr);
+ }
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, FALSE);
+}
+
+void rspamd_fuzzy_backend_check_redis(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+ struct rspamd_fuzzy_redis_session *session;
+ struct upstream *up;
+ struct upstream_list *ups;
+ rspamd_inet_addr_t *addr;
+ struct rspamd_fuzzy_reply rep;
+ GString *key;
+
+ g_assert(backend != NULL);
+
+ ups = rspamd_redis_get_servers(backend, "read_servers");
+ if (!ups) {
+ if (cb) {
+ memset(&rep, 0, sizeof(rep));
+ cb(&rep, ud);
+ }
+
+ return;
+ }
+
+ session = g_malloc0(sizeof(*session));
+ session->backend = backend;
+ REF_RETAIN(session->backend);
+
+ session->callback.cb_check = cb;
+ session->cbdata = ud;
+ session->command = RSPAMD_FUZZY_REDIS_COMMAND_CHECK;
+ session->cmd = cmd;
+ session->prob = 1.0;
+ memcpy(rep.digest, session->cmd->digest, sizeof(rep.digest));
+ memcpy(session->found_digest, session->cmd->digest, sizeof(rep.digest));
+ session->event_loop = rspamd_fuzzy_backend_event_base(bk);
+
+ /* First of all check digest */
+ session->nargs = 5;
+ session->argv = g_malloc(sizeof(gchar *) * session->nargs);
+ session->argv_lens = g_malloc(sizeof(gsize) * session->nargs);
+
+ key = g_string_new(backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ session->argv[0] = g_strdup("HMGET");
+ session->argv_lens[0] = 5;
+ session->argv[1] = key->str;
+ session->argv_lens[1] = key->len;
+ session->argv[2] = g_strdup("V");
+ session->argv_lens[2] = 1;
+ session->argv[3] = g_strdup("F");
+ session->argv_lens[3] = 1;
+ session->argv[4] = g_strdup("C");
+ session->argv_lens[4] = 1;
+ g_string_free(key, FALSE); /* Do not free underlying array */
+
+ up = rspamd_upstream_get(ups,
+ RSPAMD_UPSTREAM_ROUND_ROBIN,
+ NULL,
+ 0);
+
+ session->up = rspamd_upstream_ref(up);
+ addr = rspamd_upstream_addr_next(up);
+ g_assert(addr != NULL);
+ session->ctx = rspamd_redis_pool_connect(backend->pool,
+ backend->dbname,
+ backend->username, backend->password,
+ rspamd_inet_address_to_string(addr),
+ rspamd_inet_address_get_port(addr));
+
+ if (session->ctx == NULL) {
+ rspamd_upstream_fail(up, TRUE, strerror(errno));
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ memset(&rep, 0, sizeof(rep));
+ cb(&rep, ud);
+ }
+ }
+ else {
+ if (redisAsyncCommandArgv(session->ctx, rspamd_fuzzy_redis_check_callback,
+ session, session->nargs,
+ (const gchar **) session->argv, session->argv_lens) != REDIS_OK) {
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ memset(&rep, 0, sizeof(rep));
+ cb(&rep, ud);
+ }
+ }
+ else {
+ /* Add timeout */
+ session->timeout.data = session;
+ ev_now_update_if_cheap((struct ev_loop *) session->event_loop);
+ ev_timer_init(&session->timeout,
+ rspamd_fuzzy_redis_timeout,
+ session->backend->timeout, 0.0);
+ ev_timer_start(session->event_loop, &session->timeout);
+ }
+ }
+}
+
+static void
+rspamd_fuzzy_redis_count_callback(redisAsyncContext *c, gpointer r,
+ gpointer priv)
+{
+ struct rspamd_fuzzy_redis_session *session = priv;
+ redisReply *reply = r;
+ gulong nelts;
+
+ ev_timer_stop(session->event_loop, &session->timeout);
+
+ if (c->err == 0 && reply != NULL) {
+ rspamd_upstream_ok(session->up);
+
+ if (reply->type == REDIS_REPLY_INTEGER) {
+ if (session->callback.cb_count) {
+ session->callback.cb_count(reply->integer, session->cbdata);
+ }
+ }
+ else if (reply->type == REDIS_REPLY_STRING) {
+ nelts = strtoul(reply->str, NULL, 10);
+
+ if (session->callback.cb_count) {
+ session->callback.cb_count(nelts, session->cbdata);
+ }
+ }
+ else {
+ if (reply->type == REDIS_REPLY_ERROR) {
+ msg_err_redis_session("fuzzy backend redis error: \"%s\"",
+ reply->str);
+ }
+ if (session->callback.cb_count) {
+ session->callback.cb_count(0, session->cbdata);
+ }
+ }
+ }
+ else {
+ if (session->callback.cb_count) {
+ session->callback.cb_count(0, session->cbdata);
+ }
+
+ if (c->errstr) {
+ msg_err_redis_session("error getting count on %s: %s",
+ rspamd_inet_address_to_string_pretty(rspamd_upstream_addr_cur(session->up)),
+ c->errstr);
+ rspamd_upstream_fail(session->up, FALSE, c->errstr);
+ }
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, FALSE);
+}
+
+void rspamd_fuzzy_backend_count_redis(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+ struct rspamd_fuzzy_redis_session *session;
+ struct upstream *up;
+ struct upstream_list *ups;
+ rspamd_inet_addr_t *addr;
+ GString *key;
+
+ g_assert(backend != NULL);
+
+ ups = rspamd_redis_get_servers(backend, "read_servers");
+ if (!ups) {
+ if (cb) {
+ cb(0, ud);
+ }
+
+ return;
+ }
+
+ session = g_malloc0(sizeof(*session));
+ session->backend = backend;
+ REF_RETAIN(session->backend);
+
+ session->callback.cb_count = cb;
+ session->cbdata = ud;
+ session->command = RSPAMD_FUZZY_REDIS_COMMAND_COUNT;
+ session->event_loop = rspamd_fuzzy_backend_event_base(bk);
+
+ session->nargs = 2;
+ session->argv = g_malloc(sizeof(gchar *) * 2);
+ session->argv_lens = g_malloc(sizeof(gsize) * 2);
+ key = g_string_new(backend->redis_object);
+ g_string_append(key, "_count");
+ session->argv[0] = g_strdup("GET");
+ session->argv_lens[0] = 3;
+ session->argv[1] = key->str;
+ session->argv_lens[1] = key->len;
+ g_string_free(key, FALSE); /* Do not free underlying array */
+
+ up = rspamd_upstream_get(ups,
+ RSPAMD_UPSTREAM_ROUND_ROBIN,
+ NULL,
+ 0);
+
+ session->up = rspamd_upstream_ref(up);
+ addr = rspamd_upstream_addr_next(up);
+ g_assert(addr != NULL);
+ session->ctx = rspamd_redis_pool_connect(backend->pool,
+ backend->dbname,
+ backend->username, backend->password,
+ rspamd_inet_address_to_string(addr),
+ rspamd_inet_address_get_port(addr));
+
+ if (session->ctx == NULL) {
+ rspamd_upstream_fail(up, TRUE, strerror(errno));
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ cb(0, ud);
+ }
+ }
+ else {
+ if (redisAsyncCommandArgv(session->ctx, rspamd_fuzzy_redis_count_callback,
+ session, session->nargs,
+ (const gchar **) session->argv, session->argv_lens) != REDIS_OK) {
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ cb(0, ud);
+ }
+ }
+ else {
+ /* Add timeout */
+ session->timeout.data = session;
+ ev_now_update_if_cheap((struct ev_loop *) session->event_loop);
+ ev_timer_init(&session->timeout,
+ rspamd_fuzzy_redis_timeout,
+ session->backend->timeout, 0.0);
+ ev_timer_start(session->event_loop, &session->timeout);
+ }
+ }
+}
+
+static void
+rspamd_fuzzy_redis_version_callback(redisAsyncContext *c, gpointer r,
+ gpointer priv)
+{
+ struct rspamd_fuzzy_redis_session *session = priv;
+ redisReply *reply = r;
+ gulong nelts;
+
+ ev_timer_stop(session->event_loop, &session->timeout);
+
+ if (c->err == 0 && reply != NULL) {
+ rspamd_upstream_ok(session->up);
+
+ if (reply->type == REDIS_REPLY_INTEGER) {
+ if (session->callback.cb_version) {
+ session->callback.cb_version(reply->integer, session->cbdata);
+ }
+ }
+ else if (reply->type == REDIS_REPLY_STRING) {
+ nelts = strtoul(reply->str, NULL, 10);
+
+ if (session->callback.cb_version) {
+ session->callback.cb_version(nelts, session->cbdata);
+ }
+ }
+ else {
+ if (reply->type == REDIS_REPLY_ERROR) {
+ msg_err_redis_session("fuzzy backend redis error: \"%s\"",
+ reply->str);
+ }
+ if (session->callback.cb_version) {
+ session->callback.cb_version(0, session->cbdata);
+ }
+ }
+ }
+ else {
+ if (session->callback.cb_version) {
+ session->callback.cb_version(0, session->cbdata);
+ }
+
+ if (c->errstr) {
+ msg_err_redis_session("error getting version on %s: %s",
+ rspamd_inet_address_to_string_pretty(rspamd_upstream_addr_cur(session->up)),
+ c->errstr);
+ rspamd_upstream_fail(session->up, FALSE, c->errstr);
+ }
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, FALSE);
+}
+
+void rspamd_fuzzy_backend_version_redis(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+ struct rspamd_fuzzy_redis_session *session;
+ struct upstream *up;
+ struct upstream_list *ups;
+ rspamd_inet_addr_t *addr;
+ GString *key;
+
+ g_assert(backend != NULL);
+
+ ups = rspamd_redis_get_servers(backend, "read_servers");
+ if (!ups) {
+ if (cb) {
+ cb(0, ud);
+ }
+
+ return;
+ }
+
+ session = g_malloc0(sizeof(*session));
+ session->backend = backend;
+ REF_RETAIN(session->backend);
+
+ session->callback.cb_version = cb;
+ session->cbdata = ud;
+ session->command = RSPAMD_FUZZY_REDIS_COMMAND_VERSION;
+ session->event_loop = rspamd_fuzzy_backend_event_base(bk);
+
+ session->nargs = 2;
+ session->argv = g_malloc(sizeof(gchar *) * 2);
+ session->argv_lens = g_malloc(sizeof(gsize) * 2);
+ key = g_string_new(backend->redis_object);
+ g_string_append(key, src);
+ session->argv[0] = g_strdup("GET");
+ session->argv_lens[0] = 3;
+ session->argv[1] = key->str;
+ session->argv_lens[1] = key->len;
+ g_string_free(key, FALSE); /* Do not free underlying array */
+
+ up = rspamd_upstream_get(ups,
+ RSPAMD_UPSTREAM_ROUND_ROBIN,
+ NULL,
+ 0);
+
+ session->up = rspamd_upstream_ref(up);
+ addr = rspamd_upstream_addr_next(up);
+ g_assert(addr != NULL);
+ session->ctx = rspamd_redis_pool_connect(backend->pool,
+ backend->dbname,
+ backend->username, backend->password,
+ rspamd_inet_address_to_string(addr),
+ rspamd_inet_address_get_port(addr));
+
+ if (session->ctx == NULL) {
+ rspamd_upstream_fail(up, FALSE, strerror(errno));
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ cb(0, ud);
+ }
+ }
+ else {
+ if (redisAsyncCommandArgv(session->ctx, rspamd_fuzzy_redis_version_callback,
+ session, session->nargs,
+ (const gchar **) session->argv, session->argv_lens) != REDIS_OK) {
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ cb(0, ud);
+ }
+ }
+ else {
+ /* Add timeout */
+ session->timeout.data = session;
+ ev_now_update_if_cheap((struct ev_loop *) session->event_loop);
+ ev_timer_init(&session->timeout,
+ rspamd_fuzzy_redis_timeout,
+ session->backend->timeout, 0.0);
+ ev_timer_start(session->event_loop, &session->timeout);
+ }
+ }
+}
+
+const gchar *
+rspamd_fuzzy_backend_id_redis(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+ g_assert(backend != NULL);
+
+ return backend->id;
+}
+
+void rspamd_fuzzy_backend_expire_redis(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+
+ g_assert(backend != NULL);
+}
+
+static gboolean
+rspamd_fuzzy_update_append_command(struct rspamd_fuzzy_backend *bk,
+ struct rspamd_fuzzy_redis_session *session,
+ struct fuzzy_peer_cmd *io_cmd, guint *shift)
+{
+ GString *key, *value;
+ guint cur_shift = *shift;
+ guint i, klen;
+ struct rspamd_fuzzy_cmd *cmd;
+
+ if (io_cmd->is_shingle) {
+ cmd = &io_cmd->cmd.shingle.basic;
+ }
+ else {
+ cmd = &io_cmd->cmd.normal;
+ }
+
+ if (cmd->cmd == FUZZY_WRITE) {
+ /*
+ * For each normal hash addition we do 5 redis commands:
+ * HSET <key> F <flag>
+ * HSETNX <key> C <time>
+ * HINCRBY <key> V <weight>
+ * EXPIRE <key> <expire>
+ * Where <key> is <prefix> || <digest>
+ */
+
+ /* HSET */
+ klen = strlen(session->backend->redis_object) +
+ sizeof(cmd->digest) + 1;
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ value = g_string_sized_new(sizeof("4294967296"));
+ rspamd_printf_gstring(value, "%d", cmd->flag);
+
+ if (cmd->version & RSPAMD_FUZZY_FLAG_WEAK) {
+ session->argv[cur_shift] = g_strdup("HSETNX");
+ session->argv_lens[cur_shift++] = sizeof("HSETNX") - 1;
+ }
+ else {
+ session->argv[cur_shift] = g_strdup("HSET");
+ session->argv_lens[cur_shift++] = sizeof("HSET") - 1;
+ }
+
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = g_strdup("F");
+ session->argv_lens[cur_shift++] = sizeof("F") - 1;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 4,
+ (const gchar **) &session->argv[cur_shift - 4],
+ &session->argv_lens[cur_shift - 4]) != REDIS_OK) {
+
+ return FALSE;
+ }
+
+ /* HSETNX */
+ klen = strlen(session->backend->redis_object) +
+ sizeof(cmd->digest) + 1;
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ value = g_string_sized_new(sizeof("18446744073709551616"));
+ rspamd_printf_gstring(value, "%L", (gint64) rspamd_get_calendar_ticks());
+ session->argv[cur_shift] = g_strdup("HSETNX");
+ session->argv_lens[cur_shift++] = sizeof("HSETNX") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = g_strdup("C");
+ session->argv_lens[cur_shift++] = sizeof("C") - 1;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 4,
+ (const gchar **) &session->argv[cur_shift - 4],
+ &session->argv_lens[cur_shift - 4]) != REDIS_OK) {
+
+ return FALSE;
+ }
+
+ /* HINCRBY */
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ value = g_string_sized_new(sizeof("4294967296"));
+ rspamd_printf_gstring(value, "%d", cmd->value);
+ session->argv[cur_shift] = g_strdup("HINCRBY");
+ session->argv_lens[cur_shift++] = sizeof("HINCRBY") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = g_strdup("V");
+ session->argv_lens[cur_shift++] = sizeof("V") - 1;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 4,
+ (const gchar **) &session->argv[cur_shift - 4],
+ &session->argv_lens[cur_shift - 4]) != REDIS_OK) {
+
+ return FALSE;
+ }
+
+ /* EXPIRE */
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ value = g_string_sized_new(sizeof("4294967296"));
+ rspamd_printf_gstring(value, "%d",
+ (gint) rspamd_fuzzy_backend_get_expire(bk));
+ session->argv[cur_shift] = g_strdup("EXPIRE");
+ session->argv_lens[cur_shift++] = sizeof("EXPIRE") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 3,
+ (const gchar **) &session->argv[cur_shift - 3],
+ &session->argv_lens[cur_shift - 3]) != REDIS_OK) {
+
+ return FALSE;
+ }
+
+ /* INCR */
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append(key, "_count");
+ session->argv[cur_shift] = g_strdup("INCR");
+ session->argv_lens[cur_shift++] = sizeof("INCR") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ g_string_free(key, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 2,
+ (const gchar **) &session->argv[cur_shift - 2],
+ &session->argv_lens[cur_shift - 2]) != REDIS_OK) {
+
+ return FALSE;
+ }
+ }
+ else if (cmd->cmd == FUZZY_DEL) {
+ /* DEL */
+ klen = strlen(session->backend->redis_object) +
+ sizeof(cmd->digest) + 1;
+
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ session->argv[cur_shift] = g_strdup("DEL");
+ session->argv_lens[cur_shift++] = sizeof("DEL") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ g_string_free(key, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 2,
+ (const gchar **) &session->argv[cur_shift - 2],
+ &session->argv_lens[cur_shift - 2]) != REDIS_OK) {
+
+ return FALSE;
+ }
+
+ /* DECR */
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append(key, "_count");
+ session->argv[cur_shift] = g_strdup("DECR");
+ session->argv_lens[cur_shift++] = sizeof("DECR") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ g_string_free(key, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 2,
+ (const gchar **) &session->argv[cur_shift - 2],
+ &session->argv_lens[cur_shift - 2]) != REDIS_OK) {
+
+ return FALSE;
+ }
+ }
+ else if (cmd->cmd == FUZZY_REFRESH) {
+ /*
+ * Issue refresh command by just EXPIRE command
+ * EXPIRE <key> <expire>
+ * Where <key> is <prefix> || <digest>
+ */
+
+ klen = strlen(session->backend->redis_object) +
+ sizeof(cmd->digest) + 1;
+
+ /* EXPIRE */
+ key = g_string_sized_new(klen);
+ g_string_append(key, session->backend->redis_object);
+ g_string_append_len(key, cmd->digest, sizeof(cmd->digest));
+ value = g_string_sized_new(sizeof("4294967296"));
+ rspamd_printf_gstring(value, "%d",
+ (gint) rspamd_fuzzy_backend_get_expire(bk));
+ session->argv[cur_shift] = g_strdup("EXPIRE");
+ session->argv_lens[cur_shift++] = sizeof("EXPIRE") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 3,
+ (const gchar **) &session->argv[cur_shift - 3],
+ &session->argv_lens[cur_shift - 3]) != REDIS_OK) {
+
+ return FALSE;
+ }
+ }
+ else if (cmd->cmd == FUZZY_DUP) {
+ /* Ignore */
+ }
+ else {
+ g_assert_not_reached();
+ }
+
+ if (io_cmd->is_shingle) {
+ if (cmd->cmd == FUZZY_WRITE) {
+ klen = strlen(session->backend->redis_object) +
+ 64 + 1;
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ guchar *hval;
+ /*
+ * For each command with shingles we additionally emit 32 commands:
+ * SETEX <prefix>_<number>_<value> <expire> <digest>
+ */
+
+ /* SETEX */
+ key = g_string_sized_new(klen);
+ rspamd_printf_gstring(key, "%s_%d_%uL",
+ session->backend->redis_object,
+ i,
+ io_cmd->cmd.shingle.sgl.hashes[i]);
+ value = g_string_sized_new(sizeof("4294967296"));
+ rspamd_printf_gstring(value, "%d",
+ (gint) rspamd_fuzzy_backend_get_expire(bk));
+ hval = g_malloc(sizeof(io_cmd->cmd.shingle.basic.digest));
+ memcpy(hval, io_cmd->cmd.shingle.basic.digest,
+ sizeof(io_cmd->cmd.shingle.basic.digest));
+ session->argv[cur_shift] = g_strdup("SETEX");
+ session->argv_lens[cur_shift++] = sizeof("SETEX") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ session->argv[cur_shift] = hval;
+ session->argv_lens[cur_shift++] = sizeof(io_cmd->cmd.shingle.basic.digest);
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 4,
+ (const gchar **) &session->argv[cur_shift - 4],
+ &session->argv_lens[cur_shift - 4]) != REDIS_OK) {
+
+ return FALSE;
+ }
+ }
+ }
+ else if (cmd->cmd == FUZZY_DEL) {
+ klen = strlen(session->backend->redis_object) +
+ 64 + 1;
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ key = g_string_sized_new(klen);
+ rspamd_printf_gstring(key, "%s_%d_%uL",
+ session->backend->redis_object,
+ i,
+ io_cmd->cmd.shingle.sgl.hashes[i]);
+ session->argv[cur_shift] = g_strdup("DEL");
+ session->argv_lens[cur_shift++] = sizeof("DEL") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ g_string_free(key, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 2,
+ (const gchar **) &session->argv[cur_shift - 2],
+ &session->argv_lens[cur_shift - 2]) != REDIS_OK) {
+
+ return FALSE;
+ }
+ }
+ }
+ else if (cmd->cmd == FUZZY_REFRESH) {
+ klen = strlen(session->backend->redis_object) +
+ 64 + 1;
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ /*
+ * For each command with shingles we additionally emit 32 commands:
+ * EXPIRE <prefix>_<number>_<value> <expire>
+ */
+
+ /* Expire */
+ key = g_string_sized_new(klen);
+ rspamd_printf_gstring(key, "%s_%d_%uL",
+ session->backend->redis_object,
+ i,
+ io_cmd->cmd.shingle.sgl.hashes[i]);
+ value = g_string_sized_new(sizeof("18446744073709551616"));
+ rspamd_printf_gstring(value, "%d",
+ (gint) rspamd_fuzzy_backend_get_expire(bk));
+ session->argv[cur_shift] = g_strdup("EXPIRE");
+ session->argv_lens[cur_shift++] = sizeof("EXPIRE") - 1;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ session->argv[cur_shift] = value->str;
+ session->argv_lens[cur_shift++] = value->len;
+ g_string_free(key, FALSE);
+ g_string_free(value, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 3,
+ (const gchar **) &session->argv[cur_shift - 3],
+ &session->argv_lens[cur_shift - 3]) != REDIS_OK) {
+
+ return FALSE;
+ }
+ }
+ }
+ else if (cmd->cmd == FUZZY_DUP) {
+ /* Ignore */
+ }
+ else {
+ g_assert_not_reached();
+ }
+ }
+
+ *shift = cur_shift;
+
+ return TRUE;
+}
+
+static void
+rspamd_fuzzy_redis_update_callback(redisAsyncContext *c, gpointer r,
+ gpointer priv)
+{
+ struct rspamd_fuzzy_redis_session *session = priv;
+ redisReply *reply = r;
+
+ ev_timer_stop(session->event_loop, &session->timeout);
+
+ if (c->err == 0 && reply != NULL) {
+ rspamd_upstream_ok(session->up);
+
+ if (reply->type == REDIS_REPLY_ARRAY) {
+ /* TODO: check all replies somehow */
+ if (session->callback.cb_update) {
+ session->callback.cb_update(TRUE,
+ session->nadded,
+ session->ndeleted,
+ session->nextended,
+ session->nignored,
+ session->cbdata);
+ }
+ }
+ else {
+ if (reply->type == REDIS_REPLY_ERROR) {
+ msg_err_redis_session("fuzzy backend redis error: \"%s\"",
+ reply->str);
+ }
+ if (session->callback.cb_update) {
+ session->callback.cb_update(FALSE, 0, 0, 0, 0, session->cbdata);
+ }
+ }
+ }
+ else {
+ if (session->callback.cb_update) {
+ session->callback.cb_update(FALSE, 0, 0, 0, 0, session->cbdata);
+ }
+
+ if (c->errstr) {
+ msg_err_redis_session("error sending update to redis %s: %s",
+ rspamd_inet_address_to_string_pretty(rspamd_upstream_addr_cur(session->up)),
+ c->errstr);
+ rspamd_upstream_fail(session->up, FALSE, c->errstr);
+ }
+ }
+
+ rspamd_fuzzy_redis_session_dtor(session, FALSE);
+}
+
+void rspamd_fuzzy_backend_update_redis(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src,
+ rspamd_fuzzy_update_cb cb, void *ud,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+ struct rspamd_fuzzy_redis_session *session;
+ struct upstream *up;
+ struct upstream_list *ups;
+ rspamd_inet_addr_t *addr;
+ guint i;
+ GString *key;
+ struct fuzzy_peer_cmd *io_cmd;
+ struct rspamd_fuzzy_cmd *cmd = NULL;
+ guint nargs, cur_shift;
+
+ g_assert(backend != NULL);
+
+ ups = rspamd_redis_get_servers(backend, "write_servers");
+ if (!ups) {
+ if (cb) {
+ cb(FALSE, 0, 0, 0, 0, ud);
+ }
+
+ return;
+ }
+
+ session = g_malloc0(sizeof(*session));
+ session->backend = backend;
+ REF_RETAIN(session->backend);
+
+ /*
+ * For each normal hash addition we do 3 redis commands:
+ * HSET <key> F <flag> **OR** HSETNX <key> F <flag> when flag is weak
+ * HINCRBY <key> V <weight>
+ * EXPIRE <key> <expire>
+ * INCR <prefix||fuzzy_count>
+ *
+ * Where <key> is <prefix> || <digest>
+ *
+ * For each command with shingles we additionally emit 32 commands:
+ * SETEX <prefix>_<number>_<value> <expire> <digest>
+ *
+ * For each delete command we emit:
+ * DEL <key>
+ *
+ * For each delete command with shingles we emit also 32 commands:
+ * DEL <prefix>_<number>_<value>
+ * DECR <prefix||fuzzy_count>
+ */
+
+ nargs = 4;
+
+ for (i = 0; i < updates->len; i++) {
+ io_cmd = &g_array_index(updates, struct fuzzy_peer_cmd, i);
+
+ if (io_cmd->is_shingle) {
+ cmd = &io_cmd->cmd.shingle.basic;
+ }
+ else {
+ cmd = &io_cmd->cmd.normal;
+ }
+
+ if (cmd->cmd == FUZZY_WRITE) {
+ nargs += 17;
+ session->nadded++;
+
+ if (io_cmd->is_shingle) {
+ nargs += RSPAMD_SHINGLE_SIZE * 4;
+ }
+ }
+ else if (cmd->cmd == FUZZY_DEL) {
+ nargs += 4;
+ session->ndeleted++;
+
+ if (io_cmd->is_shingle) {
+ nargs += RSPAMD_SHINGLE_SIZE * 2;
+ }
+ }
+ else if (cmd->cmd == FUZZY_REFRESH) {
+ nargs += 3;
+ session->nextended++;
+
+ if (io_cmd->is_shingle) {
+ nargs += RSPAMD_SHINGLE_SIZE * 3;
+ }
+ }
+ else {
+ session->nignored++;
+ }
+ }
+
+ /* Now we need to create a new request */
+ session->callback.cb_update = cb;
+ session->cbdata = ud;
+ session->command = RSPAMD_FUZZY_REDIS_COMMAND_UPDATES;
+ session->cmd = cmd;
+ session->prob = 1.0f;
+ session->event_loop = rspamd_fuzzy_backend_event_base(bk);
+
+ /* First of all check digest */
+ session->nargs = nargs;
+ session->argv = g_malloc0(sizeof(gchar *) * session->nargs);
+ session->argv_lens = g_malloc0(sizeof(gsize) * session->nargs);
+
+ up = rspamd_upstream_get(ups,
+ RSPAMD_UPSTREAM_MASTER_SLAVE,
+ NULL,
+ 0);
+
+ session->up = rspamd_upstream_ref(up);
+ addr = rspamd_upstream_addr_next(up);
+ g_assert(addr != NULL);
+ session->ctx = rspamd_redis_pool_connect(backend->pool,
+ backend->dbname,
+ backend->username, backend->password,
+ rspamd_inet_address_to_string(addr),
+ rspamd_inet_address_get_port(addr));
+
+ if (session->ctx == NULL) {
+ rspamd_upstream_fail(up, TRUE, strerror(errno));
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ if (cb) {
+ cb(FALSE, 0, 0, 0, 0, ud);
+ }
+ }
+ else {
+ /* Start with MULTI command */
+ session->argv[0] = g_strdup("MULTI");
+ session->argv_lens[0] = 5;
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 1,
+ (const gchar **) session->argv,
+ session->argv_lens) != REDIS_OK) {
+
+ if (cb) {
+ cb(FALSE, 0, 0, 0, 0, ud);
+ }
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ return;
+ }
+
+ /* Now split the rest of commands in packs and emit them command by command */
+ cur_shift = 1;
+
+ for (i = 0; i < updates->len; i++) {
+ io_cmd = &g_array_index(updates, struct fuzzy_peer_cmd, i);
+
+ if (!rspamd_fuzzy_update_append_command(bk, session, io_cmd,
+ &cur_shift)) {
+ if (cb) {
+ cb(FALSE, 0, 0, 0, 0, ud);
+ }
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ return;
+ }
+ }
+
+ /* Now INCR command for the source */
+ key = g_string_new(backend->redis_object);
+ g_string_append(key, src);
+ session->argv[cur_shift] = g_strdup("INCR");
+ session->argv_lens[cur_shift++] = 4;
+ session->argv[cur_shift] = key->str;
+ session->argv_lens[cur_shift++] = key->len;
+ g_string_free(key, FALSE);
+
+ if (redisAsyncCommandArgv(session->ctx, NULL, NULL,
+ 2,
+ (const gchar **) &session->argv[cur_shift - 2],
+ &session->argv_lens[cur_shift - 2]) != REDIS_OK) {
+
+ if (cb) {
+ cb(FALSE, 0, 0, 0, 0, ud);
+ }
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ return;
+ }
+
+ /* Finally we call EXEC with a specific callback */
+ session->argv[cur_shift] = g_strdup("EXEC");
+ session->argv_lens[cur_shift] = 4;
+
+ if (redisAsyncCommandArgv(session->ctx,
+ rspamd_fuzzy_redis_update_callback, session,
+ 1,
+ (const gchar **) &session->argv[cur_shift],
+ &session->argv_lens[cur_shift]) != REDIS_OK) {
+
+ if (cb) {
+ cb(FALSE, 0, 0, 0, 0, ud);
+ }
+ rspamd_fuzzy_redis_session_dtor(session, TRUE);
+
+ return;
+ }
+ else {
+ /* Add timeout */
+ session->timeout.data = session;
+ ev_now_update_if_cheap((struct ev_loop *) session->event_loop);
+ ev_timer_init(&session->timeout,
+ rspamd_fuzzy_redis_timeout,
+ session->backend->timeout, 0.0);
+ ev_timer_start(session->event_loop, &session->timeout);
+ }
+ }
+}
+
+void rspamd_fuzzy_backend_close_redis(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud)
+{
+ struct rspamd_fuzzy_backend_redis *backend = subr_ud;
+
+ g_assert(backend != NULL);
+
+ /*
+ * XXX: we leak lua registry element there to avoid crashing
+ * due to chicken-egg problem between lua state termination and
+ * redis pool termination.
+ * Here, we assume that redis pool is destroyed AFTER lua_state,
+ * so all connections pending will release references but due to
+ * `terminated` hack they will not try to access Lua stuff
+ * This is enabled merely if we have connections pending (e.g. refcount > 1)
+ */
+ if (backend->ref.refcount > 1) {
+ backend->terminated = true;
+ }
+ REF_RELEASE(backend);
+}
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_redis.h b/src/libserver/fuzzy_backend/fuzzy_backend_redis.h
new file mode 100644
index 0000000..3cfa162
--- /dev/null
+++ b/src/libserver/fuzzy_backend/fuzzy_backend_redis.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_FUZZY_BACKEND_REDIS_H_
+#define SRC_LIBSERVER_FUZZY_BACKEND_REDIS_H_
+
+#include "config.h"
+#include "fuzzy_backend.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Subroutines for fuzzy_backend
+ */
+void *rspamd_fuzzy_backend_init_redis(struct rspamd_fuzzy_backend *bk,
+ const ucl_object_t *obj,
+ struct rspamd_config *cfg,
+ GError **err);
+
+void rspamd_fuzzy_backend_check_redis(struct rspamd_fuzzy_backend *bk,
+ const struct rspamd_fuzzy_cmd *cmd,
+ rspamd_fuzzy_check_cb cb, void *ud,
+ void *subr_ud);
+
+void rspamd_fuzzy_backend_update_redis(struct rspamd_fuzzy_backend *bk,
+ GArray *updates, const gchar *src,
+ rspamd_fuzzy_update_cb cb, void *ud,
+ void *subr_ud);
+
+void rspamd_fuzzy_backend_count_redis(struct rspamd_fuzzy_backend *bk,
+ rspamd_fuzzy_count_cb cb, void *ud,
+ void *subr_ud);
+
+void rspamd_fuzzy_backend_version_redis(struct rspamd_fuzzy_backend *bk,
+ const gchar *src,
+ rspamd_fuzzy_version_cb cb, void *ud,
+ void *subr_ud);
+
+const gchar *rspamd_fuzzy_backend_id_redis(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud);
+
+void rspamd_fuzzy_backend_expire_redis(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud);
+
+void rspamd_fuzzy_backend_close_redis(struct rspamd_fuzzy_backend *bk,
+ void *subr_ud);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_FUZZY_BACKEND_REDIS_H_ */
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_sqlite.c b/src/libserver/fuzzy_backend/fuzzy_backend_sqlite.c
new file mode 100644
index 0000000..9ec448e
--- /dev/null
+++ b/src/libserver/fuzzy_backend/fuzzy_backend_sqlite.c
@@ -0,0 +1,1029 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "fuzzy_backend.h"
+#include "fuzzy_backend_sqlite.h"
+#include "unix-std.h"
+
+#include <sqlite3.h>
+#include "libutil/sqlite_utils.h"
+
+struct rspamd_fuzzy_backend_sqlite {
+ sqlite3 *db;
+ char *path;
+ gchar id[MEMPOOL_UID_LEN];
+ gsize count;
+ gsize expired;
+ rspamd_mempool_t *pool;
+};
+
+static const gdouble sql_sleep_time = 0.1;
+static const guint max_retries = 10;
+
+#define msg_err_fuzzy_backend(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ backend->pool->tag.tagname, backend->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_fuzzy_backend(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ backend->pool->tag.tagname, backend->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_fuzzy_backend(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ backend->pool->tag.tagname, backend->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_fuzzy_backend(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_fuzzy_sqlite_log_id, backend->pool->tag.tagname, backend->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(fuzzy_sqlite)
+
+static const char *create_tables_sql =
+ "BEGIN;"
+ "CREATE TABLE IF NOT EXISTS digests("
+ " id INTEGER PRIMARY KEY,"
+ " flag INTEGER NOT NULL,"
+ " digest TEXT NOT NULL,"
+ " value INTEGER,"
+ " time INTEGER);"
+ "CREATE TABLE IF NOT EXISTS shingles("
+ " value INTEGER NOT NULL,"
+ " number INTEGER NOT NULL,"
+ " digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE "
+ " ON UPDATE CASCADE);"
+ "CREATE TABLE IF NOT EXISTS sources("
+ " name TEXT UNIQUE,"
+ " version INTEGER,"
+ " last INTEGER);"
+ "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
+ "CREATE INDEX IF NOT EXISTS t ON digests(time);"
+ "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);"
+ "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
+ "COMMIT;";
+#if 0
+static const char *create_index_sql =
+ "BEGIN;"
+ "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
+ "CREATE INDEX IF NOT EXISTS t ON digests(time);"
+ "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);"
+ "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
+ "COMMIT;";
+#endif
+enum rspamd_fuzzy_statement_idx {
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_START = 0,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
+ RSPAMD_FUZZY_BACKEND_INSERT,
+ RSPAMD_FUZZY_BACKEND_UPDATE,
+ RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
+ RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
+ RSPAMD_FUZZY_BACKEND_CHECK,
+ RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
+ RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
+ RSPAMD_FUZZY_BACKEND_DELETE,
+ RSPAMD_FUZZY_BACKEND_COUNT,
+ RSPAMD_FUZZY_BACKEND_EXPIRE,
+ RSPAMD_FUZZY_BACKEND_VACUUM,
+ RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
+ RSPAMD_FUZZY_BACKEND_ADD_SOURCE,
+ RSPAMD_FUZZY_BACKEND_VERSION,
+ RSPAMD_FUZZY_BACKEND_SET_VERSION,
+ RSPAMD_FUZZY_BACKEND_MAX
+};
+static struct rspamd_fuzzy_stmts {
+ enum rspamd_fuzzy_statement_idx idx;
+ const gchar *sql;
+ const gchar *args;
+ sqlite3_stmt *stmt;
+ gint result;
+} prepared_stmts[RSPAMD_FUZZY_BACKEND_MAX] =
+ {
+ {.idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
+ .sql = "BEGIN TRANSACTION;",
+ .args = "",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
+ .sql = "COMMIT;",
+ .args = "",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
+ .sql = "ROLLBACK;",
+ .args = "",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_INSERT,
+ .sql = "INSERT INTO digests(flag, digest, value, time) VALUES"
+ "(?1, ?2, ?3, strftime('%s','now'));",
+ .args = "SDI",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_UPDATE,
+ .sql = "UPDATE digests SET value = value + ?1, time = strftime('%s','now') WHERE "
+ "digest==?2;",
+ .args = "ID",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
+ .sql = "UPDATE digests SET value = ?1, flag = ?2, time = strftime('%s','now') WHERE "
+ "digest==?3;",
+ .args = "IID",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
+ .sql = "INSERT OR REPLACE INTO shingles(value, number, digest_id) "
+ "VALUES (?1, ?2, ?3);",
+ .args = "III",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_CHECK,
+ .sql = "SELECT value, time, flag FROM digests WHERE digest==?1;",
+ .args = "D",
+ .stmt = NULL,
+ .result = SQLITE_ROW},
+ {.idx = RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
+ .sql = "SELECT digest_id FROM shingles WHERE value=?1 AND number=?2",
+ .args = "IS",
+ .stmt = NULL,
+ .result = SQLITE_ROW},
+ {.idx = RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
+ .sql = "SELECT digest, value, time, flag FROM digests WHERE id=?1",
+ .args = "I",
+ .stmt = NULL,
+ .result = SQLITE_ROW},
+ {.idx = RSPAMD_FUZZY_BACKEND_DELETE,
+ .sql = "DELETE FROM digests WHERE digest==?1;",
+ .args = "D",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_COUNT,
+ .sql = "SELECT COUNT(*) FROM digests;",
+ .args = "",
+ .stmt = NULL,
+ .result = SQLITE_ROW},
+ {.idx = RSPAMD_FUZZY_BACKEND_EXPIRE,
+ .sql = "DELETE FROM digests WHERE id IN (SELECT id FROM digests WHERE time < ?1 LIMIT ?2);",
+ .args = "II",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_VACUUM,
+ .sql = "VACUUM;",
+ .args = "",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
+ .sql = "DELETE FROM shingles WHERE value=?1 AND number=?2;",
+ .args = "II",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_ADD_SOURCE,
+ .sql = "INSERT OR IGNORE INTO sources(name, version, last) VALUES (?1, ?2, ?3);",
+ .args = "TII",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+ {.idx = RSPAMD_FUZZY_BACKEND_VERSION,
+ .sql = "SELECT version FROM sources WHERE name=?1;",
+ .args = "T",
+ .stmt = NULL,
+ .result = SQLITE_ROW},
+ {.idx = RSPAMD_FUZZY_BACKEND_SET_VERSION,
+ .sql = "INSERT OR REPLACE INTO sources (name, version, last) VALUES (?3, ?1, ?2);",
+ .args = "IIT",
+ .stmt = NULL,
+ .result = SQLITE_DONE},
+};
+
+static GQuark
+rspamd_fuzzy_backend_sqlite_quark(void)
+{
+ return g_quark_from_static_string("fuzzy-backend-sqlite");
+}
+
+static gboolean
+rspamd_fuzzy_backend_sqlite_prepare_stmts(struct rspamd_fuzzy_backend_sqlite *bk, GError **err)
+{
+ int i;
+
+ for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i++) {
+ if (prepared_stmts[i].stmt != NULL) {
+ /* Skip already prepared statements */
+ continue;
+ }
+ if (sqlite3_prepare_v2(bk->db, prepared_stmts[i].sql, -1,
+ &prepared_stmts[i].stmt, NULL) != SQLITE_OK) {
+ g_set_error(err, rspamd_fuzzy_backend_sqlite_quark(),
+ -1, "Cannot initialize prepared sql `%s`: %s",
+ prepared_stmts[i].sql, sqlite3_errmsg(bk->db));
+
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static int
+rspamd_fuzzy_backend_sqlite_cleanup_stmt(struct rspamd_fuzzy_backend_sqlite *backend,
+ int idx)
+{
+ sqlite3_stmt *stmt;
+
+ if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
+
+ return -1;
+ }
+
+ msg_debug_fuzzy_backend("resetting `%s`", prepared_stmts[idx].sql);
+ stmt = prepared_stmts[idx].stmt;
+ sqlite3_clear_bindings(stmt);
+ sqlite3_reset(stmt);
+
+ return SQLITE_OK;
+}
+
+static int
+rspamd_fuzzy_backend_sqlite_run_stmt(struct rspamd_fuzzy_backend_sqlite *backend,
+ gboolean auto_cleanup,
+ int idx, ...)
+{
+ int retcode;
+ va_list ap;
+ sqlite3_stmt *stmt;
+ int i;
+ const char *argtypes;
+ guint retries = 0;
+ struct timespec ts;
+
+ if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
+
+ return -1;
+ }
+
+ stmt = prepared_stmts[idx].stmt;
+ g_assert((int) prepared_stmts[idx].idx == idx);
+
+ if (stmt == NULL) {
+ if ((retcode = sqlite3_prepare_v2(backend->db, prepared_stmts[idx].sql, -1,
+ &prepared_stmts[idx].stmt, NULL)) != SQLITE_OK) {
+ msg_err_fuzzy_backend("Cannot initialize prepared sql `%s`: %s",
+ prepared_stmts[idx].sql, sqlite3_errmsg(backend->db));
+
+ return retcode;
+ }
+ stmt = prepared_stmts[idx].stmt;
+ }
+
+ msg_debug_fuzzy_backend("executing `%s` %s auto cleanup",
+ prepared_stmts[idx].sql, auto_cleanup ? "with" : "without");
+ argtypes = prepared_stmts[idx].args;
+ sqlite3_clear_bindings(stmt);
+ sqlite3_reset(stmt);
+ va_start(ap, idx);
+
+ for (i = 0; argtypes[i] != '\0'; i++) {
+ switch (argtypes[i]) {
+ case 'T':
+ sqlite3_bind_text(stmt, i + 1, va_arg(ap, const char *), -1,
+ SQLITE_STATIC);
+ break;
+ case 'I':
+ sqlite3_bind_int64(stmt, i + 1, va_arg(ap, gint64));
+ break;
+ case 'S':
+ sqlite3_bind_int(stmt, i + 1, va_arg(ap, gint));
+ break;
+ case 'D':
+ /* Special case for digests variable */
+ sqlite3_bind_text(stmt, i + 1, va_arg(ap, const char *), 64,
+ SQLITE_STATIC);
+ break;
+ }
+ }
+
+ va_end(ap);
+
+retry:
+ retcode = sqlite3_step(stmt);
+
+ if (retcode == prepared_stmts[idx].result) {
+ retcode = SQLITE_OK;
+ }
+ else {
+ if ((retcode == SQLITE_BUSY ||
+ retcode == SQLITE_LOCKED) &&
+ retries++ < max_retries) {
+ double_to_ts(sql_sleep_time, &ts);
+ nanosleep(&ts, NULL);
+ goto retry;
+ }
+
+ msg_debug_fuzzy_backend("failed to execute query %s: %d, %s", prepared_stmts[idx].sql,
+ retcode, sqlite3_errmsg(backend->db));
+ }
+
+ if (auto_cleanup) {
+ sqlite3_clear_bindings(stmt);
+ sqlite3_reset(stmt);
+ }
+
+ return retcode;
+}
+
+static void
+rspamd_fuzzy_backend_sqlite_close_stmts(struct rspamd_fuzzy_backend_sqlite *bk)
+{
+ int i;
+
+ for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i++) {
+ if (prepared_stmts[i].stmt != NULL) {
+ sqlite3_finalize(prepared_stmts[i].stmt);
+ prepared_stmts[i].stmt = NULL;
+ }
+ }
+
+ return;
+}
+
+static gboolean
+rspamd_fuzzy_backend_sqlite_run_sql(const gchar *sql, struct rspamd_fuzzy_backend_sqlite *bk,
+ GError **err)
+{
+ guint retries = 0;
+ struct timespec ts;
+ gint ret;
+
+ do {
+ ret = sqlite3_exec(bk->db, sql, NULL, NULL, NULL);
+ double_to_ts(sql_sleep_time, &ts);
+ } while (ret == SQLITE_BUSY && retries++ < max_retries &&
+ nanosleep(&ts, NULL) == 0);
+
+ if (ret != SQLITE_OK) {
+ g_set_error(err, rspamd_fuzzy_backend_sqlite_quark(),
+ -1, "Cannot execute raw sql `%s`: %s",
+ sql, sqlite3_errmsg(bk->db));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static struct rspamd_fuzzy_backend_sqlite *
+rspamd_fuzzy_backend_sqlite_open_db(const gchar *path, GError **err)
+{
+ struct rspamd_fuzzy_backend_sqlite *bk;
+ rspamd_cryptobox_hash_state_t st;
+ guchar hash_out[rspamd_cryptobox_HASHBYTES];
+
+ g_assert(path != NULL);
+
+ bk = g_malloc0(sizeof(*bk));
+ bk->path = g_strdup(path);
+ bk->expired = 0;
+ bk->pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "fuzzy_backend", 0);
+ bk->db = rspamd_sqlite3_open_or_create(bk->pool, bk->path,
+ create_tables_sql, 1, err);
+
+ if (bk->db == NULL) {
+ rspamd_fuzzy_backend_sqlite_close(bk);
+
+ return NULL;
+ }
+
+ if (!rspamd_fuzzy_backend_sqlite_prepare_stmts(bk, err)) {
+ rspamd_fuzzy_backend_sqlite_close(bk);
+
+ return NULL;
+ }
+
+ /* Set id for the backend */
+ rspamd_cryptobox_hash_init(&st, NULL, 0);
+ rspamd_cryptobox_hash_update(&st, path, strlen(path));
+ rspamd_cryptobox_hash_final(&st, hash_out);
+ rspamd_snprintf(bk->id, sizeof(bk->id), "%xs", hash_out);
+ memcpy(bk->pool->tag.uid, bk->id, sizeof(bk->pool->tag.uid));
+
+ return bk;
+}
+
+struct rspamd_fuzzy_backend_sqlite *
+rspamd_fuzzy_backend_sqlite_open(const gchar *path,
+ gboolean vacuum,
+ GError **err)
+{
+ struct rspamd_fuzzy_backend_sqlite *backend;
+
+ if (path == NULL) {
+ g_set_error(err, rspamd_fuzzy_backend_sqlite_quark(),
+ ENOENT, "Path has not been specified");
+ return NULL;
+ }
+
+ /* Open database */
+ if ((backend = rspamd_fuzzy_backend_sqlite_open_db(path, err)) == NULL) {
+ return NULL;
+ }
+
+ if (rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE, RSPAMD_FUZZY_BACKEND_COUNT) == SQLITE_OK) {
+ backend->count = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_COUNT);
+
+ return backend;
+}
+
+static gint
+rspamd_fuzzy_backend_sqlite_int64_cmp(const void *a, const void *b)
+{
+ gint64 ia = *(gint64 *) a, ib = *(gint64 *) b;
+
+ return (ia - ib);
+}
+
+struct rspamd_fuzzy_reply
+rspamd_fuzzy_backend_sqlite_check(struct rspamd_fuzzy_backend_sqlite *backend,
+ const struct rspamd_fuzzy_cmd *cmd, gint64 expire)
+{
+ struct rspamd_fuzzy_reply rep;
+ const struct rspamd_fuzzy_shingle_cmd *shcmd;
+ int rc;
+ gint64 timestamp;
+ gint64 shingle_values[RSPAMD_SHINGLE_SIZE], i, sel_id, cur_id,
+ cur_cnt, max_cnt;
+
+ memset(&rep, 0, sizeof(rep));
+ memcpy(rep.digest, cmd->digest, sizeof(rep.digest));
+
+ if (backend == NULL) {
+ return rep;
+ }
+
+ /* Try direct match first of all */
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_CHECK,
+ cmd->digest);
+
+ if (rc == SQLITE_OK) {
+ timestamp = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 1);
+ if (time(NULL) - timestamp > expire) {
+ /* Expire element */
+ msg_debug_fuzzy_backend("requested hash has been expired");
+ }
+ else {
+ rep.v1.value = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 0);
+ rep.v1.prob = 1.0;
+ rep.v1.flag = sqlite3_column_int(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 2);
+ }
+ }
+ else if (cmd->shingles_count > 0) {
+ /* Fuzzy match */
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_CHECK);
+ shcmd = (const struct rspamd_fuzzy_shingle_cmd *) cmd;
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
+ shcmd->sgl.hashes[i], i);
+ if (rc == SQLITE_OK) {
+ shingle_values[i] = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE].stmt,
+ 0);
+ }
+ else {
+ shingle_values[i] = -1;
+ }
+ msg_debug_fuzzy_backend("looking for shingle %L -> %L: %d", i,
+ shcmd->sgl.hashes[i], rc);
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend,
+ RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE);
+
+ qsort(shingle_values, RSPAMD_SHINGLE_SIZE, sizeof(gint64),
+ rspamd_fuzzy_backend_sqlite_int64_cmp);
+ sel_id = -1;
+ cur_id = -1;
+ cur_cnt = 0;
+ max_cnt = 0;
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ if (shingle_values[i] == -1) {
+ continue;
+ }
+
+ /* We have some value here, so we need to check it */
+ if (shingle_values[i] == cur_id) {
+ cur_cnt++;
+ }
+ else {
+ cur_id = shingle_values[i];
+ if (cur_cnt >= max_cnt) {
+ max_cnt = cur_cnt;
+ sel_id = cur_id;
+ }
+ cur_cnt = 0;
+ }
+ }
+
+ if (cur_cnt > max_cnt) {
+ max_cnt = cur_cnt;
+ }
+
+ if (sel_id != -1) {
+ /* We have some id selected here */
+ rep.v1.prob = (float) max_cnt / (float) RSPAMD_SHINGLE_SIZE;
+
+ if (rep.v1.prob > 0.5) {
+ msg_debug_fuzzy_backend(
+ "found fuzzy hash with probability %.2f",
+ rep.v1.prob);
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID, sel_id);
+ if (rc == SQLITE_OK) {
+ timestamp = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
+ 2);
+ if (time(NULL) - timestamp > expire) {
+ /* Expire element */
+ msg_debug_fuzzy_backend(
+ "requested hash has been expired");
+ rep.v1.prob = 0.0;
+ }
+ else {
+ rep.ts = timestamp;
+ memcpy(rep.digest, sqlite3_column_blob(prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt, 0), sizeof(rep.digest));
+ rep.v1.value = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
+ 1);
+ rep.v1.flag = sqlite3_column_int(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
+ 3);
+ }
+ }
+ }
+ else {
+ /* Otherwise we assume that as error */
+ rep.v1.value = 0;
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend,
+ RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID);
+ }
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_CHECK);
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
+
+ return rep;
+}
+
+gboolean
+rspamd_fuzzy_backend_sqlite_prepare_update(struct rspamd_fuzzy_backend_sqlite *backend,
+ const gchar *source)
+{
+ gint rc;
+
+ if (backend == NULL) {
+ return FALSE;
+ }
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
+
+ if (rc != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot start transaction for updates: %s",
+ sqlite3_errmsg(backend->db));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_fuzzy_backend_sqlite_add(struct rspamd_fuzzy_backend_sqlite *backend,
+ const struct rspamd_fuzzy_cmd *cmd)
+{
+ int rc, i;
+ gint64 id, flag;
+ const struct rspamd_fuzzy_shingle_cmd *shcmd;
+
+ if (backend == NULL) {
+ return FALSE;
+ }
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_CHECK,
+ cmd->digest);
+
+ if (rc == SQLITE_OK) {
+ /* Check flag */
+ flag = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt,
+ 2);
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_CHECK);
+
+ if (flag == cmd->flag) {
+ /* We need to increase weight */
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_UPDATE,
+ (gint64) cmd->value,
+ cmd->digest);
+ if (rc != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot update hash to %d -> "
+ "%*xs: %s",
+ (gint) cmd->flag,
+ (gint) sizeof(cmd->digest), cmd->digest,
+ sqlite3_errmsg(backend->db));
+ }
+ }
+ else {
+ /* We need to relearn actually */
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
+ (gint64) cmd->value,
+ (gint64) cmd->flag,
+ cmd->digest);
+
+ if (rc != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot update hash to %d -> "
+ "%*xs: %s",
+ (gint) cmd->flag,
+ (gint) sizeof(cmd->digest), cmd->digest,
+ sqlite3_errmsg(backend->db));
+ }
+ }
+ }
+ else {
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_CHECK);
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_INSERT,
+ (gint) cmd->flag,
+ cmd->digest,
+ (gint64) cmd->value);
+
+ if (rc == SQLITE_OK) {
+ if (cmd->shingles_count > 0) {
+ id = sqlite3_last_insert_rowid(backend->db);
+ shcmd = (const struct rspamd_fuzzy_shingle_cmd *) cmd;
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
+ shcmd->sgl.hashes[i], (gint64) i, id);
+ msg_debug_fuzzy_backend("add shingle %d -> %L: %L",
+ i,
+ shcmd->sgl.hashes[i],
+ id);
+
+ if (rc != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot add shingle %d -> "
+ "%L: %L: %s",
+ i,
+ shcmd->sgl.hashes[i],
+ id, sqlite3_errmsg(backend->db));
+ }
+ }
+ }
+ }
+ else {
+ msg_warn_fuzzy_backend("cannot add hash to %d -> "
+ "%*xs: %s",
+ (gint) cmd->flag,
+ (gint) sizeof(cmd->digest), cmd->digest,
+ sqlite3_errmsg(backend->db));
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend,
+ RSPAMD_FUZZY_BACKEND_INSERT);
+ }
+
+ return (rc == SQLITE_OK);
+}
+
+gboolean
+rspamd_fuzzy_backend_sqlite_finish_update(struct rspamd_fuzzy_backend_sqlite *backend,
+ const gchar *source, gboolean version_bump)
+{
+ gint rc = SQLITE_OK, wal_frames, wal_checkpointed, ver;
+
+ /* Get and update version */
+ if (version_bump) {
+ ver = rspamd_fuzzy_backend_sqlite_version(backend, source);
+ ++ver;
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_SET_VERSION,
+ (gint64) ver, (gint64) time(NULL), source);
+ }
+
+ if (rc == SQLITE_OK) {
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
+
+ if (rc != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot commit updates: %s",
+ sqlite3_errmsg(backend->db));
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
+ return FALSE;
+ }
+ else {
+ if (!rspamd_sqlite3_sync(backend->db, &wal_frames, &wal_checkpointed)) {
+ msg_warn_fuzzy_backend("cannot commit checkpoint: %s",
+ sqlite3_errmsg(backend->db));
+ }
+ else if (wal_checkpointed > 0) {
+ msg_info_fuzzy_backend("total number of frames in the wal file: "
+ "%d, checkpointed: %d",
+ wal_frames, wal_checkpointed);
+ }
+ }
+ }
+ else {
+ msg_warn_fuzzy_backend("cannot update version for %s: %s", source,
+ sqlite3_errmsg(backend->db));
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_fuzzy_backend_sqlite_del(struct rspamd_fuzzy_backend_sqlite *backend,
+ const struct rspamd_fuzzy_cmd *cmd)
+{
+ int rc = -1;
+
+ if (backend == NULL) {
+ return FALSE;
+ }
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_CHECK,
+ cmd->digest);
+
+ if (rc == SQLITE_OK) {
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_CHECK);
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_DELETE,
+ cmd->digest);
+ if (rc != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot update hash to %d -> "
+ "%*xs: %s",
+ (gint) cmd->flag,
+ (gint) sizeof(cmd->digest), cmd->digest,
+ sqlite3_errmsg(backend->db));
+ }
+ }
+ else {
+ /* Hash is missing */
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_CHECK);
+ }
+
+ return (rc == SQLITE_OK);
+}
+
+gboolean
+rspamd_fuzzy_backend_sqlite_sync(struct rspamd_fuzzy_backend_sqlite *backend,
+ gint64 expire,
+ gboolean clean_orphaned)
+{
+ struct orphaned_shingle_elt {
+ gint64 value;
+ gint64 number;
+ };
+
+ /* Do not do more than 5k ops per step */
+ const guint64 max_changes = 5000;
+ gboolean ret = FALSE;
+ gint64 expire_lim, expired;
+ gint rc, i, orphaned_cnt = 0;
+ GError *err = NULL;
+ static const gchar orphaned_shingles[] = "SELECT shingles.value,shingles.number "
+ "FROM shingles "
+ "LEFT JOIN digests ON "
+ "shingles.digest_id=digests.id WHERE "
+ "digests.id IS NULL;";
+ sqlite3_stmt *stmt;
+ GArray *orphaned;
+ struct orphaned_shingle_elt orphaned_elt, *pelt;
+
+
+ if (backend == NULL) {
+ return FALSE;
+ }
+
+ /* Perform expire */
+ if (expire > 0) {
+ expire_lim = time(NULL) - expire;
+
+ if (expire_lim > 0) {
+ ret = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
+
+ if (ret == SQLITE_OK) {
+
+ rc = rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_EXPIRE, expire_lim, max_changes);
+
+ if (rc == SQLITE_OK) {
+ expired = sqlite3_changes(backend->db);
+
+ if (expired > 0) {
+ backend->expired += expired;
+ msg_info_fuzzy_backend("expired %L hashes", expired);
+ }
+ }
+ else {
+ msg_warn_fuzzy_backend(
+ "cannot execute expired statement: %s",
+ sqlite3_errmsg(backend->db));
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend,
+ RSPAMD_FUZZY_BACKEND_EXPIRE);
+
+ ret = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
+
+ if (ret != SQLITE_OK) {
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
+ }
+ }
+ if (ret != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot expire db: %s",
+ sqlite3_errmsg(backend->db));
+ }
+ }
+ }
+
+ /* Cleanup database */
+ if (clean_orphaned) {
+ ret = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
+
+ if (ret == SQLITE_OK) {
+ if ((rc = sqlite3_prepare_v2(backend->db,
+ orphaned_shingles,
+ -1,
+ &stmt,
+ NULL)) != SQLITE_OK) {
+ msg_warn_fuzzy_backend("cannot cleanup shingles: %s",
+ sqlite3_errmsg(backend->db));
+ }
+ else {
+ orphaned = g_array_new(FALSE,
+ FALSE,
+ sizeof(struct orphaned_shingle_elt));
+
+ while (sqlite3_step(stmt) == SQLITE_ROW) {
+ orphaned_elt.value = sqlite3_column_int64(stmt, 0);
+ orphaned_elt.number = sqlite3_column_int64(stmt, 1);
+ g_array_append_val(orphaned, orphaned_elt);
+
+ if (orphaned->len > max_changes) {
+ break;
+ }
+ }
+
+ sqlite3_finalize(stmt);
+ orphaned_cnt = orphaned->len;
+
+ if (orphaned_cnt > 0) {
+ msg_info_fuzzy_backend(
+ "going to delete %ud orphaned shingles",
+ orphaned_cnt);
+ /* Need to delete orphaned elements */
+ for (i = 0; i < (gint) orphaned_cnt; i++) {
+ pelt = &g_array_index(orphaned,
+ struct orphaned_shingle_elt,
+ i);
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
+ pelt->value, pelt->number);
+ }
+ }
+
+
+ g_array_free(orphaned, TRUE);
+ }
+
+ ret = rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
+
+ if (ret == SQLITE_OK) {
+ msg_info_fuzzy_backend(
+ "deleted %ud orphaned shingles",
+ orphaned_cnt);
+ }
+ else {
+ msg_warn_fuzzy_backend(
+ "cannot synchronize fuzzy backend: %e",
+ err);
+ rspamd_fuzzy_backend_sqlite_run_stmt(backend, TRUE,
+ RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
+ }
+ }
+ }
+
+ return ret;
+}
+
+
+void rspamd_fuzzy_backend_sqlite_close(struct rspamd_fuzzy_backend_sqlite *backend)
+{
+ if (backend != NULL) {
+ if (backend->db != NULL) {
+ rspamd_fuzzy_backend_sqlite_close_stmts(backend);
+ sqlite3_close(backend->db);
+ }
+
+ if (backend->path != NULL) {
+ g_free(backend->path);
+ }
+
+ if (backend->pool) {
+ rspamd_mempool_delete(backend->pool);
+ }
+
+ g_free(backend);
+ }
+}
+
+
+gsize rspamd_fuzzy_backend_sqlite_count(struct rspamd_fuzzy_backend_sqlite *backend)
+{
+ if (backend) {
+ if (rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_COUNT) == SQLITE_OK) {
+ backend->count = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_COUNT);
+
+ return backend->count;
+ }
+
+ return 0;
+}
+
+gint rspamd_fuzzy_backend_sqlite_version(struct rspamd_fuzzy_backend_sqlite *backend,
+ const gchar *source)
+{
+ gint ret = 0;
+
+ if (backend) {
+ if (rspamd_fuzzy_backend_sqlite_run_stmt(backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_VERSION, source) == SQLITE_OK) {
+ ret = sqlite3_column_int64(
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0);
+ }
+
+ rspamd_fuzzy_backend_sqlite_cleanup_stmt(backend, RSPAMD_FUZZY_BACKEND_VERSION);
+ }
+
+ return ret;
+}
+
+gsize rspamd_fuzzy_backend_sqlite_expired(struct rspamd_fuzzy_backend_sqlite *backend)
+{
+ return backend != NULL ? backend->expired : 0;
+}
+
+const gchar *
+rspamd_fuzzy_sqlite_backend_id(struct rspamd_fuzzy_backend_sqlite *backend)
+{
+ return backend != NULL ? backend->id : 0;
+}
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_sqlite.h b/src/libserver/fuzzy_backend/fuzzy_backend_sqlite.h
new file mode 100644
index 0000000..766f7c9
--- /dev/null
+++ b/src/libserver/fuzzy_backend/fuzzy_backend_sqlite.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FUZZY_BACKEND_H_
+#define FUZZY_BACKEND_H_
+
+#include "config.h"
+#include "fuzzy_wire.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_fuzzy_backend_sqlite;
+
+/**
+ * Open fuzzy backend
+ * @param path file to open (legacy file will be converted automatically)
+ * @param err error pointer
+ * @return backend structure or NULL
+ */
+struct rspamd_fuzzy_backend_sqlite *rspamd_fuzzy_backend_sqlite_open(const gchar *path,
+ gboolean vacuum,
+ GError **err);
+
+/**
+ * Check specified fuzzy in the backend
+ * @param backend
+ * @param cmd
+ * @return reply with probability and weight
+ */
+struct rspamd_fuzzy_reply rspamd_fuzzy_backend_sqlite_check(
+ struct rspamd_fuzzy_backend_sqlite *backend,
+ const struct rspamd_fuzzy_cmd *cmd,
+ gint64 expire);
+
+/**
+ * Prepare storage for updates (by starting transaction)
+ */
+gboolean rspamd_fuzzy_backend_sqlite_prepare_update(struct rspamd_fuzzy_backend_sqlite *backend,
+ const gchar *source);
+
+/**
+ * Add digest to the database
+ * @param backend
+ * @param cmd
+ * @return
+ */
+gboolean rspamd_fuzzy_backend_sqlite_add(struct rspamd_fuzzy_backend_sqlite *backend,
+ const struct rspamd_fuzzy_cmd *cmd);
+
+/**
+ * Delete digest from the database
+ * @param backend
+ * @param cmd
+ * @return
+ */
+gboolean rspamd_fuzzy_backend_sqlite_del(
+ struct rspamd_fuzzy_backend_sqlite *backend,
+ const struct rspamd_fuzzy_cmd *cmd);
+
+/**
+ * Commit updates to storage
+ */
+gboolean rspamd_fuzzy_backend_sqlite_finish_update(struct rspamd_fuzzy_backend_sqlite *backend,
+ const gchar *source, gboolean version_bump);
+
+/**
+ * Sync storage
+ * @param backend
+ * @return
+ */
+gboolean rspamd_fuzzy_backend_sqlite_sync(struct rspamd_fuzzy_backend_sqlite *backend,
+ gint64 expire,
+ gboolean clean_orphaned);
+
+/**
+ * Close storage
+ * @param backend
+ */
+void rspamd_fuzzy_backend_sqlite_close(struct rspamd_fuzzy_backend_sqlite *backend);
+
+gsize rspamd_fuzzy_backend_sqlite_count(struct rspamd_fuzzy_backend_sqlite *backend);
+
+gint rspamd_fuzzy_backend_sqlite_version(struct rspamd_fuzzy_backend_sqlite *backend, const gchar *source);
+
+gsize rspamd_fuzzy_backend_sqlite_expired(struct rspamd_fuzzy_backend_sqlite *backend);
+
+const gchar *rspamd_fuzzy_sqlite_backend_id(struct rspamd_fuzzy_backend_sqlite *backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* FUZZY_BACKEND_H_ */
diff --git a/src/libserver/fuzzy_wire.h b/src/libserver/fuzzy_wire.h
new file mode 100644
index 0000000..c2f93b8
--- /dev/null
+++ b/src/libserver/fuzzy_wire.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_FUZZY_STORAGE_H
+#define RSPAMD_FUZZY_STORAGE_H
+
+#include "config.h"
+#include "rspamd.h"
+#include "shingles.h"
+#include "cryptobox.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RSPAMD_FUZZY_VERSION 4
+#define RSPAMD_FUZZY_KEYLEN 8
+
+#define RSPAMD_FUZZY_FLAG_WEAK (1u << 7u)
+/* Use lower 4 bits for the version */
+#define RSPAMD_FUZZY_VERSION_MASK 0x0fu
+/* Commands for fuzzy storage */
+#define FUZZY_CHECK 0
+#define FUZZY_WRITE 1
+#define FUZZY_DEL 2
+#define FUZZY_STAT 3
+#define FUZZY_PING 4
+#define FUZZY_CLIENT_MAX 4
+/* Internal commands */
+#define FUZZY_REFRESH 100 /* Update expire */
+#define FUZZY_DUP 101 /* Skip duplicate in update queue */
+
+/**
+ * The epoch of the fuzzy client
+ */
+enum rspamd_fuzzy_epoch {
+ RSPAMD_FUZZY_EPOCH10, /**< 1.0+ encryption */
+ RSPAMD_FUZZY_EPOCH11, /**< 1.7+ extended reply */
+ RSPAMD_FUZZY_EPOCH_MAX
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_cmd)
+{
+ guint8 version;
+ guint8 cmd;
+ guint8 shingles_count;
+ guint8 flag;
+ gint32 value;
+ guint32 tag;
+ gchar digest[rspamd_cryptobox_HASHBYTES];
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_shingle_cmd)
+{
+ struct rspamd_fuzzy_cmd basic;
+ struct rspamd_shingle sgl;
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_reply_v1)
+{
+ gint32 value;
+ guint32 flag;
+ guint32 tag;
+ float prob;
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_reply)
+{
+ struct rspamd_fuzzy_reply_v1 v1;
+ gchar digest[rspamd_cryptobox_HASHBYTES];
+ guint32 ts;
+ guchar reserved[12];
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_encrypted_req_hdr)
+{
+ guchar magic[4];
+ guchar key_id[RSPAMD_FUZZY_KEYLEN];
+ guchar pubkey[32];
+ guchar nonce[rspamd_cryptobox_MAX_NONCEBYTES];
+ guchar mac[rspamd_cryptobox_MAX_MACBYTES];
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_encrypted_cmd)
+{
+ struct rspamd_fuzzy_encrypted_req_hdr hdr;
+ struct rspamd_fuzzy_cmd cmd;
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_encrypted_shingle_cmd)
+{
+ struct rspamd_fuzzy_encrypted_req_hdr hdr;
+ struct rspamd_fuzzy_shingle_cmd cmd;
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_encrypted_rep_hdr)
+{
+ guchar nonce[rspamd_cryptobox_MAX_NONCEBYTES];
+ guchar mac[rspamd_cryptobox_MAX_MACBYTES];
+};
+
+RSPAMD_PACKED(rspamd_fuzzy_encrypted_reply)
+{
+ struct rspamd_fuzzy_encrypted_rep_hdr hdr;
+ struct rspamd_fuzzy_reply rep;
+};
+
+static const guchar fuzzy_encrypted_magic[4] = {'r', 's', 'f', 'e'};
+
+enum rspamd_fuzzy_extension_type {
+ RSPAMD_FUZZY_EXT_SOURCE_DOMAIN = 'd',
+ RSPAMD_FUZZY_EXT_SOURCE_IP4 = '4',
+ RSPAMD_FUZZY_EXT_SOURCE_IP6 = '6',
+};
+
+struct rspamd_fuzzy_cmd_extension {
+ enum rspamd_fuzzy_extension_type ext;
+ guint length;
+ struct rspamd_fuzzy_cmd_extension *next;
+ guchar *payload;
+};
+
+struct rspamd_fuzzy_stat_entry {
+ const gchar *name;
+ guint64 fuzzy_cnt;
+};
+
+RSPAMD_PACKED(fuzzy_peer_cmd)
+{
+ gint32 is_shingle;
+ union {
+ struct rspamd_fuzzy_cmd normal;
+ struct rspamd_fuzzy_shingle_cmd shingle;
+ } cmd;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
new file mode 100644
index 0000000..5861d45
--- /dev/null
+++ b/src/libserver/html/html.cxx
@@ -0,0 +1,2393 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "util.h"
+#include "message.h"
+#include "html.h"
+#include "html_tags.h"
+#include "html_block.hxx"
+#include "html.hxx"
+#include "libserver/css/css_value.hxx"
+#include "libserver/css/css.hxx"
+#include "libserver/task.h"
+#include "libserver/cfg_file.h"
+
+#include "url.h"
+#include "contrib/libucl/khash.h"
+#include "libmime/images.h"
+#include "libutil/cxx/utf8_util.h"
+
+#include "html_tag_defs.hxx"
+#include "html_entities.hxx"
+#include "html_tag.hxx"
+#include "html_url.hxx"
+
+#include <frozen/unordered_map.h>
+#include <frozen/string.h>
+#include <fmt/core.h>
+
+#include <unicode/uversion.h>
+
+namespace rspamd::html {
+
+static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */
+
+static const html_tags_storage html_tags_defs;
+
+auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_type>(
+ {
+ {"name", html_component_type::RSPAMD_HTML_COMPONENT_NAME},
+ {"href", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
+ {"src", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
+ {"action", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
+ {"color", html_component_type::RSPAMD_HTML_COMPONENT_COLOR},
+ {"bgcolor", html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
+ {"style", html_component_type::RSPAMD_HTML_COMPONENT_STYLE},
+ {"class", html_component_type::RSPAMD_HTML_COMPONENT_CLASS},
+ {"width", html_component_type::RSPAMD_HTML_COMPONENT_WIDTH},
+ {"height", html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT},
+ {"size", html_component_type::RSPAMD_HTML_COMPONENT_SIZE},
+ {"rel", html_component_type::RSPAMD_HTML_COMPONENT_REL},
+ {"alt", html_component_type::RSPAMD_HTML_COMPONENT_ALT},
+ {"id", html_component_type::RSPAMD_HTML_COMPONENT_ID},
+ {"hidden", html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+ });
+
+#define msg_debug_html(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_html_log_id, "html", pool->tag.uid, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(html)
+
+/*
+ * This function is expected to be called on a closing tag to fill up all tags
+ * and return the current parent (meaning unclosed) tag
+ */
+static auto
+html_check_balance(struct html_content *hc,
+ struct html_tag *tag,
+ goffset tag_start_offset,
+ goffset tag_end_offset) -> html_tag *
+{
+ /* As agreed, the closing tag has the last opening at the parent ptr */
+ auto *opening_tag = tag->parent;
+
+ auto calculate_content_length = [tag_start_offset, tag_end_offset](html_tag *t) {
+ auto opening_content_offset = t->content_offset;
+
+ if (t->flags & (CM_EMPTY)) {
+ /* Attach closing tag just at the opening tag */
+ t->closing.start = t->tag_start;
+ t->closing.end = t->content_offset;
+ }
+ else {
+
+ if (opening_content_offset <= tag_start_offset) {
+ t->closing.start = tag_start_offset;
+ t->closing.end = tag_end_offset;
+ }
+ else {
+
+ t->closing.start = t->content_offset;
+ t->closing.end = tag_end_offset;
+ }
+ }
+ };
+
+ auto balance_tag = [&]() -> html_tag * {
+ auto it = tag->parent;
+ auto found_pair = false;
+
+ for (; it != nullptr; it = it->parent) {
+ if (it->id == tag->id && !(it->flags & FL_CLOSED)) {
+ found_pair = true;
+ break;
+ }
+ }
+
+ /*
+ * If we have found a closing pair, then we need to close all tags and
+ * return the top-most tag
+ */
+ if (found_pair) {
+ for (it = tag->parent; it != nullptr; it = it->parent) {
+ it->flags |= FL_CLOSED;
+ /* Insert a virtual closing tag for all tags that are not closed */
+ calculate_content_length(it);
+ if (it->id == tag->id && !(it->flags & FL_CLOSED)) {
+ break;
+ }
+ }
+
+ return it;
+ }
+ else {
+ /*
+ * We have not found a pair, so this closing tag is bogus and should
+ * be ignored completely.
+ * Unfortunately, it also means that we need to insert another tag,
+ * as the current closing tag is unusable for that purposes.
+ *
+ * We assume that callee will recognise that and reconstruct the
+ * tag at the tag_end_closing state, so we return nullptr...
+ */
+ }
+
+ /* Tag must be ignored and reconstructed */
+ return nullptr;
+ };
+
+ if (opening_tag) {
+
+ if (opening_tag->id == tag->id) {
+ opening_tag->flags |= FL_CLOSED;
+
+ calculate_content_length(opening_tag);
+ /* All good */
+ return opening_tag->parent;
+ }
+ else {
+ return balance_tag();
+ }
+ }
+ else {
+ /*
+ * We have no opening tag
+ * There are two possibilities:
+ *
+ * 1) We have some block tag in hc->all_tags;
+ * 2) We have no tags
+ */
+
+ if (hc->all_tags.empty()) {
+ hc->all_tags.push_back(std::make_unique<html_tag>());
+ auto *vtag = hc->all_tags.back().get();
+ vtag->id = Tag_HTML;
+ vtag->flags = FL_VIRTUAL;
+ vtag->tag_start = 0;
+ vtag->content_offset = 0;
+ calculate_content_length(vtag);
+
+ if (!hc->root_tag) {
+ hc->root_tag = vtag;
+ }
+ else {
+ vtag->parent = hc->root_tag;
+ }
+
+ tag->parent = vtag;
+
+ /* Recursively call with a virtual <html> tag inserted */
+ return html_check_balance(hc, tag, tag_start_offset, tag_end_offset);
+ }
+ }
+
+ return nullptr;
+}
+
+auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>
+{
+ auto known_component_it = html_components_map.find(st);
+
+ if (known_component_it != html_components_map.end()) {
+ return known_component_it->second;
+ }
+ else {
+ return std::nullopt;
+ }
+}
+
+enum tag_parser_state {
+ parse_start = 0,
+ parse_name,
+ parse_attr_name,
+ parse_equal,
+ parse_start_dquote,
+ parse_dqvalue,
+ parse_end_dquote,
+ parse_start_squote,
+ parse_sqvalue,
+ parse_end_squote,
+ parse_value,
+ spaces_before_eq,
+ spaces_after_eq,
+ spaces_after_param,
+ ignore_bad_tag,
+ tag_end,
+ slash_after_value,
+ slash_in_unquoted_value,
+};
+struct tag_content_parser_state {
+ tag_parser_state cur_state = parse_start;
+ std::string buf;
+ std::optional<html_component_type> cur_component;
+
+ void reset()
+ {
+ cur_state = parse_start;
+ buf.clear();
+ cur_component = std::nullopt;
+ }
+};
+
+static inline void
+html_parse_tag_content(rspamd_mempool_t *pool,
+ struct html_content *hc,
+ struct html_tag *tag,
+ const char *in,
+ struct tag_content_parser_state &parser_env)
+{
+ auto state = parser_env.cur_state;
+
+ /*
+ * Stores tag component if it doesn't exist, performing copy of the
+ * value + decoding of the entities
+ * Parser env is set to clear the current html attribute fields (saved_p and
+ * cur_component)
+ */
+ auto store_component_value = [&]() -> void {
+ if (parser_env.cur_component) {
+
+ if (parser_env.buf.empty()) {
+ tag->components.emplace_back(parser_env.cur_component.value(),
+ std::string_view{});
+ }
+ else {
+ /* We need to copy buf to a persistent storage */
+ auto *s = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
+
+ if (parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_ID ||
+ parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
+ /* Lowercase */
+ rspamd_str_copy_lc(parser_env.buf.data(), s, parser_env.buf.size());
+ }
+ else {
+ memcpy(s, parser_env.buf.data(), parser_env.buf.size());
+ }
+
+ auto sz = rspamd_html_decode_entitles_inplace(s, parser_env.buf.size());
+ tag->components.emplace_back(parser_env.cur_component.value(),
+ std::string_view{s, sz});
+ }
+ }
+
+ parser_env.buf.clear();
+ parser_env.cur_component = std::nullopt;
+ };
+
+ auto store_component_name = [&]() -> bool {
+ decode_html_entitles_inplace(parser_env.buf);
+ auto known_component_it = html_components_map.find(std::string_view{parser_env.buf});
+ parser_env.buf.clear();
+
+ if (known_component_it != html_components_map.end()) {
+ parser_env.cur_component = known_component_it->second;
+
+ return true;
+ }
+ else {
+ parser_env.cur_component = std::nullopt;
+ }
+
+ return false;
+ };
+
+ auto store_value_character = [&](bool lc) -> void {
+ auto c = lc ? g_ascii_tolower(*in) : *in;
+
+ if (c == '\0') {
+ /* Replace with u0FFD */
+ parser_env.buf.append((const char *) u8"\uFFFD");
+ }
+ else {
+ parser_env.buf.push_back(c);
+ }
+ };
+
+ switch (state) {
+ case parse_start:
+ if (!g_ascii_isalpha(*in) && !g_ascii_isspace(*in)) {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = ignore_bad_tag;
+ tag->id = N_TAGS;
+ tag->flags |= FL_BROKEN;
+ }
+ else if (g_ascii_isalpha(*in)) {
+ state = parse_name;
+ store_value_character(true);
+ }
+ break;
+
+ case parse_name:
+ if ((g_ascii_isspace(*in) || *in == '>' || *in == '/')) {
+ if (*in == '/') {
+ tag->flags |= FL_CLOSED;
+ }
+
+ if (parser_env.buf.empty()) {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ tag->id = N_TAGS;
+ tag->flags |= FL_BROKEN;
+ state = ignore_bad_tag;
+ }
+ else {
+ decode_html_entitles_inplace(parser_env.buf);
+ const auto *tag_def = rspamd::html::html_tags_defs.by_name(parser_env.buf);
+
+ if (tag_def == nullptr) {
+ hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
+ /* Assign -hash to match closing tag if needed */
+ auto nhash = static_cast<std::int32_t>(std::hash<std::string>{}(parser_env.buf));
+ /* Always negative */
+ tag->id = static_cast<tag_id_t>(nhash | G_MININT32);
+ }
+ else {
+ tag->id = tag_def->id;
+ tag->flags = tag_def->flags;
+ }
+
+ parser_env.buf.clear();
+
+ state = spaces_after_param;
+ }
+ }
+ else {
+ store_value_character(true);
+ }
+ break;
+
+ case parse_attr_name:
+ if (*in == '=') {
+ if (!parser_env.buf.empty()) {
+ store_component_name();
+ }
+ state = parse_equal;
+ }
+ else if (g_ascii_isspace(*in)) {
+ store_component_name();
+ state = spaces_before_eq;
+ }
+ else if (*in == '/') {
+ store_component_name();
+ store_component_value();
+ state = slash_after_value;
+ }
+ else if (*in == '>') {
+ store_component_name();
+ store_component_value();
+ state = tag_end;
+ }
+ else {
+ if (*in == '"' || *in == '\'' || *in == '<') {
+ /* Should never be in attribute names but ignored */
+ tag->flags |= FL_BROKEN;
+ }
+
+ store_value_character(true);
+ }
+
+ break;
+
+ case spaces_before_eq:
+ if (*in == '=') {
+ state = parse_equal;
+ }
+ else if (!g_ascii_isspace(*in)) {
+ /*
+ * HTML defines that crap could still be restored and
+ * calculated somehow... So we have to follow this stupid behaviour
+ */
+ /*
+ * TODO: estimate what insane things do email clients in each case
+ */
+ if (*in == '>') {
+ /*
+ * Attribute name followed by end of tag
+ * Should be okay (empty attribute). The rest is handled outside
+ * this automata.
+ */
+ store_component_value();
+ state = tag_end;
+ }
+ else if (*in == '"' || *in == '\'' || *in == '<') {
+ /* Attribute followed by quote... Missing '=' ? Dunno, need to test */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ tag->flags |= FL_BROKEN;
+ store_component_value();
+ store_value_character(true);
+ state = spaces_after_param;
+ }
+ else {
+ /* Empty attribute */
+ store_component_value();
+ store_value_character(true);
+ state = spaces_after_param;
+ }
+ }
+ break;
+
+ case spaces_after_eq:
+ if (*in == '"') {
+ state = parse_start_dquote;
+ }
+ else if (*in == '\'') {
+ state = parse_start_squote;
+ }
+ else if (!g_ascii_isspace(*in)) {
+ store_value_character(true);
+ state = parse_value;
+ }
+ break;
+
+ case parse_equal:
+ if (g_ascii_isspace(*in)) {
+ state = spaces_after_eq;
+ }
+ else if (*in == '"') {
+ state = parse_start_dquote;
+ }
+ else if (*in == '\'') {
+ state = parse_start_squote;
+ }
+ else {
+ store_value_character(true);
+ state = parse_value;
+ }
+ break;
+
+ case parse_start_dquote:
+ if (*in == '"') {
+ state = spaces_after_param;
+ }
+ else {
+ store_value_character(false);
+ state = parse_dqvalue;
+ }
+ break;
+
+ case parse_start_squote:
+ if (*in == '\'') {
+ state = spaces_after_param;
+ }
+ else {
+ store_value_character(false);
+ state = parse_sqvalue;
+ }
+ break;
+
+ case parse_dqvalue:
+ if (*in == '"') {
+ store_component_value();
+ state = parse_end_dquote;
+ }
+ else {
+ store_value_character(false);
+ }
+ break;
+
+ case parse_sqvalue:
+ if (*in == '\'') {
+ store_component_value();
+ state = parse_end_squote;
+ }
+ else {
+ store_value_character(false);
+ }
+
+ break;
+
+ case parse_value:
+ if (*in == '/') {
+ state = slash_in_unquoted_value;
+ }
+ else if (g_ascii_isspace(*in) || *in == '>' || *in == '"') {
+ store_component_value();
+ state = spaces_after_param;
+ }
+ else {
+ store_value_character(false);
+ }
+ break;
+
+ case parse_end_dquote:
+ case parse_end_squote:
+ if (g_ascii_isspace(*in)) {
+ state = spaces_after_param;
+ }
+ else if (*in == '/') {
+ store_component_value();
+ store_value_character(true);
+ state = slash_after_value;
+ }
+ else {
+ /* No space, proceed immediately to the attribute name */
+ state = parse_attr_name;
+ store_component_value();
+ store_value_character(true);
+ }
+ break;
+
+ case spaces_after_param:
+ if (!g_ascii_isspace(*in)) {
+ if (*in == '/') {
+ state = slash_after_value;
+ }
+ else if (*in == '=') {
+ /* Attributes cannot start with '=' */
+ tag->flags |= FL_BROKEN;
+ store_value_character(true);
+ state = parse_attr_name;
+ }
+ else {
+ store_value_character(true);
+ state = parse_attr_name;
+ }
+ }
+ break;
+ case slash_after_value:
+ if (*in == '>') {
+ tag->flags |= FL_CLOSED;
+ state = tag_end;
+ }
+ else if (!g_ascii_isspace(*in)) {
+ tag->flags |= FL_BROKEN;
+ state = parse_attr_name;
+ }
+ break;
+ case slash_in_unquoted_value:
+ if (*in == '>') {
+ /* That slash was in fact closing tag slash, woohoo */
+ tag->flags |= FL_CLOSED;
+ state = tag_end;
+ store_component_value();
+ }
+ else {
+ /* Welcome to the world of html, revert state and save missing / */
+ parser_env.buf.push_back('/');
+ store_value_character(false);
+ state = parse_value;
+ }
+ break;
+ case ignore_bad_tag:
+ case tag_end:
+ break;
+ }
+
+ parser_env.cur_state = state;
+}
+
+static inline auto
+html_is_absolute_url(std::string_view st) -> bool
+{
+ auto alnum_pos = std::find_if(std::begin(st), std::end(st),
+ [](auto c) { return !g_ascii_isalnum(c); });
+
+ if (alnum_pos != std::end(st) && alnum_pos != std::begin(st)) {
+ if (*alnum_pos == ':') {
+ if (st.substr(0, std::distance(std::begin(st), alnum_pos)) == "mailto") {
+ return true;
+ }
+
+ std::advance(alnum_pos, 1);
+ if (alnum_pos != std::end(st)) {
+ /* Include even malformed urls */
+ if (*alnum_pos == '/' || *alnum_pos == '\\') {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static auto
+html_process_url_tag(rspamd_mempool_t *pool,
+ struct html_tag *tag,
+ struct html_content *hc) -> std::optional<struct rspamd_url *>
+{
+ auto found_href_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_HREF);
+
+ if (found_href_maybe) {
+ /* Check base url */
+ auto &href_value = found_href_maybe.value();
+
+ if (hc && hc->base_url) {
+ /*
+ * Relative url cannot start from the following:
+ * schema://
+ * data:
+ * slash
+ */
+
+ if (!html_is_absolute_url(href_value)) {
+
+ if (href_value.size() >= sizeof("data:") &&
+ g_ascii_strncasecmp(href_value.data(), "data:", sizeof("data:") - 1) == 0) {
+ /* Image data url, never insert as url */
+ return std::nullopt;
+ }
+
+ /* Assume relative url */
+ auto need_slash = false;
+
+ auto orig_len = href_value.size();
+ auto len = orig_len + hc->base_url->urllen;
+
+ if (hc->base_url->datalen == 0) {
+ need_slash = true;
+ len++;
+ }
+
+ auto *buf = rspamd_mempool_alloc_buffer(pool, len + 1);
+ auto nlen = (std::size_t) rspamd_snprintf(buf, len + 1,
+ "%*s%s%*s",
+ (int) hc->base_url->urllen, hc->base_url->string,
+ need_slash ? "/" : "",
+ (gint) orig_len, href_value.data());
+ href_value = {buf, nlen};
+ }
+ else if (href_value.size() > 2 && href_value[0] == '/' && href_value[1] != '/') {
+ /* Relative to the hostname */
+ auto orig_len = href_value.size();
+ auto len = orig_len + hc->base_url->hostlen + hc->base_url->protocollen +
+ 3 /* for :// */;
+ auto *buf = rspamd_mempool_alloc_buffer(pool, len + 1);
+ auto nlen = (std::size_t) rspamd_snprintf(buf, len + 1, "%*s://%*s/%*s",
+ (int) hc->base_url->protocollen, hc->base_url->string,
+ (int) hc->base_url->hostlen, rspamd_url_host_unsafe(hc->base_url),
+ (gint) orig_len, href_value.data());
+ href_value = {buf, nlen};
+ }
+ }
+
+ auto url = html_process_url(pool, href_value).value_or(nullptr);
+
+ if (url) {
+ if (tag->id != Tag_A) {
+ /* Mark special tags special */
+ url->flags |= RSPAMD_URL_FLAG_SPECIAL;
+ }
+
+ if (std::holds_alternative<std::monostate>(tag->extra)) {
+ tag->extra = url;
+ }
+
+ return url;
+ }
+
+ return std::nullopt;
+ }
+
+ return std::nullopt;
+}
+
+struct rspamd_html_url_query_cbd {
+ rspamd_mempool_t *pool;
+ khash_t(rspamd_url_hash) * url_set;
+ struct rspamd_url *url;
+ GPtrArray *part_urls;
+};
+
+static gboolean
+html_url_query_callback(struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud)
+{
+ struct rspamd_html_url_query_cbd *cbd =
+ (struct rspamd_html_url_query_cbd *) ud;
+ rspamd_mempool_t *pool;
+
+ pool = cbd->pool;
+
+ if (url->protocol == PROTOCOL_MAILTO) {
+ if (url->userlen == 0) {
+ return FALSE;
+ }
+ }
+
+ msg_debug_html("found url %s in query of url"
+ " %*s",
+ url->string,
+ cbd->url->querylen, rspamd_url_query_unsafe(cbd->url));
+
+ url->flags |= RSPAMD_URL_FLAG_QUERY;
+
+ if (rspamd_url_set_add_or_increase(cbd->url_set, url, false) && cbd->part_urls) {
+ g_ptr_array_add(cbd->part_urls, url);
+ }
+
+ return TRUE;
+}
+
+static void
+html_process_query_url(rspamd_mempool_t *pool, struct rspamd_url *url,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls)
+{
+ if (url->querylen > 0) {
+ struct rspamd_html_url_query_cbd qcbd;
+
+ qcbd.pool = pool;
+ qcbd.url_set = url_set;
+ qcbd.url = url;
+ qcbd.part_urls = part_urls;
+
+ rspamd_url_find_multiple(pool,
+ rspamd_url_query_unsafe(url), url->querylen,
+ RSPAMD_URL_FIND_ALL, NULL,
+ html_url_query_callback, &qcbd);
+ }
+
+ if (part_urls) {
+ g_ptr_array_add(part_urls, url);
+ }
+}
+
+static auto
+html_process_data_image(rspamd_mempool_t *pool,
+ struct html_image *img,
+ std::string_view input) -> void
+{
+ /*
+ * Here, we do very basic processing of the data:
+ * detect if we have something like: ``
+ * We only parse base64 encoded data.
+ * We ignore content type so far
+ */
+ struct rspamd_image *parsed_image;
+ const gchar *semicolon_pos = input.data(),
+ *end = input.data() + input.size();
+
+ if ((semicolon_pos = (const gchar *) memchr(semicolon_pos, ';', end - semicolon_pos)) != NULL) {
+ if (end - semicolon_pos > sizeof("base64,")) {
+ if (memcmp(semicolon_pos + 1, "base64,", sizeof("base64,") - 1) == 0) {
+ const gchar *data_pos = semicolon_pos + sizeof("base64,");
+ gchar *decoded;
+ gsize encoded_len = end - data_pos, decoded_len;
+ rspamd_ftok_t inp;
+
+ decoded_len = (encoded_len / 4 * 3) + 12;
+ decoded = rspamd_mempool_alloc_buffer(pool, decoded_len);
+ rspamd_cryptobox_base64_decode(data_pos, encoded_len,
+ reinterpret_cast<guchar *>(decoded), &decoded_len);
+ inp.begin = decoded;
+ inp.len = decoded_len;
+
+ parsed_image = rspamd_maybe_process_image(pool, &inp);
+
+ if (parsed_image) {
+ msg_debug_html("detected %s image of size %ud x %ud in data url",
+ rspamd_image_type_str(parsed_image->type),
+ parsed_image->width, parsed_image->height);
+ img->embedded_image = parsed_image;
+ }
+ }
+ }
+ else {
+ /* Nothing useful */
+ return;
+ }
+ }
+}
+
+static void
+html_process_img_tag(rspamd_mempool_t *pool,
+ struct html_tag *tag,
+ struct html_content *hc,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls)
+{
+ struct html_image *img;
+
+ img = rspamd_mempool_alloc0_type(pool, struct html_image);
+ img->tag = tag;
+
+ for (const auto &param: tag->components) {
+
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HREF) {
+ /* Check base url */
+ const auto &href_value = param.value;
+
+ if (href_value.size() > 0) {
+ rspamd_ftok_t fstr;
+ fstr.begin = href_value.data();
+ fstr.len = href_value.size();
+ img->src = rspamd_mempool_ftokdup(pool, &fstr);
+
+ if (href_value.size() > sizeof("cid:") - 1 && memcmp(href_value.data(),
+ "cid:", sizeof("cid:") - 1) == 0) {
+ /* We have an embedded image */
+ img->src += sizeof("cid:") - 1;
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+ }
+ else {
+ if (href_value.size() > sizeof("data:") - 1 && memcmp(href_value.data(),
+ "data:", sizeof("data:") - 1) == 0) {
+ /* We have an embedded image in HTML tag */
+ img->flags |=
+ (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+ html_process_data_image(pool, img, href_value);
+ hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+ }
+ else {
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+ if (img->src) {
+
+ std::string_view cpy{href_value};
+ auto maybe_url = html_process_url(pool, cpy);
+
+ if (maybe_url) {
+ img->url = maybe_url.value();
+ struct rspamd_url *existing;
+
+ img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+ existing = rspamd_url_set_add_or_return(url_set,
+ img->url);
+
+ if (existing && existing != img->url) {
+ /*
+ * We have some other URL that could be
+ * found, e.g. from another part. However,
+ * we still want to set an image flag on it
+ */
+ existing->flags |= img->url->flags;
+ existing->count++;
+ }
+ else if (part_urls) {
+ /* New url */
+ g_ptr_array_add(part_urls, img->url);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT) {
+ unsigned long val;
+
+ rspamd_strtoul(param.value.data(), param.value.size(), &val);
+ img->height = val;
+ }
+
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_WIDTH) {
+ unsigned long val;
+
+ rspamd_strtoul(param.value.data(), param.value.size(), &val);
+ img->width = val;
+ }
+
+ /* TODO: rework to css at some time */
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
+ if (img->height == 0) {
+ auto style_st = param.value;
+ auto pos = rspamd_substring_search_caseless(style_st.data(),
+ style_st.size(),
+ "height", sizeof("height") - 1);
+ if (pos != -1) {
+ auto substr = style_st.substr(pos + sizeof("height") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->height = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
+ }
+ }
+ }
+ }
+ if (img->width == 0) {
+ auto style_st = param.value;
+ auto pos = rspamd_substring_search_caseless(style_st.data(),
+ style_st.size(),
+ "width", sizeof("width") - 1);
+ if (pos != -1) {
+ auto substr = style_st.substr(pos + sizeof("width") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->width = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (img->embedded_image) {
+ if (img->height == 0) {
+ img->height = img->embedded_image->height;
+ }
+ if (img->width == 0) {
+ img->width = img->embedded_image->width;
+ }
+ }
+
+ hc->images.push_back(img);
+
+ if (std::holds_alternative<std::monostate>(tag->extra)) {
+ tag->extra = img;
+ }
+}
+
+static auto
+html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+ struct html_content *hc,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls) -> void
+{
+ auto found_rel_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_REL);
+
+ if (found_rel_maybe) {
+ if (found_rel_maybe.value() == "icon") {
+ html_process_img_tag(pool, tag, hc, url_set, part_urls);
+ }
+ }
+}
+
+static auto
+html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+ struct html_content *hc) -> void
+{
+ std::optional<css::css_value> maybe_fgcolor, maybe_bgcolor;
+ bool hidden = false;
+
+ for (const auto &param: tag->components) {
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_COLOR) {
+ maybe_fgcolor = css::css_value::maybe_color_from_string(param.value);
+ }
+
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR) {
+ maybe_bgcolor = css::css_value::maybe_color_from_string(param.value);
+ }
+
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
+ tag->block = rspamd::css::parse_css_declaration(pool, param.value);
+ }
+
+ if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN) {
+ hidden = true;
+ }
+ }
+
+ if (!tag->block) {
+ tag->block = html_block::undefined_html_block_pool(pool);
+ }
+
+ if (hidden) {
+ tag->block->set_display(false);
+ }
+
+ if (maybe_fgcolor) {
+ tag->block->set_fgcolor(maybe_fgcolor->to_color().value());
+ }
+
+ if (maybe_bgcolor) {
+ tag->block->set_bgcolor(maybe_bgcolor->to_color().value());
+ }
+}
+
+static inline auto
+html_append_parsed(struct html_content *hc,
+ std::string_view data,
+ bool transparent,
+ std::size_t input_len,
+ std::string &dest) -> std::size_t
+{
+ auto cur_offset = dest.size();
+
+ if (dest.size() > input_len) {
+ /* Impossible case, refuse to append */
+ return 0;
+ }
+
+ if (data.size() > 0) {
+ /* Handle multiple spaces at the begin */
+
+ if (cur_offset > 0) {
+ auto last = dest.back();
+ if (!g_ascii_isspace(last) && g_ascii_isspace(data.front())) {
+ dest.append(" ");
+ data = {data.data() + 1, data.size() - 1};
+ cur_offset++;
+ }
+ }
+
+ if (data.find('\0') != std::string_view::npos) {
+ auto replace_zero_func = [](const auto &input, auto &output) {
+ const auto last = input.cend();
+ for (auto it = input.cbegin(); it != last; ++it) {
+ if (*it == '\0') {
+ output.append((const char *) u8"\uFFFD");
+ }
+ else {
+ output.push_back(*it);
+ }
+ }
+ };
+
+ dest.reserve(dest.size() + data.size() + sizeof(u8"\uFFFD"));
+ replace_zero_func(data, dest);
+ hc->flags |= RSPAMD_HTML_FLAG_HAS_ZEROS;
+ }
+ else {
+ dest.append(data);
+ }
+ }
+
+ auto nlen = decode_html_entitles_inplace(dest.data() + cur_offset,
+ dest.size() - cur_offset, true);
+
+ dest.resize(nlen + cur_offset);
+
+ if (transparent) {
+ /* Replace all visible characters with spaces */
+ auto start = std::next(dest.begin(), cur_offset);
+ std::replace_if(
+ start, std::end(dest), [](const auto c) {
+ return !g_ascii_isspace(c);
+ },
+ ' ');
+ }
+
+ return nlen;
+}
+
+static auto
+html_process_displayed_href_tag(rspamd_mempool_t *pool,
+ struct html_content *hc,
+ std::string_view data,
+ const struct html_tag *cur_tag,
+ GList **exceptions,
+ khash_t(rspamd_url_hash) * url_set,
+ goffset dest_offset) -> void
+{
+
+ if (std::holds_alternative<rspamd_url *>(cur_tag->extra)) {
+ auto *url = std::get<rspamd_url *>(cur_tag->extra);
+
+ html_check_displayed_url(pool,
+ exceptions, url_set,
+ data,
+ dest_offset,
+ url);
+ }
+}
+
+static auto
+html_append_tag_content(rspamd_mempool_t *pool,
+ const gchar *start, gsize len,
+ struct html_content *hc,
+ html_tag *tag,
+ GList **exceptions,
+ khash_t(rspamd_url_hash) * url_set) -> goffset
+{
+ auto is_visible = true, is_block = false, is_spaces = false, is_transparent = false;
+ goffset next_tag_offset = tag->closing.end,
+ initial_parsed_offset = hc->parsed.size(),
+ initial_invisible_offset = hc->invisible.size();
+
+ auto calculate_final_tag_offsets = [&]() -> void {
+ if (is_visible) {
+ tag->content_offset = initial_parsed_offset;
+ tag->closing.start = hc->parsed.size();
+ }
+ else {
+ tag->content_offset = initial_invisible_offset;
+ tag->closing.start = hc->invisible.size();
+ }
+ };
+
+ if (tag->closing.end == -1) {
+ if (tag->closing.start != -1) {
+ next_tag_offset = tag->closing.start;
+ tag->closing.end = tag->closing.start;
+ }
+ else {
+ next_tag_offset = tag->content_offset;
+ tag->closing.end = tag->content_offset;
+ }
+ }
+ if (tag->closing.start == -1) {
+ tag->closing.start = tag->closing.end;
+ }
+
+ auto append_margin = [&](char c) -> void {
+ /* We do care about visible margins only */
+ if (is_visible) {
+ if (!hc->parsed.empty() && hc->parsed.back() != c && hc->parsed.back() != '\n') {
+ if (hc->parsed.back() == ' ') {
+ /* We also strip extra spaces at the end, but limiting the start */
+ auto last = std::make_reverse_iterator(hc->parsed.begin() + initial_parsed_offset);
+ auto first = std::find_if(hc->parsed.rbegin(), last,
+ [](auto ch) -> auto {
+ return ch != ' ';
+ });
+ hc->parsed.erase(first.base(), hc->parsed.end());
+ g_assert(hc->parsed.size() >= initial_parsed_offset);
+ }
+ hc->parsed.push_back(c);
+ }
+ }
+ };
+
+ if (tag->id == Tag_BR || tag->id == Tag_HR) {
+
+ if (!(tag->flags & FL_IGNORE)) {
+ hc->parsed.append("\n");
+ }
+
+ auto ret = tag->content_offset;
+ calculate_final_tag_offsets();
+
+ return ret;
+ }
+ else if ((tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) || (tag->flags & CM_HEAD)) {
+ auto ret = tag->closing.end;
+ calculate_final_tag_offsets();
+
+ return ret;
+ }
+
+ if ((tag->flags & (FL_COMMENT | FL_XML | FL_IGNORE | CM_HEAD))) {
+ is_visible = false;
+ }
+ else {
+ if (!tag->block) {
+ is_visible = true;
+ }
+ else if (!tag->block->is_visible()) {
+ if (!tag->block->is_transparent()) {
+ is_visible = false;
+ }
+ else {
+ if (tag->block->has_display() &&
+ tag->block->display == css::css_display_value::DISPLAY_HIDDEN) {
+ is_visible = false;
+ }
+ else {
+ is_transparent = true;
+ }
+ }
+ }
+ else {
+ if (tag->block->display == css::css_display_value::DISPLAY_BLOCK) {
+ is_block = true;
+ }
+ else if (tag->block->display == css::css_display_value::DISPLAY_TABLE_ROW) {
+ is_spaces = true;
+ }
+ }
+ }
+
+ if (is_block) {
+ append_margin('\n');
+ }
+ else if (is_spaces) {
+ append_margin(' ');
+ }
+
+ goffset cur_offset = tag->content_offset;
+
+ for (auto *cld: tag->children) {
+ auto enclosed_start = cld->tag_start;
+ goffset initial_part_len = enclosed_start - cur_offset;
+
+ if (initial_part_len > 0) {
+ if (is_visible) {
+ html_append_parsed(hc,
+ {start + cur_offset, std::size_t(initial_part_len)},
+ is_transparent, len, hc->parsed);
+ }
+ else {
+ html_append_parsed(hc,
+ {start + cur_offset, std::size_t(initial_part_len)},
+ is_transparent, len, hc->invisible);
+ }
+ }
+
+ auto next_offset = html_append_tag_content(pool, start, len,
+ hc, cld, exceptions, url_set);
+
+ /* Do not allow shifting back */
+ if (next_offset > cur_offset) {
+ cur_offset = next_offset;
+ }
+ }
+
+ if (cur_offset < tag->closing.start) {
+ goffset final_part_len = tag->closing.start - cur_offset;
+
+ if (final_part_len > 0) {
+ if (is_visible) {
+ html_append_parsed(hc,
+ {start + cur_offset, std::size_t(final_part_len)},
+ is_transparent,
+ len,
+ hc->parsed);
+ }
+ else {
+ html_append_parsed(hc,
+ {start + cur_offset, std::size_t(final_part_len)},
+ is_transparent,
+ len,
+ hc->invisible);
+ }
+ }
+ }
+ if (is_block) {
+ append_margin('\n');
+ }
+ else if (is_spaces) {
+ append_margin(' ');
+ }
+
+ if (is_visible) {
+ if (tag->id == Tag_A) {
+ auto written_len = hc->parsed.size() - initial_parsed_offset;
+ html_process_displayed_href_tag(pool, hc,
+ {hc->parsed.data() + initial_parsed_offset, std::size_t(written_len)},
+ tag, exceptions,
+ url_set, initial_parsed_offset);
+ }
+ else if (tag->id == Tag_IMG) {
+ /* Process ALT if presented */
+ auto maybe_alt = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_ALT);
+
+ if (maybe_alt) {
+ if (!hc->parsed.empty() && !g_ascii_isspace(hc->parsed.back())) {
+ /* Add a space */
+ hc->parsed += ' ';
+ }
+
+ hc->parsed.append(maybe_alt.value());
+
+ if (!hc->parsed.empty() && !g_ascii_isspace(hc->parsed.back())) {
+ /* Add a space */
+ hc->parsed += ' ';
+ }
+ }
+ }
+ }
+ else {
+ /* Invisible stuff */
+ if (std::holds_alternative<rspamd_url *>(tag->extra)) {
+ auto *url_enclosed = std::get<rspamd_url *>(tag->extra);
+
+ /*
+ * TODO: when hash is fixed to include flags we need to remove and add
+ * url to the hash set
+ */
+ if (url_enclosed) {
+ url_enclosed->flags |= RSPAMD_URL_FLAG_INVISIBLE;
+ }
+ }
+ }
+
+ calculate_final_tag_offsets();
+
+ return next_tag_offset;
+}
+
+auto html_process_input(struct rspamd_task *task,
+ GByteArray *in,
+ GList **exceptions,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls,
+ bool allow_css,
+ std::uint16_t *cur_url_order) -> html_content *
+{
+ const gchar *p, *c, *end, *start;
+ guchar t;
+ auto closing = false;
+ guint obrace = 0, ebrace = 0;
+ struct rspamd_url *url = nullptr;
+ gint href_offset = -1;
+ auto overflow_input = false;
+ struct html_tag *cur_tag = nullptr, *parent_tag = nullptr, cur_closing_tag;
+ struct tag_content_parser_state content_parser_env;
+ auto process_size = in->len;
+
+
+ enum {
+ parse_start = 0,
+ content_before_start,
+ tag_begin,
+ sgml_tag,
+ xml_tag,
+ compound_tag,
+ comment_tag,
+ comment_content,
+ sgml_content,
+ tag_content,
+ tag_end_opening,
+ tag_end_closing,
+ html_text_content,
+ xml_tag_end,
+ tag_raw_text,
+ tag_raw_text_less_than,
+ tags_limit_overflow,
+ } state = parse_start;
+
+ enum class html_document_state {
+ doctype,
+ head,
+ body
+ } html_document_state = html_document_state::doctype;
+
+ g_assert(in != NULL);
+ g_assert(task != NULL);
+
+ auto *pool = task->task_pool;
+ auto cur_url_part_order = 0u;
+
+ auto *hc = new html_content;
+ rspamd_mempool_add_destructor(task->task_pool, html_content::html_content_dtor, hc);
+
+ if (task->cfg && in->len > task->cfg->max_html_len) {
+ msg_notice_task("html input is too big: %z, limit is %z",
+ in->len,
+ task->cfg->max_html_len);
+ process_size = task->cfg->max_html_len;
+ overflow_input = true;
+ }
+
+ auto new_tag = [&](int flags = 0) -> struct html_tag *
+ {
+
+ if (hc->all_tags.size() > rspamd::html::max_tags) {
+ hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
+
+ return nullptr;
+ }
+
+ hc->all_tags.emplace_back(std::make_unique<html_tag>());
+ auto *ntag = hc->all_tags.back().get();
+ ntag->tag_start = c - start;
+ ntag->flags = flags;
+
+ if (cur_tag && !(cur_tag->flags & (CM_EMPTY | FL_CLOSED)) && cur_tag != &cur_closing_tag) {
+ parent_tag = cur_tag;
+ }
+
+ if (flags & FL_XML) {
+ return ntag;
+ }
+
+ return ntag;
+ };
+
+ auto process_opening_tag = [&]() {
+ if (cur_tag->id > Tag_UNKNOWN) {
+ if (cur_tag->flags & CM_UNIQUE) {
+ if (!hc->tags_seen[cur_tag->id]) {
+ /* Duplicate tag has been found */
+ hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
+ }
+ }
+ hc->tags_seen[cur_tag->id] = true;
+ }
+
+ /* Shift to the first unclosed tag */
+ auto *pt = parent_tag;
+ while (pt && (pt->flags & FL_CLOSED)) {
+ pt = pt->parent;
+ }
+
+ if (pt) {
+ g_assert(cur_tag != pt);
+ cur_tag->parent = pt;
+ g_assert(cur_tag->parent != &cur_closing_tag);
+ parent_tag = pt;
+ parent_tag->children.push_back(cur_tag);
+ }
+ else {
+ if (hc->root_tag) {
+ if (cur_tag != hc->root_tag) {
+ cur_tag->parent = hc->root_tag;
+ g_assert(cur_tag->parent != cur_tag);
+ hc->root_tag->children.push_back(cur_tag);
+ parent_tag = hc->root_tag;
+ }
+ }
+ else {
+ if (cur_tag->id == Tag_HTML) {
+ hc->root_tag = cur_tag;
+ }
+ else {
+ /* Insert a fake html tag */
+ hc->all_tags.emplace_back(std::make_unique<html_tag>());
+ auto *top_tag = hc->all_tags.back().get();
+ top_tag->tag_start = 0;
+ top_tag->flags = FL_VIRTUAL;
+ top_tag->id = Tag_HTML;
+ top_tag->content_offset = 0;
+ top_tag->children.push_back(cur_tag);
+ cur_tag->parent = top_tag;
+ g_assert(cur_tag->parent != cur_tag);
+ hc->root_tag = top_tag;
+ parent_tag = top_tag;
+ }
+ }
+ }
+
+ if (cur_tag->flags & FL_HREF && html_document_state == html_document_state::body) {
+ auto maybe_url = html_process_url_tag(pool, cur_tag, hc);
+
+ if (maybe_url.has_value()) {
+ url = maybe_url.value();
+
+ if (url_set != NULL) {
+ struct rspamd_url *maybe_existing =
+ rspamd_url_set_add_or_return(url_set, maybe_url.value());
+ if (maybe_existing == maybe_url.value()) {
+ if (cur_url_order) {
+ url->order = (*cur_url_order)++;
+ }
+ url->part_order = cur_url_part_order++;
+ html_process_query_url(pool, url, url_set,
+ part_urls);
+ }
+ else {
+ url = maybe_existing;
+ /* Replace extra as well */
+ cur_tag->extra = maybe_existing;
+ /* Increase count to avoid odd checks failure */
+ url->count++;
+ }
+ }
+ if (part_urls) {
+ g_ptr_array_add(part_urls, url);
+ }
+
+ href_offset = hc->parsed.size();
+ }
+ }
+ else if (cur_tag->id == Tag_BASE) {
+ /*
+ * Base is allowed only within head tag but HTML is retarded
+ */
+ auto maybe_url = html_process_url_tag(pool, cur_tag, hc);
+
+ if (maybe_url) {
+ msg_debug_html("got valid base tag");
+ cur_tag->extra = maybe_url.value();
+ cur_tag->flags |= FL_HREF;
+
+ if (hc->base_url == nullptr) {
+ hc->base_url = maybe_url.value();
+ }
+ else {
+ msg_debug_html("ignore redundant base tag");
+ }
+ }
+ else {
+ msg_debug_html("got invalid base tag!");
+ }
+ }
+
+ if (cur_tag->id == Tag_IMG) {
+ html_process_img_tag(pool, cur_tag, hc, url_set,
+ part_urls);
+ }
+ else if (cur_tag->id == Tag_LINK) {
+ html_process_link_tag(pool, cur_tag, hc, url_set,
+ part_urls);
+ }
+
+ if (!(cur_tag->flags & CM_EMPTY)) {
+ html_process_block_tag(pool, cur_tag, hc);
+ }
+ else {
+ /* Implicitly close */
+ cur_tag->flags |= FL_CLOSED;
+ }
+
+ if (cur_tag->flags & FL_CLOSED) {
+ cur_tag->closing.end = cur_tag->content_offset;
+ cur_tag->closing.start = cur_tag->tag_start;
+
+ cur_tag = parent_tag;
+ }
+ };
+
+ p = (const char *) in->data;
+ c = p;
+ end = p + process_size;
+ start = c;
+
+ while (p < end) {
+ t = *p;
+
+ switch (state) {
+ case parse_start:
+ if (t == '<') {
+ state = tag_begin;
+ }
+ else {
+ /* We have no starting tag, so assume that it's content */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_START;
+ cur_tag = new_tag();
+ html_document_state = html_document_state::body;
+
+ if (cur_tag) {
+ cur_tag->id = Tag_HTML;
+ hc->root_tag = cur_tag;
+ state = content_before_start;
+ }
+ else {
+ state = tags_limit_overflow;
+ }
+ }
+ break;
+ case content_before_start:
+ if (t == '<') {
+ state = tag_begin;
+ }
+ else {
+ p++;
+ }
+ break;
+ case tag_begin:
+ switch (t) {
+ case '<':
+ c = p;
+ p++;
+ closing = FALSE;
+ break;
+ case '!':
+ cur_tag = new_tag(FL_XML | FL_CLOSED);
+ if (cur_tag) {
+ state = sgml_tag;
+ }
+ else {
+ state = tags_limit_overflow;
+ }
+ p++;
+ break;
+ case '?':
+ cur_tag = new_tag(FL_XML | FL_CLOSED);
+ if (cur_tag) {
+ state = xml_tag;
+ }
+ else {
+ state = tags_limit_overflow;
+ }
+ hc->flags |= RSPAMD_HTML_FLAG_XML;
+ p++;
+ break;
+ case '/':
+ closing = TRUE;
+ /* We fill fake closing tag to fill it with the content parser */
+ cur_closing_tag.clear();
+ /*
+ * For closing tags, we need to find some corresponding opening tag.
+ * However, at this point we have not even parsed a name, so we
+ * can not assume anything about balancing, etc.
+ *
+ * So we need to ensure that:
+ * 1) We have some opening tag in the chain cur_tag->parent...
+ * 2) cur_tag is nullptr - okay, html is just brain damaged
+ * 3) cur_tag must NOT be equal to cur_closing tag. It means that
+ * we had some poor closing tag but we still need to find an opening
+ * tag... Somewhere...
+ */
+
+ if (cur_tag == &cur_closing_tag) {
+ if (parent_tag != &cur_closing_tag) {
+ cur_closing_tag.parent = parent_tag;
+ }
+ else {
+ cur_closing_tag.parent = nullptr;
+ }
+ }
+ else if (cur_tag && cur_tag->flags & FL_CLOSED) {
+ /* Cur tag is already closed, we should find something else */
+ auto *tmp = cur_tag;
+ while (tmp) {
+ tmp = tmp->parent;
+
+ if (tmp == nullptr || !(tmp->flags & FL_CLOSED)) {
+ break;
+ }
+ }
+
+ cur_closing_tag.parent = tmp;
+ }
+ else {
+ cur_closing_tag.parent = cur_tag;
+ }
+
+ cur_tag = &cur_closing_tag;
+ p++;
+ break;
+ case '>':
+ /* Empty tag */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = html_text_content;
+ continue;
+ default:
+ if (g_ascii_isalpha(t)) {
+ state = tag_content;
+ content_parser_env.reset();
+
+ if (!closing) {
+ cur_tag = new_tag();
+ }
+
+ if (cur_tag) {
+ state = tag_content;
+ }
+ else {
+ state = tags_limit_overflow;
+ }
+ }
+ else {
+ /* Wrong bad tag */
+ state = html_text_content;
+ }
+ break;
+ }
+
+ break;
+
+ case sgml_tag:
+ switch (t) {
+ case '[':
+ state = compound_tag;
+ obrace = 1;
+ ebrace = 0;
+ p++;
+ break;
+ case '-':
+ cur_tag->flags |= FL_COMMENT;
+ state = comment_tag;
+ p++;
+ break;
+ default:
+ state = sgml_content;
+ break;
+ }
+
+ break;
+
+ case xml_tag:
+ if (t == '?') {
+ state = xml_tag_end;
+ }
+ else if (t == '>') {
+ /* Misformed xml tag */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end_opening;
+ continue;
+ }
+ /* We efficiently ignore xml tags */
+ p++;
+ break;
+
+ case xml_tag_end:
+ if (t == '>') {
+ state = tag_end_opening;
+ cur_tag->content_offset = p - start + 1;
+ continue;
+ }
+ else {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ }
+ p++;
+ break;
+
+ case compound_tag:
+ if (t == '[') {
+ obrace++;
+ }
+ else if (t == ']') {
+ ebrace++;
+ }
+ else if (t == '>' && obrace == ebrace) {
+ state = tag_end_opening;
+ cur_tag->content_offset = p - start + 1;
+ continue;
+ }
+ p++;
+ break;
+
+ case comment_tag:
+ if (t != '-') {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end_opening;
+ }
+ else {
+ p++;
+ ebrace = 0;
+ /*
+ * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments
+ * ... the text must not start with a single
+ * U+003E GREATER-THAN SIGN character (>),
+ * nor start with a "-" (U+002D) character followed by
+ * a U+003E GREATER-THAN SIGN (>) character,
+ * nor contain two consecutive U+002D HYPHEN-MINUS
+ * characters (--), nor end with a "-" (U+002D) character.
+ */
+ if (p[0] == '-' && p + 1 < end && p[1] == '>') {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ p++;
+ state = tag_end_opening;
+ }
+ else if (*p == '>') {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end_opening;
+ }
+ else {
+ state = comment_content;
+ }
+ }
+ break;
+
+ case comment_content:
+ if (t == '-') {
+ ebrace++;
+ }
+ else if (t == '>' && ebrace >= 2) {
+ cur_tag->content_offset = p - start + 1;
+ state = tag_end_opening;
+ continue;
+ }
+ else {
+ ebrace = 0;
+ }
+
+ p++;
+ break;
+
+ case html_text_content:
+ if (t != '<') {
+ p++;
+ }
+ else {
+ state = tag_begin;
+ }
+ break;
+
+ case tag_raw_text:
+ if (t == '<') {
+ c = p;
+ state = tag_raw_text_less_than;
+ }
+ p++;
+ break;
+ case tag_raw_text_less_than:
+ if (t == '/') {
+ /* Here are special things: we look for obrace and then ensure
+ * that if there is any closing brace nearby
+ * (we look maximum at 30 characters). We also need to ensure
+ * that we have no special characters, such as punctuation marks and
+ * so on.
+ * Basically, we validate the input to be sane.
+ * Since closing tags must not have attributes, these assumptions
+ * seems to be reasonable enough for our toy parser.
+ */
+ gint cur_lookahead = 1;
+ gint max_lookahead = MIN(end - p, 30);
+ bool valid_closing_tag = true;
+
+ if (p + 1 < end && !g_ascii_isalpha(p[1])) {
+ valid_closing_tag = false;
+ }
+ else {
+ while (cur_lookahead < max_lookahead) {
+ gchar tt = p[cur_lookahead];
+ if (tt == '>') {
+ break;
+ }
+ else if (tt < '\n' || tt == ',') {
+ valid_closing_tag = false;
+ break;
+ }
+ cur_lookahead++;
+ }
+
+ if (cur_lookahead == max_lookahead) {
+ valid_closing_tag = false;
+ }
+ }
+
+ if (valid_closing_tag) {
+ /* Shift back */
+ p = c;
+ state = tag_begin;
+ }
+ else {
+ p++;
+ state = tag_raw_text;
+ }
+ }
+ else {
+ p++;
+ state = tag_raw_text;
+ }
+ break;
+ case sgml_content:
+ /* TODO: parse DOCTYPE here */
+ if (t == '>') {
+ cur_tag->content_offset = p - start + 1;
+ state = tag_end_opening;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case tag_content:
+ html_parse_tag_content(pool, hc, cur_tag, p, content_parser_env);
+
+ if (t == '>') {
+ if (content_parser_env.cur_state != parse_dqvalue && content_parser_env.cur_state != parse_sqvalue) {
+ /* We have a closing element */
+ if (closing) {
+ cur_tag->closing.start = c - start;
+ cur_tag->closing.end = p - start + 1;
+
+ closing = FALSE;
+ state = tag_end_closing;
+ }
+ else {
+ cur_tag->content_offset = p - start + 1;
+ state = tag_end_opening;
+ }
+ }
+ else {
+ /*
+ * We are in the parse_quoted value state but got
+ * an unescaped `>` character.
+ * HTML is written for monkeys, so there are two possibilities:
+ * 1) We have missing ending quote
+ * 2) We have unescaped `>` character
+ * How to distinguish between those possibilities?
+ * Well, the idea is to do some lookahead and try to find a
+ * quote. If we can find a quote, we just pretend as we have
+ * not seen `>` character. Otherwise, we pretend that it is an
+ * unquoted stuff. This logic is quite fragile but I really
+ * don't know any better options...
+ */
+ auto end_quote = content_parser_env.cur_state == parse_sqvalue ? '\'' : '"';
+ if (memchr(p, end_quote, end - p) != nullptr) {
+ /* Unencoded `>` */
+ p++;
+ continue;
+ }
+ else {
+ if (closing) {
+ cur_tag->closing.start = c - start;
+ cur_tag->closing.end = p - start + 1;
+
+ closing = FALSE;
+ state = tag_end_closing;
+ }
+ else {
+ cur_tag->content_offset = p - start + 1;
+ state = tag_end_opening;
+ }
+ }
+ }
+ continue;
+ }
+ p++;
+ break;
+
+ case tag_end_opening:
+ content_parser_env.reset();
+ state = html_text_content;
+
+ if (cur_tag) {
+ if (cur_tag->id == Tag_STYLE || cur_tag->id == Tag_NOSCRIPT || cur_tag->id == Tag_SCRIPT) {
+ state = tag_raw_text;
+ }
+ if (html_document_state == html_document_state::doctype) {
+ if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
+ html_document_state = html_document_state::head;
+ cur_tag->flags |= FL_IGNORE;
+ }
+ else if (cur_tag->id != Tag_HTML) {
+ html_document_state = html_document_state::body;
+ }
+ }
+ else if (html_document_state == html_document_state::head) {
+ if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) {
+ if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) {
+ /*
+ * As by standard, we have to close the HEAD tag
+ * and switch to the body state
+ */
+ parent_tag->flags |= FL_CLOSED;
+ parent_tag->closing.start = cur_tag->tag_start;
+ parent_tag->closing.end = cur_tag->content_offset;
+
+ html_document_state = html_document_state::body;
+ }
+ else if (cur_tag->id == Tag_BODY) {
+ html_document_state = html_document_state::body;
+ }
+ else {
+ /*
+ * For propagation in something like
+ * <title><p><a>ololo</a></p></title> - should be unprocessed
+ */
+ cur_tag->flags |= CM_HEAD;
+ }
+ }
+ }
+
+ process_opening_tag();
+ }
+
+ p++;
+ c = p;
+ break;
+ case tag_end_closing: {
+ if (cur_tag) {
+
+ if (cur_tag->flags & CM_EMPTY) {
+ /* Ignore closing empty tags */
+ cur_tag->flags |= FL_IGNORE;
+ }
+ if (html_document_state == html_document_state::doctype) {
+ }
+ else if (html_document_state == html_document_state::head) {
+ if (cur_tag->id == Tag_HEAD) {
+ html_document_state = html_document_state::body;
+ }
+ }
+
+ /* cur_tag here is a closing tag */
+ auto *next_cur_tag = html_check_balance(hc, cur_tag,
+ c - start, p - start + 1);
+
+ if (cur_tag->id == Tag_STYLE && allow_css) {
+ auto *opening_tag = cur_tag->parent;
+
+ if (opening_tag && opening_tag->id == Tag_STYLE &&
+ (int) opening_tag->content_offset < opening_tag->closing.start) {
+ auto ret_maybe = rspamd::css::parse_css(pool,
+ {start + opening_tag->content_offset,
+ opening_tag->closing.start - opening_tag->content_offset},
+ std::move(hc->css_style));
+
+ if (!ret_maybe.has_value()) {
+ if (ret_maybe.error().is_fatal()) {
+ auto err_str = fmt::format(
+ "cannot parse css (error code: {}): {}",
+ static_cast<int>(ret_maybe.error().type),
+ ret_maybe.error().description.value_or("unknown error"));
+ msg_info_pool("%*s", (int) err_str.size(), err_str.data());
+ }
+ }
+ else {
+ hc->css_style = ret_maybe.value();
+ }
+ }
+ }
+
+ if (next_cur_tag != nullptr) {
+ cur_tag = next_cur_tag;
+ }
+ else {
+ /*
+ * Here, we handle cases like <p>lala</b>...
+ * So the tag </b> is bogus and unpaired
+ * However, we need to exclude it from the output of <p> tag
+ * To do that, we create a fake opening tag and insert that to
+ * the current opening tag
+ */
+ auto *cur_opening_tag = cur_tag->parent;
+
+ while (cur_opening_tag && (cur_opening_tag->flags & FL_CLOSED)) {
+ cur_opening_tag = cur_opening_tag->parent;
+ }
+
+ if (!cur_opening_tag) {
+ cur_opening_tag = hc->root_tag;
+ }
+
+ auto &&vtag = std::make_unique<html_tag>();
+ vtag->id = cur_tag->id;
+ vtag->flags = FL_VIRTUAL | FL_CLOSED | cur_tag->flags;
+ vtag->tag_start = cur_tag->closing.start;
+ vtag->content_offset = p - start + 1;
+ vtag->closing = cur_tag->closing;
+ vtag->parent = cur_opening_tag;
+ g_assert(vtag->parent != &cur_closing_tag);
+ cur_opening_tag->children.push_back(vtag.get());
+ hc->all_tags.emplace_back(std::move(vtag));
+ cur_tag = cur_opening_tag;
+ parent_tag = cur_tag->parent;
+ g_assert(cur_tag->parent != &cur_closing_tag);
+ }
+ } /* if cur_tag != nullptr */
+ state = html_text_content;
+ p++;
+ c = p;
+ break;
+ }
+ case tags_limit_overflow:
+ msg_warn_pool("tags limit of %d tags is reached at the position %d;"
+ " ignoring the rest of the HTML content",
+ (int) hc->all_tags.size(), (int) (p - start));
+ c = p;
+ p = end;
+ break;
+ }
+ }
+
+ if (cur_tag && !(cur_tag->flags & FL_CLOSED) && cur_tag != &cur_closing_tag) {
+ cur_closing_tag.parent = cur_tag;
+ cur_closing_tag.id = cur_tag->id;
+ cur_tag = &cur_closing_tag;
+ html_check_balance(hc, cur_tag,
+ end - start, end - start);
+ }
+
+ /* Propagate styles */
+ hc->traverse_block_tags([&hc, &pool](const html_tag *tag) -> bool {
+ if (hc->css_style && tag->id > Tag_UNKNOWN && tag->id < Tag_MAX) {
+ auto *css_block = hc->css_style->check_tag_block(tag);
+
+ if (css_block) {
+ if (tag->block) {
+ tag->block->set_block(*css_block);
+ }
+ else {
+ tag->block = css_block;
+ }
+ }
+ }
+ if (tag->block) {
+ if (!tag->block->has_display()) {
+ /* If we have no display field, we can check it by tag */
+ if (tag->flags & CM_HEAD) {
+ tag->block->set_display(css::css_display_value::DISPLAY_HIDDEN,
+ html_block::set);
+ }
+ else if (tag->flags & (CM_BLOCK | CM_TABLE)) {
+ tag->block->set_display(css::css_display_value::DISPLAY_BLOCK,
+ html_block::implicit);
+ }
+ else if (tag->flags & CM_ROW) {
+ tag->block->set_display(css::css_display_value::DISPLAY_TABLE_ROW,
+ html_block::implicit);
+ }
+ else {
+ tag->block->set_display(css::css_display_value::DISPLAY_INLINE,
+ html_block::implicit);
+ }
+ }
+
+ tag->block->compute_visibility();
+
+ for (const auto *cld_tag: tag->children) {
+
+ if (cld_tag->block) {
+ cld_tag->block->propagate_block(*tag->block);
+ }
+ else {
+ cld_tag->block = rspamd_mempool_alloc0_type(pool, html_block);
+ *cld_tag->block = *tag->block;
+ }
+ }
+ }
+ return true;
+ },
+ html_content::traverse_type::PRE_ORDER);
+
+ /* Leftover before content */
+ switch (state) {
+ case tag_end_opening:
+ if (cur_tag != nullptr) {
+ process_opening_tag();
+ }
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+
+ if (!hc->all_tags.empty() && hc->root_tag) {
+ html_append_tag_content(pool, start, end - start, hc, hc->root_tag,
+ exceptions, url_set);
+ }
+
+ /* Leftover after content */
+ switch (state) {
+ case tags_limit_overflow:
+ html_append_parsed(hc, {c, (std::size_t)(end - c)},
+ false, end - start, hc->parsed);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+
+ if (overflow_input) {
+ /*
+ * Append the rest of the input as raw html, this might work as
+ * further algorithms can skip words when auto *pool = task->task_pool;there are too many.
+ * It is still unclear about urls though...
+ */
+ html_append_parsed(hc, {end, in->len - process_size}, false,
+ end - start, hc->parsed);
+ }
+
+ if (!hc->parsed.empty()) {
+ /* Trim extra spaces at the end if needed */
+ if (g_ascii_isspace(hc->parsed.back())) {
+ auto last_it = std::end(hc->parsed);
+
+ /* Allow last newline */
+ if (hc->parsed.back() == '\n') {
+ --last_it;
+ }
+
+ hc->parsed.erase(std::find_if(hc->parsed.rbegin(), hc->parsed.rend(),
+ [](auto ch) -> auto {
+ return !g_ascii_isspace(ch);
+ })
+ .base(),
+ last_it);
+ }
+ }
+
+ return hc;
+}
+
+static auto
+html_find_image_by_cid(const html_content &hc, std::string_view cid)
+ -> std::optional<const html_image *>
+{
+ for (const auto *html_image: hc.images) {
+ /* Filter embedded images */
+ if (html_image->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED &&
+ html_image->src != nullptr) {
+ if (cid == html_image->src) {
+ return html_image;
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
+auto html_debug_structure(const html_content &hc) -> std::string
+{
+ std::string output;
+
+ if (hc.root_tag) {
+ auto rec_functor = [&](const html_tag *t, int level, auto rec_functor) -> void {
+ std::string pluses(level, '+');
+
+ if (!(t->flags & (FL_VIRTUAL | FL_IGNORE))) {
+ if (t->flags & FL_XML) {
+ output += fmt::format("{}xml;", pluses);
+ }
+ else {
+ output += fmt::format("{}{};", pluses,
+ html_tags_defs.name_by_id_safe(t->id));
+ }
+ level++;
+ }
+ for (const auto *cld: t->children) {
+ rec_functor(cld, level, rec_functor);
+ }
+ };
+
+ rec_functor(hc.root_tag, 1, rec_functor);
+ }
+
+ return output;
+}
+
+auto html_tag_by_name(const std::string_view &name)
+ -> std::optional<tag_id_t>
+{
+ const auto *td = rspamd::html::html_tags_defs.by_name(name);
+
+ if (td != nullptr) {
+ return td->id;
+ }
+
+ return std::nullopt;
+}
+
+auto html_tag::get_content(const struct html_content *hc) const -> std::string_view
+{
+ const std::string *dest = &hc->parsed;
+
+ if (block && !block->is_visible()) {
+ dest = &hc->invisible;
+ }
+ const auto clen = get_content_length();
+ if (content_offset < dest->size()) {
+ if (dest->size() - content_offset >= clen) {
+ return std::string_view{*dest}.substr(content_offset, clen);
+ }
+ else {
+ return std::string_view{*dest}.substr(content_offset, dest->size() - content_offset);
+ }
+ }
+
+ return std::string_view{};
+}
+
+}// namespace rspamd::html
+
+void *
+rspamd_html_process_part_full(struct rspamd_task *task,
+ GByteArray *in, GList **exceptions,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls,
+ bool allow_css,
+ uint16_t *cur_url_order)
+{
+ return rspamd::html::html_process_input(task, in, exceptions, url_set,
+ part_urls, allow_css, cur_url_order);
+}
+
+void *
+rspamd_html_process_part(rspamd_mempool_t *pool,
+ GByteArray *in)
+{
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
+ uint16_t order = 0;
+
+ return rspamd_html_process_part_full(&fake_task, in, NULL,
+ NULL, NULL, FALSE, &order);
+}
+
+guint rspamd_html_decode_entitles_inplace(gchar *s, gsize len)
+{
+ return rspamd::html::decode_html_entitles_inplace(s, len);
+}
+
+gint rspamd_html_tag_by_name(const gchar *name)
+{
+ const auto *td = rspamd::html::html_tags_defs.by_name(name);
+
+ if (td != nullptr) {
+ return td->id;
+ }
+
+ return -1;
+}
+
+gboolean
+rspamd_html_tag_seen(void *ptr, const gchar *tagname)
+{
+ gint id;
+ auto *hc = rspamd::html::html_content::from_ptr(ptr);
+
+ g_assert(hc != NULL);
+
+ id = rspamd_html_tag_by_name(tagname);
+
+ if (id != -1) {
+ return hc->tags_seen[id];
+ }
+
+ return FALSE;
+}
+
+const gchar *
+rspamd_html_tag_by_id(gint id)
+{
+ if (id > Tag_UNKNOWN && id < Tag_MAX) {
+ const auto *td = rspamd::html::html_tags_defs.by_id(id);
+
+ if (td != nullptr) {
+ return td->name.c_str();
+ }
+ }
+
+ return nullptr;
+}
+
+const gchar *
+rspamd_html_tag_name(void *p, gsize *len)
+{
+ auto *tag = reinterpret_cast<rspamd::html::html_tag *>(p);
+ auto tname = rspamd::html::html_tags_defs.name_by_id_safe(tag->id);
+
+ if (len) {
+ *len = tname.size();
+ }
+
+ return tname.data();
+}
+
+struct html_image *
+rspamd_html_find_embedded_image(void *html_content,
+ const char *cid, gsize cid_len)
+{
+ auto *hc = rspamd::html::html_content::from_ptr(html_content);
+
+ auto maybe_img = rspamd::html::html_find_image_by_cid(*hc, {cid, cid_len});
+
+ if (maybe_img) {
+ return (html_image *) maybe_img.value();
+ }
+
+ return nullptr;
+}
+
+bool rspamd_html_get_parsed_content(void *html_content, rspamd_ftok_t *dest)
+{
+ auto *hc = rspamd::html::html_content::from_ptr(html_content);
+
+ dest->begin = hc->parsed.data();
+ dest->len = hc->parsed.size();
+
+ return true;
+}
+
+gsize rspamd_html_get_tags_count(void *html_content)
+{
+ auto *hc = rspamd::html::html_content::from_ptr(html_content);
+
+ if (!hc) {
+ return 0;
+ }
+
+ return hc->all_tags.size();
+} \ No newline at end of file
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
new file mode 100644
index 0000000..2d34f2a
--- /dev/null
+++ b/src/libserver/html/html.h
@@ -0,0 +1,137 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTML_H
+#define RSPAMD_HTML_H
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "libserver/url.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * HTML content flags
+ */
+#define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
+#define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
+#define RSPAMD_HTML_FLAG_XML (1 << 2)
+#define RSPAMD_HTML_FLAG_UNBALANCED (1 << 3)
+#define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
+#define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
+#define RSPAMD_HTML_FLAG_TOO_MANY_TAGS (1 << 6)
+#define RSPAMD_HTML_FLAG_HAS_DATA_URLS (1 << 7)
+#define RSPAMD_HTML_FLAG_HAS_ZEROS (1 << 8)
+
+/*
+ * Image flags
+ */
+#define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0)
+#define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1)
+#define RSPAMD_HTML_FLAG_IMAGE_DATA (1 << 2)
+
+
+struct rspamd_image;
+
+struct html_image {
+ guint height;
+ guint width;
+ guint flags;
+ gchar *src;
+ struct rspamd_url *url;
+ struct rspamd_image *embedded_image;
+ void *tag;
+};
+
+
+/* Forwarded declaration */
+struct rspamd_task;
+
+/*
+ * Decode HTML entitles in text. Text is modified in place.
+ */
+guint rspamd_html_decode_entitles_inplace(gchar *s, gsize len);
+
+void *rspamd_html_process_part(rspamd_mempool_t *pool,
+ GByteArray *in);
+
+void *rspamd_html_process_part_full(struct rspamd_task *task,
+ GByteArray *in, GList **exceptions,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls,
+ bool allow_css,
+ uint16_t *cur_url_order);
+
+/*
+ * Returns true if a specified tag has been seen in a part
+ */
+gboolean rspamd_html_tag_seen(void *ptr, const gchar *tagname);
+
+/**
+ * Returns name for the specified tag id
+ * @param id
+ * @return
+ */
+const gchar *rspamd_html_tag_by_id(gint id);
+
+/**
+ * Returns HTML tag id by name
+ * @param name
+ * @return
+ */
+gint rspamd_html_tag_by_name(const gchar *name);
+
+/**
+ * Gets a name for a tag
+ * @param tag
+ * @param len
+ * @return
+ */
+const gchar *rspamd_html_tag_name(void *tag, gsize *len);
+
+/**
+ * Find HTML image by content id
+ * @param html_content
+ * @param cid
+ * @param cid_len
+ * @return
+ */
+struct html_image *rspamd_html_find_embedded_image(void *html_content,
+ const char *cid, gsize cid_len);
+
+/**
+ * Stores parsed content in ftok_t structure
+ * @param html_content
+ * @param dest
+ * @return
+ */
+bool rspamd_html_get_parsed_content(void *html_content, rspamd_ftok_t *dest);
+
+/**
+ * Returns number of tags in the html content
+ * @param html_content
+ * @return
+ */
+gsize rspamd_html_get_tags_count(void *html_content);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/html/html.hxx b/src/libserver/html/html.hxx
new file mode 100644
index 0000000..3320fd6
--- /dev/null
+++ b/src/libserver/html/html.hxx
@@ -0,0 +1,146 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTML_HXX
+#define RSPAMD_HTML_HXX
+#pragma once
+
+#include "config.h"
+#include "libserver/url.h"
+#include "libserver/html/html_tag.hxx"
+#include "libserver/html/html.h"
+#include "libserver/html/html_tags.h"
+
+
+#include <vector>
+#include <memory>
+#include <string>
+#include "function2/function2.hpp"
+
+namespace rspamd::css {
+/* Forward declaration */
+class css_style_sheet;
+}// namespace rspamd::css
+
+namespace rspamd::html {
+
+struct html_block;
+
+struct html_content {
+ struct rspamd_url *base_url = nullptr;
+ struct html_tag *root_tag = nullptr;
+ gint flags = 0;
+ std::vector<bool> tags_seen;
+ std::vector<html_image *> images;
+ std::vector<std::unique_ptr<struct html_tag>> all_tags;
+ std::string parsed;
+ std::string invisible;
+ std::shared_ptr<css::css_style_sheet> css_style;
+
+ /* Preallocate and reserve all internal structures */
+ html_content()
+ {
+ tags_seen.resize(Tag_MAX, false);
+ all_tags.reserve(128);
+ parsed.reserve(256);
+ }
+
+ static void html_content_dtor(void *ptr)
+ {
+ delete html_content::from_ptr(ptr);
+ }
+
+ static auto from_ptr(void *ptr) -> html_content *
+ {
+ return static_cast<html_content *>(ptr);
+ }
+
+ enum class traverse_type {
+ PRE_ORDER,
+ POST_ORDER
+ };
+ auto traverse_block_tags(fu2::function<bool(const html_tag *)> &&func,
+ traverse_type how = traverse_type::PRE_ORDER) const -> bool
+ {
+
+ if (root_tag == nullptr) {
+ return false;
+ }
+
+ auto rec_functor_pre_order = [&](const html_tag *root, auto &&rec) -> bool {
+ if (func(root)) {
+
+ for (const auto *c: root->children) {
+ if (!rec(c, rec)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+ return false;
+ };
+ auto rec_functor_post_order = [&](const html_tag *root, auto &&rec) -> bool {
+ for (const auto *c: root->children) {
+ if (!rec(c, rec)) {
+ return false;
+ }
+ }
+
+ return func(root);
+ };
+
+ switch (how) {
+ case traverse_type::PRE_ORDER:
+ return rec_functor_pre_order(root_tag, rec_functor_pre_order);
+ case traverse_type::POST_ORDER:
+ return rec_functor_post_order(root_tag, rec_functor_post_order);
+ default:
+ RSPAMD_UNREACHABLE;
+ }
+ }
+
+ auto traverse_all_tags(fu2::function<bool(const html_tag *)> &&func) const -> bool
+ {
+ for (const auto &tag: all_tags) {
+ if (!(tag->flags & (FL_XML | FL_VIRTUAL))) {
+ if (!func(tag.get())) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+private:
+ ~html_content() = default;
+};
+
+
+auto html_tag_by_name(const std::string_view &name) -> std::optional<tag_id_t>;
+auto html_process_input(struct rspamd_task *task,
+ GByteArray *in,
+ GList **exceptions,
+ khash_t(rspamd_url_hash) * url_set,
+ GPtrArray *part_urls,
+ bool allow_css,
+ std::uint16_t *cur_url_order) -> html_content *;
+auto html_debug_structure(const html_content &hc) -> std::string;
+
+}// namespace rspamd::html
+
+#endif//RSPAMD_HTML_HXX
diff --git a/src/libserver/html/html_block.hxx b/src/libserver/html/html_block.hxx
new file mode 100644
index 0000000..f9b5184
--- /dev/null
+++ b/src/libserver/html/html_block.hxx
@@ -0,0 +1,358 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_HTML_BLOCK_HXX
+#define RSPAMD_HTML_BLOCK_HXX
+#pragma once
+
+#include "libserver/css/css_value.hxx"
+#include <cmath>
+
+namespace rspamd::html {
+
+/*
+ * Block tag definition
+ */
+struct html_block {
+ rspamd::css::css_color fg_color;
+ rspamd::css::css_color bg_color;
+ std::int16_t height;
+ std::int16_t width;
+ rspamd::css::css_display_value display;
+ std::int8_t font_size;
+
+ unsigned fg_color_mask : 2;
+ unsigned bg_color_mask : 2;
+ unsigned height_mask : 2;
+ unsigned width_mask : 2;
+ unsigned font_mask : 2;
+ unsigned display_mask : 2;
+ unsigned visibility_mask : 2;
+
+ constexpr static const auto unset = 0;
+ constexpr static const auto inherited = 1;
+ constexpr static const auto implicit = 1;
+ constexpr static const auto set = 3;
+ constexpr static const auto invisible_flag = 1;
+ constexpr static const auto transparent_flag = 2;
+
+ /* Helpers to set mask when setting the elements */
+ auto set_fgcolor(const rspamd::css::css_color &c, int how = html_block::set) -> void
+ {
+ fg_color = c;
+ fg_color_mask = how;
+ }
+ auto set_bgcolor(const rspamd::css::css_color &c, int how = html_block::set) -> void
+ {
+ bg_color = c;
+ bg_color_mask = how;
+ }
+ auto set_height(float h, bool is_percent = false, int how = html_block::set) -> void
+ {
+ h = is_percent ? (-h) : h;
+ if (h < INT16_MIN) {
+ /* Negative numbers encode percents... */
+ height = -100;
+ }
+ else if (h > INT16_MAX) {
+ height = INT16_MAX;
+ }
+ else {
+ height = h;
+ }
+ height_mask = how;
+ }
+
+ auto set_width(float w, bool is_percent = false, int how = html_block::set) -> void
+ {
+ w = is_percent ? (-w) : w;
+ if (w < INT16_MIN) {
+ width = INT16_MIN;
+ }
+ else if (w > INT16_MAX) {
+ width = INT16_MAX;
+ }
+ else {
+ width = w;
+ }
+ width_mask = how;
+ }
+
+ auto set_display(bool v, int how = html_block::set) -> void
+ {
+ if (v) {
+ display = rspamd::css::css_display_value::DISPLAY_INLINE;
+ }
+ else {
+ display = rspamd::css::css_display_value::DISPLAY_HIDDEN;
+ }
+ display_mask = how;
+ }
+
+ auto set_display(rspamd::css::css_display_value v, int how = html_block::set) -> void
+ {
+ display = v;
+ display_mask = how;
+ }
+
+ auto set_font_size(float fs, bool is_percent = false, int how = html_block::set) -> void
+ {
+ fs = is_percent ? (-fs) : fs;
+ if (fs < INT8_MIN) {
+ font_size = -100;
+ }
+ else if (fs > INT8_MAX) {
+ font_size = INT8_MAX;
+ }
+ else {
+ font_size = fs;
+ }
+ font_mask = how;
+ }
+
+private:
+ template<typename T, typename MT>
+ static constexpr auto simple_prop(MT mask_val, MT other_mask, T &our_val,
+ T other_val) -> MT
+ {
+ if (other_mask && other_mask > mask_val) {
+ our_val = other_val;
+ mask_val = html_block::inherited;
+ }
+
+ return mask_val;
+ }
+
+ /* Sizes propagation logic
+ * We can have multiple cases:
+ * 1) Our size is > 0 and we can use it as is
+ * 2) Parent size is > 0 and our size is undefined, so propagate parent
+ * 3) Parent size is < 0 and our size is undefined - propagate parent
+ * 4) Parent size is > 0 and our size is < 0 - multiply parent by abs(ours)
+ * 5) Parent size is undefined and our size is < 0 - tricky stuff, assume some defaults
+ */
+ template<typename T, typename MT>
+ static constexpr auto size_prop(MT mask_val, MT other_mask, T &our_val,
+ T other_val, T default_val) -> MT
+ {
+ if (mask_val) {
+ /* We have our value */
+ if (our_val < 0) {
+ if (other_mask > 0) {
+ if (other_val >= 0) {
+ our_val = other_val * (-our_val / 100.0);
+ }
+ else {
+ our_val *= (-other_val / 100.0);
+ }
+ }
+ else {
+ /* Parent value is not defined and our value is relative */
+ our_val = default_val * (-our_val / 100.0);
+ }
+ }
+ else if (other_mask && other_mask > mask_val) {
+ our_val = other_val;
+ mask_val = html_block::inherited;
+ }
+ }
+ else {
+ /* We propagate parent if defined */
+ if (other_mask && other_mask > mask_val) {
+ our_val = other_val;
+ mask_val = html_block::inherited;
+ }
+ /* Otherwise do nothing */
+ }
+
+ return mask_val;
+ }
+
+public:
+ /**
+ * Propagate values from the block if they are not defined by the current block
+ * @param other
+ * @return
+ */
+ auto propagate_block(const html_block &other) -> void
+ {
+ fg_color_mask = html_block::simple_prop(fg_color_mask, other.fg_color_mask,
+ fg_color, other.fg_color);
+ bg_color_mask = html_block::simple_prop(bg_color_mask, other.bg_color_mask,
+ bg_color, other.bg_color);
+ display_mask = html_block::simple_prop(display_mask, other.display_mask,
+ display, other.display);
+
+ height_mask = html_block::size_prop(height_mask, other.height_mask,
+ height, other.height, static_cast<std::int16_t>(800));
+ width_mask = html_block::size_prop(width_mask, other.width_mask,
+ width, other.width, static_cast<std::int16_t>(1024));
+ font_mask = html_block::size_prop(font_mask, other.font_mask,
+ font_size, other.font_size, static_cast<std::int8_t>(10));
+ }
+
+ /*
+ * Set block overriding all inherited values
+ */
+ auto set_block(const html_block &other) -> void
+ {
+ constexpr auto set_value = [](auto mask_val, auto other_mask, auto &our_val,
+ auto other_val) constexpr -> int {
+ if (other_mask && mask_val != html_block::set) {
+ our_val = other_val;
+ mask_val = other_mask;
+ }
+
+ return mask_val;
+ };
+
+ fg_color_mask = set_value(fg_color_mask, other.fg_color_mask, fg_color, other.fg_color);
+ bg_color_mask = set_value(bg_color_mask, other.bg_color_mask, bg_color, other.bg_color);
+ display_mask = set_value(display_mask, other.display_mask, display, other.display);
+ height_mask = set_value(height_mask, other.height_mask, height, other.height);
+ width_mask = set_value(width_mask, other.width_mask, width, other.width);
+ font_mask = set_value(font_mask, other.font_mask, font_size, other.font_size);
+ }
+
+ auto compute_visibility(void) -> void
+ {
+ if (display_mask) {
+ if (display == css::css_display_value::DISPLAY_HIDDEN) {
+ visibility_mask = html_block::invisible_flag;
+
+ return;
+ }
+ }
+
+ if (font_mask) {
+ if (font_size == 0) {
+ visibility_mask = html_block::invisible_flag;
+
+ return;
+ }
+ }
+
+ auto is_similar_colors = [](const rspamd::css::css_color &fg, const rspamd::css::css_color &bg) -> bool {
+ constexpr const auto min_visible_diff = 0.1f;
+ auto diff_r = ((float) fg.r - bg.r);
+ auto diff_g = ((float) fg.g - bg.g);
+ auto diff_b = ((float) fg.b - bg.b);
+ auto ravg = ((float) fg.r + bg.r) / 2.0f;
+
+ /* Square diffs */
+ diff_r *= diff_r;
+ diff_g *= diff_g;
+ diff_b *= diff_b;
+
+ auto diff = std::sqrt(2.0f * diff_r + 4.0f * diff_g + 3.0f * diff_b +
+ (ravg * (diff_r - diff_b) / 256.0f)) /
+ 256.0f;
+
+ return diff < min_visible_diff;
+ };
+ /* Check if we have both bg/fg colors */
+ if (fg_color_mask && bg_color_mask) {
+ if (fg_color.alpha < 10) {
+ /* Too transparent */
+ visibility_mask = html_block::transparent_flag;
+
+ return;
+ }
+
+ if (bg_color.alpha > 10) {
+ if (is_similar_colors(fg_color, bg_color)) {
+ visibility_mask = html_block::transparent_flag;
+ return;
+ }
+ }
+ }
+ else if (fg_color_mask) {
+ /* Merely fg color */
+ if (fg_color.alpha < 10) {
+ /* Too transparent */
+ visibility_mask = html_block::transparent_flag;
+
+ return;
+ }
+
+ /* Implicit fg color */
+ if (is_similar_colors(fg_color, rspamd::css::css_color::white())) {
+ visibility_mask = html_block::transparent_flag;
+ return;
+ }
+ }
+ else if (bg_color_mask) {
+ if (bg_color.alpha > 10) {
+ if (is_similar_colors(rspamd::css::css_color::black(), bg_color)) {
+ visibility_mask = html_block::transparent_flag;
+ return;
+ }
+ }
+ }
+
+ visibility_mask = html_block::unset;
+ }
+
+ constexpr auto is_visible(void) const -> bool
+ {
+ return visibility_mask == html_block::unset;
+ }
+
+ constexpr auto is_transparent(void) const -> bool
+ {
+ return visibility_mask == html_block::transparent_flag;
+ }
+
+ constexpr auto has_display(int how = html_block::set) const -> bool
+ {
+ return display_mask >= how;
+ }
+
+ /**
+ * Returns a default html block for root HTML element
+ * @return
+ */
+ static auto default_html_block(void) -> html_block
+ {
+ return html_block{.fg_color = rspamd::css::css_color::black(),
+ .bg_color = rspamd::css::css_color::white(),
+ .height = 0,
+ .width = 0,
+ .display = rspamd::css::css_display_value::DISPLAY_INLINE,
+ .font_size = 12,
+ .fg_color_mask = html_block::inherited,
+ .bg_color_mask = html_block::inherited,
+ .height_mask = html_block::unset,
+ .width_mask = html_block::unset,
+ .font_mask = html_block::unset,
+ .display_mask = html_block::inherited,
+ .visibility_mask = html_block::unset};
+ }
+ /**
+ * Produces html block with no defined values allocated from the pool
+ * @param pool
+ * @return
+ */
+ static auto undefined_html_block_pool(rspamd_mempool_t *pool) -> html_block *
+ {
+ auto *bl = rspamd_mempool_alloc0_type(pool, html_block);
+
+ return bl;
+ }
+};
+
+}// namespace rspamd::html
+
+#endif//RSPAMD_HTML_BLOCK_HXX
diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx
new file mode 100644
index 0000000..c642536
--- /dev/null
+++ b/src/libserver/html/html_entities.cxx
@@ -0,0 +1,2644 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "html_entities.hxx"
+
+#include <string>
+#include <utility>
+#include <vector>
+#include "contrib/ankerl/unordered_dense.h"
+#include <unicode/utf8.h>
+#include <unicode/uchar.h>
+#include "libutil/cxx/util.hxx"
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::html {
+
+struct html_entity_def {
+ const char *name;
+ const char *replacement;
+ unsigned code;
+ bool allow_heuristic;
+};
+
+#define ENTITY_DEF(name, code, replacement) \
+ html_entity_def \
+ { \
+ (name), (replacement), (code), false \
+ }
+#define ENTITY_DEF_HEUR(name, code, replacement) \
+ html_entity_def \
+ { \
+ (name), (replacement), (code), true \
+ }
+
+static const html_entity_def html_entities_array[] = {
+ ENTITY_DEF_HEUR("szlig", 223, "\xc3\x9f"),
+ ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
+ ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
+ ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
+ ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"),
+ ENTITY_DEF("notin", 8713, "\xe2\x88\x89"),
+ ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"),
+ ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"),
+ ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"),
+ ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"),
+ ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
+ ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
+ ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF_HEUR("Aacute", 193, "\xc3\x81"),
+ ENTITY_DEF("kappa", 954, "\xce\xba"),
+ ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
+ ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
+ ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"),
+ ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"),
+ ENTITY_DEF("gacute", 501, "\xc7\xb5"),
+ ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"),
+ ENTITY_DEF("tint", 8749, "\xe2\x88\xad"),
+ ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"),
+ ENTITY_DEF("kgreen", 312, "\xc4\xb8"),
+ ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"),
+ ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"),
+ ENTITY_DEF("Eogon", 280, "\xc4\x98"),
+ ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"),
+ ENTITY_DEF("phi", 966, "\xcf\x86"),
+ ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"),
+ ENTITY_DEF("utilde", 361, "\xc5\xa9"),
+ ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"),
+ ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
+ ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
+ ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
+ ENTITY_DEF_HEUR("egrave", 232, "\xc3\xa8"),
+ ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
+ ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
+ ENTITY_DEF_HEUR("middot", 183, "\xc2\xb7"),
+ ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
+ ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
+ ENTITY_DEF("tau", 964, "\xcf\x84"),
+ ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
+ ENTITY_DEF_HEUR("para", 182, "\xc2\xb6"),
+ ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
+ ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
+ ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"),
+ ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"),
+ ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"),
+ ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"),
+ ENTITY_DEF("kcedil", 311, "\xc4\xb7"),
+ ENTITY_DEF("omega", 969, "\xcf\x89"),
+ ENTITY_DEF("sime", 8771, "\xe2\x89\x83"),
+ ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"),
+ ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"),
+ ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"),
+ ENTITY_DEF("Utilde", 360, "\xc5\xa8"),
+ ENTITY_DEF("bsol", 92, "\x5c"),
+ ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"),
+ ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"),
+ ENTITY_DEF("rhov", 1009, "\xcf\xb1"),
+ ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"),
+ ENTITY_DEF("jukcy", 1108, "\xd1\x94"),
+ ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"),
+ ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"),
+ ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"),
+ ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
+ ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
+ ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
+ ENTITY_DEF_HEUR("brvbar", 166, "\xc2\xa6"),
+ ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
+ ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
+ ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
+ ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
+ ENTITY_DEF_HEUR("frac34", 190, "\xc2\xbe"),
+ ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
+ ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
+ ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
+ ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"),
+ ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"),
+ ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"),
+ ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"),
+ ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"),
+ ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"),
+ ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"),
+ ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"),
+ ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"),
+ ENTITY_DEF("dot", 729, "\xcb\x99"),
+ ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
+ ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
+ ENTITY_DEF_HEUR("uml", 168, "\xc2\xa8"),
+ ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
+ ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
+ ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
+ ENTITY_DEF("dblac", 733, "\xcb\x9d"),
+ ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"),
+ ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
+ ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
+ ENTITY_DEF("angst", 197, "\xc3\x85"),
+ ENTITY_DEF_HEUR("yen", 165, "\xc2\xa5"),
+ ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
+ ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
+ ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
+ ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"),
+ ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"),
+ ENTITY_DEF("Ocirc", 212, "\xc3\x94"),
+ ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"),
+ ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"),
+ ENTITY_DEF("YUcy", 1070, "\xd0\xae"),
+ ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"),
+ ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"),
+ ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"),
+ ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"),
+ ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"),
+ ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"),
+ ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"),
+ ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"),
+ ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"),
+ ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"),
+ ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"),
+ ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"),
+ ENTITY_DEF("zeta", 950, "\xce\xb6"),
+ ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"),
+ ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"),
+ ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"),
+ ENTITY_DEF("piv", 982, "\xcf\x96"),
+ ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"),
+ ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"),
+ ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"),
+ ENTITY_DEF("sum", 8721, "\xe2\x88\x91"),
+ ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"),
+ ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"),
+ ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"),
+ ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
+ ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"),
+ ENTITY_DEF_HEUR("GT", 62, "\x3e"),
+ ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"),
+ ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"),
+ ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"),
+ ENTITY_DEF("uhblk", 9600, "\xe2\x96\x80"),
+ ENTITY_DEF("latail", 10521, "\xe2\xa4\x99"),
+ ENTITY_DEF("smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"),
+ ENTITY_DEF("RoundImplies", 10608, "\xe2\xa5\xb0"),
+ ENTITY_DEF("wreath", 8768, "\xe2\x89\x80"),
+ ENTITY_DEF("curlyvee", 8910, "\xe2\x8b\x8e"),
+ ENTITY_DEF("uscr", 120010, "\xf0\x9d\x93\x8a"),
+ ENTITY_DEF("nleftrightarrow", 8622, "\xe2\x86\xae"),
+ ENTITY_DEF("ucy", 1091, "\xd1\x83"),
+ ENTITY_DEF("nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"),
+ ENTITY_DEF("bnot", 8976, "\xe2\x8c\x90"),
+ ENTITY_DEF("alefsym", 8501, "\xe2\x84\xb5"),
+ ENTITY_DEF("star", 9734, "\xe2\x98\x86"),
+ ENTITY_DEF("boxHd", 9572, "\xe2\x95\xa4"),
+ ENTITY_DEF("vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
+ ENTITY_DEF("Popf", 8473, "\xe2\x84\x99"),
+ ENTITY_DEF("simgE", 10912, "\xe2\xaa\xa0"),
+ ENTITY_DEF("upsilon", 965, "\xcf\x85"),
+ ENTITY_DEF("NoBreak", 8288, "\xe2\x81\xa0"),
+ ENTITY_DEF("realine", 8475, "\xe2\x84\x9b"),
+ ENTITY_DEF("frac38", 8540, "\xe2\x85\x9c"),
+ ENTITY_DEF("YAcy", 1071, "\xd0\xaf"),
+ ENTITY_DEF("bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"),
+ ENTITY_DEF("cudarrr", 10549, "\xe2\xa4\xb5"),
+ ENTITY_DEF("lsime", 10893, "\xe2\xaa\x8d"),
+ ENTITY_DEF("lowbar", 95, "\x5f"),
+ ENTITY_DEF("utdot", 8944, "\xe2\x8b\xb0"),
+ ENTITY_DEF("ReverseElement", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("nshortparallel", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("DJcy", 1026, "\xd0\x82"),
+ ENTITY_DEF("nsube", 8840, "\xe2\x8a\x88"),
+ ENTITY_DEF("VDash", 8875, "\xe2\x8a\xab"),
+ ENTITY_DEF("Ncaron", 327, "\xc5\x87"),
+ ENTITY_DEF("LeftUpVector", 8639, "\xe2\x86\xbf"),
+ ENTITY_DEF("Kcy", 1050, "\xd0\x9a"),
+ ENTITY_DEF("NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"),
+ ENTITY_DEF("nvHarr", 10500, "\xe2\xa4\x84"),
+ ENTITY_DEF("lotimes", 10804, "\xe2\xa8\xb4"),
+ ENTITY_DEF("RightFloor", 8971, "\xe2\x8c\x8b"),
+ ENTITY_DEF("succ", 8827, "\xe2\x89\xbb"),
+ ENTITY_DEF("Ucy", 1059, "\xd0\xa3"),
+ ENTITY_DEF("darr", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("lbarr", 10508, "\xe2\xa4\x8c"),
+ ENTITY_DEF("xfr", 120117, "\xf0\x9d\x94\xb5"),
+ ENTITY_DEF("zopf", 120171, "\xf0\x9d\x95\xab"),
+ ENTITY_DEF("Phi", 934, "\xce\xa6"),
+ ENTITY_DEF("ord", 10845, "\xe2\xa9\x9d"),
+ ENTITY_DEF("iinfin", 10716, "\xe2\xa7\x9c"),
+ ENTITY_DEF("Xfr", 120091, "\xf0\x9d\x94\x9b"),
+ ENTITY_DEF("qint", 10764, "\xe2\xa8\x8c"),
+ ENTITY_DEF("Upsilon", 933, "\xce\xa5"),
+ ENTITY_DEF("NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+ ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"),
+ ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"),
+ ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"),
+ ENTITY_DEF_HEUR("ge", 8805, "\xe2\x89\xa5"),
+ ENTITY_DEF_HEUR("iexcl", 161, "\xc2\xa1"),
+ ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"),
+ ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"),
+ ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"),
+ ENTITY_DEF("Jopf", 120129, "\xf0\x9d\x95\x81"),
+ ENTITY_DEF("Hstrok", 294, "\xc4\xa6"),
+ ENTITY_DEF("ldca", 10550, "\xe2\xa4\xb6"),
+ ENTITY_DEF("lmoust", 9136, "\xe2\x8e\xb0"),
+ ENTITY_DEF("wcirc", 373, "\xc5\xb5"),
+ ENTITY_DEF("DownRightVector", 8641, "\xe2\x87\x81"),
+ ENTITY_DEF("LessFullEqual", 8806, "\xe2\x89\xa6"),
+ ENTITY_DEF("dotsquare", 8865, "\xe2\x8a\xa1"),
+ ENTITY_DEF("zhcy", 1078, "\xd0\xb6"),
+ ENTITY_DEF("mDDot", 8762, "\xe2\x88\xba"),
+ ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"),
+ ENTITY_DEF("prec", 8826, "\xe2\x89\xba"),
+ ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"),
+ ENTITY_DEF_HEUR("COPY", 169, "\xc2\xa9"),
+ ENTITY_DEF("cong", 8773, "\xe2\x89\x85"),
+ ENTITY_DEF("sacute", 347, "\xc5\x9b"),
+ ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"),
+ ENTITY_DEF("it", 8290, "\xe2\x81\xa2"),
+ ENTITY_DEF("SOFTcy", 1068, "\xd0\xac"),
+ ENTITY_DEF("uuarr", 8648, "\xe2\x87\x88"),
+ ENTITY_DEF("iota", 953, "\xce\xb9"),
+ ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"),
+ ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"),
+ ENTITY_DEF_HEUR("QUOT", 34, "\x22"),
+ ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
+ ENTITY_DEF_HEUR("igrave", 236, "\xc3\xac"),
+ ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"),
+ ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+ ENTITY_DEF("zcaron", 382, "\xc5\xbe"),
+ ENTITY_DEF("DD", 8517, "\xe2\x85\x85"),
+ ENTITY_DEF("gamma", 947, "\xce\xb3"),
+ ENTITY_DEF("homtht", 8763, "\xe2\x88\xbb"),
+ ENTITY_DEF("NonBreakingSpace", 160, "\xc2\xa0"),
+ ENTITY_DEF("Proportion", 8759, "\xe2\x88\xb7"),
+ ENTITY_DEF("nedot", 8784, "\xe2\x89\x90\xcc\xb8"),
+ ENTITY_DEF("nabla", 8711, "\xe2\x88\x87"),
+ ENTITY_DEF("ac", 8766, "\xe2\x88\xbe"),
+ ENTITY_DEF("nsupe", 8841, "\xe2\x8a\x89"),
+ ENTITY_DEF("ell", 8467, "\xe2\x84\x93"),
+ ENTITY_DEF("boxvR", 9566, "\xe2\x95\x9e"),
+ ENTITY_DEF("LowerRightArrow", 8600, "\xe2\x86\x98"),
+ ENTITY_DEF("boxHu", 9575, "\xe2\x95\xa7"),
+ ENTITY_DEF("lE", 8806, "\xe2\x89\xa6"),
+ ENTITY_DEF("dzigrarr", 10239, "\xe2\x9f\xbf"),
+ ENTITY_DEF("rfloor", 8971, "\xe2\x8c\x8b"),
+ ENTITY_DEF("gneq", 10888, "\xe2\xaa\x88"),
+ ENTITY_DEF("rightleftharpoons", 8652, "\xe2\x87\x8c"),
+ ENTITY_DEF("gtquest", 10876, "\xe2\xa9\xbc"),
+ ENTITY_DEF("searhk", 10533, "\xe2\xa4\xa5"),
+ ENTITY_DEF("gesdoto", 10882, "\xe2\xaa\x82"),
+ ENTITY_DEF("cross", 10007, "\xe2\x9c\x97"),
+ ENTITY_DEF("rdquo", 8221, "\xe2\x80\x9d"),
+ ENTITY_DEF("sqsupset", 8848, "\xe2\x8a\x90"),
+ ENTITY_DEF("divonx", 8903, "\xe2\x8b\x87"),
+ ENTITY_DEF("lat", 10923, "\xe2\xaa\xab"),
+ ENTITY_DEF("rmoustache", 9137, "\xe2\x8e\xb1"),
+ ENTITY_DEF("succapprox", 10936, "\xe2\xaa\xb8"),
+ ENTITY_DEF("nhpar", 10994, "\xe2\xab\xb2"),
+ ENTITY_DEF("sharp", 9839, "\xe2\x99\xaf"),
+ ENTITY_DEF("lrcorner", 8991, "\xe2\x8c\x9f"),
+ ENTITY_DEF("Vscr", 119985, "\xf0\x9d\x92\xb1"),
+ ENTITY_DEF("varsigma", 962, "\xcf\x82"),
+ ENTITY_DEF("bsolb", 10693, "\xe2\xa7\x85"),
+ ENTITY_DEF("cupcap", 10822, "\xe2\xa9\x86"),
+ ENTITY_DEF("leftrightarrow", 8596, "\xe2\x86\x94"),
+ ENTITY_DEF("LeftTee", 8867, "\xe2\x8a\xa3"),
+ ENTITY_DEF("Sqrt", 8730, "\xe2\x88\x9a"),
+ ENTITY_DEF("Odblac", 336, "\xc5\x90"),
+ ENTITY_DEF("ocir", 8858, "\xe2\x8a\x9a"),
+ ENTITY_DEF("eqslantless", 10901, "\xe2\xaa\x95"),
+ ENTITY_DEF("supedot", 10948, "\xe2\xab\x84"),
+ ENTITY_DEF("intercal", 8890, "\xe2\x8a\xba"),
+ ENTITY_DEF("Gbreve", 286, "\xc4\x9e"),
+ ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"),
+ ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"),
+ ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"),
+ ENTITY_DEF_HEUR("Iuml", 207, "\xc3\x8f"),
+ ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"),
+ ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"),
+ ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"),
+ ENTITY_DEF("af", 8289, "\xe2\x81\xa1"),
+ ENTITY_DEF("xwedge", 8896, "\xe2\x8b\x80"),
+ ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"),
+ ENTITY_DEF("lcedil", 316, "\xc4\xbc"),
+ ENTITY_DEF("between", 8812, "\xe2\x89\xac"),
+ ENTITY_DEF_HEUR("Oslash", 216, "\xc3\x98"),
+ ENTITY_DEF("breve", 728, "\xcb\x98"),
+ ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"),
+ ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"),
+ ENTITY_DEF("lagran", 8466, "\xe2\x84\x92"),
+ ENTITY_DEF("kopf", 120156, "\xf0\x9d\x95\x9c"),
+ ENTITY_DEF("ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"),
+ ENTITY_DEF("nlsim", 8820, "\xe2\x89\xb4"),
+ ENTITY_DEF("Cap", 8914, "\xe2\x8b\x92"),
+ ENTITY_DEF("angmsdac", 10666, "\xe2\xa6\xaa"),
+ ENTITY_DEF("iocy", 1105, "\xd1\x91"),
+ ENTITY_DEF("seswar", 10537, "\xe2\xa4\xa9"),
+ ENTITY_DEF("dzcy", 1119, "\xd1\x9f"),
+ ENTITY_DEF("nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+ ENTITY_DEF("cup", 8746, "\xe2\x88\xaa"),
+ ENTITY_DEF("npar", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("late", 10925, "\xe2\xaa\xad"),
+ ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"),
+ ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"),
+ ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"),
+ ENTITY_DEF_HEUR("cent", 162, "\xc2\xa2"),
+ ENTITY_DEF("khcy", 1093, "\xd1\x85"),
+ ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("ycirc", 375, "\xc5\xb7"),
+ ENTITY_DEF("lharu", 8636, "\xe2\x86\xbc"),
+ ENTITY_DEF("upuparrows", 8648, "\xe2\x87\x88"),
+ ENTITY_DEF("sigmaf", 962, "\xcf\x82"),
+ ENTITY_DEF("nltri", 8938, "\xe2\x8b\xaa"),
+ ENTITY_DEF("mstpos", 8766, "\xe2\x88\xbe"),
+ ENTITY_DEF("Zopf", 8484, "\xe2\x84\xa4"),
+ ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"),
+ ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"),
+ ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"),
+ ENTITY_DEF_HEUR("iacute", 237, "\xc3\xad"),
+ ENTITY_DEF("njcy", 1114, "\xd1\x9a"),
+ ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"),
+ ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"),
+ ENTITY_DEF("Or", 10836, "\xe2\xa9\x94"),
+ ENTITY_DEF("blk34", 9619, "\xe2\x96\x93"),
+ ENTITY_DEF("equiv", 8801, "\xe2\x89\xa1"),
+ ENTITY_DEF("fflig", 64256, "\xef\xac\x80"),
+ ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"),
+ ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"),
+ ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"),
+ ENTITY_DEF_HEUR("frac12", 189, "\xc2\xbd"),
+ ENTITY_DEF("clubs", 9827, "\xe2\x99\xa3"),
+ ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"),
+ ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"),
+ ENTITY_DEF("asymp", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("models", 8871, "\xe2\x8a\xa7"),
+ ENTITY_DEF("emptyset", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("Tscr", 119983, "\xf0\x9d\x92\xaf"),
+ ENTITY_DEF("nleftarrow", 8602, "\xe2\x86\x9a"),
+ ENTITY_DEF("Omacr", 332, "\xc5\x8c"),
+ ENTITY_DEF("gtrarr", 10616, "\xe2\xa5\xb8"),
+ ENTITY_DEF("excl", 33, "\x21"),
+ ENTITY_DEF("rarrw", 8605, "\xe2\x86\x9d"),
+ ENTITY_DEF("abreve", 259, "\xc4\x83"),
+ ENTITY_DEF("CircleTimes", 8855, "\xe2\x8a\x97"),
+ ENTITY_DEF("aopf", 120146, "\xf0\x9d\x95\x92"),
+ ENTITY_DEF("eqvparsl", 10725, "\xe2\xa7\xa5"),
+ ENTITY_DEF("boxv", 9474, "\xe2\x94\x82"),
+ ENTITY_DEF("SuchThat", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("varphi", 981, "\xcf\x95"),
+ ENTITY_DEF("Ropf", 8477, "\xe2\x84\x9d"),
+ ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"),
+ ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"),
+ ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"),
+ ENTITY_DEF_HEUR("ntilde", 241, "\xc3\xb1"),
+ ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"),
+ ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"),
+ ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"),
+ ENTITY_DEF("pluscir", 10786, "\xe2\xa8\xa2"),
+ ENTITY_DEF("nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+ ENTITY_DEF("uArr", 8657, "\xe2\x87\x91"),
+ ENTITY_DEF("nwarhk", 10531, "\xe2\xa4\xa3"),
+ ENTITY_DEF("Ycirc", 374, "\xc5\xb6"),
+ ENTITY_DEF("tdot", 8411, "\xe2\x83\x9b"),
+ ENTITY_DEF("circledS", 9416, "\xe2\x93\x88"),
+ ENTITY_DEF("lhard", 8637, "\xe2\x86\xbd"),
+ ENTITY_DEF("iukcy", 1110, "\xd1\x96"),
+ ENTITY_DEF("PrecedesSlantEqual", 8828, "\xe2\x89\xbc"),
+ ENTITY_DEF("Sfr", 120086, "\xf0\x9d\x94\x96"),
+ ENTITY_DEF("egs", 10902, "\xe2\xaa\x96"),
+ ENTITY_DEF("oelig", 339, "\xc5\x93"),
+ ENTITY_DEF("bigtriangledown", 9661, "\xe2\x96\xbd"),
+ ENTITY_DEF("EmptyVerySmallSquare", 9643, "\xe2\x96\xab"),
+ ENTITY_DEF("Backslash", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("nscr", 120003, "\xf0\x9d\x93\x83"),
+ ENTITY_DEF("uogon", 371, "\xc5\xb3"),
+ ENTITY_DEF("circeq", 8791, "\xe2\x89\x97"),
+ ENTITY_DEF("check", 10003, "\xe2\x9c\x93"),
+ ENTITY_DEF("Sup", 8913, "\xe2\x8b\x91"),
+ ENTITY_DEF("Rcaron", 344, "\xc5\x98"),
+ ENTITY_DEF("lneqq", 8808, "\xe2\x89\xa8"),
+ ENTITY_DEF("lrhar", 8651, "\xe2\x87\x8b"),
+ ENTITY_DEF("ulcorn", 8988, "\xe2\x8c\x9c"),
+ ENTITY_DEF("timesd", 10800, "\xe2\xa8\xb0"),
+ ENTITY_DEF("Sum", 8721, "\xe2\x88\x91"),
+ ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("Lcaron", 317, "\xc4\xbd"),
+ ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"),
+ ENTITY_DEF_HEUR("AElig", 198, "\xc3\x86"),
+ ENTITY_DEF("varr", 8597, "\xe2\x86\x95"),
+ ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"),
+ ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"),
+ ENTITY_DEF("biguplus", 10756, "\xe2\xa8\x84"),
+ ENTITY_DEF("rpar", 41, "\x29"),
+ ENTITY_DEF("eng", 331, "\xc5\x8b"),
+ ENTITY_DEF("NegativeThinSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("lesssim", 8818, "\xe2\x89\xb2"),
+ ENTITY_DEF("lBarr", 10510, "\xe2\xa4\x8e"),
+ ENTITY_DEF("LeftUpTeeVector", 10592, "\xe2\xa5\xa0"),
+ ENTITY_DEF("gnE", 8809, "\xe2\x89\xa9"),
+ ENTITY_DEF("efr", 120098, "\xf0\x9d\x94\xa2"),
+ ENTITY_DEF("barvee", 8893, "\xe2\x8a\xbd"),
+ ENTITY_DEF("ee", 8519, "\xe2\x85\x87"),
+ ENTITY_DEF("Uogon", 370, "\xc5\xb2"),
+ ENTITY_DEF("gnapprox", 10890, "\xe2\xaa\x8a"),
+ ENTITY_DEF("olcir", 10686, "\xe2\xa6\xbe"),
+ ENTITY_DEF("boxUL", 9565, "\xe2\x95\x9d"),
+ ENTITY_DEF("Gg", 8921, "\xe2\x8b\x99"),
+ ENTITY_DEF("CloseCurlyQuote", 8217, "\xe2\x80\x99"),
+ ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"),
+ ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"),
+ ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
+ ENTITY_DEF_HEUR("ouml", 246, "\xc3\xb6"),
+ ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"),
+ ENTITY_DEF("Zcaron", 381, "\xc5\xbd"),
+ ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"),
+ ENTITY_DEF("boxvh", 9532, "\xe2\x94\xbc"),
+ ENTITY_DEF("ominus", 8854, "\xe2\x8a\x96"),
+ ENTITY_DEF("oopf", 120160, "\xf0\x9d\x95\xa0"),
+ ENTITY_DEF("nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+ ENTITY_DEF("RBarr", 10512, "\xe2\xa4\x90"),
+ ENTITY_DEF("iprod", 10812, "\xe2\xa8\xbc"),
+ ENTITY_DEF("lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
+ ENTITY_DEF("andand", 10837, "\xe2\xa9\x95"),
+ ENTITY_DEF("upharpoonright", 8638, "\xe2\x86\xbe"),
+ ENTITY_DEF("ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"),
+ ENTITY_DEF("drcrop", 8972, "\xe2\x8c\x8c"),
+ ENTITY_DEF("nsimeq", 8772, "\xe2\x89\x84"),
+ ENTITY_DEF("subsub", 10965, "\xe2\xab\x95"),
+ ENTITY_DEF("hardcy", 1098, "\xd1\x8a"),
+ ENTITY_DEF("leqslant", 10877, "\xe2\xa9\xbd"),
+ ENTITY_DEF("uharl", 8639, "\xe2\x86\xbf"),
+ ENTITY_DEF("expectation", 8496, "\xe2\x84\xb0"),
+ ENTITY_DEF("mdash", 8212, "\xe2\x80\x94"),
+ ENTITY_DEF("VerticalTilde", 8768, "\xe2\x89\x80"),
+ ENTITY_DEF("rdldhar", 10601, "\xe2\xa5\xa9"),
+ ENTITY_DEF("leftharpoonup", 8636, "\xe2\x86\xbc"),
+ ENTITY_DEF("mu", 956, "\xce\xbc"),
+ ENTITY_DEF("curarrm", 10556, "\xe2\xa4\xbc"),
+ ENTITY_DEF("Cdot", 266, "\xc4\x8a"),
+ ENTITY_DEF("NotTildeTilde", 8777, "\xe2\x89\x89"),
+ ENTITY_DEF("boxul", 9496, "\xe2\x94\x98"),
+ ENTITY_DEF("planckh", 8462, "\xe2\x84\x8e"),
+ ENTITY_DEF("CapitalDifferentialD", 8517, "\xe2\x85\x85"),
+ ENTITY_DEF("boxDL", 9559, "\xe2\x95\x97"),
+ ENTITY_DEF("cupbrcap", 10824, "\xe2\xa9\x88"),
+ ENTITY_DEF("boxdL", 9557, "\xe2\x95\x95"),
+ ENTITY_DEF("supe", 8839, "\xe2\x8a\x87"),
+ ENTITY_DEF("nvlt", 60, "\x3c\xe2\x83\x92"),
+ ENTITY_DEF("par", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("InvisibleComma", 8291, "\xe2\x81\xa3"),
+ ENTITY_DEF("ring", 730, "\xcb\x9a"),
+ ENTITY_DEF("nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"),
+ ENTITY_DEF("veeeq", 8794, "\xe2\x89\x9a"),
+ ENTITY_DEF("Hfr", 8460, "\xe2\x84\x8c"),
+ ENTITY_DEF("dstrok", 273, "\xc4\x91"),
+ ENTITY_DEF("gesles", 10900, "\xe2\xaa\x94"),
+ ENTITY_DEF("dash", 8208, "\xe2\x80\x90"),
+ ENTITY_DEF("SHcy", 1064, "\xd0\xa8"),
+ ENTITY_DEF("congdot", 10861, "\xe2\xa9\xad"),
+ ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"),
+ ENTITY_DEF("ncy", 1085, "\xd0\xbd"),
+ ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"),
+ ENTITY_DEF_HEUR("REG", 174, "\xc2\xae"),
+ ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"),
+ ENTITY_DEF("rsqb", 93, "\x5d"),
+ ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"),
+ ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"),
+ ENTITY_DEF("quest", 63, "\x3f"),
+ ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"),
+ ENTITY_DEF_HEUR("LT", 60, "\x3c"),
+ ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"),
+ ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"),
+ ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"),
+ ENTITY_DEF("Tilde", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("ltrif", 9666, "\xe2\x97\x82"),
+ ENTITY_DEF("ldrdhar", 10599, "\xe2\xa5\xa7"),
+ ENTITY_DEF("lcaron", 318, "\xc4\xbe"),
+ ENTITY_DEF("equivDD", 10872, "\xe2\xa9\xb8"),
+ ENTITY_DEF("lHar", 10594, "\xe2\xa5\xa2"),
+ ENTITY_DEF("vBar", 10984, "\xe2\xab\xa8"),
+ ENTITY_DEF("Mopf", 120132, "\xf0\x9d\x95\x84"),
+ ENTITY_DEF("LeftArrow", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("Rho", 929, "\xce\xa1"),
+ ENTITY_DEF("Ccirc", 264, "\xc4\x88"),
+ ENTITY_DEF("ifr", 120102, "\xf0\x9d\x94\xa6"),
+ ENTITY_DEF("cacute", 263, "\xc4\x87"),
+ ENTITY_DEF("centerdot", 183, "\xc2\xb7"),
+ ENTITY_DEF("dollar", 36, "\x24"),
+ ENTITY_DEF("lang", 10216, "\xe2\x9f\xa8"),
+ ENTITY_DEF("curvearrowright", 8631, "\xe2\x86\xb7"),
+ ENTITY_DEF("half", 189, "\xc2\xbd"),
+ ENTITY_DEF("Ecy", 1069, "\xd0\xad"),
+ ENTITY_DEF("rcub", 125, "\x7d"),
+ ENTITY_DEF("rcy", 1088, "\xd1\x80"),
+ ENTITY_DEF("isins", 8948, "\xe2\x8b\xb4"),
+ ENTITY_DEF("bsolhsub", 10184, "\xe2\x9f\x88"),
+ ENTITY_DEF("boxuL", 9563, "\xe2\x95\x9b"),
+ ENTITY_DEF("shchcy", 1097, "\xd1\x89"),
+ ENTITY_DEF("cwconint", 8754, "\xe2\x88\xb2"),
+ ENTITY_DEF("euro", 8364, "\xe2\x82\xac"),
+ ENTITY_DEF("lesseqqgtr", 10891, "\xe2\xaa\x8b"),
+ ENTITY_DEF("sim", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("rarrc", 10547, "\xe2\xa4\xb3"),
+ ENTITY_DEF("boxdl", 9488, "\xe2\x94\x90"),
+ ENTITY_DEF("Epsilon", 917, "\xce\x95"),
+ ENTITY_DEF("iiiint", 10764, "\xe2\xa8\x8c"),
+ ENTITY_DEF("Rightarrow", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("conint", 8750, "\xe2\x88\xae"),
+ ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"),
+ ENTITY_DEF("kappav", 1008, "\xcf\xb0"),
+ ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"),
+ ENTITY_DEF_HEUR("auml", 228, "\xc3\xa4"),
+ ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"),
+ ENTITY_DEF_HEUR("eacute", 233, "\xc3\xa9"),
+ ENTITY_DEF_HEUR("gt", 62, "\x3e"),
+ ENTITY_DEF("Gcedil", 290, "\xc4\xa2"),
+ ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"),
+ ENTITY_DEF("Tcy", 1058, "\xd0\xa2"),
+ ENTITY_DEF("swarrow", 8601, "\xe2\x86\x99"),
+ ENTITY_DEF("lopf", 120157, "\xf0\x9d\x95\x9d"),
+ ENTITY_DEF("Agrave", 192, "\xc3\x80"),
+ ENTITY_DEF("Aring", 197, "\xc3\x85"),
+ ENTITY_DEF("fpartint", 10765, "\xe2\xa8\x8d"),
+ ENTITY_DEF("xoplus", 10753, "\xe2\xa8\x81"),
+ ENTITY_DEF("LeftDownTeeVector", 10593, "\xe2\xa5\xa1"),
+ ENTITY_DEF("int", 8747, "\xe2\x88\xab"),
+ ENTITY_DEF("Zeta", 918, "\xce\x96"),
+ ENTITY_DEF("loz", 9674, "\xe2\x97\x8a"),
+ ENTITY_DEF("ncup", 10818, "\xe2\xa9\x82"),
+ ENTITY_DEF("napE", 10864, "\xe2\xa9\xb0\xcc\xb8"),
+ ENTITY_DEF("csup", 10960, "\xe2\xab\x90"),
+ ENTITY_DEF("Ncedil", 325, "\xc5\x85"),
+ ENTITY_DEF("cuwed", 8911, "\xe2\x8b\x8f"),
+ ENTITY_DEF("Dot", 168, "\xc2\xa8"),
+ ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"),
+ ENTITY_DEF("map", 8614, "\xe2\x86\xa6"),
+ ENTITY_DEF_HEUR("aelig", 230, "\xc3\xa6"),
+ ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"),
+ ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"),
+ ENTITY_DEF("nsccue", 8929, "\xe2\x8b\xa1"),
+ ENTITY_DEF("zigrarr", 8669, "\xe2\x87\x9d"),
+ ENTITY_DEF("Sacute", 346, "\xc5\x9a"),
+ ENTITY_DEF("orv", 10843, "\xe2\xa9\x9b"),
+ ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"),
+ ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"),
+ ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"),
+ ENTITY_DEF_HEUR("iquest", 191, "\xc2\xbf"),
+ ENTITY_DEF("wr", 8768, "\xe2\x89\x80"),
+ ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"),
+ ENTITY_DEF("ddagger", 8225, "\xe2\x80\xa1"),
+ ENTITY_DEF("nLeftarrow", 8653, "\xe2\x87\x8d"),
+ ENTITY_DEF("rbbrk", 10099, "\xe2\x9d\xb3"),
+ ENTITY_DEF("RightTriangle", 8883, "\xe2\x8a\xb3"),
+ ENTITY_DEF("leqq", 8806, "\xe2\x89\xa6"),
+ ENTITY_DEF("Vert", 8214, "\xe2\x80\x96"),
+ ENTITY_DEF("gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"),
+ ENTITY_DEF("LeftTeeVector", 10586, "\xe2\xa5\x9a"),
+ ENTITY_DEF("Union", 8899, "\xe2\x8b\x83"),
+ ENTITY_DEF("sc", 8827, "\xe2\x89\xbb"),
+ ENTITY_DEF("ofr", 120108, "\xf0\x9d\x94\xac"),
+ ENTITY_DEF("quatint", 10774, "\xe2\xa8\x96"),
+ ENTITY_DEF("apacir", 10863, "\xe2\xa9\xaf"),
+ ENTITY_DEF("profalar", 9006, "\xe2\x8c\xae"),
+ ENTITY_DEF("subsetneq", 8842, "\xe2\x8a\x8a"),
+ ENTITY_DEF("Vvdash", 8874, "\xe2\x8a\xaa"),
+ ENTITY_DEF("ohbar", 10677, "\xe2\xa6\xb5"),
+ ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"),
+ ENTITY_DEF("exist", 8707, "\xe2\x88\x83"),
+ ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"),
+ ENTITY_DEF_HEUR("euml", 235, "\xc3\xab"),
+ ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"),
+ ENTITY_DEF_HEUR("aacute", 225, "\xc3\xa1"),
+ ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"),
+ ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"),
+ ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"),
+ ENTITY_DEF("angsph", 8738, "\xe2\x88\xa2"),
+ ENTITY_DEF("nsmid", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("lsquor", 8218, "\xe2\x80\x9a"),
+ ENTITY_DEF("cemptyv", 10674, "\xe2\xa6\xb2"),
+ ENTITY_DEF("rAarr", 8667, "\xe2\x87\x9b"),
+ ENTITY_DEF("searr", 8600, "\xe2\x86\x98"),
+ ENTITY_DEF("complexes", 8450, "\xe2\x84\x82"),
+ ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"),
+ ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"),
+ ENTITY_DEF("Lacute", 313, "\xc4\xb9"),
+ ENTITY_DEF_HEUR("deg", 176, "\xc2\xb0"),
+ ENTITY_DEF("Racute", 340, "\xc5\x94"),
+ ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"),
+ ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"),
+ ENTITY_DEF("Hopf", 8461, "\xe2\x84\x8d"),
+ ENTITY_DEF("naturals", 8469, "\xe2\x84\x95"),
+ ENTITY_DEF("Cedilla", 184, "\xc2\xb8"),
+ ENTITY_DEF("exponentiale", 8519, "\xe2\x85\x87"),
+ ENTITY_DEF("vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+ ENTITY_DEF("leftrightarrows", 8646, "\xe2\x87\x86"),
+ ENTITY_DEF("Laplacetrf", 8466, "\xe2\x84\x92"),
+ ENTITY_DEF("vartriangleright", 8883, "\xe2\x8a\xb3"),
+ ENTITY_DEF("rtri", 9657, "\xe2\x96\xb9"),
+ ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"),
+ ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"),
+ ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"),
+ ENTITY_DEF_HEUR("Otilde", 213, "\xc3\x95"),
+ ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"),
+ ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"),
+ ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"),
+ ENTITY_DEF("thinsp", 8201, "\xe2\x80\x89"),
+ ENTITY_DEF("barwed", 8965, "\xe2\x8c\x85"),
+ ENTITY_DEF("elsdot", 10903, "\xe2\xaa\x97"),
+ ENTITY_DEF("circ", 710, "\xcb\x86"),
+ ENTITY_DEF("ni", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("mlcp", 10971, "\xe2\xab\x9b"),
+ ENTITY_DEF("Vdash", 8873, "\xe2\x8a\xa9"),
+ ENTITY_DEF("ShortRightArrow", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("upharpoonleft", 8639, "\xe2\x86\xbf"),
+ ENTITY_DEF("UnderBracket", 9141, "\xe2\x8e\xb5"),
+ ENTITY_DEF("rAtail", 10524, "\xe2\xa4\x9c"),
+ ENTITY_DEF("iopf", 120154, "\xf0\x9d\x95\x9a"),
+ ENTITY_DEF("longleftarrow", 10229, "\xe2\x9f\xb5"),
+ ENTITY_DEF("Zacute", 377, "\xc5\xb9"),
+ ENTITY_DEF("duhar", 10607, "\xe2\xa5\xaf"),
+ ENTITY_DEF("Mfr", 120080, "\xf0\x9d\x94\x90"),
+ ENTITY_DEF("prnap", 10937, "\xe2\xaa\xb9"),
+ ENTITY_DEF("eqcirc", 8790, "\xe2\x89\x96"),
+ ENTITY_DEF("rarrlp", 8620, "\xe2\x86\xac"),
+ ENTITY_DEF("le", 8804, "\xe2\x89\xa4"),
+ ENTITY_DEF("Oscr", 119978, "\xf0\x9d\x92\xaa"),
+ ENTITY_DEF("langd", 10641, "\xe2\xa6\x91"),
+ ENTITY_DEF("Ucirc", 219, "\xc3\x9b"),
+ ENTITY_DEF("precnapprox", 10937, "\xe2\xaa\xb9"),
+ ENTITY_DEF("succcurlyeq", 8829, "\xe2\x89\xbd"),
+ ENTITY_DEF("Tau", 932, "\xce\xa4"),
+ ENTITY_DEF("larr", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("neArr", 8663, "\xe2\x87\x97"),
+ ENTITY_DEF("subsim", 10951, "\xe2\xab\x87"),
+ ENTITY_DEF("DScy", 1029, "\xd0\x85"),
+ ENTITY_DEF("preccurlyeq", 8828, "\xe2\x89\xbc"),
+ ENTITY_DEF("NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"),
+ ENTITY_DEF("succnapprox", 10938, "\xe2\xaa\xba"),
+ ENTITY_DEF("prcue", 8828, "\xe2\x89\xbc"),
+ ENTITY_DEF("Downarrow", 8659, "\xe2\x87\x93"),
+ ENTITY_DEF("angmsdah", 10671, "\xe2\xa6\xaf"),
+ ENTITY_DEF("Emacr", 274, "\xc4\x92"),
+ ENTITY_DEF("lsh", 8624, "\xe2\x86\xb0"),
+ ENTITY_DEF("simne", 8774, "\xe2\x89\x86"),
+ ENTITY_DEF("Bumpeq", 8782, "\xe2\x89\x8e"),
+ ENTITY_DEF("RightUpTeeVector", 10588, "\xe2\xa5\x9c"),
+ ENTITY_DEF("Sigma", 931, "\xce\xa3"),
+ ENTITY_DEF("nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"),
+ ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"),
+ ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"),
+ ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"),
+ ENTITY_DEF_HEUR("ucirc", 251, "\xc3\xbb"),
+ ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"),
+ ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"),
+ ENTITY_DEF("Omicron", 927, "\xce\x9f"),
+ ENTITY_DEF("gsime", 10894, "\xe2\xaa\x8e"),
+ ENTITY_DEF("circlearrowleft", 8634, "\xe2\x86\xba"),
+ ENTITY_DEF("sqsupe", 8850, "\xe2\x8a\x92"),
+ ENTITY_DEF("supE", 10950, "\xe2\xab\x86"),
+ ENTITY_DEF("dlcrop", 8973, "\xe2\x8c\x8d"),
+ ENTITY_DEF("RightDownTeeVector", 10589, "\xe2\xa5\x9d"),
+ ENTITY_DEF("Colone", 10868, "\xe2\xa9\xb4"),
+ ENTITY_DEF("awconint", 8755, "\xe2\x88\xb3"),
+ ENTITY_DEF("smte", 10924, "\xe2\xaa\xac"),
+ ENTITY_DEF("lEg", 10891, "\xe2\xaa\x8b"),
+ ENTITY_DEF("circledast", 8859, "\xe2\x8a\x9b"),
+ ENTITY_DEF("ecolon", 8789, "\xe2\x89\x95"),
+ ENTITY_DEF("rect", 9645, "\xe2\x96\xad"),
+ ENTITY_DEF("Equal", 10869, "\xe2\xa9\xb5"),
+ ENTITY_DEF("nwnear", 10535, "\xe2\xa4\xa7"),
+ ENTITY_DEF("capdot", 10816, "\xe2\xa9\x80"),
+ ENTITY_DEF("straightphi", 981, "\xcf\x95"),
+ ENTITY_DEF("forkv", 10969, "\xe2\xab\x99"),
+ ENTITY_DEF("ZHcy", 1046, "\xd0\x96"),
+ ENTITY_DEF("Element", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("rthree", 8908, "\xe2\x8b\x8c"),
+ ENTITY_DEF("vzigzag", 10650, "\xe2\xa6\x9a"),
+ ENTITY_DEF("hybull", 8259, "\xe2\x81\x83"),
+ ENTITY_DEF("intprod", 10812, "\xe2\xa8\xbc"),
+ ENTITY_DEF("HumpEqual", 8783, "\xe2\x89\x8f"),
+ ENTITY_DEF("bigsqcup", 10758, "\xe2\xa8\x86"),
+ ENTITY_DEF("mp", 8723, "\xe2\x88\x93"),
+ ENTITY_DEF("lescc", 10920, "\xe2\xaa\xa8"),
+ ENTITY_DEF("NotPrecedes", 8832, "\xe2\x8a\x80"),
+ ENTITY_DEF("wedge", 8743, "\xe2\x88\xa7"),
+ ENTITY_DEF("Supset", 8913, "\xe2\x8b\x91"),
+ ENTITY_DEF("pm", 177, "\xc2\xb1"),
+ ENTITY_DEF("kfr", 120104, "\xf0\x9d\x94\xa8"),
+ ENTITY_DEF("ufisht", 10622, "\xe2\xa5\xbe"),
+ ENTITY_DEF("ecaron", 283, "\xc4\x9b"),
+ ENTITY_DEF("chcy", 1095, "\xd1\x87"),
+ ENTITY_DEF("Esim", 10867, "\xe2\xa9\xb3"),
+ ENTITY_DEF("fltns", 9649, "\xe2\x96\xb1"),
+ ENTITY_DEF("nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+ ENTITY_DEF("hookrightarrow", 8618, "\xe2\x86\xaa"),
+ ENTITY_DEF("semi", 59, "\x3b"),
+ ENTITY_DEF("ges", 10878, "\xe2\xa9\xbe"),
+ ENTITY_DEF("approxeq", 8778, "\xe2\x89\x8a"),
+ ENTITY_DEF("rarrsim", 10612, "\xe2\xa5\xb4"),
+ ENTITY_DEF("boxhD", 9573, "\xe2\x95\xa5"),
+ ENTITY_DEF("varpi", 982, "\xcf\x96"),
+ ENTITY_DEF("larrb", 8676, "\xe2\x87\xa4"),
+ ENTITY_DEF("copf", 120148, "\xf0\x9d\x95\x94"),
+ ENTITY_DEF("Dopf", 120123, "\xf0\x9d\x94\xbb"),
+ ENTITY_DEF("LeftVector", 8636, "\xe2\x86\xbc"),
+ ENTITY_DEF("iff", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("lnap", 10889, "\xe2\xaa\x89"),
+ ENTITY_DEF("NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"),
+ ENTITY_DEF("varrho", 1009, "\xcf\xb1"),
+ ENTITY_DEF("NotSucceeds", 8833, "\xe2\x8a\x81"),
+ ENTITY_DEF("ltrPar", 10646, "\xe2\xa6\x96"),
+ ENTITY_DEF("nlE", 8806, "\xe2\x89\xa6\xcc\xb8"),
+ ENTITY_DEF("Zfr", 8488, "\xe2\x84\xa8"),
+ ENTITY_DEF("LeftArrowBar", 8676, "\xe2\x87\xa4"),
+ ENTITY_DEF("boxplus", 8862, "\xe2\x8a\x9e"),
+ ENTITY_DEF("sqsube", 8849, "\xe2\x8a\x91"),
+ ENTITY_DEF("Re", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("Wfr", 120090, "\xf0\x9d\x94\x9a"),
+ ENTITY_DEF("epsi", 949, "\xce\xb5"),
+ ENTITY_DEF("oacute", 243, "\xc3\xb3"),
+ ENTITY_DEF("bdquo", 8222, "\xe2\x80\x9e"),
+ ENTITY_DEF("wscr", 120012, "\xf0\x9d\x93\x8c"),
+ ENTITY_DEF("bullet", 8226, "\xe2\x80\xa2"),
+ ENTITY_DEF("frown", 8994, "\xe2\x8c\xa2"),
+ ENTITY_DEF("siml", 10909, "\xe2\xaa\x9d"),
+ ENTITY_DEF("Rarr", 8608, "\xe2\x86\xa0"),
+ ENTITY_DEF("Scaron", 352, "\xc5\xa0"),
+ ENTITY_DEF("gtreqqless", 10892, "\xe2\xaa\x8c"),
+ ENTITY_DEF("Larr", 8606, "\xe2\x86\x9e"),
+ ENTITY_DEF("notniva", 8716, "\xe2\x88\x8c"),
+ ENTITY_DEF("gg", 8811, "\xe2\x89\xab"),
+ ENTITY_DEF("phmmat", 8499, "\xe2\x84\xb3"),
+ ENTITY_DEF("boxVL", 9571, "\xe2\x95\xa3"),
+ ENTITY_DEF("sigmav", 962, "\xcf\x82"),
+ ENTITY_DEF("order", 8500, "\xe2\x84\xb4"),
+ ENTITY_DEF("subsup", 10963, "\xe2\xab\x93"),
+ ENTITY_DEF("afr", 120094, "\xf0\x9d\x94\x9e"),
+ ENTITY_DEF("lbrace", 123, "\x7b"),
+ ENTITY_DEF("urcorn", 8989, "\xe2\x8c\x9d"),
+ ENTITY_DEF("Im", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"),
+ ENTITY_DEF("lne", 10887, "\xe2\xaa\x87"),
+ ENTITY_DEF("chi", 967, "\xcf\x87"),
+ ENTITY_DEF("cudarrl", 10552, "\xe2\xa4\xb8"),
+ ENTITY_DEF("ang", 8736, "\xe2\x88\xa0"),
+ ENTITY_DEF("isindot", 8949, "\xe2\x8b\xb5"),
+ ENTITY_DEF("Lfr", 120079, "\xf0\x9d\x94\x8f"),
+ ENTITY_DEF("Rsh", 8625, "\xe2\x86\xb1"),
+ ENTITY_DEF("Ocy", 1054, "\xd0\x9e"),
+ ENTITY_DEF("nvrArr", 10499, "\xe2\xa4\x83"),
+ ENTITY_DEF("otimes", 8855, "\xe2\x8a\x97"),
+ ENTITY_DEF("eqslantgtr", 10902, "\xe2\xaa\x96"),
+ ENTITY_DEF("Rfr", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("blacktriangleleft", 9666, "\xe2\x97\x82"),
+ ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"),
+ ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"),
+ ENTITY_DEF("scedil", 351, "\xc5\x9f"),
+ ENTITY_DEF_HEUR("iuml", 239, "\xc3\xaf"),
+ ENTITY_DEF("NJcy", 1034, "\xd0\x8a"),
+ ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"),
+ ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"),
+ ENTITY_DEF("udblac", 369, "\xc5\xb1"),
+ ENTITY_DEF("Sopf", 120138, "\xf0\x9d\x95\x8a"),
+ ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"),
+ ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"),
+ ENTITY_DEF_HEUR("sup3", 179, "\xc2\xb3"),
+ ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"),
+ ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"),
+ ENTITY_DEF("subrarr", 10617, "\xe2\xa5\xb9"),
+ ENTITY_DEF("SquareSubset", 8847, "\xe2\x8a\x8f"),
+ ENTITY_DEF("vprop", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("OpenCurlyQuote", 8216, "\xe2\x80\x98"),
+ ENTITY_DEF("supseteq", 8839, "\xe2\x8a\x87"),
+ ENTITY_DEF("nRightarrow", 8655, "\xe2\x87\x8f"),
+ ENTITY_DEF("Longleftarrow", 10232, "\xe2\x9f\xb8"),
+ ENTITY_DEF("lsquo", 8216, "\xe2\x80\x98"),
+ ENTITY_DEF("hstrok", 295, "\xc4\xa7"),
+ ENTITY_DEF("NotTilde", 8769, "\xe2\x89\x81"),
+ ENTITY_DEF("ogt", 10689, "\xe2\xa7\x81"),
+ ENTITY_DEF("block", 9608, "\xe2\x96\x88"),
+ ENTITY_DEF("minusd", 8760, "\xe2\x88\xb8"),
+ ENTITY_DEF("esdot", 8784, "\xe2\x89\x90"),
+ ENTITY_DEF("nsim", 8769, "\xe2\x89\x81"),
+ ENTITY_DEF("scsim", 8831, "\xe2\x89\xbf"),
+ ENTITY_DEF("boxVl", 9570, "\xe2\x95\xa2"),
+ ENTITY_DEF("ltimes", 8905, "\xe2\x8b\x89"),
+ ENTITY_DEF("thkap", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+ ENTITY_DEF("thetasym", 977, "\xcf\x91"),
+ ENTITY_DEF("eopf", 120150, "\xf0\x9d\x95\x96"),
+ ENTITY_DEF("image", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("doteqdot", 8785, "\xe2\x89\x91"),
+ ENTITY_DEF("Udblac", 368, "\xc5\xb0"),
+ ENTITY_DEF("gnsim", 8935, "\xe2\x8b\xa7"),
+ ENTITY_DEF("yicy", 1111, "\xd1\x97"),
+ ENTITY_DEF("vopf", 120167, "\xf0\x9d\x95\xa7"),
+ ENTITY_DEF("DDotrahd", 10513, "\xe2\xa4\x91"),
+ ENTITY_DEF("Iota", 921, "\xce\x99"),
+ ENTITY_DEF("GJcy", 1027, "\xd0\x83"),
+ ENTITY_DEF("rightthreetimes", 8908, "\xe2\x8b\x8c"),
+ ENTITY_DEF("nrtri", 8939, "\xe2\x8b\xab"),
+ ENTITY_DEF("TildeFullEqual", 8773, "\xe2\x89\x85"),
+ ENTITY_DEF("Dcaron", 270, "\xc4\x8e"),
+ ENTITY_DEF("ccaron", 269, "\xc4\x8d"),
+ ENTITY_DEF("lacute", 314, "\xc4\xba"),
+ ENTITY_DEF("VerticalBar", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("Igrave", 204, "\xc3\x8c"),
+ ENTITY_DEF("boxH", 9552, "\xe2\x95\x90"),
+ ENTITY_DEF("Pfr", 120083, "\xf0\x9d\x94\x93"),
+ ENTITY_DEF("equals", 61, "\x3d"),
+ ENTITY_DEF("rbrack", 93, "\x5d"),
+ ENTITY_DEF("OverParenthesis", 9180, "\xe2\x8f\x9c"),
+ ENTITY_DEF("in", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("llcorner", 8990, "\xe2\x8c\x9e"),
+ ENTITY_DEF("mcomma", 10793, "\xe2\xa8\xa9"),
+ ENTITY_DEF("NotGreater", 8815, "\xe2\x89\xaf"),
+ ENTITY_DEF("midcir", 10992, "\xe2\xab\xb0"),
+ ENTITY_DEF("Edot", 278, "\xc4\x96"),
+ ENTITY_DEF("oplus", 8853, "\xe2\x8a\x95"),
+ ENTITY_DEF("geqq", 8807, "\xe2\x89\xa7"),
+ ENTITY_DEF("curvearrowleft", 8630, "\xe2\x86\xb6"),
+ ENTITY_DEF("Poincareplane", 8460, "\xe2\x84\x8c"),
+ ENTITY_DEF("yscr", 120014, "\xf0\x9d\x93\x8e"),
+ ENTITY_DEF("ccaps", 10829, "\xe2\xa9\x8d"),
+ ENTITY_DEF("rpargt", 10644, "\xe2\xa6\x94"),
+ ENTITY_DEF("topfork", 10970, "\xe2\xab\x9a"),
+ ENTITY_DEF("Gamma", 915, "\xce\x93"),
+ ENTITY_DEF("umacr", 363, "\xc5\xab"),
+ ENTITY_DEF("frac13", 8531, "\xe2\x85\x93"),
+ ENTITY_DEF("cirfnint", 10768, "\xe2\xa8\x90"),
+ ENTITY_DEF("xlArr", 10232, "\xe2\x9f\xb8"),
+ ENTITY_DEF("digamma", 989, "\xcf\x9d"),
+ ENTITY_DEF("Hat", 94, "\x5e"),
+ ENTITY_DEF("lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"),
+ ENTITY_DEF("lgE", 10897, "\xe2\xaa\x91"),
+ ENTITY_DEF("commat", 64, "\x40"),
+ ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"),
+ ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"),
+ ENTITY_DEF("Ecirc", 202, "\xc3\x8a"),
+ ENTITY_DEF_HEUR("lt", 60, "\x3c"),
+ ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"),
+ ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"),
+ ENTITY_DEF("Abreve", 258, "\xc4\x82"),
+ ENTITY_DEF("gopf", 120152, "\xf0\x9d\x95\x98"),
+ ENTITY_DEF("Xopf", 120143, "\xf0\x9d\x95\x8f"),
+ ENTITY_DEF("Iacute", 205, "\xc3\x8d"),
+ ENTITY_DEF("Aopf", 120120, "\xf0\x9d\x94\xb8"),
+ ENTITY_DEF("gbreve", 287, "\xc4\x9f"),
+ ENTITY_DEF("nleq", 8816, "\xe2\x89\xb0"),
+ ENTITY_DEF("xopf", 120169, "\xf0\x9d\x95\xa9"),
+ ENTITY_DEF("SquareSupersetEqual", 8850, "\xe2\x8a\x92"),
+ ENTITY_DEF("NotLessTilde", 8820, "\xe2\x89\xb4"),
+ ENTITY_DEF("SubsetEqual", 8838, "\xe2\x8a\x86"),
+ ENTITY_DEF("Sc", 10940, "\xe2\xaa\xbc"),
+ ENTITY_DEF("sdote", 10854, "\xe2\xa9\xa6"),
+ ENTITY_DEF("loplus", 10797, "\xe2\xa8\xad"),
+ ENTITY_DEF("zfr", 120119, "\xf0\x9d\x94\xb7"),
+ ENTITY_DEF("subseteqq", 10949, "\xe2\xab\x85"),
+ ENTITY_DEF("Vdashl", 10982, "\xe2\xab\xa6"),
+ ENTITY_DEF("integers", 8484, "\xe2\x84\xa4"),
+ ENTITY_DEF("Umacr", 362, "\xc5\xaa"),
+ ENTITY_DEF("dopf", 120149, "\xf0\x9d\x95\x95"),
+ ENTITY_DEF("RightDownVectorBar", 10581, "\xe2\xa5\x95"),
+ ENTITY_DEF("angmsdaf", 10669, "\xe2\xa6\xad"),
+ ENTITY_DEF("Jfr", 120077, "\xf0\x9d\x94\x8d"),
+ ENTITY_DEF("bernou", 8492, "\xe2\x84\xac"),
+ ENTITY_DEF("lceil", 8968, "\xe2\x8c\x88"),
+ ENTITY_DEF("nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"),
+ ENTITY_DEF("NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"),
+ ENTITY_DEF("hearts", 9829, "\xe2\x99\xa5"),
+ ENTITY_DEF("vee", 8744, "\xe2\x88\xa8"),
+ ENTITY_DEF("LJcy", 1033, "\xd0\x89"),
+ ENTITY_DEF("nlt", 8814, "\xe2\x89\xae"),
+ ENTITY_DEF("because", 8757, "\xe2\x88\xb5"),
+ ENTITY_DEF("hairsp", 8202, "\xe2\x80\x8a"),
+ ENTITY_DEF("comma", 44, "\x2c"),
+ ENTITY_DEF("iecy", 1077, "\xd0\xb5"),
+ ENTITY_DEF("npre", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+ ENTITY_DEF("NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"),
+ ENTITY_DEF("mscr", 120002, "\xf0\x9d\x93\x82"),
+ ENTITY_DEF("jopf", 120155, "\xf0\x9d\x95\x9b"),
+ ENTITY_DEF("bumpE", 10926, "\xe2\xaa\xae"),
+ ENTITY_DEF("thicksim", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("Nfr", 120081, "\xf0\x9d\x94\x91"),
+ ENTITY_DEF("yucy", 1102, "\xd1\x8e"),
+ ENTITY_DEF("notinvc", 8950, "\xe2\x8b\xb6"),
+ ENTITY_DEF("lstrok", 322, "\xc5\x82"),
+ ENTITY_DEF("robrk", 10215, "\xe2\x9f\xa7"),
+ ENTITY_DEF("LeftTriangleBar", 10703, "\xe2\xa7\x8f"),
+ ENTITY_DEF("hksearow", 10533, "\xe2\xa4\xa5"),
+ ENTITY_DEF("bigcap", 8898, "\xe2\x8b\x82"),
+ ENTITY_DEF("udhar", 10606, "\xe2\xa5\xae"),
+ ENTITY_DEF("Yscr", 119988, "\xf0\x9d\x92\xb4"),
+ ENTITY_DEF("smeparsl", 10724, "\xe2\xa7\xa4"),
+ ENTITY_DEF("NotLess", 8814, "\xe2\x89\xae"),
+ ENTITY_DEF("dcaron", 271, "\xc4\x8f"),
+ ENTITY_DEF("ange", 10660, "\xe2\xa6\xa4"),
+ ENTITY_DEF("dHar", 10597, "\xe2\xa5\xa5"),
+ ENTITY_DEF("UpperRightArrow", 8599, "\xe2\x86\x97"),
+ ENTITY_DEF("trpezium", 9186, "\xe2\x8f\xa2"),
+ ENTITY_DEF("boxminus", 8863, "\xe2\x8a\x9f"),
+ ENTITY_DEF("notni", 8716, "\xe2\x88\x8c"),
+ ENTITY_DEF("dtrif", 9662, "\xe2\x96\xbe"),
+ ENTITY_DEF("nhArr", 8654, "\xe2\x87\x8e"),
+ ENTITY_DEF("larrpl", 10553, "\xe2\xa4\xb9"),
+ ENTITY_DEF("simeq", 8771, "\xe2\x89\x83"),
+ ENTITY_DEF("geqslant", 10878, "\xe2\xa9\xbe"),
+ ENTITY_DEF("RightUpVectorBar", 10580, "\xe2\xa5\x94"),
+ ENTITY_DEF("nsc", 8833, "\xe2\x8a\x81"),
+ ENTITY_DEF("div", 247, "\xc3\xb7"),
+ ENTITY_DEF("orslope", 10839, "\xe2\xa9\x97"),
+ ENTITY_DEF("lparlt", 10643, "\xe2\xa6\x93"),
+ ENTITY_DEF("trie", 8796, "\xe2\x89\x9c"),
+ ENTITY_DEF("cirmid", 10991, "\xe2\xab\xaf"),
+ ENTITY_DEF("wp", 8472, "\xe2\x84\x98"),
+ ENTITY_DEF("dagger", 8224, "\xe2\x80\xa0"),
+ ENTITY_DEF("utri", 9653, "\xe2\x96\xb5"),
+ ENTITY_DEF("supnE", 10956, "\xe2\xab\x8c"),
+ ENTITY_DEF("eg", 10906, "\xe2\xaa\x9a"),
+ ENTITY_DEF("LeftDownVector", 8643, "\xe2\x87\x83"),
+ ENTITY_DEF("NotLessEqual", 8816, "\xe2\x89\xb0"),
+ ENTITY_DEF("Bopf", 120121, "\xf0\x9d\x94\xb9"),
+ ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"),
+ ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"),
+ ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"),
+ ENTITY_DEF_HEUR("ograve", 242, "\xc3\xb2"),
+ ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"),
+ ENTITY_DEF("sigma", 963, "\xcf\x83"),
+ ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"),
+ ENTITY_DEF("gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
+ ENTITY_DEF("timesbar", 10801, "\xe2\xa8\xb1"),
+ ENTITY_DEF("Iukcy", 1030, "\xd0\x86"),
+ ENTITY_DEF("bscr", 119991, "\xf0\x9d\x92\xb7"),
+ ENTITY_DEF("Exists", 8707, "\xe2\x88\x83"),
+ ENTITY_DEF("tscr", 120009, "\xf0\x9d\x93\x89"),
+ ENTITY_DEF("tcy", 1090, "\xd1\x82"),
+ ENTITY_DEF("nwarr", 8598, "\xe2\x86\x96"),
+ ENTITY_DEF("hoarr", 8703, "\xe2\x87\xbf"),
+ ENTITY_DEF("lnapprox", 10889, "\xe2\xaa\x89"),
+ ENTITY_DEF("nu", 957, "\xce\xbd"),
+ ENTITY_DEF("bcy", 1073, "\xd0\xb1"),
+ ENTITY_DEF("ndash", 8211, "\xe2\x80\x93"),
+ ENTITY_DEF("smt", 10922, "\xe2\xaa\xaa"),
+ ENTITY_DEF("scaron", 353, "\xc5\xa1"),
+ ENTITY_DEF("IOcy", 1025, "\xd0\x81"),
+ ENTITY_DEF("Ifr", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("cularrp", 10557, "\xe2\xa4\xbd"),
+ ENTITY_DEF("lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
+ ENTITY_DEF("nlarr", 8602, "\xe2\x86\x9a"),
+ ENTITY_DEF("colon", 58, "\x3a"),
+ ENTITY_DEF("ddotseq", 10871, "\xe2\xa9\xb7"),
+ ENTITY_DEF("zacute", 378, "\xc5\xba"),
+ ENTITY_DEF("DoubleVerticalBar", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("larrfs", 10525, "\xe2\xa4\x9d"),
+ ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"),
+ ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"),
+ ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"),
+ ENTITY_DEF_HEUR("divide", 247, "\xc3\xb7"),
+ ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"),
+ ENTITY_DEF("IEcy", 1045, "\xd0\x95"),
+ ENTITY_DEF_HEUR("ordm", 186, "\xc2\xba"),
+ ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"),
+ ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"),
+ ENTITY_DEF("complement", 8705, "\xe2\x88\x81"),
+ ENTITY_DEF("top", 8868, "\xe2\x8a\xa4"),
+ ENTITY_DEF("DoubleContourIntegral", 8751, "\xe2\x88\xaf"),
+ ENTITY_DEF("nisd", 8954, "\xe2\x8b\xba"),
+ ENTITY_DEF("bcong", 8780, "\xe2\x89\x8c"),
+ ENTITY_DEF("plusdu", 10789, "\xe2\xa8\xa5"),
+ ENTITY_DEF("TildeTilde", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("lnE", 8808, "\xe2\x89\xa8"),
+ ENTITY_DEF("DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"),
+ ENTITY_DEF("nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"),
+ ENTITY_DEF("DownTeeArrow", 8615, "\xe2\x86\xa7"),
+ ENTITY_DEF("Cscr", 119966, "\xf0\x9d\x92\x9e"),
+ ENTITY_DEF("NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("emsp", 8195, "\xe2\x80\x83"),
+ ENTITY_DEF("vartriangleleft", 8882, "\xe2\x8a\xb2"),
+ ENTITY_DEF("ropar", 10630, "\xe2\xa6\x86"),
+ ENTITY_DEF("checkmark", 10003, "\xe2\x9c\x93"),
+ ENTITY_DEF("Ycy", 1067, "\xd0\xab"),
+ ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"),
+ ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"),
+ ENTITY_DEF("Lstrok", 321, "\xc5\x81"),
+ ENTITY_DEF_HEUR("AMP", 38, "\x26"),
+ ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"),
+ ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"),
+ ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"),
+ ENTITY_DEF("nesear", 10536, "\xe2\xa4\xa8"),
+ ENTITY_DEF("LeftDownVectorBar", 10585, "\xe2\xa5\x99"),
+ ENTITY_DEF("Integral", 8747, "\xe2\x88\xab"),
+ ENTITY_DEF("Beta", 914, "\xce\x92"),
+ ENTITY_DEF("nvdash", 8876, "\xe2\x8a\xac"),
+ ENTITY_DEF("nges", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+ ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"),
+ ENTITY_DEF("eta", 951, "\xce\xb7"),
+ ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"),
+ ENTITY_DEF_HEUR("ccedil", 231, "\xc3\xa7"),
+ ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"),
+ ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"),
+ ENTITY_DEF("mcy", 1084, "\xd0\xbc"),
+ ENTITY_DEF("Not", 10988, "\xe2\xab\xac"),
+ ENTITY_DEF("qscr", 120006, "\xf0\x9d\x93\x86"),
+ ENTITY_DEF("zwj", 8205, "\xe2\x80\x8d"),
+ ENTITY_DEF("ntrianglerighteq", 8941, "\xe2\x8b\xad"),
+ ENTITY_DEF("permil", 8240, "\xe2\x80\xb0"),
+ ENTITY_DEF("squarf", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("apos", 39, "\x27"),
+ ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"),
+ ENTITY_DEF("male", 9794, "\xe2\x99\x82"),
+ ENTITY_DEF_HEUR("agrave", 224, "\xc3\xa0"),
+ ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"),
+ ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"),
+ ENTITY_DEF_HEUR("aring", 229, "\xc3\xa5"),
+ ENTITY_DEF("Jukcy", 1028, "\xd0\x84"),
+ ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"),
+ ENTITY_DEF("dd", 8518, "\xe2\x85\x86"),
+ ENTITY_DEF("tscy", 1094, "\xd1\x86"),
+ ENTITY_DEF("oS", 9416, "\xe2\x93\x88"),
+ ENTITY_DEF("succeq", 10928, "\xe2\xaa\xb0"),
+ ENTITY_DEF("xharr", 10231, "\xe2\x9f\xb7"),
+ ENTITY_DEF("pluse", 10866, "\xe2\xa9\xb2"),
+ ENTITY_DEF("rfisht", 10621, "\xe2\xa5\xbd"),
+ ENTITY_DEF("HorizontalLine", 9472, "\xe2\x94\x80"),
+ ENTITY_DEF("DiacriticalAcute", 180, "\xc2\xb4"),
+ ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"),
+ ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"),
+ ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"),
+ ENTITY_DEF_HEUR("Auml", 196, "\xc3\x84"),
+ ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"),
+ ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"),
+ ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"),
+ ENTITY_DEF("isin", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("ldrushar", 10571, "\xe2\xa5\x8b"),
+ ENTITY_DEF("squ", 9633, "\xe2\x96\xa1"),
+ ENTITY_DEF("rbrksld", 10638, "\xe2\xa6\x8e"),
+ ENTITY_DEF("bigwedge", 8896, "\xe2\x8b\x80"),
+ ENTITY_DEF("swArr", 8665, "\xe2\x87\x99"),
+ ENTITY_DEF("IJlig", 306, "\xc4\xb2"),
+ ENTITY_DEF("harr", 8596, "\xe2\x86\x94"),
+ ENTITY_DEF("range", 10661, "\xe2\xa6\xa5"),
+ ENTITY_DEF("urtri", 9721, "\xe2\x97\xb9"),
+ ENTITY_DEF("NotVerticalBar", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("ic", 8291, "\xe2\x81\xa3"),
+ ENTITY_DEF("solbar", 9023, "\xe2\x8c\xbf"),
+ ENTITY_DEF("approx", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("SquareSuperset", 8848, "\xe2\x8a\x90"),
+ ENTITY_DEF("numsp", 8199, "\xe2\x80\x87"),
+ ENTITY_DEF("nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"),
+ ENTITY_DEF("tilde", 732, "\xcb\x9c"),
+ ENTITY_DEF("rlarr", 8644, "\xe2\x87\x84"),
+ ENTITY_DEF("langle", 10216, "\xe2\x9f\xa8"),
+ ENTITY_DEF("nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+ ENTITY_DEF("Nacute", 323, "\xc5\x83"),
+ ENTITY_DEF("NotLeftTriangle", 8938, "\xe2\x8b\xaa"),
+ ENTITY_DEF("sopf", 120164, "\xf0\x9d\x95\xa4"),
+ ENTITY_DEF("xmap", 10236, "\xe2\x9f\xbc"),
+ ENTITY_DEF("supne", 8843, "\xe2\x8a\x8b"),
+ ENTITY_DEF("Int", 8748, "\xe2\x88\xac"),
+ ENTITY_DEF("nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"),
+ ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"),
+ ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"),
+ ENTITY_DEF("Scedil", 350, "\xc5\x9e"),
+ ENTITY_DEF_HEUR("raquo", 187, "\xc2\xbb"),
+ ENTITY_DEF("ycy", 1099, "\xd1\x8b"),
+ ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"),
+ ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"),
+ ENTITY_DEF("nap", 8777, "\xe2\x89\x89"),
+ ENTITY_DEF("shcy", 1096, "\xd1\x88"),
+ ENTITY_DEF("ssetmn", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"),
+ ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"),
+ ENTITY_DEF_HEUR("pound", 163, "\xc2\xa3"),
+ ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"),
+ ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"),
+ ENTITY_DEF("zcy", 1079, "\xd0\xb7"),
+ ENTITY_DEF("Square", 9633, "\xe2\x96\xa1"),
+ ENTITY_DEF("subE", 10949, "\xe2\xab\x85"),
+ ENTITY_DEF("infintie", 10717, "\xe2\xa7\x9d"),
+ ENTITY_DEF("Cayleys", 8493, "\xe2\x84\xad"),
+ ENTITY_DEF("lsaquo", 8249, "\xe2\x80\xb9"),
+ ENTITY_DEF("realpart", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("nprec", 8832, "\xe2\x8a\x80"),
+ ENTITY_DEF("RightTriangleBar", 10704, "\xe2\xa7\x90"),
+ ENTITY_DEF("Kopf", 120130, "\xf0\x9d\x95\x82"),
+ ENTITY_DEF("Ubreve", 364, "\xc5\xac"),
+ ENTITY_DEF("Uopf", 120140, "\xf0\x9d\x95\x8c"),
+ ENTITY_DEF("trianglelefteq", 8884, "\xe2\x8a\xb4"),
+ ENTITY_DEF("rotimes", 10805, "\xe2\xa8\xb5"),
+ ENTITY_DEF("qfr", 120110, "\xf0\x9d\x94\xae"),
+ ENTITY_DEF("gtcc", 10919, "\xe2\xaa\xa7"),
+ ENTITY_DEF("fnof", 402, "\xc6\x92"),
+ ENTITY_DEF("tritime", 10811, "\xe2\xa8\xbb"),
+ ENTITY_DEF("andslope", 10840, "\xe2\xa9\x98"),
+ ENTITY_DEF("harrw", 8621, "\xe2\x86\xad"),
+ ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"),
+ ENTITY_DEF("Amacr", 256, "\xc4\x80"),
+ ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"),
+ ENTITY_DEF_HEUR("thorn", 254, "\xc3\xbe"),
+ ENTITY_DEF_HEUR("ordf", 170, "\xc2\xaa"),
+ ENTITY_DEF("natur", 9838, "\xe2\x99\xae"),
+ ENTITY_DEF("xi", 958, "\xce\xbe"),
+ ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"),
+ ENTITY_DEF("nspar", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("Jcy", 1049, "\xd0\x99"),
+ ENTITY_DEF("DownLeftTeeVector", 10590, "\xe2\xa5\x9e"),
+ ENTITY_DEF("rbarr", 10509, "\xe2\xa4\x8d"),
+ ENTITY_DEF("Xi", 926, "\xce\x9e"),
+ ENTITY_DEF("bull", 8226, "\xe2\x80\xa2"),
+ ENTITY_DEF("cuesc", 8927, "\xe2\x8b\x9f"),
+ ENTITY_DEF("backcong", 8780, "\xe2\x89\x8c"),
+ ENTITY_DEF("frac35", 8535, "\xe2\x85\x97"),
+ ENTITY_DEF("hscr", 119997, "\xf0\x9d\x92\xbd"),
+ ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"),
+ ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("ETH", 208, "\xc3\x90"),
+ ENTITY_DEF_HEUR("Yacute", 221, "\xc3\x9d"),
+ ENTITY_DEF_HEUR("shy", 173, "\xc2\xad"),
+ ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"),
+ ENTITY_DEF_HEUR("sup1", 185, "\xc2\xb9"),
+ ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"),
+ ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"),
+ ENTITY_DEF("ncedil", 326, "\xc5\x86"),
+ ENTITY_DEF("Lambda", 923, "\xce\x9b"),
+ ENTITY_DEF("uopf", 120166, "\xf0\x9d\x95\xa6"),
+ ENTITY_DEF("bigodot", 10752, "\xe2\xa8\x80"),
+ ENTITY_DEF("ubreve", 365, "\xc5\xad"),
+ ENTITY_DEF("drbkarow", 10512, "\xe2\xa4\x90"),
+ ENTITY_DEF("els", 10901, "\xe2\xaa\x95"),
+ ENTITY_DEF("shortparallel", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("Pcy", 1055, "\xd0\x9f"),
+ ENTITY_DEF("dsol", 10742, "\xe2\xa7\xb6"),
+ ENTITY_DEF("supsim", 10952, "\xe2\xab\x88"),
+ ENTITY_DEF("Longrightarrow", 10233, "\xe2\x9f\xb9"),
+ ENTITY_DEF("ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"),
+ ENTITY_DEF("Itilde", 296, "\xc4\xa8"),
+ ENTITY_DEF("nparallel", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("And", 10835, "\xe2\xa9\x93"),
+ ENTITY_DEF("boxhd", 9516, "\xe2\x94\xac"),
+ ENTITY_DEF("Dashv", 10980, "\xe2\xab\xa4"),
+ ENTITY_DEF("NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+ ENTITY_DEF("Eta", 919, "\xce\x97"),
+ ENTITY_DEF("Qopf", 8474, "\xe2\x84\x9a"),
+ ENTITY_DEF("period", 46, "\x2e"),
+ ENTITY_DEF("angmsd", 8737, "\xe2\x88\xa1"),
+ ENTITY_DEF("fllig", 64258, "\xef\xac\x82"),
+ ENTITY_DEF("cuvee", 8910, "\xe2\x8b\x8e"),
+ ENTITY_DEF("wedbar", 10847, "\xe2\xa9\x9f"),
+ ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"),
+ ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"),
+ ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"),
+ ENTITY_DEF_HEUR("reg", 174, "\xc2\xae"),
+ ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("Upsi", 978, "\xcf\x92"),
+ ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"),
+ ENTITY_DEF("boxHU", 9577, "\xe2\x95\xa9"),
+ ENTITY_DEF("frac56", 8538, "\xe2\x85\x9a"),
+ ENTITY_DEF("utrif", 9652, "\xe2\x96\xb4"),
+ ENTITY_DEF("LeftTriangle", 8882, "\xe2\x8a\xb2"),
+ ENTITY_DEF("nsime", 8772, "\xe2\x89\x84"),
+ ENTITY_DEF("rcedil", 343, "\xc5\x97"),
+ ENTITY_DEF("aogon", 261, "\xc4\x85"),
+ ENTITY_DEF("uHar", 10595, "\xe2\xa5\xa3"),
+ ENTITY_DEF("ForAll", 8704, "\xe2\x88\x80"),
+ ENTITY_DEF("prE", 10931, "\xe2\xaa\xb3"),
+ ENTITY_DEF("boxV", 9553, "\xe2\x95\x91"),
+ ENTITY_DEF("softcy", 1100, "\xd1\x8c"),
+ ENTITY_DEF("hercon", 8889, "\xe2\x8a\xb9"),
+ ENTITY_DEF("lmoustache", 9136, "\xe2\x8e\xb0"),
+ ENTITY_DEF("Product", 8719, "\xe2\x88\x8f"),
+ ENTITY_DEF("lsimg", 10895, "\xe2\xaa\x8f"),
+ ENTITY_DEF("verbar", 124, "\x7c"),
+ ENTITY_DEF("ofcir", 10687, "\xe2\xa6\xbf"),
+ ENTITY_DEF("curlyeqprec", 8926, "\xe2\x8b\x9e"),
+ ENTITY_DEF("ldquo", 8220, "\xe2\x80\x9c"),
+ ENTITY_DEF("bot", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("Psi", 936, "\xce\xa8"),
+ ENTITY_DEF("OElig", 338, "\xc5\x92"),
+ ENTITY_DEF("DownRightVectorBar", 10583, "\xe2\xa5\x97"),
+ ENTITY_DEF("minusb", 8863, "\xe2\x8a\x9f"),
+ ENTITY_DEF("Iscr", 8464, "\xe2\x84\x90"),
+ ENTITY_DEF("Tcedil", 354, "\xc5\xa2"),
+ ENTITY_DEF("ffilig", 64259, "\xef\xac\x83"),
+ ENTITY_DEF("Gcy", 1043, "\xd0\x93"),
+ ENTITY_DEF("oline", 8254, "\xe2\x80\xbe"),
+ ENTITY_DEF("bottom", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("nVDash", 8879, "\xe2\x8a\xaf"),
+ ENTITY_DEF("lessdot", 8918, "\xe2\x8b\x96"),
+ ENTITY_DEF("cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"),
+ ENTITY_DEF("gla", 10917, "\xe2\xaa\xa5"),
+ ENTITY_DEF("hellip", 8230, "\xe2\x80\xa6"),
+ ENTITY_DEF("hookleftarrow", 8617, "\xe2\x86\xa9"),
+ ENTITY_DEF("Cup", 8915, "\xe2\x8b\x93"),
+ ENTITY_DEF("upsi", 965, "\xcf\x85"),
+ ENTITY_DEF("DownArrowBar", 10515, "\xe2\xa4\x93"),
+ ENTITY_DEF("lowast", 8727, "\xe2\x88\x97"),
+ ENTITY_DEF("profline", 8978, "\xe2\x8c\x92"),
+ ENTITY_DEF("ngsim", 8821, "\xe2\x89\xb5"),
+ ENTITY_DEF("boxhu", 9524, "\xe2\x94\xb4"),
+ ENTITY_DEF("operp", 10681, "\xe2\xa6\xb9"),
+ ENTITY_DEF("cap", 8745, "\xe2\x88\xa9"),
+ ENTITY_DEF("Hcirc", 292, "\xc4\xa4"),
+ ENTITY_DEF("Ncy", 1053, "\xd0\x9d"),
+ ENTITY_DEF("zeetrf", 8488, "\xe2\x84\xa8"),
+ ENTITY_DEF("cuepr", 8926, "\xe2\x8b\x9e"),
+ ENTITY_DEF("supsetneq", 8843, "\xe2\x8a\x8b"),
+ ENTITY_DEF("lfloor", 8970, "\xe2\x8c\x8a"),
+ ENTITY_DEF("ngtr", 8815, "\xe2\x89\xaf"),
+ ENTITY_DEF("ccups", 10828, "\xe2\xa9\x8c"),
+ ENTITY_DEF("pscr", 120005, "\xf0\x9d\x93\x85"),
+ ENTITY_DEF("Cfr", 8493, "\xe2\x84\xad"),
+ ENTITY_DEF("dtri", 9663, "\xe2\x96\xbf"),
+ ENTITY_DEF("icirc", 238, "\xc3\xae"),
+ ENTITY_DEF("leftarrow", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("vdash", 8866, "\xe2\x8a\xa2"),
+ ENTITY_DEF("leftrightharpoons", 8651, "\xe2\x87\x8b"),
+ ENTITY_DEF("rightrightarrows", 8649, "\xe2\x87\x89"),
+ ENTITY_DEF("strns", 175, "\xc2\xaf"),
+ ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"),
+ ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"),
+ ENTITY_DEF_HEUR("yacute", 253, "\xc3\xbd"),
+ ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"),
+ ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"),
+ ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"),
+ ENTITY_DEF("thetav", 977, "\xcf\x91"),
+ ENTITY_DEF("OverBracket", 9140, "\xe2\x8e\xb4"),
+ ENTITY_DEF("PrecedesTilde", 8830, "\xe2\x89\xbe"),
+ ENTITY_DEF("rtrie", 8885, "\xe2\x8a\xb5"),
+ ENTITY_DEF("Scirc", 348, "\xc5\x9c"),
+ ENTITY_DEF("vsupne", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
+ ENTITY_DEF("OverBrace", 9182, "\xe2\x8f\x9e"),
+ ENTITY_DEF("Yfr", 120092, "\xf0\x9d\x94\x9c"),
+ ENTITY_DEF("scnE", 10934, "\xe2\xaa\xb6"),
+ ENTITY_DEF("simlE", 10911, "\xe2\xaa\x9f"),
+ ENTITY_DEF("Proportional", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("edot", 279, "\xc4\x97"),
+ ENTITY_DEF("loang", 10220, "\xe2\x9f\xac"),
+ ENTITY_DEF("gesdot", 10880, "\xe2\xaa\x80"),
+ ENTITY_DEF("DownBreve", 785, "\xcc\x91"),
+ ENTITY_DEF("pcy", 1087, "\xd0\xbf"),
+ ENTITY_DEF("Succeeds", 8827, "\xe2\x89\xbb"),
+ ENTITY_DEF("mfr", 120106, "\xf0\x9d\x94\xaa"),
+ ENTITY_DEF("Leftarrow", 8656, "\xe2\x87\x90"),
+ ENTITY_DEF("boxDr", 9555, "\xe2\x95\x93"),
+ ENTITY_DEF("Nscr", 119977, "\xf0\x9d\x92\xa9"),
+ ENTITY_DEF("diam", 8900, "\xe2\x8b\x84"),
+ ENTITY_DEF("CHcy", 1063, "\xd0\xa7"),
+ ENTITY_DEF("boxdr", 9484, "\xe2\x94\x8c"),
+ ENTITY_DEF("rlm", 8207, "\xe2\x80\x8f"),
+ ENTITY_DEF("Coproduct", 8720, "\xe2\x88\x90"),
+ ENTITY_DEF("RightTeeArrow", 8614, "\xe2\x86\xa6"),
+ ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"),
+ ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"),
+ ENTITY_DEF("sol", 47, "\x2f"),
+ ENTITY_DEF_HEUR("ecirc", 234, "\xc3\xaa"),
+ ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"),
+ ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"),
+ ENTITY_DEF("ap", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("rbrke", 10636, "\xe2\xa6\x8c"),
+ ENTITY_DEF("LeftFloor", 8970, "\xe2\x8c\x8a"),
+ ENTITY_DEF("blk12", 9618, "\xe2\x96\x92"),
+ ENTITY_DEF("Conint", 8751, "\xe2\x88\xaf"),
+ ENTITY_DEF("triangledown", 9663, "\xe2\x96\xbf"),
+ ENTITY_DEF("Icy", 1048, "\xd0\x98"),
+ ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"),
+ ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"),
+ ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"),
+ ENTITY_DEF_HEUR("copy", 169, "\xc2\xa9"),
+ ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"),
+ ENTITY_DEF("seArr", 8664, "\xe2\x87\x98"),
+ ENTITY_DEF("ENG", 330, "\xc5\x8a"),
+ ENTITY_DEF("DoubleRightArrow", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("tfr", 120113, "\xf0\x9d\x94\xb1"),
+ ENTITY_DEF("rharul", 10604, "\xe2\xa5\xac"),
+ ENTITY_DEF("bfr", 120095, "\xf0\x9d\x94\x9f"),
+ ENTITY_DEF("origof", 8886, "\xe2\x8a\xb6"),
+ ENTITY_DEF("Therefore", 8756, "\xe2\x88\xb4"),
+ ENTITY_DEF("glE", 10898, "\xe2\xaa\x92"),
+ ENTITY_DEF("leftarrowtail", 8610, "\xe2\x86\xa2"),
+ ENTITY_DEF("NotEqual", 8800, "\xe2\x89\xa0"),
+ ENTITY_DEF("LeftCeiling", 8968, "\xe2\x8c\x88"),
+ ENTITY_DEF("lArr", 8656, "\xe2\x87\x90"),
+ ENTITY_DEF("subseteq", 8838, "\xe2\x8a\x86"),
+ ENTITY_DEF("larrbfs", 10527, "\xe2\xa4\x9f"),
+ ENTITY_DEF("Gammad", 988, "\xcf\x9c"),
+ ENTITY_DEF("rtriltri", 10702, "\xe2\xa7\x8e"),
+ ENTITY_DEF("Fcy", 1060, "\xd0\xa4"),
+ ENTITY_DEF("Vopf", 120141, "\xf0\x9d\x95\x8d"),
+ ENTITY_DEF("lrarr", 8646, "\xe2\x87\x86"),
+ ENTITY_DEF("delta", 948, "\xce\xb4"),
+ ENTITY_DEF("xodot", 10752, "\xe2\xa8\x80"),
+ ENTITY_DEF("larrtl", 8610, "\xe2\x86\xa2"),
+ ENTITY_DEF("gsim", 8819, "\xe2\x89\xb3"),
+ ENTITY_DEF("ratail", 10522, "\xe2\xa4\x9a"),
+ ENTITY_DEF("vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
+ ENTITY_DEF("boxur", 9492, "\xe2\x94\x94"),
+ ENTITY_DEF("succsim", 8831, "\xe2\x89\xbf"),
+ ENTITY_DEF("triplus", 10809, "\xe2\xa8\xb9"),
+ ENTITY_DEF("nless", 8814, "\xe2\x89\xae"),
+ ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"),
+ ENTITY_DEF("lambda", 955, "\xce\xbb"),
+ ENTITY_DEF_HEUR("uuml", 252, "\xc3\xbc"),
+ ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"),
+ ENTITY_DEF("ccirc", 265, "\xc4\x89"),
+ ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"),
+ ENTITY_DEF("Pscr", 119979, "\xf0\x9d\x92\xab"),
+ ENTITY_DEF("supsup", 10966, "\xe2\xab\x96"),
+ ENTITY_DEF("Cacute", 262, "\xc4\x86"),
+ ENTITY_DEF("upsih", 978, "\xcf\x92"),
+ ENTITY_DEF("precsim", 8830, "\xe2\x89\xbe"),
+ ENTITY_DEF("longrightarrow", 10230, "\xe2\x9f\xb6"),
+ ENTITY_DEF("circledR", 174, "\xc2\xae"),
+ ENTITY_DEF("UpTeeArrow", 8613, "\xe2\x86\xa5"),
+ ENTITY_DEF("bepsi", 1014, "\xcf\xb6"),
+ ENTITY_DEF("oast", 8859, "\xe2\x8a\x9b"),
+ ENTITY_DEF("yfr", 120118, "\xf0\x9d\x94\xb6"),
+ ENTITY_DEF("rdsh", 8627, "\xe2\x86\xb3"),
+ ENTITY_DEF("Ograve", 210, "\xc3\x92"),
+ ENTITY_DEF("LeftVectorBar", 10578, "\xe2\xa5\x92"),
+ ENTITY_DEF("NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"),
+ ENTITY_DEF("Jscr", 119973, "\xf0\x9d\x92\xa5"),
+ ENTITY_DEF("psi", 968, "\xcf\x88"),
+ ENTITY_DEF("orarr", 8635, "\xe2\x86\xbb"),
+ ENTITY_DEF("Subset", 8912, "\xe2\x8b\x90"),
+ ENTITY_DEF("curarr", 8631, "\xe2\x86\xb7"),
+ ENTITY_DEF("CirclePlus", 8853, "\xe2\x8a\x95"),
+ ENTITY_DEF("gtrless", 8823, "\xe2\x89\xb7"),
+ ENTITY_DEF("nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"),
+ ENTITY_DEF("prop", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("gEl", 10892, "\xe2\xaa\x8c"),
+ ENTITY_DEF("gtlPar", 10645, "\xe2\xa6\x95"),
+ ENTITY_DEF("frasl", 8260, "\xe2\x81\x84"),
+ ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"),
+ ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"),
+ ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF_HEUR("Uuml", 220, "\xc3\x9c"),
+ ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"),
+ ENTITY_DEF_HEUR("sect", 167, "\xc2\xa7"),
+ ENTITY_DEF("cdot", 267, "\xc4\x8b"),
+ ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"),
+ ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"),
+ ENTITY_DEF("nsqsube", 8930, "\xe2\x8b\xa2"),
+ ENTITY_DEF("grave", 96, "\x60"),
+ ENTITY_DEF("angrtvb", 8894, "\xe2\x8a\xbe"),
+ ENTITY_DEF("MediumSpace", 8287, "\xe2\x81\x9f"),
+ ENTITY_DEF("Ntilde", 209, "\xc3\x91"),
+ ENTITY_DEF("solb", 10692, "\xe2\xa7\x84"),
+ ENTITY_DEF("angzarr", 9084, "\xe2\x8d\xbc"),
+ ENTITY_DEF("nopf", 120159, "\xf0\x9d\x95\x9f"),
+ ENTITY_DEF("rtrif", 9656, "\xe2\x96\xb8"),
+ ENTITY_DEF("nrightarrow", 8603, "\xe2\x86\x9b"),
+ ENTITY_DEF("Kappa", 922, "\xce\x9a"),
+ ENTITY_DEF("simrarr", 10610, "\xe2\xa5\xb2"),
+ ENTITY_DEF("imacr", 299, "\xc4\xab"),
+ ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"),
+ ENTITY_DEF("part", 8706, "\xe2\x88\x82"),
+ ENTITY_DEF("esim", 8770, "\xe2\x89\x82"),
+ ENTITY_DEF_HEUR("atilde", 227, "\xc3\xa3"),
+ ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"),
+ ENTITY_DEF("jcirc", 309, "\xc4\xb5"),
+ ENTITY_DEF("Ecaron", 282, "\xc4\x9a"),
+ ENTITY_DEF("VerticalSeparator", 10072, "\xe2\x9d\x98"),
+ ENTITY_DEF("rHar", 10596, "\xe2\xa5\xa4"),
+ ENTITY_DEF("rcaron", 345, "\xc5\x99"),
+ ENTITY_DEF("subnE", 10955, "\xe2\xab\x8b"),
+ ENTITY_DEF("ii", 8520, "\xe2\x85\x88"),
+ ENTITY_DEF("Cconint", 8752, "\xe2\x88\xb0"),
+ ENTITY_DEF("Mcy", 1052, "\xd0\x9c"),
+ ENTITY_DEF("eqcolon", 8789, "\xe2\x89\x95"),
+ ENTITY_DEF("cupor", 10821, "\xe2\xa9\x85"),
+ ENTITY_DEF("DoubleUpArrow", 8657, "\xe2\x87\x91"),
+ ENTITY_DEF("boxbox", 10697, "\xe2\xa7\x89"),
+ ENTITY_DEF("setminus", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("Lleftarrow", 8666, "\xe2\x87\x9a"),
+ ENTITY_DEF("nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"),
+ ENTITY_DEF("TRADE", 8482, "\xe2\x84\xa2"),
+ ENTITY_DEF("urcorner", 8989, "\xe2\x8c\x9d"),
+ ENTITY_DEF("lsqb", 91, "\x5b"),
+ ENTITY_DEF("cupcup", 10826, "\xe2\xa9\x8a"),
+ ENTITY_DEF("kjcy", 1116, "\xd1\x9c"),
+ ENTITY_DEF("llhard", 10603, "\xe2\xa5\xab"),
+ ENTITY_DEF("mumap", 8888, "\xe2\x8a\xb8"),
+ ENTITY_DEF("iiint", 8749, "\xe2\x88\xad"),
+ ENTITY_DEF("RightTee", 8866, "\xe2\x8a\xa2"),
+ ENTITY_DEF("Tcaron", 356, "\xc5\xa4"),
+ ENTITY_DEF("bigcirc", 9711, "\xe2\x97\xaf"),
+ ENTITY_DEF("trianglerighteq", 8885, "\xe2\x8a\xb5"),
+ ENTITY_DEF("NotLessGreater", 8824, "\xe2\x89\xb8"),
+ ENTITY_DEF("hArr", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("ocy", 1086, "\xd0\xbe"),
+ ENTITY_DEF("tosa", 10537, "\xe2\xa4\xa9"),
+ ENTITY_DEF("twixt", 8812, "\xe2\x89\xac"),
+ ENTITY_DEF("square", 9633, "\xe2\x96\xa1"),
+ ENTITY_DEF("Otimes", 10807, "\xe2\xa8\xb7"),
+ ENTITY_DEF("Kcedil", 310, "\xc4\xb6"),
+ ENTITY_DEF("beth", 8502, "\xe2\x84\xb6"),
+ ENTITY_DEF("triminus", 10810, "\xe2\xa8\xba"),
+ ENTITY_DEF("nlArr", 8653, "\xe2\x87\x8d"),
+ ENTITY_DEF("Oacute", 211, "\xc3\x93"),
+ ENTITY_DEF("zwnj", 8204, "\xe2\x80\x8c"),
+ ENTITY_DEF("ll", 8810, "\xe2\x89\xaa"),
+ ENTITY_DEF("smashp", 10803, "\xe2\xa8\xb3"),
+ ENTITY_DEF("ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"),
+ ENTITY_DEF("rnmid", 10990, "\xe2\xab\xae"),
+ ENTITY_DEF("nwArr", 8662, "\xe2\x87\x96"),
+ ENTITY_DEF("RightUpDownVector", 10575, "\xe2\xa5\x8f"),
+ ENTITY_DEF("lbbrk", 10098, "\xe2\x9d\xb2"),
+ ENTITY_DEF("compfn", 8728, "\xe2\x88\x98"),
+ ENTITY_DEF("eDDot", 10871, "\xe2\xa9\xb7"),
+ ENTITY_DEF("Jsercy", 1032, "\xd0\x88"),
+ ENTITY_DEF("HARDcy", 1066, "\xd0\xaa"),
+ ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"),
+ ENTITY_DEF("theta", 952, "\xce\xb8"),
+ ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF_HEUR("sup2", 178, "\xc2\xb2"),
+ ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"),
+ ENTITY_DEF("gdot", 289, "\xc4\xa1"),
+ ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"),
+ ENTITY_DEF("Superset", 8835, "\xe2\x8a\x83"),
+ ENTITY_DEF("prap", 10935, "\xe2\xaa\xb7"),
+ ENTITY_DEF("Zscr", 119989, "\xf0\x9d\x92\xb5"),
+ ENTITY_DEF("nsucc", 8833, "\xe2\x8a\x81"),
+ ENTITY_DEF("supseteqq", 10950, "\xe2\xab\x86"),
+ ENTITY_DEF("UpTee", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("LowerLeftArrow", 8601, "\xe2\x86\x99"),
+ ENTITY_DEF("ssmile", 8995, "\xe2\x8c\xa3"),
+ ENTITY_DEF("niv", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("bigvee", 8897, "\xe2\x8b\x81"),
+ ENTITY_DEF("kscr", 120000, "\xf0\x9d\x93\x80"),
+ ENTITY_DEF("xutri", 9651, "\xe2\x96\xb3"),
+ ENTITY_DEF("caret", 8257, "\xe2\x81\x81"),
+ ENTITY_DEF("caron", 711, "\xcb\x87"),
+ ENTITY_DEF("Wedge", 8896, "\xe2\x8b\x80"),
+ ENTITY_DEF("sdotb", 8865, "\xe2\x8a\xa1"),
+ ENTITY_DEF("bigoplus", 10753, "\xe2\xa8\x81"),
+ ENTITY_DEF("Breve", 728, "\xcb\x98"),
+ ENTITY_DEF("ImaginaryI", 8520, "\xe2\x85\x88"),
+ ENTITY_DEF("longmapsto", 10236, "\xe2\x9f\xbc"),
+ ENTITY_DEF("boxVH", 9580, "\xe2\x95\xac"),
+ ENTITY_DEF("lozenge", 9674, "\xe2\x97\x8a"),
+ ENTITY_DEF("toea", 10536, "\xe2\xa4\xa8"),
+ ENTITY_DEF("nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"),
+ ENTITY_DEF("gcirc", 285, "\xc4\x9d"),
+ ENTITY_DEF("NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"),
+ ENTITY_DEF("pre", 10927, "\xe2\xaa\xaf"),
+ ENTITY_DEF("ascr", 119990, "\xf0\x9d\x92\xb6"),
+ ENTITY_DEF("Acirc", 194, "\xc3\x82"),
+ ENTITY_DEF("questeq", 8799, "\xe2\x89\x9f"),
+ ENTITY_DEF("ncaron", 328, "\xc5\x88"),
+ ENTITY_DEF("LeftTeeArrow", 8612, "\xe2\x86\xa4"),
+ ENTITY_DEF("xcirc", 9711, "\xe2\x97\xaf"),
+ ENTITY_DEF("swarr", 8601, "\xe2\x86\x99"),
+ ENTITY_DEF("MinusPlus", 8723, "\xe2\x88\x93"),
+ ENTITY_DEF("plus", 43, "\x2b"),
+ ENTITY_DEF("NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("rppolint", 10770, "\xe2\xa8\x92"),
+ ENTITY_DEF("NotTildeFullEqual", 8775, "\xe2\x89\x87"),
+ ENTITY_DEF("ltdot", 8918, "\xe2\x8b\x96"),
+ ENTITY_DEF("NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"),
+ ENTITY_DEF("Lscr", 8466, "\xe2\x84\x92"),
+ ENTITY_DEF("pitchfork", 8916, "\xe2\x8b\x94"),
+ ENTITY_DEF("Eopf", 120124, "\xf0\x9d\x94\xbc"),
+ ENTITY_DEF("ropf", 120163, "\xf0\x9d\x95\xa3"),
+ ENTITY_DEF("Delta", 916, "\xce\x94"),
+ ENTITY_DEF("lozf", 10731, "\xe2\xa7\xab"),
+ ENTITY_DEF("RightTeeVector", 10587, "\xe2\xa5\x9b"),
+ ENTITY_DEF("UpDownArrow", 8597, "\xe2\x86\x95"),
+ ENTITY_DEF("bump", 8782, "\xe2\x89\x8e"),
+ ENTITY_DEF("Rscr", 8475, "\xe2\x84\x9b"),
+ ENTITY_DEF("slarr", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("lcy", 1083, "\xd0\xbb"),
+ ENTITY_DEF("Vee", 8897, "\xe2\x8b\x81"),
+ ENTITY_DEF("Iogon", 302, "\xc4\xae"),
+ ENTITY_DEF("minus", 8722, "\xe2\x88\x92"),
+ ENTITY_DEF("GreaterFullEqual", 8807, "\xe2\x89\xa7"),
+ ENTITY_DEF("xhArr", 10234, "\xe2\x9f\xba"),
+ ENTITY_DEF("shortmid", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("DoubleDownArrow", 8659, "\xe2\x87\x93"),
+ ENTITY_DEF("Wscr", 119986, "\xf0\x9d\x92\xb2"),
+ ENTITY_DEF("rang", 10217, "\xe2\x9f\xa9"),
+ ENTITY_DEF("lcub", 123, "\x7b"),
+ ENTITY_DEF("mnplus", 8723, "\xe2\x88\x93"),
+ ENTITY_DEF("ulcrop", 8975, "\xe2\x8c\x8f"),
+ ENTITY_DEF("wfr", 120116, "\xf0\x9d\x94\xb4"),
+ ENTITY_DEF("DifferentialD", 8518, "\xe2\x85\x86"),
+ ENTITY_DEF("ThinSpace", 8201, "\xe2\x80\x89"),
+ ENTITY_DEF("NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"),
+ ENTITY_DEF("Topf", 120139, "\xf0\x9d\x95\x8b"),
+ ENTITY_DEF("sbquo", 8218, "\xe2\x80\x9a"),
+ ENTITY_DEF("sdot", 8901, "\xe2\x8b\x85"),
+ ENTITY_DEF("DoubleLeftTee", 10980, "\xe2\xab\xa4"),
+ ENTITY_DEF("vBarv", 10985, "\xe2\xab\xa9"),
+ ENTITY_DEF("subne", 8842, "\xe2\x8a\x8a"),
+ ENTITY_DEF("gtrdot", 8919, "\xe2\x8b\x97"),
+ ENTITY_DEF("opar", 10679, "\xe2\xa6\xb7"),
+ ENTITY_DEF("apid", 8779, "\xe2\x89\x8b"),
+ ENTITY_DEF("Cross", 10799, "\xe2\xa8\xaf"),
+ ENTITY_DEF("lhblk", 9604, "\xe2\x96\x84"),
+ ENTITY_DEF("capcap", 10827, "\xe2\xa9\x8b"),
+ ENTITY_DEF("midast", 42, "\x2a"),
+ ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"),
+ ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"),
+ ENTITY_DEF_HEUR("Euml", 203, "\xc3\x8b"),
+ ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"),
+ ENTITY_DEF("Rcy", 1056, "\xd0\xa0"),
+ ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"),
+ ENTITY_DEF("dashv", 8867, "\xe2\x8a\xa3"),
+ ENTITY_DEF("ast", 42, "\x2a"),
+ ENTITY_DEF("ContourIntegral", 8750, "\xe2\x88\xae"),
+ ENTITY_DEF("Ofr", 120082, "\xf0\x9d\x94\x92"),
+ ENTITY_DEF("Lcy", 1051, "\xd0\x9b"),
+ ENTITY_DEF("nltrie", 8940, "\xe2\x8b\xac"),
+ ENTITY_DEF("ShortUpArrow", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("acy", 1072, "\xd0\xb0"),
+ ENTITY_DEF("rightarrow", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("UnderBar", 95, "\x5f"),
+ ENTITY_DEF("LongLeftArrow", 10229, "\xe2\x9f\xb5"),
+ ENTITY_DEF("andd", 10844, "\xe2\xa9\x9c"),
+ ENTITY_DEF("xlarr", 10229, "\xe2\x9f\xb5"),
+ ENTITY_DEF("percnt", 37, "\x25"),
+ ENTITY_DEF("rharu", 8640, "\xe2\x87\x80"),
+ ENTITY_DEF("plusdo", 8724, "\xe2\x88\x94"),
+ ENTITY_DEF("TScy", 1062, "\xd0\xa6"),
+ ENTITY_DEF("kcy", 1082, "\xd0\xba"),
+ ENTITY_DEF("boxVR", 9568, "\xe2\x95\xa0"),
+ ENTITY_DEF("looparrowleft", 8619, "\xe2\x86\xab"),
+ ENTITY_DEF("scirc", 349, "\xc5\x9d"),
+ ENTITY_DEF("drcorn", 8991, "\xe2\x8c\x9f"),
+ ENTITY_DEF("iiota", 8489, "\xe2\x84\xa9"),
+ ENTITY_DEF("Zcy", 1047, "\xd0\x97"),
+ ENTITY_DEF("frac58", 8541, "\xe2\x85\x9d"),
+ ENTITY_DEF("alpha", 945, "\xce\xb1"),
+ ENTITY_DEF("daleth", 8504, "\xe2\x84\xb8"),
+ ENTITY_DEF("gtreqless", 8923, "\xe2\x8b\x9b"),
+ ENTITY_DEF("tstrok", 359, "\xc5\xa7"),
+ ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"),
+ ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"),
+ ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
+ ENTITY_DEF_HEUR("otilde", 245, "\xc3\xb5"),
+ ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"),
+ ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"),
+ ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"),
+ ENTITY_DEF("ljcy", 1113, "\xd1\x99"),
+ ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"),
+ ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"),
+ ENTITY_DEF_HEUR("plusmn", 177, "\xc2\xb1"),
+ ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
+ ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"),
+ ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"),
+ ENTITY_DEF("laemptyv", 10676, "\xe2\xa6\xb4"),
+ ENTITY_DEF("phiv", 981, "\xcf\x95"),
+ ENTITY_DEF("sfrown", 8994, "\xe2\x8c\xa2"),
+ ENTITY_DEF("DoubleUpDownArrow", 8661, "\xe2\x87\x95"),
+ ENTITY_DEF("lpar", 40, "\x28"),
+ ENTITY_DEF("frac45", 8536, "\xe2\x85\x98"),
+ ENTITY_DEF("rBarr", 10511, "\xe2\xa4\x8f"),
+ ENTITY_DEF("npolint", 10772, "\xe2\xa8\x94"),
+ ENTITY_DEF("emacr", 275, "\xc4\x93"),
+ ENTITY_DEF("maltese", 10016, "\xe2\x9c\xa0"),
+ ENTITY_DEF("PlusMinus", 177, "\xc2\xb1"),
+ ENTITY_DEF("ReverseEquilibrium", 8651, "\xe2\x87\x8b"),
+ ENTITY_DEF("oscr", 8500, "\xe2\x84\xb4"),
+ ENTITY_DEF("blacksquare", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("TSHcy", 1035, "\xd0\x8b"),
+ ENTITY_DEF("gap", 10886, "\xe2\xaa\x86"),
+ ENTITY_DEF("xnis", 8955, "\xe2\x8b\xbb"),
+ ENTITY_DEF("Ll", 8920, "\xe2\x8b\x98"),
+ ENTITY_DEF("PrecedesEqual", 10927, "\xe2\xaa\xaf"),
+ ENTITY_DEF("incare", 8453, "\xe2\x84\x85"),
+ ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"),
+ ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF_HEUR("nbsp", 160, " "),
+ ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"),
+ ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"),
+ ENTITY_DEF("rho", 961, "\xcf\x81"),
+ ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"),
+ ENTITY_DEF_HEUR("eth", 240, "\xc3\xb0"),
+ ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"),
+ ENTITY_DEF("Tab", 9, "\x09"),
+ ENTITY_DEF("omicron", 959, "\xce\xbf"),
+ ENTITY_DEF("blacktriangle", 9652, "\xe2\x96\xb4"),
+ ENTITY_DEF("nldr", 8229, "\xe2\x80\xa5"),
+ ENTITY_DEF("downharpoonleft", 8643, "\xe2\x87\x83"),
+ ENTITY_DEF("circledcirc", 8858, "\xe2\x8a\x9a"),
+ ENTITY_DEF("leftleftarrows", 8647, "\xe2\x87\x87"),
+ ENTITY_DEF("NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"),
+ ENTITY_DEF("nvgt", 62, "\x3e\xe2\x83\x92"),
+ ENTITY_DEF("rhard", 8641, "\xe2\x87\x81"),
+ ENTITY_DEF("nGg", 8921, "\xe2\x8b\x99\xcc\xb8"),
+ ENTITY_DEF("lurdshar", 10570, "\xe2\xa5\x8a"),
+ ENTITY_DEF("cirE", 10691, "\xe2\xa7\x83"),
+ ENTITY_DEF("isinE", 8953, "\xe2\x8b\xb9"),
+ ENTITY_DEF("eparsl", 10723, "\xe2\xa7\xa3"),
+ ENTITY_DEF("RightAngleBracket", 10217, "\xe2\x9f\xa9"),
+ ENTITY_DEF("hcirc", 293, "\xc4\xa5"),
+ ENTITY_DEF("bumpeq", 8783, "\xe2\x89\x8f"),
+ ENTITY_DEF("cire", 8791, "\xe2\x89\x97"),
+ ENTITY_DEF("dotplus", 8724, "\xe2\x88\x94"),
+ ENTITY_DEF("itilde", 297, "\xc4\xa9"),
+ ENTITY_DEF("uwangle", 10663, "\xe2\xa6\xa7"),
+ ENTITY_DEF("rlhar", 8652, "\xe2\x87\x8c"),
+ ENTITY_DEF("rbrace", 125, "\x7d"),
+ ENTITY_DEF("mid", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("el", 10905, "\xe2\xaa\x99"),
+ ENTITY_DEF("KJcy", 1036, "\xd0\x8c"),
+ ENTITY_DEF("odiv", 10808, "\xe2\xa8\xb8"),
+ ENTITY_DEF("amacr", 257, "\xc4\x81"),
+ ENTITY_DEF("qprime", 8279, "\xe2\x81\x97"),
+ ENTITY_DEF("tcedil", 355, "\xc5\xa3"),
+ ENTITY_DEF("UpArrowDownArrow", 8645, "\xe2\x87\x85"),
+ ENTITY_DEF("spades", 9824, "\xe2\x99\xa0"),
+ ENTITY_DEF("napos", 329, "\xc5\x89"),
+ ENTITY_DEF("straightepsilon", 1013, "\xcf\xb5"),
+ ENTITY_DEF("CupCap", 8781, "\xe2\x89\x8d"),
+ ENTITY_DEF("Oopf", 120134, "\xf0\x9d\x95\x86"),
+ ENTITY_DEF("sub", 8834, "\xe2\x8a\x82"),
+ ENTITY_DEF("ohm", 937, "\xce\xa9"),
+ ENTITY_DEF("UnderBrace", 9183, "\xe2\x8f\x9f"),
+ ENTITY_DEF("looparrowright", 8620, "\xe2\x86\xac"),
+ ENTITY_DEF("xotime", 10754, "\xe2\xa8\x82"),
+ ENTITY_DEF("ntgl", 8825, "\xe2\x89\xb9"),
+ ENTITY_DEF("minusdu", 10794, "\xe2\xa8\xaa"),
+ ENTITY_DEF("rarrb", 8677, "\xe2\x87\xa5"),
+ ENTITY_DEF("nvlArr", 10498, "\xe2\xa4\x82"),
+ ENTITY_DEF("triangle", 9653, "\xe2\x96\xb5"),
+ ENTITY_DEF("nacute", 324, "\xc5\x84"),
+ ENTITY_DEF("boxHD", 9574, "\xe2\x95\xa6"),
+ ENTITY_DEF("ratio", 8758, "\xe2\x88\xb6"),
+ ENTITY_DEF("larrsim", 10611, "\xe2\xa5\xb3"),
+ ENTITY_DEF("LessLess", 10913, "\xe2\xaa\xa1"),
+ ENTITY_DEF("yacy", 1103, "\xd1\x8f"),
+ ENTITY_DEF("ctdot", 8943, "\xe2\x8b\xaf"),
+ ENTITY_DEF("and", 8743, "\xe2\x88\xa7"),
+ ENTITY_DEF("lrtri", 8895, "\xe2\x8a\xbf"),
+ ENTITY_DEF("eDot", 8785, "\xe2\x89\x91"),
+ ENTITY_DEF("sqsub", 8847, "\xe2\x8a\x8f"),
+ ENTITY_DEF("real", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("Dcy", 1044, "\xd0\x94"),
+ ENTITY_DEF("vartheta", 977, "\xcf\x91"),
+ ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"),
+ ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"),
+ ENTITY_DEF_HEUR("acute", 180, "\xc2\xb4"),
+ ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"),
+ ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"),
+ ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"),
+ ENTITY_DEF("divideontimes", 8903, "\xe2\x8b\x87"),
+ ENTITY_DEF("lsim", 8818, "\xe2\x89\xb2"),
+ ENTITY_DEF("nearhk", 10532, "\xe2\xa4\xa4"),
+ ENTITY_DEF("nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"),
+ ENTITY_DEF("RuleDelayed", 10740, "\xe2\xa7\xb4"),
+ ENTITY_DEF("smile", 8995, "\xe2\x8c\xa3"),
+ ENTITY_DEF("coprod", 8720, "\xe2\x88\x90"),
+ ENTITY_DEF("imof", 8887, "\xe2\x8a\xb7"),
+ ENTITY_DEF("ecy", 1101, "\xd1\x8d"),
+ ENTITY_DEF("RightCeiling", 8969, "\xe2\x8c\x89"),
+ ENTITY_DEF("dlcorn", 8990, "\xe2\x8c\x9e"),
+ ENTITY_DEF("Nu", 925, "\xce\x9d"),
+ ENTITY_DEF("frac18", 8539, "\xe2\x85\x9b"),
+ ENTITY_DEF("diamond", 8900, "\xe2\x8b\x84"),
+ ENTITY_DEF("Icirc", 206, "\xc3\x8e"),
+ ENTITY_DEF("ngeq", 8817, "\xe2\x89\xb1"),
+ ENTITY_DEF("epsilon", 949, "\xce\xb5"),
+ ENTITY_DEF("fork", 8916, "\xe2\x8b\x94"),
+ ENTITY_DEF("xrarr", 10230, "\xe2\x9f\xb6"),
+ ENTITY_DEF("racute", 341, "\xc5\x95"),
+ ENTITY_DEF("ntlg", 8824, "\xe2\x89\xb8"),
+ ENTITY_DEF("xvee", 8897, "\xe2\x8b\x81"),
+ ENTITY_DEF("LeftArrowRightArrow", 8646, "\xe2\x87\x86"),
+ ENTITY_DEF("DownLeftRightVector", 10576, "\xe2\xa5\x90"),
+ ENTITY_DEF("Eacute", 201, "\xc3\x89"),
+ ENTITY_DEF("gimel", 8503, "\xe2\x84\xb7"),
+ ENTITY_DEF("rtimes", 8906, "\xe2\x8b\x8a"),
+ ENTITY_DEF("forall", 8704, "\xe2\x88\x80"),
+ ENTITY_DEF("DiacriticalDoubleAcute", 733, "\xcb\x9d"),
+ ENTITY_DEF("dArr", 8659, "\xe2\x87\x93"),
+ ENTITY_DEF("fallingdotseq", 8786, "\xe2\x89\x92"),
+ ENTITY_DEF("Aogon", 260, "\xc4\x84"),
+ ENTITY_DEF("PartialD", 8706, "\xe2\x88\x82"),
+ ENTITY_DEF("mapstoup", 8613, "\xe2\x86\xa5"),
+ ENTITY_DEF("die", 168, "\xc2\xa8"),
+ ENTITY_DEF("ngt", 8815, "\xe2\x89\xaf"),
+ ENTITY_DEF("vcy", 1074, "\xd0\xb2"),
+ ENTITY_DEF("fjlig", (unsigned) -1, "\x66\x6a"),
+ ENTITY_DEF("submult", 10945, "\xe2\xab\x81"),
+ ENTITY_DEF("ubrcy", 1118, "\xd1\x9e"),
+ ENTITY_DEF("ovbar", 9021, "\xe2\x8c\xbd"),
+ ENTITY_DEF("bsime", 8909, "\xe2\x8b\x8d"),
+ ENTITY_DEF("precnsim", 8936, "\xe2\x8b\xa8"),
+ ENTITY_DEF("DiacriticalTilde", 732, "\xcb\x9c"),
+ ENTITY_DEF("cwint", 8753, "\xe2\x88\xb1"),
+ ENTITY_DEF("Scy", 1057, "\xd0\xa1"),
+ ENTITY_DEF("NotGreaterEqual", 8817, "\xe2\x89\xb1"),
+ ENTITY_DEF("boxUR", 9562, "\xe2\x95\x9a"),
+ ENTITY_DEF("LessSlantEqual", 10877, "\xe2\xa9\xbd"),
+ ENTITY_DEF("Barwed", 8966, "\xe2\x8c\x86"),
+ ENTITY_DEF("supdot", 10942, "\xe2\xaa\xbe"),
+ ENTITY_DEF("gel", 8923, "\xe2\x8b\x9b"),
+ ENTITY_DEF("iscr", 119998, "\xf0\x9d\x92\xbe"),
+ ENTITY_DEF("doublebarwedge", 8966, "\xe2\x8c\x86"),
+ ENTITY_DEF("Idot", 304, "\xc4\xb0"),
+ ENTITY_DEF("DoubleDot", 168, "\xc2\xa8"),
+ ENTITY_DEF("rsquo", 8217, "\xe2\x80\x99"),
+ ENTITY_DEF("subsetneqq", 10955, "\xe2\xab\x8b"),
+ ENTITY_DEF("UpEquilibrium", 10606, "\xe2\xa5\xae"),
+ ENTITY_DEF("copysr", 8471, "\xe2\x84\x97"),
+ ENTITY_DEF("RightDoubleBracket", 10215, "\xe2\x9f\xa7"),
+ ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"),
+ ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"),
+ ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"),
+ ENTITY_DEF_HEUR("cedil", 184, "\xc2\xb8"),
+ ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"),
+ ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"),
+ ENTITY_DEF("jmath", 567, "\xc8\xb7"),
+ ENTITY_DEF("nrtrie", 8941, "\xe2\x8b\xad"),
+ ENTITY_DEF("nsup", 8837, "\xe2\x8a\x85"),
+ ENTITY_DEF("Ubrcy", 1038, "\xd0\x8e"),
+ ENTITY_DEF("succnsim", 8937, "\xe2\x8b\xa9"),
+ ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"),
+ ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"),
+ ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"),
+ ENTITY_DEF_HEUR("not", 172, "\xc2\xac"),
+ ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"),
+ ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"),
+ ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
+ ENTITY_DEF("sqcap", 8851, "\xe2\x8a\x93"),
+ ENTITY_DEF("rightleftarrows", 8644, "\xe2\x87\x84"),
+ ENTITY_DEF("diams", 9830, "\xe2\x99\xa6"),
+ ENTITY_DEF("boxdR", 9554, "\xe2\x95\x92"),
+ ENTITY_DEF("ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+ ENTITY_DEF("boxDR", 9556, "\xe2\x95\x94"),
+ ENTITY_DEF("sext", 10038, "\xe2\x9c\xb6"),
+ ENTITY_DEF("backsim", 8765, "\xe2\x88\xbd"),
+ ENTITY_DEF("nfr", 120107, "\xf0\x9d\x94\xab"),
+ ENTITY_DEF("CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"),
+ ENTITY_DEF("npart", 8706, "\xe2\x88\x82\xcc\xb8"),
+ ENTITY_DEF("dharl", 8643, "\xe2\x87\x83"),
+ ENTITY_DEF("NewLine", 10, "\x0a"),
+ ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"),
+ ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"),
+ ENTITY_DEF_HEUR("frac14", 188, "\xc2\xbc"),
+ ENTITY_DEF("or", 8744, "\xe2\x88\xa8"),
+ ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"),
+ ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("DownArrowUpArrow", 8693, "\xe2\x87\xb5"),
+ ENTITY_DEF("icy", 1080, "\xd0\xb8"),
+ ENTITY_DEF("num", 35, "\x23"),
+ ENTITY_DEF("Gdot", 288, "\xc4\xa0"),
+ ENTITY_DEF("urcrop", 8974, "\xe2\x8c\x8e"),
+ ENTITY_DEF("epsiv", 1013, "\xcf\xb5"),
+ ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"),
+ ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"),
+ ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"),
+ ENTITY_DEF_HEUR("amp", 38, "\x26"),
+ ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"),
+ ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"),
+ ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"),
+ ENTITY_DEF("ltri", 9667, "\xe2\x97\x83"),
+ ENTITY_DEF("frac25", 8534, "\xe2\x85\x96"),
+ ENTITY_DEF("DZcy", 1039, "\xd0\x8f"),
+ ENTITY_DEF("RightUpVector", 8638, "\xe2\x86\xbe"),
+ ENTITY_DEF("rsquor", 8217, "\xe2\x80\x99"),
+ ENTITY_DEF("uplus", 8846, "\xe2\x8a\x8e"),
+ ENTITY_DEF("triangleright", 9657, "\xe2\x96\xb9"),
+ ENTITY_DEF("lAarr", 8666, "\xe2\x87\x9a"),
+ ENTITY_DEF("HilbertSpace", 8459, "\xe2\x84\x8b"),
+ ENTITY_DEF("there4", 8756, "\xe2\x88\xb4"),
+ ENTITY_DEF("vscr", 120011, "\xf0\x9d\x93\x8b"),
+ ENTITY_DEF("cirscir", 10690, "\xe2\xa7\x82"),
+ ENTITY_DEF("roarr", 8702, "\xe2\x87\xbe"),
+ ENTITY_DEF("hslash", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF("supdsub", 10968, "\xe2\xab\x98"),
+ ENTITY_DEF("simg", 10910, "\xe2\xaa\x9e"),
+ ENTITY_DEF("trade", 8482, "\xe2\x84\xa2"),
+ ENTITY_DEF("searrow", 8600, "\xe2\x86\x98"),
+ ENTITY_DEF("DownLeftVector", 8637, "\xe2\x86\xbd"),
+ ENTITY_DEF("FilledSmallSquare", 9724, "\xe2\x97\xbc"),
+ ENTITY_DEF("prod", 8719, "\xe2\x88\x8f"),
+ ENTITY_DEF("oror", 10838, "\xe2\xa9\x96"),
+ ENTITY_DEF("udarr", 8645, "\xe2\x87\x85"),
+ ENTITY_DEF("jsercy", 1112, "\xd1\x98"),
+ ENTITY_DEF("tprime", 8244, "\xe2\x80\xb4"),
+ ENTITY_DEF("bprime", 8245, "\xe2\x80\xb5"),
+ ENTITY_DEF("malt", 10016, "\xe2\x9c\xa0"),
+ ENTITY_DEF("bigcup", 8899, "\xe2\x8b\x83"),
+ ENTITY_DEF("oint", 8750, "\xe2\x88\xae"),
+ ENTITY_DEF("female", 9792, "\xe2\x99\x80"),
+ ENTITY_DEF("omacr", 333, "\xc5\x8d"),
+ ENTITY_DEF("SquareSubsetEqual", 8849, "\xe2\x8a\x91"),
+ ENTITY_DEF("SucceedsEqual", 10928, "\xe2\xaa\xb0"),
+ ENTITY_DEF("plusacir", 10787, "\xe2\xa8\xa3"),
+ ENTITY_DEF("Gcirc", 284, "\xc4\x9c"),
+ ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"),
+ ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"),
+ ENTITY_DEF_HEUR("THORN", 222, "\xc3\x9e"),
+ ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"),
+ ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"),
+ ENTITY_DEF("varkappa", 1008, "\xcf\xb0"),
+ ENTITY_DEF("NotReverseElement", 8716, "\xe2\x88\x8c"),
+ ENTITY_DEF("zdot", 380, "\xc5\xbc"),
+ ENTITY_DEF("ExponentialE", 8519, "\xe2\x85\x87"),
+ ENTITY_DEF("lesseqgtr", 8922, "\xe2\x8b\x9a"),
+ ENTITY_DEF("cscr", 119992, "\xf0\x9d\x92\xb8"),
+ ENTITY_DEF("Dscr", 119967, "\xf0\x9d\x92\x9f"),
+ ENTITY_DEF("lthree", 8907, "\xe2\x8b\x8b"),
+ ENTITY_DEF("Ccedil", 199, "\xc3\x87"),
+ ENTITY_DEF("nge", 8817, "\xe2\x89\xb1"),
+ ENTITY_DEF("UpperLeftArrow", 8598, "\xe2\x86\x96"),
+ ENTITY_DEF("vDash", 8872, "\xe2\x8a\xa8"),
+ ENTITY_DEF("efDot", 8786, "\xe2\x89\x92"),
+ ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"),
+ ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"),
+ ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"),
+ ENTITY_DEF_HEUR("ugrave", 249, "\xc3\xb9"),
+ ENTITY_DEF("uring", 367, "\xc5\xaf"),
+ ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"),
+ ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+ ENTITY_DEF_HEUR("macr", 175, "\xc2\xaf"),
+ ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"),
+ ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"),
+ ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"),
+ ENTITY_DEF("epar", 8917, "\xe2\x8b\x95"),
+ ENTITY_DEF("ltcc", 10918, "\xe2\xaa\xa6"),
+ ENTITY_DEF("twoheadleftarrow", 8606, "\xe2\x86\x9e"),
+ ENTITY_DEF("aleph", 8501, "\xe2\x84\xb5"),
+ ENTITY_DEF("Colon", 8759, "\xe2\x88\xb7"),
+ ENTITY_DEF("vltri", 8882, "\xe2\x8a\xb2"),
+ ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"),
+ ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"),
+ ENTITY_DEF_HEUR("Ouml", 214, "\xc3\x96"),
+ ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"),
+ ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"),
+ ENTITY_DEF("marker", 9646, "\xe2\x96\xae"),
+ ENTITY_DEF("Efr", 120072, "\xf0\x9d\x94\x88"),
+ ENTITY_DEF("DotEqual", 8784, "\xe2\x89\x90"),
+ ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"),
+ ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+ ENTITY_DEF("primes", 8473, "\xe2\x84\x99"),
+ ENTITY_DEF_HEUR("times", 215, "\xc3\x97"),
+ ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"),
+ ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"),
+ ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"),
+ ENTITY_DEF("ape", 8778, "\xe2\x89\x8a"),
+ ENTITY_DEF("varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
+ ENTITY_DEF("larrlp", 8619, "\xe2\x86\xab"),
+ ENTITY_DEF("NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+ ENTITY_DEF("ulcorner", 8988, "\xe2\x8c\x9c"),
+ ENTITY_DEF("acd", 8767, "\xe2\x88\xbf"),
+ ENTITY_DEF("Hacek", 711, "\xcb\x87"),
+ ENTITY_DEF("xuplus", 10756, "\xe2\xa8\x84"),
+ ENTITY_DEF("therefore", 8756, "\xe2\x88\xb4"),
+ ENTITY_DEF("YIcy", 1031, "\xd0\x87"),
+ ENTITY_DEF("Tfr", 120087, "\xf0\x9d\x94\x97"),
+ ENTITY_DEF("Jcirc", 308, "\xc4\xb4"),
+ ENTITY_DEF("LessGreater", 8822, "\xe2\x89\xb6"),
+ ENTITY_DEF("Uring", 366, "\xc5\xae"),
+ ENTITY_DEF("Ugrave", 217, "\xc3\x99"),
+ ENTITY_DEF("rarr", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("wopf", 120168, "\xf0\x9d\x95\xa8"),
+ ENTITY_DEF("imath", 305, "\xc4\xb1"),
+ ENTITY_DEF("Yopf", 120144, "\xf0\x9d\x95\x90"),
+ ENTITY_DEF("colone", 8788, "\xe2\x89\x94"),
+ ENTITY_DEF("csube", 10961, "\xe2\xab\x91"),
+ ENTITY_DEF("odash", 8861, "\xe2\x8a\x9d"),
+ ENTITY_DEF("olarr", 8634, "\xe2\x86\xba"),
+ ENTITY_DEF("angrt", 8735, "\xe2\x88\x9f"),
+ ENTITY_DEF("NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"),
+ ENTITY_DEF("GreaterEqual", 8805, "\xe2\x89\xa5"),
+ ENTITY_DEF("scnap", 10938, "\xe2\xaa\xba"),
+ ENTITY_DEF("pi", 960, "\xcf\x80"),
+ ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"),
+ ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"),
+ ENTITY_DEF_HEUR("uacute", 250, "\xc3\xba"),
+ ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"),
+ ENTITY_DEF("Theta", 920, "\xce\x98"),
+ ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"),
+ ENTITY_DEF("Atilde", 195, "\xc3\x83"),
+ ENTITY_DEF("cupdot", 8845, "\xe2\x8a\x8d"),
+ ENTITY_DEF("ntriangleright", 8939, "\xe2\x8b\xab"),
+ ENTITY_DEF("measuredangle", 8737, "\xe2\x88\xa1"),
+ ENTITY_DEF("jscr", 119999, "\xf0\x9d\x92\xbf"),
+ ENTITY_DEF("inodot", 305, "\xc4\xb1"),
+ ENTITY_DEF("mopf", 120158, "\xf0\x9d\x95\x9e"),
+ ENTITY_DEF("hkswarow", 10534, "\xe2\xa4\xa6"),
+ ENTITY_DEF("lopar", 10629, "\xe2\xa6\x85"),
+ ENTITY_DEF("thksim", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("bkarow", 10509, "\xe2\xa4\x8d"),
+ ENTITY_DEF("rarrfs", 10526, "\xe2\xa4\x9e"),
+ ENTITY_DEF("ntrianglelefteq", 8940, "\xe2\x8b\xac"),
+ ENTITY_DEF("Bscr", 8492, "\xe2\x84\xac"),
+ ENTITY_DEF("topf", 120165, "\xf0\x9d\x95\xa5"),
+ ENTITY_DEF("Uacute", 218, "\xc3\x9a"),
+ ENTITY_DEF("lap", 10885, "\xe2\xaa\x85"),
+ ENTITY_DEF("djcy", 1106, "\xd1\x92"),
+ ENTITY_DEF("bopf", 120147, "\xf0\x9d\x95\x93"),
+ ENTITY_DEF("empty", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("LeftAngleBracket", 10216, "\xe2\x9f\xa8"),
+ ENTITY_DEF("Imacr", 298, "\xc4\xaa"),
+ ENTITY_DEF("ltcir", 10873, "\xe2\xa9\xb9"),
+ ENTITY_DEF("trisb", 10701, "\xe2\xa7\x8d"),
+ ENTITY_DEF("gjcy", 1107, "\xd1\x93"),
+ ENTITY_DEF("pr", 8826, "\xe2\x89\xba"),
+ ENTITY_DEF("Mu", 924, "\xce\x9c"),
+ ENTITY_DEF("ogon", 731, "\xcb\x9b"),
+ ENTITY_DEF("pertenk", 8241, "\xe2\x80\xb1"),
+ ENTITY_DEF("plustwo", 10791, "\xe2\xa8\xa7"),
+ ENTITY_DEF("Vfr", 120089, "\xf0\x9d\x94\x99"),
+ ENTITY_DEF("ApplyFunction", 8289, "\xe2\x81\xa1"),
+ ENTITY_DEF("Sub", 8912, "\xe2\x8b\x90"),
+ ENTITY_DEF("DoubleLeftRightArrow", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("Lmidot", 319, "\xc4\xbf"),
+ ENTITY_DEF("nwarrow", 8598, "\xe2\x86\x96"),
+ ENTITY_DEF("angrtvbd", 10653, "\xe2\xa6\x9d"),
+ ENTITY_DEF("fcy", 1092, "\xd1\x84"),
+ ENTITY_DEF("ltlarr", 10614, "\xe2\xa5\xb6"),
+ ENTITY_DEF("CircleMinus", 8854, "\xe2\x8a\x96"),
+ ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"),
+ ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"),
+ ENTITY_DEF("iogon", 303, "\xc4\xaf"),
+ ENTITY_DEF_HEUR("laquo", 171, "\xc2\xab"),
+ ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"),
+ ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"),
+ ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"),
+ ENTITY_DEF("coloneq", 8788, "\xe2\x89\x94"),
+ ENTITY_DEF("csupe", 10962, "\xe2\xab\x92"),
+ ENTITY_DEF("tcaron", 357, "\xc5\xa5"),
+ ENTITY_DEF("GreaterTilde", 8819, "\xe2\x89\xb3"),
+ ENTITY_DEF("Map", 10501, "\xe2\xa4\x85"),
+ ENTITY_DEF("DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"),
+ ENTITY_DEF("Uparrow", 8657, "\xe2\x87\x91"),
+ ENTITY_DEF("scy", 1089, "\xd1\x81"),
+ ENTITY_DEF("llarr", 8647, "\xe2\x87\x87"),
+ ENTITY_DEF("rangle", 10217, "\xe2\x9f\xa9"),
+ ENTITY_DEF("sstarf", 8902, "\xe2\x8b\x86"),
+ ENTITY_DEF("InvisibleTimes", 8290, "\xe2\x81\xa2"),
+ ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"),
+ ENTITY_DEF("target", 8982, "\xe2\x8c\x96"),
+ ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"),
+ ENTITY_DEF_HEUR("curren", 164, "\xc2\xa4"),
+ ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"),
+ ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"),
+ ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"),
+ ENTITY_DEF("napprox", 8777, "\xe2\x89\x89"),
+ ENTITY_DEF("odblac", 337, "\xc5\x91"),
+ ENTITY_DEF("gammad", 989, "\xcf\x9d"),
+ ENTITY_DEF("dscr", 119993, "\xf0\x9d\x92\xb9"),
+ ENTITY_DEF("SupersetEqual", 8839, "\xe2\x8a\x87"),
+ ENTITY_DEF("squf", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("Because", 8757, "\xe2\x88\xb5"),
+ ENTITY_DEF("sccue", 8829, "\xe2\x89\xbd"),
+ ENTITY_DEF("KHcy", 1061, "\xd0\xa5"),
+ ENTITY_DEF("Wcirc", 372, "\xc5\xb4"),
+ ENTITY_DEF("uparrow", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"),
+ ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"),
+ ENTITY_DEF_HEUR("oslash", 248, "\xc3\xb8"),
+ ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"),
+ ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"),
+ ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"),
+ ENTITY_DEF("ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"),
+ ENTITY_DEF("lfisht", 10620, "\xe2\xa5\xbc"),
+ ENTITY_DEF("DownArrow", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("Zdot", 379, "\xc5\xbb"),
+ ENTITY_DEF("xscr", 120013, "\xf0\x9d\x93\x8d"),
+ ENTITY_DEF("DiacriticalGrave", 96, "\x60"),
+ ENTITY_DEF("DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"),
+ ENTITY_DEF("angle", 8736, "\xe2\x88\xa0"),
+ ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"),
+ ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"),
+ ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"),
+ ENTITY_DEF_HEUR("acirc", 226, "\xc3\xa2"),
+ ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"),
+ ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"),
+ ENTITY_DEF("gcy", 1075, "\xd0\xb3"),
+ ENTITY_DEF("angmsdag", 10670, "\xe2\xa6\xae"),
+ ENTITY_DEF("tshcy", 1115, "\xd1\x9b"),
+ ENTITY_DEF("Acy", 1040, "\xd0\x90"),
+ ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"),
+ ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"),
+ ENTITY_DEF_HEUR("quot", 34, "\x22"),
+ ENTITY_DEF_HEUR("micro", 181, "\xc2\xb5"),
+ ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"),
+ ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"),
+ ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"),
+ ENTITY_DEF("Pr", 10939, "\xe2\xaa\xbb"),
+ ENTITY_DEF("napid", 8779, "\xe2\x89\x8b\xcc\xb8"),
+ ENTITY_DEF("rceil", 8969, "\xe2\x8c\x89"),
+ ENTITY_DEF("boxtimes", 8864, "\xe2\x8a\xa0"),
+ ENTITY_DEF("erarr", 10609, "\xe2\xa5\xb1"),
+ ENTITY_DEF("downdownarrows", 8650, "\xe2\x87\x8a"),
+ ENTITY_DEF("Kfr", 120078, "\xf0\x9d\x94\x8e"),
+ ENTITY_DEF("mho", 8487, "\xe2\x84\xa7"),
+ ENTITY_DEF("scpolint", 10771, "\xe2\xa8\x93"),
+ ENTITY_DEF("vArr", 8661, "\xe2\x87\x95"),
+ ENTITY_DEF("Ccaron", 268, "\xc4\x8c"),
+ ENTITY_DEF("NotRightTriangle", 8939, "\xe2\x8b\xab"),
+ ENTITY_DEF("topbot", 9014, "\xe2\x8c\xb6"),
+ ENTITY_DEF("qopf", 120162, "\xf0\x9d\x95\xa2"),
+ ENTITY_DEF("eogon", 281, "\xc4\x99"),
+ ENTITY_DEF("luruhar", 10598, "\xe2\xa5\xa6"),
+ ENTITY_DEF("gtdot", 8919, "\xe2\x8b\x97"),
+ ENTITY_DEF("Egrave", 200, "\xc3\x88"),
+ ENTITY_DEF("roplus", 10798, "\xe2\xa8\xae"),
+ ENTITY_DEF("Intersection", 8898, "\xe2\x8b\x82"),
+ ENTITY_DEF("Uarr", 8607, "\xe2\x86\x9f"),
+ ENTITY_DEF("dcy", 1076, "\xd0\xb4"),
+ ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"),
+ ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"),
+ ENTITY_DEF_HEUR("yuml", 255, "\xc3\xbf"),
+ ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"),
+ ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"),
+ ENTITY_DEF("starf", 9733, "\xe2\x98\x85"),
+ ENTITY_DEF("OverBar", 8254, "\xe2\x80\xbe"),
+ ENTITY_DEF("Alpha", 913, "\xce\x91"),
+ ENTITY_DEF("LeftUpVectorBar", 10584, "\xe2\xa5\x98"),
+ ENTITY_DEF("ufr", 120114, "\xf0\x9d\x94\xb2"),
+ ENTITY_DEF("swarhk", 10534, "\xe2\xa4\xa6"),
+ ENTITY_DEF("GreaterEqualLess", 8923, "\xe2\x8b\x9b"),
+ ENTITY_DEF("sscr", 120008, "\xf0\x9d\x93\x88"),
+ ENTITY_DEF("Pi", 928, "\xce\xa0"),
+ ENTITY_DEF("boxh", 9472, "\xe2\x94\x80"),
+ ENTITY_DEF("frac16", 8537, "\xe2\x85\x99"),
+ ENTITY_DEF("lbrack", 91, "\x5b"),
+ ENTITY_DEF("vert", 124, "\x7c"),
+ ENTITY_DEF("precneqq", 10933, "\xe2\xaa\xb5"),
+ ENTITY_DEF("NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+ ENTITY_DEF("Omega", 937, "\xce\xa9"),
+ ENTITY_DEF("uarr", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("boxVr", 9567, "\xe2\x95\x9f"),
+ ENTITY_DEF("ruluhar", 10600, "\xe2\xa5\xa8"),
+ ENTITY_DEF("ShortLeftArrow", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("Qfr", 120084, "\xf0\x9d\x94\x94"),
+ ENTITY_DEF("olt", 10688, "\xe2\xa7\x80"),
+ ENTITY_DEF("nequiv", 8802, "\xe2\x89\xa2"),
+ ENTITY_DEF("fscr", 119995, "\xf0\x9d\x92\xbb"),
+ ENTITY_DEF("rarrhk", 8618, "\xe2\x86\xaa"),
+ ENTITY_DEF("nsqsupe", 8931, "\xe2\x8b\xa3"),
+ ENTITY_DEF("nsubseteq", 8840, "\xe2\x8a\x88"),
+ ENTITY_DEF("numero", 8470, "\xe2\x84\x96"),
+ ENTITY_DEF("emsp14", 8197, "\xe2\x80\x85"),
+ ENTITY_DEF("gl", 8823, "\xe2\x89\xb7"),
+ ENTITY_DEF("ocirc", 244, "\xc3\xb4"),
+ ENTITY_DEF("weierp", 8472, "\xe2\x84\x98"),
+ ENTITY_DEF("boxvL", 9569, "\xe2\x95\xa1"),
+ ENTITY_DEF("RightArrowLeftArrow", 8644, "\xe2\x87\x84"),
+ ENTITY_DEF("Precedes", 8826, "\xe2\x89\xba"),
+ ENTITY_DEF("RightVector", 8640, "\xe2\x87\x80"),
+ ENTITY_DEF("xcup", 8899, "\xe2\x8b\x83"),
+ ENTITY_DEF("angmsdad", 10667, "\xe2\xa6\xab"),
+ ENTITY_DEF("gtrsim", 8819, "\xe2\x89\xb3"),
+ ENTITY_DEF("natural", 9838, "\xe2\x99\xae"),
+ ENTITY_DEF("nVdash", 8878, "\xe2\x8a\xae"),
+ ENTITY_DEF("RightTriangleEqual", 8885, "\xe2\x8a\xb5"),
+ ENTITY_DEF("dscy", 1109, "\xd1\x95"),
+ ENTITY_DEF("leftthreetimes", 8907, "\xe2\x8b\x8b"),
+ ENTITY_DEF("prsim", 8830, "\xe2\x89\xbe"),
+ ENTITY_DEF("Bcy", 1041, "\xd0\x91"),
+ ENTITY_DEF("Chi", 935, "\xce\xa7"),
+ ENTITY_DEF("timesb", 8864, "\xe2\x8a\xa0"),
+ ENTITY_DEF("Del", 8711, "\xe2\x88\x87"),
+ ENTITY_DEF("lmidot", 320, "\xc5\x80"),
+ ENTITY_DEF("RightDownVector", 8642, "\xe2\x87\x82"),
+ ENTITY_DEF("simdot", 10858, "\xe2\xa9\xaa"),
+ ENTITY_DEF("FilledVerySmallSquare", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+ ENTITY_DEF("SucceedsTilde", 8831, "\xe2\x89\xbf"),
+ ENTITY_DEF("duarr", 8693, "\xe2\x87\xb5"),
+ ENTITY_DEF("apE", 10864, "\xe2\xa9\xb0"),
+ ENTITY_DEF("odot", 8857, "\xe2\x8a\x99"),
+ ENTITY_DEF("mldr", 8230, "\xe2\x80\xa6"),
+ ENTITY_DEF("Uarrocir", 10569, "\xe2\xa5\x89"),
+ ENTITY_DEF("nLl", 8920, "\xe2\x8b\x98\xcc\xb8"),
+ ENTITY_DEF("rarrpl", 10565, "\xe2\xa5\x85"),
+ ENTITY_DEF("cir", 9675, "\xe2\x97\x8b"),
+ ENTITY_DEF("blk14", 9617, "\xe2\x96\x91"),
+ ENTITY_DEF("VerticalLine", 124, "\x7c"),
+ ENTITY_DEF("jcy", 1081, "\xd0\xb9"),
+ ENTITY_DEF("filig", 64257, "\xef\xac\x81"),
+ ENTITY_DEF("LongRightArrow", 10230, "\xe2\x9f\xb6"),
+ ENTITY_DEF("beta", 946, "\xce\xb2"),
+ ENTITY_DEF("ccupssm", 10832, "\xe2\xa9\x90"),
+ ENTITY_DEF("supsub", 10964, "\xe2\xab\x94"),
+ ENTITY_DEF("spar", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("Tstrok", 358, "\xc5\xa6"),
+ ENTITY_DEF("isinv", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("rightsquigarrow", 8605, "\xe2\x86\x9d"),
+ ENTITY_DEF("Diamond", 8900, "\xe2\x8b\x84"),
+ ENTITY_DEF("curlyeqsucc", 8927, "\xe2\x8b\x9f"),
+ ENTITY_DEF("ijlig", 307, "\xc4\xb3"),
+ ENTITY_DEF("puncsp", 8200, "\xe2\x80\x88"),
+ ENTITY_DEF("hamilt", 8459, "\xe2\x84\x8b"),
+ ENTITY_DEF("mapstoleft", 8612, "\xe2\x86\xa4"),
+ ENTITY_DEF("Copf", 8450, "\xe2\x84\x82"),
+ ENTITY_DEF("prnsim", 8936, "\xe2\x8b\xa8"),
+ ENTITY_DEF("DotDot", 8412, "\xe2\x83\x9c"),
+ ENTITY_DEF("lobrk", 10214, "\xe2\x9f\xa6"),
+ ENTITY_DEF("twoheadrightarrow", 8608, "\xe2\x86\xa0"),
+ ENTITY_DEF("ngE", 8807, "\xe2\x89\xa7\xcc\xb8"),
+ ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"),
+ ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"),
+ ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"),
+ ENTITY_DEF_HEUR("Yuml", 376, "\xc5\xb8"),
+ ENTITY_DEF("comp", 8705, "\xe2\x88\x81"),
+ ENTITY_DEF("dotminus", 8760, "\xe2\x88\xb8"),
+ ENTITY_DEF("crarr", 8629, "\xe2\x86\xb5"),
+ ENTITY_DEF("imped", 437, "\xc6\xb5"),
+ ENTITY_DEF("barwedge", 8965, "\xe2\x8c\x85"),
+ ENTITY_DEF("harrcir", 10568, "\xe2\xa5\x88")};
+
+class html_entities_storage {
+ ankerl::unordered_dense::map<std::string_view, html_entity_def> entity_by_name;
+ ankerl::unordered_dense::map<std::string_view, html_entity_def> entity_by_name_heur;
+ ankerl::unordered_dense::map<unsigned, html_entity_def> entity_by_id;
+
+public:
+ html_entities_storage()
+ {
+ auto nelts = G_N_ELEMENTS(html_entities_array);
+ entity_by_name.reserve(nelts);
+ entity_by_id.reserve(nelts);
+
+ for (const auto &e: html_entities_array) {
+ entity_by_name[e.name] = e;
+ entity_by_id[e.code] = e;
+
+ if (e.allow_heuristic) {
+ entity_by_name_heur[e.name] = e;
+ }
+ }
+ }
+
+ auto by_name(std::string_view name, bool use_heuristic = false) const -> const html_entity_def *
+ {
+ const decltype(entity_by_name) *htb;
+
+ if (use_heuristic) {
+ htb = &entity_by_name_heur;
+ }
+ else {
+ htb = &entity_by_name;
+ }
+ auto it = htb->find(name);
+
+ if (it != htb->end()) {
+ return &(it->second);
+ }
+
+ return nullptr;
+ }
+
+ auto by_id(int id) const -> const html_entity_def *
+ {
+ auto it = entity_by_id.find(id);
+ if (it != entity_by_id.end()) {
+ return &(it->second);
+ }
+
+ return nullptr;
+ }
+};
+
+static const html_entities_storage html_entities_defs;
+
+std::size_t
+decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces)
+{
+ /*
+ * t - tortoise (destination ptr)
+ * h - hare (source ptr)
+ * e - begin of entity
+ */
+ char *t = s, *h = s, *e = s;
+ const gchar *end;
+ bool seen_hash = false, seen_hex = false;
+ enum {
+ do_undefined,
+ do_digits_only,
+ do_mixed,
+ } seen_digit_only;
+ enum class parser_state {
+ normal_content,
+ ampersand,
+ skip_multi_spaces,
+ skip_start_spaces,
+ } state = parser_state::normal_content;
+
+ end = s + len;
+
+ auto replace_named_entity = [&](const char *entity, std::size_t len) -> bool {
+ const auto *entity_def = html_entities_defs.by_name({entity,
+ (std::size_t)(h - entity)},
+ false);
+
+ auto replace_entity = [&]() -> void {
+ auto l = strlen(entity_def->replacement);
+ memcpy(t, entity_def->replacement, l);
+ t += l;
+ };
+
+ if (entity_def) {
+ replace_entity();
+ return true;
+ }
+ else {
+ /* Try heuristic */
+ auto heuristic_lookup_func = [&](std::size_t lookup_len) -> bool {
+ if (!entity_def && h - e > lookup_len) {
+ entity_def = html_entities_defs.by_name({entity, lookup_len}, true);
+
+ if (entity_def) {
+ replace_entity();
+ /* Adjust h back */
+ h = e + lookup_len;
+
+ return true;
+ }
+
+ entity_def = nullptr;
+ }
+
+ return false;
+ };
+
+ heuristic_lookup_func(5);
+ heuristic_lookup_func(4);
+ heuristic_lookup_func(3);
+ heuristic_lookup_func(2);
+
+ /* Leave undecoded */
+ if (!entity_def && (end - t > h - e)) {
+ memmove(t, e, h - e);
+ t += h - e;
+ }
+ else if (entity_def) {
+ return true;
+ }
+ }
+
+ return false;
+ };
+
+ /* Strtoul works merely for 0 terminated strings, so leave it alone... */
+ auto dec_to_int = [](const char *str, std::size_t len) -> std::optional<int> {
+ int n = 0;
+
+ /* Avoid INT_MIN overflow by moving to negative numbers */
+ while (len > 0 && g_ascii_isdigit(*str)) {
+ n = 10 * n - (*str++ - '0');
+ len--;
+ }
+
+ if (len == 0) {
+ return -(n);
+ }
+ else {
+ return std::nullopt;
+ }
+ };
+ auto hex_to_int = [](const char *str, std::size_t len) -> std::optional<int> {
+ int n = 0;
+
+ /* Avoid INT_MIN overflow by moving to negative numbers */
+ while (len > 0 && g_ascii_isxdigit(*str)) {
+ if (*str <= 0x39) {
+ n = 16 * n - (*str++ - '0');
+ }
+ else {
+ n = 16 * n - (((*str++) | ' ') - 'a' + 10);
+ }
+ len--;
+ }
+
+ if (len == 0) {
+ return -(n);
+ }
+ else {
+ return std::nullopt;
+ }
+ };
+ auto oct_to_int = [](const char *str, std::size_t len) -> std::optional<int> {
+ int n = 0;
+
+ /* Avoid INT_MIN overflow by moving to negative numbers */
+ while (len > 0 && g_ascii_isdigit(*str)) {
+ if (*str > '7') {
+ break;
+ }
+ else {
+ n = 8 * n - (*str++ - '0');
+ }
+ len--;
+ }
+
+ if (len == 0) {
+ return -(n);
+ }
+ else {
+ return std::nullopt;
+ }
+ };
+
+ auto replace_numeric_entity = [&](const char *entity) -> bool {
+ UChar32 uc;
+ std::optional<int> maybe_num;
+
+ if (*entity == 'x' || *entity == 'X') {
+ maybe_num = hex_to_int(entity + 1, h - (entity + 1));
+ }
+ else if (*entity == 'o' || *entity == 'O') {
+ maybe_num = oct_to_int(entity + 1, h - (entity + 1));
+ }
+ else {
+ maybe_num = dec_to_int(entity, h - entity);
+ }
+
+ if (!maybe_num) {
+ /* Skip undecoded */
+ if (end - t >= h - e) {
+ memmove(t, e, h - e);
+ t += h - e;
+ }
+
+ return false;
+ }
+ else {
+ uc = maybe_num.value();
+ /* Search for a replacement */
+ const auto *entity_def = html_entities_defs.by_id(uc);
+
+ if (entity_def) {
+ auto rep_len = strlen(entity_def->replacement);
+
+ if (end - t >= rep_len) {
+ memcpy(t, entity_def->replacement,
+ rep_len);
+ t += rep_len;
+ }
+
+ return true;
+ }
+ else {
+ /* Unicode point */
+ goffset off = t - s;
+ UBool is_error = 0;
+
+ if (uc > 0) {
+ U8_APPEND((std::uint8_t *) s, off, len, uc, is_error);
+
+ if (!is_error) {
+ t = s + off;
+ }
+ else if (end - t > 3) {
+ /* Not printable code point replace with 0xFFFD */
+ *t++ = '\357';
+ *t++ = '\277';
+ *t++ = '\275';
+
+ return true;
+ }
+ }
+ else if (end - t > 3) {
+ /* Not printable code point replace with 0xFFFD */
+ *t++ = '\357';
+ *t++ = '\277';
+ *t++ = '\275';
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+ };
+
+ auto replace_entity = [&]() -> bool {
+ if (e + 1 < end) {
+ const auto *entity_start = e + 1;
+
+ if (*entity_start != '#') {
+ return replace_named_entity(entity_start, (h - entity_start));
+ }
+ else if (entity_start + 1 < h) {
+ return replace_numeric_entity(entity_start + 1);
+ }
+ }
+
+ return false;
+ };
+
+ if (norm_spaces && g_ascii_isspace(*h)) {
+ state = parser_state::skip_start_spaces;
+ }
+
+ while (h - s < len && t <= h) {
+ switch (state) {
+ case parser_state::normal_content:
+ if (*h == '&') {
+ state = parser_state::ampersand;
+ seen_hash = false;
+ seen_hex = false;
+ seen_digit_only = do_undefined;
+ e = h;
+ h++;
+ continue;
+ }
+ else {
+ if (norm_spaces && g_ascii_isspace(*h)) {
+ *t++ = ' ';
+ state = parser_state::skip_multi_spaces;
+ h++;
+ }
+ else {
+ *t++ = *h++;
+ }
+ }
+ break;
+ case parser_state::ampersand:
+ if ((*h == ';' || g_ascii_isspace(*h)) && h > e) {
+ replace_entity();
+ state = parser_state::normal_content;
+
+ if (g_ascii_isspace(*h)) {
+ /* Avoid increase of h */
+ continue;
+ }
+ }
+ else if (*h == '&') {
+ /* Previous `&` was bogus */
+ state = parser_state::ampersand;
+
+ if (end - t > h - e) {
+ memmove(t, e, h - e);
+ t += h - e;
+ }
+
+ e = h;
+ }
+ else if (*h == '#') {
+ seen_hash = true;
+
+ if (h + 1 < end && h[1] == 'x') {
+ seen_hex = true;
+ /* Skip one more character */
+ h++;
+ }
+ }
+ else if (seen_digit_only != do_mixed &&
+ (g_ascii_isdigit(*h) || (seen_hex && g_ascii_isxdigit(*h)))) {
+ seen_digit_only = do_digits_only;
+ }
+ else {
+ if (seen_digit_only == do_digits_only && seen_hash && h > e) {
+ /* We have seen some digits, so we can try to decode, eh */
+ /* Fuck retarded email clients... */
+ replace_entity();
+ state = parser_state::normal_content;
+ continue;
+ }
+
+ seen_digit_only = do_mixed;
+ }
+
+ h++;
+
+ break;
+ case parser_state::skip_multi_spaces:
+ if (g_ascii_isspace(*h)) {
+ h++;
+ }
+ else {
+ state = parser_state::normal_content;
+ }
+ break;
+ case parser_state::skip_start_spaces:
+ if (g_ascii_isspace(*h)) {
+ h++;
+ }
+ else {
+ state = parser_state::normal_content;
+ }
+ break;
+ }
+ }
+
+ /* Leftover */
+ if (state == parser_state::ampersand && h > e) {
+ /* Unfinished entity, copy as is */
+ if (replace_entity()) {
+ /* To follow FSM semantics */
+ h++;
+ }
+ else {
+ h = e; /* Include the last & */
+ }
+
+ /* Leftover after replacement */
+ if (h < end && t + (end - h) <= end) {
+ memmove(t, h, end - h);
+ t += end - h;
+ }
+ }
+
+ if (norm_spaces) {
+ bool seen_spaces = false;
+
+ while (t > s && g_ascii_isspace(*(t - 1))) {
+ seen_spaces = true;
+ t--;
+ }
+
+ if (seen_spaces) {
+ *t++ = ' ';
+ }
+ }
+
+ return (t - s);
+}
+
+auto decode_html_entitles_inplace(std::string &st) -> void
+{
+ auto nlen = decode_html_entitles_inplace(st.data(), st.size());
+ st.resize(nlen);
+}
+
+TEST_SUITE("html entities")
+{
+
+ TEST_CASE("html entities decode")
+ {
+ std::vector<std::pair<std::string, std::string>> cases{
+ {"", ""},
+ {"abc", "abc"},
+ {"abc def", "abc def"},
+ {"abc def", "abc def"},
+ {"abc\ndef", "abc def"},
+ {"abc\n \tdef", "abc def"},
+ {" abc def ", "abc def "},
+ {"FOO&gt;BAR", "FOO>BAR"},
+ {"FOO&gtBAR", "FOO>BAR"},
+ {"FOO&gt BAR", "FOO> BAR"},
+ {"FOO&gt;;;BAR", "FOO>;;BAR"},
+ {"I'm &notit;", "I'm ¬it;"},
+ {"I'm &notin;", "I'm ∉"},
+ {"FOO& BAR", "FOO& BAR"},
+ {"FOO&&&&gt;BAR", "FOO&&&>BAR"},
+ {"FOO&#41;BAR", "FOO)BAR"},
+ {"FOO&#x41;BAR", "FOOABAR"},
+ {"FOO&#X41;BAR", "FOOABAR"},
+ {"FOO&#BAR", "FOO&#BAR"},
+ {"FOO&#ZOO", "FOO&#ZOO"},
+ {"FOO&#xBAR", "FOOºR"},
+ {"FOO&#x41BAR", "FOO䆺R"},
+ {"FOO&#x0000;ZOO", "FOO\uFFFDZOO"},
+ {"FOO&#x0081;ZOO", "FOO\u0081ZOO"},
+ {"FOO&#xD800;ZOO", "FOO\uFFFDZOO"},
+ {"FOO&#xFFFFFF;ZOO", "FOO\uFFFDZOO"},
+ {"ZZ&pound_id=23", "ZZ£_id=23"},
+ {"ZZ&prod_id=23", "ZZ&prod_id=23"},
+ {"ZZ&gt", "ZZ>"},
+ {"ZZ&", "ZZ&"},
+ {"ZZ&AElig=", "ZZÆ="},
+ };
+
+ for (const auto &c: cases) {
+ SUBCASE(("decode entities: " + c.first).c_str())
+ {
+ auto *cpy = new char[c.first.size()];
+ memcpy(cpy, c.first.data(), c.first.size());
+ auto nlen = decode_html_entitles_inplace(cpy, c.first.size(), true);
+ CHECK(std::string{cpy, nlen} == c.second);
+ delete[] cpy;
+ }
+ }
+ }
+}
+
+}// namespace rspamd::html \ No newline at end of file
diff --git a/src/libserver/html/html_entities.hxx b/src/libserver/html/html_entities.hxx
new file mode 100644
index 0000000..fc1f7cc
--- /dev/null
+++ b/src/libserver/html/html_entities.hxx
@@ -0,0 +1,31 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTML_ENTITIES_H
+#define RSPAMD_HTML_ENTITIES_H
+#pragma once
+
+#include <utility>
+#include <string>
+
+namespace rspamd::html {
+
+auto decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces = false) -> std::size_t;
+auto decode_html_entitles_inplace(std::string &st) -> void;
+
+}// namespace rspamd::html
+
+#endif
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
new file mode 100644
index 0000000..309d761
--- /dev/null
+++ b/src/libserver/html/html_tag.hxx
@@ -0,0 +1,159 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTML_TAG_HXX
+#define RSPAMD_HTML_TAG_HXX
+#pragma once
+
+#include <utility>
+#include <string_view>
+#include <variant>
+#include <vector>
+#include <optional>
+#include <cstdint>
+
+#include "html_tags.h"
+
+struct rspamd_url;
+struct html_image;
+
+namespace rspamd::html {
+
+struct html_content; /* Forward declaration */
+
+enum class html_component_type : std::uint8_t {
+ RSPAMD_HTML_COMPONENT_NAME = 0,
+ RSPAMD_HTML_COMPONENT_HREF,
+ RSPAMD_HTML_COMPONENT_COLOR,
+ RSPAMD_HTML_COMPONENT_BGCOLOR,
+ RSPAMD_HTML_COMPONENT_STYLE,
+ RSPAMD_HTML_COMPONENT_CLASS,
+ RSPAMD_HTML_COMPONENT_WIDTH,
+ RSPAMD_HTML_COMPONENT_HEIGHT,
+ RSPAMD_HTML_COMPONENT_SIZE,
+ RSPAMD_HTML_COMPONENT_REL,
+ RSPAMD_HTML_COMPONENT_ALT,
+ RSPAMD_HTML_COMPONENT_ID,
+ RSPAMD_HTML_COMPONENT_HIDDEN,
+};
+
+/* Public tags flags */
+/* XML tag */
+#define FL_XML (1u << CM_USER_SHIFT)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED (1 << (CM_USER_SHIFT + 1))
+#define FL_BROKEN (1 << (CM_USER_SHIFT + 2))
+#define FL_IGNORE (1 << (CM_USER_SHIFT + 3))
+#define FL_BLOCK (1 << (CM_USER_SHIFT + 4))
+#define FL_HREF (1 << (CM_USER_SHIFT + 5))
+#define FL_COMMENT (1 << (CM_USER_SHIFT + 6))
+#define FL_VIRTUAL (1 << (CM_USER_SHIFT + 7))
+
+/**
+ * Returns component type from a string
+ * @param st
+ * @return
+ */
+auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>;
+
+using html_tag_extra_t = std::variant<std::monostate, struct rspamd_url *, struct html_image *>;
+struct html_tag_component {
+ html_component_type type;
+ std::string_view value;
+
+ html_tag_component(html_component_type type, std::string_view value)
+ : type(type), value(value)
+ {
+ }
+};
+
+/* Pairing closing tag representation */
+struct html_closing_tag {
+ int start = -1;
+ int end = -1;
+
+ auto clear() -> void
+ {
+ start = end = -1;
+ }
+};
+
+struct html_tag {
+ unsigned int tag_start = 0;
+ unsigned int content_offset = 0;
+ std::uint32_t flags = 0;
+ std::int32_t id = Tag_UNKNOWN;
+ html_closing_tag closing;
+
+ std::vector<html_tag_component> components;
+
+ html_tag_extra_t extra;
+ mutable struct html_block *block = nullptr;
+ std::vector<struct html_tag *> children;
+ struct html_tag *parent;
+
+ auto find_component(html_component_type what) const -> std::optional<std::string_view>
+ {
+ for (const auto &comp: components) {
+ if (comp.type == what) {
+ return comp.value;
+ }
+ }
+
+ return std::nullopt;
+ }
+
+ auto find_component(std::optional<html_component_type> what) const -> std::optional<std::string_view>
+ {
+ if (what) {
+ return find_component(what.value());
+ }
+
+ return std::nullopt;
+ }
+
+ auto clear(void) -> void
+ {
+ id = Tag_UNKNOWN;
+ tag_start = content_offset = 0;
+ extra = std::monostate{};
+ components.clear();
+ flags = 0;
+ block = nullptr;
+ children.clear();
+ closing.clear();
+ }
+
+ constexpr auto get_content_length() const -> std::size_t
+ {
+ if (flags & (FL_IGNORE | CM_HEAD)) {
+ return 0;
+ }
+ if (closing.start > content_offset) {
+ return closing.start - content_offset;
+ }
+
+ return 0;
+ }
+
+ auto get_content(const struct html_content *hc) const -> std::string_view;
+};
+
+static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
+
+}// namespace rspamd::html
+
+#endif//RSPAMD_HTML_TAG_HXX
diff --git a/src/libserver/html/html_tag_defs.hxx b/src/libserver/html/html_tag_defs.hxx
new file mode 100644
index 0000000..647f7c3
--- /dev/null
+++ b/src/libserver/html/html_tag_defs.hxx
@@ -0,0 +1,194 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_HTML_TAG_DEFS_HXX
+#define RSPAMD_HTML_TAG_DEFS_HXX
+
+#include "config.h"
+#include "html_tags.h"
+#include "libutil/cxx/util.hxx"
+
+#include <string>
+#include "contrib/ankerl/unordered_dense.h"
+
+namespace rspamd::html {
+
+struct html_tag_def {
+ std::string name;
+ tag_id_t id;
+ guint flags;
+};
+
+#define TAG_DEF(id, name, flags) \
+ html_tag_def \
+ { \
+ (name), (id), (flags) \
+ }
+
+static const auto html_tag_defs_array = rspamd::array_of(
+ /* W3C defined elements */
+ TAG_DEF(Tag_A, "a", FL_HREF),
+ TAG_DEF(Tag_ABBR, "abbr", (CM_INLINE)),
+ TAG_DEF(Tag_ACRONYM, "acronym", (CM_INLINE)),
+ TAG_DEF(Tag_ADDRESS, "address", (CM_BLOCK)),
+ TAG_DEF(Tag_APPLET, "applet", (CM_IMG | CM_INLINE | CM_PARAM)),
+ TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY | FL_HREF)),
+ TAG_DEF(Tag_B, "b", (CM_INLINE | FL_BLOCK)),
+ TAG_DEF(Tag_BASE, "base", (CM_HEAD | CM_EMPTY)),
+ TAG_DEF(Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)),
+ TAG_DEF(Tag_BDO, "bdo", (CM_INLINE)),
+ TAG_DEF(Tag_BIG, "big", (CM_INLINE)),
+ TAG_DEF(Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)),
+ TAG_DEF(Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE | FL_BLOCK)),
+ TAG_DEF(Tag_BR, "br", (CM_INLINE | CM_EMPTY)),
+ TAG_DEF(Tag_BUTTON, "button", (CM_INLINE | FL_BLOCK)),
+ TAG_DEF(Tag_CAPTION, "caption", (CM_TABLE)),
+ TAG_DEF(Tag_CENTER, "center", (CM_BLOCK)),
+ TAG_DEF(Tag_CITE, "cite", (CM_INLINE)),
+ TAG_DEF(Tag_CODE, "code", (CM_INLINE)),
+ TAG_DEF(Tag_COL, "col", (CM_TABLE | CM_EMPTY)),
+ TAG_DEF(Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)),
+ TAG_DEF(Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)),
+ TAG_DEF(Tag_DEL, "del", (CM_INLINE | CM_BLOCK)),
+ TAG_DEF(Tag_DFN, "dfn", (CM_INLINE)),
+ TAG_DEF(Tag_DIR, "dir", (CM_BLOCK)),
+ TAG_DEF(Tag_DIV, "div", (CM_BLOCK | FL_BLOCK)),
+ TAG_DEF(Tag_DL, "dl", (CM_BLOCK | FL_BLOCK)),
+ TAG_DEF(Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)),
+ TAG_DEF(Tag_EM, "em", (CM_INLINE)),
+ TAG_DEF(Tag_FIELDSET, "fieldset", (CM_BLOCK)),
+ TAG_DEF(Tag_FONT, "font", (FL_BLOCK)),
+ TAG_DEF(Tag_FORM, "form", (CM_BLOCK | FL_HREF)),
+ TAG_DEF(Tag_FRAME, "frame", (CM_EMPTY | FL_HREF)),
+ TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML)),
+ TAG_DEF(Tag_H1, "h1", (CM_BLOCK)),
+ TAG_DEF(Tag_H2, "h2", (CM_BLOCK)),
+ TAG_DEF(Tag_H3, "h3", (CM_BLOCK)),
+ TAG_DEF(Tag_H4, "h4", (CM_BLOCK)),
+ TAG_DEF(Tag_H5, "h5", (CM_BLOCK)),
+ TAG_DEF(Tag_H6, "h6", (CM_BLOCK)),
+ TAG_DEF(Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)),
+ TAG_DEF(Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)),
+ TAG_DEF(Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)),
+ TAG_DEF(Tag_I, "i", (CM_INLINE)),
+ TAG_DEF(Tag_IFRAME, "iframe", (FL_HREF)),
+ TAG_DEF(Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)),
+ TAG_DEF(Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)),
+ TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK)),
+ TAG_DEF(Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)),
+ TAG_DEF(Tag_KBD, "kbd", (CM_INLINE)),
+ TAG_DEF(Tag_LABEL, "label", (CM_INLINE)),
+ TAG_DEF(Tag_LEGEND, "legend", (CM_INLINE)),
+ TAG_DEF(Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT | FL_BLOCK)),
+ TAG_DEF(Tag_LINK, "link", (CM_EMPTY | FL_HREF)),
+ TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK)),
+ TAG_DEF(Tag_MAP, "map", (CM_INLINE | FL_HREF)),
+ TAG_DEF(Tag_MENU, "menu", (CM_BLOCK)),
+ TAG_DEF(Tag_META, "meta", (CM_HEAD | CM_INLINE | CM_EMPTY)),
+ TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK)),
+ TAG_DEF(Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_RAW)),
+ TAG_DEF(Tag_OBJECT, "object", (CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)),
+ TAG_DEF(Tag_OL, "ol", (CM_BLOCK | FL_BLOCK)),
+ TAG_DEF(Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)),
+ TAG_DEF(Tag_OPTION, "option", (CM_FIELD | CM_OPT)),
+ TAG_DEF(Tag_P, "p", (CM_BLOCK | CM_OPT | FL_BLOCK)),
+ TAG_DEF(Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)),
+ TAG_DEF(Tag_PLAINTEXT, "plaintext", (CM_BLOCK)),
+ TAG_DEF(Tag_PRE, "pre", (CM_BLOCK)),
+ TAG_DEF(Tag_Q, "q", (CM_INLINE)),
+ TAG_DEF(Tag_RB, "rb", (CM_INLINE)),
+ TAG_DEF(Tag_RBC, "rbc", (CM_INLINE)),
+ TAG_DEF(Tag_RP, "rp", (CM_INLINE)),
+ TAG_DEF(Tag_RT, "rt", (CM_INLINE)),
+ TAG_DEF(Tag_RTC, "rtc", (CM_INLINE)),
+ TAG_DEF(Tag_RUBY, "ruby", (CM_INLINE)),
+ TAG_DEF(Tag_S, "s", (CM_INLINE)),
+ TAG_DEF(Tag_SAMP, "samp", (CM_INLINE)),
+ TAG_DEF(Tag_SCRIPT, "script", (CM_HEAD | CM_RAW)),
+ TAG_DEF(Tag_SELECT, "select", (CM_INLINE | CM_FIELD)),
+ TAG_DEF(Tag_SMALL, "small", (CM_INLINE)),
+ TAG_DEF(Tag_SPAN, "span", (CM_NO_INDENT | FL_BLOCK)),
+ TAG_DEF(Tag_STRIKE, "strike", (CM_INLINE)),
+ TAG_DEF(Tag_STRONG, "strong", (CM_INLINE)),
+ TAG_DEF(Tag_STYLE, "style", (CM_HEAD | CM_RAW)),
+ TAG_DEF(Tag_SUB, "sub", (CM_INLINE)),
+ TAG_DEF(Tag_SUP, "sup", (CM_INLINE)),
+ TAG_DEF(Tag_TABLE, "table", (CM_BLOCK | FL_BLOCK)),
+ TAG_DEF(Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | CM_OPT | FL_BLOCK)),
+ TAG_DEF(Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT | FL_BLOCK)),
+ TAG_DEF(Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)),
+ TAG_DEF(Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)),
+ TAG_DEF(Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT | FL_BLOCK)),
+ TAG_DEF(Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)),
+ TAG_DEF(Tag_TITLE, "title", (CM_HEAD | CM_UNIQUE)),
+ TAG_DEF(Tag_TR, "tr", (CM_TABLE | CM_OPT | FL_BLOCK)),
+ TAG_DEF(Tag_TT, "tt", (CM_INLINE)),
+ TAG_DEF(Tag_U, "u", (CM_INLINE)),
+ TAG_DEF(Tag_UL, "ul", (CM_BLOCK | FL_BLOCK)),
+ TAG_DEF(Tag_VAR, "var", (CM_INLINE)),
+ TAG_DEF(Tag_XMP, "xmp", (CM_BLOCK)),
+ TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)));
+
+class html_tags_storage {
+ ankerl::unordered_dense::map<std::string_view, html_tag_def> tag_by_name;
+ ankerl::unordered_dense::map<tag_id_t, html_tag_def> tag_by_id;
+
+public:
+ html_tags_storage()
+ {
+ tag_by_name.reserve(html_tag_defs_array.size());
+ tag_by_id.reserve(html_tag_defs_array.size());
+
+ for (const auto &t: html_tag_defs_array) {
+ tag_by_name[t.name] = t;
+ tag_by_id[t.id] = t;
+ }
+ }
+
+ auto by_name(std::string_view name) const -> const html_tag_def *
+ {
+ auto it = tag_by_name.find(name);
+
+ if (it != tag_by_name.end()) {
+ return &(it->second);
+ }
+
+ return nullptr;
+ }
+
+ auto by_id(int id) const -> const html_tag_def *
+ {
+ auto it = tag_by_id.find(static_cast<tag_id_t>(id));
+ if (it != tag_by_id.end()) {
+ return &(it->second);
+ }
+
+ return nullptr;
+ }
+
+ auto name_by_id_safe(int id) const -> std::string_view
+ {
+ auto it = tag_by_id.find(static_cast<tag_id_t>(id));
+ if (it != tag_by_id.end()) {
+ return it->second.name;
+ }
+
+ return "unknown";
+ }
+};
+
+}// namespace rspamd::html
+
+#endif//RSPAMD_HTML_TAG_DEFS_HXX
diff --git a/src/libserver/html/html_tags.h b/src/libserver/html/html_tags.h
new file mode 100644
index 0000000..c186314
--- /dev/null
+++ b/src/libserver/html/html_tags.h
@@ -0,0 +1,176 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_HTML_TAGS_H_
+#define SRC_LIBSERVER_HTML_TAGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Known HTML tags */
+typedef enum {
+ Tag_UNKNOWN = 0, /**< Unknown tag! */
+ Tag_A, /**< A */
+ Tag_ABBR, /**< ABBR */
+ Tag_ACRONYM, /**< ACRONYM */
+ Tag_ADDRESS, /**< ADDRESS */
+ Tag_APPLET, /**< APPLET */
+ Tag_AREA, /**< AREA */
+ Tag_B, /**< B */
+ Tag_BASE, /**< BASE */
+ Tag_BASEFONT, /**< BASEFONT */
+ Tag_BDO, /**< BDO */
+ Tag_BIG, /**< BIG */
+ Tag_BLOCKQUOTE, /**< BLOCKQUOTE */
+ Tag_BODY, /**< BODY */
+ Tag_BR, /**< BR */
+ Tag_BUTTON, /**< BUTTON */
+ Tag_CAPTION, /**< CAPTION */
+ Tag_CENTER, /**< CENTER */
+ Tag_CITE, /**< CITE */
+ Tag_CODE, /**< CODE */
+ Tag_COL, /**< COL */
+ Tag_COLGROUP, /**< COLGROUP */
+ Tag_DD, /**< DD */
+ Tag_DEL, /**< DEL */
+ Tag_DFN, /**< DFN */
+ Tag_DIR, /**< DIR */
+ Tag_DIV, /**< DIF */
+ Tag_DL, /**< DL */
+ Tag_DT, /**< DT */
+ Tag_EM, /**< EM */
+ Tag_FIELDSET, /**< FIELDSET */
+ Tag_FONT, /**< FONT */
+ Tag_FORM, /**< FORM */
+ Tag_FRAME, /**< FRAME */
+ Tag_FRAMESET, /**< FRAMESET */
+ Tag_H1, /**< H1 */
+ Tag_H2, /**< H2 */
+ Tag_H3, /**< H3 */
+ Tag_H4, /**< H4 */
+ Tag_H5, /**< H5 */
+ Tag_H6, /**< H6 */
+ Tag_HEAD, /**< HEAD */
+ Tag_HR, /**< HR */
+ Tag_HTML, /**< HTML */
+ Tag_I, /**< I */
+ Tag_IFRAME, /**< IFRAME */
+ Tag_IMG, /**< IMG */
+ Tag_INPUT, /**< INPUT */
+ Tag_INS, /**< INS */
+ Tag_ISINDEX, /**< ISINDEX */
+ Tag_KBD, /**< KBD */
+ Tag_KEYGEN, /**< KEYGEN */
+ Tag_LABEL, /**< LABEL */
+ Tag_LEGEND, /**< LEGEND */
+ Tag_LI, /**< LI */
+ Tag_LINK, /**< LINK */
+ Tag_LISTING, /**< LISTING */
+ Tag_MAP, /**< MAP */
+ Tag_MENU, /**< MENU */
+ Tag_META, /**< META */
+ Tag_NOFRAMES, /**< NOFRAMES */
+ Tag_NOSCRIPT, /**< NOSCRIPT */
+ Tag_OBJECT, /**< OBJECT */
+ Tag_OL, /**< OL */
+ Tag_OPTGROUP, /**< OPTGROUP */
+ Tag_OPTION, /**< OPTION */
+ Tag_P, /**< P */
+ Tag_PARAM, /**< PARAM */
+ Tag_PLAINTEXT, /**< PLAINTEXT */
+ Tag_PRE, /**< PRE */
+ Tag_Q, /**< Q */
+ Tag_RB, /**< RB */
+ Tag_RBC, /**< RBC */
+ Tag_RP, /**< RP */
+ Tag_RT, /**< RT */
+ Tag_RTC, /**< RTC */
+ Tag_RUBY, /**< RUBY */
+ Tag_S, /**< S */
+ Tag_SAMP, /**< SAMP */
+ Tag_SCRIPT, /**< SCRIPT */
+ Tag_SELECT, /**< SELECT */
+ Tag_SMALL, /**< SMALL */
+ Tag_SPAN, /**< SPAN */
+ Tag_STRIKE, /**< STRIKE */
+ Tag_STRONG, /**< STRONG */
+ Tag_STYLE, /**< STYLE */
+ Tag_SUB, /**< SUB */
+ Tag_SUP, /**< SUP */
+ Tag_TABLE, /**< TABLE */
+ Tag_TBODY, /**< TBODY */
+ Tag_TD, /**< TD */
+ Tag_TEXTAREA, /**< TEXTAREA */
+ Tag_TFOOT, /**< TFOOT */
+ Tag_TH, /**< TH */
+ Tag_THEAD, /**< THEAD */
+ Tag_TITLE, /**< TITLE */
+ Tag_TR, /**< TR */
+ Tag_TT, /**< TT */
+ Tag_U, /**< U */
+ Tag_UL, /**< UL */
+ Tag_VAR, /**< VAR */
+ Tag_XMP, /**< XMP */
+ Tag_NEXTID, /**< NEXTID */
+ Tag_MAX,
+
+ N_TAGS = -1 /**< Must be -1 */
+} tag_id_t;
+
+#define CM_UNKNOWN 0
+/* Elements with no content. Map to HTML specification. */
+#define CM_EMPTY (1 << 0)
+/* Elements that appear outside of "BODY". */
+#define CM_HTML (1 << 1)
+/* Elements that can appear within HEAD. */
+#define CM_HEAD (1 << 2)
+/* HTML "block" elements. */
+#define CM_BLOCK (1 << 3)
+/* HTML "inline" elements. */
+#define CM_INLINE (1 << 4)
+/* Elements that mark list item ("LI"). */
+#define CM_LIST (1 << 5)
+/* Elements that mark definition list item ("DL", "DT"). */
+#define CM_DEFLIST (1 << 6)
+/* Elements that can appear inside TABLE. */
+#define CM_TABLE (1 << 7)
+/* Used for "THEAD", "TFOOT" or "TBODY". */
+#define CM_ROWGRP (1 << 8)
+/* Used for "TD", "TH" */
+#define CM_ROW (1 << 9)
+/* Elements whose content must be protected against white space movement.
+ Includes some elements that can found in forms. */
+#define CM_FIELD (1 << 10)
+#define CM_RAW (1 << 11)
+/* Elements that allows "PARAM". */
+#define CM_PARAM (1 << 12)
+/* Elements with an optional end tag. */
+#define CM_OPT (1 << 13)
+/* Elements that use "align" attribute for vertical position. */
+#define CM_IMG (1 << 14)
+#define CM_NO_INDENT (1 << 15)
+/* Elements that cannot be omitted. */
+#define CM_OMITST (1 << 16)
+/* Unique elements */
+#define CM_UNIQUE (1 << 17)
+
+#define CM_USER_SHIFT (18)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_HTML_TAGS_H_ */
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
new file mode 100644
index 0000000..2fe6702
--- /dev/null
+++ b/src/libserver/html/html_tests.cxx
@@ -0,0 +1,304 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "html.hxx"
+#include "libserver/task.h"
+
+#include <vector>
+#include <fmt/core.h>
+
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::html {
+
+/*
+ * Tests part
+ */
+
+TEST_SUITE("html")
+{
+ TEST_CASE("html parsing")
+ {
+
+ const std::vector<std::pair<std::string, std::string>> cases{
+ {"<html><!DOCTYPE html><body>", "+html;++xml;++body;"},
+ {"<html><div><div></div></div></html>", "+html;++div;+++div;"},
+ {"<html><div><div></div></html>", "+html;++div;+++div;"},
+ {"<html><div><div></div></html></div>", "+html;++div;+++div;"},
+ {"<p><p><a></p></a></a>", "+p;++p;+++a;"},
+ {"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"},
+ /* Broken, as I don't know how the hell this should be really parsed */
+ //{"<html><!DOCTYPE html><body><head><body></body></html></body></html>",
+ // "+html;++xml;++body;+++head;+++body;"}
+ };
+
+ rspamd_url_init(NULL);
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "html", 0);
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
+
+ for (const auto &c: cases) {
+ SUBCASE((std::string("extract tags from: ") + c.first).c_str())
+ {
+ GByteArray *tmp = g_byte_array_sized_new(c.first.size());
+ g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size());
+ auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr);
+ CHECK(hc != nullptr);
+ auto dump = html_debug_structure(*hc);
+ CHECK(c.second == dump);
+ g_byte_array_free(tmp, TRUE);
+ }
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+
+ TEST_CASE("html text extraction")
+ {
+ using namespace std::string_literals;
+ const std::vector<std::pair<std::string, std::string>> cases{
+ {"test", "test"},
+ {"test\0"s, "test\uFFFD"s},
+ {"test\0test"s, "test\uFFFDtest"s},
+ {"test\0\0test"s, "test\uFFFD\uFFFDtest"s},
+ {"test ", "test"},
+ {"test foo, bar", "test foo, bar"},
+ {"<p>text</p>", "text\n"},
+ {"olo<p>text</p>lolo", "olo\ntext\nlolo"},
+ {"<div>foo</div><div>bar</div>", "foo\nbar\n"},
+ {"<b>foo<i>bar</b>baz</i>", "foobarbaz"},
+ {"<b>foo<i>bar</i>baz</b>", "foobarbaz"},
+ {"foo<br>baz", "foo\nbaz"},
+ {"<a href=https://example.com>test</a>", "test"},
+ {"<img alt=test>", "test"},
+ {" <body>\n"
+ " <!-- escape content -->\n"
+ " a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"
+ " </body>",
+ R"|(a b a > b a < b a & b 'a "a")|"},
+ /* XML tags */
+ {"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n"
+ " <!DOCTYPE html\n"
+ " PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n"
+ " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"
+ "<body>test</body>",
+ "test"},
+ {"<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"></head>"
+ " <body>\n"
+ " <p><br>\n"
+ " </p>\n"
+ " <div class=\"moz-forward-container\"><br>\n"
+ " <br>\n"
+ " test</div>"
+ "</body>",
+ "\n\n\ntest\n"},
+ {"<div>fi<span style=\"FONT-SIZE: 0px\">le </span>"
+ "sh<span style=\"FONT-SIZE: 0px\">aring </span></div>",
+ "fish\n"},
+ /* FIXME: broken until rework of css parser */
+ //{"<div>fi<span style=\"FONT-SIZE: 0px\">le </span>"
+ // "sh<span style=\"FONT-SIZE: 0px\">aring </div>foo</span>", "fish\nfoo"},
+ /* Complex html with bad tags */
+ {"<!DOCTYPE html>\n"
+ "<html lang=\"en\">\n"
+ " <head>\n"
+ " <meta charset=\"utf-8\">\n"
+ " <title>title</title>\n"
+ " <link rel=\"stylesheet\" href=\"style.css\">\n"
+ " <script src=\"script.js\"></script>\n"
+ " </head>\n"
+ " <body>\n"
+ " <!-- page content -->\n"
+ " Hello, world! <b>test</b>\n"
+ " <p>data<>\n"
+ " </P>\n"
+ " <b>stuff</p>?\n"
+ " </body>\n"
+ "</html>",
+ "Hello, world! test \ndata<>\nstuff\n?"},
+ {"<p><!--comment-->test</br></hr><br>", "test\n"},
+ /* Tables */
+ {"<table>\n"
+ " <tr>\n"
+ " <th>heada</th>\n"
+ " <th>headb</th>\n"
+ " </tr>\n"
+ " <tr>\n"
+ " <td>data1</td>\n"
+ " <td>data2</td>\n"
+ " </tr>\n"
+ " </table>",
+ "heada headb\ndata1 data2\n"},
+ /* Invalid closing br and hr + comment */
+ {" <body>\n"
+ " <!-- page content -->\n"
+ " Hello, world!<br>test</br><br>content</hr>more content<br>\n"
+ " <div>\n"
+ " content inside div\n"
+ " </div>\n"
+ " </body>",
+ "Hello, world!\ntest\ncontentmore content\ncontent inside div\n"},
+ /* First closing tag */
+ {"</head>\n"
+ "<body>\n"
+ "<p> Hello. I have some bad news.\n"
+ "<br /> <br /> <br /> <strong> <br /> <br /> <br /> <br /> <br /> <br /> <br /> <br /> </strong><span> <br /> </span>test</p>\n"
+ "</body>\n"
+ "</html>",
+ "Hello. I have some bad news. \n\n\n\n\n\n\n\n\n\n\n\ntest\n"},
+ /* Invalid tags */
+ {"lol <sht> omg </sht> oh my!\n"
+ "<name>words words</name> goodbye",
+ "lol omg oh my! words words goodbye"},
+ /* Invisible stuff */
+ {"<div style=\"color:#555555;font-family:Arial, 'Helvetica Neue', Helvetica, sans-serif;line-height:1.2;padding-top:10px;padding-right:10px;padding-bottom:10px;padding-left:10px;font-style: italic;\">\n"
+ "<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n"
+ "<span style=\"color:#FFFFFF; \">F</span>Sincerely,</p>\n"
+ "<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n"
+ "<span style=\"color:#FFFFFF; \">8</span>Sky<span style=\"opacity:1;\"></span>pe<span style=\"color:#FFFFFF; \">F</span>Web<span style=\"color:#FFFFFF; \">F</span></p>\n"
+ "<span style=\"color:#FFFFFF; \">kreyes</span>\n"
+ "<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n"
+ "&nbsp;</p>",
+ " Sincerely,\n Skype Web\n"},
+ {"lala<p hidden>fafa</p>", "lala"},
+ {"<table style=\"FONT-SIZE: 0px;\"><tbody><tr><td>\n"
+ "DONKEY\n"
+ "</td></tr></tbody></table>",
+ ""},
+ /* bgcolor propagation */
+ {"<a style=\"display: inline-block; color: #ffffff; background-color: #00aff0;\">\n"
+ "<span style=\"color: #00aff0;\">F</span>Rev<span style=\"opacity: 1;\"></span></span>ie<span style=\"opacity: 1;\"></span>"
+ "</span>w<span style=\"color: #00aff0;\">F<span style=\"opacity: 1;\">̹</span></span>",
+ " Review"},
+ {"<td style=\"color:#ffffff\" bgcolor=\"#005595\">\n"
+ "hello world\n"
+ "</td>",
+ "hello world"},
+ /* Colors */
+ {"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>"
+ "<span>world</span>",
+ "goodbye cruelworld"},
+ /* Font-size propagation */
+ {"<p style=\"font-size: 11pt;line-height:22px\">goodbye <span style=\"font-size:0px\">cruel</span>world</p>",
+ "goodbye world\n"},
+ /* Newline before tag -> must be space */
+ {"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>\n"
+ "<span>world</span>",
+ "goodbye cruel world"},
+ /* Head tag with some stuff */
+ {"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
+ {"<html><head><title>oh my god</head><body></body></html>", ""},
+ {"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
+
+ };
+
+ rspamd_url_init(NULL);
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "html", 0);
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
+
+ auto replace_newlines = [](std::string &str) {
+ auto start_pos = 0;
+ while ((start_pos = str.find("\n", start_pos, 1)) != std::string::npos) {
+ str.replace(start_pos, 1, "\\n", 2);
+ start_pos += 2;
+ }
+ };
+
+ auto i = 1;
+ for (const auto &c: cases) {
+ SUBCASE((fmt::format("html extraction case {}", i)).c_str())
+ {
+ GByteArray *tmp = g_byte_array_sized_new(c.first.size());
+ g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size());
+ auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr);
+ CHECK(hc != nullptr);
+ replace_newlines(hc->parsed);
+ auto expected = c.second;
+ replace_newlines(expected);
+ CHECK(hc->parsed == expected);
+ g_byte_array_free(tmp, TRUE);
+ }
+ i++;
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+
+ TEST_CASE("html urls extraction")
+ {
+ using namespace std::string_literals;
+ const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
+ {"<style></style><a href=\"https://www.example.com\">yolo</a>",
+ {"https://www.example.com"},
+ "yolo"},
+ {"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"},
+ {"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"},
+ {"<html>\n"
+ "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\">\n"
+ "<body>\n"
+ "<a href=\"https://www.example.com\">hello</a>\n"
+ "</body>\n"
+ "</html>",
+ {"https://www.example.com"},
+ "hello"},
+ };
+
+ rspamd_url_init(NULL);
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "html", 0);
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
+
+ auto i = 1;
+ for (const auto &c: cases) {
+ SUBCASE((fmt::format("html url extraction case {}", i)).c_str())
+ {
+ GPtrArray *purls = g_ptr_array_new();
+ auto input = std::get<0>(c);
+ GByteArray *tmp = g_byte_array_sized_new(input.size());
+ g_byte_array_append(tmp, (const guint8 *) input.data(), input.size());
+ auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, purls, true, nullptr);
+ CHECK(hc != nullptr);
+ auto &expected_text = std::get<2>(c);
+ if (expected_text.has_value()) {
+ CHECK(hc->parsed == expected_text.value());
+ }
+ const auto &expected_urls = std::get<1>(c);
+ CHECK(expected_urls.size() == purls->len);
+ for (auto j = 0; j < expected_urls.size(); ++j) {
+ auto *url = (rspamd_url *) g_ptr_array_index(purls, j);
+ CHECK(expected_urls[j] == std::string{url->string, url->urllen});
+ }
+ g_byte_array_free(tmp, TRUE);
+ g_ptr_array_free(purls, TRUE);
+ }
+ ++i;
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+}
+
+} /* namespace rspamd::html */
diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx
new file mode 100644
index 0000000..8f29f2c
--- /dev/null
+++ b/src/libserver/html/html_url.cxx
@@ -0,0 +1,496 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "html_url.hxx"
+#include "libutil/str_util.h"
+#include "libserver/url.h"
+#include "libserver/logger.h"
+#include "rspamd.h"
+
+#include <unicode/idna.h>
+
+namespace rspamd::html {
+
+static auto
+rspamd_url_is_subdomain(std::string_view t1, std::string_view t2) -> bool
+{
+ const auto *p1 = t1.data() + t1.size() - 1;
+ const auto *p2 = t2.data() + t2.size() - 1;
+
+ /* Skip trailing dots */
+ while (p1 > t1.data()) {
+ if (*p1 != '.') {
+ break;
+ }
+
+ p1--;
+ }
+
+ while (p2 > t2.data()) {
+ if (*p2 != '.') {
+ break;
+ }
+
+ p2--;
+ }
+
+ while (p1 > t1.data() && p2 > t2.data()) {
+ if (*p1 != *p2) {
+ break;
+ }
+
+ p1--;
+ p2--;
+ }
+
+ if (p2 == t2.data()) {
+ /* p2 can be subdomain of p1 if *p1 is '.' */
+ if (p1 != t1.data() && *(p1 - 1) == '.') {
+ return true;
+ }
+ }
+ else if (p1 == t1.data()) {
+ if (p2 != t2.data() && *(p2 - 1) == '.') {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+static auto
+get_icu_idna_instance(void) -> auto
+{
+ auto uc_err = U_ZERO_ERROR;
+ static auto *udn = icu::IDNA::createUTS46Instance(UIDNA_DEFAULT, uc_err);
+
+ return udn;
+}
+
+static auto
+convert_idna_hostname_maybe(rspamd_mempool_t *pool, struct rspamd_url *url, bool use_tld)
+ -> std::string_view
+{
+ std::string_view ret = use_tld ? std::string_view{rspamd_url_tld_unsafe(url), url->tldlen} : std::string_view{rspamd_url_host_unsafe(url), url->hostlen};
+
+ /* Handle IDN url's */
+ if (ret.size() > 4 &&
+ rspamd_substring_search_caseless(ret.data(), ret.size(), "xn--", 4) != -1) {
+
+ const auto buf_capacity = ret.size() * 2 + 1;
+ auto *idn_hbuf = (char *) rspamd_mempool_alloc(pool, buf_capacity);
+ icu::CheckedArrayByteSink byte_sink{idn_hbuf, (int) buf_capacity};
+
+ /* We need to convert it to the normal value first */
+ icu::IDNAInfo info;
+ auto uc_err = U_ZERO_ERROR;
+ auto *udn = get_icu_idna_instance();
+ udn->nameToUnicodeUTF8(icu::StringPiece(ret.data(), ret.size()),
+ byte_sink, info, uc_err);
+
+ if (uc_err == U_ZERO_ERROR && !info.hasErrors()) {
+ /* idn_hbuf is allocated in mempool, so it is safe to use */
+ ret = std::string_view{idn_hbuf, (std::size_t) byte_sink.NumberOfBytesWritten()};
+ }
+ else {
+ msg_err_pool("cannot convert to IDN: %s (0x%xd)",
+ u_errorName(uc_err), info.getErrors());
+ }
+ }
+
+ return ret;
+};
+
+constexpr auto sv_equals(std::string_view s1, std::string_view s2) -> auto
+{
+ return (s1.size() == s2.size()) &&
+ std::equal(s1.begin(), s1.end(), s2.begin(), s2.end(),
+ [](const auto c1, const auto c2) {
+ return g_ascii_tolower(c1) == g_ascii_tolower(c2);
+ });
+}
+
+constexpr auto
+is_transfer_proto(struct rspamd_url *u) -> bool
+{
+ return (u->protocol & (PROTOCOL_HTTP | PROTOCOL_HTTPS | PROTOCOL_FTP)) != 0;
+}
+
+auto html_url_is_phished(rspamd_mempool_t *pool,
+ struct rspamd_url *href_url,
+ std::string_view text_data) -> std::optional<rspamd_url *>
+{
+ struct rspamd_url *text_url;
+ std::string_view disp_tok, href_tok;
+ goffset url_pos;
+ gchar *url_str = NULL;
+
+ auto sz = text_data.size();
+ const auto *trimmed = rspamd_string_unicode_trim_inplace(text_data.data(), &sz);
+ text_data = std::string_view(trimmed, sz);
+
+ if (text_data.size() > 4 &&
+ rspamd_url_find(pool, text_data.data(), text_data.size(), &url_str,
+ RSPAMD_URL_FIND_ALL,
+ &url_pos, NULL) &&
+ url_str != nullptr) {
+
+ if (url_pos > 0) {
+ /*
+ * We have some url at some offset, so we need to check what is
+ * at the start of the text
+ */
+ return std::nullopt;
+ }
+
+ text_url = rspamd_mempool_alloc0_type(pool, struct rspamd_url);
+ auto rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool,
+ RSPAMD_URL_PARSE_TEXT);
+
+ if (rc == URI_ERRNO_OK) {
+ text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+ href_url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+
+ /* Check for phishing */
+ if (is_transfer_proto(text_url) == is_transfer_proto(href_url)) {
+ disp_tok = convert_idna_hostname_maybe(pool, text_url, false);
+ href_tok = convert_idna_hostname_maybe(pool, href_url, false);
+
+ if (!sv_equals(disp_tok, href_tok) &&
+ text_url->tldlen > 0 && href_url->tldlen > 0) {
+
+ /* Apply the same logic for TLD */
+ disp_tok = convert_idna_hostname_maybe(pool, text_url, true);
+ href_tok = convert_idna_hostname_maybe(pool, href_url, true);
+
+ if (!sv_equals(disp_tok, href_tok)) {
+ /* Check if one url is a subdomain for another */
+
+ if (!rspamd_url_is_subdomain(disp_tok, href_tok)) {
+ href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
+ text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+
+ if (href_url->ext == nullptr) {
+ href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+ }
+ href_url->ext->linked_url = text_url;
+ }
+ }
+ }
+ }
+
+ return text_url;
+ }
+ else {
+ /*
+ * We have found something that looks like an url but it was
+ * not parsed correctly.
+ * Sometimes it means an obfuscation attempt, so we have to check
+ * what's inside of the text
+ */
+ gboolean obfuscation_found = FALSE;
+
+ if (text_data.size() > 4 && g_ascii_strncasecmp(text_data.begin(), "http", 4) == 0 &&
+ rspamd_substring_search(text_data.begin(), text_data.size(), "://", 3) != -1) {
+ /* Clearly an obfuscation attempt */
+ obfuscation_found = TRUE;
+ }
+
+ msg_info_pool("extract of url '%s' failed: %s; obfuscation detected: %s",
+ url_str,
+ rspamd_url_strerror(rc),
+ obfuscation_found ? "yes" : "no");
+
+ if (obfuscation_found) {
+ href_url->flags |= RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED;
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
+void html_check_displayed_url(rspamd_mempool_t *pool,
+ GList **exceptions,
+ void *url_set,
+ std::string_view visible_part,
+ goffset href_offset,
+ struct rspamd_url *url)
+{
+ struct rspamd_url *displayed_url = nullptr;
+ struct rspamd_url *turl;
+ struct rspamd_process_exception *ex;
+ guint saved_flags = 0;
+ gsize dlen;
+
+ if (visible_part.empty()) {
+ /* No displayed url, just some text within <a> tag */
+ return;
+ }
+
+ if (url->ext == nullptr) {
+ url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+ }
+ url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
+ rspamd_strlcpy(url->ext->visible_part,
+ visible_part.data(),
+ visible_part.size() + 1);
+ dlen = visible_part.size();
+
+ /* Strip unicode spaces from the start and the end */
+ url->ext->visible_part = const_cast<char *>(
+ rspamd_string_unicode_trim_inplace(url->ext->visible_part,
+ &dlen));
+ auto maybe_url = html_url_is_phished(pool, url,
+ {url->ext->visible_part, dlen});
+
+ if (maybe_url) {
+ url->flags |= saved_flags;
+ displayed_url = maybe_url.value();
+ }
+
+ if (exceptions && displayed_url != nullptr) {
+ ex = rspamd_mempool_alloc_type(pool, struct rspamd_process_exception);
+ ex->pos = href_offset;
+ ex->len = dlen;
+ ex->type = RSPAMD_EXCEPTION_URL;
+ ex->ptr = url;
+
+ *exceptions = g_list_prepend(*exceptions, ex);
+ }
+
+ if (displayed_url && url_set) {
+ turl = rspamd_url_set_add_or_return((khash_t(rspamd_url_hash) *) url_set, displayed_url);
+
+ if (turl != nullptr) {
+ /* Here, we assume the following:
+ * if we have a URL in the text part which
+ * is the same as displayed URL in the
+ * HTML part, we assume that it is also
+ * hint only.
+ */
+ if (turl->flags & RSPAMD_URL_FLAG_FROM_TEXT) {
+
+ /*
+ * We have the same URL for href and displayed url, so we
+ * know that this url cannot be both target and display (as
+ * it breaks logic in many places), so we do not
+ * propagate html flags
+ */
+ if (!(turl->flags & RSPAMD_URL_FLAG_DISPLAY_URL)) {
+ turl->flags |= displayed_url->flags;
+ }
+ turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
+ }
+
+ turl->count++;
+ }
+ else {
+ /* Already inserted by `rspamd_url_set_add_or_return` */
+ }
+ }
+
+ rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
+}
+
+auto html_process_url(rspamd_mempool_t *pool, std::string_view &input)
+ -> std::optional<struct rspamd_url *>
+{
+ struct rspamd_url *url;
+ guint saved_flags = 0;
+ gint rc;
+ const gchar *s, *prefix = "http://";
+ gchar *d;
+ gsize dlen;
+ gboolean has_bad_chars = FALSE, no_prefix = FALSE;
+ static const gchar hexdigests[] = "0123456789abcdef";
+
+ auto sz = input.length();
+ const auto *trimmed = rspamd_string_unicode_trim_inplace(input.data(), &sz);
+ input = {trimmed, sz};
+
+ const auto *start = input.data();
+ s = start;
+ dlen = 0;
+
+ for (auto i = 0; i < sz; i++) {
+ if (G_UNLIKELY(((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
+ dlen += 3;
+ }
+ else {
+ dlen++;
+ }
+ }
+
+ if (rspamd_substring_search(start, sz, "://", 3) == -1) {
+ if (sz >= sizeof("mailto:") &&
+ (memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 ||
+ memcmp(start, "tel:", sizeof("tel:") - 1) == 0 ||
+ memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) {
+ /* Exclusion, has valid but 'strange' prefix */
+ }
+ else {
+ for (auto i = 0; i < sz; i++) {
+ if (!((s[i] & 0x80) || g_ascii_isalnum(s[i]))) {
+ if (i == 0 && sz > 2 && s[i] == '/' && s[i + 1] == '/') {
+ prefix = "http:";
+ dlen += sizeof("http:") - 1;
+ no_prefix = TRUE;
+ }
+ else if (s[i] == '@') {
+ /* Likely email prefix */
+ prefix = "mailto://";
+ dlen += sizeof("mailto://") - 1;
+ no_prefix = TRUE;
+ }
+ else if (s[i] == ':' && i != 0) {
+ /* Special case */
+ no_prefix = FALSE;
+ }
+ else {
+ if (i == 0) {
+ /* No valid data */
+ return std::nullopt;
+ }
+ else {
+ no_prefix = TRUE;
+ dlen += strlen(prefix);
+ }
+ }
+
+ break;
+ }
+ }
+ }
+ }
+
+ auto *decoded = rspamd_mempool_alloc_buffer(pool, dlen + 1);
+ d = decoded;
+
+ if (no_prefix) {
+ gsize plen = strlen(prefix);
+ memcpy(d, prefix, plen);
+ d += plen;
+ }
+
+ /*
+ * We also need to remove all internal newlines, spaces
+ * and encode unsafe characters
+ * Another obfuscation find in the wild was encoding of the SAFE url characters,
+ * including essential ones
+ */
+ for (auto i = 0; i < sz; i++) {
+ if (G_UNLIKELY(g_ascii_isspace(s[i]))) {
+ continue;
+ }
+ else if (G_UNLIKELY(((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
+ /* URL encode */
+ *d++ = '%';
+ *d++ = hexdigests[(s[i] >> 4) & 0xf];
+ *d++ = hexdigests[s[i] & 0xf];
+ has_bad_chars = TRUE;
+ }
+ else if (G_UNLIKELY(s[i] == '%')) {
+ if (i + 2 < sz) {
+ auto c1 = s[i + 1];
+ auto c2 = s[i + 2];
+
+ if (g_ascii_isxdigit(c1) && g_ascii_isxdigit(c2)) {
+ auto codepoint = 0;
+
+ if (c1 >= '0' && c1 <= '9') codepoint = c1 - '0';
+ else if (c1 >= 'A' && c1 <= 'F')
+ codepoint = c1 - 'A' + 10;
+ else if (c1 >= 'a' && c1 <= 'f')
+ codepoint = c1 - 'a' + 10;
+
+ codepoint <<= 4;
+
+ if (c2 >= '0' && c2 <= '9') codepoint += c2 - '0';
+ else if (c2 >= 'A' && c2 <= 'F')
+ codepoint += c2 - 'A' + 10;
+ else if (c2 >= 'a' && c2 <= 'f')
+ codepoint += c2 - 'a' + 10;
+
+ /* Now check for 'interesting' codepoints */
+ if (codepoint == '@' || codepoint == ':' || codepoint == '|' ||
+ codepoint == '?' || codepoint == '\\' || codepoint == '/') {
+ /* Replace it back */
+ *d++ = (char) (codepoint & 0xff);
+ i += 2;
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
+
+ *d = '\0';
+ dlen = d - decoded;
+
+ url = rspamd_mempool_alloc0_type(pool, struct rspamd_url);
+ rspamd_url_normalise_propagate_flags(pool, decoded, &dlen, saved_flags);
+ rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
+
+ /* Filter some completely damaged urls */
+ if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
+ !((url->protocol & PROTOCOL_UNKNOWN))) {
+ url->flags |= saved_flags;
+
+ if (has_bad_chars) {
+ url->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+
+ if (no_prefix) {
+ url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+
+ if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
+ /* Ignore urls with both no schema and no tld */
+ return std::nullopt;
+ }
+ }
+
+ decoded = url->string;
+
+ input = {decoded, url->urllen};
+
+ /* Spaces in href usually mean an attempt to obfuscate URL */
+ /* See https://github.com/vstakhov/rspamd/issues/593 */
+#if 0
+ if (has_spaces) {
+ url->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+#endif
+
+ return url;
+ }
+
+ return std::nullopt;
+}
+
+}// namespace rspamd::html \ No newline at end of file
diff --git a/src/libserver/html/html_url.hxx b/src/libserver/html/html_url.hxx
new file mode 100644
index 0000000..46dde6d
--- /dev/null
+++ b/src/libserver/html/html_url.hxx
@@ -0,0 +1,68 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTML_URL_HXX
+#define RSPAMD_HTML_URL_HXX
+#pragma once
+
+#include "libutil/mem_pool.h"
+
+#include <string_view>
+#include <optional>
+
+struct rspamd_url; /* Forward declaration */
+
+namespace rspamd::html {
+
+
+/**
+ * Checks if an html url is likely phished by some displayed url
+ * @param pool
+ * @param href_url
+ * @param text_data
+ * @return
+ */
+auto html_url_is_phished(rspamd_mempool_t *pool,
+ struct rspamd_url *href_url,
+ std::string_view text_data) -> std::optional<rspamd_url *>;
+
+/**
+ * Check displayed part of the url at specified offset
+ * @param pool
+ * @param exceptions
+ * @param url_set
+ * @param visible_part
+ * @param href_offset
+ * @param url
+ */
+auto html_check_displayed_url(rspamd_mempool_t *pool,
+ GList **exceptions,
+ void *url_set,
+ std::string_view visible_part,
+ goffset href_offset,
+ struct rspamd_url *url) -> void;
+
+/**
+ * Process HTML url (e.g. for href component)
+ * @param pool
+ * @param input may be modified during the process
+ * @return
+ */
+auto html_process_url(rspamd_mempool_t *pool, std::string_view &input)
+ -> std::optional<struct rspamd_url *>;
+}// namespace rspamd::html
+
+#endif//RSPAMD_HTML_URL_HXX \ No newline at end of file
diff --git a/src/libserver/http/http_connection.c b/src/libserver/http/http_connection.c
new file mode 100644
index 0000000..5557fbf
--- /dev/null
+++ b/src/libserver/http/http_connection.c
@@ -0,0 +1,2649 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "http_connection.h"
+#include "http_private.h"
+#include "http_message.h"
+#include "utlist.h"
+#include "util.h"
+#include "printf.h"
+#include "logger.h"
+#include "ref.h"
+#include "ottery.h"
+#include "keypair_private.h"
+#include "cryptobox.h"
+#include "libutil/libev_helper.h"
+#include "libserver/ssl_util.h"
+#include "libserver/url.h"
+
+#include "contrib/mumhash/mum.h"
+#include "contrib/http-parser/http_parser.h"
+#include "unix-std.h"
+
+#include <openssl/err.h>
+
+#define ENCRYPTED_VERSION " HTTP/1.0"
+
+struct _rspamd_http_privbuf {
+ rspamd_fstring_t *data;
+ const gchar *zc_buf;
+ gsize zc_remain;
+ ref_entry_t ref;
+};
+
+enum rspamd_http_priv_flags {
+ RSPAMD_HTTP_CONN_FLAG_ENCRYPTED = 1u << 0u,
+ RSPAMD_HTTP_CONN_FLAG_NEW_HEADER = 1u << 1u,
+ RSPAMD_HTTP_CONN_FLAG_RESETED = 1u << 2u,
+ RSPAMD_HTTP_CONN_FLAG_TOO_LARGE = 1u << 3u,
+ RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED = 1u << 4u,
+ RSPAMD_HTTP_CONN_FLAG_PROXY = 1u << 5u,
+ RSPAMD_HTTP_CONN_FLAG_PROXY_REQUEST = 1u << 6u,
+ RSPAMD_HTTP_CONN_OWN_SOCKET = 1u << 7u,
+};
+
+#define IS_CONN_ENCRYPTED(c) ((c)->flags & RSPAMD_HTTP_CONN_FLAG_ENCRYPTED)
+#define IS_CONN_RESETED(c) ((c)->flags & RSPAMD_HTTP_CONN_FLAG_RESETED)
+
+struct rspamd_http_connection_private {
+ struct rspamd_http_context *ctx;
+ struct rspamd_ssl_connection *ssl;
+ struct _rspamd_http_privbuf *buf;
+ struct rspamd_keypair_cache *cache;
+ struct rspamd_cryptobox_pubkey *peer_key;
+ struct rspamd_cryptobox_keypair *local_key;
+ struct rspamd_http_header *header;
+ struct http_parser parser;
+ struct http_parser_settings parser_cb;
+ struct rspamd_io_ev ev;
+ ev_tstamp timeout;
+ struct rspamd_http_message *msg;
+ struct iovec *out;
+ guint outlen;
+ enum rspamd_http_priv_flags flags;
+ gsize wr_pos;
+ gsize wr_total;
+};
+
+static const rspamd_ftok_t key_header = {
+ .begin = "Key",
+ .len = 3};
+static const rspamd_ftok_t date_header = {
+ .begin = "Date",
+ .len = 4};
+static const rspamd_ftok_t last_modified_header = {
+ .begin = "Last-Modified",
+ .len = 13};
+
+static void rspamd_http_event_handler(int fd, short what, gpointer ud);
+static void rspamd_http_ssl_err_handler(gpointer ud, GError *err);
+
+
+#define HTTP_ERROR http_error_quark()
+GQuark
+http_error_quark(void)
+{
+ return g_quark_from_static_string("http-error-quark");
+}
+
+static void
+rspamd_http_privbuf_dtor(gpointer ud)
+{
+ struct _rspamd_http_privbuf *p = (struct _rspamd_http_privbuf *) ud;
+
+ if (p->data) {
+ rspamd_fstring_free(p->data);
+ }
+
+ g_free(p);
+}
+
+static const gchar *
+rspamd_http_code_to_str(gint code)
+{
+ if (code == 200) {
+ return "OK";
+ }
+ else if (code == 404) {
+ return "Not found";
+ }
+ else if (code == 403 || code == 401) {
+ return "Not authorized";
+ }
+ else if (code >= 400 && code < 500) {
+ return "Bad request";
+ }
+ else if (code >= 300 && code < 400) {
+ return "See Other";
+ }
+ else if (code >= 500 && code < 600) {
+ return "Internal server error";
+ }
+
+ return "Unknown error";
+}
+
+static void
+rspamd_http_parse_key(rspamd_ftok_t *data, struct rspamd_http_connection *conn,
+ struct rspamd_http_connection_private *priv)
+{
+ guchar *decoded_id;
+ const gchar *eq_pos;
+ gsize id_len;
+ struct rspamd_cryptobox_pubkey *pk;
+
+ if (priv->local_key == NULL) {
+ /* In this case we cannot do anything, e.g. we cannot decrypt payload */
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_ENCRYPTED;
+ }
+ else {
+ /* Check sanity of what we have */
+ eq_pos = memchr(data->begin, '=', data->len);
+ if (eq_pos != NULL) {
+ decoded_id = rspamd_decode_base32(data->begin, eq_pos - data->begin,
+ &id_len, RSPAMD_BASE32_DEFAULT);
+
+ if (decoded_id != NULL && id_len >= RSPAMD_KEYPAIR_SHORT_ID_LEN) {
+ pk = rspamd_pubkey_from_base32(eq_pos + 1,
+ data->begin + data->len - eq_pos - 1,
+ RSPAMD_KEYPAIR_KEX,
+ RSPAMD_CRYPTOBOX_MODE_25519);
+ if (pk != NULL) {
+ if (memcmp(rspamd_keypair_get_id(priv->local_key),
+ decoded_id,
+ RSPAMD_KEYPAIR_SHORT_ID_LEN) == 0) {
+ priv->msg->peer_key = pk;
+
+ if (priv->cache && priv->msg->peer_key) {
+ rspamd_keypair_cache_process(priv->cache,
+ priv->local_key,
+ priv->msg->peer_key);
+ }
+ }
+ else {
+ rspamd_pubkey_unref(pk);
+ }
+ }
+ }
+
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTED;
+ g_free(decoded_id);
+ }
+ }
+}
+
+static inline void
+rspamd_http_check_special_header(struct rspamd_http_connection *conn,
+ struct rspamd_http_connection_private *priv)
+{
+ if (rspamd_ftok_casecmp(&priv->header->name, &date_header) == 0) {
+ priv->msg->date = rspamd_http_parse_date(priv->header->value.begin,
+ priv->header->value.len);
+ }
+ else if (rspamd_ftok_casecmp(&priv->header->name, &key_header) == 0) {
+ rspamd_http_parse_key(&priv->header->value, conn, priv);
+ }
+ else if (rspamd_ftok_casecmp(&priv->header->name, &last_modified_header) == 0) {
+ priv->msg->last_modified = rspamd_http_parse_date(
+ priv->header->value.begin,
+ priv->header->value.len);
+ }
+}
+
+static gint
+rspamd_http_on_url(http_parser *parser, const gchar *at, size_t length)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ priv->msg->url = rspamd_fstring_append(priv->msg->url, at, length);
+
+ return 0;
+}
+
+static gint
+rspamd_http_on_status(http_parser *parser, const gchar *at, size_t length)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ if (parser->status_code != 200) {
+ if (priv->msg->status == NULL) {
+ priv->msg->status = rspamd_fstring_new();
+ }
+
+ priv->msg->status = rspamd_fstring_append(priv->msg->status, at, length);
+ }
+
+ return 0;
+}
+
+static void
+rspamd_http_finish_header(struct rspamd_http_connection *conn,
+ struct rspamd_http_connection_private *priv)
+{
+ struct rspamd_http_header *hdr;
+ khiter_t k;
+ gint r;
+
+ priv->header->combined = rspamd_fstring_append(priv->header->combined,
+ "\r\n", 2);
+ priv->header->value.len = priv->header->combined->len -
+ priv->header->name.len - 4;
+ priv->header->value.begin = priv->header->combined->str +
+ priv->header->name.len + 2;
+ priv->header->name.begin = priv->header->combined->str;
+
+ k = kh_put(rspamd_http_headers_hash, priv->msg->headers, &priv->header->name,
+ &r);
+
+ if (r != 0) {
+ kh_value(priv->msg->headers, k) = priv->header;
+ hdr = NULL;
+ }
+ else {
+ hdr = kh_value(priv->msg->headers, k);
+ }
+
+ DL_APPEND(hdr, priv->header);
+
+ rspamd_http_check_special_header(conn, priv);
+}
+
+static void
+rspamd_http_init_header(struct rspamd_http_connection_private *priv)
+{
+ priv->header = g_malloc0(sizeof(struct rspamd_http_header));
+ priv->header->combined = rspamd_fstring_new();
+}
+
+static gint
+rspamd_http_on_header_field(http_parser *parser,
+ const gchar *at,
+ size_t length)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ if (priv->header == NULL) {
+ rspamd_http_init_header(priv);
+ }
+ else if (priv->flags & RSPAMD_HTTP_CONN_FLAG_NEW_HEADER) {
+ rspamd_http_finish_header(conn, priv);
+ rspamd_http_init_header(priv);
+ }
+
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER;
+ priv->header->combined = rspamd_fstring_append(priv->header->combined,
+ at, length);
+
+ return 0;
+}
+
+static gint
+rspamd_http_on_header_value(http_parser *parser,
+ const gchar *at,
+ size_t length)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ if (priv->header == NULL) {
+ /* Should not happen */
+ return -1;
+ }
+
+ if (!(priv->flags & RSPAMD_HTTP_CONN_FLAG_NEW_HEADER)) {
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_NEW_HEADER;
+ priv->header->combined = rspamd_fstring_append(priv->header->combined,
+ ": ", 2);
+ priv->header->name.len = priv->header->combined->len - 2;
+ }
+
+ priv->header->combined = rspamd_fstring_append(priv->header->combined,
+ at, length);
+
+ return 0;
+}
+
+static int
+rspamd_http_on_headers_complete(http_parser *parser)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+ struct rspamd_http_message *msg;
+ int ret;
+
+ priv = conn->priv;
+ msg = priv->msg;
+
+ if (priv->header != NULL) {
+ rspamd_http_finish_header(conn, priv);
+
+ priv->header = NULL;
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER;
+ }
+
+ if (msg->method == HTTP_HEAD) {
+ /* We don't care about the rest */
+ rspamd_ev_watcher_stop(priv->ctx->event_loop, &priv->ev);
+
+ msg->code = parser->status_code;
+ rspamd_http_connection_ref(conn);
+ ret = conn->finish_handler(conn, msg);
+
+ if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) {
+ rspamd_http_context_push_keepalive(conn->priv->ctx, conn,
+ msg, conn->priv->ctx->event_loop);
+ rspamd_http_connection_reset(conn);
+ }
+ else {
+ conn->finished = TRUE;
+ }
+
+ rspamd_http_connection_unref(conn);
+
+ return ret;
+ }
+
+ /*
+ * HTTP parser sets content length to (-1) when it doesn't know the real
+ * length, for example, in case of chunked encoding.
+ *
+ * Hence, we skip body setup here
+ */
+ if (parser->content_length != ULLONG_MAX && parser->content_length != 0 &&
+ msg->method != HTTP_HEAD) {
+ if (conn->max_size > 0 &&
+ parser->content_length > conn->max_size) {
+ /* Too large message */
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_TOO_LARGE;
+ return -1;
+ }
+
+ if (!rspamd_http_message_set_body(msg, NULL, parser->content_length)) {
+ return -1;
+ }
+ }
+
+ if (parser->flags & F_SPAMC) {
+ msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
+ }
+
+
+ msg->method = parser->method;
+ msg->code = parser->status_code;
+
+ return 0;
+}
+
+static void
+rspamd_http_switch_zc(struct _rspamd_http_privbuf *pbuf,
+ struct rspamd_http_message *msg)
+{
+ pbuf->zc_buf = msg->body_buf.begin + msg->body_buf.len;
+ pbuf->zc_remain = msg->body_buf.allocated_len - msg->body_buf.len;
+}
+
+static int
+rspamd_http_on_body(http_parser *parser, const gchar *at, size_t length)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+ struct rspamd_http_message *msg;
+ struct _rspamd_http_privbuf *pbuf;
+ const gchar *p;
+
+ priv = conn->priv;
+ msg = priv->msg;
+ pbuf = priv->buf;
+ p = at;
+
+ if (!(msg->flags & RSPAMD_HTTP_FLAG_HAS_BODY)) {
+ if (!rspamd_http_message_set_body(msg, NULL, parser->content_length)) {
+ return -1;
+ }
+ }
+
+ if (conn->finished) {
+ return 0;
+ }
+
+ if (conn->max_size > 0 &&
+ msg->body_buf.len + length > conn->max_size) {
+ /* Body length overflow */
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_TOO_LARGE;
+ return -1;
+ }
+
+ if (!pbuf->zc_buf) {
+ if (!rspamd_http_message_append_body(msg, at, length)) {
+ return -1;
+ }
+
+ /* We might have some leftover in our private buffer */
+ if (pbuf->data->len == length) {
+ /* Switch to zero-copy mode */
+ rspamd_http_switch_zc(pbuf, msg);
+ }
+ }
+ else {
+ if (msg->body_buf.begin + msg->body_buf.len != at) {
+ /* Likely chunked encoding */
+ memmove((gchar *) msg->body_buf.begin + msg->body_buf.len, at, length);
+ p = msg->body_buf.begin + msg->body_buf.len;
+ }
+
+ /* Adjust zero-copy buf */
+ msg->body_buf.len += length;
+
+ if (!(msg->flags & RSPAMD_HTTP_FLAG_SHMEM)) {
+ msg->body_buf.c.normal->len += length;
+ }
+
+ pbuf->zc_buf = msg->body_buf.begin + msg->body_buf.len;
+ pbuf->zc_remain = msg->body_buf.allocated_len - msg->body_buf.len;
+ }
+
+ if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) && !IS_CONN_ENCRYPTED(priv)) {
+ /* Incremental update is impossible for encrypted requests so far */
+ return (conn->body_handler(conn, msg, p, length));
+ }
+
+ return 0;
+}
+
+static int
+rspamd_http_on_body_decrypted(http_parser *parser, const gchar *at, size_t length)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ if (priv->header != NULL) {
+ rspamd_http_finish_header(conn, priv);
+ priv->header = NULL;
+ }
+
+ if (conn->finished) {
+ return 0;
+ }
+
+ if (priv->msg->body_buf.len == 0) {
+
+ priv->msg->body_buf.begin = at;
+ priv->msg->method = parser->method;
+ priv->msg->code = parser->status_code;
+ }
+
+ priv->msg->body_buf.len += length;
+
+ return 0;
+}
+
+static int
+rspamd_http_on_headers_complete_decrypted(http_parser *parser)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+ struct rspamd_http_message *msg;
+ int ret;
+
+ priv = conn->priv;
+ msg = priv->msg;
+
+ if (priv->header != NULL) {
+ rspamd_http_finish_header(conn, priv);
+
+ priv->header = NULL;
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER;
+ }
+
+ if (parser->flags & F_SPAMC) {
+ priv->msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
+ }
+
+ if (msg->method == HTTP_HEAD) {
+ /* We don't care about the rest */
+ rspamd_ev_watcher_stop(priv->ctx->event_loop, &priv->ev);
+ msg->code = parser->status_code;
+ rspamd_http_connection_ref(conn);
+ ret = conn->finish_handler(conn, msg);
+
+ if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) {
+ rspamd_http_context_push_keepalive(conn->priv->ctx, conn,
+ msg, conn->priv->ctx->event_loop);
+ rspamd_http_connection_reset(conn);
+ }
+ else {
+ conn->finished = TRUE;
+ }
+
+ rspamd_http_connection_unref(conn);
+
+ return ret;
+ }
+
+ priv->msg->method = parser->method;
+ priv->msg->code = parser->status_code;
+
+ return 0;
+}
+
+static int
+rspamd_http_decrypt_message(struct rspamd_http_connection *conn,
+ struct rspamd_http_connection_private *priv,
+ struct rspamd_cryptobox_pubkey *peer_key)
+{
+ guchar *nonce, *m;
+ const guchar *nm;
+ gsize dec_len;
+ struct rspamd_http_message *msg = priv->msg;
+ struct rspamd_http_header *hdr, *hcur, *hcurtmp;
+ struct http_parser decrypted_parser;
+ struct http_parser_settings decrypted_cb;
+ enum rspamd_cryptobox_mode mode;
+
+ mode = rspamd_keypair_alg(priv->local_key);
+ nonce = msg->body_buf.str;
+ m = msg->body_buf.str + rspamd_cryptobox_nonce_bytes(mode) +
+ rspamd_cryptobox_mac_bytes(mode);
+ dec_len = msg->body_buf.len - rspamd_cryptobox_nonce_bytes(mode) -
+ rspamd_cryptobox_mac_bytes(mode);
+
+ if ((nm = rspamd_pubkey_get_nm(peer_key, priv->local_key)) == NULL) {
+ nm = rspamd_pubkey_calculate_nm(peer_key, priv->local_key);
+ }
+
+ if (!rspamd_cryptobox_decrypt_nm_inplace(m, dec_len, nonce,
+ nm, m - rspamd_cryptobox_mac_bytes(mode), mode)) {
+ msg_err("cannot verify encrypted message, first bytes of the input: %*xs",
+ (gint) MIN(msg->body_buf.len, 64), msg->body_buf.begin);
+ return -1;
+ }
+
+ /* Cleanup message */
+ kh_foreach_value (msg->headers, hdr, {
+ DL_FOREACH_SAFE (hdr, hcur, hcurtmp) {
+ rspamd_fstring_free (hcur->combined);
+ g_free (hcur);
+}
+});
+
+kh_destroy(rspamd_http_headers_hash, msg->headers);
+msg->headers = kh_init(rspamd_http_headers_hash);
+
+if (msg->url != NULL) {
+ msg->url = rspamd_fstring_assign(msg->url, "", 0);
+}
+
+msg->body_buf.len = 0;
+
+memset(&decrypted_parser, 0, sizeof(decrypted_parser));
+http_parser_init(&decrypted_parser,
+ conn->type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE);
+
+memset(&decrypted_cb, 0, sizeof(decrypted_cb));
+decrypted_cb.on_url = rspamd_http_on_url;
+decrypted_cb.on_status = rspamd_http_on_status;
+decrypted_cb.on_header_field = rspamd_http_on_header_field;
+decrypted_cb.on_header_value = rspamd_http_on_header_value;
+decrypted_cb.on_headers_complete = rspamd_http_on_headers_complete_decrypted;
+decrypted_cb.on_body = rspamd_http_on_body_decrypted;
+decrypted_parser.data = conn;
+decrypted_parser.content_length = dec_len;
+
+if (http_parser_execute(&decrypted_parser, &decrypted_cb, m,
+ dec_len) != (size_t) dec_len) {
+ msg_err("HTTP parser error: %s when parsing encrypted request",
+ http_errno_description(decrypted_parser.http_errno));
+ return -1;
+}
+
+return 0;
+}
+
+static int
+rspamd_http_on_message_complete(http_parser *parser)
+{
+ struct rspamd_http_connection *conn =
+ (struct rspamd_http_connection *) parser->data;
+ struct rspamd_http_connection_private *priv;
+ int ret = 0;
+ enum rspamd_cryptobox_mode mode;
+
+ if (conn->finished) {
+ return 0;
+ }
+
+ priv = conn->priv;
+
+ if ((conn->opts & RSPAMD_HTTP_REQUIRE_ENCRYPTION) && !IS_CONN_ENCRYPTED(priv)) {
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED;
+ msg_err("unencrypted connection when encryption has been requested");
+ return -1;
+ }
+
+ if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) == 0 && IS_CONN_ENCRYPTED(priv)) {
+ mode = rspamd_keypair_alg(priv->local_key);
+
+ if (priv->local_key == NULL || priv->msg->peer_key == NULL ||
+ priv->msg->body_buf.len < rspamd_cryptobox_nonce_bytes(mode) +
+ rspamd_cryptobox_mac_bytes(mode)) {
+ msg_err("cannot decrypt message");
+ return -1;
+ }
+
+ /* We have keys, so we can decrypt message */
+ ret = rspamd_http_decrypt_message(conn, priv, priv->msg->peer_key);
+
+ if (ret != 0) {
+ return ret;
+ }
+
+ if (conn->body_handler != NULL) {
+ rspamd_http_connection_ref(conn);
+ ret = conn->body_handler(conn,
+ priv->msg,
+ priv->msg->body_buf.begin,
+ priv->msg->body_buf.len);
+ rspamd_http_connection_unref(conn);
+ }
+ }
+ else if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) == 0 && conn->body_handler) {
+ g_assert(conn->body_handler != NULL);
+ rspamd_http_connection_ref(conn);
+ ret = conn->body_handler(conn,
+ priv->msg,
+ priv->msg->body_buf.begin,
+ priv->msg->body_buf.len);
+ rspamd_http_connection_unref(conn);
+ }
+
+ if (ret == 0) {
+ rspamd_ev_watcher_stop(priv->ctx->event_loop, &priv->ev);
+ rspamd_http_connection_ref(conn);
+ ret = conn->finish_handler(conn, priv->msg);
+
+ if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) {
+ rspamd_http_context_push_keepalive(conn->priv->ctx, conn,
+ priv->msg, conn->priv->ctx->event_loop);
+ rspamd_http_connection_reset(conn);
+ }
+ else {
+ conn->finished = TRUE;
+ }
+
+ rspamd_http_connection_unref(conn);
+ }
+
+ return ret;
+}
+
+static void
+rspamd_http_simple_client_helper(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv;
+ gpointer ssl;
+ gint request_method;
+ GString *prev_host = NULL;
+
+ priv = conn->priv;
+ ssl = priv->ssl;
+ priv->ssl = NULL;
+
+ /* Preserve data */
+ if (priv->msg) {
+ request_method = priv->msg->method;
+ /* Preserve host for keepalive */
+ prev_host = priv->msg->host;
+ priv->msg->host = NULL;
+ }
+
+ rspamd_http_connection_reset(conn);
+ priv->ssl = ssl;
+
+ /* Plan read message */
+
+ if (conn->opts & RSPAMD_HTTP_CLIENT_SHARED) {
+ rspamd_http_connection_read_message_shared(conn, conn->ud,
+ conn->priv->timeout);
+ }
+ else {
+ rspamd_http_connection_read_message(conn, conn->ud,
+ conn->priv->timeout);
+ }
+
+ if (priv->msg) {
+ priv->msg->method = request_method;
+ priv->msg->host = prev_host;
+ }
+ else {
+ if (prev_host) {
+ g_string_free(prev_host, TRUE);
+ }
+ }
+}
+
+static void
+rspamd_http_write_helper(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv;
+ struct iovec *start;
+ guint niov, i;
+ gint flags = 0;
+ gsize remain;
+ gssize r;
+ GError *err;
+ struct iovec *cur_iov;
+ struct msghdr msg;
+
+ priv = conn->priv;
+
+ if (priv->wr_pos == priv->wr_total) {
+ goto call_finish_handler;
+ }
+
+ start = &priv->out[0];
+ niov = priv->outlen;
+ remain = priv->wr_pos;
+ /* We know that niov is small enough for that */
+ if (priv->ssl) {
+ /* Might be recursive! */
+ cur_iov = g_malloc(niov * sizeof(struct iovec));
+ }
+ else {
+ cur_iov = alloca(niov * sizeof(struct iovec));
+ }
+ memcpy(cur_iov, priv->out, niov * sizeof(struct iovec));
+ for (i = 0; i < priv->outlen && remain > 0; i++) {
+ /* Find out the first iov required */
+ start = &cur_iov[i];
+ if (start->iov_len <= remain) {
+ remain -= start->iov_len;
+ start = &cur_iov[i + 1];
+ niov--;
+ }
+ else {
+ start->iov_base = (void *) ((char *) start->iov_base + remain);
+ start->iov_len -= remain;
+ remain = 0;
+ }
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = start;
+ msg.msg_iovlen = MIN(IOV_MAX, niov);
+ g_assert(niov > 0);
+#ifdef MSG_NOSIGNAL
+ flags = MSG_NOSIGNAL;
+#endif
+
+ if (priv->ssl) {
+ r = rspamd_ssl_writev(priv->ssl, msg.msg_iov, msg.msg_iovlen);
+ g_free(cur_iov);
+ }
+ else {
+ r = sendmsg(conn->fd, &msg, flags);
+ }
+
+ if (r == -1) {
+ if (!priv->ssl) {
+ err = g_error_new(HTTP_ERROR, 500, "IO write error: %s", strerror(errno));
+ rspamd_http_connection_ref(conn);
+ conn->error_handler(conn, err);
+ rspamd_http_connection_unref(conn);
+ g_error_free(err);
+ }
+
+ return;
+ }
+ else {
+ priv->wr_pos += r;
+ }
+
+ if (priv->wr_pos >= priv->wr_total) {
+ goto call_finish_handler;
+ }
+ else {
+ /* Want to write more */
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED;
+
+ if (priv->ssl && r > 0) {
+ /* We can write more data... */
+ rspamd_http_write_helper(conn);
+ return;
+ }
+ }
+
+ return;
+
+call_finish_handler:
+ rspamd_ev_watcher_stop(priv->ctx->event_loop, &priv->ev);
+
+ if ((conn->opts & RSPAMD_HTTP_CLIENT_SIMPLE) == 0) {
+ rspamd_http_connection_ref(conn);
+ conn->finished = TRUE;
+ conn->finish_handler(conn, priv->msg);
+ rspamd_http_connection_unref(conn);
+ }
+ else {
+ /* Plan read message */
+ rspamd_http_simple_client_helper(conn);
+ }
+}
+
+static gssize
+rspamd_http_try_read(gint fd,
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_connection_private *priv,
+ struct _rspamd_http_privbuf *pbuf,
+ const gchar **buf_ptr)
+{
+ gssize r;
+ gchar *data;
+ gsize len;
+ struct rspamd_http_message *msg;
+
+ msg = priv->msg;
+
+ if (pbuf->zc_buf == NULL) {
+ data = priv->buf->data->str;
+ len = priv->buf->data->allocated;
+ }
+ else {
+ data = (gchar *) pbuf->zc_buf;
+ len = pbuf->zc_remain;
+
+ if (len == 0) {
+ rspamd_http_message_grow_body(priv->msg, priv->buf->data->allocated);
+ rspamd_http_switch_zc(pbuf, msg);
+ data = (gchar *) pbuf->zc_buf;
+ len = pbuf->zc_remain;
+ }
+ }
+
+ if (priv->ssl) {
+ r = rspamd_ssl_read(priv->ssl, data, len);
+ }
+ else {
+ r = read(fd, data, len);
+ }
+
+ if (r <= 0) {
+ return r;
+ }
+ else {
+ if (pbuf->zc_buf == NULL) {
+ priv->buf->data->len = r;
+ }
+ else {
+ pbuf->zc_remain -= r;
+ pbuf->zc_buf += r;
+ }
+ }
+
+ if (buf_ptr) {
+ *buf_ptr = data;
+ }
+
+ return r;
+}
+
+static void
+rspamd_http_ssl_err_handler(gpointer ud, GError *err)
+{
+ struct rspamd_http_connection *conn = (struct rspamd_http_connection *) ud;
+
+ rspamd_http_connection_ref(conn);
+ conn->error_handler(conn, err);
+ rspamd_http_connection_unref(conn);
+}
+
+static void
+rspamd_http_event_handler(int fd, short what, gpointer ud)
+{
+ struct rspamd_http_connection *conn = (struct rspamd_http_connection *) ud;
+ struct rspamd_http_connection_private *priv;
+ struct _rspamd_http_privbuf *pbuf;
+ const gchar *d;
+ gssize r;
+ GError *err;
+
+ priv = conn->priv;
+ pbuf = priv->buf;
+ REF_RETAIN(pbuf);
+ rspamd_http_connection_ref(conn);
+
+ if (what == EV_READ) {
+ r = rspamd_http_try_read(fd, conn, priv, pbuf, &d);
+
+ if (r > 0) {
+ if (http_parser_execute(&priv->parser, &priv->parser_cb,
+ d, r) != (size_t) r ||
+ priv->parser.http_errno != 0) {
+ if (priv->flags & RSPAMD_HTTP_CONN_FLAG_TOO_LARGE) {
+ err = g_error_new(HTTP_ERROR, 413,
+ "Request entity too large: %zu",
+ (size_t) priv->parser.content_length);
+ }
+ else if (priv->flags & RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED) {
+ err = g_error_new(HTTP_ERROR, 400,
+ "Encryption required");
+ }
+ else if (priv->parser.http_errno == HPE_CLOSED_CONNECTION) {
+ msg_err("got garbage after end of the message, ignore it");
+
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ else {
+ if (priv->parser.http_errno > HPE_CB_status) {
+ err = g_error_new(HTTP_ERROR, 400,
+ "HTTP parser error: %s",
+ http_errno_description(priv->parser.http_errno));
+ }
+ else {
+ err = g_error_new(HTTP_ERROR, 500,
+ "HTTP parser internal error: %s",
+ http_errno_description(priv->parser.http_errno));
+ }
+ }
+
+ if (!conn->finished) {
+ conn->error_handler(conn, err);
+ }
+ else {
+ msg_err("got error after HTTP request is finished: %e", err);
+ }
+
+ g_error_free(err);
+
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ }
+ else if (r == 0) {
+ /* We can still call http parser */
+ http_parser_execute(&priv->parser, &priv->parser_cb, d, r);
+
+ if (!conn->finished) {
+ err = g_error_new(HTTP_ERROR,
+ 400,
+ "IO read error: unexpected EOF");
+ conn->error_handler(conn, err);
+ g_error_free(err);
+ }
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ else {
+ if (!priv->ssl) {
+ err = g_error_new(HTTP_ERROR,
+ 500,
+ "HTTP IO read error: %s",
+ strerror(errno));
+ conn->error_handler(conn, err);
+ g_error_free(err);
+ }
+
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ }
+ else if (what == EV_TIMEOUT) {
+ if (!priv->ssl) {
+ /* Let's try to read from the socket first */
+ r = rspamd_http_try_read(fd, conn, priv, pbuf, &d);
+
+ if (r > 0) {
+ if (http_parser_execute(&priv->parser, &priv->parser_cb,
+ d, r) != (size_t) r ||
+ priv->parser.http_errno != 0) {
+ err = g_error_new(HTTP_ERROR, 400,
+ "HTTP parser error: %s",
+ http_errno_description(priv->parser.http_errno));
+
+ if (!conn->finished) {
+ conn->error_handler(conn, err);
+ }
+ else {
+ msg_err("got error after HTTP request is finished: %e", err);
+ }
+
+ g_error_free(err);
+
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ }
+ else {
+ err = g_error_new(HTTP_ERROR, 408,
+ "IO timeout");
+ conn->error_handler(conn, err);
+ g_error_free(err);
+
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ }
+ else {
+ /* In case of SSL we disable this logic as we already came from SSL handler */
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+
+ return;
+ }
+ }
+ else if (what == EV_WRITE) {
+ rspamd_http_write_helper(conn);
+ }
+
+ REF_RELEASE(pbuf);
+ rspamd_http_connection_unref(conn);
+}
+
+static void
+rspamd_http_parser_reset(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+
+ http_parser_init(&priv->parser,
+ conn->type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE);
+
+ priv->parser_cb.on_url = rspamd_http_on_url;
+ priv->parser_cb.on_status = rspamd_http_on_status;
+ priv->parser_cb.on_header_field = rspamd_http_on_header_field;
+ priv->parser_cb.on_header_value = rspamd_http_on_header_value;
+ priv->parser_cb.on_headers_complete = rspamd_http_on_headers_complete;
+ priv->parser_cb.on_body = rspamd_http_on_body;
+ priv->parser_cb.on_message_complete = rspamd_http_on_message_complete;
+}
+
+static struct rspamd_http_connection *
+rspamd_http_connection_new_common(struct rspamd_http_context *ctx,
+ gint fd,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ enum rspamd_http_connection_type type,
+ enum rspamd_http_priv_flags priv_flags,
+ struct upstream *proxy_upstream)
+{
+ struct rspamd_http_connection *conn;
+ struct rspamd_http_connection_private *priv;
+
+ g_assert(error_handler != NULL && finish_handler != NULL);
+
+ if (ctx == NULL) {
+ ctx = rspamd_http_context_default();
+ }
+
+ conn = g_malloc0(sizeof(struct rspamd_http_connection));
+ conn->opts = opts;
+ conn->type = type;
+ conn->body_handler = body_handler;
+ conn->error_handler = error_handler;
+ conn->finish_handler = finish_handler;
+ conn->fd = fd;
+ conn->ref = 1;
+ conn->finished = FALSE;
+
+ /* Init priv */
+ priv = g_malloc0(sizeof(struct rspamd_http_connection_private));
+ conn->priv = priv;
+ priv->ctx = ctx;
+ priv->flags = priv_flags;
+
+ if (type == RSPAMD_HTTP_SERVER) {
+ priv->cache = ctx->server_kp_cache;
+ }
+ else {
+ priv->cache = ctx->client_kp_cache;
+ if (ctx->client_kp) {
+ priv->local_key = rspamd_keypair_ref(ctx->client_kp);
+ }
+ }
+
+ rspamd_http_parser_reset(conn);
+ priv->parser.data = conn;
+
+ return conn;
+}
+
+struct rspamd_http_connection *
+rspamd_http_connection_new_server(struct rspamd_http_context *ctx,
+ gint fd,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts)
+{
+ return rspamd_http_connection_new_common(ctx, fd, body_handler,
+ error_handler, finish_handler, opts, RSPAMD_HTTP_SERVER, 0, NULL);
+}
+
+struct rspamd_http_connection *
+rspamd_http_connection_new_client_socket(struct rspamd_http_context *ctx,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ gint fd)
+{
+ return rspamd_http_connection_new_common(ctx, fd, body_handler,
+ error_handler, finish_handler, opts, RSPAMD_HTTP_CLIENT, 0, NULL);
+}
+
+struct rspamd_http_connection *
+rspamd_http_connection_new_client(struct rspamd_http_context *ctx,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ rspamd_inet_addr_t *addr)
+{
+ gint fd;
+
+ if (ctx == NULL) {
+ ctx = rspamd_http_context_default();
+ }
+
+ if (ctx->http_proxies) {
+ struct upstream *up = rspamd_upstream_get(ctx->http_proxies,
+ RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0);
+
+ if (up) {
+ rspamd_inet_addr_t *proxy_addr = rspamd_upstream_addr_next(up);
+
+ fd = rspamd_inet_address_connect(proxy_addr, SOCK_STREAM, TRUE);
+
+ if (fd == -1) {
+ msg_info("cannot connect to http proxy %s: %s",
+ rspamd_inet_address_to_string_pretty(proxy_addr),
+ strerror(errno));
+ rspamd_upstream_fail(up, TRUE, strerror(errno));
+
+ return NULL;
+ }
+
+ return rspamd_http_connection_new_common(ctx, fd, body_handler,
+ error_handler, finish_handler, opts,
+ RSPAMD_HTTP_CLIENT,
+ RSPAMD_HTTP_CONN_OWN_SOCKET | RSPAMD_HTTP_CONN_FLAG_PROXY,
+ up);
+ }
+ }
+
+ /* Unproxied version */
+ fd = rspamd_inet_address_connect(addr, SOCK_STREAM, TRUE);
+
+ if (fd == -1) {
+ msg_info("cannot connect make http connection to %s: %s",
+ rspamd_inet_address_to_string_pretty(addr),
+ strerror(errno));
+
+ return NULL;
+ }
+
+ return rspamd_http_connection_new_common(ctx, fd, body_handler,
+ error_handler, finish_handler, opts,
+ RSPAMD_HTTP_CLIENT,
+ RSPAMD_HTTP_CONN_OWN_SOCKET,
+ NULL);
+}
+
+struct rspamd_http_connection *
+rspamd_http_connection_new_client_keepalive(struct rspamd_http_context *ctx,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ rspamd_inet_addr_t *addr,
+ const gchar *host)
+{
+ struct rspamd_http_connection *conn;
+
+ if (ctx == NULL) {
+ ctx = rspamd_http_context_default();
+ }
+
+ conn = rspamd_http_context_check_keepalive(ctx, addr, host,
+ opts & RSPAMD_HTTP_CLIENT_SSL);
+
+ if (conn) {
+ return conn;
+ }
+
+ conn = rspamd_http_connection_new_client(ctx,
+ body_handler, error_handler, finish_handler,
+ opts | RSPAMD_HTTP_CLIENT_SIMPLE | RSPAMD_HTTP_CLIENT_KEEP_ALIVE,
+ addr);
+
+ if (conn) {
+ rspamd_http_context_prepare_keepalive(ctx, conn, addr, host,
+ opts & RSPAMD_HTTP_CLIENT_SSL);
+ }
+
+ return conn;
+}
+
+void rspamd_http_connection_reset(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv;
+ struct rspamd_http_message *msg;
+
+ priv = conn->priv;
+ msg = priv->msg;
+
+ /* Clear request */
+ if (msg != NULL) {
+ if (msg->peer_key) {
+ priv->peer_key = msg->peer_key;
+ msg->peer_key = NULL;
+ }
+ rspamd_http_message_unref(msg);
+ priv->msg = NULL;
+ }
+
+ conn->finished = FALSE;
+ /* Clear priv */
+ rspamd_ev_watcher_stop(priv->ctx->event_loop, &priv->ev);
+
+ if (!(priv->flags & RSPAMD_HTTP_CONN_FLAG_RESETED)) {
+ rspamd_http_parser_reset(conn);
+ }
+
+ if (priv->buf != NULL) {
+ REF_RELEASE(priv->buf);
+ priv->buf = NULL;
+ }
+
+ if (priv->out != NULL) {
+ g_free(priv->out);
+ priv->out = NULL;
+ }
+
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_RESETED;
+}
+
+struct rspamd_http_message *
+rspamd_http_connection_steal_msg(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv;
+ struct rspamd_http_message *msg;
+
+ priv = conn->priv;
+ msg = priv->msg;
+
+ /* Clear request */
+ if (msg != NULL) {
+ if (msg->peer_key) {
+ priv->peer_key = msg->peer_key;
+ msg->peer_key = NULL;
+ }
+ priv->msg = NULL;
+ }
+
+ return msg;
+}
+
+struct rspamd_http_message *
+rspamd_http_connection_copy_msg(struct rspamd_http_message *msg, GError **err)
+{
+ struct rspamd_http_message *new_msg;
+ struct rspamd_http_header *hdr, *nhdr, *nhdrs, *hcur;
+ const gchar *old_body;
+ gsize old_len;
+ struct stat st;
+ union _rspamd_storage_u *storage;
+
+ new_msg = rspamd_http_new_message(msg->type);
+ new_msg->flags = msg->flags;
+
+ if (msg->body_buf.len > 0) {
+
+ if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) {
+ /* Avoid copying by just mapping a shared segment */
+ new_msg->flags |= RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE;
+
+ storage = &new_msg->body_buf.c;
+ storage->shared.shm_fd = dup(msg->body_buf.c.shared.shm_fd);
+
+ if (storage->shared.shm_fd == -1) {
+ rspamd_http_message_unref(new_msg);
+ g_set_error(err, http_error_quark(), errno,
+ "cannot dup shmem fd: %d: %s",
+ msg->body_buf.c.shared.shm_fd, strerror(errno));
+
+ return NULL;
+ }
+
+ if (fstat(storage->shared.shm_fd, &st) == -1) {
+ g_set_error(err, http_error_quark(), errno,
+ "cannot stat shmem fd: %d: %s",
+ storage->shared.shm_fd, strerror(errno));
+ rspamd_http_message_unref(new_msg);
+
+ return NULL;
+ }
+
+ /* We don't own segment, so do not try to touch it */
+
+ if (msg->body_buf.c.shared.name) {
+ storage->shared.name = msg->body_buf.c.shared.name;
+ REF_RETAIN(storage->shared.name);
+ }
+
+ new_msg->body_buf.str = mmap(NULL, st.st_size,
+ PROT_READ, MAP_SHARED,
+ storage->shared.shm_fd, 0);
+
+ if (new_msg->body_buf.str == MAP_FAILED) {
+ g_set_error(err, http_error_quark(), errno,
+ "cannot mmap shmem fd: %d: %s",
+ storage->shared.shm_fd, strerror(errno));
+ rspamd_http_message_unref(new_msg);
+
+ return NULL;
+ }
+
+ new_msg->body_buf.begin = new_msg->body_buf.str;
+ new_msg->body_buf.len = msg->body_buf.len;
+ new_msg->body_buf.begin = new_msg->body_buf.str +
+ (msg->body_buf.begin - msg->body_buf.str);
+ }
+ else {
+ old_body = rspamd_http_message_get_body(msg, &old_len);
+
+ if (!rspamd_http_message_set_body(new_msg, old_body, old_len)) {
+ g_set_error(err, http_error_quark(), errno,
+ "cannot set body for message, length: %zd",
+ old_len);
+ rspamd_http_message_unref(new_msg);
+
+ return NULL;
+ }
+ }
+ }
+
+ if (msg->url) {
+ if (new_msg->url) {
+ new_msg->url = rspamd_fstring_append(new_msg->url, msg->url->str,
+ msg->url->len);
+ }
+ else {
+ new_msg->url = rspamd_fstring_new_init(msg->url->str,
+ msg->url->len);
+ }
+ }
+
+ if (msg->host) {
+ new_msg->host = g_string_new_len(msg->host->str, msg->host->len);
+ }
+
+ new_msg->method = msg->method;
+ new_msg->port = msg->port;
+ new_msg->date = msg->date;
+ new_msg->last_modified = msg->last_modified;
+
+ kh_foreach_value(msg->headers, hdr, {
+ nhdrs = NULL;
+
+ DL_FOREACH(hdr, hcur)
+ {
+ nhdr = g_malloc(sizeof(struct rspamd_http_header));
+
+ nhdr->combined = rspamd_fstring_new_init(hcur->combined->str,
+ hcur->combined->len);
+ nhdr->name.begin = nhdr->combined->str +
+ (hcur->name.begin - hcur->combined->str);
+ nhdr->name.len = hcur->name.len;
+ nhdr->value.begin = nhdr->combined->str +
+ (hcur->value.begin - hcur->combined->str);
+ nhdr->value.len = hcur->value.len;
+ DL_APPEND(nhdrs, nhdr);
+ }
+
+ gint r;
+ khiter_t k = kh_put(rspamd_http_headers_hash, new_msg->headers,
+ &nhdrs->name, &r);
+
+ if (r != 0) {
+ kh_value(new_msg->headers, k) = nhdrs;
+ }
+ else {
+ DL_CONCAT(kh_value(new_msg->headers, k), nhdrs);
+ }
+ });
+
+ return new_msg;
+}
+
+void rspamd_http_connection_free(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ if (priv != NULL) {
+ rspamd_http_connection_reset(conn);
+
+ if (priv->ssl) {
+ rspamd_ssl_connection_free(priv->ssl);
+ priv->ssl = NULL;
+ }
+
+ if (priv->local_key) {
+ rspamd_keypair_unref(priv->local_key);
+ }
+ if (priv->peer_key) {
+ rspamd_pubkey_unref(priv->peer_key);
+ }
+
+ if (priv->flags & RSPAMD_HTTP_CONN_OWN_SOCKET) {
+ /* Fd is owned by a connection */
+ close(conn->fd);
+ }
+
+ g_free(priv);
+ }
+
+ g_free(conn);
+}
+
+static void
+rspamd_http_connection_read_message_common(struct rspamd_http_connection *conn,
+ gpointer ud, ev_tstamp timeout,
+ gint flags)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+ struct rspamd_http_message *req;
+
+ conn->ud = ud;
+ req = rspamd_http_new_message(
+ conn->type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE);
+ priv->msg = req;
+ req->flags = flags;
+
+ if (flags & RSPAMD_HTTP_FLAG_SHMEM) {
+ req->body_buf.c.shared.shm_fd = -1;
+ }
+
+ if (priv->peer_key) {
+ priv->msg->peer_key = priv->peer_key;
+ priv->peer_key = NULL;
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTED;
+ }
+
+ priv->timeout = timeout;
+ priv->header = NULL;
+ priv->buf = g_malloc0(sizeof(*priv->buf));
+ REF_INIT_RETAIN(priv->buf, rspamd_http_privbuf_dtor);
+ priv->buf->data = rspamd_fstring_sized_new(8192);
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_NEW_HEADER;
+
+ if (!priv->ssl) {
+ rspamd_ev_watcher_init(&priv->ev, conn->fd, EV_READ,
+ rspamd_http_event_handler, conn);
+ rspamd_ev_watcher_start(priv->ctx->event_loop, &priv->ev, priv->timeout);
+ }
+ else {
+ rspamd_ssl_connection_restore_handlers(priv->ssl,
+ rspamd_http_event_handler,
+ rspamd_http_ssl_err_handler,
+ conn,
+ EV_READ);
+ }
+
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED;
+}
+
+void rspamd_http_connection_read_message(struct rspamd_http_connection *conn,
+ gpointer ud, ev_tstamp timeout)
+{
+ rspamd_http_connection_read_message_common(conn, ud, timeout, 0);
+}
+
+void rspamd_http_connection_read_message_shared(struct rspamd_http_connection *conn,
+ gpointer ud, ev_tstamp timeout)
+{
+ rspamd_http_connection_read_message_common(conn, ud, timeout,
+ RSPAMD_HTTP_FLAG_SHMEM);
+}
+
+static void
+rspamd_http_connection_encrypt_message(
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ struct rspamd_http_connection_private *priv,
+ guchar *pbody,
+ guint bodylen,
+ guchar *pmethod,
+ guint methodlen,
+ guint preludelen,
+ gint hdrcount,
+ guchar *np,
+ guchar *mp,
+ struct rspamd_cryptobox_pubkey *peer_key)
+{
+ struct rspamd_cryptobox_segment *segments;
+ guchar *crlfp;
+ const guchar *nm;
+ gint i, cnt;
+ guint outlen;
+ struct rspamd_http_header *hdr, *hcur;
+ enum rspamd_cryptobox_mode mode;
+
+ mode = rspamd_keypair_alg(priv->local_key);
+ crlfp = mp + rspamd_cryptobox_mac_bytes(mode);
+
+ outlen = priv->out[0].iov_len + priv->out[1].iov_len;
+ /*
+ * Create segments from the following:
+ * Method, [URL], CRLF, nheaders, CRLF, body
+ */
+ segments = g_new(struct rspamd_cryptobox_segment, hdrcount + 5);
+
+ segments[0].data = pmethod;
+ segments[0].len = methodlen;
+
+ if (conn->type != RSPAMD_HTTP_SERVER) {
+ segments[1].data = msg->url->str;
+ segments[1].len = msg->url->len;
+ /* space + HTTP version + crlf */
+ segments[2].data = crlfp;
+ segments[2].len = preludelen - 2;
+ crlfp += segments[2].len;
+ i = 3;
+ }
+ else {
+ /* Here we send just CRLF */
+ segments[1].data = crlfp;
+ segments[1].len = 2;
+ crlfp += segments[1].len;
+
+ i = 2;
+ }
+
+
+ kh_foreach_value (msg->headers, hdr, {
+ DL_FOREACH (hdr, hcur) {
+ segments[i].data = hcur->combined->str;
+ segments[i++].len = hcur->combined->len;
+}
+});
+
+/* crlfp should point now at the second crlf */
+segments[i].data = crlfp;
+segments[i++].len = 2;
+
+if (pbody) {
+ segments[i].data = pbody;
+ segments[i++].len = bodylen;
+}
+
+cnt = i;
+
+if ((nm = rspamd_pubkey_get_nm(peer_key, priv->local_key)) == NULL) {
+ nm = rspamd_pubkey_calculate_nm(peer_key, priv->local_key);
+}
+
+rspamd_cryptobox_encryptv_nm_inplace(segments, cnt, np, nm, mp, mode);
+
+/*
+ * iov[0] = base HTTP request
+ * iov[1] = CRLF
+ * iov[2] = nonce
+ * iov[3] = mac
+ * iov[4..i] = encrypted HTTP request/reply
+ */
+priv->out[2].iov_base = np;
+priv->out[2].iov_len = rspamd_cryptobox_nonce_bytes(mode);
+priv->out[3].iov_base = mp;
+priv->out[3].iov_len = rspamd_cryptobox_mac_bytes(mode);
+
+outlen += rspamd_cryptobox_nonce_bytes(mode) +
+ rspamd_cryptobox_mac_bytes(mode);
+
+for (i = 0; i < cnt; i++) {
+ priv->out[i + 4].iov_base = segments[i].data;
+ priv->out[i + 4].iov_len = segments[i].len;
+ outlen += segments[i].len;
+}
+
+priv->wr_total = outlen;
+
+g_free(segments);
+}
+
+static void
+rspamd_http_detach_shared(struct rspamd_http_message *msg)
+{
+ rspamd_fstring_t *cpy_str;
+
+ cpy_str = rspamd_fstring_new_init(msg->body_buf.begin, msg->body_buf.len);
+ rspamd_http_message_set_body_from_fstring_steal(msg, cpy_str);
+}
+
+gint rspamd_http_message_write_header(const gchar *mime_type, gboolean encrypted,
+ gchar *repbuf, gsize replen, gsize bodylen, gsize enclen, const gchar *host,
+ struct rspamd_http_connection *conn, struct rspamd_http_message *msg,
+ rspamd_fstring_t **buf,
+ struct rspamd_http_connection_private *priv,
+ struct rspamd_cryptobox_pubkey *peer_key)
+{
+ gchar datebuf[64];
+ gint meth_len = 0;
+ const gchar *conn_type = "close";
+
+ if (conn->type == RSPAMD_HTTP_SERVER) {
+ /* Format reply */
+ if (msg->method < HTTP_SYMBOLS) {
+ rspamd_ftok_t status;
+
+ rspamd_http_date_format(datebuf, sizeof(datebuf), msg->date);
+
+ if (mime_type == NULL) {
+ mime_type =
+ encrypted ? "application/octet-stream" : "text/plain";
+ }
+
+ if (msg->status == NULL || msg->status->len == 0) {
+ if (msg->code == 200) {
+ RSPAMD_FTOK_ASSIGN(&status, "OK");
+ }
+ else if (msg->code == 404) {
+ RSPAMD_FTOK_ASSIGN(&status, "Not Found");
+ }
+ else if (msg->code == 403) {
+ RSPAMD_FTOK_ASSIGN(&status, "Forbidden");
+ }
+ else if (msg->code >= 500 && msg->code < 600) {
+ RSPAMD_FTOK_ASSIGN(&status, "Internal Server Error");
+ }
+ else {
+ RSPAMD_FTOK_ASSIGN(&status, "Undefined Error");
+ }
+ }
+ else {
+ status.begin = msg->status->str;
+ status.len = msg->status->len;
+ }
+
+ if (encrypted) {
+ /* Internal reply (encrypted) */
+ if (mime_type) {
+ meth_len =
+ rspamd_snprintf(repbuf, replen,
+ "HTTP/1.1 %d %T\r\n"
+ "Connection: close\r\n"
+ "Server: %s\r\n"
+ "Date: %s\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: %s", /* NO \r\n at the end ! */
+ msg->code, &status, priv->ctx->config.server_hdr,
+ datebuf,
+ bodylen, mime_type);
+ }
+ else {
+ meth_len =
+ rspamd_snprintf(repbuf, replen,
+ "HTTP/1.1 %d %T\r\n"
+ "Connection: close\r\n"
+ "Server: %s\r\n"
+ "Date: %s\r\n"
+ "Content-Length: %z", /* NO \r\n at the end ! */
+ msg->code, &status, priv->ctx->config.server_hdr,
+ datebuf,
+ bodylen);
+ }
+ enclen += meth_len;
+ /* External reply */
+ rspamd_printf_fstring(buf,
+ "HTTP/1.1 200 OK\r\n"
+ "Connection: close\r\n"
+ "Server: %s\r\n"
+ "Date: %s\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: application/octet-stream\r\n",
+ priv->ctx->config.server_hdr,
+ datebuf, enclen);
+ }
+ else {
+ if (mime_type) {
+ meth_len =
+ rspamd_printf_fstring(buf,
+ "HTTP/1.1 %d %T\r\n"
+ "Connection: close\r\n"
+ "Server: %s\r\n"
+ "Date: %s\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: %s\r\n",
+ msg->code, &status, priv->ctx->config.server_hdr,
+ datebuf,
+ bodylen, mime_type);
+ }
+ else {
+ meth_len =
+ rspamd_printf_fstring(buf,
+ "HTTP/1.1 %d %T\r\n"
+ "Connection: close\r\n"
+ "Server: %s\r\n"
+ "Date: %s\r\n"
+ "Content-Length: %z\r\n",
+ msg->code, &status, priv->ctx->config.server_hdr,
+ datebuf,
+ bodylen);
+ }
+ }
+ }
+ else {
+ /* Legacy spamd reply */
+ if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) {
+ gsize real_bodylen;
+ goffset eoh_pos;
+ GString tmp;
+
+ /* Unfortunately, spamc protocol is deadly brain damaged */
+ tmp.str = (gchar *) msg->body_buf.begin;
+ tmp.len = msg->body_buf.len;
+
+ if (rspamd_string_find_eoh(&tmp, &eoh_pos) != -1 &&
+ bodylen > eoh_pos) {
+ real_bodylen = bodylen - eoh_pos;
+ }
+ else {
+ real_bodylen = bodylen;
+ }
+
+ rspamd_printf_fstring(buf, "SPAMD/1.1 0 EX_OK\r\n"
+ "Content-length: %z\r\n",
+ real_bodylen);
+ }
+ else {
+ rspamd_printf_fstring(buf, "RSPAMD/1.3 0 EX_OK\r\n");
+ }
+ }
+ }
+ else {
+
+ /* Client request */
+ if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) {
+ conn_type = "keep-alive";
+ }
+
+ /* Format request */
+ enclen += RSPAMD_FSTRING_LEN(msg->url) +
+ strlen(http_method_str(msg->method)) + 1;
+
+ if (host == NULL && msg->host == NULL) {
+ /* Fallback to HTTP/1.0 */
+ if (encrypted) {
+ rspamd_printf_fstring(buf,
+ "%s %s HTTP/1.0\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: application/octet-stream\r\n"
+ "Connection: %s\r\n",
+ "POST",
+ "/post",
+ enclen,
+ conn_type);
+ }
+ else {
+ rspamd_printf_fstring(buf,
+ "%s %V HTTP/1.0\r\n"
+ "Content-Length: %z\r\n"
+ "Connection: %s\r\n",
+ http_method_str(msg->method),
+ msg->url,
+ bodylen,
+ conn_type);
+
+ if (bodylen > 0) {
+ if (mime_type == NULL) {
+ mime_type = "text/plain";
+ }
+
+ rspamd_printf_fstring(buf,
+ "Content-Type: %s\r\n",
+ mime_type);
+ }
+ }
+ }
+ else {
+ /* Normal HTTP/1.1 with Host */
+ if (host == NULL) {
+ host = msg->host->str;
+ }
+
+ if (encrypted) {
+ /* TODO: Add proxy support to HTTPCrypt */
+ if (rspamd_http_message_is_standard_port(msg)) {
+ rspamd_printf_fstring(buf,
+ "%s %s HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Host: %s\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: application/octet-stream\r\n",
+ "POST",
+ "/post",
+ conn_type,
+ host,
+ enclen);
+ }
+ else {
+ rspamd_printf_fstring(buf,
+ "%s %s HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Host: %s:%d\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: application/octet-stream\r\n",
+ "POST",
+ "/post",
+ conn_type,
+ host,
+ msg->port,
+ enclen);
+ }
+ }
+ else {
+ if (conn->priv->flags & RSPAMD_HTTP_CONN_FLAG_PROXY) {
+ /* Write proxied request */
+ if ((msg->flags & RSPAMD_HTTP_FLAG_HAS_HOST_HEADER)) {
+ rspamd_printf_fstring(buf,
+ "%s %s://%s:%d/%V HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Content-Length: %z\r\n",
+ http_method_str(msg->method),
+ (conn->opts & RSPAMD_HTTP_CLIENT_SSL) ? "https" : "http",
+ host,
+ msg->port,
+ msg->url,
+ conn_type,
+ bodylen);
+ }
+ else {
+ if (rspamd_http_message_is_standard_port(msg)) {
+ rspamd_printf_fstring(buf,
+ "%s %s://%s:%d/%V HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Host: %s\r\n"
+ "Content-Length: %z\r\n",
+ http_method_str(msg->method),
+ (conn->opts & RSPAMD_HTTP_CLIENT_SSL) ? "https" : "http",
+ host,
+ msg->port,
+ msg->url,
+ conn_type,
+ host,
+ bodylen);
+ }
+ else {
+ rspamd_printf_fstring(buf,
+ "%s %s://%s:%d/%V HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Host: %s:%d\r\n"
+ "Content-Length: %z\r\n",
+ http_method_str(msg->method),
+ (conn->opts & RSPAMD_HTTP_CLIENT_SSL) ? "https" : "http",
+ host,
+ msg->port,
+ msg->url,
+ conn_type,
+ host,
+ msg->port,
+ bodylen);
+ }
+ }
+ }
+ else {
+ /* Unproxied version */
+ if ((msg->flags & RSPAMD_HTTP_FLAG_HAS_HOST_HEADER)) {
+ rspamd_printf_fstring(buf,
+ "%s %V HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Content-Length: %z\r\n",
+ http_method_str(msg->method),
+ msg->url,
+ conn_type,
+ bodylen);
+ }
+ else {
+ if (rspamd_http_message_is_standard_port(msg)) {
+ rspamd_printf_fstring(buf,
+ "%s %V HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Host: %s\r\n"
+ "Content-Length: %z\r\n",
+ http_method_str(msg->method),
+ msg->url,
+ conn_type,
+ host,
+ bodylen);
+ }
+ else {
+ rspamd_printf_fstring(buf,
+ "%s %V HTTP/1.1\r\n"
+ "Connection: %s\r\n"
+ "Host: %s:%d\r\n"
+ "Content-Length: %z\r\n",
+ http_method_str(msg->method),
+ msg->url,
+ conn_type,
+ host,
+ msg->port,
+ bodylen);
+ }
+ }
+ }
+
+ if (bodylen > 0) {
+ if (mime_type != NULL) {
+ rspamd_printf_fstring(buf,
+ "Content-Type: %s\r\n",
+ mime_type);
+ }
+ }
+ }
+ }
+
+ if (encrypted) {
+ GString *b32_key, *b32_id;
+
+ b32_key = rspamd_keypair_print(priv->local_key,
+ RSPAMD_KEYPAIR_PUBKEY | RSPAMD_KEYPAIR_BASE32);
+ b32_id = rspamd_pubkey_print(peer_key,
+ RSPAMD_KEYPAIR_ID_SHORT | RSPAMD_KEYPAIR_BASE32);
+ /* XXX: add some fuzz here */
+ rspamd_printf_fstring(&*buf, "Key: %v=%v\r\n", b32_id, b32_key);
+ g_string_free(b32_key, TRUE);
+ g_string_free(b32_id, TRUE);
+ }
+ }
+
+ return meth_len;
+}
+
+static gboolean
+rspamd_http_connection_write_message_common(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *host,
+ const gchar *mime_type,
+ gpointer ud,
+ ev_tstamp timeout,
+ gboolean allow_shared)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+ struct rspamd_http_header *hdr, *hcur;
+ gchar repbuf[512], *pbody;
+ gint i, hdrcount, meth_len = 0, preludelen = 0;
+ gsize bodylen, enclen = 0;
+ rspamd_fstring_t *buf;
+ gboolean encrypted = FALSE;
+ guchar nonce[rspamd_cryptobox_MAX_NONCEBYTES], mac[rspamd_cryptobox_MAX_MACBYTES];
+ guchar *np = NULL, *mp = NULL, *meth_pos = NULL;
+ struct rspamd_cryptobox_pubkey *peer_key = NULL;
+ enum rspamd_cryptobox_mode mode;
+ GError *err;
+
+ conn->ud = ud;
+ priv->msg = msg;
+ priv->timeout = timeout;
+
+ priv->header = NULL;
+ priv->buf = g_malloc0(sizeof(*priv->buf));
+ REF_INIT_RETAIN(priv->buf, rspamd_http_privbuf_dtor);
+ priv->buf->data = rspamd_fstring_sized_new(512);
+ buf = priv->buf->data;
+
+ if ((msg->flags & RSPAMD_HTTP_FLAG_WANT_SSL) && !(conn->opts & RSPAMD_HTTP_CLIENT_SSL)) {
+ err = g_error_new(HTTP_ERROR, 400,
+ "SSL connection requested but not created properly, internal error");
+ rspamd_http_connection_ref(conn);
+ conn->error_handler(conn, err);
+ rspamd_http_connection_unref(conn);
+ g_error_free(err);
+ return FALSE;
+ }
+
+ if (priv->peer_key && priv->local_key) {
+ priv->msg->peer_key = priv->peer_key;
+ priv->peer_key = NULL;
+ priv->flags |= RSPAMD_HTTP_CONN_FLAG_ENCRYPTED;
+ }
+
+ if (msg->peer_key != NULL) {
+ if (priv->local_key == NULL) {
+ /* Automatically generate a temporary keypair */
+ priv->local_key = rspamd_keypair_new(RSPAMD_KEYPAIR_KEX,
+ RSPAMD_CRYPTOBOX_MODE_25519);
+ }
+
+ encrypted = TRUE;
+
+ if (priv->cache) {
+ rspamd_keypair_cache_process(priv->cache,
+ priv->local_key, priv->msg->peer_key);
+ }
+ }
+
+ if (encrypted && (msg->flags &
+ (RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE | RSPAMD_HTTP_FLAG_SHMEM))) {
+ /* We cannot use immutable body to encrypt message in place */
+ allow_shared = FALSE;
+ rspamd_http_detach_shared(msg);
+ }
+
+ if (allow_shared) {
+ gchar tmpbuf[64];
+
+ if (!(msg->flags & RSPAMD_HTTP_FLAG_SHMEM) ||
+ msg->body_buf.c.shared.name == NULL) {
+ allow_shared = FALSE;
+ }
+ else {
+ /* Insert new headers */
+ rspamd_http_message_add_header(msg, "Shm",
+ msg->body_buf.c.shared.name->shm_name);
+ rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "%d",
+ (int) (msg->body_buf.begin - msg->body_buf.str));
+ rspamd_http_message_add_header(msg, "Shm-Offset",
+ tmpbuf);
+ rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "%z",
+ msg->body_buf.len);
+ rspamd_http_message_add_header(msg, "Shm-Length",
+ tmpbuf);
+ }
+ }
+
+ if (priv->ctx->config.user_agent && conn->type == RSPAMD_HTTP_CLIENT) {
+ rspamd_ftok_t srch;
+ khiter_t k;
+ gint r;
+
+ RSPAMD_FTOK_ASSIGN(&srch, "User-Agent");
+
+ k = kh_put(rspamd_http_headers_hash, msg->headers, &srch, &r);
+
+ if (r != 0) {
+ hdr = g_malloc0(sizeof(struct rspamd_http_header));
+ guint vlen = strlen(priv->ctx->config.user_agent);
+ hdr->combined = rspamd_fstring_sized_new(srch.len + vlen + 4);
+ rspamd_printf_fstring(&hdr->combined, "%T: %*s\r\n", &srch, vlen,
+ priv->ctx->config.user_agent);
+ hdr->name.begin = hdr->combined->str;
+ hdr->name.len = srch.len;
+ hdr->value.begin = hdr->combined->str + srch.len + 2;
+ hdr->value.len = vlen;
+ hdr->prev = hdr; /* for utlists */
+
+ kh_value(msg->headers, k) = hdr;
+ /* as we searched using static buffer */
+ kh_key(msg->headers, k) = &hdr->name;
+ }
+ }
+
+ if (encrypted) {
+ mode = rspamd_keypair_alg(priv->local_key);
+
+ if (msg->body_buf.len == 0) {
+ pbody = NULL;
+ bodylen = 0;
+ msg->method = HTTP_GET;
+ }
+ else {
+ pbody = (gchar *) msg->body_buf.begin;
+ bodylen = msg->body_buf.len;
+ msg->method = HTTP_POST;
+ }
+
+ if (conn->type == RSPAMD_HTTP_SERVER) {
+ /*
+ * iov[0] = base reply
+ * iov[1] = CRLF
+ * iov[2] = nonce
+ * iov[3] = mac
+ * iov[4] = encrypted reply
+ * iov[6] = encrypted crlf
+ * iov[7..n] = encrypted headers
+ * iov[n + 1] = encrypted crlf
+ * [iov[n + 2] = encrypted body]
+ */
+ priv->outlen = 7;
+ enclen = rspamd_cryptobox_nonce_bytes(mode) +
+ rspamd_cryptobox_mac_bytes(mode) +
+ 4 + /* 2 * CRLF */
+ bodylen;
+ }
+ else {
+ /*
+ * iov[0] = base request
+ * iov[1] = CRLF
+ * iov[2] = nonce
+ * iov[3] = mac
+ * iov[4] = encrypted method + space
+ * iov[5] = encrypted url
+ * iov[7] = encrypted prelude
+ * iov[8..n] = encrypted headers
+ * iov[n + 1] = encrypted crlf
+ * [iov[n + 2] = encrypted body]
+ */
+ priv->outlen = 8;
+
+ if (bodylen > 0) {
+ if (mime_type != NULL) {
+ preludelen = rspamd_snprintf(repbuf, sizeof(repbuf), "%s\r\n"
+ "Content-Length: %z\r\n"
+ "Content-Type: %s\r\n"
+ "\r\n",
+ ENCRYPTED_VERSION, bodylen,
+ mime_type);
+ }
+ else {
+ preludelen = rspamd_snprintf(repbuf, sizeof(repbuf), "%s\r\n"
+ "Content-Length: %z\r\n"
+ ""
+ "\r\n",
+ ENCRYPTED_VERSION, bodylen);
+ }
+ }
+ else {
+ preludelen = rspamd_snprintf(repbuf, sizeof(repbuf),
+ "%s\r\n\r\n",
+ ENCRYPTED_VERSION);
+ }
+
+ enclen = rspamd_cryptobox_nonce_bytes(mode) +
+ rspamd_cryptobox_mac_bytes(mode) +
+ preludelen + /* version [content-length] + 2 * CRLF */
+ bodylen;
+ }
+
+ if (bodylen > 0) {
+ priv->outlen++;
+ }
+ }
+ else {
+ if (msg->method < HTTP_SYMBOLS) {
+ if (msg->body_buf.len == 0 || allow_shared) {
+ pbody = NULL;
+ bodylen = 0;
+ priv->outlen = 2;
+
+ if (msg->method == HTTP_INVALID) {
+ msg->method = HTTP_GET;
+ }
+ }
+ else {
+ pbody = (gchar *) msg->body_buf.begin;
+ bodylen = msg->body_buf.len;
+ priv->outlen = 3;
+
+ if (msg->method == HTTP_INVALID) {
+ msg->method = HTTP_POST;
+ }
+ }
+ }
+ else if (msg->body_buf.len > 0) {
+ allow_shared = FALSE;
+ pbody = (gchar *) msg->body_buf.begin;
+ bodylen = msg->body_buf.len;
+ priv->outlen = 2;
+ }
+ else {
+ /* Invalid body for spamc method */
+ abort();
+ }
+ }
+
+ peer_key = msg->peer_key;
+
+ priv->wr_total = bodylen + 2;
+
+ hdrcount = 0;
+
+ if (msg->method < HTTP_SYMBOLS) {
+ kh_foreach_value (msg->headers, hdr, {
+ DL_FOREACH (hdr, hcur) {
+ /* <name: value\r\n> */
+ priv->wr_total += hcur->combined->len;
+ enclen += hcur->combined->len;
+ priv->outlen ++;
+ hdrcount ++;
+ }
+});
+}
+
+/* Allocate iov */
+priv->out = g_malloc0(sizeof(struct iovec) * priv->outlen);
+priv->wr_pos = 0;
+
+meth_len = rspamd_http_message_write_header(mime_type, encrypted,
+ repbuf, sizeof(repbuf), bodylen, enclen,
+ host, conn, msg,
+ &buf, priv, peer_key);
+priv->wr_total += buf->len;
+
+/* Setup external request body */
+priv->out[0].iov_base = buf->str;
+priv->out[0].iov_len = buf->len;
+
+/* Buf will be used eventually for encryption */
+if (encrypted) {
+ gint meth_offset, nonce_offset, mac_offset;
+ mode = rspamd_keypair_alg(priv->local_key);
+
+ ottery_rand_bytes(nonce, rspamd_cryptobox_nonce_bytes(mode));
+ memset(mac, 0, rspamd_cryptobox_mac_bytes(mode));
+ meth_offset = buf->len;
+
+ if (conn->type == RSPAMD_HTTP_SERVER) {
+ buf = rspamd_fstring_append(buf, repbuf, meth_len);
+ }
+ else {
+ meth_len = strlen(http_method_str(msg->method)) + 1; /* + space */
+ buf = rspamd_fstring_append(buf, http_method_str(msg->method),
+ meth_len - 1);
+ buf = rspamd_fstring_append(buf, " ", 1);
+ }
+
+ nonce_offset = buf->len;
+ buf = rspamd_fstring_append(buf, nonce,
+ rspamd_cryptobox_nonce_bytes(mode));
+ mac_offset = buf->len;
+ buf = rspamd_fstring_append(buf, mac,
+ rspamd_cryptobox_mac_bytes(mode));
+
+ /* Need to be encrypted */
+ if (conn->type == RSPAMD_HTTP_SERVER) {
+ buf = rspamd_fstring_append(buf, "\r\n\r\n", 4);
+ }
+ else {
+ buf = rspamd_fstring_append(buf, repbuf, preludelen);
+ }
+
+ meth_pos = buf->str + meth_offset;
+ np = buf->str + nonce_offset;
+ mp = buf->str + mac_offset;
+}
+
+/* During previous writes, buf might be reallocated and changed */
+priv->buf->data = buf;
+
+if (encrypted) {
+ /* Finish external HTTP request */
+ priv->out[1].iov_base = "\r\n";
+ priv->out[1].iov_len = 2;
+ /* Encrypt the real request */
+ rspamd_http_connection_encrypt_message(conn, msg, priv, pbody, bodylen,
+ meth_pos, meth_len, preludelen, hdrcount, np, mp, peer_key);
+}
+else {
+ i = 1;
+ if (msg->method < HTTP_SYMBOLS) {
+ kh_foreach_value (msg->headers, hdr, {
+ DL_FOREACH (hdr, hcur) {
+ priv->out[i].iov_base = hcur->combined->str;
+ priv->out[i++].iov_len = hcur->combined->len;
+ }
+});
+
+priv->out[i].iov_base = "\r\n";
+priv->out[i++].iov_len = 2;
+}
+else
+{
+ /* No CRLF for compatibility reply */
+ priv->wr_total -= 2;
+}
+
+if (pbody != NULL) {
+ priv->out[i].iov_base = pbody;
+ priv->out[i++].iov_len = bodylen;
+}
+}
+
+priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED;
+
+if ((priv->flags & RSPAMD_HTTP_CONN_FLAG_PROXY) && (conn->opts & RSPAMD_HTTP_CLIENT_SSL)) {
+ /* We need to disable SSL flag! */
+ err = g_error_new(HTTP_ERROR, 400, "cannot use proxy for SSL connections");
+ rspamd_http_connection_ref(conn);
+ conn->error_handler(conn, err);
+ rspamd_http_connection_unref(conn);
+ g_error_free(err);
+ return FALSE;
+}
+
+rspamd_ev_watcher_stop(priv->ctx->event_loop, &priv->ev);
+
+if (conn->opts & RSPAMD_HTTP_CLIENT_SSL) {
+ gpointer ssl_ctx = (msg->flags & RSPAMD_HTTP_FLAG_SSL_NOVERIFY) ? priv->ctx->ssl_ctx_noverify : priv->ctx->ssl_ctx;
+
+ if (!ssl_ctx) {
+ err = g_error_new(HTTP_ERROR, 400, "ssl message requested "
+ "with no ssl ctx");
+ rspamd_http_connection_ref(conn);
+ conn->error_handler(conn, err);
+ rspamd_http_connection_unref(conn);
+ g_error_free(err);
+ return FALSE;
+ }
+ else {
+ if (!priv->ssl) {
+ priv->ssl = rspamd_ssl_connection_new(ssl_ctx, priv->ctx->event_loop,
+ !(msg->flags & RSPAMD_HTTP_FLAG_SSL_NOVERIFY),
+ conn->log_tag);
+ g_assert(priv->ssl != NULL);
+
+ if (!rspamd_ssl_connect_fd(priv->ssl, conn->fd, host, &priv->ev,
+ priv->timeout, rspamd_http_event_handler,
+ rspamd_http_ssl_err_handler, conn)) {
+
+ err = g_error_new(HTTP_ERROR, 400,
+ "ssl connection error: ssl error=%s, errno=%s",
+ ERR_error_string(ERR_get_error(), NULL),
+ strerror(errno));
+ rspamd_http_connection_ref(conn);
+ conn->error_handler(conn, err);
+ rspamd_http_connection_unref(conn);
+ g_error_free(err);
+ return FALSE;
+ }
+ }
+ else {
+ /* Just restore SSL handlers */
+ rspamd_ssl_connection_restore_handlers(priv->ssl,
+ rspamd_http_event_handler,
+ rspamd_http_ssl_err_handler,
+ conn,
+ EV_WRITE);
+ }
+ }
+}
+else {
+ rspamd_ev_watcher_init(&priv->ev, conn->fd, EV_WRITE,
+ rspamd_http_event_handler, conn);
+ rspamd_ev_watcher_start(priv->ctx->event_loop, &priv->ev, priv->timeout);
+}
+
+return TRUE;
+}
+
+gboolean
+rspamd_http_connection_write_message(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *host,
+ const gchar *mime_type,
+ gpointer ud,
+ ev_tstamp timeout)
+{
+ return rspamd_http_connection_write_message_common(conn, msg, host, mime_type,
+ ud, timeout, FALSE);
+}
+
+gboolean
+rspamd_http_connection_write_message_shared(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *host,
+ const gchar *mime_type,
+ gpointer ud,
+ ev_tstamp timeout)
+{
+ return rspamd_http_connection_write_message_common(conn, msg, host, mime_type,
+ ud, timeout, TRUE);
+}
+
+
+void rspamd_http_connection_set_max_size(struct rspamd_http_connection *conn,
+ gsize sz)
+{
+ conn->max_size = sz;
+}
+
+void rspamd_http_connection_set_key(struct rspamd_http_connection *conn,
+ struct rspamd_cryptobox_keypair *key)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+
+ g_assert(key != NULL);
+ priv->local_key = rspamd_keypair_ref(key);
+}
+
+void rspamd_http_connection_own_socket(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+
+ priv->flags |= RSPAMD_HTTP_CONN_OWN_SOCKET;
+}
+
+const struct rspamd_cryptobox_pubkey *
+rspamd_http_connection_get_peer_key(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+
+ if (priv->peer_key) {
+ return priv->peer_key;
+ }
+ else if (priv->msg) {
+ return priv->msg->peer_key;
+ }
+
+ return NULL;
+}
+
+gboolean
+rspamd_http_connection_is_encrypted(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv = conn->priv;
+
+ if (priv->peer_key != NULL) {
+ return TRUE;
+ }
+ else if (priv->msg) {
+ return priv->msg->peer_key != NULL;
+ }
+
+ return FALSE;
+}
+
+GHashTable *
+rspamd_http_message_parse_query(struct rspamd_http_message *msg)
+{
+ GHashTable *res;
+ rspamd_fstring_t *key = NULL, *value = NULL;
+ rspamd_ftok_t *key_tok = NULL, *value_tok = NULL;
+ const gchar *p, *c, *end;
+ struct http_parser_url u;
+ enum {
+ parse_key,
+ parse_eqsign,
+ parse_value,
+ parse_ampersand
+ } state = parse_key;
+
+ res = g_hash_table_new_full(rspamd_ftok_icase_hash,
+ rspamd_ftok_icase_equal,
+ rspamd_fstring_mapped_ftok_free,
+ rspamd_fstring_mapped_ftok_free);
+
+ if (msg->url && msg->url->len > 0) {
+ http_parser_parse_url(msg->url->str, msg->url->len, TRUE, &u);
+
+ if (u.field_set & (1 << UF_QUERY)) {
+ p = msg->url->str + u.field_data[UF_QUERY].off;
+ c = p;
+ end = p + u.field_data[UF_QUERY].len;
+
+ while (p <= end) {
+ switch (state) {
+ case parse_key:
+ if ((p == end || *p == '&') && p > c) {
+ /* We have a single parameter without a value */
+ key = rspamd_fstring_new_init(c, p - c);
+ key_tok = rspamd_ftok_map(key);
+ key_tok->len = rspamd_url_decode(key->str, key->str,
+ key->len);
+
+ value = rspamd_fstring_new_init("", 0);
+ value_tok = rspamd_ftok_map(value);
+
+ g_hash_table_replace(res, key_tok, value_tok);
+ state = parse_ampersand;
+ }
+ else if (*p == '=' && p > c) {
+ /* We have something like key=value */
+ key = rspamd_fstring_new_init(c, p - c);
+ key_tok = rspamd_ftok_map(key);
+ key_tok->len = rspamd_url_decode(key->str, key->str,
+ key->len);
+
+ state = parse_eqsign;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case parse_eqsign:
+ if (*p != '=') {
+ c = p;
+ state = parse_value;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case parse_value:
+ if ((p == end || *p == '&') && p >= c) {
+ g_assert(key != NULL);
+ if (p > c) {
+ value = rspamd_fstring_new_init(c, p - c);
+ value_tok = rspamd_ftok_map(value);
+ value_tok->len = rspamd_url_decode(value->str,
+ value->str,
+ value->len);
+ /* Detect quotes for value */
+ if (value_tok->begin[0] == '"') {
+ memmove(value->str, value->str + 1,
+ value_tok->len - 1);
+ value_tok->len--;
+ }
+ if (value_tok->begin[value_tok->len - 1] == '"') {
+ value_tok->len--;
+ }
+ }
+ else {
+ value = rspamd_fstring_new_init("", 0);
+ value_tok = rspamd_ftok_map(value);
+ }
+
+ g_hash_table_replace(res, key_tok, value_tok);
+ key = value = NULL;
+ key_tok = value_tok = NULL;
+ state = parse_ampersand;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case parse_ampersand:
+ if (p != end && *p != '&') {
+ c = p;
+ state = parse_key;
+ }
+ else {
+ p++;
+ }
+ break;
+ }
+ }
+ }
+
+ if (state != parse_ampersand && key != NULL) {
+ rspamd_fstring_free(key);
+ }
+ }
+
+ return res;
+}
+
+
+struct rspamd_http_message *
+rspamd_http_message_ref(struct rspamd_http_message *msg)
+{
+ REF_RETAIN(msg);
+
+ return msg;
+}
+
+void rspamd_http_message_unref(struct rspamd_http_message *msg)
+{
+ REF_RELEASE(msg);
+}
+
+void rspamd_http_connection_disable_encryption(struct rspamd_http_connection *conn)
+{
+ struct rspamd_http_connection_private *priv;
+
+ priv = conn->priv;
+
+ if (priv) {
+ if (priv->local_key) {
+ rspamd_keypair_unref(priv->local_key);
+ }
+ if (priv->peer_key) {
+ rspamd_pubkey_unref(priv->peer_key);
+ }
+
+ priv->local_key = NULL;
+ priv->peer_key = NULL;
+ priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_ENCRYPTED;
+ }
+} \ No newline at end of file
diff --git a/src/libserver/http/http_connection.h b/src/libserver/http/http_connection.h
new file mode 100644
index 0000000..e98d164
--- /dev/null
+++ b/src/libserver/http/http_connection.h
@@ -0,0 +1,320 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HTTP_H_
+#define HTTP_H_
+
+/**
+ * @file http.h
+ *
+ * This is an interface for HTTP client and conn.
+ * This code uses HTTP parser written by Joyent Inc based on nginx code.
+ */
+
+#include "config.h"
+#include "http_context.h"
+#include "fstring.h"
+#include "ref.h"
+#include "http_message.h"
+#include "http_util.h"
+#include "addr.h"
+
+#include "contrib/libev/ev.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rspamd_http_connection_type {
+ RSPAMD_HTTP_SERVER,
+ RSPAMD_HTTP_CLIENT
+};
+
+struct rspamd_http_header;
+struct rspamd_http_message;
+struct rspamd_http_connection_private;
+struct rspamd_http_connection;
+struct rspamd_http_connection_router;
+struct rspamd_http_connection_entry;
+struct rspamd_keepalive_hash_key;
+
+struct rspamd_storage_shmem {
+ gchar *shm_name;
+ ref_entry_t ref;
+};
+
+/**
+ * Legacy spamc protocol
+ */
+#define RSPAMD_HTTP_FLAG_SPAMC (1 << 0)
+/**
+ * Store body of the message in a shared memory segment
+ */
+#define RSPAMD_HTTP_FLAG_SHMEM (1 << 2)
+/**
+ * Store body of the message in an immutable shared memory segment
+ */
+#define RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE (1 << 3)
+/**
+ * Body has been set for a message
+ */
+#define RSPAMD_HTTP_FLAG_HAS_BODY (1 << 5)
+/**
+ * Do not verify server's certificate
+ */
+#define RSPAMD_HTTP_FLAG_SSL_NOVERIFY (1 << 6)
+/**
+ * Body has been set for a message
+ */
+#define RSPAMD_HTTP_FLAG_HAS_HOST_HEADER (1 << 7)
+/**
+ * Message is intended for SSL connection
+ */
+#define RSPAMD_HTTP_FLAG_WANT_SSL (1 << 8)
+/**
+ * Options for HTTP connection
+ */
+enum rspamd_http_options {
+ RSPAMD_HTTP_BODY_PARTIAL = 1, /**< Call body handler on all body data portions */
+ RSPAMD_HTTP_CLIENT_SIMPLE = 1u << 1, /**< Read HTTP client reply automatically */
+ RSPAMD_HTTP_CLIENT_ENCRYPTED = 1u << 2, /**< Encrypt data for client */
+ RSPAMD_HTTP_CLIENT_SHARED = 1u << 3, /**< Store reply in shared memory */
+ RSPAMD_HTTP_REQUIRE_ENCRYPTION = 1u << 4,
+ RSPAMD_HTTP_CLIENT_KEEP_ALIVE = 1u << 5,
+ RSPAMD_HTTP_CLIENT_SSL = 1u << 6u,
+};
+
+typedef int (*rspamd_http_body_handler_t)(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *chunk,
+ gsize len);
+
+typedef void (*rspamd_http_error_handler_t)(struct rspamd_http_connection *conn,
+ GError *err);
+
+typedef int (*rspamd_http_finish_handler_t)(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg);
+
+/**
+ * HTTP connection structure
+ */
+struct rspamd_http_connection {
+ struct rspamd_http_connection_private *priv;
+ rspamd_http_body_handler_t body_handler;
+ rspamd_http_error_handler_t error_handler;
+ rspamd_http_finish_handler_t finish_handler;
+ gpointer ud;
+ const gchar *log_tag;
+ /* Used for keepalive */
+ struct rspamd_keepalive_hash_key *keepalive_hash_key;
+ gsize max_size;
+ unsigned opts;
+ enum rspamd_http_connection_type type;
+ gboolean finished;
+ gint fd;
+ gint ref;
+};
+
+/**
+ * Creates a new HTTP server connection from an opened FD returned by accept function
+ * @param ctx
+ * @param fd
+ * @param body_handler
+ * @param error_handler
+ * @param finish_handler
+ * @param opts
+ * @return
+ */
+struct rspamd_http_connection *rspamd_http_connection_new_server(
+ struct rspamd_http_context *ctx,
+ gint fd,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts);
+
+/**
+ * Creates or reuses a new keepalive client connection identified by hostname and inet_addr
+ * @param ctx
+ * @param body_handler
+ * @param error_handler
+ * @param finish_handler
+ * @param addr
+ * @param host
+ * @return
+ */
+struct rspamd_http_connection *rspamd_http_connection_new_client_keepalive(
+ struct rspamd_http_context *ctx,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ rspamd_inet_addr_t *addr,
+ const gchar *host);
+
+/**
+ * Creates an ordinary connection using the address specified (if proxy is not set)
+ * @param ctx
+ * @param body_handler
+ * @param error_handler
+ * @param finish_handler
+ * @param opts
+ * @param addr
+ * @return
+ */
+struct rspamd_http_connection *rspamd_http_connection_new_client(
+ struct rspamd_http_context *ctx,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ rspamd_inet_addr_t *addr);
+
+/**
+ * Creates an ordinary client connection using ready file descriptor (ignores proxy)
+ * @param ctx
+ * @param body_handler
+ * @param error_handler
+ * @param finish_handler
+ * @param opts
+ * @param addr
+ * @return
+ */
+struct rspamd_http_connection *rspamd_http_connection_new_client_socket(
+ struct rspamd_http_context *ctx,
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ unsigned opts,
+ gint fd);
+
+/**
+ * Set key pointed by an opaque pointer
+ * @param conn connection structure
+ * @param key opaque key structure
+ */
+void rspamd_http_connection_set_key(struct rspamd_http_connection *conn,
+ struct rspamd_cryptobox_keypair *key);
+
+/**
+ * Transfer ownership on socket to an HTTP connection
+ * @param conn
+ */
+void rspamd_http_connection_own_socket(struct rspamd_http_connection *conn);
+
+/**
+ * Get peer's public key
+ * @param conn connection structure
+ * @return pubkey structure or NULL
+ */
+const struct rspamd_cryptobox_pubkey *rspamd_http_connection_get_peer_key(
+ struct rspamd_http_connection *conn);
+
+/**
+ * Returns TRUE if a connection is encrypted
+ * @param conn
+ * @return
+ */
+gboolean rspamd_http_connection_is_encrypted(struct rspamd_http_connection *conn);
+
+/**
+ * Handle a request using socket fd and user data ud
+ * @param conn connection structure
+ * @param ud opaque user data
+ * @param fd fd to read/write
+ */
+void rspamd_http_connection_read_message(
+ struct rspamd_http_connection *conn,
+ gpointer ud,
+ ev_tstamp timeout);
+
+void rspamd_http_connection_read_message_shared(
+ struct rspamd_http_connection *conn,
+ gpointer ud,
+ ev_tstamp timeout);
+
+/**
+ * Send reply using initialised connection
+ * @param conn connection structure
+ * @param msg HTTP message
+ * @param ud opaque user data
+ * @param fd fd to read/write
+ */
+gboolean rspamd_http_connection_write_message(
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *host,
+ const gchar *mime_type,
+ gpointer ud,
+ ev_tstamp timeout);
+
+gboolean rspamd_http_connection_write_message_shared(
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *host,
+ const gchar *mime_type,
+ gpointer ud,
+ ev_tstamp timeout);
+
+/**
+ * Free connection structure
+ * @param conn
+ */
+void rspamd_http_connection_free(struct rspamd_http_connection *conn);
+
+/**
+ * Increase refcount for a connection
+ * @param conn
+ * @return
+ */
+static inline struct rspamd_http_connection *
+rspamd_http_connection_ref(struct rspamd_http_connection *conn)
+{
+ conn->ref++;
+ return conn;
+}
+
+/**
+ * Decrease a refcount for a connection and free it if refcount is equal to zero
+ * @param conn
+ */
+static void
+rspamd_http_connection_unref(struct rspamd_http_connection *conn)
+{
+ if (--conn->ref <= 0) {
+ rspamd_http_connection_free(conn);
+ }
+}
+
+/**
+ * Reset connection for a new request
+ * @param conn
+ */
+void rspamd_http_connection_reset(struct rspamd_http_connection *conn);
+
+/**
+ * Sets global maximum size for HTTP message being processed
+ * @param sz
+ */
+void rspamd_http_connection_set_max_size(struct rspamd_http_connection *conn,
+ gsize sz);
+
+void rspamd_http_connection_disable_encryption(struct rspamd_http_connection *conn);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTTP_H_ */
diff --git a/src/libserver/http/http_context.c b/src/libserver/http/http_context.c
new file mode 100644
index 0000000..f08e33b
--- /dev/null
+++ b/src/libserver/http/http_context.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "http_context.h"
+#include "http_private.h"
+#include "keypair.h"
+#include "keypairs_cache.h"
+#include "cfg_file.h"
+#include "contrib/libottery/ottery.h"
+#include "contrib/http-parser/http_parser.h"
+#include "ssl_util.h"
+#include "rspamd.h"
+#include "libev_helper.h"
+
+INIT_LOG_MODULE(http_context)
+
+#define msg_debug_http_context(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_http_context_log_id, "http_context", NULL, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+static struct rspamd_http_context *default_ctx = NULL;
+
+struct rspamd_http_keepalive_cbdata {
+ struct rspamd_http_connection *conn;
+ struct rspamd_http_context *ctx;
+ GQueue *queue;
+ GList *link;
+ struct rspamd_io_ev ev;
+};
+
+static void
+rspamd_http_keepalive_queue_cleanup(GQueue *conns)
+{
+ GList *cur;
+
+ cur = conns->head;
+
+ while (cur) {
+ struct rspamd_http_keepalive_cbdata *cbd;
+
+ cbd = (struct rspamd_http_keepalive_cbdata *) cur->data;
+ /* unref call closes fd, so we need to remove ev watcher first! */
+ rspamd_ev_watcher_stop(cbd->ctx->event_loop, &cbd->ev);
+ rspamd_http_connection_unref(cbd->conn);
+ g_free(cbd);
+
+ cur = cur->next;
+ }
+
+ g_queue_clear(conns);
+}
+
+static void
+rspamd_http_context_client_rotate_ev(struct ev_loop *loop, ev_timer *w, int revents)
+{
+ struct rspamd_http_context *ctx = (struct rspamd_http_context *) w->data;
+ gpointer kp;
+
+ w->repeat = rspamd_time_jitter(ctx->config.client_key_rotate_time, 0);
+ msg_debug_http_context("rotate local keypair, next rotate in %.0f seconds",
+ w->repeat);
+
+ ev_timer_again(loop, w);
+
+ kp = ctx->client_kp;
+ ctx->client_kp = rspamd_keypair_new(RSPAMD_KEYPAIR_KEX,
+ RSPAMD_CRYPTOBOX_MODE_25519);
+ rspamd_keypair_unref(kp);
+}
+
+static struct rspamd_http_context *
+rspamd_http_context_new_default(struct rspamd_config *cfg,
+ struct ev_loop *ev_base,
+ struct upstream_ctx *ups_ctx)
+{
+ struct rspamd_http_context *ctx;
+
+ static const int default_kp_size = 1024;
+ static const gdouble default_rotate_time = 120;
+ static const gdouble default_keepalive_interval = 65;
+ static const gchar *default_user_agent = "rspamd-" RSPAMD_VERSION_FULL;
+ static const gchar *default_server_hdr = "rspamd/" RSPAMD_VERSION_FULL;
+
+ ctx = g_malloc0(sizeof(*ctx));
+ ctx->config.kp_cache_size_client = default_kp_size;
+ ctx->config.kp_cache_size_server = default_kp_size;
+ ctx->config.client_key_rotate_time = default_rotate_time;
+ ctx->config.user_agent = default_user_agent;
+ ctx->config.keepalive_interval = default_keepalive_interval;
+ ctx->config.server_hdr = default_server_hdr;
+ ctx->ups_ctx = ups_ctx;
+
+ if (cfg) {
+ ctx->ssl_ctx = cfg->libs_ctx->ssl_ctx;
+ ctx->ssl_ctx_noverify = cfg->libs_ctx->ssl_ctx_noverify;
+ }
+ else {
+ ctx->ssl_ctx = rspamd_init_ssl_ctx();
+ ctx->ssl_ctx_noverify = rspamd_init_ssl_ctx_noverify();
+ }
+
+ ctx->event_loop = ev_base;
+
+ ctx->keep_alive_hash = kh_init(rspamd_keep_alive_hash);
+
+ return ctx;
+}
+
+static void
+rspamd_http_context_parse_proxy(struct rspamd_http_context *ctx,
+ const gchar *name,
+ struct upstream_list **pls)
+{
+ struct http_parser_url u;
+ struct upstream_list *uls;
+
+ if (!ctx->ups_ctx) {
+ msg_err("cannot parse http_proxy %s - upstreams context is undefined", name);
+ return;
+ }
+
+ memset(&u, 0, sizeof(u));
+
+ if (http_parser_parse_url(name, strlen(name), 1, &u) == 0) {
+ if (!(u.field_set & (1u << UF_HOST)) || u.port == 0) {
+ msg_err("cannot parse http(s) proxy %s - invalid host or port", name);
+
+ return;
+ }
+
+ uls = rspamd_upstreams_create(ctx->ups_ctx);
+
+ if (!rspamd_upstreams_parse_line_len(uls,
+ name + u.field_data[UF_HOST].off,
+ u.field_data[UF_HOST].len, u.port, NULL)) {
+ msg_err("cannot parse http(s) proxy %s - invalid data", name);
+
+ rspamd_upstreams_destroy(uls);
+ }
+ else {
+ *pls = uls;
+ msg_info("set http(s) proxy to %s", name);
+ }
+ }
+ else {
+ uls = rspamd_upstreams_create(ctx->ups_ctx);
+
+ if (!rspamd_upstreams_parse_line(uls,
+ name, 3128, NULL)) {
+ msg_err("cannot parse http(s) proxy %s - invalid data", name);
+
+ rspamd_upstreams_destroy(uls);
+ }
+ else {
+ *pls = uls;
+ msg_info("set http(s) proxy to %s", name);
+ }
+ }
+}
+
+static void
+rspamd_http_context_init(struct rspamd_http_context *ctx)
+{
+ if (ctx->config.kp_cache_size_client > 0) {
+ ctx->client_kp_cache = rspamd_keypair_cache_new(ctx->config.kp_cache_size_client);
+ }
+
+ if (ctx->config.kp_cache_size_server > 0) {
+ ctx->server_kp_cache = rspamd_keypair_cache_new(ctx->config.kp_cache_size_server);
+ }
+
+ if (ctx->config.client_key_rotate_time > 0 && ctx->event_loop) {
+ double jittered = rspamd_time_jitter(ctx->config.client_key_rotate_time,
+ 0);
+
+ ev_timer_init(&ctx->client_rotate_ev,
+ rspamd_http_context_client_rotate_ev, jittered, 0);
+ ev_timer_start(ctx->event_loop, &ctx->client_rotate_ev);
+ ctx->client_rotate_ev.data = ctx;
+ }
+
+ if (ctx->config.http_proxy) {
+ rspamd_http_context_parse_proxy(ctx, ctx->config.http_proxy,
+ &ctx->http_proxies);
+ }
+
+ default_ctx = ctx;
+}
+
+struct rspamd_http_context *
+rspamd_http_context_create(struct rspamd_config *cfg,
+ struct ev_loop *ev_base,
+ struct upstream_ctx *ups_ctx)
+{
+ struct rspamd_http_context *ctx;
+ const ucl_object_t *http_obj;
+
+ ctx = rspamd_http_context_new_default(cfg, ev_base, ups_ctx);
+ http_obj = ucl_object_lookup(cfg->cfg_ucl_obj, "http");
+
+ if (http_obj) {
+ const ucl_object_t *server_obj, *client_obj;
+
+ client_obj = ucl_object_lookup(http_obj, "client");
+
+ if (client_obj) {
+ const ucl_object_t *kp_size;
+
+ kp_size = ucl_object_lookup(client_obj, "cache_size");
+
+ if (kp_size) {
+ ctx->config.kp_cache_size_client = ucl_object_toint(kp_size);
+ }
+
+ const ucl_object_t *rotate_time;
+
+ rotate_time = ucl_object_lookup(client_obj, "rotate_time");
+
+ if (rotate_time) {
+ ctx->config.client_key_rotate_time = ucl_object_todouble(rotate_time);
+ }
+
+ const ucl_object_t *user_agent;
+
+ user_agent = ucl_object_lookup(client_obj, "user_agent");
+
+ if (user_agent) {
+ ctx->config.user_agent = ucl_object_tostring(user_agent);
+
+ if (ctx->config.user_agent && strlen(ctx->config.user_agent) == 0) {
+ ctx->config.user_agent = NULL;
+ }
+ }
+
+ const ucl_object_t *server_hdr;
+ server_hdr = ucl_object_lookup(client_obj, "server_hdr");
+
+ if (server_hdr) {
+ ctx->config.server_hdr = ucl_object_tostring(server_hdr);
+
+ if (ctx->config.server_hdr && strlen(ctx->config.server_hdr) == 0) {
+ ctx->config.server_hdr = "";
+ }
+ }
+
+ const ucl_object_t *keepalive_interval;
+
+ keepalive_interval = ucl_object_lookup(client_obj, "keepalive_interval");
+
+ if (keepalive_interval) {
+ ctx->config.keepalive_interval = ucl_object_todouble(keepalive_interval);
+ }
+
+ const ucl_object_t *http_proxy;
+ http_proxy = ucl_object_lookup(client_obj, "http_proxy");
+
+ if (http_proxy) {
+ ctx->config.http_proxy = ucl_object_tostring(http_proxy);
+ }
+ }
+
+ server_obj = ucl_object_lookup(http_obj, "server");
+
+ if (server_obj) {
+ const ucl_object_t *kp_size;
+
+ kp_size = ucl_object_lookup(server_obj, "cache_size");
+
+ if (kp_size) {
+ ctx->config.kp_cache_size_server = ucl_object_toint(kp_size);
+ }
+ }
+ }
+
+ rspamd_http_context_init(ctx);
+
+ return ctx;
+}
+
+
+void rspamd_http_context_free(struct rspamd_http_context *ctx)
+{
+ if (ctx == default_ctx) {
+ default_ctx = NULL;
+ }
+
+ if (ctx->client_kp_cache) {
+ rspamd_keypair_cache_destroy(ctx->client_kp_cache);
+ }
+
+ if (ctx->server_kp_cache) {
+ rspamd_keypair_cache_destroy(ctx->server_kp_cache);
+ }
+
+ if (ctx->config.client_key_rotate_time > 0) {
+ ev_timer_stop(ctx->event_loop, &ctx->client_rotate_ev);
+
+ if (ctx->client_kp) {
+ rspamd_keypair_unref(ctx->client_kp);
+ }
+ }
+
+ struct rspamd_keepalive_hash_key *hk;
+
+ kh_foreach_key(ctx->keep_alive_hash, hk, {
+ msg_debug_http_context("cleanup keepalive elt %s (%s)",
+ rspamd_inet_address_to_string_pretty(hk->addr),
+ hk->host);
+
+ if (hk->host) {
+ g_free(hk->host);
+ }
+
+ rspamd_inet_address_free(hk->addr);
+ rspamd_http_keepalive_queue_cleanup(&hk->conns);
+ g_free(hk);
+ });
+
+ kh_destroy(rspamd_keep_alive_hash, ctx->keep_alive_hash);
+
+ if (ctx->http_proxies) {
+ rspamd_upstreams_destroy(ctx->http_proxies);
+ }
+
+ g_free(ctx);
+}
+
+struct rspamd_http_context *
+rspamd_http_context_create_config(struct rspamd_http_context_cfg *cfg,
+ struct ev_loop *ev_base,
+ struct upstream_ctx *ups_ctx)
+{
+ struct rspamd_http_context *ctx;
+
+ ctx = rspamd_http_context_new_default(NULL, ev_base, ups_ctx);
+ memcpy(&ctx->config, cfg, sizeof(*cfg));
+ rspamd_http_context_init(ctx);
+
+ return ctx;
+}
+
+struct rspamd_http_context *
+rspamd_http_context_default(void)
+{
+ g_assert(default_ctx != NULL);
+
+ return default_ctx;
+}
+
+gint32
+rspamd_keep_alive_key_hash(struct rspamd_keepalive_hash_key *k)
+{
+ rspamd_cryptobox_fast_hash_state_t hst;
+
+ rspamd_cryptobox_fast_hash_init(&hst, 0);
+
+ if (k->host) {
+ rspamd_cryptobox_fast_hash_update(&hst, k->host, strlen(k->host));
+ }
+
+ rspamd_cryptobox_fast_hash_update(&hst, &k->port, sizeof(k->port));
+ rspamd_cryptobox_fast_hash_update(&hst, &k->is_ssl, sizeof(k->is_ssl));
+
+ return rspamd_cryptobox_fast_hash_final(&hst);
+}
+
+bool rspamd_keep_alive_key_equal(struct rspamd_keepalive_hash_key *k1,
+ struct rspamd_keepalive_hash_key *k2)
+{
+ if (k1->is_ssl != k2->is_ssl) {
+ return false;
+ }
+
+ if (k1->host && k2->host) {
+ if (k1->port == k2->port) {
+ return strcmp(k1->host, k2->host) == 0;
+ }
+ }
+ else if (!k1->host && !k2->host) {
+ return (k1->port == k2->port);
+ }
+
+ /* One has host and another has no host */
+ return false;
+}
+
+struct rspamd_http_connection *
+rspamd_http_context_check_keepalive(struct rspamd_http_context *ctx,
+ const rspamd_inet_addr_t *addr,
+ const gchar *host,
+ bool is_ssl)
+{
+ struct rspamd_keepalive_hash_key hk, *phk;
+ khiter_t k;
+
+ if (ctx == NULL) {
+ ctx = rspamd_http_context_default();
+ }
+
+ hk.addr = (rspamd_inet_addr_t *) addr;
+ hk.host = (gchar *) host;
+ hk.port = rspamd_inet_address_get_port(addr);
+ hk.is_ssl = is_ssl;
+
+ k = kh_get(rspamd_keep_alive_hash, ctx->keep_alive_hash, &hk);
+
+ if (k != kh_end(ctx->keep_alive_hash)) {
+ phk = kh_key(ctx->keep_alive_hash, k);
+ GQueue *conns = &phk->conns;
+
+ /* Use stack based approach */
+
+ if (g_queue_get_length(conns) > 0) {
+ struct rspamd_http_keepalive_cbdata *cbd;
+ struct rspamd_http_connection *conn;
+ gint err;
+ socklen_t len = sizeof(gint);
+
+ cbd = g_queue_pop_head(conns);
+ rspamd_ev_watcher_stop(ctx->event_loop, &cbd->ev);
+ conn = cbd->conn;
+ g_free(cbd);
+
+ if (getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, (void *) &err, &len) == -1) {
+ err = errno;
+ }
+
+ if (err != 0) {
+ rspamd_http_connection_unref(conn);
+
+ msg_debug_http_context("invalid reused keepalive element %s (%s, ssl=%d); "
+ "%s error; "
+ "%d connections queued",
+ rspamd_inet_address_to_string_pretty(phk->addr),
+ phk->host,
+ (int) phk->is_ssl,
+ g_strerror(err),
+ conns->length);
+
+ return NULL;
+ }
+
+ msg_debug_http_context("reused keepalive element %s (%s, ssl=%d), %d connections queued",
+ rspamd_inet_address_to_string_pretty(phk->addr),
+ phk->host,
+ (int) phk->is_ssl,
+ conns->length);
+
+ /* We transfer refcount here! */
+ return conn;
+ }
+ else {
+ msg_debug_http_context("found empty keepalive element %s (%s), cannot reuse",
+ rspamd_inet_address_to_string_pretty(phk->addr),
+ phk->host);
+ }
+ }
+
+ return NULL;
+}
+
+const rspamd_inet_addr_t *
+rspamd_http_context_has_keepalive(struct rspamd_http_context *ctx,
+ const gchar *host,
+ unsigned port,
+ bool is_ssl)
+{
+ struct rspamd_keepalive_hash_key hk, *phk;
+ khiter_t k;
+
+ if (ctx == NULL) {
+ ctx = rspamd_http_context_default();
+ }
+
+ hk.host = (gchar *) host;
+ hk.port = port;
+ hk.is_ssl = is_ssl;
+
+ k = kh_get(rspamd_keep_alive_hash, ctx->keep_alive_hash, &hk);
+
+ if (k != kh_end(ctx->keep_alive_hash)) {
+ phk = kh_key(ctx->keep_alive_hash, k);
+ GQueue *conns = &phk->conns;
+
+ if (g_queue_get_length(conns) > 0) {
+ return phk->addr;
+ }
+ }
+
+ return NULL;
+}
+
+void rspamd_http_context_prepare_keepalive(struct rspamd_http_context *ctx,
+ struct rspamd_http_connection *conn,
+ const rspamd_inet_addr_t *addr,
+ const gchar *host,
+ bool is_ssl)
+{
+ struct rspamd_keepalive_hash_key hk, *phk;
+ khiter_t k;
+
+ hk.addr = (rspamd_inet_addr_t *) addr;
+ hk.host = (gchar *) host;
+ hk.is_ssl = is_ssl;
+ hk.port = rspamd_inet_address_get_port(addr);
+
+ k = kh_get(rspamd_keep_alive_hash, ctx->keep_alive_hash, &hk);
+
+ if (k != kh_end(ctx->keep_alive_hash)) {
+ /* Reuse existing */
+ conn->keepalive_hash_key = kh_key(ctx->keep_alive_hash, k);
+ msg_debug_http_context("use existing keepalive element %s (%s)",
+ rspamd_inet_address_to_string_pretty(conn->keepalive_hash_key->addr),
+ conn->keepalive_hash_key->host);
+ }
+ else {
+ /* Create new one */
+ GQueue empty_init = G_QUEUE_INIT;
+ gint r;
+
+ phk = g_malloc(sizeof(*phk));
+ phk->conns = empty_init;
+ phk->host = g_strdup(host);
+ phk->is_ssl = is_ssl;
+ phk->addr = rspamd_inet_address_copy(addr, NULL);
+ phk->port = hk.port;
+
+
+ kh_put(rspamd_keep_alive_hash, ctx->keep_alive_hash, phk, &r);
+ conn->keepalive_hash_key = phk;
+
+ msg_debug_http_context("create new keepalive element %s (%s)",
+ rspamd_inet_address_to_string_pretty(conn->keepalive_hash_key->addr),
+ conn->keepalive_hash_key->host);
+ }
+}
+
+static void
+rspamd_http_keepalive_handler(gint fd, short what, gpointer ud)
+{
+ struct rspamd_http_keepalive_cbdata *cbdata =
+ (struct rspamd_http_keepalive_cbdata *) ud; /*
+ * We can get here if a remote side reported something or it has
+ * timed out. In both cases we just terminate keepalive connection.
+ */
+
+ g_queue_delete_link(cbdata->queue, cbdata->link);
+ msg_debug_http_context("remove keepalive element %s (%s), %d connections left",
+ rspamd_inet_address_to_string_pretty(cbdata->conn->keepalive_hash_key->addr),
+ cbdata->conn->keepalive_hash_key->host,
+ cbdata->queue->length);
+ /* unref call closes fd, so we need to remove ev watcher first! */
+ rspamd_ev_watcher_stop(cbdata->ctx->event_loop, &cbdata->ev);
+ rspamd_http_connection_unref(cbdata->conn);
+ g_free(cbdata);
+}
+
+/* Non-static for unit testing */
+long rspamd_http_parse_keepalive_timeout(const rspamd_ftok_t *tok)
+{
+ long timeout = -1;
+ goffset pos = rspamd_substring_search(tok->begin,
+ tok->len, "timeout", sizeof("timeout") - 1);
+
+ if (pos != -1) {
+ pos += sizeof("timeout") - 1;
+
+ /* Skip spaces and equal sign */
+ while (pos < tok->len) {
+ if (tok->begin[pos] != '=' && !g_ascii_isspace(tok->begin[pos])) {
+ break;
+ }
+ pos++;
+ }
+
+ gsize ndigits = rspamd_memspn(tok->begin + pos, "0123456789", tok->len - pos);
+ glong real_timeout;
+
+ if (ndigits > 0) {
+ if (rspamd_strtoul(tok->begin + pos, ndigits, &real_timeout)) {
+ timeout = real_timeout;
+ msg_debug_http_context("got timeout attr %l", timeout);
+ }
+ }
+ }
+
+ return timeout;
+}
+
+void rspamd_http_context_push_keepalive(struct rspamd_http_context *ctx,
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ struct ev_loop *event_loop)
+{
+ struct rspamd_http_keepalive_cbdata *cbdata;
+ gdouble timeout = ctx->config.keepalive_interval;
+
+ g_assert(conn->keepalive_hash_key != NULL);
+
+ if (msg) {
+ const rspamd_ftok_t *tok;
+ rspamd_ftok_t cmp;
+
+ tok = rspamd_http_message_find_header(msg, "Connection");
+
+ if (!tok) {
+ /* Server has not stated that it can do keep alive */
+ conn->finished = TRUE;
+ msg_debug_http_context("no Connection header");
+ return;
+ }
+
+ RSPAMD_FTOK_ASSIGN(&cmp, "keep-alive");
+
+ if (rspamd_ftok_casecmp(&cmp, tok) != 0) {
+ conn->finished = TRUE;
+ msg_debug_http_context("connection header is not `keep-alive`");
+ return;
+ }
+
+ /* We can proceed, check timeout */
+
+ tok = rspamd_http_message_find_header(msg, "Keep-Alive");
+
+ if (tok) {
+ long maybe_timeout = rspamd_http_parse_keepalive_timeout(tok);
+
+ if (maybe_timeout > 0) {
+ timeout = maybe_timeout;
+ }
+ }
+ }
+
+ /* Move connection to the keepalive pool */
+ cbdata = g_malloc0(sizeof(*cbdata));
+
+ cbdata->conn = rspamd_http_connection_ref(conn);
+ /* Use stack like approach to that would easy reading */
+ g_queue_push_head(&conn->keepalive_hash_key->conns, cbdata);
+ cbdata->link = conn->keepalive_hash_key->conns.head;
+
+ cbdata->queue = &conn->keepalive_hash_key->conns;
+ cbdata->ctx = ctx;
+ conn->finished = FALSE;
+
+ rspamd_ev_watcher_init(&cbdata->ev, conn->fd, EV_READ,
+ rspamd_http_keepalive_handler,
+ cbdata);
+ rspamd_ev_watcher_start(event_loop, &cbdata->ev, timeout);
+
+ msg_debug_http_context("push keepalive element %s (%s), %d connections queued, %.1f timeout",
+ rspamd_inet_address_to_string_pretty(cbdata->conn->keepalive_hash_key->addr),
+ cbdata->conn->keepalive_hash_key->host,
+ cbdata->queue->length,
+ timeout);
+} \ No newline at end of file
diff --git a/src/libserver/http/http_context.h b/src/libserver/http/http_context.h
new file mode 100644
index 0000000..f3622ae
--- /dev/null
+++ b/src/libserver/http/http_context.h
@@ -0,0 +1,122 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTTP_CONTEXT_H
+#define RSPAMD_HTTP_CONTEXT_H
+
+#include "config.h"
+#include "ucl.h"
+#include "addr.h"
+
+#include "contrib/libev/ev.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_http_context;
+struct rspamd_config;
+struct rspamd_http_message;
+struct upstream_ctx;
+
+struct rspamd_http_context_cfg {
+ guint kp_cache_size_client;
+ guint kp_cache_size_server;
+ guint ssl_cache_size;
+ gdouble keepalive_interval;
+ gdouble client_key_rotate_time;
+ const gchar *user_agent;
+ const gchar *http_proxy;
+ const gchar *server_hdr;
+};
+
+/**
+ * Creates and configures new HTTP context
+ * @param root_conf configuration object
+ * @param ev_base event base
+ * @return new context used for both client and server HTTP connections
+ */
+struct rspamd_http_context *rspamd_http_context_create(struct rspamd_config *cfg,
+ struct ev_loop *ev_base,
+ struct upstream_ctx *ctx);
+
+struct rspamd_http_context *rspamd_http_context_create_config(
+ struct rspamd_http_context_cfg *cfg,
+ struct ev_loop *ev_base,
+ struct upstream_ctx *ctx);
+
+/**
+ * Destroys context
+ * @param ctx
+ */
+void rspamd_http_context_free(struct rspamd_http_context *ctx);
+
+struct rspamd_http_context *rspamd_http_context_default(void);
+
+/**
+ * Returns preserved keepalive connection if it's available.
+ * Refcount is transferred to caller!
+ * @param ctx
+ * @param addr
+ * @param host
+ * @return
+ */
+struct rspamd_http_connection *rspamd_http_context_check_keepalive(struct rspamd_http_context *ctx,
+ const rspamd_inet_addr_t *addr,
+ const gchar *host,
+ bool is_ssl);
+
+/**
+ * Checks if there is a valid keepalive connection
+ * @param ctx
+ * @param addr
+ * @param host
+ * @param is_ssl
+ * @return
+ */
+const rspamd_inet_addr_t *rspamd_http_context_has_keepalive(struct rspamd_http_context *ctx,
+ const gchar *host,
+ unsigned port,
+ bool is_ssl);
+
+/**
+ * Prepares keepalive key for a connection by creating a new entry or by reusing existent
+ * Bear in mind, that keepalive pool has currently no cleanup methods!
+ * @param ctx
+ * @param conn
+ * @param addr
+ * @param host
+ */
+void rspamd_http_context_prepare_keepalive(struct rspamd_http_context *ctx, struct rspamd_http_connection *conn,
+ const rspamd_inet_addr_t *addr, const gchar *host, bool is_ssl);
+
+/**
+ * Pushes a connection to keepalive pool after client request is finished,
+ * keepalive key *must* be prepared before using of this function
+ * @param ctx
+ * @param conn
+ * @param msg
+ */
+void rspamd_http_context_push_keepalive(struct rspamd_http_context *ctx,
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ struct ev_loop *ev_base);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/http/http_message.c b/src/libserver/http/http_message.c
new file mode 100644
index 0000000..670122d
--- /dev/null
+++ b/src/libserver/http/http_message.c
@@ -0,0 +1,725 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "http_message.h"
+#include "http_connection.h"
+#include "http_private.h"
+#include "libutil/printf.h"
+#include "libserver/logger.h"
+#include "utlist.h"
+#include "unix-std.h"
+
+struct rspamd_http_message *
+rspamd_http_new_message(enum rspamd_http_message_type type)
+{
+ struct rspamd_http_message *new;
+
+ new = g_malloc0(sizeof(struct rspamd_http_message));
+
+ if (type == HTTP_REQUEST) {
+ new->url = rspamd_fstring_new();
+ }
+ else {
+ new->url = NULL;
+ new->code = 200;
+ }
+
+ new->port = 80;
+ new->type = type;
+ new->method = HTTP_INVALID;
+ new->headers = kh_init(rspamd_http_headers_hash);
+
+ REF_INIT_RETAIN(new, rspamd_http_message_free);
+
+ return new;
+}
+
+struct rspamd_http_message *
+rspamd_http_message_from_url(const gchar *url)
+{
+ struct http_parser_url pu;
+ struct rspamd_http_message *msg;
+ const gchar *host, *path;
+ size_t pathlen, urllen;
+ guint flags = 0;
+
+ if (url == NULL) {
+ return NULL;
+ }
+
+ urllen = strlen(url);
+ memset(&pu, 0, sizeof(pu));
+
+ if (http_parser_parse_url(url, urllen, FALSE, &pu) != 0) {
+ msg_warn("cannot parse URL: %s", url);
+ return NULL;
+ }
+
+ if ((pu.field_set & (1 << UF_HOST)) == 0) {
+ msg_warn("no host argument in URL: %s", url);
+ return NULL;
+ }
+
+ if ((pu.field_set & (1 << UF_SCHEMA))) {
+ if (pu.field_data[UF_SCHEMA].len == sizeof("https") - 1 &&
+ memcmp(url + pu.field_data[UF_SCHEMA].off, "https", 5) == 0) {
+ flags |= RSPAMD_HTTP_FLAG_WANT_SSL;
+ }
+ }
+
+ if ((pu.field_set & (1 << UF_PATH)) == 0) {
+ path = "/";
+ pathlen = 1;
+ }
+ else {
+ path = url + pu.field_data[UF_PATH].off;
+ pathlen = urllen - pu.field_data[UF_PATH].off;
+ }
+
+ msg = rspamd_http_new_message(HTTP_REQUEST);
+ host = url + pu.field_data[UF_HOST].off;
+ msg->flags = flags;
+
+ if ((pu.field_set & (1 << UF_PORT)) != 0) {
+ msg->port = pu.port;
+ }
+ else {
+ /* XXX: magic constant */
+ if (flags & RSPAMD_HTTP_FLAG_WANT_SSL) {
+ msg->port = 443;
+ }
+ else {
+ msg->port = 80;
+ }
+ }
+
+ msg->host = g_string_new_len(host, pu.field_data[UF_HOST].len);
+ msg->url = rspamd_fstring_append(msg->url, path, pathlen);
+
+ REF_INIT_RETAIN(msg, rspamd_http_message_free);
+
+ return msg;
+}
+
+const gchar *
+rspamd_http_message_get_body(struct rspamd_http_message *msg,
+ gsize *blen)
+{
+ const gchar *ret = NULL;
+
+ if (msg->body_buf.len > 0) {
+ ret = msg->body_buf.begin;
+ }
+
+ if (blen) {
+ *blen = msg->body_buf.len;
+ }
+
+ return ret;
+}
+
+static void
+rspamd_http_shname_dtor(void *p)
+{
+ struct rspamd_storage_shmem *n = p;
+
+#ifdef HAVE_SANE_SHMEM
+ shm_unlink(n->shm_name);
+#else
+ unlink(n->shm_name);
+#endif
+ g_free(n->shm_name);
+ g_free(n);
+}
+
+struct rspamd_storage_shmem *
+rspamd_http_message_shmem_ref(struct rspamd_http_message *msg)
+{
+ if ((msg->flags & RSPAMD_HTTP_FLAG_SHMEM) && msg->body_buf.c.shared.name) {
+ REF_RETAIN(msg->body_buf.c.shared.name);
+ return msg->body_buf.c.shared.name;
+ }
+
+ return NULL;
+}
+
+guint rspamd_http_message_get_flags(struct rspamd_http_message *msg)
+{
+ return msg->flags;
+}
+
+void rspamd_http_message_shmem_unref(struct rspamd_storage_shmem *p)
+{
+ REF_RELEASE(p);
+}
+
+gboolean
+rspamd_http_message_set_body(struct rspamd_http_message *msg,
+ const gchar *data, gsize len)
+{
+ union _rspamd_storage_u *storage;
+ storage = &msg->body_buf.c;
+
+ rspamd_http_message_storage_cleanup(msg);
+
+ if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) {
+ storage->shared.name = g_malloc(sizeof(*storage->shared.name));
+ REF_INIT_RETAIN(storage->shared.name, rspamd_http_shname_dtor);
+#ifdef HAVE_SANE_SHMEM
+#if defined(__DragonFly__)
+ // DragonFly uses regular files for shm. User rspamd is not allowed to create
+ // files in the root.
+ storage->shared.name->shm_name = g_strdup("/tmp/rhm.XXXXXXXXXXXXXXXXXXXX");
+#else
+ storage->shared.name->shm_name = g_strdup("/rhm.XXXXXXXXXXXXXXXXXXXX");
+#endif
+ storage->shared.shm_fd = rspamd_shmem_mkstemp(storage->shared.name->shm_name);
+#else
+ /* XXX: assume that tempdir is /tmp */
+ storage->shared.name->shm_name = g_strdup("/tmp/rhm.XXXXXXXXXXXXXXXXXXXX");
+ storage->shared.shm_fd = mkstemp(storage->shared.name->shm_name);
+#endif
+
+ if (storage->shared.shm_fd == -1) {
+ return FALSE;
+ }
+
+ if (len != 0 && len != G_MAXSIZE) {
+ if (ftruncate(storage->shared.shm_fd, len) == -1) {
+ return FALSE;
+ }
+
+ msg->body_buf.str = mmap(NULL, len,
+ PROT_WRITE | PROT_READ, MAP_SHARED,
+ storage->shared.shm_fd, 0);
+
+ if (msg->body_buf.str == MAP_FAILED) {
+ return FALSE;
+ }
+
+ msg->body_buf.begin = msg->body_buf.str;
+ msg->body_buf.allocated_len = len;
+
+ if (data != NULL) {
+ memcpy(msg->body_buf.str, data, len);
+ msg->body_buf.len = len;
+ }
+ }
+ else {
+ msg->body_buf.len = 0;
+ msg->body_buf.begin = NULL;
+ msg->body_buf.str = NULL;
+ msg->body_buf.allocated_len = 0;
+ }
+ }
+ else {
+ if (len != 0 && len != G_MAXSIZE) {
+ if (data == NULL) {
+ storage->normal = rspamd_fstring_sized_new(len);
+ msg->body_buf.len = 0;
+ }
+ else {
+ storage->normal = rspamd_fstring_new_init(data, len);
+ msg->body_buf.len = len;
+ }
+ }
+ else {
+ storage->normal = rspamd_fstring_new();
+ }
+
+ msg->body_buf.begin = storage->normal->str;
+ msg->body_buf.str = storage->normal->str;
+ msg->body_buf.allocated_len = storage->normal->allocated;
+ }
+
+ msg->flags |= RSPAMD_HTTP_FLAG_HAS_BODY;
+
+ return TRUE;
+}
+
+void rspamd_http_message_set_method(struct rspamd_http_message *msg,
+ const gchar *method)
+{
+ gint i;
+
+ /* Linear search: not very efficient method */
+ for (i = 0; i < HTTP_METHOD_MAX; i++) {
+ if (g_ascii_strcasecmp(method, http_method_str(i)) == 0) {
+ msg->method = i;
+ }
+ }
+}
+
+gboolean
+rspamd_http_message_set_body_from_fd(struct rspamd_http_message *msg,
+ gint fd)
+{
+ union _rspamd_storage_u *storage;
+ struct stat st;
+
+ rspamd_http_message_storage_cleanup(msg);
+
+ storage = &msg->body_buf.c;
+ msg->flags |= RSPAMD_HTTP_FLAG_SHMEM | RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE;
+
+ storage->shared.shm_fd = dup(fd);
+ msg->body_buf.str = MAP_FAILED;
+
+ if (storage->shared.shm_fd == -1) {
+ return FALSE;
+ }
+
+ if (fstat(storage->shared.shm_fd, &st) == -1) {
+ return FALSE;
+ }
+
+ msg->body_buf.str = mmap(NULL, st.st_size,
+ PROT_READ, MAP_SHARED,
+ storage->shared.shm_fd, 0);
+
+ if (msg->body_buf.str == MAP_FAILED) {
+ return FALSE;
+ }
+
+ msg->body_buf.begin = msg->body_buf.str;
+ msg->body_buf.len = st.st_size;
+ msg->body_buf.allocated_len = st.st_size;
+
+ return TRUE;
+}
+
+gboolean
+rspamd_http_message_set_body_from_fstring_steal(struct rspamd_http_message *msg,
+ rspamd_fstring_t *fstr)
+{
+ union _rspamd_storage_u *storage;
+
+ rspamd_http_message_storage_cleanup(msg);
+
+ storage = &msg->body_buf.c;
+ msg->flags &= ~(RSPAMD_HTTP_FLAG_SHMEM | RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE);
+
+ storage->normal = fstr;
+ msg->body_buf.str = fstr->str;
+ msg->body_buf.begin = msg->body_buf.str;
+ msg->body_buf.len = fstr->len;
+ msg->body_buf.allocated_len = fstr->allocated;
+
+ return TRUE;
+}
+
+gboolean
+rspamd_http_message_set_body_from_fstring_copy(struct rspamd_http_message *msg,
+ const rspamd_fstring_t *fstr)
+{
+ union _rspamd_storage_u *storage;
+
+ rspamd_http_message_storage_cleanup(msg);
+
+ storage = &msg->body_buf.c;
+ msg->flags &= ~(RSPAMD_HTTP_FLAG_SHMEM | RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE);
+
+ storage->normal = rspamd_fstring_new_init(fstr->str, fstr->len);
+ msg->body_buf.str = storage->normal->str;
+ msg->body_buf.begin = msg->body_buf.str;
+ msg->body_buf.len = storage->normal->len;
+ msg->body_buf.allocated_len = storage->normal->allocated;
+
+ return TRUE;
+}
+
+
+gboolean
+rspamd_http_message_grow_body(struct rspamd_http_message *msg, gsize len)
+{
+ struct stat st;
+ union _rspamd_storage_u *storage;
+ gsize newlen;
+
+ storage = &msg->body_buf.c;
+
+ if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) {
+ if (storage->shared.shm_fd == -1) {
+ return FALSE;
+ }
+
+ if (fstat(storage->shared.shm_fd, &st) == -1) {
+ return FALSE;
+ }
+
+ /* Check if we need to grow */
+ if ((gsize) st.st_size < msg->body_buf.len + len) {
+ /* Need to grow */
+ newlen = rspamd_fstring_suggest_size(msg->body_buf.len, st.st_size,
+ len);
+ /* Unmap as we need another size of segment */
+ if (msg->body_buf.str != MAP_FAILED) {
+ munmap(msg->body_buf.str, st.st_size);
+ }
+
+ if (ftruncate(storage->shared.shm_fd, newlen) == -1) {
+ return FALSE;
+ }
+
+ msg->body_buf.str = mmap(NULL, newlen,
+ PROT_WRITE | PROT_READ, MAP_SHARED,
+ storage->shared.shm_fd, 0);
+ if (msg->body_buf.str == MAP_FAILED) {
+ return FALSE;
+ }
+
+ msg->body_buf.begin = msg->body_buf.str;
+ msg->body_buf.allocated_len = newlen;
+ }
+ }
+ else {
+ storage->normal = rspamd_fstring_grow(storage->normal, len);
+
+ /* Append might cause realloc */
+ msg->body_buf.begin = storage->normal->str;
+ msg->body_buf.len = storage->normal->len;
+ msg->body_buf.str = storage->normal->str;
+ msg->body_buf.allocated_len = storage->normal->allocated;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_http_message_append_body(struct rspamd_http_message *msg,
+ const gchar *data, gsize len)
+{
+ union _rspamd_storage_u *storage;
+
+ storage = &msg->body_buf.c;
+
+ if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) {
+ if (!rspamd_http_message_grow_body(msg, len)) {
+ return FALSE;
+ }
+
+ memcpy(msg->body_buf.str + msg->body_buf.len, data, len);
+ msg->body_buf.len += len;
+ }
+ else {
+ storage->normal = rspamd_fstring_append(storage->normal, data, len);
+
+ /* Append might cause realloc */
+ msg->body_buf.begin = storage->normal->str;
+ msg->body_buf.len = storage->normal->len;
+ msg->body_buf.str = storage->normal->str;
+ msg->body_buf.allocated_len = storage->normal->allocated;
+ }
+
+ return TRUE;
+}
+
+void rspamd_http_message_storage_cleanup(struct rspamd_http_message *msg)
+{
+ union _rspamd_storage_u *storage;
+ struct stat st;
+
+ if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) {
+ storage = &msg->body_buf.c;
+
+ if (storage->shared.shm_fd > 0) {
+ g_assert(fstat(storage->shared.shm_fd, &st) != -1);
+
+ if (msg->body_buf.str != MAP_FAILED) {
+ munmap(msg->body_buf.str, st.st_size);
+ }
+
+ close(storage->shared.shm_fd);
+ }
+
+ if (storage->shared.name != NULL) {
+ REF_RELEASE(storage->shared.name);
+ }
+
+ storage->shared.shm_fd = -1;
+ msg->body_buf.str = MAP_FAILED;
+ }
+ else {
+ if (msg->body_buf.c.normal) {
+ rspamd_fstring_free(msg->body_buf.c.normal);
+ }
+
+ msg->body_buf.c.normal = NULL;
+ }
+
+ msg->body_buf.len = 0;
+}
+
+void rspamd_http_message_free(struct rspamd_http_message *msg)
+{
+ struct rspamd_http_header *hdr, *hcur, *hcurtmp;
+
+ kh_foreach_value (msg->headers, hdr, {
+ DL_FOREACH_SAFE (hdr, hcur, hcurtmp) {
+ rspamd_fstring_free (hcur->combined);
+ g_free (hcur);
+}
+});
+
+kh_destroy(rspamd_http_headers_hash, msg->headers);
+rspamd_http_message_storage_cleanup(msg);
+
+if (msg->url != NULL) {
+ rspamd_fstring_free(msg->url);
+}
+if (msg->status != NULL) {
+ rspamd_fstring_free(msg->status);
+}
+if (msg->host != NULL) {
+ g_string_free(msg->host, TRUE);
+}
+if (msg->peer_key != NULL) {
+ rspamd_pubkey_unref(msg->peer_key);
+}
+
+g_free(msg);
+}
+
+void rspamd_http_message_set_peer_key(struct rspamd_http_message *msg,
+ struct rspamd_cryptobox_pubkey *pk)
+{
+ if (msg->peer_key != NULL) {
+ rspamd_pubkey_unref(msg->peer_key);
+ }
+
+ if (pk) {
+ msg->peer_key = rspamd_pubkey_ref(pk);
+ }
+ else {
+ msg->peer_key = NULL;
+ }
+}
+
+void rspamd_http_message_add_header_len(struct rspamd_http_message *msg,
+ const gchar *name,
+ const gchar *value,
+ gsize len)
+{
+ struct rspamd_http_header *hdr, *found;
+ guint nlen, vlen;
+ khiter_t k;
+ gint r;
+
+ if (msg != NULL && name != NULL && value != NULL) {
+ hdr = g_malloc0(sizeof(struct rspamd_http_header));
+ nlen = strlen(name);
+ vlen = len;
+
+ if (g_ascii_strcasecmp(name, "host") == 0) {
+ msg->flags |= RSPAMD_HTTP_FLAG_HAS_HOST_HEADER;
+ }
+
+ hdr->combined = rspamd_fstring_sized_new(nlen + vlen + 4);
+ rspamd_printf_fstring(&hdr->combined, "%s: %*s\r\n", name, (gint) vlen,
+ value);
+ hdr->name.begin = hdr->combined->str;
+ hdr->name.len = nlen;
+ hdr->value.begin = hdr->combined->str + nlen + 2;
+ hdr->value.len = vlen;
+
+ k = kh_put(rspamd_http_headers_hash, msg->headers, &hdr->name,
+ &r);
+
+ if (r != 0) {
+ kh_value(msg->headers, k) = hdr;
+ found = NULL;
+ }
+ else {
+ found = kh_value(msg->headers, k);
+ }
+
+ DL_APPEND(found, hdr);
+ }
+}
+
+void rspamd_http_message_add_header(struct rspamd_http_message *msg,
+ const gchar *name,
+ const gchar *value)
+{
+ if (value) {
+ rspamd_http_message_add_header_len(msg, name, value, strlen(value));
+ }
+}
+
+void rspamd_http_message_add_header_fstr(struct rspamd_http_message *msg,
+ const gchar *name,
+ rspamd_fstring_t *value)
+{
+ struct rspamd_http_header *hdr, *found = NULL;
+ guint nlen, vlen;
+ khiter_t k;
+ gint r;
+
+ if (msg != NULL && name != NULL && value != NULL) {
+ hdr = g_malloc0(sizeof(struct rspamd_http_header));
+ nlen = strlen(name);
+ vlen = value->len;
+ hdr->combined = rspamd_fstring_sized_new(nlen + vlen + 4);
+ rspamd_printf_fstring(&hdr->combined, "%s: %V\r\n", name, value);
+ hdr->name.begin = hdr->combined->str;
+ hdr->name.len = nlen;
+ hdr->value.begin = hdr->combined->str + nlen + 2;
+ hdr->value.len = vlen;
+
+ k = kh_put(rspamd_http_headers_hash, msg->headers, &hdr->name,
+ &r);
+
+ if (r != 0) {
+ kh_value(msg->headers, k) = hdr;
+ found = NULL;
+ }
+ else {
+ found = kh_value(msg->headers, k);
+ }
+
+ DL_APPEND(found, hdr);
+ }
+}
+
+const rspamd_ftok_t *
+rspamd_http_message_find_header(struct rspamd_http_message *msg,
+ const gchar *name)
+{
+ const rspamd_ftok_t *res = NULL;
+ rspamd_ftok_t srch;
+ guint slen = strlen(name);
+ khiter_t k;
+
+ if (msg != NULL) {
+ srch.begin = name;
+ srch.len = slen;
+
+ k = kh_get(rspamd_http_headers_hash, msg->headers, &srch);
+
+ if (k != kh_end(msg->headers)) {
+ res = &(kh_value(msg->headers, k)->value);
+ }
+ }
+
+ return res;
+}
+
+GPtrArray *
+rspamd_http_message_find_header_multiple(
+ struct rspamd_http_message *msg,
+ const gchar *name)
+{
+ GPtrArray *res = NULL;
+ struct rspamd_http_header *hdr, *cur;
+ rspamd_ftok_t srch;
+ khiter_t k;
+ guint cnt = 0;
+
+ guint slen = strlen(name);
+
+ if (msg != NULL) {
+ srch.begin = name;
+ srch.len = slen;
+
+ k = kh_get(rspamd_http_headers_hash, msg->headers, &srch);
+
+ if (k != kh_end(msg->headers)) {
+ hdr = kh_value(msg->headers, k);
+
+ LL_COUNT(hdr, cur, cnt);
+ res = g_ptr_array_sized_new(cnt);
+
+ LL_FOREACH(hdr, cur)
+ {
+ g_ptr_array_add(res, &cur->value);
+ }
+ }
+ }
+
+
+ return res;
+}
+
+
+gboolean
+rspamd_http_message_remove_header(struct rspamd_http_message *msg,
+ const gchar *name)
+{
+ struct rspamd_http_header *hdr, *hcur, *hcurtmp;
+ gboolean res = FALSE;
+ guint slen = strlen(name);
+ rspamd_ftok_t srch;
+ khiter_t k;
+
+ if (msg != NULL) {
+ srch.begin = name;
+ srch.len = slen;
+
+ k = kh_get(rspamd_http_headers_hash, msg->headers, &srch);
+
+ if (k != kh_end(msg->headers)) {
+ hdr = kh_value(msg->headers, k);
+ kh_del(rspamd_http_headers_hash, msg->headers, k);
+ res = TRUE;
+
+ DL_FOREACH_SAFE(hdr, hcur, hcurtmp)
+ {
+ rspamd_fstring_free(hcur->combined);
+ g_free(hcur);
+ }
+ }
+ }
+
+ return res;
+}
+
+const gchar *
+rspamd_http_message_get_http_host(struct rspamd_http_message *msg,
+ gsize *hostlen)
+{
+ if (msg->flags & RSPAMD_HTTP_FLAG_HAS_HOST_HEADER) {
+ rspamd_ftok_t srch;
+
+ RSPAMD_FTOK_ASSIGN(&srch, "Host");
+
+ khiter_t k = kh_get(rspamd_http_headers_hash, msg->headers, &srch);
+
+ if (k != kh_end(msg->headers)) {
+ *hostlen = (kh_value(msg->headers, k)->value).len;
+ return (kh_value(msg->headers, k)->value).begin;
+ }
+ else if (msg->host) {
+ *hostlen = msg->host->len;
+ return msg->host->str;
+ }
+ }
+ else {
+ if (msg->host) {
+ *hostlen = msg->host->len;
+ return msg->host->str;
+ }
+ }
+
+ return NULL;
+}
+
+bool rspamd_http_message_is_standard_port(struct rspamd_http_message *msg)
+{
+ if (msg->flags & RSPAMD_HTTP_FLAG_WANT_SSL) {
+ return msg->port == 443;
+ }
+
+ return msg->port == 80;
+} \ No newline at end of file
diff --git a/src/libserver/http/http_message.h b/src/libserver/http/http_message.h
new file mode 100644
index 0000000..fa8ed04
--- /dev/null
+++ b/src/libserver/http/http_message.h
@@ -0,0 +1,254 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_HTTP_MESSAGE_H
+#define RSPAMD_HTTP_MESSAGE_H
+
+#include "config.h"
+#include "keypair.h"
+#include "keypairs_cache.h"
+#include "fstring.h"
+#include "ref.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_http_connection;
+
+enum rspamd_http_message_type {
+ HTTP_REQUEST = 0,
+ HTTP_RESPONSE
+};
+
+/**
+ * Extract the current message from a connection to deal with separately
+ * @param conn
+ * @return
+ */
+struct rspamd_http_message *rspamd_http_connection_steal_msg(
+ struct rspamd_http_connection *conn);
+
+/**
+ * Copy the current message from a connection to deal with separately
+ * @param conn
+ * @return
+ */
+struct rspamd_http_message *rspamd_http_connection_copy_msg(
+ struct rspamd_http_message *msg, GError **err);
+
+/**
+ * Create new HTTP message
+ * @param type request or response
+ * @return new http message
+ */
+struct rspamd_http_message *rspamd_http_new_message(enum rspamd_http_message_type type);
+
+/**
+ * Increase refcount number for an HTTP message
+ * @param msg message to use
+ * @return
+ */
+struct rspamd_http_message *rspamd_http_message_ref(struct rspamd_http_message *msg);
+
+/**
+ * Decrease number of refcounts for http message
+ * @param msg
+ */
+void rspamd_http_message_unref(struct rspamd_http_message *msg);
+
+/**
+ * Sets a key for peer
+ * @param msg
+ * @param pk
+ */
+void rspamd_http_message_set_peer_key(struct rspamd_http_message *msg,
+ struct rspamd_cryptobox_pubkey *pk);
+
+/**
+ * Create HTTP message from URL
+ * @param url
+ * @return new message or NULL
+ */
+struct rspamd_http_message *rspamd_http_message_from_url(const gchar *url);
+
+/**
+ * Returns body for a message
+ * @param msg
+ * @param blen pointer where to save body length
+ * @return pointer to body start
+ */
+const gchar *rspamd_http_message_get_body(struct rspamd_http_message *msg,
+ gsize *blen);
+
+/**
+ * Set message's body from the string
+ * @param msg
+ * @param data
+ * @param len
+ * @return TRUE if a message's body has been set
+ */
+gboolean rspamd_http_message_set_body(struct rspamd_http_message *msg,
+ const gchar *data, gsize len);
+
+/**
+ * Set message's method by name
+ * @param msg
+ * @param method
+ */
+void rspamd_http_message_set_method(struct rspamd_http_message *msg,
+ const gchar *method);
+
+/**
+ * Maps fd as message's body
+ * @param msg
+ * @param fd
+ * @return TRUE if a message's body has been set
+ */
+gboolean rspamd_http_message_set_body_from_fd(struct rspamd_http_message *msg,
+ gint fd);
+
+/**
+ * Uses rspamd_fstring_t as message's body, string is consumed by this operation
+ * @param msg
+ * @param fstr
+ * @return TRUE if a message's body has been set
+ */
+gboolean rspamd_http_message_set_body_from_fstring_steal(struct rspamd_http_message *msg,
+ rspamd_fstring_t *fstr);
+
+/**
+ * Uses rspamd_fstring_t as message's body, string is copied by this operation
+ * @param msg
+ * @param fstr
+ * @return TRUE if a message's body has been set
+ */
+gboolean rspamd_http_message_set_body_from_fstring_copy(struct rspamd_http_message *msg,
+ const rspamd_fstring_t *fstr);
+
+/**
+ * Appends data to message's body
+ * @param msg
+ * @param data
+ * @param len
+ * @return TRUE if a message's body has been set
+ */
+gboolean rspamd_http_message_append_body(struct rspamd_http_message *msg,
+ const gchar *data, gsize len);
+
+/**
+ * Append a header to http message
+ * @param rep
+ * @param name
+ * @param value
+ */
+void rspamd_http_message_add_header(struct rspamd_http_message *msg,
+ const gchar *name,
+ const gchar *value);
+
+void rspamd_http_message_add_header_len(struct rspamd_http_message *msg,
+ const gchar *name,
+ const gchar *value,
+ gsize len);
+
+void rspamd_http_message_add_header_fstr(struct rspamd_http_message *msg,
+ const gchar *name,
+ rspamd_fstring_t *value);
+
+/**
+ * Search for a specified header in message
+ * @param msg message
+ * @param name name of header
+ */
+const rspamd_ftok_t *rspamd_http_message_find_header(
+ struct rspamd_http_message *msg,
+ const gchar *name);
+
+/**
+ * Search for a header that has multiple values
+ * @param msg
+ * @param name
+ * @return list of rspamd_ftok_t * with values
+ */
+GPtrArray *rspamd_http_message_find_header_multiple(
+ struct rspamd_http_message *msg,
+ const gchar *name);
+
+/**
+ * Remove specific header from a message
+ * @param msg
+ * @param name
+ * @return
+ */
+gboolean rspamd_http_message_remove_header(struct rspamd_http_message *msg,
+ const gchar *name);
+
+/**
+ * Free HTTP message
+ * @param msg
+ */
+void rspamd_http_message_free(struct rspamd_http_message *msg);
+
+/**
+ * Extract arguments from a message's URI contained inside query string decoding
+ * them if needed
+ * @param msg HTTP request message
+ * @return new GHashTable which maps rspamd_ftok_t* to rspamd_ftok_t*
+ * (table must be freed by a caller)
+ */
+GHashTable *rspamd_http_message_parse_query(struct rspamd_http_message *msg);
+
+/**
+ * Increase refcount for shared file (if any) to prevent early memory unlinking
+ * @param msg
+ */
+struct rspamd_storage_shmem *rspamd_http_message_shmem_ref(struct rspamd_http_message *msg);
+
+/**
+ * Decrease external ref for shmem segment associated with a message
+ * @param msg
+ */
+void rspamd_http_message_shmem_unref(struct rspamd_storage_shmem *p);
+
+/**
+ * Returns message's flags
+ * @param msg
+ * @return
+ */
+guint rspamd_http_message_get_flags(struct rspamd_http_message *msg);
+
+/**
+ * Returns an HTTP hostname for a message, derived from a header if it has it
+ * or from a url if it doesn't
+ * @param msg
+ * @param hostlen output of the host length
+ * @return
+ */
+const gchar *rspamd_http_message_get_http_host(struct rspamd_http_message *msg,
+ gsize *hostlen);
+
+/**
+ * Returns true if a message has standard port (80 or 443 for https)
+ * @param msg
+ * @return
+ */
+bool rspamd_http_message_is_standard_port(struct rspamd_http_message *msg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/http/http_private.h b/src/libserver/http/http_private.h
new file mode 100644
index 0000000..096545e
--- /dev/null
+++ b/src/libserver/http/http_private.h
@@ -0,0 +1,129 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_HTTP_PRIVATE_H_
+#define SRC_LIBUTIL_HTTP_PRIVATE_H_
+
+#include "http_connection.h"
+#include "http_parser.h"
+#include "str_util.h"
+#include "keypair.h"
+#include "keypairs_cache.h"
+#include "ref.h"
+#include "upstream.h"
+#include "khash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * HTTP header structure
+ */
+struct rspamd_http_header {
+ rspamd_fstring_t *combined;
+ rspamd_ftok_t name;
+ rspamd_ftok_t value;
+ struct rspamd_http_header *prev, *next;
+};
+
+KHASH_INIT(rspamd_http_headers_hash, rspamd_ftok_t *,
+ struct rspamd_http_header *, 1,
+ rspamd_ftok_icase_hash, rspamd_ftok_icase_equal);
+
+/**
+ * HTTP message structure, used for requests and replies
+ */
+struct rspamd_http_message {
+ rspamd_fstring_t *url;
+ GString *host;
+ rspamd_fstring_t *status;
+ khash_t(rspamd_http_headers_hash) * headers;
+
+ struct _rspamd_body_buf_s {
+ /* Data start */
+ const gchar *begin;
+ /* Data len */
+ gsize len;
+ /* Allocated len */
+ gsize allocated_len;
+ /* Data buffer (used to write data inside) */
+ gchar *str;
+
+ /* Internal storage */
+ union _rspamd_storage_u {
+ rspamd_fstring_t *normal;
+ struct _rspamd_storage_shared_s {
+ struct rspamd_storage_shmem *name;
+ gint shm_fd;
+ } shared;
+ } c;
+ } body_buf;
+
+ struct rspamd_cryptobox_pubkey *peer_key;
+ time_t date;
+ time_t last_modified;
+ unsigned port;
+ int type;
+ gint code;
+ enum http_method method;
+ gint flags;
+ ref_entry_t ref;
+};
+
+struct rspamd_keepalive_hash_key {
+ rspamd_inet_addr_t *addr;
+ gchar *host;
+ gboolean is_ssl;
+ unsigned port;
+ GQueue conns;
+};
+
+gint32 rspamd_keep_alive_key_hash(struct rspamd_keepalive_hash_key *k);
+
+bool rspamd_keep_alive_key_equal(struct rspamd_keepalive_hash_key *k1,
+ struct rspamd_keepalive_hash_key *k2);
+
+KHASH_INIT(rspamd_keep_alive_hash, struct rspamd_keepalive_hash_key *,
+ char, 0, rspamd_keep_alive_key_hash, rspamd_keep_alive_key_equal);
+
+struct rspamd_http_context {
+ struct rspamd_http_context_cfg config;
+ struct rspamd_keypair_cache *client_kp_cache;
+ struct rspamd_cryptobox_keypair *client_kp;
+ struct rspamd_keypair_cache *server_kp_cache;
+ struct upstream_ctx *ups_ctx;
+ struct upstream_list *http_proxies;
+ gpointer ssl_ctx;
+ gpointer ssl_ctx_noverify;
+ struct ev_loop *event_loop;
+ ev_timer client_rotate_ev;
+ khash_t(rspamd_keep_alive_hash) * keep_alive_hash;
+};
+
+#define HTTP_ERROR http_error_quark()
+
+GQuark http_error_quark(void);
+
+void rspamd_http_message_storage_cleanup(struct rspamd_http_message *msg);
+
+gboolean rspamd_http_message_grow_body(struct rspamd_http_message *msg,
+ gsize len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_HTTP_PRIVATE_H_ */
diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c
new file mode 100644
index 0000000..2fdfe48
--- /dev/null
+++ b/src/libserver/http/http_router.c
@@ -0,0 +1,559 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "http_router.h"
+#include "http_connection.h"
+#include "http_private.h"
+#include "libutil/regexp.h"
+#include "libutil/printf.h"
+#include "libserver/logger.h"
+#include "utlist.h"
+#include "unix-std.h"
+
+enum http_magic_type {
+ HTTP_MAGIC_PLAIN = 0,
+ HTTP_MAGIC_HTML,
+ HTTP_MAGIC_CSS,
+ HTTP_MAGIC_JS,
+ HTTP_MAGIC_ICO,
+ HTTP_MAGIC_PNG,
+ HTTP_MAGIC_JPG
+};
+
+static const struct _rspamd_http_magic {
+ const gchar *ext;
+ const gchar *ct;
+} http_file_types[] = {
+ [HTTP_MAGIC_PLAIN] = {"txt", "text/plain"},
+ [HTTP_MAGIC_HTML] = {"html", "text/html"},
+ [HTTP_MAGIC_CSS] = {"css", "text/css"},
+ [HTTP_MAGIC_JS] = {"js", "application/javascript"},
+ [HTTP_MAGIC_ICO] = {"ico", "image/x-icon"},
+ [HTTP_MAGIC_PNG] = {"png", "image/png"},
+ [HTTP_MAGIC_JPG] = {"jpg", "image/jpeg"},
+};
+
+/*
+ * HTTP router functions
+ */
+
+static void
+rspamd_http_entry_free(struct rspamd_http_connection_entry *entry)
+{
+ if (entry != NULL) {
+ close(entry->conn->fd);
+ rspamd_http_connection_unref(entry->conn);
+ if (entry->rt->finish_handler) {
+ entry->rt->finish_handler(entry);
+ }
+
+ DL_DELETE(entry->rt->conns, entry);
+ g_free(entry);
+ }
+}
+
+static void
+rspamd_http_router_error_handler(struct rspamd_http_connection *conn,
+ GError *err)
+{
+ struct rspamd_http_connection_entry *entry = conn->ud;
+ struct rspamd_http_message *msg;
+
+ if (entry->is_reply) {
+ /* At this point we need to finish this session and close owned socket */
+ if (entry->rt->error_handler != NULL) {
+ entry->rt->error_handler(entry, err);
+ }
+ rspamd_http_entry_free(entry);
+ }
+ else {
+ /* Here we can write a reply to a client */
+ if (entry->rt->error_handler != NULL) {
+ entry->rt->error_handler(entry, err);
+ }
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+ msg->date = time(NULL);
+ msg->code = err->code;
+ rspamd_http_message_set_body(msg, err->message, strlen(err->message));
+ rspamd_http_connection_reset(entry->conn);
+ rspamd_http_connection_write_message(entry->conn,
+ msg,
+ NULL,
+ "text/plain",
+ entry,
+ entry->rt->timeout);
+ entry->is_reply = TRUE;
+ }
+}
+
+static const gchar *
+rspamd_http_router_detect_ct(const gchar *path)
+{
+ const gchar *dot;
+ guint i;
+
+ dot = strrchr(path, '.');
+ if (dot == NULL) {
+ return http_file_types[HTTP_MAGIC_PLAIN].ct;
+ }
+ dot++;
+
+ for (i = 0; i < G_N_ELEMENTS(http_file_types); i++) {
+ if (strcmp(http_file_types[i].ext, dot) == 0) {
+ return http_file_types[i].ct;
+ }
+ }
+
+ return http_file_types[HTTP_MAGIC_PLAIN].ct;
+}
+
+static gboolean
+rspamd_http_router_is_subdir(const gchar *parent, const gchar *sub)
+{
+ if (parent == NULL || sub == NULL || *parent == '\0') {
+ return FALSE;
+ }
+
+ while (*parent != '\0') {
+ if (*sub != *parent) {
+ return FALSE;
+ }
+ parent++;
+ sub++;
+ }
+
+ parent--;
+ if (*parent == G_DIR_SEPARATOR) {
+ return TRUE;
+ }
+
+ return (*sub == G_DIR_SEPARATOR || *sub == '\0');
+}
+
+static gboolean
+rspamd_http_router_try_file(struct rspamd_http_connection_entry *entry,
+ rspamd_ftok_t *lookup, gboolean expand_path)
+{
+ struct stat st;
+ gint fd;
+ gchar filebuf[PATH_MAX], realbuf[PATH_MAX], *dir;
+ struct rspamd_http_message *reply_msg;
+
+ rspamd_snprintf(filebuf, sizeof(filebuf), "%s%c%T",
+ entry->rt->default_fs_path, G_DIR_SEPARATOR, lookup);
+
+ if (realpath(filebuf, realbuf) == NULL ||
+ lstat(realbuf, &st) == -1) {
+ return FALSE;
+ }
+
+ if (S_ISDIR(st.st_mode) && expand_path) {
+ /* Try to append 'index.html' to the url */
+ rspamd_fstring_t *nlookup;
+ rspamd_ftok_t tok;
+ gboolean ret;
+
+ nlookup = rspamd_fstring_sized_new(lookup->len + sizeof("index.html"));
+ rspamd_printf_fstring(&nlookup, "%T%c%s", lookup, G_DIR_SEPARATOR,
+ "index.html");
+ tok.begin = nlookup->str;
+ tok.len = nlookup->len;
+ ret = rspamd_http_router_try_file(entry, &tok, FALSE);
+ rspamd_fstring_free(nlookup);
+
+ return ret;
+ }
+ else if (!S_ISREG(st.st_mode)) {
+ return FALSE;
+ }
+
+ /* We also need to ensure that file is inside the defined dir */
+ rspamd_strlcpy(filebuf, realbuf, sizeof(filebuf));
+ dir = dirname(filebuf);
+
+ if (dir == NULL ||
+ !rspamd_http_router_is_subdir(entry->rt->default_fs_path,
+ dir)) {
+ return FALSE;
+ }
+
+ fd = open(realbuf, O_RDONLY);
+ if (fd == -1) {
+ return FALSE;
+ }
+
+ reply_msg = rspamd_http_new_message(HTTP_RESPONSE);
+ reply_msg->date = time(NULL);
+ reply_msg->code = 200;
+ rspamd_http_router_insert_headers(entry->rt, reply_msg);
+
+ if (!rspamd_http_message_set_body_from_fd(reply_msg, fd)) {
+ rspamd_http_message_free(reply_msg);
+ close(fd);
+ return FALSE;
+ }
+
+ close(fd);
+
+ rspamd_http_connection_reset(entry->conn);
+
+ msg_debug("requested file %s", realbuf);
+ rspamd_http_connection_write_message(entry->conn, reply_msg, NULL,
+ rspamd_http_router_detect_ct(realbuf), entry,
+ entry->rt->timeout);
+
+ return TRUE;
+}
+
+static void
+rspamd_http_router_send_error(GError *err,
+ struct rspamd_http_connection_entry *entry)
+{
+ struct rspamd_http_message *err_msg;
+
+ err_msg = rspamd_http_new_message(HTTP_RESPONSE);
+ err_msg->date = time(NULL);
+ err_msg->code = err->code;
+ rspamd_http_message_set_body(err_msg, err->message,
+ strlen(err->message));
+ entry->is_reply = TRUE;
+ err_msg->status = rspamd_fstring_new_init(err->message, strlen(err->message));
+ rspamd_http_router_insert_headers(entry->rt, err_msg);
+ rspamd_http_connection_reset(entry->conn);
+ rspamd_http_connection_write_message(entry->conn,
+ err_msg,
+ NULL,
+ "text/plain",
+ entry,
+ entry->rt->timeout);
+}
+
+
+static int
+rspamd_http_router_finish_handler(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg)
+{
+ struct rspamd_http_connection_entry *entry = conn->ud;
+ rspamd_http_router_handler_t handler = NULL;
+ gpointer found;
+
+ GError *err;
+ rspamd_ftok_t lookup;
+ const rspamd_ftok_t *encoding;
+ struct http_parser_url u;
+ guint i;
+ rspamd_regexp_t *re;
+ struct rspamd_http_connection_router *router;
+ gchar *pathbuf = NULL;
+
+ G_STATIC_ASSERT(sizeof(rspamd_http_router_handler_t) ==
+ sizeof(gpointer));
+
+ memset(&lookup, 0, sizeof(lookup));
+ router = entry->rt;
+
+ if (entry->is_reply) {
+ /* Request is finished, it is safe to free a connection */
+ rspamd_http_entry_free(entry);
+ }
+ else {
+ if (G_UNLIKELY(msg->method != HTTP_GET && msg->method != HTTP_POST)) {
+ if (router->unknown_method_handler) {
+ return router->unknown_method_handler(entry, msg);
+ }
+ else {
+ err = g_error_new(HTTP_ERROR, 500,
+ "Invalid method");
+ if (entry->rt->error_handler != NULL) {
+ entry->rt->error_handler(entry, err);
+ }
+
+ rspamd_http_router_send_error(err, entry);
+ g_error_free(err);
+
+ return 0;
+ }
+ }
+
+ /* Search for path */
+ if (msg->url != NULL && msg->url->len != 0) {
+
+ http_parser_parse_url(msg->url->str, msg->url->len, TRUE, &u);
+
+ if (u.field_set & (1 << UF_PATH)) {
+ gsize unnorm_len;
+
+ pathbuf = g_malloc(u.field_data[UF_PATH].len);
+ memcpy(pathbuf, msg->url->str + u.field_data[UF_PATH].off,
+ u.field_data[UF_PATH].len);
+ lookup.begin = pathbuf;
+ lookup.len = u.field_data[UF_PATH].len;
+
+ rspamd_normalize_path_inplace(pathbuf,
+ lookup.len,
+ &unnorm_len);
+ lookup.len = unnorm_len;
+ }
+ else {
+ lookup.begin = msg->url->str;
+ lookup.len = msg->url->len;
+ }
+
+ found = g_hash_table_lookup(entry->rt->paths, &lookup);
+ memcpy(&handler, &found, sizeof(found));
+ msg_debug("requested known path: %T", &lookup);
+ }
+ else {
+ err = g_error_new(HTTP_ERROR, 404,
+ "Empty path requested");
+ if (entry->rt->error_handler != NULL) {
+ entry->rt->error_handler(entry, err);
+ }
+
+ rspamd_http_router_send_error(err, entry);
+ g_error_free(err);
+
+ return 0;
+ }
+
+ entry->is_reply = TRUE;
+
+ encoding = rspamd_http_message_find_header(msg, "Accept-Encoding");
+
+ if (encoding && rspamd_substring_search(encoding->begin, encoding->len,
+ "gzip", 4) != -1) {
+ entry->support_gzip = TRUE;
+ }
+
+ if (handler != NULL) {
+ if (pathbuf) {
+ g_free(pathbuf);
+ }
+
+ return handler(entry, msg);
+ }
+ else {
+ /* Try regexps */
+ for (i = 0; i < router->regexps->len; i++) {
+ re = g_ptr_array_index(router->regexps, i);
+ if (rspamd_regexp_match(re, lookup.begin, lookup.len,
+ TRUE)) {
+ found = rspamd_regexp_get_ud(re);
+ memcpy(&handler, &found, sizeof(found));
+
+ if (pathbuf) {
+ g_free(pathbuf);
+ }
+
+ return handler(entry, msg);
+ }
+ }
+
+ /* Now try plain file */
+ if (entry->rt->default_fs_path == NULL || lookup.len == 0 ||
+ !rspamd_http_router_try_file(entry, &lookup, TRUE)) {
+
+ err = g_error_new(HTTP_ERROR, 404,
+ "Not found");
+ if (entry->rt->error_handler != NULL) {
+ entry->rt->error_handler(entry, err);
+ }
+
+ msg_info("path: %T not found", &lookup);
+ rspamd_http_router_send_error(err, entry);
+ g_error_free(err);
+ }
+ }
+ }
+
+ if (pathbuf) {
+ g_free(pathbuf);
+ }
+
+ return 0;
+}
+
+struct rspamd_http_connection_router *
+rspamd_http_router_new(rspamd_http_router_error_handler_t eh,
+ rspamd_http_router_finish_handler_t fh,
+ ev_tstamp timeout,
+ const char *default_fs_path,
+ struct rspamd_http_context *ctx)
+{
+ struct rspamd_http_connection_router *nrouter;
+ struct stat st;
+
+ nrouter = g_malloc0(sizeof(struct rspamd_http_connection_router));
+ nrouter->paths = g_hash_table_new_full(rspamd_ftok_icase_hash,
+ rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, NULL);
+ nrouter->regexps = g_ptr_array_new();
+ nrouter->conns = NULL;
+ nrouter->error_handler = eh;
+ nrouter->finish_handler = fh;
+ nrouter->response_headers = g_hash_table_new_full(rspamd_strcase_hash,
+ rspamd_strcase_equal, g_free, g_free);
+ nrouter->event_loop = ctx->event_loop;
+ nrouter->timeout = timeout;
+ nrouter->default_fs_path = NULL;
+
+ if (default_fs_path != NULL) {
+ if (stat(default_fs_path, &st) == -1) {
+ msg_err("cannot stat %s", default_fs_path);
+ }
+ else {
+ if (!S_ISDIR(st.st_mode)) {
+ msg_err("path %s is not a directory", default_fs_path);
+ }
+ else {
+ nrouter->default_fs_path = realpath(default_fs_path, NULL);
+ }
+ }
+ }
+
+ nrouter->ctx = ctx;
+
+ return nrouter;
+}
+
+void rspamd_http_router_set_key(struct rspamd_http_connection_router *router,
+ struct rspamd_cryptobox_keypair *key)
+{
+ g_assert(key != NULL);
+
+ router->key = rspamd_keypair_ref(key);
+}
+
+void rspamd_http_router_add_path(struct rspamd_http_connection_router *router,
+ const gchar *path, rspamd_http_router_handler_t handler)
+{
+ gpointer ptr;
+ rspamd_ftok_t *key;
+ rspamd_fstring_t *storage;
+ G_STATIC_ASSERT(sizeof(rspamd_http_router_handler_t) ==
+ sizeof(gpointer));
+
+ if (path != NULL && handler != NULL && router != NULL) {
+ memcpy(&ptr, &handler, sizeof(ptr));
+ storage = rspamd_fstring_new_init(path, strlen(path));
+ key = g_malloc0(sizeof(*key));
+ key->begin = storage->str;
+ key->len = storage->len;
+ g_hash_table_insert(router->paths, key, ptr);
+ }
+}
+
+void rspamd_http_router_set_unknown_handler(struct rspamd_http_connection_router *router,
+ rspamd_http_router_handler_t handler)
+{
+ if (router != NULL) {
+ router->unknown_method_handler = handler;
+ }
+}
+
+void rspamd_http_router_add_header(struct rspamd_http_connection_router *router,
+ const gchar *name, const gchar *value)
+{
+ if (name != NULL && value != NULL && router != NULL) {
+ g_hash_table_replace(router->response_headers, g_strdup(name),
+ g_strdup(value));
+ }
+}
+
+void rspamd_http_router_insert_headers(struct rspamd_http_connection_router *router,
+ struct rspamd_http_message *msg)
+{
+ GHashTableIter it;
+ gpointer k, v;
+
+ if (router && msg) {
+ g_hash_table_iter_init(&it, router->response_headers);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ rspamd_http_message_add_header(msg, k, v);
+ }
+ }
+}
+
+void rspamd_http_router_add_regexp(struct rspamd_http_connection_router *router,
+ struct rspamd_regexp_s *re, rspamd_http_router_handler_t handler)
+{
+ gpointer ptr;
+ G_STATIC_ASSERT(sizeof(rspamd_http_router_handler_t) ==
+ sizeof(gpointer));
+
+ if (re != NULL && handler != NULL && router != NULL) {
+ memcpy(&ptr, &handler, sizeof(ptr));
+ rspamd_regexp_set_ud(re, ptr);
+ g_ptr_array_add(router->regexps, rspamd_regexp_ref(re));
+ }
+}
+
+void rspamd_http_router_handle_socket(struct rspamd_http_connection_router *router,
+ gint fd, gpointer ud)
+{
+ struct rspamd_http_connection_entry *conn;
+
+ conn = g_malloc0(sizeof(struct rspamd_http_connection_entry));
+ conn->rt = router;
+ conn->ud = ud;
+ conn->is_reply = FALSE;
+
+ conn->conn = rspamd_http_connection_new_server(router->ctx,
+ fd,
+ NULL,
+ rspamd_http_router_error_handler,
+ rspamd_http_router_finish_handler,
+ 0);
+
+ if (router->key) {
+ rspamd_http_connection_set_key(conn->conn, router->key);
+ }
+
+ rspamd_http_connection_read_message(conn->conn, conn, router->timeout);
+ DL_PREPEND(router->conns, conn);
+}
+
+void rspamd_http_router_free(struct rspamd_http_connection_router *router)
+{
+ struct rspamd_http_connection_entry *conn, *tmp;
+ rspamd_regexp_t *re;
+ guint i;
+
+ if (router) {
+ DL_FOREACH_SAFE(router->conns, conn, tmp)
+ {
+ rspamd_http_entry_free(conn);
+ }
+
+ if (router->key) {
+ rspamd_keypair_unref(router->key);
+ }
+
+ if (router->default_fs_path != NULL) {
+ g_free(router->default_fs_path);
+ }
+
+ for (i = 0; i < router->regexps->len; i++) {
+ re = g_ptr_array_index(router->regexps, i);
+ rspamd_regexp_unref(re);
+ }
+
+ g_ptr_array_free(router->regexps, TRUE);
+ g_hash_table_unref(router->paths);
+ g_hash_table_unref(router->response_headers);
+ g_free(router);
+ }
+}
diff --git a/src/libserver/http/http_router.h b/src/libserver/http/http_router.h
new file mode 100644
index 0000000..1bf70ed
--- /dev/null
+++ b/src/libserver/http/http_router.h
@@ -0,0 +1,149 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_HTTP_ROUTER_H
+#define RSPAMD_HTTP_ROUTER_H
+
+#include "config.h"
+#include "http_connection.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_http_connection_router;
+struct rspamd_http_connection_entry;
+
+typedef int (*rspamd_http_router_handler_t)(struct rspamd_http_connection_entry
+ *conn_ent,
+ struct rspamd_http_message *msg);
+
+typedef void (*rspamd_http_router_error_handler_t)(struct rspamd_http_connection_entry *conn_ent,
+ GError *err);
+
+typedef void (*rspamd_http_router_finish_handler_t)(struct rspamd_http_connection_entry *conn_ent);
+
+
+struct rspamd_http_connection_entry {
+ struct rspamd_http_connection_router *rt;
+ struct rspamd_http_connection *conn;
+ gpointer ud;
+ gboolean is_reply;
+ gboolean support_gzip;
+ struct rspamd_http_connection_entry *prev, *next;
+};
+
+struct rspamd_http_connection_router {
+ struct rspamd_http_connection_entry *conns;
+ GHashTable *paths;
+ GHashTable *response_headers;
+ GPtrArray *regexps;
+ ev_tstamp timeout;
+ struct ev_loop *event_loop;
+ struct rspamd_http_context *ctx;
+ gchar *default_fs_path;
+ rspamd_http_router_handler_t unknown_method_handler;
+ struct rspamd_cryptobox_keypair *key;
+ rspamd_http_router_error_handler_t error_handler;
+ rspamd_http_router_finish_handler_t finish_handler;
+};
+
+/**
+ * Create new http connection router and the associated HTTP connection
+ * @param eh error handler callback
+ * @param fh finish handler callback
+ * @param default_fs_path if not NULL try to serve static files from
+ * the specified directory
+ * @return
+ */
+struct rspamd_http_connection_router *rspamd_http_router_new(
+ rspamd_http_router_error_handler_t eh,
+ rspamd_http_router_finish_handler_t fh,
+ ev_tstamp timeout,
+ const char *default_fs_path,
+ struct rspamd_http_context *ctx);
+
+/**
+ * Set encryption key for the HTTP router
+ * @param router router structure
+ * @param key opaque key structure
+ */
+void rspamd_http_router_set_key(struct rspamd_http_connection_router *router,
+ struct rspamd_cryptobox_keypair *key);
+
+/**
+ * Add new path to the router
+ */
+void rspamd_http_router_add_path(struct rspamd_http_connection_router *router,
+ const gchar *path, rspamd_http_router_handler_t handler);
+
+/**
+ * Add custom header to append to router replies
+ * @param router
+ * @param name
+ * @param value
+ */
+void rspamd_http_router_add_header(struct rspamd_http_connection_router *router,
+ const gchar *name, const gchar *value);
+
+/**
+ * Sets method to handle unknown request methods
+ * @param router
+ * @param handler
+ */
+void rspamd_http_router_set_unknown_handler(struct rspamd_http_connection_router *router,
+ rspamd_http_router_handler_t handler);
+
+/**
+ * Inserts router headers to the outbound message
+ * @param router
+ * @param msg
+ */
+void rspamd_http_router_insert_headers(struct rspamd_http_connection_router *router,
+ struct rspamd_http_message *msg);
+
+struct rspamd_regexp_s;
+
+/**
+ * Adds new pattern to router, regexp object is refcounted by this function
+ * @param router
+ * @param re
+ * @param handler
+ */
+void rspamd_http_router_add_regexp(struct rspamd_http_connection_router *router,
+ struct rspamd_regexp_s *re, rspamd_http_router_handler_t handler);
+
+/**
+ * Handle new accepted socket
+ * @param router router object
+ * @param fd server socket
+ * @param ud opaque userdata
+ */
+void rspamd_http_router_handle_socket(
+ struct rspamd_http_connection_router *router,
+ gint fd,
+ gpointer ud);
+
+/**
+ * Free router and all connections associated
+ * @param router
+ */
+void rspamd_http_router_free(struct rspamd_http_connection_router *router);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c
new file mode 100644
index 0000000..d5c4a57
--- /dev/null
+++ b/src/libserver/http/http_util.c
@@ -0,0 +1,295 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libserver/http/http_util.h"
+#include "libutil/printf.h"
+#include "libutil/util.h"
+
+static const gchar *http_week[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
+static const gchar *http_month[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+
+/*
+ * Obtained from nginx
+ * Copyright (C) Igor Sysoev
+ */
+static guint mday[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+time_t
+rspamd_http_parse_date(const gchar *header, gsize len)
+{
+ const gchar *p, *end;
+ gint month;
+ guint day, year, hour, min, sec;
+ guint64 time;
+ enum {
+ no = 0,
+ rfc822, /* Tue, 10 Nov 2002 23:50:13 */
+ rfc850, /* Tuesday, 10-Dec-02 23:50:13 */
+ isoc /* Tue Dec 10 23:50:13 2002 */
+ } fmt;
+
+ fmt = 0;
+ if (len > 0) {
+ end = header + len;
+ }
+ else {
+ end = header + strlen(header);
+ }
+
+ day = 32;
+ year = 2038;
+
+ for (p = header; p < end; p++) {
+ if (*p == ',') {
+ break;
+ }
+
+ if (*p == ' ') {
+ fmt = isoc;
+ break;
+ }
+ }
+
+ for (p++; p < end; p++)
+ if (*p != ' ') {
+ break;
+ }
+
+ if (end - p < 18) {
+ return (time_t) -1;
+ }
+
+ if (fmt != isoc) {
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t) -1;
+ }
+
+ day = (*p - '0') * 10 + *(p + 1) - '0';
+ p += 2;
+
+ if (*p == ' ') {
+ if (end - p < 18) {
+ return (time_t) -1;
+ }
+ fmt = rfc822;
+ }
+ else if (*p == '-') {
+ fmt = rfc850;
+ }
+ else {
+ return (time_t) -1;
+ }
+
+ p++;
+ }
+
+ switch (*p) {
+
+ case 'J':
+ month = *(p + 1) == 'a' ? 0 : *(p + 2) == 'n' ? 5
+ : 6;
+ break;
+
+ case 'F':
+ month = 1;
+ break;
+
+ case 'M':
+ month = *(p + 2) == 'r' ? 2 : 4;
+ break;
+
+ case 'A':
+ month = *(p + 1) == 'p' ? 3 : 7;
+ break;
+
+ case 'S':
+ month = 8;
+ break;
+
+ case 'O':
+ month = 9;
+ break;
+
+ case 'N':
+ month = 10;
+ break;
+
+ case 'D':
+ month = 11;
+ break;
+
+ default:
+ return (time_t) -1;
+ }
+
+ p += 3;
+
+ if ((fmt == rfc822 && *p != ' ') || (fmt == rfc850 && *p != '-')) {
+ return (time_t) -1;
+ }
+
+ p++;
+
+ if (fmt == rfc822) {
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' || *(p + 3) > '9') {
+ return (time_t) -1;
+ }
+
+ year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100 + (*(p + 2) - '0') * 10 + *(p + 3) - '0';
+ p += 4;
+ }
+ else if (fmt == rfc850) {
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t) -1;
+ }
+
+ year = (*p - '0') * 10 + *(p + 1) - '0';
+ year += (year < 70) ? 2000 : 1900;
+ p += 2;
+ }
+
+ if (fmt == isoc) {
+ if (*p == ' ') {
+ p++;
+ }
+
+ if (*p < '0' || *p > '9') {
+ return (time_t) -1;
+ }
+
+ day = *p++ - '0';
+
+ if (*p != ' ') {
+ if (*p < '0' || *p > '9') {
+ return (time_t) -1;
+ }
+
+ day = day * 10 + *p++ - '0';
+ }
+
+ if (end - p < 14) {
+ return (time_t) -1;
+ }
+ }
+
+ if (*p++ != ' ') {
+ return (time_t) -1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t) -1;
+ }
+
+ hour = (*p - '0') * 10 + *(p + 1) - '0';
+ p += 2;
+
+ if (*p++ != ':') {
+ return (time_t) -1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t) -1;
+ }
+
+ min = (*p - '0') * 10 + *(p + 1) - '0';
+ p += 2;
+
+ if (*p++ != ':') {
+ return (time_t) -1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t) -1;
+ }
+
+ sec = (*p - '0') * 10 + *(p + 1) - '0';
+
+ if (fmt == isoc) {
+ p += 2;
+
+ if (*p++ != ' ') {
+ return (time_t) -1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' || *(p + 3) > '9') {
+ return (time_t) -1;
+ }
+
+ year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100 + (*(p + 2) - '0') * 10 + *(p + 3) - '0';
+ }
+
+ if (hour > 23 || min > 59 || sec > 59) {
+ return (time_t) -1;
+ }
+
+ if (day == 29 && month == 1) {
+ if ((year & 3) || ((year % 100 == 0) && (year % 400) != 0)) {
+ return (time_t) -1;
+ }
+ }
+ else if (day > mday[month]) {
+ return (time_t) -1;
+ }
+
+ /*
+ * shift new year to March 1 and start months from 1 (not 0),
+ * it is needed for Gauss' formula
+ */
+
+ if (--month <= 0) {
+ month += 12;
+ year -= 1;
+ }
+
+ /* Gauss' formula for Gregorian days since March 1, 1 BC */
+
+ time = (guint64) (
+ /* days in years including leap years since March 1, 1 BC */
+
+ 365 * year + year / 4 - year / 100 + year / 400
+
+ /* days before the month */
+
+ + 367 * month / 12 - 30
+
+ /* days before the day */
+
+ + day - 1
+
+ /*
+ * 719527 days were between March 1, 1 BC and March 1, 1970,
+ * 31 and 28 days were in January and February 1970
+ */
+
+ - 719527 + 31 + 28) *
+ 86400 +
+ hour * 3600 + min * 60 + sec;
+
+ return (time_t) time;
+}
+
+glong rspamd_http_date_format(gchar *buf, gsize len, time_t time)
+{
+ struct tm tms;
+
+ rspamd_gmtime(time, &tms);
+
+ return rspamd_snprintf(buf, len, "%s, %02d %s %4d %02d:%02d:%02d GMT",
+ http_week[tms.tm_wday], tms.tm_mday,
+ http_month[tms.tm_mon], tms.tm_year + 1900,
+ tms.tm_hour, tms.tm_min, tms.tm_sec);
+} \ No newline at end of file
diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h
new file mode 100644
index 0000000..ec57508
--- /dev/null
+++ b/src/libserver/http/http_util.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTTP_UTIL_H
+#define RSPAMD_HTTP_UTIL_H
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Parse HTTP date header and return it as time_t
+ * @param header HTTP date header
+ * @param len length of header
+ * @return time_t or (time_t)-1 in case of error
+ */
+time_t rspamd_http_parse_date(const gchar *header, gsize len);
+
+/**
+ * Prints HTTP date from `time` to `buf` using standard HTTP date format
+ * @param buf date buffer
+ * @param len length of buffer
+ * @param time time in unix seconds
+ * @return number of bytes written
+ */
+glong rspamd_http_date_format(gchar *buf, gsize len, time_t time);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx
new file mode 100644
index 0000000..7d1ecf3
--- /dev/null
+++ b/src/libserver/hyperscan_tools.cxx
@@ -0,0 +1,627 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+
+#ifdef WITH_HYPERSCAN
+#include <string>
+#include <filesystem>
+#include "contrib/ankerl/unordered_dense.h"
+#include "contrib/ankerl/svector.h"
+#include "fmt/core.h"
+#include "libutil/cxx/file_util.hxx"
+#include "libutil/cxx/error.hxx"
+#include "hs.h"
+#include "logger.h"
+#include "worker_util.h"
+#include "hyperscan_tools.h"
+
+#include <glob.h> /* for glob */
+#include <unistd.h> /* for unlink */
+#include <optional>
+#include <cstdlib> /* for std::getenv */
+#include "unix-std.h"
+#include "rspamd_control.h"
+
+#define HYPERSCAN_LOG_TAG "hsxxxx"
+
+// Hyperscan does not provide any API to check validity of it's databases
+// However, it is required for us to perform migrations properly without
+// failing at `hs_alloc_scratch` phase or even `hs_scan` which is **way too late**
+// Hence, we have to check hyperscan internal guts to prevent that situation...
+
+#ifdef HS_MAJOR
+#ifndef HS_VERSION_32BIT
+#define HS_VERSION_32BIT ((HS_MAJOR << 24) | (HS_MINOR << 16) | (HS_PATCH << 8) | 0)
+#endif
+#endif// defined(HS_MAJOR)
+
+#if !defined(HS_DB_VERSION) && defined(HS_VERSION_32BIT)
+#define HS_DB_VERSION HS_VERSION_32BIT
+#endif
+
+#ifndef HS_DB_MAGIC
+#define HS_DB_MAGIC (0xdbdbdbdbU)
+#endif
+
+#define msg_info_hyperscan(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "hyperscan", HYPERSCAN_LOG_TAG, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_hyperscan_lambda(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "hyperscan", HYPERSCAN_LOG_TAG, \
+ log_func, \
+ __VA_ARGS__)
+#define msg_err_hyperscan(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "hyperscan", HYPERSCAN_LOG_TAG, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_hyperscan(...) rspamd_conditional_debug_fast(nullptr, nullptr, \
+ rspamd_hyperscan_log_id, "hyperscan", HYPERSCAN_LOG_TAG, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_hyperscan_lambda(...) rspamd_conditional_debug_fast(nullptr, nullptr, \
+ rspamd_hyperscan_log_id, "hyperscan", HYPERSCAN_LOG_TAG, \
+ log_func, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE_PUBLIC(hyperscan)
+
+namespace rspamd::util {
+
+/*
+ * A singleton class that is responsible for deletion of the outdated hyperscan files
+ * One issue is that it must know about HS files in all workers, which is a problem
+ * TODO: we need to export hyperscan caches from all workers to a single place where
+ * we can clean them up (probably, to the main process)
+ */
+class hs_known_files_cache {
+private:
+ // These fields are filled when we add new known cache files
+ ankerl::svector<std::string, 4> cache_dirs;
+ ankerl::svector<std::string, 8> cache_extensions;
+ ankerl::unordered_dense::set<std::string> known_cached_files;
+ bool loaded = false;
+
+private:
+ hs_known_files_cache() = default;
+
+ virtual ~hs_known_files_cache()
+ {
+ // Cleanup cache dir
+ cleanup_maybe();
+ }
+
+public:
+ hs_known_files_cache(const hs_known_files_cache &) = delete;
+ hs_known_files_cache(hs_known_files_cache &&) = delete;
+
+ static auto get() -> hs_known_files_cache &
+ {
+ static hs_known_files_cache *singleton = nullptr;
+
+ if (singleton == nullptr) {
+ singleton = new hs_known_files_cache;
+ }
+
+ return *singleton;
+ }
+
+ void add_cached_file(const raii_file &file)
+ {
+ auto fpath = std::filesystem::path{file.get_name()};
+ std::error_code ec;
+
+ fpath = std::filesystem::canonical(fpath, ec);
+
+ if (ec && ec.value() != 0) {
+ msg_err_hyperscan("invalid path: \"%s\", error message: %s", fpath.c_str(), ec.message().c_str());
+ return;
+ }
+
+ auto dir = fpath.parent_path();
+ auto ext = fpath.extension();
+
+ if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
+ [&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) {
+ cache_dirs.emplace_back(std::string{dir});
+ }
+ if (std::find_if(cache_extensions.begin(), cache_extensions.end(),
+ [&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) {
+ cache_extensions.emplace_back(std::string{ext});
+ }
+
+ auto is_known = known_cached_files.insert(fpath.string());
+ msg_debug_hyperscan("added %s hyperscan file: %s",
+ is_known.second ? "new" : "already known",
+ fpath.c_str());
+ }
+
+ void add_cached_file(const char *fname)
+ {
+ auto fpath = std::filesystem::path{fname};
+ std::error_code ec;
+
+ fpath = std::filesystem::canonical(fpath, ec);
+
+ if (ec && ec.value() != 0) {
+ msg_err_hyperscan("invalid path: \"%s\", error message: %s", fname, ec.message().c_str());
+ return;
+ }
+
+ auto dir = fpath.parent_path();
+ auto ext = fpath.extension();
+
+ if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
+ [&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) {
+ cache_dirs.emplace_back(dir.string());
+ }
+ if (std::find_if(cache_extensions.begin(), cache_extensions.end(),
+ [&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) {
+ cache_extensions.emplace_back(ext.string());
+ }
+
+ auto is_known = known_cached_files.insert(fpath.string());
+ msg_debug_hyperscan("added %s hyperscan file: %s",
+ is_known.second ? "new" : "already known",
+ fpath.c_str());
+ }
+
+ void delete_cached_file(const char *fname)
+ {
+ auto fpath = std::filesystem::path{fname};
+ std::error_code ec;
+
+ fpath = std::filesystem::canonical(fpath, ec);
+
+ if (ec && ec.value() != 0) {
+ msg_err_hyperscan("invalid path to remove: \"%s\", error message: %s",
+ fname, ec.message().c_str());
+ return;
+ }
+
+ if (fpath.empty()) {
+ msg_err_hyperscan("attempt to remove an empty hyperscan file!");
+ return;
+ }
+
+ if (unlink(fpath.c_str()) == -1) {
+ msg_err_hyperscan("cannot remove hyperscan file %s: %s",
+ fpath.c_str(), strerror(errno));
+ }
+ else {
+ msg_debug_hyperscan("removed hyperscan file %s", fpath.c_str());
+ }
+
+ known_cached_files.erase(fpath.string());
+ }
+
+ auto cleanup_maybe() -> void
+ {
+ auto env_cleanup_disable = std::getenv("RSPAMD_NO_CLEANUP");
+ /* We clean dir merely if we are running from the main process */
+ if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr && loaded) {
+ const auto *log_func = RSPAMD_LOG_FUNC;
+ auto cleanup_dir = [&](std::string_view dir) -> void {
+ for (const auto &ext: cache_extensions) {
+ glob_t globbuf;
+
+ auto glob_pattern = fmt::format("{}{}*{}",
+ dir, G_DIR_SEPARATOR_S, ext);
+ msg_debug_hyperscan_lambda("perform glob for pattern: %s",
+ glob_pattern.c_str());
+ memset(&globbuf, 0, sizeof(globbuf));
+
+ if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) {
+ for (auto i = 0; i < globbuf.gl_pathc; i++) {
+ auto path = std::string{globbuf.gl_pathv[i]};
+ std::size_t nsz;
+ struct stat st;
+
+ rspamd_normalize_path_inplace(path.data(), path.size(), &nsz);
+ path.resize(nsz);
+
+ if (stat(path.c_str(), &st) == -1) {
+ msg_debug_hyperscan_lambda("cannot stat file %s: %s",
+ path.c_str(), strerror(errno));
+ continue;
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ if (!known_cached_files.contains(path)) {
+ msg_info_hyperscan_lambda("remove stale hyperscan file %s", path.c_str());
+ unlink(path.c_str());
+ }
+ else {
+ msg_debug_hyperscan_lambda("found known hyperscan file %s, size: %Hz",
+ path.c_str(), st.st_size);
+ }
+ }
+ }
+ }
+
+ globfree(&globbuf);
+ }
+ };
+
+ for (const auto &dir: cache_dirs) {
+ msg_info_hyperscan("cleaning up directory %s", dir.c_str());
+ cleanup_dir(dir);
+ }
+
+ cache_dirs.clear();
+ cache_extensions.clear();
+ known_cached_files.clear();
+ }
+ else if (rspamd_current_worker == nullptr && env_cleanup_disable != nullptr) {
+ msg_info_hyperscan("disable hyperscan cleanup: env variable RSPAMD_NO_CLEANUP is set");
+ }
+ else if (!loaded) {
+ msg_info_hyperscan("disable hyperscan cleanup: not loaded");
+ }
+ }
+
+ auto notice_loaded() -> void
+ {
+ loaded = true;
+ }
+};
+
+
+/**
+ * This is a higher level representation of the cached hyperscan file
+ */
+struct hs_shared_database {
+ hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */
+ std::optional<raii_mmaped_file> maybe_map;
+ std::string cached_path;
+
+ ~hs_shared_database()
+ {
+ if (!maybe_map) {
+ hs_free_database(db);
+ }
+ // Otherwise, handled by maybe_map dtor
+ }
+
+ explicit hs_shared_database(raii_mmaped_file &&map, hs_database_t *db)
+ : db(db), maybe_map(std::move(map))
+ {
+ cached_path = maybe_map.value().get_file().get_name();
+ }
+ explicit hs_shared_database(hs_database_t *db, const char *fname)
+ : db(db), maybe_map(std::nullopt)
+ {
+ if (fname) {
+ cached_path = fname;
+ }
+ else {
+ /* Likely a test case */
+ cached_path = "";
+ }
+ }
+ hs_shared_database(const hs_shared_database &other) = delete;
+ hs_shared_database() = default;
+ hs_shared_database(hs_shared_database &&other) noexcept
+ {
+ *this = std::move(other);
+ }
+ hs_shared_database &operator=(hs_shared_database &&other) noexcept
+ {
+ std::swap(db, other.db);
+ std::swap(maybe_map, other.maybe_map);
+ return *this;
+ }
+};
+
+struct real_hs_db {
+ std::uint32_t magic;
+ std::uint32_t version;
+ std::uint32_t length;
+ std::uint64_t platform;
+ std::uint32_t crc32;
+};
+static auto
+hs_is_valid_database(void *raw, std::size_t len, std::string_view fname) -> tl::expected<bool, std::string>
+{
+ if (len < sizeof(real_hs_db)) {
+ return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: too short", fname));
+ }
+
+ static real_hs_db test;
+
+ memcpy(&test, raw, sizeof(test));
+
+ if (test.magic != HS_DB_MAGIC) {
+ return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: invalid magic: {} ({} expected)",
+ fname, test.magic, HS_DB_MAGIC));
+ }
+
+#ifdef HS_DB_VERSION
+ if (test.version != HS_DB_VERSION) {
+ return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: invalid version: {} ({} expected)",
+ fname, test.version, HS_DB_VERSION));
+ }
+#endif
+
+ return true;
+}
+
+static auto
+hs_shared_from_unserialized(hs_known_files_cache &hs_cache, raii_mmaped_file &&map) -> tl::expected<hs_shared_database, error>
+{
+ auto ptr = map.get_map();
+ auto db = (hs_database_t *) ptr;
+
+ auto is_valid = hs_is_valid_database(map.get_map(), map.get_size(), map.get_file().get_name());
+ if (!is_valid) {
+ return tl::make_unexpected(error{is_valid.error(), -1, error_category::IMPORTANT});
+ }
+
+ hs_cache.add_cached_file(map.get_file());
+ return tl::expected<hs_shared_database, error>{tl::in_place, std::move(map), db};
+}
+
+static auto
+hs_shared_from_serialized(hs_known_files_cache &hs_cache, raii_mmaped_file &&map, std::int64_t offset) -> tl::expected<hs_shared_database, error>
+{
+ hs_database_t *target = nullptr;
+
+ if (auto ret = hs_deserialize_database((const char *) map.get_map() + offset,
+ map.get_size() - offset, &target);
+ ret != HS_SUCCESS) {
+ return tl::make_unexpected(error{"cannot deserialize database", ret});
+ }
+
+ hs_cache.add_cached_file(map.get_file());
+ return tl::expected<hs_shared_database, error>{tl::in_place, target, map.get_file().get_name().data()};
+}
+
+auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expected<hs_shared_database, error>
+{
+ auto &hs_cache = hs_known_files_cache::get();
+ const auto *log_func = RSPAMD_LOG_FUNC;
+
+ return raii_mmaped_file::mmap_shared(fname, O_RDONLY, PROT_READ, 0)
+ .and_then([&]<class T>(T &&cached_serialized) -> tl::expected<hs_shared_database, error> {
+ if (cached_serialized.get_size() <= offset) {
+ return tl::make_unexpected(error{"Invalid offset", EINVAL, error_category::CRITICAL});
+ }
+#if defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4
+ auto unserialized_fname = fmt::format("{}.unser", fname);
+ auto unserialized_file = raii_locked_file::create(unserialized_fname.c_str(), O_CREAT | O_RDWR | O_EXCL,
+ 00644)
+ .and_then([&](auto &&new_file_locked) -> tl::expected<raii_file, error> {
+ auto tmpfile_pattern = fmt::format("{}{}hsmp-XXXXXXXXXXXXXXXXXX",
+ cached_serialized.get_file().get_dir(), G_DIR_SEPARATOR);
+ auto tmpfile = raii_locked_file::mkstemp(tmpfile_pattern.data(), O_CREAT | O_RDWR | O_EXCL,
+ 00644);
+
+ if (!tmpfile) {
+ return tl::make_unexpected(tmpfile.error());
+ }
+ else {
+ auto &tmpfile_checked = tmpfile.value();
+ // Store owned string
+ auto tmpfile_name = std::string{tmpfile_checked.get_name()};
+ std::size_t unserialized_size;
+
+ if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset,
+ cached_serialized.get_size() - offset, &unserialized_size);
+ ret != HS_SUCCESS) {
+ return tl::make_unexpected(error{
+ fmt::format("cannot get unserialized database size: {}", ret),
+ EINVAL,
+ error_category::IMPORTANT});
+ }
+
+ msg_debug_hyperscan_lambda("multipattern: create new database in %s; %Hz size",
+ tmpfile_name.c_str(), unserialized_size);
+ void *buf;
+#ifdef HAVE_GETPAGESIZE
+ auto page_size = getpagesize();
+#else
+ auto page_size = sysconf(_SC_PAGESIZE);
+#endif
+ if (page_size == -1) {
+ page_size = 4096;
+ }
+ auto errcode = posix_memalign(&buf, page_size, unserialized_size);
+ if (errcode != 0 || buf == nullptr) {
+ return tl::make_unexpected(error{"Cannot allocate memory",
+ errno, error_category::CRITICAL});
+ }
+
+ if (auto ret = hs_deserialize_database_at(((const char *) cached_serialized.get_map()) + offset,
+ cached_serialized.get_size() - offset, (hs_database_t *) buf);
+ ret != HS_SUCCESS) {
+ return tl::make_unexpected(error{
+ fmt::format("cannot deserialize hyperscan database: {}", ret), ret});
+ }
+ else {
+ if (write(tmpfile_checked.get_fd(), buf, unserialized_size) == -1) {
+ free(buf);
+ return tl::make_unexpected(error{fmt::format("cannot write to {}: {}",
+ tmpfile_name, ::strerror(errno)),
+ errno, error_category::CRITICAL});
+ }
+ else {
+ free(buf);
+ /*
+ * Unlink target file before renaming to avoid
+ * race condition.
+ * So what we have is that `new_file_locked`
+ * will have flock on that file, so it will be
+ * replaced after unlink safely, and also unlocked.
+ */
+ (void) unlink(unserialized_fname.c_str());
+ if (rename(tmpfile_name.c_str(),
+ unserialized_fname.c_str()) == -1) {
+ if (errno != EEXIST) {
+ msg_info_hyperscan_lambda("cannot rename %s -> %s: %s",
+ tmpfile_name.c_str(),
+ unserialized_fname.c_str(),
+ strerror(errno));
+ }
+ }
+ else {
+ /* Unlock file but mark it as immortal first to avoid deletion */
+ tmpfile_checked.make_immortal();
+ (void) tmpfile_checked.unlock();
+ }
+ }
+ }
+ /* Reopen in RO mode */
+ return raii_file::open(unserialized_fname.c_str(), O_RDONLY);
+ };
+ })
+ .or_else([&](auto unused) -> tl::expected<raii_file, error> {
+ // Cannot create file, so try to open it in RO mode
+ return raii_file::open(unserialized_fname.c_str(), O_RDONLY);
+ });
+
+ tl::expected<hs_shared_database, error> ret;
+
+ if (unserialized_file.has_value()) {
+
+ auto &unserialized_checked = unserialized_file.value();
+
+ if (unserialized_checked.get_size() == 0) {
+ /*
+ * This is a case when we have a file that is currently
+ * being created by another process.
+ * We cannot use it!
+ */
+ ret = hs_shared_from_serialized(hs_cache, std::forward<T>(cached_serialized), offset);
+ }
+ else {
+ ret = raii_mmaped_file::mmap_shared(std::move(unserialized_checked), PROT_READ)
+ .and_then([&]<class U>(U &&mmapped_unserialized) -> auto {
+ return hs_shared_from_unserialized(hs_cache, std::forward<U>(mmapped_unserialized));
+ });
+ }
+ }
+ else {
+ ret = hs_shared_from_serialized(hs_cache, std::forward<T>(cached_serialized), offset);
+ }
+#else // defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4
+ auto ret = hs_shared_from_serialized(hs_cache, std::forward<T>(cached_serialized), offset);
+#endif// defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4 \
+ // Add serialized file to cache merely if we have successfully loaded the actual db
+ if (ret.has_value()) {
+ hs_cache.add_cached_file(cached_serialized.get_file());
+ }
+ return ret;
+ });
+}
+}// namespace rspamd::util
+
+/* C API */
+
+#define CXX_DB_FROM_C(obj) (reinterpret_cast<rspamd::util::hs_shared_database *>(obj))
+#define C_DB_FROM_CXX(obj) (reinterpret_cast<rspamd_hyperscan_t *>(obj))
+
+rspamd_hyperscan_t *
+rspamd_hyperscan_maybe_load(const char *filename, goffset offset)
+{
+ auto maybe_db = rspamd::util::load_cached_hs_file(filename, offset);
+
+ if (maybe_db.has_value()) {
+ auto *ndb = new rspamd::util::hs_shared_database;
+ *ndb = std::move(maybe_db.value());
+ return C_DB_FROM_CXX(ndb);
+ }
+ else {
+ auto error = maybe_db.error();
+
+ switch (error.category) {
+ case rspamd::util::error_category::CRITICAL:
+ msg_err_hyperscan("critical error when trying to load cached hyperscan: %s",
+ error.error_message.data());
+ break;
+ case rspamd::util::error_category::IMPORTANT:
+ msg_info_hyperscan("error when trying to load cached hyperscan: %s",
+ error.error_message.data());
+ break;
+ default:
+ msg_debug_hyperscan("error when trying to load cached hyperscan: %s",
+ error.error_message.data());
+ break;
+ }
+ }
+
+ return nullptr;
+}
+
+hs_database_t *
+rspamd_hyperscan_get_database(rspamd_hyperscan_t *db)
+{
+ auto *real_db = CXX_DB_FROM_C(db);
+ return real_db->db;
+}
+
+rspamd_hyperscan_t *
+rspamd_hyperscan_from_raw_db(hs_database_t *db, const char *fname)
+{
+ auto *ndb = new rspamd::util::hs_shared_database{db, fname};
+
+ return C_DB_FROM_CXX(ndb);
+}
+
+void rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid)
+{
+ auto *real_db = CXX_DB_FROM_C(db);
+
+ if (invalid && !real_db->cached_path.empty()) {
+ rspamd::util::hs_known_files_cache::get().delete_cached_file(real_db->cached_path.c_str());
+ }
+ delete real_db;
+}
+
+void rspamd_hyperscan_notice_known(const char *fname)
+{
+ rspamd::util::hs_known_files_cache::get().add_cached_file(fname);
+
+ if (rspamd_current_worker != nullptr) {
+ /* Also notify main process */
+ struct rspamd_srv_command notice_cmd;
+
+ if (strlen(fname) >= sizeof(notice_cmd.cmd.hyperscan_cache_file.path)) {
+ msg_err("internal error: length of the filename %d ('%s') is larger than control buffer path: %d",
+ (int) strlen(fname), fname, (int) sizeof(notice_cmd.cmd.hyperscan_cache_file.path));
+ }
+ else {
+ notice_cmd.type = RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE;
+ rspamd_strlcpy(notice_cmd.cmd.hyperscan_cache_file.path, fname, sizeof(notice_cmd.cmd.hyperscan_cache_file.path));
+ rspamd_srv_send_command(rspamd_current_worker,
+ rspamd_current_worker->srv->event_loop, &notice_cmd, -1,
+ nullptr,
+ nullptr);
+ }
+ }
+}
+
+void rspamd_hyperscan_cleanup_maybe(void)
+{
+ rspamd::util::hs_known_files_cache::get().cleanup_maybe();
+}
+
+void rspamd_hyperscan_notice_loaded(void)
+{
+ rspamd::util::hs_known_files_cache::get().notice_loaded();
+}
+
+#endif// WITH_HYPERSCAN \ No newline at end of file
diff --git a/src/libserver/hyperscan_tools.h b/src/libserver/hyperscan_tools.h
new file mode 100644
index 0000000..624b7b0
--- /dev/null
+++ b/src/libserver/hyperscan_tools.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+
+#ifndef RSPAMD_HYPERSCAN_TOOLS_H
+#define RSPAMD_HYPERSCAN_TOOLS_H
+
+#ifdef WITH_HYPERSCAN
+
+#include "hs.h"
+
+G_BEGIN_DECLS
+
+/**
+ * Opaque structure that represents hyperscan (maybe shared/cached database)
+ */
+typedef struct rspamd_hyperscan_s rspamd_hyperscan_t;
+
+/**
+ * Maybe load or mmap shared a hyperscan from a file
+ * @param filename
+ * @return cached database if available
+ */
+rspamd_hyperscan_t *rspamd_hyperscan_maybe_load(const char *filename, goffset offset);
+
+/**
+ * Creates a wrapper for a raw hs db. Ownership is transferred to the enclosing object returned
+ * @param filename
+ * @return
+ */
+rspamd_hyperscan_t *rspamd_hyperscan_from_raw_db(hs_database_t *db, const char *fname);
+/**
+ * Get the internal database
+ * @param db
+ * @return
+ */
+hs_database_t *rspamd_hyperscan_get_database(rspamd_hyperscan_t *db);
+/**
+ * Free the database
+ * @param db
+ */
+void rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid);
+
+/**
+ * Notice a known hyperscan file (e.g. externally serialized)
+ * @param fname
+ */
+void rspamd_hyperscan_notice_known(const char *fname);
+
+/**
+ * Notice that hyperscan files are all loaded (e.g. in the main process), so we can cleanup old files on termination
+ */
+void rspamd_hyperscan_notice_loaded(void);
+
+/**
+ * Cleans up old files. This method should be called on config free (in the main process)
+ */
+void rspamd_hyperscan_cleanup_maybe(void);
+
+G_END_DECLS
+
+#endif
+
+#endif
diff --git a/src/libserver/logger.h b/src/libserver/logger.h
new file mode 100644
index 0000000..8d4e313
--- /dev/null
+++ b/src/libserver/logger.h
@@ -0,0 +1,403 @@
+#ifndef RSPAMD_LOGGER_H
+#define RSPAMD_LOGGER_H
+
+#include "config.h"
+#include "radix.h"
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef G_LOG_LEVEL_USER_SHIFT
+#define G_LOG_LEVEL_USER_SHIFT 8
+#endif
+
+#define RSPAMD_LOG_ID_LEN 6
+
+struct rspamd_config;
+
+enum rspamd_log_flags {
+ RSPAMD_LOG_FORCED = (1 << G_LOG_LEVEL_USER_SHIFT),
+ RSPAMD_LOG_ENCRYPTED = (1 << (G_LOG_LEVEL_USER_SHIFT + 1)),
+ RSPAMD_LOG_LEVEL_MASK = ~(RSPAMD_LOG_FORCED | RSPAMD_LOG_ENCRYPTED)
+};
+
+typedef struct rspamd_logger_s rspamd_logger_t;
+typedef bool (*rspamd_log_func_t)(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *logger,
+ gpointer arg);
+typedef void *(*rspamd_log_init_func)(rspamd_logger_t *logger,
+ struct rspamd_config *cfg,
+ uid_t uid, gid_t gid,
+ GError **err);
+typedef bool (*rspamd_log_on_fork_func)(rspamd_logger_t *logger,
+ struct rspamd_config *cfg,
+ gpointer arg,
+ GError **err);
+typedef void *(*rspamd_log_reload_func)(rspamd_logger_t *logger,
+ struct rspamd_config *cfg,
+ gpointer arg,
+ uid_t uid, gid_t gid,
+ GError **err);
+typedef void (*rspamd_log_dtor_func)(rspamd_logger_t *logger,
+ gpointer arg);
+
+struct rspamd_logger_funcs {
+ rspamd_log_init_func init;
+ rspamd_log_reload_func reload;
+ rspamd_log_dtor_func dtor;
+ rspamd_log_func_t log;
+ rspamd_log_on_fork_func on_fork;
+ gpointer specific;
+};
+
+#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
+#define RSPAMD_LOGBUF_SIZE 8192
+#else
+/* Use a smaller buffer */
+#define RSPAMD_LOGBUF_SIZE 2048
+#endif
+
+/**
+ * Opens a new (initial) logger with console type
+ * This logger is also used as an emergency logger
+ * @return new rspamd logger object
+ */
+rspamd_logger_t *rspamd_log_open_emergency(rspamd_mempool_t *pool, gint flags);
+
+/**
+ * Open specific (configured logging)
+ * @param pool
+ * @param config
+ * @param uid
+ * @param gid
+ * @return
+ */
+rspamd_logger_t *rspamd_log_open_specific(rspamd_mempool_t *pool,
+ struct rspamd_config *config,
+ const gchar *ptype,
+ uid_t uid, gid_t gid);
+
+/**
+ * Set log level (from GLogLevelFlags)
+ * @param logger
+ * @param level
+ */
+void rspamd_log_set_log_level(rspamd_logger_t *logger, gint level);
+gint rspamd_log_get_log_level(rspamd_logger_t *logger);
+const gchar *rspamd_get_log_severity_string(gint level_flags);
+/**
+ * Set log flags (from enum rspamd_log_flags)
+ * @param logger
+ * @param flags
+ */
+void rspamd_log_set_log_flags(rspamd_logger_t *logger, gint flags);
+
+/**
+ * Close log file or destroy other structures
+ */
+void rspamd_log_close(rspamd_logger_t *logger);
+
+
+rspamd_logger_t *rspamd_log_default_logger(void);
+rspamd_logger_t *rspamd_log_emergency_logger(void);
+
+/**
+ * Close and open log again for privileged processes
+ */
+bool rspamd_log_reopen(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid);
+
+/**
+ * Set log pid
+ */
+void rspamd_log_on_fork(GQuark ptype, struct rspamd_config *cfg,
+ rspamd_logger_t *logger);
+
+/**
+ * Log function that is compatible for glib messages
+ */
+void rspamd_glib_log_function(const gchar *log_domain,
+ GLogLevelFlags log_level,
+ const gchar *message,
+ gpointer arg);
+
+/**
+ * Log function for printing glib assertions
+ */
+void rspamd_glib_printerr_function(const gchar *message);
+
+/**
+ * Function with variable number of arguments support
+ */
+bool rspamd_common_log_function(rspamd_logger_t *logger,
+ gint level_flags,
+ const gchar *module, const gchar *id,
+ const gchar *function, const gchar *fmt, ...);
+
+bool rspamd_common_logv(rspamd_logger_t *logger, gint level_flags,
+ const gchar *module, const gchar *id, const gchar *function,
+ const gchar *fmt, va_list args);
+
+/**
+ * Add new logging module, returns module ID
+ * @param mod
+ * @return
+ */
+gint rspamd_logger_add_debug_module(const gchar *mod);
+
+/*
+ * Macro to use for faster debug modules
+ */
+#define INIT_LOG_MODULE(mname) \
+ static gint rspamd_##mname##_log_id = -1; \
+ RSPAMD_CONSTRUCTOR(rspamd_##mname##_log_init) \
+ { \
+ rspamd_##mname##_log_id = rspamd_logger_add_debug_module(#mname); \
+ }
+
+
+#define INIT_LOG_MODULE_PUBLIC(mname) \
+ gint rspamd_##mname##_log_id = -1; \
+ RSPAMD_CONSTRUCTOR(rspamd_##mname##_log_init) \
+ { \
+ rspamd_##mname##_log_id = rspamd_logger_add_debug_module(#mname); \
+ }
+
+#define EXTERN_LOG_MODULE_DEF(mname) \
+ extern gint rspamd_##mname##_log_id
+
+void rspamd_logger_configure_modules(GHashTable *mods_enabled);
+
+/**
+ * Conditional debug function
+ */
+bool rspamd_conditional_debug(rspamd_logger_t *logger,
+ rspamd_inet_addr_t *addr, const gchar *module, const gchar *id,
+ const gchar *function, const gchar *fmt, ...);
+
+bool rspamd_conditional_debug_fast(rspamd_logger_t *logger,
+ rspamd_inet_addr_t *addr,
+ gint mod_id,
+ const gchar *module, const gchar *id,
+ const gchar *function, const gchar *fmt, ...);
+bool rspamd_conditional_debug_fast_num_id(rspamd_logger_t *logger,
+ rspamd_inet_addr_t *addr,
+ gint mod_id,
+ const gchar *module, guint64 id,
+ const gchar *function, const gchar *fmt, ...);
+gboolean rspamd_logger_need_log(rspamd_logger_t *rspamd_log,
+ GLogLevelFlags log_level,
+ gint module_id);
+
+/**
+ * Function with variable number of arguments support that uses static default logger
+ */
+bool rspamd_default_log_function(gint level_flags,
+ const gchar *module, const gchar *id,
+ const gchar *function,
+ const gchar *fmt,
+ ...);
+
+/**
+ * Varargs version of default log function
+ * @param log_level
+ * @param function
+ * @param fmt
+ * @param args
+ */
+bool rspamd_default_logv(gint level_flags,
+ const gchar *module, const gchar *id,
+ const gchar *function,
+ const gchar *fmt,
+ va_list args);
+
+/**
+ * Temporary turn on debug
+ */
+void rspamd_log_debug(rspamd_logger_t *logger);
+
+/**
+ * Turn off debug
+ */
+void rspamd_log_nodebug(rspamd_logger_t *logger);
+
+/**
+ * Return array of counters (4 numbers):
+ * 0 - errors
+ * 1 - warnings
+ * 2 - info messages
+ * 3 - debug messages
+ */
+const guint64 *rspamd_log_counters(rspamd_logger_t *logger);
+
+/**
+ * Returns errors ring buffer as ucl array
+ * @param logger
+ * @return
+ */
+ucl_object_t *rspamd_log_errorbuf_export(const rspamd_logger_t *logger);
+
+/**
+ * Sets new logger functions and initialise logging if needed
+ * @param logger
+ * @param nfuncs
+ * @return static pointer to the old functions (so this function is not reentrant)
+ */
+struct rspamd_logger_funcs *rspamd_logger_set_log_function(rspamd_logger_t *logger,
+ struct rspamd_logger_funcs *nfuncs);
+
+/* Typical functions */
+
+extern guint rspamd_task_log_id;
+#ifdef __cplusplus
+#define RSPAMD_LOG_FUNC __func__
+#else
+#define RSPAMD_LOG_FUNC G_STRFUNC
+#endif
+
+/* Logging in postfix style */
+#define msg_err(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ NULL, NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ NULL, NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ NULL, NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE, \
+ NULL, NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, \
+ NULL, NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#define debug_task(...) rspamd_conditional_debug_fast(NULL, \
+ task->from_addr, \
+ rspamd_task_log_id, "task", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+/* Use the following macros if you have `task` in the function */
+#define msg_err_task(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_err_task_lambda(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ log_func, \
+ __VA_ARGS__)
+#define msg_warn_task(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice_task(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_task(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_task_lambda(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ log_func, \
+ __VA_ARGS__)
+#define msg_debug_task(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \
+ rspamd_task_log_id, "task", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_task_lambda(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \
+ rspamd_task_log_id, "task", task->task_pool->tag.uid, \
+ log_func, \
+ __VA_ARGS__)
+#define msg_err_task_encrypted(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL | RSPAMD_LOG_ENCRYPTED, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_task_encrypted(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING | RSPAMD_LOG_ENCRYPTED, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice_task_encrypted(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE | RSPAMD_LOG_ENCRYPTED, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_task_encrypted(...) rspamd_default_log_function(G_LOG_LEVEL_INFO | RSPAMD_LOG_ENCRYPTED, \
+ task->task_pool->tag.tagname, task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+/* Check for NULL pointer first */
+#define msg_err_task_check(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ task ? task->task_pool->tag.tagname : NULL, task ? task->task_pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_task_check(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ task ? task->task_pool->tag.tagname : NULL, task ? task->task_pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_task_check(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE, \
+ task ? task->task_pool->tag.tagname : NULL, task ? task->task_pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice_task_check(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ task ? task->task_pool->tag.tagname : NULL, task ? task->task_pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_task_check(...) rspamd_conditional_debug_fast(NULL, \
+ task ? task->from_addr : NULL, \
+ rspamd_task_log_id, "task", task ? task->task_pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+/* Use the following macros if you have `pool` in the function */
+#define msg_err_pool(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ pool->tag.tagname, pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_pool(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ pool->tag.tagname, pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_pool(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ pool->tag.tagname, pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_pool(...) rspamd_conditional_debug(NULL, NULL, \
+ pool->tag.tagname, pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+/* Check for NULL pointer first */
+#define msg_err_pool_check(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ pool ? pool->tag.tagname : NULL, pool ? pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_pool_check(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ pool ? pool->tag.tagname : NULL, pool ? pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_pool_check(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ pool ? pool->tag.tagname : NULL, pool ? pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_pool_check(...) rspamd_conditional_debug(NULL, NULL, \
+ pool ? pool->tag.tagname : NULL, pool ? pool->tag.uid : NULL, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/logger/logger.c b/src/libserver/logger/logger.c
new file mode 100644
index 0000000..2dae632
--- /dev/null
+++ b/src/libserver/logger/logger.c
@@ -0,0 +1,1319 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "logger.h"
+#include "rspamd.h"
+#include "libserver/maps/map.h"
+#include "libserver/maps/map_helpers.h"
+#include "ottery.h"
+#include "unix-std.h"
+#include "logger_private.h"
+
+
+static rspamd_logger_t *default_logger = NULL;
+static rspamd_logger_t *emergency_logger = NULL;
+static struct rspamd_log_modules *log_modules = NULL;
+
+static const gchar lf_chr = '\n';
+
+guint rspamd_task_log_id = (guint) -1;
+RSPAMD_CONSTRUCTOR(rspamd_task_log_init)
+{
+ rspamd_task_log_id = rspamd_logger_add_debug_module("task");
+}
+
+rspamd_logger_t *
+rspamd_log_default_logger(void)
+{
+ return default_logger;
+}
+
+rspamd_logger_t *
+rspamd_log_emergency_logger(void)
+{
+ return emergency_logger;
+}
+
+void rspamd_log_set_log_level(rspamd_logger_t *logger, gint level)
+{
+ if (logger == NULL) {
+ logger = default_logger;
+ }
+
+ logger->log_level = level;
+}
+
+gint rspamd_log_get_log_level(rspamd_logger_t *logger)
+{
+ if (logger == NULL) {
+ logger = default_logger;
+ }
+
+ return logger->log_level;
+}
+
+void rspamd_log_set_log_flags(rspamd_logger_t *logger, gint flags)
+{
+ g_assert(logger != NULL);
+
+ logger->flags = flags;
+}
+
+void rspamd_log_close(rspamd_logger_t *logger)
+{
+ g_assert(logger != NULL);
+
+ if (logger->closed) {
+ return;
+ }
+
+ logger->closed = TRUE;
+
+ if (logger->debug_ip) {
+ rspamd_map_helper_destroy_radix(logger->debug_ip);
+ }
+
+ if (logger->pk) {
+ rspamd_pubkey_unref(logger->pk);
+ }
+
+ if (logger->keypair) {
+ rspamd_keypair_unref(logger->keypair);
+ }
+
+ logger->ops.dtor(logger, logger->ops.specific);
+
+ /* TODO: Do we really need that ? */
+ if (logger == default_logger) {
+ default_logger = NULL;
+ }
+
+ if (logger == emergency_logger) {
+ emergency_logger = NULL;
+ }
+
+ if (!logger->pool) {
+ g_free(logger);
+ }
+}
+
+bool rspamd_log_reopen(rspamd_logger_t *rspamd_log, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid)
+{
+ void *nspec;
+ GError *err = NULL;
+
+ g_assert(rspamd_log != NULL);
+
+ nspec = rspamd_log->ops.reload(rspamd_log, cfg, rspamd_log->ops.specific,
+ uid, gid, &err);
+
+ if (nspec != NULL) {
+ rspamd_log->ops.specific = nspec;
+ }
+ else {
+ }
+
+ return nspec != NULL;
+}
+
+static void
+rspamd_emergency_logger_dtor(gpointer d)
+{
+ rspamd_logger_t *logger = (rspamd_logger_t *) d;
+
+ rspamd_log_close(logger);
+}
+
+rspamd_logger_t *
+rspamd_log_open_emergency(rspamd_mempool_t *pool, gint flags)
+{
+ rspamd_logger_t *logger;
+ GError *err = NULL;
+
+ g_assert(default_logger == NULL);
+ g_assert(emergency_logger == NULL);
+
+ if (pool) {
+ logger = rspamd_mempool_alloc0(pool, sizeof(rspamd_logger_t));
+ logger->mtx = rspamd_mempool_get_mutex(pool);
+ }
+ else {
+ logger = g_malloc0(sizeof(rspamd_logger_t));
+ }
+
+ logger->flags = flags;
+ logger->pool = pool;
+ logger->process_type = "main";
+ logger->pid = getpid();
+
+ const struct rspamd_logger_funcs *funcs = &console_log_funcs;
+ memcpy(&logger->ops, funcs, sizeof(*funcs));
+
+ logger->ops.specific = logger->ops.init(logger, NULL, -1, -1, &err);
+
+ if (logger->ops.specific == NULL) {
+ rspamd_fprintf(stderr, "fatal error: cannot init console logging: %e\n",
+ err);
+ g_error_free(err);
+
+ exit(EXIT_FAILURE);
+ }
+
+ default_logger = logger;
+ emergency_logger = logger;
+
+ rspamd_mempool_add_destructor(pool, rspamd_emergency_logger_dtor,
+ emergency_logger);
+
+ return logger;
+}
+
+rspamd_logger_t *
+rspamd_log_open_specific(rspamd_mempool_t *pool,
+ struct rspamd_config *cfg,
+ const gchar *ptype,
+ uid_t uid, gid_t gid)
+{
+ rspamd_logger_t *logger;
+ GError *err = NULL;
+
+ if (pool) {
+ logger = rspamd_mempool_alloc0(pool, sizeof(rspamd_logger_t));
+ logger->mtx = rspamd_mempool_get_mutex(pool);
+ }
+ else {
+ logger = g_malloc0(sizeof(rspamd_logger_t));
+ }
+
+ logger->pool = pool;
+
+ if (cfg) {
+ if (cfg->log_error_elts > 0 && pool) {
+ logger->errlog = rspamd_mempool_alloc0_shared(pool,
+ sizeof(*logger->errlog));
+ logger->errlog->pool = pool;
+ logger->errlog->max_elts = cfg->log_error_elts;
+ logger->errlog->elt_len = cfg->log_error_elt_maxlen;
+ logger->errlog->elts = rspamd_mempool_alloc0_shared(pool,
+ sizeof(struct rspamd_logger_error_elt) * cfg->log_error_elts +
+ cfg->log_error_elt_maxlen * cfg->log_error_elts);
+ }
+
+ logger->log_level = cfg->log_level;
+ logger->flags = cfg->log_flags;
+
+ if (!(logger->flags & RSPAMD_LOG_FLAG_ENFORCED)) {
+ logger->log_level = cfg->log_level;
+ }
+ }
+
+ const struct rspamd_logger_funcs *funcs = NULL;
+
+ if (cfg) {
+ switch (cfg->log_type) {
+ case RSPAMD_LOG_CONSOLE:
+ funcs = &console_log_funcs;
+ break;
+ case RSPAMD_LOG_SYSLOG:
+ funcs = &syslog_log_funcs;
+ break;
+ case RSPAMD_LOG_FILE:
+ funcs = &file_log_funcs;
+ break;
+ }
+ }
+ else {
+ funcs = &console_log_funcs;
+ }
+
+ g_assert(funcs != NULL);
+ memcpy(&logger->ops, funcs, sizeof(*funcs));
+
+ logger->ops.specific = logger->ops.init(logger, cfg, uid, gid, &err);
+
+ if (emergency_logger && logger->ops.specific == NULL) {
+ rspamd_common_log_function(emergency_logger, G_LOG_LEVEL_CRITICAL,
+ "logger", NULL, G_STRFUNC,
+ "cannot open specific logger: %e", err);
+ g_error_free(err);
+
+ return NULL;
+ }
+
+ logger->pid = getpid();
+ logger->process_type = ptype;
+ logger->enabled = TRUE;
+
+ /* Set up conditional logging */
+ if (cfg) {
+ if (cfg->debug_ip_map != NULL) {
+ /* Try to add it as map first of all */
+ if (logger->debug_ip) {
+ rspamd_map_helper_destroy_radix(logger->debug_ip);
+ }
+
+ logger->debug_ip = NULL;
+ rspamd_config_radix_from_ucl(cfg,
+ cfg->debug_ip_map,
+ "IP addresses for which debug logs are enabled",
+ &logger->debug_ip,
+ NULL,
+ NULL, "debug ip");
+ }
+
+ if (cfg->log_encryption_key) {
+ logger->pk = rspamd_pubkey_ref(cfg->log_encryption_key);
+ logger->keypair = rspamd_keypair_new(RSPAMD_KEYPAIR_KEX,
+ RSPAMD_CRYPTOBOX_MODE_25519);
+ rspamd_pubkey_calculate_nm(logger->pk, logger->keypair);
+ }
+ }
+
+ default_logger = logger;
+
+ return logger;
+}
+
+
+/**
+ * Used after fork() for updating structure params
+ */
+void rspamd_log_on_fork(GQuark ptype, struct rspamd_config *cfg,
+ rspamd_logger_t *logger)
+{
+ logger->pid = getpid();
+ logger->process_type = g_quark_to_string(ptype);
+
+ if (logger->ops.on_fork) {
+ GError *err = NULL;
+
+ bool ret = logger->ops.on_fork(logger, cfg, logger->ops.specific, &err);
+
+ if (!ret && emergency_logger) {
+ rspamd_common_log_function(emergency_logger, G_LOG_LEVEL_CRITICAL,
+ "logger", NULL, G_STRFUNC,
+ "cannot update logging on fork: %e", err);
+ g_error_free(err);
+ }
+ }
+}
+
+inline gboolean
+rspamd_logger_need_log(rspamd_logger_t *rspamd_log, GLogLevelFlags log_level,
+ gint module_id)
+{
+ g_assert(rspamd_log != NULL);
+
+ if ((log_level & RSPAMD_LOG_FORCED) ||
+ (log_level & (RSPAMD_LOG_LEVEL_MASK & G_LOG_LEVEL_MASK)) <= rspamd_log->log_level) {
+ return TRUE;
+ }
+
+ if (module_id != -1 && isset(log_modules->bitset, module_id)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gchar *
+rspamd_log_encrypt_message(const gchar *begin, const gchar *end, gsize *enc_len,
+ rspamd_logger_t *rspamd_log)
+{
+ guchar *out;
+ gchar *b64;
+ guchar *p, *nonce, *mac;
+ const guchar *comp;
+ guint len, inlen;
+
+ g_assert(end > begin);
+ /* base64 (pubkey | nonce | message) */
+ inlen = rspamd_cryptobox_nonce_bytes(RSPAMD_CRYPTOBOX_MODE_25519) +
+ rspamd_cryptobox_pk_bytes(RSPAMD_CRYPTOBOX_MODE_25519) +
+ rspamd_cryptobox_mac_bytes(RSPAMD_CRYPTOBOX_MODE_25519) +
+ (end - begin);
+ out = g_malloc(inlen);
+
+ p = out;
+ comp = rspamd_pubkey_get_pk(rspamd_log->pk, &len);
+ memcpy(p, comp, len);
+ p += len;
+ ottery_rand_bytes(p, rspamd_cryptobox_nonce_bytes(RSPAMD_CRYPTOBOX_MODE_25519));
+ nonce = p;
+ p += rspamd_cryptobox_nonce_bytes(RSPAMD_CRYPTOBOX_MODE_25519);
+ mac = p;
+ p += rspamd_cryptobox_mac_bytes(RSPAMD_CRYPTOBOX_MODE_25519);
+ memcpy(p, begin, end - begin);
+ comp = rspamd_pubkey_get_nm(rspamd_log->pk, rspamd_log->keypair);
+ g_assert(comp != NULL);
+ rspamd_cryptobox_encrypt_nm_inplace(p, end - begin, nonce, comp, mac,
+ RSPAMD_CRYPTOBOX_MODE_25519);
+ b64 = rspamd_encode_base64(out, inlen, 0, enc_len);
+ g_free(out);
+
+ return b64;
+}
+
+static void
+rspamd_log_write_ringbuffer(rspamd_logger_t *rspamd_log,
+ const gchar *module, const gchar *id,
+ const gchar *data, glong len)
+{
+ guint32 row_num;
+ struct rspamd_logger_error_log *elog;
+ struct rspamd_logger_error_elt *elt;
+
+ if (!rspamd_log->errlog) {
+ return;
+ }
+
+ elog = rspamd_log->errlog;
+
+ g_atomic_int_compare_and_exchange(&elog->cur_row, elog->max_elts, 0);
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ row_num = g_atomic_int_add(&elog->cur_row, 1);
+#else
+ row_num = g_atomic_int_exchange_and_add(&elog->cur_row, 1);
+#endif
+
+ if (row_num < elog->max_elts) {
+ elt = (struct rspamd_logger_error_elt *) (((guchar *) elog->elts) +
+ (sizeof(*elt) + elog->elt_len) * row_num);
+ g_atomic_int_set(&elt->completed, 0);
+ }
+ else {
+ /* Race condition */
+ elog->cur_row = 0;
+ return;
+ }
+
+ elt->pid = rspamd_log->pid;
+ elt->ptype = g_quark_from_string(rspamd_log->process_type);
+ elt->ts = rspamd_get_calendar_ticks();
+
+ if (id) {
+ rspamd_strlcpy(elt->id, id, sizeof(elt->id));
+ }
+ else {
+ rspamd_strlcpy(elt->id, "", sizeof(elt->id));
+ }
+
+ if (module) {
+ rspamd_strlcpy(elt->module, module, sizeof(elt->module));
+ }
+ else {
+ rspamd_strlcpy(elt->module, "", sizeof(elt->module));
+ }
+
+ rspamd_strlcpy(elt->message, data, MIN(len + 1, elog->elt_len));
+ g_atomic_int_set(&elt->completed, 1);
+}
+
+bool rspamd_common_logv(rspamd_logger_t *rspamd_log, gint level_flags,
+ const gchar *module, const gchar *id, const gchar *function,
+ const gchar *fmt, va_list args)
+{
+ gchar *end;
+ gint level = level_flags & (RSPAMD_LOG_LEVEL_MASK & G_LOG_LEVEL_MASK), mod_id;
+ bool ret = false;
+ gchar logbuf[RSPAMD_LOGBUF_SIZE], *log_line;
+ gsize nescaped;
+
+ if (G_UNLIKELY(rspamd_log == NULL)) {
+ rspamd_log = default_logger;
+ }
+
+ log_line = logbuf;
+
+ if (G_UNLIKELY(rspamd_log == NULL)) {
+ /* Just fprintf message to stderr */
+ if (level >= G_LOG_LEVEL_INFO) {
+ end = rspamd_vsnprintf(logbuf, sizeof(logbuf), fmt, args);
+ rspamd_fprintf(stderr, "%*s\n", (gint) (end - log_line),
+ log_line);
+ }
+ }
+ else {
+ if (level == G_LOG_LEVEL_DEBUG) {
+ mod_id = rspamd_logger_add_debug_module(module);
+ }
+ else {
+ mod_id = -1;
+ }
+
+ if (rspamd_logger_need_log(rspamd_log, level_flags, mod_id)) {
+ end = rspamd_vsnprintf(logbuf, sizeof(logbuf), fmt, args);
+
+ if (!(rspamd_log->flags & RSPAMD_LOG_FLAG_RSPAMADM)) {
+ if ((nescaped = rspamd_log_line_need_escape(logbuf, end - logbuf)) != 0) {
+ gsize unescaped_len = end - logbuf;
+ gchar *logbuf_escaped = g_alloca(unescaped_len + nescaped * 4);
+ log_line = logbuf_escaped;
+
+ end = rspamd_log_line_hex_escape(logbuf, unescaped_len,
+ logbuf_escaped, unescaped_len + nescaped * 4);
+ }
+ }
+
+ if ((level_flags & RSPAMD_LOG_ENCRYPTED) && rspamd_log->pk) {
+ gchar *encrypted;
+ gsize enc_len;
+
+ encrypted = rspamd_log_encrypt_message(log_line, end, &enc_len,
+ rspamd_log);
+ ret = rspamd_log->ops.log(module, id,
+ function,
+ level_flags,
+ encrypted,
+ enc_len,
+ rspamd_log,
+ rspamd_log->ops.specific);
+ g_free(encrypted);
+ }
+ else {
+ ret = rspamd_log->ops.log(module, id,
+ function,
+ level_flags,
+ log_line,
+ end - log_line,
+ rspamd_log,
+ rspamd_log->ops.specific);
+ }
+
+ switch (level) {
+ case G_LOG_LEVEL_CRITICAL:
+ rspamd_log->log_cnt[0]++;
+ rspamd_log_write_ringbuffer(rspamd_log, module, id, log_line,
+ end - log_line);
+ break;
+ case G_LOG_LEVEL_WARNING:
+ rspamd_log->log_cnt[1]++;
+ break;
+ case G_LOG_LEVEL_INFO:
+ rspamd_log->log_cnt[2]++;
+ break;
+ case G_LOG_LEVEL_DEBUG:
+ rspamd_log->log_cnt[3]++;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * This log functions select real logger and write message if level is less or equal to configured log level
+ */
+bool rspamd_common_log_function(rspamd_logger_t *rspamd_log,
+ gint level_flags,
+ const gchar *module, const gchar *id,
+ const gchar *function,
+ const gchar *fmt,
+ ...)
+{
+ va_list vp;
+
+ va_start(vp, fmt);
+ bool ret = rspamd_common_logv(rspamd_log, level_flags, module, id, function, fmt, vp);
+ va_end(vp);
+
+ return ret;
+}
+
+bool rspamd_default_logv(gint level_flags, const gchar *module, const gchar *id,
+ const gchar *function,
+ const gchar *fmt, va_list args)
+{
+ return rspamd_common_logv(NULL, level_flags, module, id, function, fmt, args);
+}
+
+bool rspamd_default_log_function(gint level_flags,
+ const gchar *module, const gchar *id,
+ const gchar *function, const gchar *fmt, ...)
+{
+
+ va_list vp;
+
+ va_start(vp, fmt);
+ bool ret = rspamd_default_logv(level_flags, module, id, function, fmt, vp);
+ va_end(vp);
+
+ return ret;
+}
+
+
+/**
+ * Main file interface for logging
+ */
+/**
+ * Write log line depending on ip
+ */
+bool rspamd_conditional_debug(rspamd_logger_t *rspamd_log,
+ rspamd_inet_addr_t *addr, const gchar *module, const gchar *id,
+ const gchar *function, const gchar *fmt, ...)
+{
+ static gchar logbuf[LOGBUF_LEN];
+ va_list vp;
+ gchar *end;
+ gint mod_id;
+
+ if (rspamd_log == NULL) {
+ rspamd_log = default_logger;
+ }
+
+ mod_id = rspamd_logger_add_debug_module(module);
+
+ if (rspamd_logger_need_log(rspamd_log, G_LOG_LEVEL_DEBUG, mod_id) ||
+ rspamd_log->is_debug) {
+ if (rspamd_log->debug_ip && addr != NULL) {
+ if (rspamd_match_radix_map_addr(rspamd_log->debug_ip,
+ addr) == NULL) {
+ return false;
+ }
+ }
+
+ va_start(vp, fmt);
+ end = rspamd_vsnprintf(logbuf, sizeof(logbuf), fmt, vp);
+ *end = '\0';
+ va_end(vp);
+ return rspamd_log->ops.log(module, id,
+ function,
+ G_LOG_LEVEL_DEBUG | RSPAMD_LOG_FORCED,
+ logbuf,
+ end - logbuf,
+ rspamd_log,
+ rspamd_log->ops.specific);
+ }
+
+ return false;
+}
+
+bool rspamd_conditional_debug_fast(rspamd_logger_t *rspamd_log,
+ rspamd_inet_addr_t *addr,
+ gint mod_id, const gchar *module, const gchar *id,
+ const gchar *function, const gchar *fmt, ...)
+{
+ static gchar logbuf[LOGBUF_LEN];
+ va_list vp;
+ gchar *end;
+
+ if (rspamd_log == NULL) {
+ rspamd_log = default_logger;
+ }
+
+ if (rspamd_logger_need_log(rspamd_log, G_LOG_LEVEL_DEBUG, mod_id) ||
+ rspamd_log->is_debug) {
+ if (rspamd_log->debug_ip && addr != NULL) {
+ if (rspamd_match_radix_map_addr(rspamd_log->debug_ip, addr) == NULL) {
+ return false;
+ }
+ }
+
+ va_start(vp, fmt);
+ end = rspamd_vsnprintf(logbuf, sizeof(logbuf), fmt, vp);
+ *end = '\0';
+ va_end(vp);
+ return rspamd_log->ops.log(module, id,
+ function,
+ G_LOG_LEVEL_DEBUG | RSPAMD_LOG_FORCED,
+ logbuf,
+ end - logbuf,
+ rspamd_log,
+ rspamd_log->ops.specific);
+ }
+
+ return false;
+}
+
+bool rspamd_conditional_debug_fast_num_id(rspamd_logger_t *rspamd_log,
+ rspamd_inet_addr_t *addr,
+ gint mod_id, const gchar *module, guint64 id,
+ const gchar *function, const gchar *fmt, ...)
+{
+ static gchar logbuf[LOGBUF_LEN], idbuf[64];
+ va_list vp;
+ gchar *end;
+
+ if (rspamd_log == NULL) {
+ rspamd_log = default_logger;
+ }
+
+ if (rspamd_logger_need_log(rspamd_log, G_LOG_LEVEL_DEBUG, mod_id) ||
+ rspamd_log->is_debug) {
+ if (rspamd_log->debug_ip && addr != NULL) {
+ if (rspamd_match_radix_map_addr(rspamd_log->debug_ip, addr) == NULL) {
+ return false;
+ }
+ }
+
+ rspamd_snprintf(idbuf, sizeof(idbuf), "%XuL", id);
+ va_start(vp, fmt);
+ end = rspamd_vsnprintf(logbuf, sizeof(logbuf), fmt, vp);
+ *end = '\0';
+ va_end(vp);
+ return rspamd_log->ops.log(module, idbuf,
+ function,
+ G_LOG_LEVEL_DEBUG | RSPAMD_LOG_FORCED,
+ logbuf,
+ end - logbuf,
+ rspamd_log,
+ rspamd_log->ops.specific);
+ }
+
+ return false;
+}
+
+/**
+ * Wrapper for glib logger
+ */
+void rspamd_glib_log_function(const gchar *log_domain,
+ GLogLevelFlags log_level,
+ const gchar *message,
+ gpointer arg)
+{
+ rspamd_logger_t *rspamd_log = (rspamd_logger_t *) arg;
+
+ if (rspamd_log->enabled &&
+ rspamd_logger_need_log(rspamd_log, log_level, -1)) {
+ rspamd_log->ops.log("glib", NULL,
+ NULL,
+ log_level,
+ message,
+ strlen(message),
+ rspamd_log,
+ rspamd_log->ops.specific);
+ }
+}
+
+void rspamd_glib_printerr_function(const gchar *message)
+{
+ rspamd_common_log_function(NULL, G_LOG_LEVEL_CRITICAL, "glib",
+ NULL, G_STRFUNC,
+ "%s", message);
+}
+
+/**
+ * Temporary turn on debugging
+ */
+void rspamd_log_debug(rspamd_logger_t *rspamd_log)
+{
+ rspamd_log->is_debug = TRUE;
+}
+
+/**
+ * Turn off temporary debugging
+ */
+void rspamd_log_nodebug(rspamd_logger_t *rspamd_log)
+{
+ rspamd_log->is_debug = FALSE;
+}
+
+const guint64 *
+rspamd_log_counters(rspamd_logger_t *logger)
+{
+ if (logger) {
+ return logger->log_cnt;
+ }
+
+ return NULL;
+}
+
+static gint
+rspamd_log_errlog_cmp(const ucl_object_t **o1, const ucl_object_t **o2)
+{
+ const ucl_object_t *ts1, *ts2;
+
+ ts1 = ucl_object_lookup(*o1, "ts");
+ ts2 = ucl_object_lookup(*o2, "ts");
+
+ if (ts1 && ts2) {
+ gdouble t1 = ucl_object_todouble(ts1), t2 = ucl_object_todouble(ts2);
+
+ if (t1 > t2) {
+ return -1;
+ }
+ else if (t2 > t1) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+ucl_object_t *
+rspamd_log_errorbuf_export(const rspamd_logger_t *logger)
+{
+ struct rspamd_logger_error_elt *cpy, *cur;
+ ucl_object_t *top = ucl_object_typed_new(UCL_ARRAY);
+ guint i;
+
+ if (logger->errlog == NULL) {
+ return top;
+ }
+
+ cpy = g_malloc0_n(logger->errlog->max_elts,
+ sizeof(*cpy) + logger->errlog->elt_len);
+ memcpy(cpy, logger->errlog->elts, logger->errlog->max_elts * (sizeof(*cpy) + logger->errlog->elt_len));
+
+ for (i = 0; i < logger->errlog->max_elts; i++) {
+ cur = (struct rspamd_logger_error_elt *) ((guchar *) cpy +
+ i * ((sizeof(*cpy) + logger->errlog->elt_len)));
+ if (cur->completed) {
+ ucl_object_t *obj = ucl_object_typed_new(UCL_OBJECT);
+
+ ucl_object_insert_key(obj, ucl_object_fromdouble(cur->ts),
+ "ts", 0, false);
+ ucl_object_insert_key(obj, ucl_object_fromint(cur->pid),
+ "pid", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromstring(g_quark_to_string(cur->ptype)),
+ "type", 0, false);
+ ucl_object_insert_key(obj, ucl_object_fromstring(cur->id),
+ "id", 0, false);
+ ucl_object_insert_key(obj, ucl_object_fromstring(cur->module),
+ "module", 0, false);
+ ucl_object_insert_key(obj, ucl_object_fromstring(cur->message),
+ "message", 0, false);
+
+ ucl_array_append(top, obj);
+ }
+ }
+
+ ucl_object_array_sort(top, rspamd_log_errlog_cmp);
+ g_free(cpy);
+
+ return top;
+}
+
+static guint
+rspamd_logger_allocate_mod_bit(void)
+{
+ if (log_modules->bitset_allocated * NBBY > log_modules->bitset_len + 1) {
+ log_modules->bitset_len++;
+ return log_modules->bitset_len - 1;
+ }
+ else {
+ /* Need to expand */
+ log_modules->bitset_allocated *= 2;
+ log_modules->bitset = g_realloc(log_modules->bitset,
+ log_modules->bitset_allocated);
+
+ return rspamd_logger_allocate_mod_bit();
+ }
+}
+
+RSPAMD_DESTRUCTOR(rspamd_debug_modules_dtor)
+{
+ if (log_modules) {
+ g_hash_table_unref(log_modules->modules);
+ g_free(log_modules->bitset);
+ g_free(log_modules);
+ }
+}
+
+gint rspamd_logger_add_debug_module(const gchar *mname)
+{
+ struct rspamd_log_module *m;
+
+ if (mname == NULL) {
+ return -1;
+ }
+
+ if (log_modules == NULL) {
+ /*
+ * This is usually called from constructors, so we call init check
+ * each time to avoid dependency issues between ctors calls
+ */
+ log_modules = g_malloc0(sizeof(*log_modules));
+ log_modules->modules = g_hash_table_new_full(rspamd_strcase_hash,
+ rspamd_strcase_equal, g_free, g_free);
+ log_modules->bitset_allocated = 16;
+ log_modules->bitset_len = 0;
+ log_modules->bitset = g_malloc0(log_modules->bitset_allocated);
+ }
+
+ if ((m = g_hash_table_lookup(log_modules->modules, mname)) == NULL) {
+ m = g_malloc0(sizeof(*m));
+ m->mname = g_strdup(mname);
+ m->id = rspamd_logger_allocate_mod_bit();
+ clrbit(log_modules->bitset, m->id);
+ g_hash_table_insert(log_modules->modules, m->mname, m);
+ }
+
+ return m->id;
+}
+
+void rspamd_logger_configure_modules(GHashTable *mods_enabled)
+{
+ GHashTableIter it;
+ gpointer k, v;
+ guint id;
+
+ /* Clear all in bitset_allocated -> this are bytes not bits */
+ memset(log_modules->bitset, 0, log_modules->bitset_allocated);
+ /* On first iteration, we go through all modules enabled and add missing ones */
+ g_hash_table_iter_init(&it, mods_enabled);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ rspamd_logger_add_debug_module((const gchar *) k);
+ }
+
+ g_hash_table_iter_init(&it, mods_enabled);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ id = rspamd_logger_add_debug_module((const gchar *) k);
+
+ if (isclr(log_modules->bitset, id)) {
+ msg_info("enable debugging for module %s (%d)", (const gchar *) k,
+ id);
+ setbit(log_modules->bitset, id);
+ }
+ }
+}
+
+struct rspamd_logger_funcs *
+rspamd_logger_set_log_function(rspamd_logger_t *logger,
+ struct rspamd_logger_funcs *nfuncs)
+{
+ /* TODO: write this */
+
+ return NULL;
+}
+
+
+gchar *
+rspamd_log_line_hex_escape(const guchar *src, gsize srclen,
+ gchar *dst, gsize dstlen)
+{
+ static const gchar hexdigests[16] = "0123456789ABCDEF";
+ gchar *d = dst;
+
+ static guint32 escape[] = {
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0100 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0x00000000, /* 0001 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* Allow all 8bit characters (assuming they are valid utf8) */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ };
+
+ while (srclen && dstlen) {
+ if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+ if (dstlen >= 4) {
+ *d++ = '\\';
+ *d++ = 'x';
+ *d++ = hexdigests[*src >> 4];
+ *d++ = hexdigests[*src & 0xf];
+ src++;
+ dstlen -= 4;
+ }
+ else {
+ /* Overflow */
+ break;
+ }
+ }
+ else {
+ *d++ = *src++;
+ dstlen--;
+ }
+
+ srclen--;
+ }
+
+ return d;
+}
+
+gsize rspamd_log_line_need_escape(const guchar *src, gsize srclen)
+{
+ static guint32 escape[] = {
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0100 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0x00000000, /* 0001 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* Allow all 8bit characters (assuming they are valid utf8) */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ };
+ gsize n = 0;
+
+ while (srclen) {
+ if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+ n++;
+ }
+
+ src++;
+ srclen--;
+ }
+
+ return n;
+}
+
+const gchar *
+rspamd_get_log_severity_string(gint level_flags)
+{
+ unsigned int bitnum;
+ static const char *level_strs[G_LOG_LEVEL_USER_SHIFT] = {
+ "", /* G_LOG_FLAG_RECURSION */
+ "", /* G_LOG_FLAG_FATAL */
+ "crit",
+ "error",
+ "warn",
+ "notice",
+ "info",
+ "debug"};
+ level_flags &= ((1u << G_LOG_LEVEL_USER_SHIFT) - 1u) & ~(G_LOG_FLAG_RECURSION | G_LOG_FLAG_FATAL);
+#ifdef __GNUC__
+ /* We assume gcc >= 3 and clang >= 5 anyway */
+ bitnum = __builtin_ffs(level_flags) - 1;
+#else
+ bitnum = ffs(level_flags) - 1;
+#endif
+ return level_strs[bitnum];
+}
+
+static inline void
+log_time(gdouble now, rspamd_logger_t *rspamd_log, gchar *timebuf,
+ size_t len)
+{
+ time_t sec = (time_t) now;
+ gsize r;
+ struct tm tms;
+
+ rspamd_localtime(sec, &tms);
+ r = strftime(timebuf, len, "%F %H:%M:%S", &tms);
+
+ if (rspamd_log->flags & RSPAMD_LOG_FLAG_USEC) {
+ gchar usec_buf[16];
+
+ rspamd_snprintf(usec_buf, sizeof(usec_buf), "%.5f",
+ now - (gdouble) sec);
+ rspamd_snprintf(timebuf + r, len - r,
+ "%s", usec_buf + 1);
+ }
+}
+
+void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
+ double ts,
+ const gchar *module,
+ const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *logger)
+{
+ bool log_color = (logger->flags & RSPAMD_LOG_FLAG_COLOR);
+ bool log_severity = (logger->flags & RSPAMD_LOG_FLAG_SEVERITY);
+ bool log_rspamadm = (logger->flags & RSPAMD_LOG_FLAG_RSPAMADM);
+ bool log_systemd = (logger->flags & RSPAMD_LOG_FLAG_SYSTEMD);
+ bool log_json = (logger->flags & RSPAMD_LOG_FLAG_JSON);
+
+ if (log_json) {
+ /* Some sanity to avoid too many branches */
+ log_color = false;
+ log_severity = true;
+ log_systemd = false;
+ }
+
+ glong r;
+ static gchar timebuf[64], modulebuf[64];
+ static gchar tmpbuf[256];
+
+ if (!log_json && !log_systemd) {
+ log_time(ts, logger, timebuf, sizeof(timebuf));
+ }
+
+ if (G_UNLIKELY(log_json)) {
+ /* Perform JSON logging */
+ guint slen = id ? strlen(id) : strlen("(NULL)");
+ slen = MIN(RSPAMD_LOG_ID_LEN, slen);
+ r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "{\"ts\": %f, "
+ "\"pid\": %P, "
+ "\"severity\": \"%s\", "
+ "\"worker_type\": \"%s\", "
+ "\"id\": \"%*.s\", "
+ "\"module\": \"%s\", "
+ "\"function\": \"%s\", "
+ "\"message\": \"",
+ ts,
+ logger->pid,
+ rspamd_get_log_severity_string(level_flags),
+ logger->process_type,
+ slen, id,
+ module,
+ function);
+ iov_ctx->iov[0].iov_base = tmpbuf;
+ iov_ctx->iov[0].iov_len = r;
+ /* TODO: is it possible to have other 'bad' symbols here? */
+ if (rspamd_memcspn(message, "\"\\\r\n\b\t\v", mlen) == mlen) {
+ iov_ctx->iov[1].iov_base = (void *) message;
+ iov_ctx->iov[1].iov_len = mlen;
+ }
+ else {
+ /* We need to do JSON escaping of the quotes */
+ const char *p, *end = message + mlen;
+ long escaped_len;
+
+ for (p = message, escaped_len = 0; p < end; p++, escaped_len++) {
+ switch (*p) {
+ case '\v':
+ case '\0':
+ escaped_len += 5;
+ break;
+ case '\\':
+ case '"':
+ case '\n':
+ case '\r':
+ case '\b':
+ case '\t':
+ escaped_len++;
+ break;
+ default:
+ break;
+ }
+ }
+
+
+ struct rspamd_logger_iov_thrash_stack *thrash_stack_elt = g_malloc(
+ sizeof(struct rspamd_logger_iov_thrash_stack) +
+ escaped_len);
+
+ char *dst = ((char *) thrash_stack_elt) + sizeof(struct rspamd_logger_iov_thrash_stack);
+ char *d;
+
+ thrash_stack_elt->prev = iov_ctx->thrash_stack;
+ iov_ctx->thrash_stack = thrash_stack_elt;
+
+ for (p = message, d = dst; p < end; p++, d++) {
+ switch (*p) {
+ case '\n':
+ *d++ = '\\';
+ *d = 'n';
+ break;
+ case '\r':
+ *d++ = '\\';
+ *d = 'r';
+ break;
+ case '\b':
+ *d++ = '\\';
+ *d = 'b';
+ break;
+ case '\t':
+ *d++ = '\\';
+ *d = 't';
+ break;
+ case '\f':
+ *d++ = '\\';
+ *d = 'f';
+ break;
+ case '\0':
+ *d++ = '\\';
+ *d++ = 'u';
+ *d++ = '0';
+ *d++ = '0';
+ *d++ = '0';
+ *d = '0';
+ break;
+ case '\v':
+ *d++ = '\\';
+ *d++ = 'u';
+ *d++ = '0';
+ *d++ = '0';
+ *d++ = '0';
+ *d = 'B';
+ break;
+ case '\\':
+ *d++ = '\\';
+ *d = '\\';
+ break;
+ case '"':
+ *d++ = '\\';
+ *d = '"';
+ break;
+ default:
+ *d = *p;
+ break;
+ }
+ }
+
+ iov_ctx->iov[1].iov_base = dst;
+ iov_ctx->iov[1].iov_len = d - dst;
+ }
+ iov_ctx->iov[2].iov_base = (void *) "\"}\n";
+ iov_ctx->iov[2].iov_len = sizeof("\"}\n") - 1;
+
+ iov_ctx->niov = 3;
+ }
+ else if (G_LIKELY(!log_rspamadm)) {
+ if (!log_systemd) {
+ if (log_color) {
+ if (level_flags & (G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE)) {
+ /* White */
+ r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[0;37m");
+ }
+ else if (level_flags & G_LOG_LEVEL_WARNING) {
+ /* Magenta */
+ r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[0;32m");
+ }
+ else if (level_flags & G_LOG_LEVEL_CRITICAL) {
+ /* Red */
+ r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[1;31m");
+ }
+ else {
+ r = 0;
+ }
+ }
+ else {
+ r = 0;
+ }
+
+ if (log_severity) {
+ r += rspamd_snprintf(tmpbuf + r,
+ sizeof(tmpbuf) - r,
+ "%s [%s] #%P(%s) ",
+ timebuf,
+ rspamd_get_log_severity_string(level_flags),
+ logger->pid,
+ logger->process_type);
+ }
+ else {
+ r += rspamd_snprintf(tmpbuf + r,
+ sizeof(tmpbuf) - r,
+ "%s #%P(%s) ",
+ timebuf,
+ logger->pid,
+ logger->process_type);
+ }
+ }
+ else {
+ r = 0;
+ r += rspamd_snprintf(tmpbuf + r,
+ sizeof(tmpbuf) - r,
+ "(%s) ",
+ logger->process_type);
+ }
+
+ glong mremain, mr;
+ char *m;
+
+ modulebuf[0] = '\0';
+ mremain = sizeof(modulebuf);
+ m = modulebuf;
+
+ if (id != NULL) {
+ guint slen = strlen(id);
+ slen = MIN(RSPAMD_LOG_ID_LEN, slen);
+ mr = rspamd_snprintf(m, mremain, "<%*.s>; ", slen,
+ id);
+ m += mr;
+ mremain -= mr;
+ }
+ if (module != NULL) {
+ mr = rspamd_snprintf(m, mremain, "%s; ", module);
+ m += mr;
+ mremain -= mr;
+ }
+ if (function != NULL) {
+ mr = rspamd_snprintf(m, mremain, "%s: ", function);
+ m += mr;
+ mremain -= mr;
+ }
+ else {
+ mr = rspamd_snprintf(m, mremain, ": ");
+ m += mr;
+ mremain -= mr;
+ }
+
+ /* Ensure that we have a space at the end */
+ if (m > modulebuf && *(m - 1) != ' ') {
+ *(m - 1) = ' ';
+ }
+
+ /* Construct IOV for log line */
+ iov_ctx->iov[0].iov_base = tmpbuf;
+ iov_ctx->iov[0].iov_len = r;
+ iov_ctx->iov[1].iov_base = modulebuf;
+ iov_ctx->iov[1].iov_len = m - modulebuf;
+ iov_ctx->iov[2].iov_base = (void *) message;
+ iov_ctx->iov[2].iov_len = mlen;
+ iov_ctx->iov[3].iov_base = (void *) &lf_chr;
+ iov_ctx->iov[3].iov_len = 1;
+
+ iov_ctx->niov = 4;
+
+ if (log_color) {
+ iov_ctx->iov[4].iov_base = "\033[0m";
+ iov_ctx->iov[4].iov_len = sizeof("\033[0m") - 1;
+
+ iov_ctx->niov = 5;
+ }
+ }
+ else {
+ /* Rspamadm case */
+ int niov = 0;
+
+ if (logger->log_level == G_LOG_LEVEL_DEBUG) {
+ iov_ctx->iov[niov].iov_base = (void *) timebuf;
+ iov_ctx->iov[niov++].iov_len = strlen(timebuf);
+ iov_ctx->iov[niov].iov_base = (void *) " ";
+ iov_ctx->iov[niov++].iov_len = 1;
+ }
+
+ iov_ctx->iov[niov].iov_base = (void *) message;
+ iov_ctx->iov[niov++].iov_len = mlen;
+ iov_ctx->iov[niov].iov_base = (void *) &lf_chr;
+ iov_ctx->iov[niov++].iov_len = 1;
+
+ iov_ctx->niov = niov;
+ }
+
+ // this is kind of "after-the-fact" check, but it's mostly for debugging-only
+ g_assert(iov_ctx->niov <= G_N_ELEMENTS(iov_ctx->iov));
+}
+
+void rspamd_log_iov_free(struct rspamd_logger_iov_ctx *iov_ctx)
+{
+ struct rspamd_logger_iov_thrash_stack *st = iov_ctx->thrash_stack;
+
+ while (st) {
+ struct rspamd_logger_iov_thrash_stack *nst = st->prev;
+ g_free(st);
+ st = nst;
+ }
+} \ No newline at end of file
diff --git a/src/libserver/logger/logger_console.c b/src/libserver/logger/logger_console.c
new file mode 100644
index 0000000..7f3c770
--- /dev/null
+++ b/src/libserver/logger/logger_console.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "logger.h"
+#include "libserver/cfg_file.h"
+#include "libcryptobox/cryptobox.h"
+#include "unix-std.h"
+
+#include "logger_private.h"
+
+#define CONSOLE_LOG_QUARK g_quark_from_static_string("console_logger")
+
+static const gchar lf_chr = '\n';
+struct rspamd_console_logger_priv {
+ gint fd;
+ gint crit_fd;
+};
+
+/* Copy & paste :( */
+static inline void
+log_time(gdouble now, rspamd_logger_t *rspamd_log, gchar *timebuf,
+ size_t len)
+{
+ time_t sec = (time_t) now;
+ gsize r;
+ struct tm tms;
+
+ rspamd_localtime(sec, &tms);
+ r = strftime(timebuf, len, "%F %H:%M:%S", &tms);
+
+ if (rspamd_log->flags & RSPAMD_LOG_FLAG_USEC) {
+ gchar usec_buf[16];
+
+ rspamd_snprintf(usec_buf, sizeof(usec_buf), "%.5f",
+ now - (gdouble) sec);
+ rspamd_snprintf(timebuf + r, len - r,
+ "%s", usec_buf + 1);
+ }
+}
+
+void *
+rspamd_log_console_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err)
+{
+ struct rspamd_console_logger_priv *priv;
+
+ priv = g_malloc0(sizeof(*priv));
+
+ if (logger->flags & RSPAMD_LOG_FLAG_RSPAMADM) {
+ priv->fd = dup(STDOUT_FILENO);
+ priv->crit_fd = dup(STDERR_FILENO);
+ }
+ else {
+ priv->fd = dup(STDERR_FILENO);
+ priv->crit_fd = priv->fd;
+ }
+
+ if (priv->fd == -1) {
+ g_set_error(err, CONSOLE_LOG_QUARK, errno,
+ "open_log: cannot dup console fd: %s\n",
+ strerror(errno));
+ rspamd_log_console_dtor(logger, priv);
+
+ return NULL;
+ }
+
+ if (!isatty(priv->fd)) {
+ if (logger->flags & RSPAMD_LOG_FLAG_COLOR) {
+ /* Disable colors for not a tty */
+ logger->flags &= ~RSPAMD_LOG_FLAG_COLOR;
+ }
+ }
+
+ return priv;
+}
+
+void *
+rspamd_log_console_reload(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, uid_t uid, gid_t gid, GError **err)
+{
+ struct rspamd_console_logger_priv *npriv;
+
+ npriv = rspamd_log_console_init(logger, cfg, uid, gid, err);
+
+ if (npriv) {
+ /* Close old */
+ rspamd_log_console_dtor(logger, arg);
+ }
+
+ return npriv;
+}
+
+void rspamd_log_console_dtor(rspamd_logger_t *logger, gpointer arg)
+{
+ struct rspamd_console_logger_priv *priv = (struct rspamd_console_logger_priv *) arg;
+
+ if (priv->fd != -1) {
+ if (priv->fd != priv->crit_fd) {
+ /* Two different FD case */
+ if (close(priv->crit_fd) == -1) {
+ rspamd_fprintf(stderr, "cannot close log crit_fd %d: %s\n",
+ priv->crit_fd, strerror(errno));
+ }
+ }
+
+ if (close(priv->fd) == -1) {
+ rspamd_fprintf(stderr, "cannot close log fd %d: %s\n",
+ priv->fd, strerror(errno));
+ }
+
+ /* Avoid the next if to be executed as crit_fd is equal to fd */
+ priv->crit_fd = -1;
+ }
+
+ if (priv->crit_fd != -1) {
+ if (close(priv->crit_fd) == -1) {
+ rspamd_fprintf(stderr, "cannot close log crit_fd %d: %s\n",
+ priv->crit_fd, strerror(errno));
+ }
+ }
+
+ g_free(priv);
+}
+
+bool rspamd_log_console_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg)
+{
+ struct rspamd_console_logger_priv *priv = (struct rspamd_console_logger_priv *) arg;
+ gint fd, r;
+ double now;
+
+ if (level_flags & G_LOG_LEVEL_CRITICAL) {
+ fd = priv->crit_fd;
+ }
+ else {
+ /* Use stderr if we are in rspamadm mode and severity is more than WARNING */
+ if ((rspamd_log->flags & RSPAMD_LOG_FLAG_RSPAMADM) && (level_flags & G_LOG_LEVEL_WARNING)) {
+ fd = priv->crit_fd;
+ }
+ else {
+ fd = priv->fd;
+ }
+ }
+
+#ifndef DISABLE_PTHREAD_MUTEX
+ if (rspamd_log->mtx) {
+ rspamd_mempool_lock_mutex(rspamd_log->mtx);
+ }
+ else {
+ rspamd_file_lock(fd, FALSE);
+ }
+#else
+ rspamd_file_lock(fd, FALSE);
+#endif
+
+ now = rspamd_get_calendar_ticks();
+
+ struct rspamd_logger_iov_ctx iov_ctx;
+ memset(&iov_ctx, 0, sizeof(iov_ctx));
+ rspamd_log_fill_iov(&iov_ctx, now, module, id,
+ function, level_flags, message,
+ mlen, rspamd_log);
+
+again:
+ r = writev(fd, iov_ctx.iov, iov_ctx.niov);
+
+ if (r == -1) {
+ if (errno == EAGAIN || errno == EINTR) {
+ goto again;
+ }
+
+ if (rspamd_log->mtx) {
+ rspamd_mempool_unlock_mutex(rspamd_log->mtx);
+ }
+ else {
+ rspamd_file_unlock(fd, FALSE);
+ }
+
+ rspamd_log_iov_free(&iov_ctx);
+ return false;
+ }
+
+ if (rspamd_log->mtx) {
+ rspamd_mempool_unlock_mutex(rspamd_log->mtx);
+ }
+ else {
+ rspamd_file_unlock(fd, FALSE);
+ }
+
+ rspamd_log_iov_free(&iov_ctx);
+ return true;
+} \ No newline at end of file
diff --git a/src/libserver/logger/logger_file.c b/src/libserver/logger/logger_file.c
new file mode 100644
index 0000000..20b04b8
--- /dev/null
+++ b/src/libserver/logger/logger_file.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "logger.h"
+#include "libserver/cfg_file.h"
+#include "libcryptobox/cryptobox.h"
+#include "unix-std.h"
+
+#include "logger_private.h"
+
+#define FILE_LOG_QUARK g_quark_from_static_string("file_logger")
+
+struct rspamd_file_logger_priv {
+ gint fd;
+ struct {
+ guint32 size;
+ guint32 used;
+ u_char *buf;
+ } io_buf;
+ gboolean throttling;
+ gchar *log_file;
+ gboolean is_buffered;
+ gboolean log_severity;
+ time_t throttling_time;
+ guint32 repeats;
+ guint64 last_line_cksum;
+ gchar *saved_message;
+ gsize saved_mlen;
+ gchar *saved_function;
+ gchar *saved_module;
+ gchar *saved_id;
+ guint saved_loglevel;
+};
+
+/**
+ * Calculate checksum for log line (used for repeating logic)
+ */
+static inline guint64
+rspamd_log_calculate_cksum(const gchar *message, size_t mlen)
+{
+ return rspamd_cryptobox_fast_hash(message, mlen, rspamd_hash_seed());
+}
+
+/*
+ * Write a line to log file (unbuffered)
+ */
+static bool
+direct_write_log_line(rspamd_logger_t *rspamd_log,
+ struct rspamd_file_logger_priv *priv,
+ void *data,
+ gsize count,
+ gboolean is_iov,
+ gint level_flags)
+{
+ struct iovec *iov;
+ const gchar *line;
+ glong r;
+ gint fd;
+ gboolean locked = FALSE;
+
+ iov = (struct iovec *) data;
+ fd = priv->fd;
+
+ if (!rspamd_log->no_lock) {
+ gsize tlen;
+
+ if (is_iov) {
+ tlen = 0;
+
+ for (guint i = 0; i < count; i++) {
+ tlen += iov[i].iov_len;
+ }
+ }
+ else {
+ tlen = count;
+ }
+
+ if (tlen > PIPE_BUF) {
+ locked = TRUE;
+
+#ifndef DISABLE_PTHREAD_MUTEX
+ if (rspamd_log->mtx) {
+ rspamd_mempool_lock_mutex(rspamd_log->mtx);
+ }
+ else {
+ rspamd_file_lock(fd, FALSE);
+ }
+#else
+ rspamd_file_lock(fd, FALSE);
+#endif
+ }
+ }
+
+ if (is_iov) {
+ r = writev(fd, iov, count);
+ }
+ else {
+ line = (const gchar *) data;
+ r = write(fd, line, count);
+ }
+
+ if (locked) {
+#ifndef DISABLE_PTHREAD_MUTEX
+ if (rspamd_log->mtx) {
+ rspamd_mempool_unlock_mutex(rspamd_log->mtx);
+ }
+ else {
+ rspamd_file_unlock(fd, FALSE);
+ }
+#else
+ rspamd_file_unlock(fd, FALSE);
+#endif
+ }
+
+ if (r == -1) {
+ /* We cannot write message to file, so we need to detect error and make decision */
+ if (errno == EINTR) {
+ /* Try again */
+ return direct_write_log_line(rspamd_log, priv, data, count, is_iov, level_flags);
+ }
+
+ if (errno == EFAULT || errno == EINVAL || errno == EFBIG ||
+ errno == ENOSPC) {
+ /* Rare case */
+ priv->throttling = TRUE;
+ priv->throttling_time = time(NULL);
+ }
+ else if (errno == EPIPE || errno == EBADF) {
+ /* We write to some pipe and it disappears, disable logging or we has opened bad file descriptor */
+ rspamd_log->enabled = FALSE;
+ }
+
+ return false;
+ }
+ else if (priv->throttling) {
+ priv->throttling = FALSE;
+ }
+
+ return true;
+}
+
+/**
+ * Fill buffer with message (limits must be checked BEFORE this call)
+ */
+static void
+fill_buffer(rspamd_logger_t *rspamd_log,
+ struct rspamd_file_logger_priv *priv,
+ const struct iovec *iov, gint iovcnt)
+{
+ gint i;
+
+ for (i = 0; i < iovcnt; i++) {
+ memcpy(priv->io_buf.buf + priv->io_buf.used,
+ iov[i].iov_base,
+ iov[i].iov_len);
+ priv->io_buf.used += iov[i].iov_len;
+ }
+}
+
+static void
+rspamd_log_flush(rspamd_logger_t *rspamd_log, struct rspamd_file_logger_priv *priv)
+{
+ if (priv->is_buffered) {
+ direct_write_log_line(rspamd_log,
+ priv,
+ priv->io_buf.buf,
+ priv->io_buf.used,
+ FALSE,
+ rspamd_log->log_level);
+ priv->io_buf.used = 0;
+ }
+}
+
+/*
+ * Write message to buffer or to file (using direct_write_log_line function)
+ */
+static bool
+file_log_helper(rspamd_logger_t *rspamd_log,
+ struct rspamd_file_logger_priv *priv,
+ const struct iovec *iov,
+ guint iovcnt,
+ gint level_flags)
+{
+ size_t len = 0;
+ guint i;
+
+ if (!priv->is_buffered) {
+ /* Write string directly */
+ return direct_write_log_line(rspamd_log, priv, (void *) iov, iovcnt,
+ TRUE, level_flags);
+ }
+ else {
+ /* Calculate total length */
+ for (i = 0; i < iovcnt; i++) {
+ len += iov[i].iov_len;
+ }
+ /* Fill buffer */
+ if (priv->io_buf.size < len) {
+ /* Buffer is too small to hold this string, so write it directly */
+ rspamd_log_flush(rspamd_log, priv);
+ return direct_write_log_line(rspamd_log, priv, (void *) iov, iovcnt,
+ TRUE, level_flags);
+ }
+ else if (priv->io_buf.used + len >= priv->io_buf.size) {
+ /* Buffer is full, try to write it directly */
+ rspamd_log_flush(rspamd_log, priv);
+ fill_buffer(rspamd_log, priv, iov, iovcnt);
+ }
+ else {
+ /* Copy incoming string to buffer */
+ fill_buffer(rspamd_log, priv, iov, iovcnt);
+ }
+ }
+
+ return true;
+}
+
+static void
+rspamd_log_reset_repeated(rspamd_logger_t *rspamd_log,
+ struct rspamd_file_logger_priv *priv)
+{
+ gchar tmpbuf[256];
+ gssize r;
+
+ if (priv->repeats > REPEATS_MIN) {
+ r = rspamd_snprintf(tmpbuf,
+ sizeof(tmpbuf),
+ "Last message repeated %ud times",
+ priv->repeats - REPEATS_MIN);
+ priv->repeats = 0;
+
+ if (priv->saved_message) {
+ rspamd_log_file_log(priv->saved_module,
+ priv->saved_id,
+ priv->saved_function,
+ priv->saved_loglevel | RSPAMD_LOG_FORCED,
+ priv->saved_message,
+ priv->saved_mlen,
+ rspamd_log,
+ priv);
+
+ g_free(priv->saved_message);
+ g_free(priv->saved_function);
+ g_free(priv->saved_module);
+ g_free(priv->saved_id);
+ priv->saved_message = NULL;
+ priv->saved_function = NULL;
+ priv->saved_module = NULL;
+ priv->saved_id = NULL;
+ }
+
+ /* It is safe to use temporary buffer here as it is not static */
+ rspamd_log_file_log(NULL, NULL,
+ G_STRFUNC,
+ priv->saved_loglevel | RSPAMD_LOG_FORCED,
+ tmpbuf,
+ r,
+ rspamd_log,
+ priv);
+ rspamd_log_flush(rspamd_log, priv);
+ }
+}
+
+static gint
+rspamd_try_open_log_fd(rspamd_logger_t *rspamd_log,
+ struct rspamd_file_logger_priv *priv,
+ uid_t uid, gid_t gid,
+ GError **err)
+{
+ gint fd;
+
+ fd = open(priv->log_file,
+ O_CREAT | O_WRONLY | O_APPEND,
+ S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
+ if (fd == -1) {
+ g_set_error(err, FILE_LOG_QUARK, errno,
+ "open_log: cannot open desired log file: %s, %s\n",
+ priv->log_file, strerror(errno));
+ return -1;
+ }
+
+ if (uid != -1 || gid != -1) {
+ if (fchown(fd, uid, gid) == -1) {
+ g_set_error(err, FILE_LOG_QUARK, errno,
+ "open_log: cannot chown desired log file: %s, %s\n",
+ priv->log_file, strerror(errno));
+ close(fd);
+
+ return -1;
+ }
+ }
+
+ return fd;
+}
+
+void *
+rspamd_log_file_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err)
+{
+ struct rspamd_file_logger_priv *priv;
+
+ if (!cfg || !cfg->cfg_name) {
+ g_set_error(err, FILE_LOG_QUARK, EINVAL,
+ "no log file specified");
+ return NULL;
+ }
+
+ priv = g_malloc0(sizeof(*priv));
+
+ if (cfg->log_buffered) {
+ if (cfg->log_buf_size != 0) {
+ priv->io_buf.size = cfg->log_buf_size;
+ }
+ else {
+ priv->io_buf.size = LOGBUF_LEN;
+ }
+ priv->is_buffered = TRUE;
+ priv->io_buf.buf = g_malloc(priv->io_buf.size);
+ }
+
+ if (cfg->log_file) {
+ priv->log_file = g_strdup(cfg->log_file);
+ }
+
+ priv->log_severity = (logger->flags & RSPAMD_LOG_FLAG_SEVERITY);
+ priv->fd = rspamd_try_open_log_fd(logger, priv, uid, gid, err);
+
+ if (priv->fd == -1) {
+ rspamd_log_file_dtor(logger, priv);
+
+ return NULL;
+ }
+
+ return priv;
+}
+
+void rspamd_log_file_dtor(rspamd_logger_t *logger, gpointer arg)
+{
+ struct rspamd_file_logger_priv *priv = (struct rspamd_file_logger_priv *) arg;
+
+ rspamd_log_reset_repeated(logger, priv);
+ rspamd_log_flush(logger, priv);
+
+ if (priv->fd != -1) {
+ if (close(priv->fd) == -1) {
+ rspamd_fprintf(stderr, "cannot close log fd %d: %s; log file = %s\n",
+ priv->fd, strerror(errno), priv->log_file);
+ }
+ }
+
+ g_free(priv->log_file);
+ g_free(priv);
+}
+
+bool rspamd_log_file_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg)
+{
+ struct rspamd_file_logger_priv *priv = (struct rspamd_file_logger_priv *) arg;
+ gdouble now;
+ guint64 cksum;
+ gboolean got_time = FALSE;
+
+
+ if (!(level_flags & RSPAMD_LOG_FORCED) && !rspamd_log->enabled) {
+ return false;
+ }
+
+ /* Check throttling due to write errors */
+ if (!(level_flags & RSPAMD_LOG_FORCED) && priv->throttling) {
+ now = rspamd_get_calendar_ticks();
+
+ if (priv->throttling_time != now) {
+ priv->throttling_time = now;
+ got_time = TRUE;
+ }
+ else {
+ /* Do not try to write to file too often while throttling */
+ return false;
+ }
+ }
+
+ /* Check repeats */
+ cksum = rspamd_log_calculate_cksum(message, mlen);
+
+ if (cksum == priv->last_line_cksum) {
+ priv->repeats++;
+
+ if (priv->repeats > REPEATS_MIN && priv->repeats <
+ REPEATS_MAX) {
+ /* Do not log anything but save message for future */
+ if (priv->saved_message == NULL) {
+ priv->saved_function = g_strdup(function);
+ priv->saved_mlen = mlen;
+ priv->saved_message = g_malloc(mlen);
+ memcpy(priv->saved_message, message, mlen);
+
+ if (module) {
+ priv->saved_module = g_strdup(module);
+ }
+
+ if (id) {
+ priv->saved_id = g_strdup(id);
+ }
+
+ priv->saved_loglevel = level_flags;
+ }
+
+ return true;
+ }
+ else if (priv->repeats > REPEATS_MAX) {
+ rspamd_log_reset_repeated(rspamd_log, priv);
+
+ bool ret = rspamd_log_file_log(module, id,
+ function,
+ level_flags,
+ message,
+ mlen,
+ rspamd_log,
+ priv);
+
+ /* Probably we have more repeats in future */
+ priv->repeats = REPEATS_MIN + 1;
+
+ return ret;
+ }
+ }
+ else {
+ /* Reset counter if new message differs from saved message */
+ priv->last_line_cksum = cksum;
+
+ if (priv->repeats > REPEATS_MIN) {
+ rspamd_log_reset_repeated(rspamd_log, priv);
+ return rspamd_log_file_log(module, id,
+ function,
+ level_flags,
+ message,
+ mlen,
+ rspamd_log,
+ arg);
+ }
+ else {
+ priv->repeats = 0;
+ }
+ }
+ if (!got_time) {
+ now = rspamd_get_calendar_ticks();
+ }
+
+ struct rspamd_logger_iov_ctx iov_ctx;
+ memset(&iov_ctx, 0, sizeof(iov_ctx));
+ rspamd_log_fill_iov(&iov_ctx, now, module, id, function, level_flags, message,
+ mlen, rspamd_log);
+
+ bool ret = file_log_helper(rspamd_log, priv, iov_ctx.iov, iov_ctx.niov, level_flags);
+ rspamd_log_iov_free(&iov_ctx);
+
+ return ret;
+}
+
+void *
+rspamd_log_file_reload(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, uid_t uid, gid_t gid, GError **err)
+{
+ struct rspamd_file_logger_priv *npriv;
+
+ if (!cfg->cfg_name) {
+ g_set_error(err, FILE_LOG_QUARK, EINVAL,
+ "no log file specified");
+ return NULL;
+ }
+
+ npriv = rspamd_log_file_init(logger, cfg, uid, gid, err);
+
+ if (npriv) {
+ /* Close old */
+ rspamd_log_file_dtor(logger, arg);
+ }
+
+ return npriv;
+}
+
+bool rspamd_log_file_on_fork(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, GError **err)
+{
+ struct rspamd_file_logger_priv *priv = (struct rspamd_file_logger_priv *) arg;
+
+ rspamd_log_reset_repeated(logger, priv);
+ rspamd_log_flush(logger, priv);
+
+ return true;
+} \ No newline at end of file
diff --git a/src/libserver/logger/logger_private.h b/src/libserver/logger/logger_private.h
new file mode 100644
index 0000000..234a207
--- /dev/null
+++ b/src/libserver/logger/logger_private.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_LOGGER_PRIVATE_H
+#define RSPAMD_LOGGER_PRIVATE_H
+
+#include "logger.h"
+
+/* How much message should be repeated before it is count to be repeated one */
+#define REPEATS_MIN 3
+#define REPEATS_MAX 300
+#define LOGBUF_LEN 8192
+
+struct rspamd_log_module {
+ gchar *mname;
+ guint id;
+};
+
+struct rspamd_log_modules {
+ guchar *bitset;
+ guint bitset_len; /* Number of BITS used in bitset */
+ guint bitset_allocated; /* Size of bitset allocated in BYTES */
+ GHashTable *modules;
+};
+
+struct rspamd_logger_error_elt {
+ gint completed;
+ GQuark ptype;
+ pid_t pid;
+ gdouble ts;
+ gchar id[RSPAMD_LOG_ID_LEN + 1];
+ gchar module[9];
+ gchar message[];
+};
+
+struct rspamd_logger_error_log {
+ struct rspamd_logger_error_elt *elts;
+ rspamd_mempool_t *pool;
+ guint32 max_elts;
+ guint32 elt_len;
+ /* Avoid false cache sharing */
+ guchar __padding[64 - sizeof(gpointer) * 2 - sizeof(guint64)];
+ guint cur_row;
+};
+
+/**
+ * Static structure that store logging parameters
+ * It is NOT shared between processes and is created by main process
+ */
+struct rspamd_logger_s {
+ struct rspamd_logger_funcs ops;
+ gint log_level;
+
+ struct rspamd_logger_error_log *errlog;
+ struct rspamd_cryptobox_pubkey *pk;
+ struct rspamd_cryptobox_keypair *keypair;
+
+ guint flags;
+ gboolean closed;
+ gboolean enabled;
+ gboolean is_debug;
+ gboolean no_lock;
+
+ pid_t pid;
+ const gchar *process_type;
+ struct rspamd_radix_map_helper *debug_ip;
+ rspamd_mempool_mutex_t *mtx;
+ rspamd_mempool_t *pool;
+ guint64 log_cnt[4];
+};
+
+/*
+ * Common logging prototypes
+ */
+
+/*
+ * File logging
+ */
+void *rspamd_log_file_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err);
+void *rspamd_log_file_reload(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, uid_t uid, gid_t gid, GError **err);
+void rspamd_log_file_dtor(rspamd_logger_t *logger, gpointer arg);
+bool rspamd_log_file_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg);
+bool rspamd_log_file_on_fork(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, GError **err);
+
+struct rspamd_logger_iov_thrash_stack {
+ struct rspamd_logger_iov_thrash_stack *prev;
+ char data[0];
+};
+#define RSPAMD_LOGGER_MAX_IOV 8
+struct rspamd_logger_iov_ctx {
+ struct iovec iov[RSPAMD_LOGGER_MAX_IOV];
+ int niov;
+ struct rspamd_logger_iov_thrash_stack *thrash_stack;
+};
+/**
+ * Fills IOV of logger (usable for file/console logging)
+ * Warning: this function is NOT reentrant, do not call it twice from a single moment of execution
+ * @param iov filled by this function
+ * @param module
+ * @param id
+ * @param function
+ * @param level_flags
+ * @param message
+ * @param mlen
+ * @param rspamd_log
+ * @return number of iov elements being filled
+ */
+void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
+ double ts,
+ const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log);
+
+/**
+ * Frees IOV context
+ * @param iov_ctx
+ */
+void rspamd_log_iov_free(struct rspamd_logger_iov_ctx *iov_ctx);
+/**
+ * Escape log line by replacing unprintable characters to hex escapes like \xNN
+ * @param src
+ * @param srclen
+ * @param dst
+ * @param dstlen
+ * @return end of the escaped buffer
+ */
+gchar *rspamd_log_line_hex_escape(const guchar *src, gsize srclen,
+ gchar *dst, gsize dstlen);
+/**
+ * Returns number of characters to be escaped, e.g. a caller can allocate a new buffer
+ * the desired number of characters
+ * @param src
+ * @param srclen
+ * @return number of characters to be escaped
+ */
+gsize rspamd_log_line_need_escape(const guchar *src, gsize srclen);
+
+static const struct rspamd_logger_funcs file_log_funcs = {
+ .init = rspamd_log_file_init,
+ .dtor = rspamd_log_file_dtor,
+ .reload = rspamd_log_file_reload,
+ .log = rspamd_log_file_log,
+ .on_fork = rspamd_log_file_on_fork,
+};
+
+/*
+ * Syslog logging
+ */
+void *rspamd_log_syslog_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err);
+void *rspamd_log_syslog_reload(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, uid_t uid, gid_t gid, GError **err);
+void rspamd_log_syslog_dtor(rspamd_logger_t *logger, gpointer arg);
+bool rspamd_log_syslog_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg);
+
+static const struct rspamd_logger_funcs syslog_log_funcs = {
+ .init = rspamd_log_syslog_init,
+ .dtor = rspamd_log_syslog_dtor,
+ .reload = rspamd_log_syslog_reload,
+ .log = rspamd_log_syslog_log,
+ .on_fork = NULL,
+};
+
+/*
+ * Console logging
+ */
+void *rspamd_log_console_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err);
+void *rspamd_log_console_reload(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, uid_t uid, gid_t gid, GError **err);
+void rspamd_log_console_dtor(rspamd_logger_t *logger, gpointer arg);
+bool rspamd_log_console_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg);
+
+static const struct rspamd_logger_funcs console_log_funcs = {
+ .init = rspamd_log_console_init,
+ .dtor = rspamd_log_console_dtor,
+ .reload = rspamd_log_console_reload,
+ .log = rspamd_log_console_log,
+ .on_fork = NULL,
+};
+
+#endif
diff --git a/src/libserver/logger/logger_syslog.c b/src/libserver/logger/logger_syslog.c
new file mode 100644
index 0000000..3c4f7f7
--- /dev/null
+++ b/src/libserver/logger/logger_syslog.c
@@ -0,0 +1,143 @@
+/*-
+ * Copyright 2020 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "logger.h"
+#include "libserver/cfg_file.h"
+#include "logger_private.h"
+
+#define SYSLOG_LOG_QUARK g_quark_from_static_string("syslog_logger")
+
+struct rspamd_syslog_logger_priv {
+ gint log_facility;
+};
+
+#ifdef HAVE_SYSLOG_H
+#include <syslog.h>
+
+void *
+rspamd_log_syslog_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err)
+{
+ struct rspamd_syslog_logger_priv *priv;
+
+ if (!cfg) {
+ g_set_error(err, SYSLOG_LOG_QUARK, EINVAL,
+ "no log config specified");
+ return NULL;
+ }
+
+ priv = g_malloc0(sizeof(*priv));
+
+ priv->log_facility = cfg->log_facility;
+ openlog("rspamd", LOG_NDELAY | LOG_PID, priv->log_facility);
+
+ return priv;
+}
+
+void rspamd_log_syslog_dtor(rspamd_logger_t *logger, gpointer arg)
+{
+ struct rspamd_syslog_logger_priv *priv = (struct rspamd_syslog_logger_priv *) arg;
+
+ closelog();
+ g_free(priv);
+}
+bool rspamd_log_syslog_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg)
+{
+ static const struct {
+ GLogLevelFlags glib_level;
+ gint syslog_level;
+ } levels_match[] = {
+ {G_LOG_LEVEL_DEBUG, LOG_DEBUG},
+ {G_LOG_LEVEL_INFO, LOG_INFO},
+ {G_LOG_LEVEL_WARNING, LOG_WARNING},
+ {G_LOG_LEVEL_CRITICAL, LOG_ERR}};
+ unsigned i;
+ gint syslog_level;
+
+ if (!(level_flags & RSPAMD_LOG_FORCED) && !rspamd_log->enabled) {
+ return false;
+ }
+
+ /* Detect level */
+ syslog_level = LOG_DEBUG;
+
+ for (i = 0; i < G_N_ELEMENTS(levels_match); i++) {
+ if (level_flags & levels_match[i].glib_level) {
+ syslog_level = levels_match[i].syslog_level;
+ break;
+ }
+ }
+
+ syslog(syslog_level, "<%.*s>; %s; %s: %.*s",
+ RSPAMD_LOG_ID_LEN, id != NULL ? id : "",
+ module != NULL ? module : "",
+ function != NULL ? function : "",
+ (gint) mlen, message);
+
+ return true;
+}
+
+#else
+
+void *
+rspamd_log_syslog_init(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ uid_t uid, gid_t gid, GError **err)
+{
+ g_set_error(err, SYSLOG_LOG_QUARK, EINVAL, "syslog support is not compiled in");
+
+ return NULL;
+}
+
+bool rspamd_log_syslog_log(const gchar *module, const gchar *id,
+ const gchar *function,
+ gint level_flags,
+ const gchar *message,
+ gsize mlen,
+ rspamd_logger_t *rspamd_log,
+ gpointer arg)
+{
+ return false;
+}
+
+void rspamd_log_syslog_dtor(rspamd_logger_t *logger, gpointer arg)
+{
+ /* Left blank intentionally */
+}
+
+#endif
+
+void *
+rspamd_log_syslog_reload(rspamd_logger_t *logger, struct rspamd_config *cfg,
+ gpointer arg, uid_t uid, gid_t gid, GError **err)
+{
+ struct rspamd_syslog_logger_priv *npriv;
+
+ npriv = rspamd_log_syslog_init(logger, cfg, uid, gid, err);
+
+ if (npriv) {
+ /* Close old */
+ rspamd_log_syslog_dtor(logger, arg);
+ }
+
+ return npriv;
+}
diff --git a/src/libserver/maps/map.c b/src/libserver/maps/map.c
new file mode 100644
index 0000000..7f6a48f
--- /dev/null
+++ b/src/libserver/maps/map.c
@@ -0,0 +1,3195 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Implementation of map files handling
+ */
+
+#include "config.h"
+#include "map.h"
+#include "map_private.h"
+#include "libserver/http/http_connection.h"
+#include "libserver/http/http_private.h"
+#include "rspamd.h"
+#include "contrib/libev/ev.h"
+#include "contrib/uthash/utlist.h"
+
+#ifdef SYS_ZSTD
+#include "zstd.h"
+#else
+#include "contrib/zstd/zstd.h"
+#endif
+
+#undef MAP_DEBUG_REFS
+#ifdef MAP_DEBUG_REFS
+#define MAP_RETAIN(x, t) \
+ do { \
+ msg_err(G_GNUC_PRETTY_FUNCTION ": " t ": retain ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount + 1); \
+ REF_RETAIN(x); \
+ } while (0)
+
+#define MAP_RELEASE(x, t) \
+ do { \
+ msg_err(G_GNUC_PRETTY_FUNCTION ": " t ": release ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount - 1); \
+ REF_RELEASE(x); \
+ } while (0)
+#else
+#define MAP_RETAIN(x, t) REF_RETAIN(x)
+#define MAP_RELEASE(x, t) REF_RELEASE(x)
+#endif
+
+enum rspamd_map_periodic_opts {
+ RSPAMD_MAP_SCHEDULE_NORMAL = 0,
+ RSPAMD_MAP_SCHEDULE_ERROR = (1u << 0u),
+ RSPAMD_MAP_SCHEDULE_LOCKED = (1u << 1u),
+ RSPAMD_MAP_SCHEDULE_INIT = (1u << 2u),
+};
+
+static void free_http_cbdata_common(struct http_callback_data *cbd,
+ gboolean plan_new);
+static void free_http_cbdata_dtor(gpointer p);
+static void free_http_cbdata(struct http_callback_data *cbd);
+static void rspamd_map_process_periodic(struct map_periodic_cbdata *cbd);
+static void rspamd_map_schedule_periodic(struct rspamd_map *map, int how);
+static gboolean read_map_file_chunks(struct rspamd_map *map,
+ struct map_cb_data *cbdata,
+ const gchar *fname,
+ gsize len,
+ goffset off);
+static gboolean rspamd_map_save_http_cached_file(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ struct http_map_data *htdata,
+ const guchar *data,
+ gsize len);
+static gboolean rspamd_map_update_http_cached_file(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ struct http_map_data *htdata);
+
+guint rspamd_map_log_id = (guint) -1;
+RSPAMD_CONSTRUCTOR(rspamd_map_log_init)
+{
+ rspamd_map_log_id = rspamd_logger_add_debug_module("map");
+}
+
+/**
+ * Write HTTP request
+ */
+static void
+write_http_request(struct http_callback_data *cbd)
+{
+ gchar datebuf[128];
+ struct rspamd_http_message *msg;
+
+ msg = rspamd_http_new_message(HTTP_REQUEST);
+ if (cbd->check) {
+ msg->method = HTTP_HEAD;
+ }
+
+ msg->url = rspamd_fstring_append(msg->url,
+ cbd->data->path, strlen(cbd->data->path));
+
+ if (cbd->check) {
+ if (cbd->data->last_modified != 0) {
+ rspamd_http_date_format(datebuf, sizeof(datebuf),
+ cbd->data->last_modified);
+ rspamd_http_message_add_header(msg, "If-Modified-Since",
+ datebuf);
+ }
+ if (cbd->data->etag) {
+ rspamd_http_message_add_header_len(msg, "If-None-Match",
+ cbd->data->etag->str, cbd->data->etag->len);
+ }
+ }
+
+ msg->url = rspamd_fstring_append(msg->url, cbd->data->rest,
+ strlen(cbd->data->rest));
+
+ if (cbd->data->userinfo) {
+ rspamd_http_message_add_header(msg, "Authorization",
+ cbd->data->userinfo);
+ }
+
+ MAP_RETAIN(cbd, "http_callback_data");
+ rspamd_http_connection_write_message(cbd->conn,
+ msg,
+ cbd->data->host,
+ NULL,
+ cbd,
+ cbd->timeout);
+}
+
+/**
+ * Callback for destroying HTTP callback data
+ */
+static void
+free_http_cbdata_common(struct http_callback_data *cbd, gboolean plan_new)
+{
+ struct map_periodic_cbdata *periodic = cbd->periodic;
+
+ if (cbd->shmem_data) {
+ rspamd_http_message_shmem_unref(cbd->shmem_data);
+ }
+
+ if (cbd->pk) {
+ rspamd_pubkey_unref(cbd->pk);
+ }
+
+ if (cbd->conn) {
+ rspamd_http_connection_unref(cbd->conn);
+ cbd->conn = NULL;
+ }
+
+ if (cbd->addrs) {
+ rspamd_inet_addr_t *addr;
+ guint i;
+
+ PTR_ARRAY_FOREACH(cbd->addrs, i, addr)
+ {
+ rspamd_inet_address_free(addr);
+ }
+
+ g_ptr_array_free(cbd->addrs, TRUE);
+ }
+
+
+ MAP_RELEASE(cbd->bk, "rspamd_map_backend");
+
+ if (periodic) {
+ /* Detached in case of HTTP error */
+ MAP_RELEASE(periodic, "periodic");
+ }
+
+ g_free(cbd);
+}
+
+static void
+free_http_cbdata(struct http_callback_data *cbd)
+{
+ cbd->map->tmp_dtor = NULL;
+ cbd->map->tmp_dtor_data = NULL;
+
+ free_http_cbdata_common(cbd, TRUE);
+}
+
+static void
+free_http_cbdata_dtor(gpointer p)
+{
+ struct http_callback_data *cbd = p;
+ struct rspamd_map *map;
+
+ map = cbd->map;
+ if (cbd->stage == http_map_http_conn) {
+ REF_RELEASE(cbd);
+ }
+ else {
+ /* We cannot terminate DNS requests sent */
+ cbd->stage = http_map_terminated;
+ }
+
+ msg_warn_map("%s: "
+ "connection with http server is terminated: worker is stopping",
+ map->name);
+}
+
+/*
+ * HTTP callbacks
+ */
+static void
+http_map_error(struct rspamd_http_connection *conn,
+ GError *err)
+{
+ struct http_callback_data *cbd = conn->ud;
+ struct rspamd_map *map;
+
+ map = cbd->map;
+
+ if (cbd->periodic) {
+ cbd->periodic->errored = TRUE;
+ msg_err_map("error reading %s(%s): "
+ "connection with http server terminated incorrectly: %e",
+ cbd->bk->uri,
+ cbd->addr ? rspamd_inet_address_to_string_pretty(cbd->addr) : "",
+ err);
+
+ rspamd_map_process_periodic(cbd->periodic);
+ }
+
+ MAP_RELEASE(cbd, "http_callback_data");
+}
+
+static void
+rspamd_map_cache_cb(struct ev_loop *loop, ev_timer *w, int revents)
+{
+ struct rspamd_http_map_cached_cbdata *cache_cbd = (struct rspamd_http_map_cached_cbdata *)
+ w->data;
+ struct rspamd_map *map;
+ struct http_map_data *data;
+
+ map = cache_cbd->map;
+ data = cache_cbd->data;
+
+ if (cache_cbd->gen != cache_cbd->data->gen) {
+ /* We have another update, so this cache element is obviously expired */
+ /*
+ * Important!: we do not set cache availability to zero here, as there
+ * might be fresh cache
+ */
+ msg_info_map("cached data is now expired (gen mismatch %L != %L) for %s; shm name=%s; refcount=%d",
+ cache_cbd->gen, cache_cbd->data->gen, map->name, cache_cbd->shm->shm_name,
+ cache_cbd->shm->ref.refcount);
+ MAP_RELEASE(cache_cbd->shm, "rspamd_http_map_cached_cbdata");
+ ev_timer_stop(loop, &cache_cbd->timeout);
+ g_free(cache_cbd);
+ }
+ else if (cache_cbd->data->last_checked >= cache_cbd->last_checked) {
+ /*
+ * We checked map but we have not found anything more recent,
+ * reschedule cache check
+ */
+ if (cache_cbd->map->poll_timeout >
+ rspamd_get_calendar_ticks() - cache_cbd->data->last_checked) {
+ w->repeat = cache_cbd->map->poll_timeout -
+ (rspamd_get_calendar_ticks() - cache_cbd->data->last_checked);
+ }
+ else {
+ w->repeat = cache_cbd->map->poll_timeout;
+ }
+
+ if (w->repeat < 0) {
+ msg_info_map("cached data for %s has skewed check time: %d last checked, "
+ "%d poll timeout, %.2f diff; shm name=%s; refcount=%d",
+ map->name, (int) cache_cbd->data->last_checked,
+ (int) cache_cbd->map->poll_timeout,
+ (rspamd_get_calendar_ticks() - cache_cbd->data->last_checked),
+ cache_cbd->shm->shm_name,
+ cache_cbd->shm->ref.refcount);
+ w->repeat = 0.0;
+ }
+
+ cache_cbd->last_checked = cache_cbd->data->last_checked;
+ msg_debug_map("cached data is up to date for %s", map->name);
+ ev_timer_again(loop, &cache_cbd->timeout);
+ }
+ else {
+ data->cur_cache_cbd = NULL;
+ g_atomic_int_set(&data->cache->available, 0);
+ msg_info_map("cached data is now expired for %s; shm name=%s; refcount=%d",
+ map->name,
+ cache_cbd->shm->shm_name,
+ cache_cbd->shm->ref.refcount);
+ MAP_RELEASE(cache_cbd->shm, "rspamd_http_map_cached_cbdata");
+ ev_timer_stop(loop, &cache_cbd->timeout);
+ g_free(cache_cbd);
+ }
+}
+
+static int
+http_map_finish(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg)
+{
+ struct http_callback_data *cbd = conn->ud;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+ struct http_map_data *data;
+ struct rspamd_http_map_cached_cbdata *cache_cbd;
+ const rspamd_ftok_t *expires_hdr, *etag_hdr;
+ char next_check_date[128];
+ guchar *in = NULL;
+ gsize dlen = 0;
+
+ map = cbd->map;
+ bk = cbd->bk;
+ data = bk->data.hd;
+
+ if (msg->code == 200) {
+
+ if (cbd->check) {
+ msg_info_map("need to reread map from %s", cbd->bk->uri);
+ cbd->periodic->need_modify = TRUE;
+ /* Reset the whole chain */
+ cbd->periodic->cur_backend = 0;
+ /* Reset cache, old cached data will be cleaned on timeout */
+ g_atomic_int_set(&data->cache->available, 0);
+ data->cur_cache_cbd = NULL;
+
+ rspamd_map_process_periodic(cbd->periodic);
+ MAP_RELEASE(cbd, "http_callback_data");
+
+ return 0;
+ }
+
+ cbd->data->last_checked = msg->date;
+
+ if (msg->last_modified) {
+ cbd->data->last_modified = msg->last_modified;
+ }
+ else {
+ cbd->data->last_modified = msg->date;
+ }
+
+
+ /* Unsigned version - just open file */
+ cbd->shmem_data = rspamd_http_message_shmem_ref(msg);
+ cbd->data_len = msg->body_buf.len;
+
+ if (cbd->data_len == 0) {
+ msg_err_map("cannot read empty map");
+ goto err;
+ }
+
+ g_assert(cbd->shmem_data != NULL);
+
+ in = rspamd_shmem_xmap(cbd->shmem_data->shm_name, PROT_READ, &dlen);
+
+ if (in == NULL) {
+ msg_err_map("cannot read tempfile %s: %s",
+ cbd->shmem_data->shm_name,
+ strerror(errno));
+ goto err;
+ }
+
+ /* Check for expires */
+ double cached_timeout = map->poll_timeout * 2;
+
+ expires_hdr = rspamd_http_message_find_header(msg, "Expires");
+
+ if (expires_hdr) {
+ time_t hdate;
+
+ hdate = rspamd_http_parse_date(expires_hdr->begin, expires_hdr->len);
+
+ if (hdate != (time_t) -1 && hdate > msg->date) {
+ cached_timeout = map->next_check - msg->date +
+ map->poll_timeout * 2;
+
+ map->next_check = hdate;
+ }
+ else {
+ msg_info_map("invalid expires header: %T, ignore it", expires_hdr);
+ map->next_check = 0;
+ }
+ }
+
+ /* Check for etag */
+ etag_hdr = rspamd_http_message_find_header(msg, "ETag");
+
+ if (etag_hdr) {
+ if (cbd->data->etag) {
+ /* Remove old etag */
+ rspamd_fstring_free(cbd->data->etag);
+ }
+
+ cbd->data->etag = rspamd_fstring_new_init(etag_hdr->begin,
+ etag_hdr->len);
+ }
+ else {
+ if (cbd->data->etag) {
+ /* Remove and clear old etag */
+ rspamd_fstring_free(cbd->data->etag);
+ cbd->data->etag = NULL;
+ }
+ }
+
+ MAP_RETAIN(cbd->shmem_data, "shmem_data");
+ cbd->data->gen++;
+ /*
+ * We know that a map is in the locked state
+ */
+ g_atomic_int_set(&data->cache->available, 1);
+ /* Store cached data */
+ rspamd_strlcpy(data->cache->shmem_name, cbd->shmem_data->shm_name,
+ sizeof(data->cache->shmem_name));
+ data->cache->len = cbd->data_len;
+ data->cache->last_modified = cbd->data->last_modified;
+ cache_cbd = g_malloc0(sizeof(*cache_cbd));
+ cache_cbd->shm = cbd->shmem_data;
+ cache_cbd->event_loop = cbd->event_loop;
+ cache_cbd->map = map;
+ cache_cbd->data = cbd->data;
+ cache_cbd->last_checked = cbd->data->last_checked;
+ cache_cbd->gen = cbd->data->gen;
+ MAP_RETAIN(cache_cbd->shm, "shmem_data");
+ msg_info_map("stored map data in a shared memory cache: %s",
+ cache_cbd->shm->shm_name);
+
+ ev_timer_init(&cache_cbd->timeout, rspamd_map_cache_cb, cached_timeout,
+ 0.0);
+ ev_timer_start(cbd->event_loop, &cache_cbd->timeout);
+ cache_cbd->timeout.data = cache_cbd;
+ data->cur_cache_cbd = cache_cbd;
+
+ if (map->next_check) {
+ rspamd_http_date_format(next_check_date, sizeof(next_check_date),
+ map->next_check);
+ }
+ else {
+ rspamd_http_date_format(next_check_date, sizeof(next_check_date),
+ rspamd_get_calendar_ticks() + map->poll_timeout);
+ }
+
+
+ if (cbd->bk->is_compressed) {
+ ZSTD_DStream *zstream;
+ ZSTD_inBuffer zin;
+ ZSTD_outBuffer zout;
+ guchar *out;
+ gsize outlen, r;
+
+ zstream = ZSTD_createDStream();
+ ZSTD_initDStream(zstream);
+
+ zin.pos = 0;
+ zin.src = in;
+ zin.size = dlen;
+
+ if ((outlen = ZSTD_getDecompressedSize(zin.src, zin.size)) == 0) {
+ outlen = ZSTD_DStreamOutSize();
+ }
+
+ out = g_malloc(outlen);
+
+ zout.dst = out;
+ zout.pos = 0;
+ zout.size = outlen;
+
+ while (zin.pos < zin.size) {
+ r = ZSTD_decompressStream(zstream, &zout, &zin);
+
+ if (ZSTD_isError(r)) {
+ msg_err_map("%s(%s): cannot decompress data: %s",
+ cbd->bk->uri,
+ rspamd_inet_address_to_string_pretty(cbd->addr),
+ ZSTD_getErrorName(r));
+ ZSTD_freeDStream(zstream);
+ g_free(out);
+ MAP_RELEASE(cbd->shmem_data, "shmem_data");
+ goto err;
+ }
+
+ if (zout.pos == zout.size) {
+ /* We need to extend output buffer */
+ zout.size = zout.size * 2 + 1.0;
+ out = g_realloc(zout.dst, zout.size);
+ zout.dst = out;
+ }
+ }
+
+ ZSTD_freeDStream(zstream);
+ msg_info_map("%s(%s): read map data %z bytes compressed, "
+ "%z uncompressed, next check at %s",
+ cbd->bk->uri,
+ rspamd_inet_address_to_string_pretty(cbd->addr),
+ dlen, zout.pos, next_check_date);
+ map->read_callback(out, zout.pos, &cbd->periodic->cbdata, TRUE);
+ rspamd_map_save_http_cached_file(map, bk, cbd->data, out, zout.pos);
+ g_free(out);
+ }
+ else {
+ msg_info_map("%s(%s): read map data %z bytes, next check at %s",
+ cbd->bk->uri,
+ rspamd_inet_address_to_string_pretty(cbd->addr),
+ dlen, next_check_date);
+ rspamd_map_save_http_cached_file(map, bk, cbd->data, in, cbd->data_len);
+ map->read_callback(in, cbd->data_len, &cbd->periodic->cbdata, TRUE);
+ }
+
+ MAP_RELEASE(cbd->shmem_data, "shmem_data");
+
+ cbd->periodic->cur_backend++;
+ munmap(in, dlen);
+ rspamd_map_process_periodic(cbd->periodic);
+ }
+ else if (msg->code == 304 && cbd->check) {
+ cbd->data->last_checked = msg->date;
+
+ if (msg->last_modified) {
+ cbd->data->last_modified = msg->last_modified;
+ }
+ else {
+ cbd->data->last_modified = msg->date;
+ }
+
+ expires_hdr = rspamd_http_message_find_header(msg, "Expires");
+
+ if (expires_hdr) {
+ time_t hdate;
+
+ hdate = rspamd_http_parse_date(expires_hdr->begin, expires_hdr->len);
+ if (hdate != (time_t) -1 && hdate > msg->date) {
+ map->next_check = hdate;
+ }
+ else {
+ msg_info_map("invalid expires header: %T, ignore it", expires_hdr);
+ map->next_check = 0;
+ }
+ }
+
+ etag_hdr = rspamd_http_message_find_header(msg, "ETag");
+
+ if (etag_hdr) {
+ if (cbd->data->etag) {
+ /* Remove old etag */
+ rspamd_fstring_free(cbd->data->etag);
+ cbd->data->etag = rspamd_fstring_new_init(etag_hdr->begin,
+ etag_hdr->len);
+ }
+ }
+
+ if (map->next_check) {
+ rspamd_http_date_format(next_check_date, sizeof(next_check_date),
+ map->next_check);
+ msg_info_map("data is not modified for server %s, next check at %s "
+ "(http cache based: %T)",
+ cbd->data->host, next_check_date, expires_hdr);
+ }
+ else {
+ rspamd_http_date_format(next_check_date, sizeof(next_check_date),
+ rspamd_get_calendar_ticks() + map->poll_timeout);
+ msg_info_map("data is not modified for server %s, next check at %s "
+ "(timer based)",
+ cbd->data->host, next_check_date);
+ }
+
+ rspamd_map_update_http_cached_file(map, bk, cbd->data);
+ cbd->periodic->cur_backend++;
+ rspamd_map_process_periodic(cbd->periodic);
+ }
+ else {
+ msg_info_map("cannot load map %s from %s: HTTP error %d",
+ bk->uri, cbd->data->host, msg->code);
+ goto err;
+ }
+
+ MAP_RELEASE(cbd, "http_callback_data");
+ return 0;
+
+err:
+ cbd->periodic->errored = 1;
+ rspamd_map_process_periodic(cbd->periodic);
+ MAP_RELEASE(cbd, "http_callback_data");
+
+ return 0;
+}
+
+static gboolean
+read_map_file_chunks(struct rspamd_map *map, struct map_cb_data *cbdata,
+ const gchar *fname, gsize len, goffset off)
+{
+ gint fd;
+ gssize r, avail;
+ gsize buflen = 1024 * 1024;
+ gchar *pos, *bytes;
+
+ fd = rspamd_file_xopen(fname, O_RDONLY, 0, TRUE);
+
+ if (fd == -1) {
+ msg_err_map("can't open map for buffered reading %s: %s",
+ fname, strerror(errno));
+ return FALSE;
+ }
+
+ if (lseek(fd, off, SEEK_SET) == -1) {
+ msg_err_map("can't seek in map to pos %d for buffered reading %s: %s",
+ (gint) off, fname, strerror(errno));
+ close(fd);
+
+ return FALSE;
+ }
+
+ buflen = MIN(len, buflen);
+ bytes = g_malloc(buflen);
+ avail = buflen;
+ pos = bytes;
+
+ while ((r = read(fd, pos, avail)) > 0) {
+ gchar *end = bytes + (pos - bytes) + r;
+ msg_debug_map("%s: read map chunk, %z bytes", fname,
+ r);
+ pos = map->read_callback(bytes, end - bytes, cbdata, r == len);
+
+ if (pos && pos > bytes && pos < end) {
+ guint remain = end - pos;
+
+ memmove(bytes, pos, remain);
+ pos = bytes + remain;
+ /* Need to preserve the remain */
+ avail = ((gssize) buflen) - remain;
+
+ if (avail <= 0) {
+ /* Try realloc, too large element */
+ g_assert(buflen >= remain);
+ bytes = g_realloc(bytes, buflen * 2);
+
+ pos = bytes + remain; /* Adjust */
+ avail += buflen;
+ buflen *= 2;
+ }
+ }
+ else {
+ avail = buflen;
+ pos = bytes;
+ }
+
+ len -= r;
+ }
+
+ if (r == -1) {
+ msg_err_map("can't read from map %s: %s", fname, strerror(errno));
+ close(fd);
+ g_free(bytes);
+
+ return FALSE;
+ }
+
+ close(fd);
+ g_free(bytes);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_map_check_sig_pk_mem(const guchar *sig,
+ gsize siglen,
+ struct rspamd_map *map,
+ const guchar *input,
+ gsize inlen,
+ struct rspamd_cryptobox_pubkey *pk)
+{
+ GString *b32_key;
+ gboolean ret = TRUE;
+
+ if (siglen != rspamd_cryptobox_signature_bytes(RSPAMD_CRYPTOBOX_MODE_25519)) {
+ msg_err_map("can't open signature for %s: invalid size: %z", map->name, siglen);
+
+ ret = FALSE;
+ }
+
+ if (ret && !rspamd_cryptobox_verify(sig, siglen, input, inlen,
+ rspamd_pubkey_get_pk(pk, NULL), RSPAMD_CRYPTOBOX_MODE_25519)) {
+ msg_err_map("can't verify signature for %s: incorrect signature", map->name);
+
+ ret = FALSE;
+ }
+
+ if (ret) {
+ b32_key = rspamd_pubkey_print(pk,
+ RSPAMD_KEYPAIR_BASE32 | RSPAMD_KEYPAIR_PUBKEY);
+ msg_info_map("verified signature for %s using trusted key %v",
+ map->name, b32_key);
+ g_string_free(b32_key, TRUE);
+ }
+
+ return ret;
+}
+
+static gboolean
+rspamd_map_check_file_sig(const char *fname,
+ struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ const guchar *input,
+ gsize inlen)
+{
+ guchar *data;
+ struct rspamd_cryptobox_pubkey *pk = NULL;
+ GString *b32_key;
+ gboolean ret = TRUE;
+ gsize len = 0;
+ gchar fpath[PATH_MAX];
+
+ if (bk->trusted_pubkey == NULL) {
+ /* Try to load and check pubkey */
+ rspamd_snprintf(fpath, sizeof(fpath), "%s.pub", fname);
+ data = rspamd_file_xmap(fpath, PROT_READ, &len, TRUE);
+
+ if (data == NULL) {
+ msg_err_map("can't open pubkey %s: %s", fpath, strerror(errno));
+ return FALSE;
+ }
+
+ pk = rspamd_pubkey_from_base32(data, len, RSPAMD_KEYPAIR_SIGN,
+ RSPAMD_CRYPTOBOX_MODE_25519);
+ munmap(data, len);
+
+ if (pk == NULL) {
+ msg_err_map("can't load pubkey %s", fpath);
+ return FALSE;
+ }
+
+ /* We just check pk against the trusted db of keys */
+ b32_key = rspamd_pubkey_print(pk,
+ RSPAMD_KEYPAIR_BASE32 | RSPAMD_KEYPAIR_PUBKEY);
+ g_assert(b32_key != NULL);
+
+ if (g_hash_table_lookup(map->cfg->trusted_keys, b32_key->str) == NULL) {
+ msg_err_map("pubkey loaded from %s is untrusted: %v", fpath,
+ b32_key);
+ g_string_free(b32_key, TRUE);
+ rspamd_pubkey_unref(pk);
+
+ return FALSE;
+ }
+
+ g_string_free(b32_key, TRUE);
+ }
+ else {
+ pk = rspamd_pubkey_ref(bk->trusted_pubkey);
+ }
+
+ rspamd_snprintf(fpath, sizeof(fpath), "%s.sig", fname);
+ data = rspamd_shmem_xmap(fpath, PROT_READ, &len);
+
+ if (data == NULL) {
+ msg_err_map("can't open signature %s: %s", fpath, strerror(errno));
+ ret = FALSE;
+ }
+
+ if (ret) {
+ ret = rspamd_map_check_sig_pk_mem(data, len, map, input, inlen, pk);
+ munmap(data, len);
+ }
+
+ rspamd_pubkey_unref(pk);
+
+ return ret;
+}
+
+/**
+ * Callback for reading data from file
+ */
+static gboolean
+read_map_file(struct rspamd_map *map, struct file_map_data *data,
+ struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic)
+{
+ gchar *bytes;
+ gsize len;
+ struct stat st;
+
+ if (map->read_callback == NULL || map->fin_callback == NULL) {
+ msg_err_map("%s: bad callback for reading map file",
+ data->filename);
+ return FALSE;
+ }
+
+ if (stat(data->filename, &st) == -1) {
+ /* File does not exist, skipping */
+ if (errno != ENOENT) {
+ msg_err_map("%s: map file is unavailable for reading: %s",
+ data->filename, strerror(errno));
+
+ return FALSE;
+ }
+ else {
+ msg_info_map("%s: map file is not found; "
+ "it will be read automatically if created",
+ data->filename);
+ return TRUE;
+ }
+ }
+
+ ev_stat_stat(map->event_loop, &data->st_ev);
+ len = st.st_size;
+
+ if (bk->is_signed) {
+ bytes = rspamd_file_xmap(data->filename, PROT_READ, &len, TRUE);
+
+ if (bytes == NULL) {
+ msg_err_map("can't open map %s: %s", data->filename, strerror(errno));
+ return FALSE;
+ }
+
+ if (!rspamd_map_check_file_sig(data->filename, map, bk, bytes, len)) {
+ munmap(bytes, len);
+
+ return FALSE;
+ }
+
+ munmap(bytes, len);
+ }
+
+ if (len > 0) {
+ if (map->no_file_read) {
+ /* We just call read callback with backend name */
+ map->read_callback(data->filename, strlen(data->filename),
+ &periodic->cbdata, TRUE);
+ }
+ else {
+ if (bk->is_compressed) {
+ bytes = rspamd_file_xmap(data->filename, PROT_READ, &len, TRUE);
+
+ if (bytes == NULL) {
+ msg_err_map("can't open map %s: %s", data->filename, strerror(errno));
+ return FALSE;
+ }
+
+ ZSTD_DStream *zstream;
+ ZSTD_inBuffer zin;
+ ZSTD_outBuffer zout;
+ guchar *out;
+ gsize outlen, r;
+
+ zstream = ZSTD_createDStream();
+ ZSTD_initDStream(zstream);
+
+ zin.pos = 0;
+ zin.src = bytes;
+ zin.size = len;
+
+ if ((outlen = ZSTD_getDecompressedSize(zin.src, zin.size)) == 0) {
+ outlen = ZSTD_DStreamOutSize();
+ }
+
+ out = g_malloc(outlen);
+
+ zout.dst = out;
+ zout.pos = 0;
+ zout.size = outlen;
+
+ while (zin.pos < zin.size) {
+ r = ZSTD_decompressStream(zstream, &zout, &zin);
+
+ if (ZSTD_isError(r)) {
+ msg_err_map("%s: cannot decompress data: %s",
+ data->filename,
+ ZSTD_getErrorName(r));
+ ZSTD_freeDStream(zstream);
+ g_free(out);
+ munmap(bytes, len);
+ return FALSE;
+ }
+
+ if (zout.pos == zout.size) {
+ /* We need to extend output buffer */
+ zout.size = zout.size * 2 + 1;
+ out = g_realloc(zout.dst, zout.size);
+ zout.dst = out;
+ }
+ }
+
+ ZSTD_freeDStream(zstream);
+ msg_info_map("%s: read map data, %z bytes compressed, "
+ "%z uncompressed)",
+ data->filename,
+ len, zout.pos);
+ map->read_callback(out, zout.pos, &periodic->cbdata, TRUE);
+ g_free(out);
+
+ munmap(bytes, len);
+ }
+ else {
+ /* Perform buffered read: fail-safe */
+ if (!read_map_file_chunks(map, &periodic->cbdata, data->filename,
+ len, 0)) {
+ return FALSE;
+ }
+ }
+ }
+ }
+ else {
+ /* Empty map */
+ map->read_callback(NULL, 0, &periodic->cbdata, TRUE);
+ }
+
+ return TRUE;
+}
+
+static gboolean
+read_map_static(struct rspamd_map *map, struct static_map_data *data,
+ struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic)
+{
+ guchar *bytes;
+ gsize len;
+
+ if (map->read_callback == NULL || map->fin_callback == NULL) {
+ msg_err_map("%s: bad callback for reading map file", map->name);
+ data->processed = TRUE;
+ return FALSE;
+ }
+
+ bytes = data->data;
+ len = data->len;
+
+ if (len > 0) {
+ if (bk->is_compressed) {
+ ZSTD_DStream *zstream;
+ ZSTD_inBuffer zin;
+ ZSTD_outBuffer zout;
+ guchar *out;
+ gsize outlen, r;
+
+ zstream = ZSTD_createDStream();
+ ZSTD_initDStream(zstream);
+
+ zin.pos = 0;
+ zin.src = bytes;
+ zin.size = len;
+
+ if ((outlen = ZSTD_getDecompressedSize(zin.src, zin.size)) == 0) {
+ outlen = ZSTD_DStreamOutSize();
+ }
+
+ out = g_malloc(outlen);
+
+ zout.dst = out;
+ zout.pos = 0;
+ zout.size = outlen;
+
+ while (zin.pos < zin.size) {
+ r = ZSTD_decompressStream(zstream, &zout, &zin);
+
+ if (ZSTD_isError(r)) {
+ msg_err_map("%s: cannot decompress data: %s",
+ map->name,
+ ZSTD_getErrorName(r));
+ ZSTD_freeDStream(zstream);
+ g_free(out);
+
+ return FALSE;
+ }
+
+ if (zout.pos == zout.size) {
+ /* We need to extend output buffer */
+ zout.size = zout.size * 2 + 1;
+ out = g_realloc(zout.dst, zout.size);
+ zout.dst = out;
+ }
+ }
+
+ ZSTD_freeDStream(zstream);
+ msg_info_map("%s: read map data, %z bytes compressed, "
+ "%z uncompressed)",
+ map->name,
+ len, zout.pos);
+ map->read_callback(out, zout.pos, &periodic->cbdata, TRUE);
+ g_free(out);
+ }
+ else {
+ msg_info_map("%s: read map data, %z bytes",
+ map->name, len);
+ map->read_callback(bytes, len, &periodic->cbdata, TRUE);
+ }
+ }
+ else {
+ map->read_callback(NULL, 0, &periodic->cbdata, TRUE);
+ }
+
+ data->processed = TRUE;
+
+ return TRUE;
+}
+
+static void
+rspamd_map_periodic_dtor(struct map_periodic_cbdata *periodic)
+{
+ struct rspamd_map *map;
+
+ map = periodic->map;
+ msg_debug_map("periodic dtor %p", periodic);
+
+ if (periodic->need_modify || periodic->cbdata.errored) {
+ /* Need to notify the real data structure */
+ periodic->map->fin_callback(&periodic->cbdata, periodic->map->user_data);
+
+ if (map->on_load_function) {
+ map->on_load_function(map, map->on_load_ud);
+ }
+ }
+ else {
+ /* Not modified */
+ }
+
+ if (periodic->locked) {
+ g_atomic_int_set(periodic->map->locked, 0);
+ msg_debug_map("unlocked map %s", periodic->map->name);
+
+ if (periodic->map->wrk->state == rspamd_worker_state_running) {
+ rspamd_map_schedule_periodic(periodic->map,
+ RSPAMD_SYMBOL_RESULT_NORMAL);
+ }
+ else {
+ msg_debug_map("stop scheduling periodics for %s; terminating state",
+ periodic->map->name);
+ }
+ }
+
+ g_free(periodic);
+}
+
+/* Called on timer execution */
+static void
+rspamd_map_periodic_callback(struct ev_loop *loop, ev_timer *w, int revents)
+{
+ struct map_periodic_cbdata *cbd = (struct map_periodic_cbdata *) w->data;
+
+ MAP_RETAIN(cbd, "periodic");
+ ev_timer_stop(loop, w);
+ rspamd_map_process_periodic(cbd);
+ MAP_RELEASE(cbd, "periodic");
+}
+
+static void
+rspamd_map_schedule_periodic(struct rspamd_map *map, int how)
+{
+ const gdouble error_mult = 20.0, lock_mult = 0.1;
+ static const gdouble min_timer_interval = 2.0;
+ const gchar *reason = "unknown reason";
+ gdouble jittered_sec;
+ gdouble timeout;
+ struct map_periodic_cbdata *cbd;
+
+ if (map->scheduled_check || (map->wrk &&
+ map->wrk->state != rspamd_worker_state_running)) {
+ /*
+ * Do not schedule check if some check is already scheduled or
+ * if worker is going to die
+ */
+ return;
+ }
+
+ if (!(how & RSPAMD_MAP_SCHEDULE_INIT) && map->static_only) {
+ /* No need to schedule anything for static maps */
+ return;
+ }
+
+ if (map->non_trivial && map->next_check != 0) {
+ timeout = map->next_check - rspamd_get_calendar_ticks();
+ map->next_check = 0;
+
+ if (timeout > 0 && timeout < map->poll_timeout) {
+ /* Early check case, jitter */
+ gdouble poll_timeout = map->poll_timeout;
+
+ if (how & RSPAMD_MAP_SCHEDULE_ERROR) {
+ poll_timeout = map->poll_timeout * error_mult;
+ reason = "early active non-trivial check (after error)";
+ }
+ else if (how & RSPAMD_MAP_SCHEDULE_LOCKED) {
+ poll_timeout = map->poll_timeout * lock_mult;
+ reason = "early active non-trivial check (after being locked)";
+ }
+ else {
+ reason = "early active non-trivial check";
+ }
+
+ jittered_sec = MIN(timeout, poll_timeout);
+ }
+ else if (timeout <= 0) {
+ /* Data is already expired, need to check */
+ if (how & RSPAMD_MAP_SCHEDULE_ERROR) {
+ /* In case of error we still need to increase delay */
+ jittered_sec = map->poll_timeout * error_mult;
+ reason = "expired non-trivial data (after error)";
+ }
+ else {
+ jittered_sec = 0.0;
+ reason = "expired non-trivial data";
+ }
+ }
+ else {
+ /* No need to check now, wait till next_check */
+ jittered_sec = timeout;
+ reason = "valid non-trivial data";
+ }
+ }
+ else {
+ /* No valid information when to check a map, plan a timer based check */
+ timeout = map->poll_timeout;
+
+ if (how & RSPAMD_MAP_SCHEDULE_INIT) {
+ if (map->active_http) {
+ /* Spill maps load to get better chances to hit ssl cache */
+ timeout = rspamd_time_jitter(0.0, 2.0);
+ }
+ else {
+ timeout = 0.0;
+ }
+
+ reason = "init scheduled check";
+ }
+ else {
+ if (how & RSPAMD_MAP_SCHEDULE_ERROR) {
+ timeout = map->poll_timeout * error_mult;
+ reason = "errored scheduled check";
+ }
+ else if (how & RSPAMD_MAP_SCHEDULE_LOCKED) {
+ timeout = map->poll_timeout * lock_mult;
+ reason = "locked scheduled check";
+ }
+ else {
+ reason = "normal scheduled check";
+ }
+ }
+
+ jittered_sec = rspamd_time_jitter(timeout, 0);
+ }
+
+ /* Now, we do some sanity checks for jittered seconds */
+ if (!(how & RSPAMD_MAP_SCHEDULE_INIT)) {
+ /* Never allow too low interval between timer checks, it is expensive */
+ if (jittered_sec < min_timer_interval) {
+ jittered_sec = rspamd_time_jitter(min_timer_interval, 0);
+ }
+
+ if (map->non_trivial) {
+ /*
+ * Even if we are reported that we need to reload cache often, we
+ * still want to be sane in terms of events...
+ */
+ if (jittered_sec < min_timer_interval * 2.0) {
+ if (map->nelts > 0) {
+ jittered_sec = min_timer_interval * 3.0;
+ }
+ }
+ }
+ }
+
+ cbd = g_malloc0(sizeof(*cbd));
+ cbd->cbdata.prev_data = *map->user_data;
+ cbd->cbdata.cur_data = NULL;
+ cbd->cbdata.map = map;
+ cbd->map = map;
+ map->scheduled_check = cbd;
+ REF_INIT_RETAIN(cbd, rspamd_map_periodic_dtor);
+
+ cbd->ev.data = cbd;
+ ev_timer_init(&cbd->ev, rspamd_map_periodic_callback, jittered_sec, 0.0);
+ ev_timer_start(map->event_loop, &cbd->ev);
+
+ msg_debug_map("schedule new periodic event %p in %.3f seconds for %s; reason: %s",
+ cbd, jittered_sec, map->name, reason);
+}
+
+static gint
+rspamd_map_af_to_weight(const rspamd_inet_addr_t *addr)
+{
+ int ret;
+
+ switch (rspamd_inet_address_get_af(addr)) {
+ case AF_UNIX:
+ ret = 2;
+ break;
+ case AF_INET:
+ ret = 1;
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+static gint
+rspamd_map_dns_address_sort_func(gconstpointer a, gconstpointer b)
+{
+ const rspamd_inet_addr_t *ip1 = *(const rspamd_inet_addr_t **) a,
+ *ip2 = *(const rspamd_inet_addr_t **) b;
+ gint w1, w2;
+
+ w1 = rspamd_map_af_to_weight(ip1);
+ w2 = rspamd_map_af_to_weight(ip2);
+
+ /* Inverse order */
+ return w2 - w1;
+}
+
+static void
+rspamd_map_dns_callback(struct rdns_reply *reply, void *arg)
+{
+ struct http_callback_data *cbd = arg;
+ struct rdns_reply_entry *cur_rep;
+ struct rspamd_map *map;
+ guint flags = RSPAMD_HTTP_CLIENT_SIMPLE | RSPAMD_HTTP_CLIENT_SHARED;
+
+ map = cbd->map;
+
+ msg_debug_map("got dns reply with code %s on stage %d",
+ rdns_strerror(reply->code), cbd->stage);
+
+ if (cbd->stage == http_map_terminated) {
+ MAP_RELEASE(cbd, "http_callback_data");
+ return;
+ }
+
+ if (reply->code == RDNS_RC_NOERROR) {
+ DL_FOREACH(reply->entries, cur_rep)
+ {
+ rspamd_inet_addr_t *addr;
+ addr = rspamd_inet_address_from_rnds(cur_rep);
+
+ if (addr != NULL) {
+ rspamd_inet_address_set_port(addr, cbd->data->port);
+ g_ptr_array_add(cbd->addrs, (void *) addr);
+ }
+ }
+
+ if (cbd->stage == http_map_resolve_host2) {
+ /* We have still one request pending */
+ cbd->stage = http_map_resolve_host1;
+ }
+ else if (cbd->stage == http_map_resolve_host1) {
+ cbd->stage = http_map_http_conn;
+ }
+ }
+ else if (cbd->stage < http_map_http_conn) {
+ if (cbd->stage == http_map_resolve_host2) {
+ /* We have still one request pending */
+ cbd->stage = http_map_resolve_host1;
+ }
+ else if (cbd->addrs->len == 0) {
+ /* We could not resolve host, so cowardly fail here */
+ msg_err_map("cannot resolve %s: %s", cbd->data->host,
+ rdns_strerror(reply->code));
+ cbd->periodic->errored = 1;
+ rspamd_map_process_periodic(cbd->periodic);
+ }
+ else {
+ /* We have at least one address, so we can continue... */
+ cbd->stage = http_map_http_conn;
+ }
+ }
+
+ if (cbd->stage == http_map_http_conn && cbd->addrs->len > 0) {
+ rspamd_ptr_array_shuffle(cbd->addrs);
+ gint idx = 0;
+ /*
+ * For the existing addr we can just select any address as we have
+ * data available
+ */
+ if (cbd->map->nelts > 0 && rspamd_random_double_fast() > 0.5) {
+ /* Already shuffled, use whatever is the first */
+ cbd->addr = (rspamd_inet_addr_t *) g_ptr_array_index(cbd->addrs, idx);
+ }
+ else {
+ /* Always prefer IPv4 as IPv6 is almost all the time broken */
+ g_ptr_array_sort(cbd->addrs, rspamd_map_dns_address_sort_func);
+ cbd->addr = (rspamd_inet_addr_t *) g_ptr_array_index(cbd->addrs, idx);
+ }
+
+ retry:
+ msg_debug_map("try open http connection to %s",
+ rspamd_inet_address_to_string_pretty(cbd->addr));
+ if (cbd->bk->protocol == MAP_PROTO_HTTPS) {
+ flags |= RSPAMD_HTTP_CLIENT_SSL;
+ }
+ cbd->conn = rspamd_http_connection_new_client(NULL,
+ NULL,
+ http_map_error,
+ http_map_finish,
+ flags,
+ cbd->addr);
+
+ if (cbd->conn != NULL) {
+ write_http_request(cbd);
+ }
+ else {
+ if (idx < cbd->addrs->len - 1) {
+ /* We can retry */
+ idx++;
+ rspamd_inet_addr_t *prev_addr = cbd->addr;
+ cbd->addr = (rspamd_inet_addr_t *) g_ptr_array_index(cbd->addrs, idx);
+ msg_info_map("cannot connect to %s to get data for %s: %s, retry with %s (%d of %d)",
+ rspamd_inet_address_to_string_pretty(prev_addr),
+ cbd->bk->uri,
+ strerror(errno),
+ rspamd_inet_address_to_string_pretty(cbd->addr),
+ idx + 1, cbd->addrs->len);
+ goto retry;
+ }
+ else {
+ /* Nothing else left */
+ cbd->periodic->errored = TRUE;
+ msg_err_map("error reading %s(%s): "
+ "connection with http server terminated incorrectly: %s",
+ cbd->bk->uri,
+ cbd->addr ? rspamd_inet_address_to_string_pretty(cbd->addr) : "",
+ strerror(errno));
+
+ rspamd_map_process_periodic(cbd->periodic);
+ }
+ }
+ }
+
+ MAP_RELEASE(cbd, "http_callback_data");
+}
+
+static gboolean
+rspamd_map_read_cached(struct rspamd_map *map, struct rspamd_map_backend *bk,
+ struct map_periodic_cbdata *periodic, const gchar *host)
+{
+ gsize mmap_len, len;
+ gpointer in;
+ struct http_map_data *data;
+
+ data = bk->data.hd;
+
+ in = rspamd_shmem_xmap(data->cache->shmem_name, PROT_READ, &mmap_len);
+
+ if (in == NULL) {
+ msg_err("cannot map cache from %s: %s", data->cache->shmem_name,
+ strerror(errno));
+ return FALSE;
+ }
+
+ if (mmap_len < data->cache->len) {
+ msg_err("cannot map cache from %s: truncated length %z, %z expected",
+ data->cache->shmem_name,
+ mmap_len, data->cache->len);
+ munmap(in, mmap_len);
+
+ return FALSE;
+ }
+
+ /*
+ * Len is taken from the shmem file size that can be larger than the
+ * actual data length, as we use shared memory as a growing buffer for the
+ * HTTP input.
+ * Hence, we need to use len from the saved cache data, counting that it is
+ * at least not more than the cached file length (this is checked above).
+ */
+ len = data->cache->len;
+
+ if (bk->is_compressed) {
+ ZSTD_DStream *zstream;
+ ZSTD_inBuffer zin;
+ ZSTD_outBuffer zout;
+ guchar *out;
+ gsize outlen, r;
+
+ zstream = ZSTD_createDStream();
+ ZSTD_initDStream(zstream);
+
+ zin.pos = 0;
+ zin.src = in;
+ zin.size = len;
+
+ if ((outlen = ZSTD_getDecompressedSize(zin.src, zin.size)) == 0) {
+ outlen = ZSTD_DStreamOutSize();
+ }
+
+ out = g_malloc(outlen);
+
+ zout.dst = out;
+ zout.pos = 0;
+ zout.size = outlen;
+
+ while (zin.pos < zin.size) {
+ r = ZSTD_decompressStream(zstream, &zout, &zin);
+
+ if (ZSTD_isError(r)) {
+ msg_err_map("%s: cannot decompress data: %s",
+ bk->uri,
+ ZSTD_getErrorName(r));
+ ZSTD_freeDStream(zstream);
+ g_free(out);
+ munmap(in, mmap_len);
+ return FALSE;
+ }
+
+ if (zout.pos == zout.size) {
+ /* We need to extend output buffer */
+ zout.size = zout.size * 2 + 1;
+ out = g_realloc(zout.dst, zout.size);
+ zout.dst = out;
+ }
+ }
+
+ ZSTD_freeDStream(zstream);
+ msg_info_map("%s: read map data cached %z bytes compressed, "
+ "%z uncompressed",
+ bk->uri,
+ len, zout.pos);
+ map->read_callback(out, zout.pos, &periodic->cbdata, TRUE);
+ g_free(out);
+ }
+ else {
+ msg_info_map("%s: read map data cached %z bytes", bk->uri, len);
+ map->read_callback(in, len, &periodic->cbdata, TRUE);
+ }
+
+ munmap(in, mmap_len);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_map_has_http_cached_file(struct rspamd_map *map,
+ struct rspamd_map_backend *bk)
+{
+ gchar path[PATH_MAX];
+ guchar digest[rspamd_cryptobox_HASHBYTES];
+ struct rspamd_config *cfg = map->cfg;
+ struct stat st;
+
+ if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') {
+ return FALSE;
+ }
+
+ rspamd_cryptobox_hash(digest, bk->uri, strlen(bk->uri), NULL, 0);
+ rspamd_snprintf(path, sizeof(path), "%s%c%*xs.map", cfg->maps_cache_dir,
+ G_DIR_SEPARATOR, 20, digest);
+
+ if (stat(path, &st) != -1 && st.st_size >
+ sizeof(struct rspamd_http_file_data)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gboolean
+rspamd_map_save_http_cached_file(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ struct http_map_data *htdata,
+ const guchar *data,
+ gsize len)
+{
+ gchar path[PATH_MAX];
+ guchar digest[rspamd_cryptobox_HASHBYTES];
+ struct rspamd_config *cfg = map->cfg;
+ gint fd;
+ struct rspamd_http_file_data header;
+
+ if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') {
+ return FALSE;
+ }
+
+ rspamd_cryptobox_hash(digest, bk->uri, strlen(bk->uri), NULL, 0);
+ rspamd_snprintf(path, sizeof(path), "%s%c%*xs.map", cfg->maps_cache_dir,
+ G_DIR_SEPARATOR, 20, digest);
+
+ fd = rspamd_file_xopen(path, O_WRONLY | O_TRUNC | O_CREAT,
+ 00600, FALSE);
+
+ if (fd == -1) {
+ return FALSE;
+ }
+
+ if (!rspamd_file_lock(fd, FALSE)) {
+ msg_err_map("cannot lock file %s: %s", path, strerror(errno));
+ close(fd);
+
+ return FALSE;
+ }
+
+ memcpy(header.magic, rspamd_http_file_magic, sizeof(rspamd_http_file_magic));
+ header.mtime = htdata->last_modified;
+ header.next_check = map->next_check;
+ header.data_off = sizeof(header);
+
+ if (htdata->etag) {
+ header.data_off += RSPAMD_FSTRING_LEN(htdata->etag);
+ header.etag_len = RSPAMD_FSTRING_LEN(htdata->etag);
+ }
+ else {
+ header.etag_len = 0;
+ }
+
+ if (write(fd, &header, sizeof(header)) != sizeof(header)) {
+ msg_err_map("cannot write file %s (header stage): %s", path, strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+
+ if (header.etag_len > 0) {
+ if (write(fd, RSPAMD_FSTRING_DATA(htdata->etag), header.etag_len) !=
+ header.etag_len) {
+ msg_err_map("cannot write file %s (etag stage): %s", path, strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+ }
+
+ /* Now write the rest */
+ if (write(fd, data, len) != len) {
+ msg_err_map("cannot write file %s (data stage): %s", path, strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ msg_info_map("saved data from %s in %s, %uz bytes", bk->uri, path, len + sizeof(header) + header.etag_len);
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_map_update_http_cached_file(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ struct http_map_data *htdata)
+{
+ gchar path[PATH_MAX];
+ guchar digest[rspamd_cryptobox_HASHBYTES];
+ struct rspamd_config *cfg = map->cfg;
+ gint fd;
+ struct rspamd_http_file_data header;
+
+ if (!rspamd_map_has_http_cached_file(map, bk)) {
+ return FALSE;
+ }
+
+ rspamd_cryptobox_hash(digest, bk->uri, strlen(bk->uri), NULL, 0);
+ rspamd_snprintf(path, sizeof(path), "%s%c%*xs.map", cfg->maps_cache_dir,
+ G_DIR_SEPARATOR, 20, digest);
+
+ fd = rspamd_file_xopen(path, O_WRONLY,
+ 00600, FALSE);
+
+ if (fd == -1) {
+ return FALSE;
+ }
+
+ if (!rspamd_file_lock(fd, FALSE)) {
+ msg_err_map("cannot lock file %s: %s", path, strerror(errno));
+ close(fd);
+
+ return FALSE;
+ }
+
+ memcpy(header.magic, rspamd_http_file_magic, sizeof(rspamd_http_file_magic));
+ header.mtime = htdata->last_modified;
+ header.next_check = map->next_check;
+ header.data_off = sizeof(header);
+
+ if (htdata->etag) {
+ header.data_off += RSPAMD_FSTRING_LEN(htdata->etag);
+ header.etag_len = RSPAMD_FSTRING_LEN(htdata->etag);
+ }
+ else {
+ header.etag_len = 0;
+ }
+
+ if (write(fd, &header, sizeof(header)) != sizeof(header)) {
+ msg_err_map("cannot update file %s (header stage): %s", path, strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+
+ if (header.etag_len > 0) {
+ if (write(fd, RSPAMD_FSTRING_DATA(htdata->etag), header.etag_len) !=
+ header.etag_len) {
+ msg_err_map("cannot update file %s (etag stage): %s", path, strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+ }
+
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return TRUE;
+}
+
+
+static gboolean
+rspamd_map_read_http_cached_file(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ struct http_map_data *htdata,
+ struct map_cb_data *cbdata)
+{
+ gchar path[PATH_MAX];
+ guchar digest[rspamd_cryptobox_HASHBYTES];
+ struct rspamd_config *cfg = map->cfg;
+ gint fd;
+ struct stat st;
+ struct rspamd_http_file_data header;
+
+ if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') {
+ return FALSE;
+ }
+
+ rspamd_cryptobox_hash(digest, bk->uri, strlen(bk->uri), NULL, 0);
+ rspamd_snprintf(path, sizeof(path), "%s%c%*xs.map", cfg->maps_cache_dir,
+ G_DIR_SEPARATOR, 20, digest);
+
+ fd = rspamd_file_xopen(path, O_RDONLY, 00600, FALSE);
+
+ if (fd == -1) {
+ return FALSE;
+ }
+
+ if (!rspamd_file_lock(fd, FALSE)) {
+ msg_err_map("cannot lock file %s: %s", path, strerror(errno));
+ close(fd);
+
+ return FALSE;
+ }
+
+ (void) fstat(fd, &st);
+
+ if (read(fd, &header, sizeof(header)) != sizeof(header)) {
+ msg_err_map("cannot read file %s (header stage): %s", path, strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+
+ if (memcmp(header.magic, rspamd_http_file_magic,
+ sizeof(rspamd_http_file_magic)) != 0) {
+ msg_warn_map("invalid or old version magic in file %s; ignore it", path);
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ return FALSE;
+ }
+
+ double now = rspamd_get_calendar_ticks();
+
+ if (header.next_check > now) {
+ map->next_check = header.next_check;
+ }
+ else {
+ map->next_check = now;
+ }
+
+ htdata->last_modified = header.mtime;
+
+ if (header.etag_len > 0) {
+ rspamd_fstring_t *etag = rspamd_fstring_sized_new(header.etag_len);
+
+ if (read(fd, RSPAMD_FSTRING_DATA(etag), header.etag_len) != header.etag_len) {
+ msg_err_map("cannot read file %s (etag stage): %s", path,
+ strerror(errno));
+ rspamd_file_unlock(fd, FALSE);
+ rspamd_fstring_free(etag);
+ close(fd);
+
+ return FALSE;
+ }
+
+ etag->len = header.etag_len;
+
+ if (htdata->etag) {
+ /* FIXME: should be dealt somehow better */
+ msg_warn_map("etag is already defined as %V; cached is %V; ignore cached",
+ htdata->etag, etag);
+ rspamd_fstring_free(etag);
+ }
+ else {
+ htdata->etag = etag;
+ }
+ }
+
+ rspamd_file_unlock(fd, FALSE);
+ close(fd);
+
+ /* Now read file data */
+ /* Perform buffered read: fail-safe */
+ if (!read_map_file_chunks(map, cbdata, path,
+ st.st_size - header.data_off, header.data_off)) {
+ return FALSE;
+ }
+
+ struct tm tm;
+ gchar ncheck_buf[32], lm_buf[32];
+
+ rspamd_localtime(map->next_check, &tm);
+ strftime(ncheck_buf, sizeof(ncheck_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm);
+ rspamd_localtime(htdata->last_modified, &tm);
+ strftime(lm_buf, sizeof(lm_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm);
+
+ msg_info_map("read cached data for %s from %s, %uz bytes; next check at: %s;"
+ " last modified on: %s; etag: %V",
+ bk->uri,
+ path,
+ (size_t) (st.st_size - header.data_off),
+ ncheck_buf,
+ lm_buf,
+ htdata->etag);
+
+ return TRUE;
+}
+
+/**
+ * Async HTTP callback
+ */
+static void
+rspamd_map_common_http_callback(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ struct map_periodic_cbdata *periodic,
+ gboolean check)
+{
+ struct http_map_data *data;
+ struct http_callback_data *cbd;
+ guint flags = RSPAMD_HTTP_CLIENT_SIMPLE | RSPAMD_HTTP_CLIENT_SHARED;
+
+ data = bk->data.hd;
+
+ if (g_atomic_int_get(&data->cache->available) == 1) {
+ /* Read cached data */
+ if (check) {
+ if (data->last_modified < data->cache->last_modified) {
+ msg_info_map("need to reread cached map triggered by %s "
+ "(%d our modify time, %d cached modify time)",
+ bk->uri,
+ (int) data->last_modified,
+ (int) data->cache->last_modified);
+ periodic->need_modify = TRUE;
+ /* Reset the whole chain */
+ periodic->cur_backend = 0;
+ rspamd_map_process_periodic(periodic);
+ }
+ else {
+ if (map->active_http) {
+ /* Check even if there is a cached version */
+ goto check;
+ }
+ else {
+ /* Switch to the next backend */
+ periodic->cur_backend++;
+ rspamd_map_process_periodic(periodic);
+ }
+ }
+
+ return;
+ }
+ else {
+ if (map->active_http &&
+ data->last_modified > data->cache->last_modified) {
+ goto check;
+ }
+ else if (rspamd_map_read_cached(map, bk, periodic, data->host)) {
+ /* Switch to the next backend */
+ periodic->cur_backend++;
+ data->last_modified = data->cache->last_modified;
+ rspamd_map_process_periodic(periodic);
+
+ return;
+ }
+ }
+ }
+ else if (!map->active_http) {
+ /* Switch to the next backend */
+ periodic->cur_backend++;
+ rspamd_map_process_periodic(periodic);
+
+ return;
+ }
+
+check:
+ cbd = g_malloc0(sizeof(struct http_callback_data));
+
+ cbd->event_loop = map->event_loop;
+ cbd->addrs = g_ptr_array_sized_new(4);
+ cbd->map = map;
+ cbd->data = data;
+ cbd->check = check;
+ cbd->periodic = periodic;
+ MAP_RETAIN(periodic, "periodic");
+ cbd->bk = bk;
+ MAP_RETAIN(bk, "rspamd_map_backend");
+ cbd->stage = http_map_terminated;
+ REF_INIT_RETAIN(cbd, free_http_cbdata);
+
+ msg_debug_map("%s map data from %s", check ? "checking" : "reading",
+ data->host);
+
+ /* Try address */
+ rspamd_inet_addr_t *addr = NULL;
+
+ if (rspamd_parse_inet_address(&addr, data->host,
+ strlen(data->host), RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+ rspamd_inet_address_set_port(addr, cbd->data->port);
+ g_ptr_array_add(cbd->addrs, (void *) addr);
+
+ if (bk->protocol == MAP_PROTO_HTTPS) {
+ flags |= RSPAMD_HTTP_CLIENT_SSL;
+ }
+
+ cbd->conn = rspamd_http_connection_new_client(
+ NULL,
+ NULL,
+ http_map_error,
+ http_map_finish,
+ flags,
+ addr);
+
+ if (cbd->conn != NULL) {
+ cbd->stage = http_map_http_conn;
+ write_http_request(cbd);
+ cbd->addr = addr;
+ MAP_RELEASE(cbd, "http_callback_data");
+ }
+ else {
+ msg_warn_map("cannot load map: cannot connect to %s: %s",
+ data->host, strerror(errno));
+ MAP_RELEASE(cbd, "http_callback_data");
+ }
+
+ return;
+ }
+ else if (map->r->r) {
+ /* Send both A and AAAA requests */
+ guint nreq = 0;
+
+ if (rdns_make_request_full(map->r->r, rspamd_map_dns_callback, cbd,
+ map->cfg->dns_timeout, map->cfg->dns_retransmits, 1,
+ data->host, RDNS_REQUEST_A)) {
+ MAP_RETAIN(cbd, "http_callback_data");
+ nreq++;
+ }
+ if (rdns_make_request_full(map->r->r, rspamd_map_dns_callback, cbd,
+ map->cfg->dns_timeout, map->cfg->dns_retransmits, 1,
+ data->host, RDNS_REQUEST_AAAA)) {
+ MAP_RETAIN(cbd, "http_callback_data");
+ nreq++;
+ }
+
+ if (nreq == 2) {
+ cbd->stage = http_map_resolve_host2;
+ }
+ else if (nreq == 1) {
+ cbd->stage = http_map_resolve_host1;
+ }
+
+ map->tmp_dtor = free_http_cbdata_dtor;
+ map->tmp_dtor_data = cbd;
+ }
+ else {
+ msg_warn_map("cannot load map: DNS resolver is not initialized");
+ cbd->periodic->errored = TRUE;
+ }
+
+ MAP_RELEASE(cbd, "http_callback_data");
+}
+
+static void
+rspamd_map_http_check_callback(struct map_periodic_cbdata *cbd)
+{
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ map = cbd->map;
+ bk = g_ptr_array_index(cbd->map->backends, cbd->cur_backend);
+
+ rspamd_map_common_http_callback(map, bk, cbd, TRUE);
+}
+
+static void
+rspamd_map_http_read_callback(struct map_periodic_cbdata *cbd)
+{
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ map = cbd->map;
+ bk = g_ptr_array_index(cbd->map->backends, cbd->cur_backend);
+ rspamd_map_common_http_callback(map, bk, cbd, FALSE);
+}
+
+static void
+rspamd_map_file_check_callback(struct map_periodic_cbdata *periodic)
+{
+ struct rspamd_map *map;
+ struct file_map_data *data;
+ struct rspamd_map_backend *bk;
+
+ map = periodic->map;
+ bk = g_ptr_array_index(map->backends, periodic->cur_backend);
+ data = bk->data.fd;
+
+ if (data->need_modify) {
+ periodic->need_modify = TRUE;
+ periodic->cur_backend = 0;
+ data->need_modify = FALSE;
+
+ rspamd_map_process_periodic(periodic);
+
+ return;
+ }
+
+ map = periodic->map;
+ /* Switch to the next backend as the rest is handled by ev_stat */
+ periodic->cur_backend++;
+ rspamd_map_process_periodic(periodic);
+}
+
+static void
+rspamd_map_static_check_callback(struct map_periodic_cbdata *periodic)
+{
+ struct rspamd_map *map;
+ struct static_map_data *data;
+ struct rspamd_map_backend *bk;
+
+ map = periodic->map;
+ bk = g_ptr_array_index(map->backends, periodic->cur_backend);
+ data = bk->data.sd;
+
+ if (!data->processed) {
+ periodic->need_modify = TRUE;
+ periodic->cur_backend = 0;
+
+ rspamd_map_process_periodic(periodic);
+
+ return;
+ }
+
+ /* Switch to the next backend */
+ periodic->cur_backend++;
+ rspamd_map_process_periodic(periodic);
+}
+
+static void
+rspamd_map_file_read_callback(struct map_periodic_cbdata *periodic)
+{
+ struct rspamd_map *map;
+ struct file_map_data *data;
+ struct rspamd_map_backend *bk;
+
+ map = periodic->map;
+
+ bk = g_ptr_array_index(map->backends, periodic->cur_backend);
+ data = bk->data.fd;
+
+ msg_info_map("rereading map file %s", data->filename);
+
+ if (!read_map_file(map, data, bk, periodic)) {
+ periodic->errored = TRUE;
+ }
+
+ /* Switch to the next backend */
+ periodic->cur_backend++;
+ rspamd_map_process_periodic(periodic);
+}
+
+static void
+rspamd_map_static_read_callback(struct map_periodic_cbdata *periodic)
+{
+ struct rspamd_map *map;
+ struct static_map_data *data;
+ struct rspamd_map_backend *bk;
+
+ map = periodic->map;
+
+ bk = g_ptr_array_index(map->backends, periodic->cur_backend);
+ data = bk->data.sd;
+
+ msg_info_map("rereading static map");
+
+ if (!read_map_static(map, data, bk, periodic)) {
+ periodic->errored = TRUE;
+ }
+
+ /* Switch to the next backend */
+ periodic->cur_backend++;
+ rspamd_map_process_periodic(periodic);
+}
+
+static void
+rspamd_map_process_periodic(struct map_periodic_cbdata *cbd)
+{
+ struct rspamd_map_backend *bk;
+ struct rspamd_map *map;
+
+ map = cbd->map;
+ map->scheduled_check = NULL;
+
+ if (!map->file_only && !cbd->locked) {
+ if (!g_atomic_int_compare_and_exchange(cbd->map->locked,
+ 0, 1)) {
+ msg_debug_map(
+ "don't try to reread map %s as it is locked by other process, "
+ "will reread it later",
+ cbd->map->name);
+ rspamd_map_schedule_periodic(map, RSPAMD_MAP_SCHEDULE_LOCKED);
+ MAP_RELEASE(cbd, "periodic");
+
+ return;
+ }
+ else {
+ msg_debug_map("locked map %s", cbd->map->name);
+ cbd->locked = TRUE;
+ }
+ }
+
+ if (cbd->errored) {
+ /* We should not check other backends if some backend has failed*/
+ rspamd_map_schedule_periodic(cbd->map, RSPAMD_MAP_SCHEDULE_ERROR);
+
+ if (cbd->locked) {
+ g_atomic_int_set(cbd->map->locked, 0);
+ cbd->locked = FALSE;
+ }
+
+ /* Also set error flag for the map consumer */
+ cbd->cbdata.errored = true;
+
+ msg_debug_map("unlocked map %s, refcount=%d", cbd->map->name,
+ cbd->ref.refcount);
+ MAP_RELEASE(cbd, "periodic");
+
+ return;
+ }
+
+ /* For each backend we need to check for modifications */
+ if (cbd->cur_backend >= cbd->map->backends->len) {
+ /* Last backend */
+ msg_debug_map("finished map: %d of %d", cbd->cur_backend,
+ cbd->map->backends->len);
+ MAP_RELEASE(cbd, "periodic");
+
+ return;
+ }
+
+ if (cbd->map->wrk && cbd->map->wrk->state == rspamd_worker_state_running) {
+ bk = g_ptr_array_index(cbd->map->backends, cbd->cur_backend);
+ g_assert(bk != NULL);
+
+ if (cbd->need_modify) {
+ /* Load data from the next backend */
+ switch (bk->protocol) {
+ case MAP_PROTO_HTTP:
+ case MAP_PROTO_HTTPS:
+ rspamd_map_http_read_callback(cbd);
+ break;
+ case MAP_PROTO_FILE:
+ rspamd_map_file_read_callback(cbd);
+ break;
+ case MAP_PROTO_STATIC:
+ rspamd_map_static_read_callback(cbd);
+ break;
+ }
+ }
+ else {
+ /* Check the next backend */
+ switch (bk->protocol) {
+ case MAP_PROTO_HTTP:
+ case MAP_PROTO_HTTPS:
+ rspamd_map_http_check_callback(cbd);
+ break;
+ case MAP_PROTO_FILE:
+ rspamd_map_file_check_callback(cbd);
+ break;
+ case MAP_PROTO_STATIC:
+ rspamd_map_static_check_callback(cbd);
+ break;
+ }
+ }
+ }
+}
+
+static void
+rspamd_map_on_stat(struct ev_loop *loop, ev_stat *w, int revents)
+{
+ struct rspamd_map *map = (struct rspamd_map *) w->data;
+
+ if (w->attr.st_nlink > 0) {
+ msg_info_map("old mtime is %t (size = %Hz), "
+ "new mtime is %t (size = %Hz) for map file %s",
+ w->prev.st_mtime, (gsize) w->prev.st_size,
+ w->attr.st_mtime, (gsize) w->attr.st_size,
+ w->path);
+
+ /* Fire need modify flag */
+ struct rspamd_map_backend *bk;
+ guint i;
+
+ PTR_ARRAY_FOREACH(map->backends, i, bk)
+ {
+ if (bk->protocol == MAP_PROTO_FILE) {
+ bk->data.fd->need_modify = TRUE;
+ }
+ }
+
+ map->next_check = 0;
+
+ if (map->scheduled_check) {
+ ev_timer_stop(map->event_loop, &map->scheduled_check->ev);
+ MAP_RELEASE(map->scheduled_check, "rspamd_map_on_stat");
+ map->scheduled_check = NULL;
+ }
+
+ rspamd_map_schedule_periodic(map, RSPAMD_MAP_SCHEDULE_INIT);
+ }
+}
+
+/* Start watching event for all maps */
+void rspamd_map_watch(struct rspamd_config *cfg,
+ struct ev_loop *event_loop,
+ struct rspamd_dns_resolver *resolver,
+ struct rspamd_worker *worker,
+ enum rspamd_map_watch_type how)
+{
+ GList *cur = cfg->maps;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+ guint i;
+
+ g_assert(how > RSPAMD_MAP_WATCH_MIN && how < RSPAMD_MAP_WATCH_MAX);
+
+ /* First of all do synced read of data */
+ while (cur) {
+ map = cur->data;
+ map->event_loop = event_loop;
+ map->r = resolver;
+
+ if (map->wrk == NULL && how != RSPAMD_MAP_WATCH_WORKER) {
+ /* Generic scanner map */
+ map->wrk = worker;
+
+ if (how == RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER) {
+ map->active_http = TRUE;
+ }
+ else {
+ map->active_http = FALSE;
+ }
+ }
+ else if (map->wrk != NULL && map->wrk == worker) {
+ /* Map is bound to a specific worker */
+ map->active_http = TRUE;
+ }
+ else {
+ /* Skip map for this worker as irrelevant */
+ cur = g_list_next(cur);
+ continue;
+ }
+
+ if (!map->active_http) {
+ /* Check cached version more frequently as it is cheap */
+
+ if (map->poll_timeout >= cfg->map_timeout &&
+ cfg->map_file_watch_multiplier < 1.0) {
+ map->poll_timeout =
+ map->poll_timeout * cfg->map_file_watch_multiplier;
+ }
+ }
+
+ map->file_only = TRUE;
+ map->static_only = TRUE;
+
+ PTR_ARRAY_FOREACH(map->backends, i, bk)
+ {
+ bk->event_loop = event_loop;
+
+ if (bk->protocol == MAP_PROTO_FILE) {
+ struct file_map_data *data;
+
+ data = bk->data.fd;
+
+ if (map->user_data == NULL || *map->user_data == NULL) {
+ /* Map has not been read, init it's reading if possible */
+ struct stat st;
+
+ if (stat(data->filename, &st) != -1) {
+ data->need_modify = TRUE;
+ }
+ }
+
+ ev_stat_init(&data->st_ev, rspamd_map_on_stat,
+ data->filename, map->poll_timeout * cfg->map_file_watch_multiplier);
+ data->st_ev.data = map;
+ ev_stat_start(event_loop, &data->st_ev);
+ map->static_only = FALSE;
+ }
+ else if ((bk->protocol == MAP_PROTO_HTTP ||
+ bk->protocol == MAP_PROTO_HTTPS)) {
+ if (map->active_http) {
+ map->non_trivial = TRUE;
+ }
+
+ map->static_only = FALSE;
+ map->file_only = FALSE;
+ }
+ }
+
+ rspamd_map_schedule_periodic(map, RSPAMD_MAP_SCHEDULE_INIT);
+
+ cur = g_list_next(cur);
+ }
+}
+
+void rspamd_map_preload(struct rspamd_config *cfg)
+{
+ GList *cur = cfg->maps;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+ guint i;
+ gboolean map_ok;
+
+ /* First of all do synced read of data */
+ while (cur) {
+ map = cur->data;
+ map_ok = TRUE;
+
+ PTR_ARRAY_FOREACH(map->backends, i, bk)
+ {
+ if (!(bk->protocol == MAP_PROTO_FILE ||
+ bk->protocol == MAP_PROTO_STATIC)) {
+
+ if (bk->protocol == MAP_PROTO_HTTP ||
+ bk->protocol == MAP_PROTO_HTTPS) {
+ if (!rspamd_map_has_http_cached_file(map, bk)) {
+
+ if (!map->fallback_backend) {
+ map_ok = FALSE;
+ }
+ break;
+ }
+ else {
+ continue; /* We are yet fine */
+ }
+ }
+ map_ok = FALSE;
+ break;
+ }
+ }
+
+ if (map_ok) {
+ struct map_periodic_cbdata fake_cbd;
+ gboolean succeed = TRUE;
+
+ memset(&fake_cbd, 0, sizeof(fake_cbd));
+ fake_cbd.cbdata.state = 0;
+ fake_cbd.cbdata.prev_data = *map->user_data;
+ fake_cbd.cbdata.cur_data = NULL;
+ fake_cbd.cbdata.map = map;
+ fake_cbd.map = map;
+
+ PTR_ARRAY_FOREACH(map->backends, i, bk)
+ {
+ fake_cbd.cur_backend = i;
+
+ if (bk->protocol == MAP_PROTO_FILE) {
+ if (!read_map_file(map, bk->data.fd, bk, &fake_cbd)) {
+ succeed = FALSE;
+ break;
+ }
+ }
+ else if (bk->protocol == MAP_PROTO_STATIC) {
+ if (!read_map_static(map, bk->data.sd, bk, &fake_cbd)) {
+ succeed = FALSE;
+ break;
+ }
+ }
+ else if (bk->protocol == MAP_PROTO_HTTP ||
+ bk->protocol == MAP_PROTO_HTTPS) {
+ if (!rspamd_map_read_http_cached_file(map, bk, bk->data.hd,
+ &fake_cbd.cbdata)) {
+
+ if (map->fallback_backend) {
+ /* Try fallback */
+ g_assert(map->fallback_backend->protocol ==
+ MAP_PROTO_FILE);
+ if (!read_map_file(map,
+ map->fallback_backend->data.fd,
+ map->fallback_backend, &fake_cbd)) {
+ succeed = FALSE;
+ break;
+ }
+ }
+ else {
+ succeed = FALSE;
+ break;
+ }
+ }
+ }
+ else {
+ g_assert_not_reached();
+ }
+ }
+
+ if (succeed) {
+ map->fin_callback(&fake_cbd.cbdata, map->user_data);
+
+ if (map->on_load_function) {
+ map->on_load_function(map, map->on_load_ud);
+ }
+ }
+ else {
+ msg_info_map("preload of %s failed", map->name);
+ }
+ }
+
+ cur = g_list_next(cur);
+ }
+}
+
+void rspamd_map_remove_all(struct rspamd_config *cfg)
+{
+ struct rspamd_map *map;
+ GList *cur;
+ struct rspamd_map_backend *bk;
+ struct map_cb_data cbdata;
+ guint i;
+
+ for (cur = cfg->maps; cur != NULL; cur = g_list_next(cur)) {
+ map = cur->data;
+
+ if (map->tmp_dtor) {
+ map->tmp_dtor(map->tmp_dtor_data);
+ }
+
+ if (map->dtor) {
+ cbdata.prev_data = NULL;
+ cbdata.map = map;
+ cbdata.cur_data = *map->user_data;
+
+ map->dtor(&cbdata);
+ *map->user_data = NULL;
+ }
+
+ if (map->on_load_ud_dtor) {
+ map->on_load_ud_dtor(map->on_load_ud);
+ }
+
+ for (i = 0; i < map->backends->len; i++) {
+ bk = g_ptr_array_index(map->backends, i);
+
+ MAP_RELEASE(bk, "rspamd_map_backend");
+ }
+
+ if (map->fallback_backend) {
+ MAP_RELEASE(map->fallback_backend, "rspamd_map_backend");
+ }
+ }
+
+ g_list_free(cfg->maps);
+ cfg->maps = NULL;
+}
+
+static const gchar *
+rspamd_map_check_proto(struct rspamd_config *cfg,
+ const gchar *map_line, struct rspamd_map_backend *bk)
+{
+ const gchar *pos = map_line, *end, *end_key;
+
+ g_assert(bk != NULL);
+ g_assert(pos != NULL);
+
+ end = pos + strlen(pos);
+
+ /* Static check */
+ if (g_ascii_strcasecmp(pos, "static") == 0) {
+ bk->protocol = MAP_PROTO_STATIC;
+ bk->uri = g_strdup(pos);
+
+ return pos;
+ }
+ else if (g_ascii_strcasecmp(pos, "zst+static") == 0) {
+ bk->protocol = MAP_PROTO_STATIC;
+ bk->uri = g_strdup(pos + 4);
+ bk->is_compressed = TRUE;
+
+ return pos + 4;
+ }
+
+ for (;;) {
+ if (g_ascii_strncasecmp(pos, "sign+", sizeof("sign+") - 1) == 0) {
+ bk->is_signed = TRUE;
+ pos += sizeof("sign+") - 1;
+ }
+ else if (g_ascii_strncasecmp(pos, "fallback+", sizeof("fallback+") - 1) == 0) {
+ bk->is_fallback = TRUE;
+ pos += sizeof("fallback+") - 1;
+ }
+ else if (g_ascii_strncasecmp(pos, "key=", sizeof("key=") - 1) == 0) {
+ pos += sizeof("key=") - 1;
+ end_key = memchr(pos, '+', end - pos);
+
+ if (end_key != NULL) {
+ bk->trusted_pubkey = rspamd_pubkey_from_base32(pos, end_key - pos,
+ RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519);
+
+ if (bk->trusted_pubkey == NULL) {
+ msg_err_config("cannot read pubkey from map: %s",
+ map_line);
+ return NULL;
+ }
+ pos = end_key + 1;
+ }
+ else if (end - pos > 64) {
+ /* Try hex encoding */
+ bk->trusted_pubkey = rspamd_pubkey_from_hex(pos, 64,
+ RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519);
+
+ if (bk->trusted_pubkey == NULL) {
+ msg_err_config("cannot read pubkey from map: %s",
+ map_line);
+ return NULL;
+ }
+ pos += 64;
+ }
+ else {
+ msg_err_config("cannot read pubkey from map: %s",
+ map_line);
+ return NULL;
+ }
+
+ if (*pos == '+' || *pos == ':') {
+ pos++;
+ }
+ }
+ else {
+ /* No known flags */
+ break;
+ }
+ }
+
+ bk->protocol = MAP_PROTO_FILE;
+
+ if (g_ascii_strncasecmp(pos, "http://", sizeof("http://") - 1) == 0) {
+ bk->protocol = MAP_PROTO_HTTP;
+ /* Include http:// */
+ bk->uri = g_strdup(pos);
+ pos += sizeof("http://") - 1;
+ }
+ else if (g_ascii_strncasecmp(pos, "https://", sizeof("https://") - 1) == 0) {
+ bk->protocol = MAP_PROTO_HTTPS;
+ /* Include https:// */
+ bk->uri = g_strdup(pos);
+ pos += sizeof("https://") - 1;
+ }
+ else if (g_ascii_strncasecmp(pos, "file://", sizeof("file://") - 1) == 0) {
+ pos += sizeof("file://") - 1;
+ /* Exclude file:// */
+ bk->uri = g_strdup(pos);
+ }
+ else if (*pos == '/') {
+ /* Trivial file case */
+ bk->uri = g_strdup(pos);
+ }
+ else {
+ msg_err_config("invalid map fetching protocol: %s", map_line);
+
+ return NULL;
+ }
+
+ if (bk->protocol != MAP_PROTO_FILE && bk->is_signed) {
+ msg_err_config("signed maps are no longer supported for HTTP(s): %s", map_line);
+ }
+
+ return pos;
+}
+
+gboolean
+rspamd_map_is_map(const gchar *map_line)
+{
+ gboolean ret = FALSE;
+
+ g_assert(map_line != NULL);
+
+ if (map_line[0] == '/') {
+ ret = TRUE;
+ }
+ else if (g_ascii_strncasecmp(map_line, "sign+", sizeof("sign+") - 1) == 0) {
+ ret = TRUE;
+ }
+ else if (g_ascii_strncasecmp(map_line, "fallback+", sizeof("fallback+") - 1) == 0) {
+ ret = TRUE;
+ }
+ else if (g_ascii_strncasecmp(map_line, "file://", sizeof("file://") - 1) == 0) {
+ ret = TRUE;
+ }
+ else if (g_ascii_strncasecmp(map_line, "http://", sizeof("http://") - 1) == 0) {
+ ret = TRUE;
+ }
+ else if (g_ascii_strncasecmp(map_line, "https://", sizeof("https://") - 1) == 0) {
+ ret = TRUE;
+ }
+
+ return ret;
+}
+
+static void
+rspamd_map_backend_dtor(struct rspamd_map_backend *bk)
+{
+ switch (bk->protocol) {
+ case MAP_PROTO_FILE:
+ if (bk->data.fd) {
+ ev_stat_stop(bk->event_loop, &bk->data.fd->st_ev);
+ g_free(bk->data.fd->filename);
+ g_free(bk->data.fd);
+ }
+ break;
+ case MAP_PROTO_STATIC:
+ if (bk->data.sd) {
+ if (bk->data.sd->data) {
+ g_free(bk->data.sd->data);
+ }
+
+ g_free(bk->data.sd);
+ }
+ break;
+ case MAP_PROTO_HTTP:
+ case MAP_PROTO_HTTPS:
+ if (bk->data.hd) {
+ struct http_map_data *data = bk->data.hd;
+
+ g_free(data->host);
+ g_free(data->path);
+ g_free(data->rest);
+
+ if (data->userinfo) {
+ g_free(data->userinfo);
+ }
+
+ if (data->etag) {
+ rspamd_fstring_free(data->etag);
+ }
+
+ /*
+ * Clear cached file, but check if a worker is an active http worker
+ * as cur_cache_cbd is meaningful merely for active worker, who actually
+ * owns the cache
+ */
+ if (bk->map && bk->map->active_http) {
+ if (g_atomic_int_compare_and_exchange(&data->cache->available, 1, 0)) {
+ if (data->cur_cache_cbd) {
+ msg_info("clear shared memory cache for a map in %s as backend \"%s\" is closing",
+ data->cur_cache_cbd->shm->shm_name,
+ bk->uri);
+ MAP_RELEASE(data->cur_cache_cbd->shm,
+ "rspamd_http_map_cached_cbdata");
+ ev_timer_stop(data->cur_cache_cbd->event_loop,
+ &data->cur_cache_cbd->timeout);
+ g_free(data->cur_cache_cbd);
+ data->cur_cache_cbd = NULL;
+ }
+ }
+ }
+
+ g_free(bk->data.hd);
+ }
+ break;
+ }
+
+ if (bk->trusted_pubkey) {
+ rspamd_pubkey_unref(bk->trusted_pubkey);
+ }
+
+ g_free(bk->uri);
+ g_free(bk);
+}
+
+static struct rspamd_map_backend *
+rspamd_map_parse_backend(struct rspamd_config *cfg, const gchar *map_line)
+{
+ struct rspamd_map_backend *bk;
+ struct file_map_data *fdata = NULL;
+ struct http_map_data *hdata = NULL;
+ struct static_map_data *sdata = NULL;
+ struct http_parser_url up;
+ const gchar *end, *p;
+ rspamd_ftok_t tok;
+
+ bk = g_malloc0(sizeof(*bk));
+ REF_INIT_RETAIN(bk, rspamd_map_backend_dtor);
+
+ if (!rspamd_map_check_proto(cfg, map_line, bk)) {
+ goto err;
+ }
+
+ if (bk->is_fallback && bk->protocol != MAP_PROTO_FILE) {
+ msg_err_config("fallback backend must be file for %s", bk->uri);
+
+ goto err;
+ }
+
+ end = map_line + strlen(map_line);
+ if (end - map_line > 5) {
+ p = end - 5;
+ if (g_ascii_strcasecmp(p, ".zstd") == 0) {
+ bk->is_compressed = TRUE;
+ }
+ p = end - 4;
+ if (g_ascii_strcasecmp(p, ".zst") == 0) {
+ bk->is_compressed = TRUE;
+ }
+ }
+
+ /* Now check for each proto separately */
+ if (bk->protocol == MAP_PROTO_FILE) {
+ fdata = g_malloc0(sizeof(struct file_map_data));
+
+ if (access(bk->uri, R_OK) == -1) {
+ if (errno != ENOENT) {
+ msg_err_config("cannot open file '%s': %s", bk->uri, strerror(errno));
+ goto err;
+ }
+
+ msg_info_config(
+ "map '%s' is not found, but it can be loaded automatically later",
+ bk->uri);
+ }
+
+ fdata->filename = g_strdup(bk->uri);
+ bk->data.fd = fdata;
+ }
+ else if (bk->protocol == MAP_PROTO_HTTP || bk->protocol == MAP_PROTO_HTTPS) {
+ hdata = g_malloc0(sizeof(struct http_map_data));
+
+ memset(&up, 0, sizeof(up));
+ if (http_parser_parse_url(bk->uri, strlen(bk->uri), FALSE,
+ &up) != 0) {
+ msg_err_config("cannot parse HTTP url: %s", bk->uri);
+ goto err;
+ }
+ else {
+ if (!(up.field_set & 1u << UF_HOST)) {
+ msg_err_config("cannot parse HTTP url: %s: no host", bk->uri);
+ goto err;
+ }
+
+ tok.begin = bk->uri + up.field_data[UF_HOST].off;
+ tok.len = up.field_data[UF_HOST].len;
+ hdata->host = rspamd_ftokdup(&tok);
+
+ if (up.field_set & (1u << UF_PORT)) {
+ hdata->port = up.port;
+ }
+ else {
+ if (bk->protocol == MAP_PROTO_HTTP) {
+ hdata->port = 80;
+ }
+ else {
+ hdata->port = 443;
+ }
+ }
+
+ if (up.field_set & (1u << UF_PATH)) {
+ tok.begin = bk->uri + up.field_data[UF_PATH].off;
+ tok.len = up.field_data[UF_PATH].len;
+
+ hdata->path = rspamd_ftokdup(&tok);
+
+ /* We also need to check query + fragment */
+ if (up.field_set & ((1u << UF_QUERY) | (1u << UF_FRAGMENT))) {
+ tok.begin = bk->uri + up.field_data[UF_PATH].off +
+ up.field_data[UF_PATH].len;
+ tok.len = strlen(tok.begin);
+ hdata->rest = rspamd_ftokdup(&tok);
+ }
+ else {
+ hdata->rest = g_strdup("");
+ }
+ }
+
+ if (up.field_set & (1u << UF_USERINFO)) {
+ /* Create authorisation header for basic auth */
+ guint len = sizeof("Basic ") +
+ up.field_data[UF_USERINFO].len * 8 / 5 + 4;
+ hdata->userinfo = g_malloc(len);
+ rspamd_snprintf(hdata->userinfo, len, "Basic %*Bs",
+ (int) up.field_data[UF_USERINFO].len,
+ bk->uri + up.field_data[UF_USERINFO].off);
+
+ msg_debug("added userinfo for the map from the URL: %s", hdata->host);
+ }
+ else {
+ /* Try to obtain authentication data from options in the configuration */
+ const ucl_object_t *auth_obj, *opts_obj;
+
+ opts_obj = ucl_object_lookup(cfg->cfg_ucl_obj, "options");
+ if (opts_obj != NULL) {
+ auth_obj = ucl_object_lookup(opts_obj, "http_auth");
+ if (auth_obj != NULL && ucl_object_type(auth_obj) == UCL_OBJECT) {
+ const ucl_object_t *host_obj;
+
+ /*
+ * Search first by the full URL and then by the host part
+ */
+ host_obj = ucl_object_lookup(auth_obj, map_line);
+
+ if (host_obj == NULL) {
+ host_obj = ucl_object_lookup(auth_obj, hdata->host);
+ }
+
+ if (host_obj != NULL && ucl_object_type(host_obj) == UCL_OBJECT) {
+ const ucl_object_t *user_obj, *password_obj;
+
+ user_obj = ucl_object_lookup(host_obj, "user");
+ password_obj = ucl_object_lookup(host_obj, "password");
+
+ if (user_obj != NULL && password_obj != NULL &&
+ ucl_object_type(user_obj) == UCL_STRING &&
+ ucl_object_type(password_obj) == UCL_STRING) {
+
+ gchar *tmpbuf;
+ unsigned tlen;
+
+ /* User + password + ':' */
+ tlen = strlen(ucl_object_tostring(user_obj)) +
+ strlen(ucl_object_tostring(password_obj)) + 1;
+ tmpbuf = g_malloc(tlen + 1);
+ rspamd_snprintf(tmpbuf, tlen + 1, "%s:%s",
+ ucl_object_tostring(user_obj),
+ ucl_object_tostring(password_obj));
+ /* Base64 encoding is not so greedy, but we add some space for simplicity */
+ tlen *= 2;
+ tlen += sizeof("Basic ") - 1;
+ hdata->userinfo = g_malloc(tlen + 1);
+ rspamd_snprintf(hdata->userinfo, tlen + 1, "Basic %Bs", tmpbuf);
+ g_free(tmpbuf);
+ msg_debug("added userinfo for the map from the configuration: %s", map_line);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ hdata->cache = rspamd_mempool_alloc0_shared(cfg->cfg_pool,
+ sizeof(*hdata->cache));
+
+ bk->data.hd = hdata;
+ }
+ else if (bk->protocol == MAP_PROTO_STATIC) {
+ sdata = g_malloc0(sizeof(*sdata));
+ bk->data.sd = sdata;
+ }
+
+ bk->id = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_T1HA,
+ bk->uri, strlen(bk->uri),
+ 0xdeadbabe);
+
+ return bk;
+
+err:
+ MAP_RELEASE(bk, "rspamd_map_backend");
+
+ if (hdata) {
+ g_free(hdata);
+ }
+
+ if (fdata) {
+ g_free(fdata);
+ }
+
+ if (sdata) {
+ g_free(sdata);
+ }
+
+ return NULL;
+}
+
+static void
+rspamd_map_calculate_hash(struct rspamd_map *map)
+{
+ struct rspamd_map_backend *bk;
+ guint i;
+ rspamd_cryptobox_hash_state_t st;
+ gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES];
+
+ rspamd_cryptobox_hash_init(&st, NULL, 0);
+
+ for (i = 0; i < map->backends->len; i++) {
+ bk = g_ptr_array_index(map->backends, i);
+ rspamd_cryptobox_hash_update(&st, bk->uri, strlen(bk->uri));
+ }
+
+ rspamd_cryptobox_hash_final(&st, cksum);
+ cksum_encoded = rspamd_encode_base32(cksum, sizeof(cksum), RSPAMD_BASE32_DEFAULT);
+ rspamd_strlcpy(map->tag, cksum_encoded, sizeof(map->tag));
+ g_free(cksum_encoded);
+}
+
+static gboolean
+rspamd_map_add_static_string(struct rspamd_config *cfg,
+ const ucl_object_t *elt,
+ GString *target)
+{
+ gsize sz;
+ const gchar *dline;
+
+ if (ucl_object_type(elt) != UCL_STRING) {
+ msg_err_config("map has static backend but `data` is "
+ "not string like: %s",
+ ucl_object_type_to_string(elt->type));
+ return FALSE;
+ }
+
+ /* Otherwise, we copy data to the backend */
+ dline = ucl_object_tolstring(elt, &sz);
+
+ if (sz == 0) {
+ msg_err_config("map has static backend but empty no data");
+ return FALSE;
+ }
+
+ g_string_append_len(target, dline, sz);
+ g_string_append_c(target, '\n');
+
+ return TRUE;
+}
+
+struct rspamd_map *
+rspamd_map_add(struct rspamd_config *cfg,
+ const gchar *map_line,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ map_dtor_t dtor,
+ void **user_data,
+ struct rspamd_worker *worker,
+ int flags)
+{
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ bk = rspamd_map_parse_backend(cfg, map_line);
+ if (bk == NULL) {
+ return NULL;
+ }
+
+ if (bk->is_fallback) {
+ msg_err_config("cannot add map with fallback only backend: %s", bk->uri);
+ REF_RELEASE(bk);
+
+ return NULL;
+ }
+
+ map = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(struct rspamd_map));
+ map->read_callback = read_callback;
+ map->fin_callback = fin_callback;
+ map->dtor = dtor;
+ map->user_data = user_data;
+ map->cfg = cfg;
+ map->id = rspamd_random_uint64_fast();
+ map->locked =
+ rspamd_mempool_alloc0_shared(cfg->cfg_pool, sizeof(gint));
+ map->backends = g_ptr_array_sized_new(1);
+ map->wrk = worker;
+ rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard,
+ map->backends);
+ g_ptr_array_add(map->backends, bk);
+ map->name = rspamd_mempool_strdup(cfg->cfg_pool, map_line);
+ map->no_file_read = (flags & RSPAMD_MAP_FILE_NO_READ);
+
+ if (bk->protocol == MAP_PROTO_FILE) {
+ map->poll_timeout = (cfg->map_timeout * cfg->map_file_watch_multiplier);
+ }
+ else {
+ map->poll_timeout = cfg->map_timeout;
+ }
+
+ if (description != NULL) {
+ map->description = rspamd_mempool_strdup(cfg->cfg_pool, description);
+ }
+
+ rspamd_map_calculate_hash(map);
+ msg_info_map("added map %s", bk->uri);
+ bk->map = map;
+
+ cfg->maps = g_list_prepend(cfg->maps, map);
+
+ return map;
+}
+
+struct rspamd_map *
+rspamd_map_add_fake(struct rspamd_config *cfg,
+ const gchar *description,
+ const gchar *name)
+{
+ struct rspamd_map *map;
+
+ map = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(struct rspamd_map));
+ map->cfg = cfg;
+ map->id = rspamd_random_uint64_fast();
+ map->name = rspamd_mempool_strdup(cfg->cfg_pool, name);
+ map->user_data = (void **) &map; /* to prevent null pointer dereferencing */
+
+ if (description != NULL) {
+ map->description = rspamd_mempool_strdup(cfg->cfg_pool, description);
+ }
+
+ return map;
+}
+
+static inline void
+rspamd_map_add_backend(struct rspamd_map *map, struct rspamd_map_backend *bk)
+{
+ if (bk->is_fallback) {
+ if (map->fallback_backend) {
+ msg_warn_map("redefining fallback backend from %s to %s",
+ map->fallback_backend->uri, bk->uri);
+ }
+
+ map->fallback_backend = bk;
+ }
+ else {
+ g_ptr_array_add(map->backends, bk);
+ }
+
+ bk->map = map;
+}
+
+struct rspamd_map *
+rspamd_map_add_from_ucl(struct rspamd_config *cfg,
+ const ucl_object_t *obj,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ map_dtor_t dtor,
+ void **user_data,
+ struct rspamd_worker *worker,
+ gint flags)
+{
+ ucl_object_iter_t it = NULL;
+ const ucl_object_t *cur, *elt;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+ guint i;
+
+ g_assert(obj != NULL);
+
+ if (ucl_object_type(obj) == UCL_STRING) {
+ /* Just a plain string */
+ return rspamd_map_add(cfg, ucl_object_tostring(obj), description,
+ read_callback, fin_callback, dtor, user_data, worker, flags);
+ }
+
+ map = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(struct rspamd_map));
+ map->read_callback = read_callback;
+ map->fin_callback = fin_callback;
+ map->dtor = dtor;
+ map->user_data = user_data;
+ map->cfg = cfg;
+ map->id = rspamd_random_uint64_fast();
+ map->locked =
+ rspamd_mempool_alloc0_shared(cfg->cfg_pool, sizeof(gint));
+ map->backends = g_ptr_array_new();
+ map->wrk = worker;
+ map->no_file_read = (flags & RSPAMD_MAP_FILE_NO_READ);
+ rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard,
+ map->backends);
+ map->poll_timeout = cfg->map_timeout;
+
+ if (description) {
+ map->description = rspamd_mempool_strdup(cfg->cfg_pool, description);
+ }
+
+ if (ucl_object_type(obj) == UCL_ARRAY) {
+ /* Add array of maps as multiple backends */
+ while ((cur = ucl_object_iterate(obj, &it, true)) != NULL) {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ bk = rspamd_map_parse_backend(cfg, ucl_object_tostring(cur));
+
+ if (bk != NULL) {
+ rspamd_map_add_backend(map, bk);
+
+ if (!map->name) {
+ map->name = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(cur));
+ }
+ }
+ }
+ else {
+ msg_err_config("bad map element type: %s",
+ ucl_object_type_to_string(ucl_object_type(cur)));
+ }
+ }
+
+ if (map->backends->len == 0) {
+ msg_err_config("map has no urls to be loaded: empty list");
+ goto err;
+ }
+ }
+ else if (ucl_object_type(obj) == UCL_OBJECT) {
+ elt = ucl_object_lookup(obj, "name");
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ map->name = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(elt));
+ }
+
+ elt = ucl_object_lookup(obj, "description");
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ map->description = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(elt));
+ }
+
+ elt = ucl_object_lookup_any(obj, "timeout", "poll", "poll_time",
+ "watch_interval", NULL);
+ if (elt) {
+ map->poll_timeout = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup_any(obj, "upstreams", "url", "urls", NULL);
+ if (elt == NULL) {
+ msg_err_config("map has no urls to be loaded: no elt");
+ goto err;
+ }
+
+ if (ucl_object_type(elt) == UCL_ARRAY) {
+ /* Add array of maps as multiple backends */
+ it = ucl_object_iterate_new(elt);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != NULL) {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ bk = rspamd_map_parse_backend(cfg, ucl_object_tostring(cur));
+
+ if (bk != NULL) {
+ rspamd_map_add_backend(map, bk);
+
+ if (!map->name) {
+ map->name = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(cur));
+ }
+ }
+ }
+ else {
+ msg_err_config("bad map element type: %s",
+ ucl_object_type_to_string(ucl_object_type(cur)));
+ ucl_object_iterate_free(it);
+ goto err;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+
+ if (map->backends->len == 0) {
+ msg_err_config("map has no urls to be loaded: empty object list");
+ goto err;
+ }
+ }
+ else if (ucl_object_type(elt) == UCL_STRING) {
+ bk = rspamd_map_parse_backend(cfg, ucl_object_tostring(elt));
+
+ if (bk != NULL) {
+ rspamd_map_add_backend(map, bk);
+
+ if (!map->name) {
+ map->name = rspamd_mempool_strdup(cfg->cfg_pool,
+ ucl_object_tostring(elt));
+ }
+ }
+ }
+
+ if (!map->backends || map->backends->len == 0) {
+ msg_err_config("map has no urls to be loaded: no valid backends");
+ goto err;
+ }
+ }
+ else {
+ msg_err_config("map has invalid type for value: %s",
+ ucl_object_type_to_string(ucl_object_type(obj)));
+ goto err;
+ }
+
+ gboolean all_local = TRUE;
+
+ PTR_ARRAY_FOREACH(map->backends, i, bk)
+ {
+ if (bk->protocol == MAP_PROTO_STATIC) {
+ GString *map_data;
+ /* We need data field in ucl */
+ elt = ucl_object_lookup(obj, "data");
+
+ if (elt == NULL) {
+ msg_err_config("map has static backend but no `data` field");
+ goto err;
+ }
+
+
+ if (ucl_object_type(elt) == UCL_STRING) {
+ map_data = g_string_sized_new(32);
+
+ if (rspamd_map_add_static_string(cfg, elt, map_data)) {
+ bk->data.sd->data = map_data->str;
+ bk->data.sd->len = map_data->len;
+ g_string_free(map_data, FALSE);
+ }
+ else {
+ g_string_free(map_data, TRUE);
+ msg_err_config("map has static backend with invalid `data` field");
+ goto err;
+ }
+ }
+ else if (ucl_object_type(elt) == UCL_ARRAY) {
+ map_data = g_string_sized_new(32);
+ it = ucl_object_iterate_new(elt);
+
+ while ((cur = ucl_object_iterate_safe(it, true))) {
+ if (!rspamd_map_add_static_string(cfg, cur, map_data)) {
+ g_string_free(map_data, TRUE);
+ msg_err_config("map has static backend with invalid "
+ "`data` field");
+ ucl_object_iterate_free(it);
+ goto err;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ bk->data.sd->data = map_data->str;
+ bk->data.sd->len = map_data->len;
+ g_string_free(map_data, FALSE);
+ }
+ }
+ else if (bk->protocol != MAP_PROTO_FILE) {
+ all_local = FALSE;
+ }
+ }
+
+ if (all_local) {
+ map->poll_timeout = (map->poll_timeout *
+ cfg->map_file_watch_multiplier);
+ }
+
+ rspamd_map_calculate_hash(map);
+ msg_debug_map("added map from ucl");
+
+ cfg->maps = g_list_prepend(cfg->maps, map);
+
+ return map;
+
+err:
+
+ if (map) {
+ PTR_ARRAY_FOREACH(map->backends, i, bk)
+ {
+ MAP_RELEASE(bk, "rspamd_map_backend");
+ }
+ }
+
+ return NULL;
+}
+
+rspamd_map_traverse_function
+rspamd_map_get_traverse_function(struct rspamd_map *map)
+{
+ if (map) {
+ return map->traverse_function;
+ }
+
+ return NULL;
+}
+
+void rspamd_map_traverse(struct rspamd_map *map, rspamd_map_traverse_cb cb,
+ gpointer cbdata, gboolean reset_hits)
+{
+ if (*map->user_data && map->traverse_function) {
+ map->traverse_function(*map->user_data, cb, cbdata, reset_hits);
+ }
+}
+
+void rspamd_map_set_on_load_function(struct rspamd_map *map, rspamd_map_on_load_function cb,
+ gpointer cbdata, GDestroyNotify dtor)
+{
+ if (map) {
+ map->on_load_function = cb;
+ map->on_load_ud = cbdata;
+ map->on_load_ud_dtor = dtor;
+ }
+}
diff --git a/src/libserver/maps/map.h b/src/libserver/maps/map.h
new file mode 100644
index 0000000..04df16e
--- /dev/null
+++ b/src/libserver/maps/map.h
@@ -0,0 +1,168 @@
+#ifndef RSPAMD_MAP_H
+#define RSPAMD_MAP_H
+
+#include "config.h"
+#include "contrib/libev/ev.h"
+
+#include "ucl.h"
+#include "mem_pool.h"
+#include "radix.h"
+#include "dns.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Maps API is designed to load lists data from different dynamic sources.
+ * It monitor files and HTTP locations for modifications and reload them if they are
+ * modified.
+ */
+struct map_cb_data;
+struct rspamd_worker;
+
+/**
+ * Common map object
+ */
+struct rspamd_config;
+struct rspamd_map;
+
+/**
+ * Callback types
+ */
+typedef gchar *(*map_cb_t)(gchar *chunk, gint len,
+ struct map_cb_data *data, gboolean final);
+
+typedef void (*map_fin_cb_t)(struct map_cb_data *data, void **target);
+
+typedef void (*map_dtor_t)(struct map_cb_data *data);
+
+typedef gboolean (*rspamd_map_traverse_cb)(gconstpointer key,
+ gconstpointer value, gsize hits, gpointer ud);
+
+typedef void (*rspamd_map_traverse_function)(void *data,
+ rspamd_map_traverse_cb cb,
+ gpointer cbdata, gboolean reset_hits);
+typedef void (*rspamd_map_on_load_function)(struct rspamd_map *map, gpointer ud);
+
+/**
+ * Callback data for async load
+ */
+struct map_cb_data {
+ struct rspamd_map *map;
+ gint state;
+ bool errored;
+ void *prev_data;
+ void *cur_data;
+};
+
+/**
+ * Returns TRUE if line looks like a map definition
+ * @param map_line
+ * @return
+ */
+gboolean rspamd_map_is_map(const gchar *map_line);
+
+enum rspamd_map_flags {
+ RSPAMD_MAP_DEFAULT = 0,
+ RSPAMD_MAP_FILE_ONLY = 1u << 0u,
+ RSPAMD_MAP_FILE_NO_READ = 1u << 1u,
+};
+
+/**
+ * Add map from line
+ */
+struct rspamd_map *rspamd_map_add(struct rspamd_config *cfg,
+ const gchar *map_line,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ map_dtor_t dtor,
+ void **user_data,
+ struct rspamd_worker *worker,
+ int flags);
+
+/**
+ * Add map from ucl
+ */
+struct rspamd_map *rspamd_map_add_from_ucl(struct rspamd_config *cfg,
+ const ucl_object_t *obj,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ map_dtor_t dtor,
+ void **user_data,
+ struct rspamd_worker *worker,
+ int flags);
+
+/**
+ * Adds a fake map structure (for logging purposes mainly)
+ * @param cfg
+ * @param description
+ * @return
+ */
+struct rspamd_map *rspamd_map_add_fake(struct rspamd_config *cfg,
+ const gchar *description,
+ const gchar *name);
+
+
+enum rspamd_map_watch_type {
+ RSPAMD_MAP_WATCH_MIN = 9,
+ RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER,
+ RSPAMD_MAP_WATCH_SCANNER,
+ RSPAMD_MAP_WATCH_WORKER,
+ RSPAMD_MAP_WATCH_MAX
+};
+
+/**
+ * Start watching of maps by adding events to libevent event loop
+ */
+void rspamd_map_watch(struct rspamd_config *cfg,
+ struct ev_loop *event_loop,
+ struct rspamd_dns_resolver *resolver,
+ struct rspamd_worker *worker,
+ enum rspamd_map_watch_type how);
+
+/**
+ * Preloads maps where all backends are file
+ * @param cfg
+ */
+void rspamd_map_preload(struct rspamd_config *cfg);
+
+/**
+ * Remove all maps watched (remove events)
+ */
+void rspamd_map_remove_all(struct rspamd_config *cfg);
+
+/**
+ * Get traverse function for specific map
+ * @param map
+ * @return
+ */
+rspamd_map_traverse_function rspamd_map_get_traverse_function(struct rspamd_map *map);
+
+/**
+ * Perform map traverse
+ * @param map
+ * @param cb
+ * @param cbdata
+ * @param reset_hits
+ * @return
+ */
+void rspamd_map_traverse(struct rspamd_map *map, rspamd_map_traverse_cb cb,
+ gpointer cbdata, gboolean reset_hits);
+
+/**
+ * Set map on load callback
+ * @param map
+ * @param cb
+ * @param cbdata
+ */
+void rspamd_map_set_on_load_function(struct rspamd_map *map, rspamd_map_on_load_function cb,
+ gpointer cbdata, GDestroyNotify dtor);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c
new file mode 100644
index 0000000..65478c5
--- /dev/null
+++ b/src/libserver/maps/map_helpers.c
@@ -0,0 +1,1845 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "map_helpers.h"
+#include "map_private.h"
+#include "khash.h"
+#include "radix.h"
+#include "rspamd.h"
+#include "cryptobox.h"
+#include "mempool_vars_internal.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include "contrib/cdb/cdb.h"
+
+#ifdef WITH_HYPERSCAN
+#include "hs.h"
+#include "hyperscan_tools.h"
+#endif
+#ifndef WITH_PCRE2
+#include <pcre.h>
+#else
+#include <pcre2.h>
+#endif
+
+
+static const guint64 map_hash_seed = 0xdeadbabeULL;
+static const gchar *const hash_fill = "1";
+
+struct rspamd_map_helper_value {
+ gsize hits;
+ gconstpointer key;
+ gchar value[]; /* Null terminated */
+};
+
+#define rspamd_map_ftok_hash(t) (rspamd_icase_hash((t).begin, (t).len, rspamd_hash_seed()))
+#define rspamd_map_ftok_equal(a, b) ((a).len == (b).len && rspamd_lc_cmp((a).begin, (b).begin, (a).len) == 0)
+
+KHASH_INIT(rspamd_map_hash, rspamd_ftok_t,
+ struct rspamd_map_helper_value *, true,
+ rspamd_map_ftok_hash, rspamd_map_ftok_equal);
+
+struct rspamd_radix_map_helper {
+ rspamd_mempool_t *pool;
+ khash_t(rspamd_map_hash) * htb;
+ radix_compressed_t *trie;
+ struct rspamd_map *map;
+ rspamd_cryptobox_fast_hash_state_t hst;
+};
+
+struct rspamd_hash_map_helper {
+ rspamd_mempool_t *pool;
+ khash_t(rspamd_map_hash) * htb;
+ struct rspamd_map *map;
+ rspamd_cryptobox_fast_hash_state_t hst;
+};
+
+struct rspamd_cdb_map_helper {
+ GQueue cdbs;
+ struct rspamd_map *map;
+ rspamd_cryptobox_fast_hash_state_t hst;
+ gsize total_size;
+};
+
+struct rspamd_regexp_map_helper {
+ rspamd_cryptobox_hash_state_t hst;
+ guchar re_digest[rspamd_cryptobox_HASHBYTES];
+ rspamd_mempool_t *pool;
+ struct rspamd_map *map;
+ GPtrArray *regexps;
+ GPtrArray *values;
+ khash_t(rspamd_map_hash) * htb;
+ enum rspamd_regexp_map_flags map_flags;
+#ifdef WITH_HYPERSCAN
+ rspamd_hyperscan_t *hs_db;
+ hs_scratch_t *hs_scratch;
+ gchar **patterns;
+ gint *flags;
+ gint *ids;
+#endif
+};
+
+/**
+ * FSM for parsing lists
+ */
+
+#define MAP_STORE_KEY \
+ do { \
+ while (g_ascii_isspace(*c) && p > c) { c++; } \
+ key = g_malloc(p - c + 1); \
+ rspamd_strlcpy(key, c, p - c + 1); \
+ stripped_key = g_strstrip(key); \
+ } while (0)
+
+#define MAP_STORE_VALUE \
+ do { \
+ while (g_ascii_isspace(*c) && p > c) { c++; } \
+ value = g_malloc(p - c + 1); \
+ rspamd_strlcpy(value, c, p - c + 1); \
+ stripped_value = g_strstrip(value); \
+ } while (0)
+
+gchar *
+rspamd_parse_kv_list(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ rspamd_map_insert_func func,
+ const gchar *default_value,
+ gboolean final)
+{
+ enum {
+ map_skip_spaces_before_key = 0,
+ map_read_key,
+ map_read_key_quoted,
+ map_read_key_slashed,
+ map_skip_spaces_after_key,
+ map_backslash_quoted,
+ map_backslash_slashed,
+ map_read_key_after_slash,
+ map_read_value,
+ map_read_comment_start,
+ map_skip_comment,
+ map_read_eol,
+ };
+
+ gchar *c, *p, *key = NULL, *value = NULL, *stripped_key, *stripped_value, *end;
+ struct rspamd_map *map = data->map;
+ guint line_number = 0;
+
+ p = chunk;
+ c = p;
+ end = p + len;
+
+ while (p < end) {
+ switch (data->state) {
+ case map_skip_spaces_before_key:
+ if (g_ascii_isspace(*p)) {
+ p++;
+ }
+ else {
+ if (*p == '"') {
+ p++;
+ c = p;
+ data->state = map_read_key_quoted;
+ }
+ else if (*p == '/') {
+ /* Note that c is on '/' here as '/' is a part of key */
+ c = p;
+ p++;
+ data->state = map_read_key_slashed;
+ }
+ else {
+ c = p;
+ data->state = map_read_key;
+ }
+ }
+ break;
+ case map_read_key:
+ /* read key */
+ /* Check here comments, eol and end of buffer */
+ if (*p == '#' && (p == c || *(p - 1) != '\\')) {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_KEY;
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s; line: %d",
+ stripped_key, default_value, line_number);
+ g_free(key);
+ }
+
+ key = NULL;
+ data->state = map_read_comment_start;
+ }
+ else if (*p == '\r' || *p == '\n') {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_KEY;
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s; line: %d",
+ stripped_key, default_value, line_number);
+ g_free(key);
+ }
+
+ data->state = map_read_eol;
+ key = NULL;
+ }
+ else if (g_ascii_isspace(*p)) {
+ if (p - c > 0) {
+ MAP_STORE_KEY;
+ data->state = map_skip_spaces_after_key;
+ }
+ else {
+ msg_err_map("empty or invalid key found on line %d", line_number);
+ data->state = map_skip_comment;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case map_read_key_quoted:
+ if (*p == '\\') {
+ data->state = map_backslash_quoted;
+ p++;
+ }
+ else if (*p == '"') {
+ /* Allow empty keys in this case */
+ if (p - c >= 0) {
+ MAP_STORE_KEY;
+ data->state = map_skip_spaces_after_key;
+ }
+ else {
+ g_assert_not_reached();
+ }
+ p++;
+ }
+ else {
+ p++;
+ }
+ break;
+ case map_read_key_slashed:
+ if (*p == '\\') {
+ data->state = map_backslash_slashed;
+ p++;
+ }
+ else if (*p == '/') {
+ /* Allow empty keys in this case */
+ if (p - c >= 0) {
+ data->state = map_read_key_after_slash;
+ }
+ else {
+ g_assert_not_reached();
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case map_read_key_after_slash:
+ /*
+ * This state is equal to reading of key but '/' is not
+ * treated specially
+ */
+ if (*p == '#') {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_KEY;
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s; line: %d",
+ stripped_key, default_value, line_number);
+ g_free(key);
+ key = NULL;
+ }
+
+ data->state = map_read_comment_start;
+ }
+ else if (*p == '\r' || *p == '\n') {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_KEY;
+ func(data->cur_data, stripped_key, default_value);
+
+ msg_debug_map("insert key only pair: %s -> %s; line: %d",
+ stripped_key, default_value, line_number);
+ g_free(key);
+ key = NULL;
+ }
+
+ data->state = map_read_eol;
+ key = NULL;
+ }
+ else if (g_ascii_isspace(*p)) {
+ if (p - c > 0) {
+ MAP_STORE_KEY;
+ data->state = map_skip_spaces_after_key;
+ }
+ else {
+ msg_err_map("empty or invalid key found on line %d", line_number);
+ data->state = map_skip_comment;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case map_backslash_quoted:
+ p++;
+ data->state = map_read_key_quoted;
+ break;
+ case map_backslash_slashed:
+ p++;
+ data->state = map_read_key_slashed;
+ break;
+ case map_skip_spaces_after_key:
+ if (*p == ' ' || *p == '\t') {
+ p++;
+ }
+ else {
+ c = p;
+ data->state = map_read_value;
+ }
+ break;
+ case map_read_value:
+ if (key == NULL) {
+ /* Ignore line */
+ msg_err_map("empty or invalid key found on line %d", line_number);
+ data->state = map_skip_comment;
+ }
+ else {
+ if (*p == '#') {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_VALUE;
+ func(data->cur_data, stripped_key, stripped_value);
+ msg_debug_map("insert key value pair: %s -> %s; line: %d",
+ stripped_key, stripped_value, line_number);
+ g_free(key);
+ g_free(value);
+ key = NULL;
+ value = NULL;
+ }
+ else {
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s; line: %d",
+ stripped_key, default_value, line_number);
+ g_free(key);
+ key = NULL;
+ }
+
+ data->state = map_read_comment_start;
+ }
+ else if (*p == '\r' || *p == '\n') {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_VALUE;
+ func(data->cur_data, stripped_key, stripped_value);
+ msg_debug_map("insert key value pair: %s -> %s",
+ stripped_key, stripped_value);
+ g_free(key);
+ g_free(value);
+ key = NULL;
+ value = NULL;
+ }
+ else {
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s",
+ stripped_key, default_value);
+ g_free(key);
+ key = NULL;
+ }
+
+ data->state = map_read_eol;
+ key = NULL;
+ }
+ else {
+ p++;
+ }
+ }
+ break;
+ case map_read_comment_start:
+ if (*p == '#') {
+ data->state = map_skip_comment;
+ p++;
+ key = NULL;
+ value = NULL;
+ }
+ else {
+ g_assert_not_reached();
+ }
+ break;
+ case map_skip_comment:
+ if (*p == '\r' || *p == '\n') {
+ data->state = map_read_eol;
+ }
+ else {
+ p++;
+ }
+ break;
+ case map_read_eol:
+ /* Skip \r\n and whitespaces */
+ if (*p == '\r' || *p == '\n') {
+ if (*p == '\n') {
+ /* We don't care about \r only line separators, they are too rare */
+ line_number++;
+ }
+ p++;
+ }
+ else {
+ data->state = map_skip_spaces_before_key;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+ }
+
+ if (final) {
+ /* Examine the state */
+ switch (data->state) {
+ case map_read_key:
+ case map_read_key_slashed:
+ case map_read_key_quoted:
+ case map_read_key_after_slash:
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_KEY;
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s",
+ stripped_key, default_value);
+ g_free(key);
+ key = NULL;
+ }
+ break;
+ case map_read_value:
+ if (key == NULL) {
+ /* Ignore line */
+ msg_err_map("empty or invalid key found on line %d", line_number);
+ data->state = map_skip_comment;
+ }
+ else {
+ if (p - c > 0) {
+ /* Store a single key */
+ MAP_STORE_VALUE;
+ func(data->cur_data, stripped_key, stripped_value);
+ msg_debug_map("insert key value pair: %s -> %s",
+ stripped_key, stripped_value);
+ g_free(key);
+ g_free(value);
+ key = NULL;
+ value = NULL;
+ }
+ else {
+ func(data->cur_data, stripped_key, default_value);
+ msg_debug_map("insert key only pair: %s -> %s",
+ stripped_key, default_value);
+ g_free(key);
+ key = NULL;
+ }
+ }
+ break;
+ }
+
+ data->state = map_skip_spaces_before_key;
+ }
+
+ return c;
+}
+
+/**
+ * Radix tree helper function
+ */
+void rspamd_map_helper_insert_radix(gpointer st, gconstpointer key, gconstpointer value)
+{
+ struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *) st;
+ struct rspamd_map_helper_value *val;
+ gsize vlen;
+ khiter_t k;
+ gconstpointer nk;
+ rspamd_ftok_t tok;
+ gint res;
+ struct rspamd_map *map;
+
+ map = r->map;
+ tok.begin = key;
+ tok.len = strlen(key);
+
+ k = kh_get(rspamd_map_hash, r->htb, tok);
+
+ if (k == kh_end(r->htb)) {
+ nk = rspamd_mempool_strdup(r->pool, key);
+ tok.begin = nk;
+ k = kh_put(rspamd_map_hash, r->htb, tok, &res);
+ }
+ else {
+ val = kh_value(r->htb, k);
+
+ if (strcmp(value, val->value) == 0) {
+ /* Same element, skip */
+ return;
+ }
+ else {
+ msg_warn_map("duplicate radix entry found for map %s: %s (old value: '%s', new: '%s')",
+ map->name, key, val->value, value);
+ }
+
+ nk = kh_key(r->htb, k).begin;
+ val->key = nk;
+ kh_value(r->htb, k) = val;
+
+ return; /* do not touch radix in case of exact duplicate */
+ }
+
+ vlen = strlen(value);
+ val = rspamd_mempool_alloc0(r->pool, sizeof(*val) +
+ vlen + 1);
+ memcpy(val->value, value, vlen);
+
+ nk = kh_key(r->htb, k).begin;
+ val->key = nk;
+ kh_value(r->htb, k) = val;
+ rspamd_radix_add_iplist(key, ",", r->trie, val, FALSE,
+ r->map->name);
+ rspamd_cryptobox_fast_hash_update(&r->hst, nk, tok.len);
+}
+
+void rspamd_map_helper_insert_radix_resolve(gpointer st, gconstpointer key, gconstpointer value)
+{
+ struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *) st;
+ struct rspamd_map_helper_value *val;
+ gsize vlen;
+ khiter_t k;
+ gconstpointer nk;
+ rspamd_ftok_t tok;
+ gint res;
+ struct rspamd_map *map;
+
+ map = r->map;
+
+ if (!key) {
+ msg_warn_map("cannot insert NULL value in the map: %s",
+ map->name);
+ return;
+ }
+
+ tok.begin = key;
+ tok.len = strlen(key);
+
+ k = kh_get(rspamd_map_hash, r->htb, tok);
+
+ if (k == kh_end(r->htb)) {
+ nk = rspamd_mempool_strdup(r->pool, key);
+ tok.begin = nk;
+ k = kh_put(rspamd_map_hash, r->htb, tok, &res);
+ }
+ else {
+ val = kh_value(r->htb, k);
+
+ if (strcmp(value, val->value) == 0) {
+ /* Same element, skip */
+ return;
+ }
+ else {
+ msg_warn_map("duplicate radix entry found for map %s: %s (old value: '%s', new: '%s')",
+ map->name, key, val->value, value);
+ }
+
+ nk = kh_key(r->htb, k).begin;
+ val->key = nk;
+ kh_value(r->htb, k) = val;
+
+ return; /* do not touch radix in case of exact duplicate */
+ }
+
+ vlen = strlen(value);
+ val = rspamd_mempool_alloc0(r->pool, sizeof(*val) +
+ vlen + 1);
+ memcpy(val->value, value, vlen);
+ nk = kh_key(r->htb, k).begin;
+ val->key = nk;
+ kh_value(r->htb, k) = val;
+ rspamd_radix_add_iplist(key, ",", r->trie, val, TRUE,
+ r->map->name);
+ rspamd_cryptobox_fast_hash_update(&r->hst, nk, tok.len);
+}
+
+void rspamd_map_helper_insert_hash(gpointer st, gconstpointer key, gconstpointer value)
+{
+ struct rspamd_hash_map_helper *ht = st;
+ struct rspamd_map_helper_value *val;
+ khiter_t k;
+ gconstpointer nk;
+ gsize vlen;
+ gint r;
+ rspamd_ftok_t tok;
+ struct rspamd_map *map;
+
+ tok.begin = key;
+ tok.len = strlen(key);
+ map = ht->map;
+
+ k = kh_get(rspamd_map_hash, ht->htb, tok);
+
+ if (k == kh_end(ht->htb)) {
+ nk = rspamd_mempool_strdup(ht->pool, key);
+ tok.begin = nk;
+ k = kh_put(rspamd_map_hash, ht->htb, tok, &r);
+ }
+ else {
+ val = kh_value(ht->htb, k);
+
+ if (strcmp(value, val->value) == 0) {
+ /* Same element, skip */
+ return;
+ }
+ else {
+ msg_warn_map("duplicate hash entry found for map %s: %s (old value: '%s', new: '%s')",
+ map->name, key, val->value, value);
+ }
+ }
+
+ /* Null termination due to alloc0 */
+ vlen = strlen(value);
+ val = rspamd_mempool_alloc0(ht->pool, sizeof(*val) + vlen + 1);
+ memcpy(val->value, value, vlen);
+
+ tok = kh_key(ht->htb, k);
+ nk = tok.begin;
+ val->key = nk;
+ kh_value(ht->htb, k) = val;
+
+ rspamd_cryptobox_fast_hash_update(&ht->hst, nk, tok.len);
+}
+
+void rspamd_map_helper_insert_re(gpointer st, gconstpointer key, gconstpointer value)
+{
+ struct rspamd_regexp_map_helper *re_map = st;
+ struct rspamd_map *map;
+ rspamd_regexp_t *re;
+ gchar *escaped;
+ GError *err = NULL;
+ gint pcre_flags;
+ gsize escaped_len;
+ struct rspamd_map_helper_value *val;
+ khiter_t k;
+ rspamd_ftok_t tok;
+ gconstpointer nk;
+ gsize vlen;
+ gint r;
+
+ map = re_map->map;
+
+ tok.begin = key;
+ tok.len = strlen(key);
+
+ k = kh_get(rspamd_map_hash, re_map->htb, tok);
+
+ if (k == kh_end(re_map->htb)) {
+ nk = rspamd_mempool_strdup(re_map->pool, key);
+ tok.begin = nk;
+ k = kh_put(rspamd_map_hash, re_map->htb, tok, &r);
+ }
+ else {
+ val = kh_value(re_map->htb, k);
+
+ /* Always warn about regexp duplicate as it's likely a bad mistake */
+ msg_warn_map("duplicate re entry found for map %s: %s (old value: '%s', new: '%s')",
+ map->name, key, val->value, value);
+
+ if (strcmp(val->value, value) == 0) {
+ /* Same value, skip */
+ return;
+ }
+
+ /* Replace value but do not touch regexp */
+ nk = kh_key(re_map->htb, k).begin;
+ val->key = nk;
+ kh_value(re_map->htb, k) = val;
+
+ return;
+ }
+
+ /* Check regexp stuff */
+ if (re_map->map_flags & RSPAMD_REGEXP_MAP_FLAG_GLOB) {
+ escaped = rspamd_str_regexp_escape(key, strlen(key), &escaped_len,
+ RSPAMD_REGEXP_ESCAPE_GLOB | RSPAMD_REGEXP_ESCAPE_UTF);
+ re = rspamd_regexp_new(escaped, NULL, &err);
+ g_free(escaped);
+ }
+ else {
+ re = rspamd_regexp_new(key, NULL, &err);
+ }
+
+ if (re == NULL) {
+ msg_err_map("cannot parse regexp %s: %e", key, err);
+
+ if (err) {
+ g_error_free(err);
+ }
+
+ return;
+ }
+
+ vlen = strlen(value);
+ val = rspamd_mempool_alloc0(re_map->pool, sizeof(*val) +
+ vlen + 1);
+ memcpy(val->value, value, vlen); /* Null terminated due to alloc0 previously */
+ nk = kh_key(re_map->htb, k).begin;
+ val->key = nk;
+ kh_value(re_map->htb, k) = val;
+ rspamd_cryptobox_hash_update(&re_map->hst, nk, tok.len);
+
+ pcre_flags = rspamd_regexp_get_pcre_flags(re);
+
+#ifndef WITH_PCRE2
+ if (pcre_flags & PCRE_FLAG(UTF8)) {
+ re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
+ }
+#else
+ if (pcre_flags & PCRE_FLAG(UTF)) {
+ re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
+ }
+#endif
+
+ g_ptr_array_add(re_map->regexps, re);
+ g_ptr_array_add(re_map->values, val);
+}
+
+static void
+rspamd_map_helper_traverse_regexp(void *data,
+ rspamd_map_traverse_cb cb,
+ gpointer cbdata,
+ gboolean reset_hits)
+{
+ rspamd_ftok_t tok;
+ struct rspamd_map_helper_value *val;
+ struct rspamd_regexp_map_helper *re_map = data;
+
+ kh_foreach(re_map->htb, tok, val, {
+ if (!cb(tok.begin, val->value, val->hits, cbdata)) {
+ break;
+ }
+
+ if (reset_hits) {
+ val->hits = 0;
+ }
+ });
+}
+
+struct rspamd_hash_map_helper *
+rspamd_map_helper_new_hash(struct rspamd_map *map)
+{
+ struct rspamd_hash_map_helper *htb;
+ rspamd_mempool_t *pool;
+
+ if (map) {
+ pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ map->tag, 0);
+ }
+ else {
+ pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ NULL, 0);
+ }
+
+ htb = rspamd_mempool_alloc0_type(pool, struct rspamd_hash_map_helper);
+ htb->htb = kh_init(rspamd_map_hash);
+ htb->pool = pool;
+ htb->map = map;
+ rspamd_cryptobox_fast_hash_init(&htb->hst, map_hash_seed);
+
+ return htb;
+}
+
+void rspamd_map_helper_destroy_hash(struct rspamd_hash_map_helper *r)
+{
+ if (r == NULL || r->pool == NULL) {
+ return;
+ }
+
+ rspamd_mempool_t *pool = r->pool;
+ kh_destroy(rspamd_map_hash, r->htb);
+ memset(r, 0, sizeof(*r));
+ rspamd_mempool_delete(pool);
+}
+
+static void
+rspamd_map_helper_traverse_hash(void *data,
+ rspamd_map_traverse_cb cb,
+ gpointer cbdata,
+ gboolean reset_hits)
+{
+ rspamd_ftok_t tok;
+ struct rspamd_map_helper_value *val;
+ struct rspamd_hash_map_helper *ht = data;
+
+ kh_foreach(ht->htb, tok, val, {
+ if (!cb(tok.begin, val->value, val->hits, cbdata)) {
+ break;
+ }
+
+ if (reset_hits) {
+ val->hits = 0;
+ }
+ });
+}
+
+struct rspamd_radix_map_helper *
+rspamd_map_helper_new_radix(struct rspamd_map *map)
+{
+ struct rspamd_radix_map_helper *r;
+ rspamd_mempool_t *pool;
+ const gchar *name = "unnamed";
+
+ if (map) {
+ pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ map->tag, 0);
+ name = map->name;
+ }
+ else {
+ pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ NULL, 0);
+ }
+
+ r = rspamd_mempool_alloc0_type(pool, struct rspamd_radix_map_helper);
+ r->trie = radix_create_compressed_with_pool(pool, name);
+ r->htb = kh_init(rspamd_map_hash);
+ r->pool = pool;
+ r->map = map;
+ rspamd_cryptobox_fast_hash_init(&r->hst, map_hash_seed);
+
+ return r;
+}
+
+void rspamd_map_helper_destroy_radix(struct rspamd_radix_map_helper *r)
+{
+ if (r == NULL || !r->pool) {
+ return;
+ }
+
+ kh_destroy(rspamd_map_hash, r->htb);
+ rspamd_mempool_t *pool = r->pool;
+ memset(r, 0, sizeof(*r));
+ rspamd_mempool_delete(pool);
+}
+
+static void
+rspamd_map_helper_traverse_radix(void *data,
+ rspamd_map_traverse_cb cb,
+ gpointer cbdata,
+ gboolean reset_hits)
+{
+ rspamd_ftok_t tok;
+ struct rspamd_map_helper_value *val;
+ struct rspamd_radix_map_helper *r = data;
+
+ kh_foreach(r->htb, tok, val, {
+ if (!cb(tok.begin, val->value, val->hits, cbdata)) {
+ break;
+ }
+
+ if (reset_hits) {
+ val->hits = 0;
+ }
+ });
+}
+
+struct rspamd_regexp_map_helper *
+rspamd_map_helper_new_regexp(struct rspamd_map *map,
+ enum rspamd_regexp_map_flags flags)
+{
+ struct rspamd_regexp_map_helper *re_map;
+ rspamd_mempool_t *pool;
+
+ pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ map->tag, 0);
+
+ re_map = rspamd_mempool_alloc0_type(pool, struct rspamd_regexp_map_helper);
+ re_map->pool = pool;
+ re_map->values = g_ptr_array_new();
+ re_map->regexps = g_ptr_array_new();
+ re_map->map = map;
+ re_map->map_flags = flags;
+ re_map->htb = kh_init(rspamd_map_hash);
+ rspamd_cryptobox_hash_init(&re_map->hst, NULL, 0);
+
+ return re_map;
+}
+
+
+void rspamd_map_helper_destroy_regexp(struct rspamd_regexp_map_helper *re_map)
+{
+ rspamd_regexp_t *re;
+ guint i;
+
+ if (!re_map || !re_map->regexps) {
+ return;
+ }
+
+#ifdef WITH_HYPERSCAN
+ if (re_map->hs_scratch) {
+ hs_free_scratch(re_map->hs_scratch);
+ }
+ if (re_map->hs_db) {
+ rspamd_hyperscan_free(re_map->hs_db, false);
+ }
+ if (re_map->patterns) {
+ for (i = 0; i < re_map->regexps->len; i++) {
+ g_free(re_map->patterns[i]);
+ }
+
+ g_free(re_map->patterns);
+ }
+ if (re_map->flags) {
+ g_free(re_map->flags);
+ }
+ if (re_map->ids) {
+ g_free(re_map->ids);
+ }
+#endif
+
+ for (i = 0; i < re_map->regexps->len; i++) {
+ re = g_ptr_array_index(re_map->regexps, i);
+ rspamd_regexp_unref(re);
+ }
+
+ g_ptr_array_free(re_map->regexps, TRUE);
+ g_ptr_array_free(re_map->values, TRUE);
+ kh_destroy(rspamd_map_hash, re_map->htb);
+
+ rspamd_mempool_t *pool = re_map->pool;
+ memset(re_map, 0, sizeof(*re_map));
+ rspamd_mempool_delete(pool);
+}
+
+gchar *
+rspamd_kv_list_read(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ if (data->cur_data == NULL) {
+ data->cur_data = rspamd_map_helper_new_hash(data->map);
+ }
+
+ return rspamd_parse_kv_list(
+ chunk,
+ len,
+ data,
+ rspamd_map_helper_insert_hash,
+ "",
+ final);
+}
+
+void rspamd_kv_list_fin(struct map_cb_data *data, void **target)
+{
+ struct rspamd_map *map = data->map;
+ struct rspamd_hash_map_helper *htb;
+
+ if (data->errored) {
+ /* Clean up the current data and do not touch prev data */
+ if (data->cur_data) {
+ msg_info_map("cleanup unfinished new data as error occurred for %s",
+ map->name);
+ htb = (struct rspamd_hash_map_helper *) data->cur_data;
+ rspamd_map_helper_destroy_hash(htb);
+ data->cur_data = NULL;
+ }
+ }
+ else {
+ if (data->cur_data) {
+ htb = (struct rspamd_hash_map_helper *) data->cur_data;
+ msg_info_map("read hash of %d elements from %s", kh_size(htb->htb),
+ map->name);
+ data->map->traverse_function = rspamd_map_helper_traverse_hash;
+ data->map->nelts = kh_size(htb->htb);
+ data->map->digest = rspamd_cryptobox_fast_hash_final(&htb->hst);
+ }
+
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ if (data->prev_data) {
+ htb = (struct rspamd_hash_map_helper *) data->prev_data;
+ rspamd_map_helper_destroy_hash(htb);
+ }
+ }
+}
+
+void rspamd_kv_list_dtor(struct map_cb_data *data)
+{
+ struct rspamd_hash_map_helper *htb;
+
+ if (data->cur_data) {
+ htb = (struct rspamd_hash_map_helper *) data->cur_data;
+ rspamd_map_helper_destroy_hash(htb);
+ }
+}
+
+gchar *
+rspamd_radix_read(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct rspamd_radix_map_helper *r;
+ struct rspamd_map *map = data->map;
+
+ if (data->cur_data == NULL) {
+ r = rspamd_map_helper_new_radix(map);
+ data->cur_data = r;
+ }
+
+ return rspamd_parse_kv_list(
+ chunk,
+ len,
+ data,
+ rspamd_map_helper_insert_radix,
+ hash_fill,
+ final);
+}
+
+void rspamd_radix_fin(struct map_cb_data *data, void **target)
+{
+ struct rspamd_map *map = data->map;
+ struct rspamd_radix_map_helper *r;
+
+ if (data->errored) {
+ /* Clean up the current data and do not touch prev data */
+ if (data->cur_data) {
+ msg_info_map("cleanup unfinished new data as error occurred for %s",
+ map->name);
+ r = (struct rspamd_radix_map_helper *) data->cur_data;
+ rspamd_map_helper_destroy_radix(r);
+ data->cur_data = NULL;
+ }
+ }
+ else {
+ if (data->cur_data) {
+ r = (struct rspamd_radix_map_helper *) data->cur_data;
+ msg_info_map("read radix trie of %z elements: %s",
+ radix_get_size(r->trie), radix_get_info(r->trie));
+ data->map->traverse_function = rspamd_map_helper_traverse_radix;
+ data->map->nelts = kh_size(r->htb);
+ data->map->digest = rspamd_cryptobox_fast_hash_final(&r->hst);
+ }
+
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ if (data->prev_data) {
+ r = (struct rspamd_radix_map_helper *) data->prev_data;
+ rspamd_map_helper_destroy_radix(r);
+ }
+ }
+}
+
+void rspamd_radix_dtor(struct map_cb_data *data)
+{
+ struct rspamd_radix_map_helper *r;
+
+ if (data->cur_data) {
+ r = (struct rspamd_radix_map_helper *) data->cur_data;
+ rspamd_map_helper_destroy_radix(r);
+ }
+}
+
+#ifdef WITH_HYPERSCAN
+
+static gboolean
+rspamd_try_load_re_map_cache(struct rspamd_regexp_map_helper *re_map)
+{
+ gchar fp[PATH_MAX];
+ struct rspamd_map *map;
+
+ map = re_map->map;
+
+ if (!map->cfg->hs_cache_dir) {
+ return FALSE;
+ }
+
+ rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hsmc",
+ map->cfg->hs_cache_dir,
+ (gint) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+ re_map->hs_db = rspamd_hyperscan_maybe_load(fp, 0);
+
+ return re_map->hs_db != NULL;
+}
+
+static gboolean
+rspamd_try_save_re_map_cache(struct rspamd_regexp_map_helper *re_map)
+{
+ gchar fp[PATH_MAX], np[PATH_MAX];
+ gsize len;
+ gint fd;
+ char *bytes = NULL;
+ struct rspamd_map *map;
+
+ map = re_map->map;
+
+ if (!map->cfg->hs_cache_dir) {
+ return FALSE;
+ }
+
+ rspamd_snprintf(fp, sizeof(fp), "%s/hsmc-XXXXXXXXXXXXX",
+ re_map->map->cfg->hs_cache_dir);
+
+ if ((fd = g_mkstemp_full(fp, O_WRONLY | O_CREAT | O_EXCL, 00644)) != -1) {
+ if (hs_serialize_database(rspamd_hyperscan_get_database(re_map->hs_db), &bytes, &len) == HS_SUCCESS) {
+ if (write(fd, bytes, len) == -1) {
+ msg_warn_map("cannot write hyperscan cache to %s: %s",
+ fp, strerror(errno));
+ unlink(fp);
+ free(bytes);
+ }
+ else {
+ free(bytes);
+ fsync(fd);
+
+ rspamd_snprintf(np, sizeof(np), "%s/%*xs.hsmc",
+ re_map->map->cfg->hs_cache_dir,
+ (gint) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+ if (rename(fp, np) == -1) {
+ msg_warn_map("cannot rename hyperscan cache from %s to %s: %s",
+ fp, np, strerror(errno));
+ unlink(fp);
+ }
+ else {
+ msg_info_map("written cached hyperscan data for %s to %s (%Hz length)",
+ map->name, np, len);
+ rspamd_hyperscan_notice_known(np);
+ }
+ }
+ }
+ else {
+ msg_warn_map("cannot serialize hyperscan cache to %s: %s",
+ fp, strerror(errno));
+ unlink(fp);
+ }
+
+
+ close(fd);
+ }
+
+ return FALSE;
+}
+
+#endif
+
+static void
+rspamd_re_map_finalize(struct rspamd_regexp_map_helper *re_map)
+{
+#ifdef WITH_HYPERSCAN
+ guint i;
+ hs_platform_info_t plt;
+ hs_compile_error_t *err;
+ struct rspamd_map *map;
+ rspamd_regexp_t *re;
+ gint pcre_flags;
+
+ map = re_map->map;
+
+#if !defined(__aarch64__) && !defined(__powerpc64__)
+ if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
+ msg_info_map("disable hyperscan for map %s, ssse3 instructions are not supported by CPU",
+ map->name);
+ return;
+ }
+#endif
+
+ if (hs_populate_platform(&plt) != HS_SUCCESS) {
+ msg_err_map("cannot populate hyperscan platform");
+ return;
+ }
+
+ re_map->patterns = g_new(gchar *, re_map->regexps->len);
+ re_map->flags = g_new(gint, re_map->regexps->len);
+ re_map->ids = g_new(gint, re_map->regexps->len);
+
+ for (i = 0; i < re_map->regexps->len; i++) {
+ const gchar *pat;
+ gchar *escaped;
+ gint pat_flags;
+
+ re = g_ptr_array_index(re_map->regexps, i);
+ pcre_flags = rspamd_regexp_get_pcre_flags(re);
+ pat = rspamd_regexp_get_pattern(re);
+ pat_flags = rspamd_regexp_get_flags(re);
+
+ if (pat_flags & RSPAMD_REGEXP_FLAG_UTF) {
+ escaped = rspamd_str_regexp_escape(pat, strlen(pat), NULL,
+ RSPAMD_REGEXP_ESCAPE_RE | RSPAMD_REGEXP_ESCAPE_UTF);
+ re_map->flags[i] |= HS_FLAG_UTF8;
+ }
+ else {
+ escaped = rspamd_str_regexp_escape(pat, strlen(pat), NULL,
+ RSPAMD_REGEXP_ESCAPE_RE);
+ }
+
+ re_map->patterns[i] = escaped;
+ re_map->flags[i] = HS_FLAG_SINGLEMATCH;
+
+#ifndef WITH_PCRE2
+ if (pcre_flags & PCRE_FLAG(UTF8)) {
+ re_map->flags[i] |= HS_FLAG_UTF8;
+ }
+#else
+ if (pcre_flags & PCRE_FLAG(UTF)) {
+ re_map->flags[i] |= HS_FLAG_UTF8;
+ }
+#endif
+ if (pcre_flags & PCRE_FLAG(CASELESS)) {
+ re_map->flags[i] |= HS_FLAG_CASELESS;
+ }
+ if (pcre_flags & PCRE_FLAG(MULTILINE)) {
+ re_map->flags[i] |= HS_FLAG_MULTILINE;
+ }
+ if (pcre_flags & PCRE_FLAG(DOTALL)) {
+ re_map->flags[i] |= HS_FLAG_DOTALL;
+ }
+ if (rspamd_regexp_get_maxhits(re) == 1) {
+ re_map->flags[i] |= HS_FLAG_SINGLEMATCH;
+ }
+
+ re_map->ids[i] = i;
+ }
+
+ if (re_map->regexps->len > 0 && re_map->patterns) {
+
+ if (!rspamd_try_load_re_map_cache(re_map)) {
+ gdouble ts1 = rspamd_get_ticks(FALSE);
+ hs_database_t *hs_db = NULL;
+
+ if (hs_compile_multi((const gchar **) re_map->patterns,
+ re_map->flags,
+ re_map->ids,
+ re_map->regexps->len,
+ HS_MODE_BLOCK,
+ &plt,
+ &hs_db,
+ &err) != HS_SUCCESS) {
+
+ msg_err_map("cannot create tree of regexp when processing '%s': %s",
+ err->expression >= 0 ? re_map->patterns[err->expression] : "unknown regexp", err->message);
+ re_map->hs_db = NULL;
+ hs_free_compile_error(err);
+
+ return;
+ }
+
+ if (re_map->map->cfg->hs_cache_dir) {
+ char fpath[PATH_MAX];
+ rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmc",
+ re_map->map->cfg->hs_cache_dir,
+ (gint) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+ re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, fpath);
+ }
+ else {
+ re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, NULL);
+ }
+
+ ts1 = (rspamd_get_ticks(FALSE) - ts1) * 1000.0;
+ msg_info_map("hyperscan compiled %d regular expressions from %s in %.1f ms",
+ re_map->regexps->len, re_map->map->name, ts1);
+ rspamd_try_save_re_map_cache(re_map);
+ }
+ else {
+ msg_info_map("hyperscan read %d cached regular expressions from %s",
+ re_map->regexps->len, re_map->map->name);
+ }
+
+ if (hs_alloc_scratch(rspamd_hyperscan_get_database(re_map->hs_db), &re_map->hs_scratch) != HS_SUCCESS) {
+ msg_err_map("cannot allocate scratch space for hyperscan");
+ rspamd_hyperscan_free(re_map->hs_db, true);
+ re_map->hs_db = NULL;
+ }
+ }
+ else {
+ msg_err_map("regexp map is empty");
+ }
+#endif
+}
+
+gchar *
+rspamd_regexp_list_read_single(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct rspamd_regexp_map_helper *re_map;
+
+ if (data->cur_data == NULL) {
+ re_map = rspamd_map_helper_new_regexp(data->map, 0);
+ data->cur_data = re_map;
+ }
+
+ return rspamd_parse_kv_list(
+ chunk,
+ len,
+ data,
+ rspamd_map_helper_insert_re,
+ hash_fill,
+ final);
+}
+
+gchar *
+rspamd_glob_list_read_single(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct rspamd_regexp_map_helper *re_map;
+
+ if (data->cur_data == NULL) {
+ re_map = rspamd_map_helper_new_regexp(data->map, RSPAMD_REGEXP_MAP_FLAG_GLOB);
+ data->cur_data = re_map;
+ }
+
+ return rspamd_parse_kv_list(
+ chunk,
+ len,
+ data,
+ rspamd_map_helper_insert_re,
+ hash_fill,
+ final);
+}
+
+gchar *
+rspamd_regexp_list_read_multiple(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct rspamd_regexp_map_helper *re_map;
+
+ if (data->cur_data == NULL) {
+ re_map = rspamd_map_helper_new_regexp(data->map,
+ RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
+ data->cur_data = re_map;
+ }
+
+ return rspamd_parse_kv_list(
+ chunk,
+ len,
+ data,
+ rspamd_map_helper_insert_re,
+ hash_fill,
+ final);
+}
+
+gchar *
+rspamd_glob_list_read_multiple(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct rspamd_regexp_map_helper *re_map;
+
+ if (data->cur_data == NULL) {
+ re_map = rspamd_map_helper_new_regexp(data->map,
+ RSPAMD_REGEXP_MAP_FLAG_GLOB | RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
+ data->cur_data = re_map;
+ }
+
+ return rspamd_parse_kv_list(
+ chunk,
+ len,
+ data,
+ rspamd_map_helper_insert_re,
+ hash_fill,
+ final);
+}
+
+
+void rspamd_regexp_list_fin(struct map_cb_data *data, void **target)
+{
+ struct rspamd_regexp_map_helper *re_map = NULL, *old_re_map;
+ struct rspamd_map *map = data->map;
+
+ if (data->errored) {
+ /* Clean up the current data and do not touch prev data */
+ if (data->cur_data) {
+ msg_info_map("cleanup unfinished new data as error occurred for %s",
+ map->name);
+ re_map = (struct rspamd_regexp_map_helper *) data->cur_data;
+ rspamd_map_helper_destroy_regexp(re_map);
+ data->cur_data = NULL;
+ }
+ }
+ else {
+ if (data->cur_data) {
+ re_map = data->cur_data;
+ rspamd_cryptobox_hash_final(&re_map->hst, re_map->re_digest);
+ memcpy(&data->map->digest, re_map->re_digest, sizeof(data->map->digest));
+ rspamd_re_map_finalize(re_map);
+ msg_info_map("read regexp list of %ud elements",
+ re_map->regexps->len);
+ data->map->traverse_function = rspamd_map_helper_traverse_regexp;
+ data->map->nelts = kh_size(re_map->htb);
+ }
+
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ if (data->prev_data) {
+ old_re_map = data->prev_data;
+ rspamd_map_helper_destroy_regexp(old_re_map);
+ }
+ }
+}
+void rspamd_regexp_list_dtor(struct map_cb_data *data)
+{
+ if (data->cur_data) {
+ rspamd_map_helper_destroy_regexp(data->cur_data);
+ }
+}
+
+#ifdef WITH_HYPERSCAN
+static int
+rspamd_match_hs_single_handler(unsigned int id, unsigned long long from,
+ unsigned long long to,
+ unsigned int flags, void *context)
+{
+ guint *i = context;
+ /* Always return non-zero as we need a single match here */
+
+ *i = id;
+
+ return 1;
+}
+#endif
+
+gconstpointer
+rspamd_match_regexp_map_single(struct rspamd_regexp_map_helper *map,
+ const gchar *in, gsize len)
+{
+ guint i;
+ rspamd_regexp_t *re;
+ gint res = 0;
+ gpointer ret = NULL;
+ struct rspamd_map_helper_value *val;
+ gboolean validated = FALSE;
+
+ g_assert(in != NULL);
+
+ if (map == NULL || len == 0 || map->regexps == NULL) {
+ return NULL;
+ }
+
+ if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
+ if (rspamd_fast_utf8_validate(in, len) == 0) {
+ validated = TRUE;
+ }
+ }
+ else {
+ validated = TRUE;
+ }
+
+#ifdef WITH_HYPERSCAN
+ if (map->hs_db && map->hs_scratch) {
+
+ if (validated) {
+
+ res = hs_scan(rspamd_hyperscan_get_database(map->hs_db), in, len, 0,
+ map->hs_scratch,
+ rspamd_match_hs_single_handler, (void *) &i);
+
+ if (res == HS_SCAN_TERMINATED) {
+ res = 1;
+ val = g_ptr_array_index(map->values, i);
+
+ ret = val->value;
+ val->hits++;
+ }
+
+ return ret;
+ }
+ }
+#endif
+
+ if (!res) {
+ /* PCRE version */
+ for (i = 0; i < map->regexps->len; i++) {
+ re = g_ptr_array_index(map->regexps, i);
+
+ if (rspamd_regexp_search(re, in, len, NULL, NULL, !validated, NULL)) {
+ val = g_ptr_array_index(map->values, i);
+
+ ret = val->value;
+ val->hits++;
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+#ifdef WITH_HYPERSCAN
+struct rspamd_multiple_cbdata {
+ GPtrArray *ar;
+ struct rspamd_regexp_map_helper *map;
+};
+
+static int
+rspamd_match_hs_multiple_handler(unsigned int id, unsigned long long from,
+ unsigned long long to,
+ unsigned int flags, void *context)
+{
+ struct rspamd_multiple_cbdata *cbd = context;
+ struct rspamd_map_helper_value *val;
+
+
+ if (id < cbd->map->values->len) {
+ val = g_ptr_array_index(cbd->map->values, id);
+ val->hits++;
+ g_ptr_array_add(cbd->ar, val->value);
+ }
+
+ /* Always return zero as we need all matches here */
+ return 0;
+}
+#endif
+
+GPtrArray *
+rspamd_match_regexp_map_all(struct rspamd_regexp_map_helper *map,
+ const gchar *in, gsize len)
+{
+ guint i;
+ rspamd_regexp_t *re;
+ GPtrArray *ret;
+ gint res = 0;
+ gboolean validated = FALSE;
+ struct rspamd_map_helper_value *val;
+
+ if (map == NULL || map->regexps == NULL || len == 0) {
+ return NULL;
+ }
+
+ g_assert(in != NULL);
+
+ if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
+ if (rspamd_fast_utf8_validate(in, len) == 0) {
+ validated = TRUE;
+ }
+ }
+ else {
+ validated = TRUE;
+ }
+
+ ret = g_ptr_array_new();
+
+#ifdef WITH_HYPERSCAN
+ if (map->hs_db && map->hs_scratch) {
+
+ if (validated) {
+ struct rspamd_multiple_cbdata cbd;
+
+ cbd.ar = ret;
+ cbd.map = map;
+
+ if (hs_scan(rspamd_hyperscan_get_database(map->hs_db), in, len,
+ 0, map->hs_scratch,
+ rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
+ res = 1;
+ }
+ }
+ }
+#endif
+
+ if (!res) {
+ /* PCRE version */
+ for (i = 0; i < map->regexps->len; i++) {
+ re = g_ptr_array_index(map->regexps, i);
+
+ if (rspamd_regexp_search(re, in, len, NULL, NULL,
+ !validated, NULL)) {
+ val = g_ptr_array_index(map->values, i);
+ val->hits++;
+ g_ptr_array_add(ret, val->value);
+ }
+ }
+ }
+
+ if (ret->len > 0) {
+ return ret;
+ }
+
+ g_ptr_array_free(ret, TRUE);
+
+ return NULL;
+}
+
+gconstpointer
+rspamd_match_hash_map(struct rspamd_hash_map_helper *map, const gchar *in,
+ gsize len)
+{
+ khiter_t k;
+ struct rspamd_map_helper_value *val;
+ rspamd_ftok_t tok;
+
+ if (map == NULL || map->htb == NULL) {
+ return NULL;
+ }
+
+ tok.begin = in;
+ tok.len = len;
+
+ k = kh_get(rspamd_map_hash, map->htb, tok);
+
+ if (k != kh_end(map->htb)) {
+ val = kh_value(map->htb, k);
+ val->hits++;
+
+ return val->value;
+ }
+
+ return NULL;
+}
+
+gconstpointer
+rspamd_match_radix_map(struct rspamd_radix_map_helper *map,
+ const guchar *in, gsize inlen)
+{
+ struct rspamd_map_helper_value *val;
+
+ if (map == NULL || map->trie == NULL) {
+ return NULL;
+ }
+
+ val = (struct rspamd_map_helper_value *) radix_find_compressed(map->trie,
+ in, inlen);
+
+ if (val != (gconstpointer) RADIX_NO_VALUE) {
+ val->hits++;
+
+ return val->value;
+ }
+
+ return NULL;
+}
+
+gconstpointer
+rspamd_match_radix_map_addr(struct rspamd_radix_map_helper *map,
+ const rspamd_inet_addr_t *addr)
+{
+ struct rspamd_map_helper_value *val;
+
+ if (map == NULL || map->trie == NULL) {
+ return NULL;
+ }
+
+ val = (struct rspamd_map_helper_value *) radix_find_compressed_addr(map->trie, addr);
+
+ if (val != (gconstpointer) RADIX_NO_VALUE) {
+ val->hits++;
+
+ return val->value;
+ }
+
+ return NULL;
+}
+
+
+/*
+ * CBD stuff
+ */
+
+struct rspamd_cdb_map_helper *
+rspamd_map_helper_new_cdb(struct rspamd_map *map)
+{
+ struct rspamd_cdb_map_helper *n;
+
+ n = g_malloc0(sizeof(*n));
+ n->cdbs = (GQueue) G_QUEUE_INIT;
+ n->map = map;
+
+ rspamd_cryptobox_fast_hash_init(&n->hst, map_hash_seed);
+
+ return n;
+}
+
+void rspamd_map_helper_destroy_cdb(struct rspamd_cdb_map_helper *c)
+{
+ if (c == NULL) {
+ return;
+ }
+
+ GList *cur = c->cdbs.head;
+
+ while (cur) {
+ struct cdb *cdb = (struct cdb *) cur->data;
+
+ cdb_free(cdb);
+ g_free(cdb->filename);
+ close(cdb->cdb_fd);
+ g_free(cdb);
+
+ cur = g_list_next(cur);
+ }
+
+ g_queue_clear(&c->cdbs);
+
+ g_free(c);
+}
+
+gchar *
+rspamd_cdb_list_read(gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
+{
+ struct rspamd_cdb_map_helper *cdb_data;
+ struct cdb *found = NULL;
+ struct rspamd_map *map = data->map;
+
+ g_assert(map->no_file_read);
+
+ if (data->cur_data == NULL) {
+ cdb_data = rspamd_map_helper_new_cdb(data->map);
+ data->cur_data = cdb_data;
+ }
+ else {
+ cdb_data = (struct rspamd_cdb_map_helper *) data->cur_data;
+ }
+
+ GList *cur = cdb_data->cdbs.head;
+
+ while (cur) {
+ struct cdb *elt = (struct cdb *) cur->data;
+
+ if (strcmp(elt->filename, chunk) == 0) {
+ found = elt;
+ break;
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ if (found == NULL) {
+ /* New cdb */
+ gint fd;
+ struct cdb *cdb;
+
+ fd = rspamd_file_xopen(chunk, O_RDONLY, 0, TRUE);
+
+ if (fd == -1) {
+ msg_err_map("cannot open cdb map from %s: %s", chunk, strerror(errno));
+
+ return NULL;
+ }
+
+ cdb = g_malloc0(sizeof(struct cdb));
+
+ if (cdb_init(cdb, fd) == -1) {
+ g_free(cdb);
+ msg_err_map("cannot init cdb map from %s: %s", chunk, strerror(errno));
+
+ return NULL;
+ }
+
+ cdb->filename = g_strdup(chunk);
+ g_queue_push_tail(&cdb_data->cdbs, cdb);
+ cdb_data->total_size += cdb->cdb_fsize;
+ rspamd_cryptobox_fast_hash_update(&cdb_data->hst, chunk, len);
+ }
+
+ return chunk + len;
+}
+
+void rspamd_cdb_list_fin(struct map_cb_data *data, void **target)
+{
+ struct rspamd_map *map = data->map;
+ struct rspamd_cdb_map_helper *cdb_data;
+
+ if (data->errored) {
+ /* Clean up the current data and do not touch prev data */
+ if (data->cur_data) {
+ msg_info_map("cleanup unfinished new data as error occurred for %s",
+ map->name);
+ cdb_data = (struct rspamd_cdb_map_helper *) data->cur_data;
+ rspamd_map_helper_destroy_cdb(cdb_data);
+ data->cur_data = NULL;
+ }
+ }
+ else {
+ if (data->cur_data) {
+ cdb_data = (struct rspamd_cdb_map_helper *) data->cur_data;
+ msg_info_map("read cdb of %Hz size", cdb_data->total_size);
+ data->map->traverse_function = NULL;
+ data->map->nelts = 0;
+ data->map->digest = rspamd_cryptobox_fast_hash_final(&cdb_data->hst);
+ }
+
+ if (target) {
+ *target = data->cur_data;
+ }
+
+ if (data->prev_data) {
+ cdb_data = (struct rspamd_cdb_map_helper *) data->prev_data;
+ rspamd_map_helper_destroy_cdb(cdb_data);
+ }
+ }
+}
+void rspamd_cdb_list_dtor(struct map_cb_data *data)
+{
+ if (data->cur_data) {
+ rspamd_map_helper_destroy_cdb(data->cur_data);
+ }
+}
+
+gconstpointer
+rspamd_match_cdb_map(struct rspamd_cdb_map_helper *map,
+ const gchar *in, gsize inlen)
+{
+ if (map == NULL || map->cdbs.head == NULL) {
+ return NULL;
+ }
+
+ GList *cur = map->cdbs.head;
+ static rspamd_ftok_t found;
+
+ while (cur) {
+ struct cdb *cdb = (struct cdb *) cur->data;
+
+ if (cdb_find(cdb, in, inlen) > 0) {
+ /* Extract and push value to lua as string */
+ unsigned vlen;
+ gconstpointer vpos;
+
+ vpos = cdb->cdb_mem + cdb_datapos(cdb);
+ vlen = cdb_datalen(cdb);
+ found.len = vlen;
+ found.begin = vpos;
+
+ return &found; /* Do not reuse! */
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ return NULL;
+}
diff --git a/src/libserver/maps/map_helpers.h b/src/libserver/maps/map_helpers.h
new file mode 100644
index 0000000..82c62b6
--- /dev/null
+++ b/src/libserver/maps/map_helpers.h
@@ -0,0 +1,269 @@
+/*-
+ * Copyright 2018 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MAP_HELPERS_H
+#define RSPAMD_MAP_HELPERS_H
+
+#include "config.h"
+#include "map.h"
+#include "addr.h"
+
+/**
+ * @file map_helpers.h
+ *
+ * Defines helper structures to deal with different map types
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Common structures, abstract for simplicity
+ */
+struct rspamd_radix_map_helper;
+struct rspamd_hash_map_helper;
+struct rspamd_regexp_map_helper;
+struct rspamd_cdb_map_helper;
+struct rspamd_map_helper_value;
+
+enum rspamd_regexp_map_flags {
+ RSPAMD_REGEXP_MAP_FLAG_UTF = (1u << 0),
+ RSPAMD_REGEXP_MAP_FLAG_MULTIPLE = (1u << 1),
+ RSPAMD_REGEXP_MAP_FLAG_GLOB = (1u << 2),
+};
+
+typedef void (*rspamd_map_insert_func)(gpointer st, gconstpointer key,
+ gconstpointer value);
+
+/**
+ * Radix list is a list like ip/mask
+ */
+gchar *rspamd_radix_read(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+
+void rspamd_radix_fin(struct map_cb_data *data, void **target);
+
+void rspamd_radix_dtor(struct map_cb_data *data);
+
+/**
+ * Kv list is an ordinal list of keys and values separated by whitespace
+ */
+gchar *rspamd_kv_list_read(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+
+void rspamd_kv_list_fin(struct map_cb_data *data, void **target);
+
+void rspamd_kv_list_dtor(struct map_cb_data *data);
+
+/**
+ * Cdb is a cdb mapped file with shared data
+ * chunk must be filename!
+ */
+gchar *rspamd_cdb_list_read(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+void rspamd_cdb_list_fin(struct map_cb_data *data, void **target);
+void rspamd_cdb_list_dtor(struct map_cb_data *data);
+
+/**
+ * Regexp list is a list of regular expressions
+ */
+
+gchar *rspamd_regexp_list_read_single(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+
+gchar *rspamd_regexp_list_read_multiple(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+
+gchar *rspamd_glob_list_read_single(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+
+gchar *rspamd_glob_list_read_multiple(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+
+void rspamd_regexp_list_fin(struct map_cb_data *data, void **target);
+
+void rspamd_regexp_list_dtor(struct map_cb_data *data);
+
+/**
+ * FSM for lists parsing (support comments, blank lines and partial replies)
+ */
+gchar *
+rspamd_parse_kv_list(
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ rspamd_map_insert_func func,
+ const gchar *default_value,
+ gboolean final);
+
+/**
+ * Find a single (any) matching regexp for the specified text or NULL if
+ * no matches found
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+gconstpointer rspamd_match_regexp_map_single(struct rspamd_regexp_map_helper *map,
+ const gchar *in, gsize len);
+
+/**
+ * Find a multiple (all) matching regexp for the specified text or NULL if
+ * no matches found. Returns GPtrArray that *must* be freed by a caller if not NULL
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+GPtrArray *rspamd_match_regexp_map_all(struct rspamd_regexp_map_helper *map,
+ const gchar *in, gsize len);
+
+/**
+ * Find value matching specific key in a hash map
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+gconstpointer rspamd_match_hash_map(struct rspamd_hash_map_helper *map,
+ const gchar *in, gsize len);
+
+/**
+ * Find value matching specific key in a cdb map
+ * @param map
+ * @param in
+ * @param len
+ * @return rspamd_ftok_t pointer (allocated in a static buffer!)
+ */
+gconstpointer rspamd_match_cdb_map(struct rspamd_cdb_map_helper *map,
+ const gchar *in, gsize len);
+
+/**
+ * Find value matching specific key in a hash map
+ * @param map
+ * @param in raw ip address
+ * @param inlen ip address length (4 for IPv4 and 16 for IPv6)
+ * @return
+ */
+gconstpointer rspamd_match_radix_map(struct rspamd_radix_map_helper *map,
+ const guchar *in, gsize inlen);
+
+gconstpointer rspamd_match_radix_map_addr(struct rspamd_radix_map_helper *map,
+ const rspamd_inet_addr_t *addr);
+
+/**
+ * Creates radix map helper
+ * @param map
+ * @return
+ */
+struct rspamd_radix_map_helper *rspamd_map_helper_new_radix(struct rspamd_map *map);
+
+/**
+ * Inserts new value into radix map
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_radix(gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Inserts new value into radix map performing synchronous resolving
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_radix_resolve(gpointer st, gconstpointer key,
+ gconstpointer value);
+
+/**
+ * Destroys radix map helper
+ * @param r
+ */
+void rspamd_map_helper_destroy_radix(struct rspamd_radix_map_helper *r);
+
+
+/**
+ * Creates hash map helper
+ * @param map
+ * @return
+ */
+struct rspamd_hash_map_helper *rspamd_map_helper_new_hash(struct rspamd_map *map);
+
+/**
+ * Inserts a new value into a hash map
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_hash(gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Destroys hash map helper
+ * @param r
+ */
+void rspamd_map_helper_destroy_hash(struct rspamd_hash_map_helper *r);
+
+/**
+ * Create new regexp map
+ * @param map
+ * @param flags
+ * @return
+ */
+struct rspamd_regexp_map_helper *rspamd_map_helper_new_regexp(struct rspamd_map *map,
+ enum rspamd_regexp_map_flags flags);
+
+/**
+ * Inserts a new regexp into regexp map
+ * @param st
+ * @param key
+ * @param value
+ */
+void rspamd_map_helper_insert_re(gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Destroy regexp map
+ * @param re_map
+ */
+void rspamd_map_helper_destroy_regexp(struct rspamd_regexp_map_helper *re_map);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/maps/map_private.h b/src/libserver/maps/map_private.h
new file mode 100644
index 0000000..60751c0
--- /dev/null
+++ b/src/libserver/maps/map_private.h
@@ -0,0 +1,226 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_MAP_PRIVATE_H_
+#define SRC_LIBUTIL_MAP_PRIVATE_H_
+
+#include "config.h"
+#include "mem_pool.h"
+#include "keypair.h"
+#include "unix-std.h"
+#include "map.h"
+#include "ref.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*rspamd_map_tmp_dtor)(gpointer p);
+
+extern guint rspamd_map_log_id;
+#define msg_err_map(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "map", map->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_map(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "map", map->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_map(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "map", map->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_map(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_map_log_id, "map", map->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+enum fetch_proto {
+ MAP_PROTO_FILE,
+ MAP_PROTO_HTTP,
+ MAP_PROTO_HTTPS,
+ MAP_PROTO_STATIC
+};
+
+/**
+ * Data specific to file maps
+ */
+struct file_map_data {
+ gchar *filename;
+ gboolean need_modify;
+ ev_stat st_ev;
+};
+
+
+struct http_map_data;
+
+struct rspamd_http_map_cached_cbdata {
+ ev_timer timeout;
+ struct ev_loop *event_loop;
+ struct rspamd_storage_shmem *shm;
+ struct rspamd_map *map;
+ struct http_map_data *data;
+ guint64 gen;
+ time_t last_checked;
+};
+
+struct rspamd_map_cachepoint {
+ gint available;
+ gsize len;
+ time_t last_modified;
+ gchar shmem_name[256];
+};
+
+/**
+ * Data specific to HTTP maps
+ */
+struct http_map_data {
+ /* Shared cache data */
+ struct rspamd_map_cachepoint *cache;
+ /* Non-shared for cache owner, used to cleanup cache */
+ struct rspamd_http_map_cached_cbdata *cur_cache_cbd;
+ gchar *userinfo;
+ gchar *path;
+ gchar *host;
+ gchar *rest;
+ rspamd_fstring_t *etag;
+ time_t last_modified;
+ time_t last_checked;
+ gboolean request_sent;
+ guint64 gen;
+ guint16 port;
+};
+
+struct static_map_data {
+ guchar *data;
+ gsize len;
+ gboolean processed;
+};
+
+union rspamd_map_backend_data {
+ struct file_map_data *fd;
+ struct http_map_data *hd;
+ struct static_map_data *sd;
+};
+
+
+struct rspamd_map;
+struct rspamd_map_backend {
+ enum fetch_proto protocol;
+ gboolean is_signed;
+ gboolean is_compressed;
+ gboolean is_fallback;
+ struct rspamd_map *map;
+ struct ev_loop *event_loop;
+ guint32 id;
+ struct rspamd_cryptobox_pubkey *trusted_pubkey;
+ union rspamd_map_backend_data data;
+ gchar *uri;
+ ref_entry_t ref;
+};
+
+struct map_periodic_cbdata;
+
+struct rspamd_map {
+ struct rspamd_dns_resolver *r;
+ struct rspamd_config *cfg;
+ GPtrArray *backends;
+ struct rspamd_map_backend *fallback_backend;
+ map_cb_t read_callback;
+ map_fin_cb_t fin_callback;
+ map_dtor_t dtor;
+ void **user_data;
+ struct ev_loop *event_loop;
+ struct rspamd_worker *wrk;
+ gchar *description;
+ gchar *name;
+ guint32 id;
+ struct map_periodic_cbdata *scheduled_check;
+ rspamd_map_tmp_dtor tmp_dtor;
+ gpointer tmp_dtor_data;
+ rspamd_map_traverse_function traverse_function;
+ rspamd_map_on_load_function on_load_function;
+ gpointer on_load_ud;
+ GDestroyNotify on_load_ud_dtor;
+ gpointer lua_map;
+ gsize nelts;
+ guint64 digest;
+ /* Should we check HTTP or just load cached data */
+ ev_tstamp timeout;
+ gdouble poll_timeout;
+ time_t next_check;
+ bool active_http;
+ bool non_trivial; /* E.g. has http backends in active mode */
+ bool file_only; /* No HTTP backends found */
+ bool static_only; /* No need to check */
+ bool no_file_read; /* Do not read files */
+ /* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */
+ gint *locked;
+ gchar tag[MEMPOOL_UID_LEN];
+};
+
+enum rspamd_map_http_stage {
+ http_map_resolve_host2 = 0, /* 2 requests sent */
+ http_map_resolve_host1, /* 1 requests sent */
+ http_map_http_conn, /* http connection */
+ http_map_terminated /* terminated when doing resolving */
+};
+
+struct map_periodic_cbdata {
+ struct rspamd_map *map;
+ struct map_cb_data cbdata;
+ ev_timer ev;
+ gboolean need_modify;
+ gboolean errored;
+ gboolean locked;
+ guint cur_backend;
+ ref_entry_t ref;
+};
+
+static const gchar rspamd_http_file_magic[] =
+ {'r', 'm', 'c', 'd', '2', '0', '0', '0'};
+
+struct rspamd_http_file_data {
+ guchar magic[sizeof(rspamd_http_file_magic)];
+ goffset data_off;
+ gulong mtime;
+ gulong next_check;
+ gulong etag_len;
+};
+
+struct http_callback_data {
+ struct ev_loop *event_loop;
+ struct rspamd_http_connection *conn;
+ GPtrArray *addrs;
+ rspamd_inet_addr_t *addr;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+ struct http_map_data *data;
+ struct map_periodic_cbdata *periodic;
+ struct rspamd_cryptobox_pubkey *pk;
+ struct rspamd_storage_shmem *shmem_data;
+ gsize data_len;
+ gboolean check;
+ enum rspamd_map_http_stage stage;
+ ev_tstamp timeout;
+
+ ref_entry_t ref;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_MAP_PRIVATE_H_ */
diff --git a/src/libserver/mempool_vars_internal.h b/src/libserver/mempool_vars_internal.h
new file mode 100644
index 0000000..6c95538
--- /dev/null
+++ b/src/libserver/mempool_vars_internal.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MEMPOOL_VARS_INTERNAL_H
+#define RSPAMD_MEMPOOL_VARS_INTERNAL_H
+
+/* Basic rspamd mempool variables names */
+#define RSPAMD_MEMPOOL_AVG_WORDS_LEN "avg_words_len"
+#define RSPAMD_MEMPOOL_SHORT_WORDS_CNT "short_words_cnt"
+#define RSPAMD_MEMPOOL_HEADERS_HASH "headers_hash"
+#define RSPAMD_MEMPOOL_MTA_TAG "MTA-Tag"
+#define RSPAMD_MEMPOOL_MTA_NAME "MTA-Name"
+#define RSPAMD_MEMPOOL_SPF_DOMAIN "spf_domain"
+#define RSPAMD_MEMPOOL_SPF_RECORD "spf_record"
+#define RSPAMD_MEMPOOL_PRINCIPAL_RECIPIENT "principal_recipient"
+#define RSPAMD_MEMPOOL_PROFILE "profile"
+#define RSPAMD_MEMPOOL_MILTER_REPLY "milter_reply"
+#define RSPAMD_MEMPOOL_DKIM_SIGNATURE "dkim-signature"
+#define RSPAMD_MEMPOOL_DMARC_CHECKS "dmarc_checks"
+#define RSPAMD_MEMPOOL_DKIM_BH_CACHE "dkim_bh_cache"
+#define RSPAMD_MEMPOOL_DKIM_CHECK_RESULTS "dkim_results"
+#define RSPAMD_MEMPOOL_DKIM_SIGN_KEY "dkim_key"
+#define RSPAMD_MEMPOOL_DKIM_SIGN_SELECTOR "dkim_selector"
+#define RSPAMD_MEMPOOL_ARC_SIGN_KEY "arc_key"
+#define RSPAMD_MEMPOOL_ARC_SIGN_SELECTOR "arc_selector"
+#define RSPAMD_MEMPOOL_STAT_SIGNATURE "stat_signature"
+#define RSPAMD_MEMPOOL_FUZZY_RESULT "fuzzy_hashes"
+#define RSPAMD_MEMPOOL_SPAM_LEARNS "spam_learns"
+#define RSPAMD_MEMPOOL_HAM_LEARNS "ham_learns"
+#define RSPAMD_MEMPOOL_RE_MAPS_CACHE "re_maps_cache"
+#define RSPAMD_MEMPOOL_HTTP_STAT_BACKEND_RUNTIME "stat_http_runtime"
+#define RSPAMD_MEMPOOL_FUZZY_STAT "fuzzy_stat"
+
+#endif
diff --git a/src/libserver/milter.c b/src/libserver/milter.c
new file mode 100644
index 0000000..cfb7d3c
--- /dev/null
+++ b/src/libserver/milter.c
@@ -0,0 +1,2232 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "milter.h"
+#include "milter_internal.h"
+#include "email_addr.h"
+#include "addr.h"
+#include "unix-std.h"
+#include "logger.h"
+#include "ottery.h"
+#include "libserver/http/http_connection.h"
+#include "libserver/http/http_private.h"
+#include "libserver/protocol_internal.h"
+#include "libserver/cfg_file_private.h"
+#include "libmime/scan_result.h"
+#include "libserver/worker_util.h"
+#include "utlist.h"
+
+#define msg_err_milter(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "milter", priv->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_milter(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "milter", priv->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_milter(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "milter", priv->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_milter(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_milter_log_id, "milter", priv->pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(milter)
+
+static const struct rspamd_milter_context *milter_ctx = NULL;
+
+static gboolean rspamd_milter_handle_session(
+ struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv);
+static inline void rspamd_milter_plan_io(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv, gshort what);
+
+static GQuark
+rspamd_milter_quark(void)
+{
+ return g_quark_from_static_string("milter");
+}
+
+static void
+rspamd_milter_obuf_free(struct rspamd_milter_outbuf *obuf)
+{
+ if (obuf) {
+ if (obuf->buf) {
+ rspamd_fstring_free(obuf->buf);
+ }
+
+ g_free(obuf);
+ }
+}
+
+#define RSPAMD_MILTER_RESET_COMMON (1 << 0)
+#define RSPAMD_MILTER_RESET_IO (1 << 1)
+#define RSPAMD_MILTER_RESET_ADDR (1 << 2)
+#define RSPAMD_MILTER_RESET_MACRO (1 << 3)
+#define RSPAMD_MILTER_RESET_ALL (RSPAMD_MILTER_RESET_COMMON | \
+ RSPAMD_MILTER_RESET_IO | \
+ RSPAMD_MILTER_RESET_ADDR | \
+ RSPAMD_MILTER_RESET_MACRO)
+#define RSPAMD_MILTER_RESET_QUIT_NC (RSPAMD_MILTER_RESET_COMMON | \
+ RSPAMD_MILTER_RESET_ADDR | \
+ RSPAMD_MILTER_RESET_MACRO)
+#define RSPAMD_MILTER_RESET_ABORT (RSPAMD_MILTER_RESET_COMMON)
+
+static void
+rspamd_milter_session_reset(struct rspamd_milter_session *session,
+ guint how)
+{
+ struct rspamd_milter_outbuf *obuf, *obuf_tmp;
+ struct rspamd_milter_private *priv = session->priv;
+ struct rspamd_email_address *cur;
+ guint i;
+
+ if (how & RSPAMD_MILTER_RESET_IO) {
+ msg_debug_milter("cleanup IO on abort");
+
+ DL_FOREACH_SAFE(priv->out_chain, obuf, obuf_tmp)
+ {
+ rspamd_milter_obuf_free(obuf);
+ }
+
+ priv->out_chain = NULL;
+
+ if (priv->parser.buf) {
+ priv->parser.buf->len = 0;
+ }
+ }
+
+ if (how & RSPAMD_MILTER_RESET_COMMON) {
+ msg_debug_milter("cleanup common data on abort");
+
+ if (session->message) {
+ session->message->len = 0;
+ msg_debug_milter("cleanup message on abort");
+ }
+
+ if (session->rcpts) {
+ PTR_ARRAY_FOREACH(session->rcpts, i, cur)
+ {
+ rspamd_email_address_free(cur);
+ }
+
+ msg_debug_milter("cleanup %d recipients on abort",
+ (gint) session->rcpts->len);
+
+ g_ptr_array_free(session->rcpts, TRUE);
+ session->rcpts = NULL;
+ }
+
+ if (session->from) {
+ msg_debug_milter("cleanup from");
+ rspamd_email_address_free(session->from);
+ session->from = NULL;
+ }
+
+ if (priv->headers) {
+ msg_debug_milter("cleanup headers");
+ gchar *k;
+ GArray *ar;
+
+ kh_foreach(priv->headers, k, ar, {
+ g_free(k);
+ g_array_free(ar, TRUE);
+ });
+
+ kh_clear(milter_headers_hash_t, priv->headers);
+ }
+
+ priv->cur_hdr = 0;
+ }
+
+ if (how & RSPAMD_MILTER_RESET_ADDR) {
+ if (session->addr) {
+ msg_debug_milter("cleanup addr");
+ rspamd_inet_address_free(session->addr);
+ session->addr = NULL;
+ }
+ if (session->hostname) {
+ msg_debug_milter("cleanup hostname");
+ session->hostname->len = 0;
+ }
+ }
+
+ if (how & RSPAMD_MILTER_RESET_MACRO) {
+ if (session->macros) {
+ msg_debug_milter("cleanup macros");
+ g_hash_table_unref(session->macros);
+ session->macros = NULL;
+ }
+ }
+}
+
+static void
+rspamd_milter_session_dtor(struct rspamd_milter_session *session)
+{
+ struct rspamd_milter_private *priv;
+
+ if (session) {
+ priv = session->priv;
+ msg_debug_milter("destroying milter session");
+
+ rspamd_ev_watcher_stop(priv->event_loop, &priv->ev);
+ rspamd_milter_session_reset(session, RSPAMD_MILTER_RESET_ALL);
+ close(priv->fd);
+
+ if (priv->parser.buf) {
+ rspamd_fstring_free(priv->parser.buf);
+ }
+
+ if (session->message) {
+ rspamd_fstring_free(session->message);
+ }
+
+ if (session->helo) {
+ rspamd_fstring_free(session->helo);
+ }
+
+ if (session->hostname) {
+ rspamd_fstring_free(session->hostname);
+ }
+
+ if (priv->headers) {
+ gchar *k;
+ GArray *ar;
+
+ kh_foreach(priv->headers, k, ar, {
+ g_free(k);
+ g_array_free(ar, TRUE);
+ });
+
+ kh_destroy(milter_headers_hash_t, priv->headers);
+ }
+
+ if (milter_ctx->sessions_cache) {
+ rspamd_worker_session_cache_remove(milter_ctx->sessions_cache,
+ session);
+ }
+
+ rspamd_mempool_delete(priv->pool);
+ g_free(priv);
+ g_free(session);
+ }
+}
+
+static void
+rspamd_milter_on_protocol_error(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv, GError *err)
+{
+ msg_debug_milter("protocol error: %e", err);
+ priv->state = RSPAMD_MILTER_WANNA_DIE;
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+
+ rspamd_milter_plan_io(session, priv, EV_WRITE);
+}
+
+static void
+rspamd_milter_on_protocol_ping(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv)
+{
+ GError *err = NULL;
+ static const gchar reply[] = "HTTP/1.1 200 OK\r\n"
+ "Connection: close\r\n"
+ "Server: rspamd/2.7 (milter mode)\r\n"
+ "Content-Length: 6\r\n"
+ "Content-Type: text/plain\r\n"
+ "\r\n"
+ "pong\r\n";
+
+ if (write(priv->fd, reply, sizeof(reply)) == -1) {
+ gint serrno = errno;
+ msg_err_milter("cannot write pong reply: %s", strerror(serrno));
+ g_set_error(&err, rspamd_milter_quark(), serrno, "ping command IO error: %s",
+ strerror(serrno));
+ priv->state = RSPAMD_MILTER_WANNA_DIE;
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+ }
+ else {
+ priv->state = RSPAMD_MILTER_PONG_AND_DIE;
+ rspamd_milter_plan_io(session, priv, EV_WRITE);
+ }
+}
+
+static gint
+rspamd_milter_http_on_url(http_parser *parser, const gchar *at, size_t length)
+{
+ GString *url = (GString *) parser->data;
+
+ g_string_append_len(url, at, length);
+
+ return 0;
+}
+
+static void
+rspamd_milter_io_handler(gint fd, gshort what, void *ud)
+{
+ struct rspamd_milter_session *session = ud;
+ struct rspamd_milter_private *priv;
+ GError *err;
+
+ priv = session->priv;
+
+ if (what == EV_TIMEOUT) {
+ msg_debug_milter("connection timed out");
+ err = g_error_new(rspamd_milter_quark(), ETIMEDOUT, "connection "
+ "timed out");
+ rspamd_milter_on_protocol_error(session, priv, err);
+ }
+ else {
+ rspamd_milter_handle_session(session, priv);
+ }
+}
+
+static inline void
+rspamd_milter_plan_io(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv, gshort what)
+{
+ rspamd_ev_watcher_reschedule(priv->event_loop, &priv->ev, what);
+}
+
+
+#define READ_INT_32(pos, var) \
+ do { \
+ memcpy(&(var), (pos), sizeof(var)); \
+ (pos) += sizeof(var); \
+ (var) = ntohl(var); \
+ } while (0)
+#define READ_INT_16(pos, var) \
+ do { \
+ memcpy(&(var), (pos), sizeof(var)); \
+ (pos) += sizeof(var); \
+ (var) = ntohs(var); \
+ } while (0)
+
+static gboolean
+rspamd_milter_process_command(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv)
+{
+ GError *err;
+ rspamd_fstring_t *buf;
+ const guchar *pos, *end, *zero;
+ guint cmdlen;
+ guint32 version, actions, protocol;
+
+ buf = priv->parser.buf;
+ pos = buf->str + priv->parser.cmd_start;
+ cmdlen = priv->parser.datalen;
+ end = pos + cmdlen;
+
+ switch (priv->parser.cur_cmd) {
+ case RSPAMD_MILTER_CMD_ABORT:
+ msg_debug_milter("got abort command");
+ rspamd_milter_session_reset(session, RSPAMD_MILTER_RESET_ABORT);
+ break;
+ case RSPAMD_MILTER_CMD_BODY:
+ if (!session->message) {
+ session->message = rspamd_fstring_sized_new(
+ RSPAMD_MILTER_MESSAGE_CHUNK);
+ }
+
+ msg_debug_milter("got body chunk: %d bytes", (int) cmdlen);
+ session->message = rspamd_fstring_append(session->message,
+ pos, cmdlen);
+ break;
+ case RSPAMD_MILTER_CMD_CONNECT:
+ msg_debug_milter("got connect command");
+
+ /*
+ * char hostname[]: Hostname, NUL terminated
+ * char family: Protocol family
+ * uint16 port: Port number (SMFIA_INET or SMFIA_INET6 only)
+ * char address[]: IP address (ASCII) or unix socket path, NUL terminated
+ */
+ zero = memchr(pos, '\0', cmdlen);
+
+ if (zero == NULL || zero > (end - sizeof(guint16) + 1)) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "connect command (no name)");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ else {
+ guchar proto;
+ guint16 port;
+ gchar ip6_str[INET6_ADDRSTRLEN + 3];
+ gsize r;
+
+ /*
+ * Important notice: Postfix do NOT use this command to pass
+ * client's info (e.g. hostname is not really here)
+ * Sendmail will pass it here
+ */
+ if (session->hostname == NULL) {
+ session->hostname = rspamd_fstring_new_init(pos, zero - pos);
+ msg_debug_milter("got hostname on connect phase: %V",
+ session->hostname);
+ }
+ else {
+ session->hostname = rspamd_fstring_assign(session->hostname,
+ pos, zero - pos);
+ msg_debug_milter("rewrote hostname on connect phase: %V",
+ session->hostname);
+ }
+
+ pos = zero + 1;
+ proto = *pos++;
+
+ if (proto == RSPAMD_MILTER_CONN_UNKNOWN) {
+ /* We have no information about host */
+ msg_debug_milter("unknown connect address");
+ }
+ else {
+ READ_INT_16(pos, port);
+
+ if (pos >= end) {
+ /* No IP somehow */
+ msg_debug_milter("unknown connect IP/socket");
+ }
+ else {
+ zero = memchr(pos, '\0', end - pos);
+
+ if (zero == NULL) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "connect command (no zero terminated IP)");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+
+ switch (proto) {
+ case RSPAMD_MILTER_CONN_UNIX:
+ session->addr = rspamd_inet_address_new(AF_UNIX,
+ pos);
+ break;
+
+ case RSPAMD_MILTER_CONN_INET:
+ session->addr = rspamd_inet_address_new(AF_INET, NULL);
+
+ if (!rspamd_parse_inet_address_ip(pos, zero - pos,
+ session->addr)) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "invalid connect command (bad IPv4)");
+ rspamd_milter_on_protocol_error(session, priv,
+ err);
+
+ return FALSE;
+ }
+
+ rspamd_inet_address_set_port(session->addr, port);
+ break;
+
+ case RSPAMD_MILTER_CONN_INET6:
+ session->addr = rspamd_inet_address_new(AF_INET6, NULL);
+
+ if (zero - pos > sizeof("IPv6:") &&
+ rspamd_lc_cmp(pos, "IPv6:",
+ sizeof("IPv6:") - 1) == 0) {
+ /* Kill sendmail please */
+ pos += sizeof("IPv6:") - 1;
+
+ if (*pos != '[') {
+ /* Add explicit braces */
+ r = rspamd_snprintf(ip6_str, sizeof(ip6_str),
+ "[%*s]", (int) (zero - pos), pos);
+ }
+ else {
+ r = rspamd_strlcpy(ip6_str, pos, sizeof(ip6_str));
+ }
+ }
+ else {
+ r = rspamd_strlcpy(ip6_str, pos, sizeof(ip6_str));
+ }
+
+ if (!rspamd_parse_inet_address_ip(ip6_str, r,
+ session->addr)) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "invalid connect command (bad IPv6)");
+ rspamd_milter_on_protocol_error(session, priv,
+ err);
+
+ return FALSE;
+ }
+
+ rspamd_inet_address_set_port(session->addr, port);
+ break;
+
+ default:
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "invalid connect command (bad protocol: %c)",
+ proto);
+ rspamd_milter_on_protocol_error(session, priv,
+ err);
+
+ return FALSE;
+ }
+ }
+ }
+
+ msg_info_milter("got connection from %s",
+ rspamd_inet_address_to_string_pretty(session->addr));
+ }
+ break;
+ case RSPAMD_MILTER_CMD_MACRO:
+ msg_debug_milter("got macro command");
+ /*
+ * Format is
+ * 1 byte - command associated (we don't care about it)
+ * 0-terminated name
+ * 0-terminated value
+ * ...
+ */
+ if (session->macros == NULL) {
+ session->macros = g_hash_table_new_full(rspamd_ftok_icase_hash,
+ rspamd_ftok_icase_equal,
+ rspamd_fstring_mapped_ftok_free,
+ rspamd_fstring_mapped_ftok_free);
+ }
+
+ /* Ignore one byte */
+ pos++;
+
+ while (pos < end) {
+ zero = memchr(pos, '\0', cmdlen);
+
+ if (zero == NULL || zero >= end) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "macro command (no name)");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ else {
+ rspamd_fstring_t *name, *value;
+ rspamd_ftok_t *name_tok, *value_tok;
+ const guchar *zero_val;
+
+ zero_val = memchr(zero + 1, '\0', end - zero - 1);
+
+ if (zero_val != NULL && end > zero_val) {
+ name = rspamd_fstring_new_init(pos, zero - pos);
+ value = rspamd_fstring_new_init(zero + 1,
+ zero_val - zero - 1);
+ name_tok = rspamd_ftok_map(name);
+ value_tok = rspamd_ftok_map(value);
+
+ g_hash_table_replace(session->macros, name_tok, value_tok);
+ msg_debug_milter("got macro: %T -> %T",
+ name_tok, value_tok);
+
+ cmdlen -= zero_val - pos;
+ pos = zero_val + 1;
+ }
+ else {
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "invalid macro command (bad value)");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ }
+ }
+ break;
+ case RSPAMD_MILTER_CMD_BODYEOB:
+ msg_debug_milter("got eob command");
+ REF_RETAIN(session);
+ priv->fin_cb(priv->fd, session, priv->ud);
+ REF_RELEASE(session);
+ break;
+ case RSPAMD_MILTER_CMD_HELO:
+ msg_debug_milter("got helo command");
+
+ if (end > pos && *(end - 1) == '\0') {
+ if (session->helo == NULL) {
+ session->helo = rspamd_fstring_new_init(pos, cmdlen - 1);
+ }
+ else {
+ session->helo = rspamd_fstring_assign(session->helo,
+ pos, cmdlen - 1);
+ }
+ }
+ else if (end > pos) {
+ /* Should not happen */
+ if (session->helo == NULL) {
+ session->helo = rspamd_fstring_new_init(pos, cmdlen);
+ }
+ else {
+ session->helo = rspamd_fstring_assign(session->helo,
+ pos, cmdlen);
+ }
+ }
+
+ msg_debug_milter("got helo value: %V", session->helo);
+
+ break;
+ case RSPAMD_MILTER_CMD_QUIT_NC:
+ /* We need to reset session and start over */
+ msg_debug_milter("got quit_nc command");
+ rspamd_milter_session_reset(session, RSPAMD_MILTER_RESET_QUIT_NC);
+ break;
+ case RSPAMD_MILTER_CMD_HEADER:
+ msg_debug_milter("got header command");
+ if (!session->message) {
+ session->message = rspamd_fstring_sized_new(
+ RSPAMD_MILTER_MESSAGE_CHUNK);
+ }
+ zero = memchr(pos, '\0', cmdlen);
+
+ if (zero == NULL) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "header command (no name)");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ else {
+ if (end > zero && *(end - 1) == '\0') {
+ khiter_t k;
+ gint res;
+
+ k = kh_get(milter_headers_hash_t, priv->headers, (gchar *) pos);
+
+ if (k == kh_end(priv->headers)) {
+ GArray *ar;
+
+ k = kh_put(milter_headers_hash_t, priv->headers,
+ g_strdup(pos), &res);
+ ar = g_array_new(FALSE, FALSE, sizeof(gint));
+ g_array_append_val(ar, priv->cur_hdr);
+ kh_value(priv->headers, k) = ar;
+ }
+ else {
+ g_array_append_val(kh_value(priv->headers, k),
+ priv->cur_hdr);
+ }
+
+ rspamd_printf_fstring(&session->message, "%*s: %*s\r\n",
+ (int) (zero - pos), pos,
+ (int) (end - zero - 2), zero + 1);
+ priv->cur_hdr++;
+ }
+ else {
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "header command (bad value)");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ }
+ break;
+ case RSPAMD_MILTER_CMD_MAIL:
+ msg_debug_milter("mail command");
+
+ while (pos < end) {
+ struct rspamd_email_address *addr;
+ gchar *cpy;
+
+ zero = memchr(pos, '\0', end - pos);
+
+ if (zero && zero > pos) {
+ cpy = rspamd_mempool_alloc(priv->pool, zero - pos);
+ memcpy(cpy, pos, zero - pos);
+ msg_debug_milter("got mail: %*s", (int) (zero - pos), cpy);
+ addr = rspamd_email_address_from_smtp(cpy, zero - pos);
+
+ if (addr) {
+ session->from = addr;
+ }
+
+ /* TODO: parse esmtp arguments */
+ break;
+ }
+ else {
+ msg_debug_milter("got weird from: %*s", (int) (end - pos),
+ pos);
+ /* That actually should not happen */
+ cpy = rspamd_mempool_alloc(priv->pool, end - pos);
+ memcpy(cpy, pos, end - pos);
+ addr = rspamd_email_address_from_smtp(cpy, end - pos);
+
+ if (addr) {
+ session->from = addr;
+ }
+
+ break;
+ }
+ }
+ break;
+ case RSPAMD_MILTER_CMD_EOH:
+ msg_debug_milter("got eoh command");
+
+ if (!session->message) {
+ session->message = rspamd_fstring_sized_new(
+ RSPAMD_MILTER_MESSAGE_CHUNK);
+ }
+
+ session->message = rspamd_fstring_append(session->message,
+ "\r\n", 2);
+ break;
+ case RSPAMD_MILTER_CMD_OPTNEG:
+ if (cmdlen != sizeof(guint32) * 3) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "optneg command");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+
+ READ_INT_32(pos, version);
+ READ_INT_32(pos, actions);
+ READ_INT_32(pos, protocol);
+
+ msg_debug_milter("optneg: version: %d, actions: %d, protocol: %d",
+ version, actions, protocol);
+
+ if (version < RSPAMD_MILTER_PROTO_VER) {
+ msg_warn_milter("MTA specifies too old protocol: %d, "
+ "aborting connection",
+ version);
+
+ err = g_error_new(rspamd_milter_quark(), EINVAL, "invalid "
+ "protocol version: %d",
+ version);
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+
+ version = RSPAMD_MILTER_PROTO_VER;
+ actions |= RSPAMD_MILTER_ACTIONS_MASK;
+ protocol = RSPAMD_MILTER_FLAG_NOREPLY_MASK;
+
+ return rspamd_milter_send_action(session, RSPAMD_MILTER_OPTNEG,
+ version, actions, protocol);
+ break;
+ case RSPAMD_MILTER_CMD_QUIT:
+ if (priv->out_chain) {
+ msg_debug_milter("quit command, refcount: %d, "
+ "some output buffers left - draining",
+ session->ref.refcount);
+
+ priv->state = RSPAMD_MILTER_WRITE_AND_DIE;
+ }
+ else {
+ msg_debug_milter("quit command, refcount: %d",
+ session->ref.refcount);
+
+ priv->state = RSPAMD_MILTER_WANNA_DIE;
+ REF_RETAIN(session);
+ priv->fin_cb(priv->fd, session, priv->ud);
+ REF_RELEASE(session);
+ return FALSE;
+ }
+ break;
+ case RSPAMD_MILTER_CMD_RCPT:
+ msg_debug_milter("rcpt command");
+
+ while (pos < end) {
+ struct rspamd_email_address *addr;
+ gchar *cpy;
+
+ zero = memchr(pos, '\0', end - pos);
+
+ if (zero && zero > pos) {
+ cpy = rspamd_mempool_alloc(priv->pool, end - pos);
+ memcpy(cpy, pos, end - pos);
+
+ msg_debug_milter("got rcpt: %*s", (int) (zero - pos), cpy);
+ addr = rspamd_email_address_from_smtp(cpy, zero - pos);
+
+ if (addr) {
+ if (!session->rcpts) {
+ session->rcpts = g_ptr_array_sized_new(1);
+ }
+
+ g_ptr_array_add(session->rcpts, addr);
+ }
+
+ pos = zero + 1;
+ }
+ else {
+ cpy = rspamd_mempool_alloc(priv->pool, end - pos);
+ memcpy(cpy, pos, end - pos);
+
+ msg_debug_milter("got weird rcpt: %*s", (int) (end - pos),
+ pos);
+ /* That actually should not happen */
+ addr = rspamd_email_address_from_smtp(cpy, end - pos);
+
+ if (addr) {
+ if (!session->rcpts) {
+ session->rcpts = g_ptr_array_sized_new(1);
+ }
+
+ g_ptr_array_add(session->rcpts, addr);
+ }
+
+ break;
+ }
+ }
+ break;
+ case RSPAMD_MILTER_CMD_DATA:
+ if (!session->message) {
+ session->message = rspamd_fstring_sized_new(
+ RSPAMD_MILTER_MESSAGE_CHUNK);
+ }
+ msg_debug_milter("got data command");
+ /* We do not need reply as specified */
+ break;
+ default:
+ msg_debug_milter("got bad command: %c", priv->parser.cur_cmd);
+ break;
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_milter_is_valid_cmd(guchar c)
+{
+ switch (c) {
+ case RSPAMD_MILTER_CMD_ABORT:
+ case RSPAMD_MILTER_CMD_BODY:
+ case RSPAMD_MILTER_CMD_CONNECT:
+ case RSPAMD_MILTER_CMD_MACRO:
+ case RSPAMD_MILTER_CMD_BODYEOB:
+ case RSPAMD_MILTER_CMD_HELO:
+ case RSPAMD_MILTER_CMD_QUIT_NC:
+ case RSPAMD_MILTER_CMD_HEADER:
+ case RSPAMD_MILTER_CMD_MAIL:
+ case RSPAMD_MILTER_CMD_EOH:
+ case RSPAMD_MILTER_CMD_OPTNEG:
+ case RSPAMD_MILTER_CMD_QUIT:
+ case RSPAMD_MILTER_CMD_RCPT:
+ case RSPAMD_MILTER_CMD_DATA:
+ case RSPAMD_MILTER_CMD_UNKNOWN:
+ return TRUE;
+ default:
+ break;
+ }
+
+ return FALSE;
+}
+
+static gboolean
+rspamd_milter_consume_input(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv)
+{
+ const guchar *p, *end;
+ GError *err;
+
+ p = priv->parser.buf->str + priv->parser.pos;
+ end = priv->parser.buf->str + priv->parser.buf->len;
+
+ while (p < end) {
+ msg_debug_milter("offset: %d, state: %d",
+ (gint) (p - (const guchar *) priv->parser.buf->str),
+ priv->parser.state);
+
+ switch (priv->parser.state) {
+ case st_len_1:
+ /* The first length byte in big endian order */
+ priv->parser.datalen = 0;
+ priv->parser.datalen |= ((gsize) *p) << 24;
+ priv->parser.state = st_len_2;
+ p++;
+ break;
+ case st_len_2:
+ /* The second length byte in big endian order */
+ priv->parser.datalen |= ((gsize) *p) << 16;
+ priv->parser.state = st_len_3;
+ p++;
+ break;
+ case st_len_3:
+ /* The third length byte in big endian order */
+ priv->parser.datalen |= ((gsize) *p) << 8;
+ priv->parser.state = st_len_4;
+ p++;
+ break;
+ case st_len_4:
+ /* The fourth length byte in big endian order */
+ priv->parser.datalen |= ((gsize) *p);
+ priv->parser.state = st_read_cmd;
+ p++;
+ break;
+ case st_read_cmd:
+ priv->parser.cur_cmd = *p;
+ priv->parser.state = st_read_data;
+
+ if (priv->parser.datalen < 1) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "Command length is too short");
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ else {
+ /* Eat command itself */
+ priv->parser.datalen--;
+ }
+
+ p++;
+ priv->parser.cmd_start = p - (const guchar *) priv->parser.buf->str;
+ break;
+ case st_read_data:
+ /* We might need some more data in buffer for further steps */
+ if (priv->parser.datalen >
+ RSPAMD_MILTER_MESSAGE_CHUNK * 2) {
+ /* Check if we have HTTP input instead of milter */
+ if (priv->parser.buf->len > sizeof("GET") &&
+ memcmp(priv->parser.buf->str, "GET", 3) == 0) {
+ struct http_parser http_parser;
+ struct http_parser_settings http_callbacks;
+ GString *url = g_string_new(NULL);
+
+ /* Hack, hack, hack */
+ /*
+ * This code is assumed to read `/ping` command and
+ * handle it to monitor port's availability since
+ * milter protocol is stupid and does not allow to do that
+ * This code also assumes that HTTP request can be read
+ * as as single data chunk which is not true in some cases
+ * In general, don't use it for anything but ping checks
+ */
+ memset(&http_callbacks, 0, sizeof(http_callbacks));
+ http_parser.data = url;
+ http_parser_init(&http_parser, HTTP_REQUEST);
+ http_callbacks.on_url = rspamd_milter_http_on_url;
+ http_parser_execute(&http_parser, &http_callbacks,
+ priv->parser.buf->str, priv->parser.buf->len);
+
+ if (url->len == sizeof("/ping") - 1 &&
+ rspamd_lc_cmp(url->str, "/ping", url->len) == 0) {
+ rspamd_milter_on_protocol_ping(session, priv);
+ g_string_free(url, TRUE);
+
+ return TRUE;
+ }
+ else {
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "HTTP GET request is not supported in milter mode, url: %s",
+ url->str);
+ }
+
+ g_string_free(url, TRUE);
+ }
+ else if (priv->parser.buf->len > sizeof("POST") &&
+ memcmp(priv->parser.buf->str, "POST", 4) == 0) {
+ err = g_error_new(rspamd_milter_quark(), EINVAL,
+ "HTTP POST request is not supported in milter mode");
+ }
+ else {
+ err = g_error_new(rspamd_milter_quark(), E2BIG,
+ "Command length is too big: %zd",
+ priv->parser.datalen);
+ }
+
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ if (!rspamd_milter_is_valid_cmd(priv->parser.cur_cmd)) {
+ err = g_error_new(rspamd_milter_quark(), E2BIG,
+ "Unvalid command: %c",
+ priv->parser.cur_cmd);
+ rspamd_milter_on_protocol_error(session, priv, err);
+
+ return FALSE;
+ }
+ if (priv->parser.buf->allocated < priv->parser.datalen) {
+ priv->parser.pos = p - (const guchar *) priv->parser.buf->str;
+ priv->parser.buf = rspamd_fstring_grow(priv->parser.buf,
+ priv->parser.buf->len + priv->parser.datalen);
+ /* This can realloc buffer */
+ rspamd_milter_plan_io(session, priv, EV_READ);
+ goto end;
+ }
+ else {
+ /* We may have the full command available */
+ if (p + priv->parser.datalen <= end) {
+ /* We can process command */
+ if (!rspamd_milter_process_command(session, priv)) {
+ return FALSE;
+ }
+
+ p += priv->parser.datalen;
+ priv->parser.state = st_len_1;
+ priv->parser.cur_cmd = '\0';
+ priv->parser.cmd_start = 0;
+ }
+ else {
+ /* Need to read more */
+ priv->parser.pos = p - (const guchar *) priv->parser.buf->str;
+ rspamd_milter_plan_io(session, priv, EV_READ);
+ goto end;
+ }
+ }
+ break;
+ }
+ }
+
+ /* Leftover */
+ switch (priv->parser.state) {
+ case st_read_data:
+ if (p + priv->parser.datalen <= end) {
+ if (!rspamd_milter_process_command(session, priv)) {
+ return FALSE;
+ }
+
+ priv->parser.state = st_len_1;
+ priv->parser.cur_cmd = '\0';
+ priv->parser.cmd_start = 0;
+ }
+ break;
+ default:
+ /* No need to do anything */
+ break;
+ }
+
+ if (p == end) {
+ priv->parser.buf->len = 0;
+ priv->parser.pos = 0;
+ priv->parser.cmd_start = 0;
+ }
+
+ if (priv->out_chain) {
+ rspamd_milter_plan_io(session, priv, EV_READ | EV_WRITE);
+ }
+ else {
+ rspamd_milter_plan_io(session, priv, EV_READ);
+ }
+end:
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_milter_handle_session(struct rspamd_milter_session *session,
+ struct rspamd_milter_private *priv)
+{
+ struct rspamd_milter_outbuf *obuf, *obuf_tmp;
+ gssize r, to_write;
+ GError *err;
+
+ g_assert(session != NULL);
+
+ switch (priv->state) {
+ case RSPAMD_MILTER_READ_MORE:
+ if (priv->parser.buf->len >= priv->parser.buf->allocated) {
+ priv->parser.buf = rspamd_fstring_grow(priv->parser.buf,
+ priv->parser.buf->len * 2);
+ }
+
+ r = read(priv->fd, priv->parser.buf->str + priv->parser.buf->len,
+ priv->parser.buf->allocated - priv->parser.buf->len);
+
+ msg_debug_milter("read %z bytes, %z remain, %z allocated",
+ r, priv->parser.buf->len, priv->parser.buf->allocated);
+
+ if (r == -1) {
+ if (errno == EAGAIN || errno == EINTR) {
+ rspamd_milter_plan_io(session, priv, EV_READ);
+
+ return TRUE;
+ }
+ else {
+ /* Fatal IO error */
+ err = g_error_new(rspamd_milter_quark(), errno,
+ "IO read error: %s", strerror(errno));
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+
+ REF_RELEASE(session);
+
+ return FALSE;
+ }
+ }
+ else if (r == 0) {
+ err = g_error_new(rspamd_milter_quark(), ECONNRESET,
+ "Unexpected EOF");
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+
+ REF_RELEASE(session);
+
+ return FALSE;
+ }
+ else {
+ priv->parser.buf->len += r;
+
+ return rspamd_milter_consume_input(session, priv);
+ }
+
+ break;
+ case RSPAMD_MILTER_WRITE_REPLY:
+ case RSPAMD_MILTER_WRITE_AND_DIE:
+ if (priv->out_chain == NULL) {
+ if (priv->state == RSPAMD_MILTER_WRITE_AND_DIE) {
+ /* Finished writing, let's die finally */
+ msg_debug_milter("output drained, terminating, refcount: %d",
+ session->ref.refcount);
+
+ /* Session should be destroyed by fin_cb... */
+ REF_RETAIN(session);
+ priv->fin_cb(priv->fd, session, priv->ud);
+ REF_RELEASE(session);
+
+ return FALSE;
+ }
+ else {
+ /* We have written everything, so we can read something */
+ priv->state = RSPAMD_MILTER_READ_MORE;
+ rspamd_milter_plan_io(session, priv, EV_READ);
+ }
+ }
+ else {
+ DL_FOREACH_SAFE(priv->out_chain, obuf, obuf_tmp)
+ {
+ to_write = obuf->buf->len - obuf->pos;
+
+ g_assert(to_write > 0);
+
+ r = write(priv->fd, obuf->buf->str + obuf->pos, to_write);
+
+ if (r == -1) {
+ if (errno == EAGAIN || errno == EINTR) {
+ rspamd_milter_plan_io(session, priv, EV_WRITE);
+ }
+ else {
+ /* Fatal IO error */
+ err = g_error_new(rspamd_milter_quark(), errno,
+ "IO write error: %s", strerror(errno));
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+
+ REF_RELEASE(session);
+
+ return FALSE;
+ }
+ }
+ else if (r == 0) {
+ err = g_error_new(rspamd_milter_quark(), ECONNRESET,
+ "Unexpected EOF");
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+
+ REF_RELEASE(session);
+
+ return FALSE;
+ }
+ else {
+ if (r == to_write) {
+ /* We have done with this buf */
+ DL_DELETE(priv->out_chain, obuf);
+ rspamd_milter_obuf_free(obuf);
+ }
+ else {
+ /* We need to plan another write */
+ obuf->pos += r;
+ rspamd_milter_plan_io(session, priv, EV_WRITE);
+
+ return TRUE;
+ }
+ }
+ }
+
+ /* Here we have written everything, so we can plan reading */
+ priv->state = RSPAMD_MILTER_READ_MORE;
+ rspamd_milter_plan_io(session, priv, EV_READ);
+ }
+ break;
+ case RSPAMD_MILTER_WANNA_DIE:
+ /* We are here after processing everything, so release session */
+ REF_RELEASE(session);
+ return FALSE;
+ break;
+ case RSPAMD_MILTER_PONG_AND_DIE:
+ err = g_error_new(rspamd_milter_quark(), 0,
+ "ping command");
+ REF_RETAIN(session);
+ priv->err_cb(priv->fd, session, priv->ud, err);
+ REF_RELEASE(session);
+ g_error_free(err);
+ REF_RELEASE(session);
+ return FALSE;
+ break;
+ }
+
+ return TRUE;
+}
+
+
+gboolean
+rspamd_milter_handle_socket(gint fd, ev_tstamp timeout,
+ rspamd_mempool_t *pool,
+ struct ev_loop *ev_base, rspamd_milter_finish finish_cb,
+ rspamd_milter_error error_cb, void *ud)
+{
+ struct rspamd_milter_session *session;
+ struct rspamd_milter_private *priv;
+ gint nfd = dup(fd);
+
+ if (nfd == -1) {
+ GError *err = g_error_new(rspamd_milter_quark(), errno,
+ "dup failed: %s", strerror(errno));
+ error_cb(fd, NULL, ud, err);
+
+ return FALSE;
+ }
+
+ g_assert(finish_cb != NULL);
+ g_assert(error_cb != NULL);
+ g_assert(milter_ctx != NULL);
+
+ session = g_malloc0(sizeof(*session));
+ priv = g_malloc0(sizeof(*priv));
+ priv->fd = nfd;
+ priv->ud = ud;
+ priv->fin_cb = finish_cb;
+ priv->err_cb = error_cb;
+ priv->parser.state = st_len_1;
+ priv->parser.buf = rspamd_fstring_sized_new(RSPAMD_MILTER_MESSAGE_CHUNK + 5);
+ priv->event_loop = ev_base;
+ priv->state = RSPAMD_MILTER_READ_MORE;
+ priv->pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), "milter", 0);
+ priv->discard_on_reject = milter_ctx->discard_on_reject;
+ priv->quarantine_on_reject = milter_ctx->quarantine_on_reject;
+ priv->ev.timeout = timeout;
+
+ rspamd_ev_watcher_init(&priv->ev, priv->fd, EV_READ | EV_WRITE,
+ rspamd_milter_io_handler, session);
+
+ if (pool) {
+ /* Copy tag */
+ memcpy(priv->pool->tag.uid, pool->tag.uid, sizeof(pool->tag.uid));
+ }
+
+ priv->headers = kh_init(milter_headers_hash_t);
+ kh_resize(milter_headers_hash_t, priv->headers, 32);
+
+ session->priv = priv;
+ REF_INIT_RETAIN(session, rspamd_milter_session_dtor);
+
+ if (milter_ctx->sessions_cache) {
+ rspamd_worker_session_cache_add(milter_ctx->sessions_cache,
+ priv->pool->tag.uid, &session->ref.refcount, session);
+ }
+
+ return rspamd_milter_handle_session(session, priv);
+}
+
+gboolean
+rspamd_milter_set_reply(struct rspamd_milter_session *session,
+ rspamd_fstring_t *rcode,
+ rspamd_fstring_t *xcode,
+ rspamd_fstring_t *reply)
+{
+ GString *buf;
+ gboolean ret;
+
+ buf = g_string_sized_new(xcode->len + rcode->len + reply->len + 2);
+ rspamd_printf_gstring(buf, "%V %V %V", rcode, xcode, reply);
+ ret = rspamd_milter_send_action(session, RSPAMD_MILTER_REPLYCODE,
+ buf);
+ g_string_free(buf, TRUE);
+
+ return ret;
+}
+
+#define SET_COMMAND(cmd, sz, reply, pos) \
+ do { \
+ guint32 _len; \
+ _len = (sz) + 1; \
+ (reply) = rspamd_fstring_sized_new(sizeof(_len) + _len); \
+ (reply)->len = sizeof(_len) + _len; \
+ _len = htonl(_len); \
+ memcpy((reply)->str, &_len, sizeof(_len)); \
+ (reply)->str[sizeof(_len)] = (cmd); \
+ (pos) = (guchar *) (reply)->str + sizeof(_len) + 1; \
+ } while (0)
+
+gboolean
+rspamd_milter_send_action(struct rspamd_milter_session *session,
+ enum rspamd_milter_reply act, ...)
+{
+ guint32 ver, actions, protocol, idx;
+ va_list ap;
+ guchar cmd, *pos;
+ rspamd_fstring_t *reply = NULL;
+ gsize len;
+ GString *name, *value;
+ const char *reason, *body_str;
+ struct rspamd_milter_outbuf *obuf;
+ struct rspamd_milter_private *priv = session->priv;
+
+ va_start(ap, act);
+ cmd = act;
+
+ switch (act) {
+ case RSPAMD_MILTER_ACCEPT:
+ case RSPAMD_MILTER_CONTINUE:
+ case RSPAMD_MILTER_DISCARD:
+ case RSPAMD_MILTER_PROGRESS:
+ case RSPAMD_MILTER_REJECT:
+ case RSPAMD_MILTER_TEMPFAIL:
+ /* No additional arguments */
+ msg_debug_milter("send %c command", cmd);
+ SET_COMMAND(cmd, 0, reply, pos);
+ break;
+ case RSPAMD_MILTER_QUARANTINE:
+ reason = va_arg(ap, const char *);
+
+ if (reason == NULL) {
+ reason = "";
+ }
+
+ len = strlen(reason);
+ msg_debug_milter("send quarantine action %s", reason);
+ SET_COMMAND(cmd, len + 1, reply, pos);
+ memcpy(pos, reason, len + 1);
+ break;
+ case RSPAMD_MILTER_ADDHEADER:
+ name = va_arg(ap, GString *);
+ value = va_arg(ap, GString *);
+
+ /* Name and value must be zero terminated */
+ msg_debug_milter("add header command - \"%v\"=\"%v\"", name, value);
+ SET_COMMAND(cmd, name->len + value->len + 2, reply, pos);
+ memcpy(pos, name->str, name->len + 1);
+ pos += name->len + 1;
+ memcpy(pos, value->str, value->len + 1);
+ break;
+ case RSPAMD_MILTER_CHGHEADER:
+ case RSPAMD_MILTER_INSHEADER:
+ idx = va_arg(ap, guint32);
+ name = va_arg(ap, GString *);
+ value = va_arg(ap, GString *);
+
+ msg_debug_milter("change/insert header command pos = %d- \"%v\"=\"%v\"",
+ idx, name, value);
+ /* Name and value must be zero terminated */
+ SET_COMMAND(cmd, name->len + value->len + 2 + sizeof(guint32),
+ reply, pos);
+ idx = htonl(idx);
+ memcpy(pos, &idx, sizeof(idx));
+ pos += sizeof(idx);
+ memcpy(pos, name->str, name->len + 1);
+ pos += name->len + 1;
+ memcpy(pos, value->str, value->len + 1);
+ break;
+ case RSPAMD_MILTER_REPLBODY:
+ len = va_arg(ap, gsize);
+ body_str = va_arg(ap, const char *);
+ msg_debug_milter("want to change body; size = %uz",
+ len);
+ SET_COMMAND(cmd, len, reply, pos);
+ memcpy(pos, body_str, len);
+ break;
+ case RSPAMD_MILTER_REPLYCODE:
+ case RSPAMD_MILTER_ADDRCPT:
+ case RSPAMD_MILTER_DELRCPT:
+ case RSPAMD_MILTER_CHGFROM:
+ /* Single GString * argument */
+ value = va_arg(ap, GString *);
+ msg_debug_milter("command %c; value=%v", cmd, value);
+ SET_COMMAND(cmd, value->len + 1, reply, pos);
+ memcpy(pos, value->str, value->len + 1);
+ break;
+ case RSPAMD_MILTER_OPTNEG:
+ ver = va_arg(ap, guint32);
+ actions = va_arg(ap, guint32);
+ protocol = va_arg(ap, guint32);
+
+ msg_debug_milter("optneg reply: ver=%d, actions=%d, protocol=%d",
+ ver, actions, protocol);
+ ver = htonl(ver);
+ actions = htonl(actions);
+ protocol = htonl(protocol);
+ SET_COMMAND(cmd, sizeof(guint32) * 3, reply, pos);
+ memcpy(pos, &ver, sizeof(ver));
+ pos += sizeof(ver);
+ memcpy(pos, &actions, sizeof(actions));
+ pos += sizeof(actions);
+ memcpy(pos, &protocol, sizeof(protocol));
+ break;
+ default:
+ msg_err_milter("invalid command: %c", cmd);
+ break;
+ }
+
+ va_end(ap);
+
+ if (reply) {
+ obuf = g_malloc(sizeof(*obuf));
+ obuf->buf = reply;
+ obuf->pos = 0;
+ DL_APPEND(priv->out_chain, obuf);
+ priv->state = RSPAMD_MILTER_WRITE_REPLY;
+ rspamd_milter_plan_io(session, priv, EV_WRITE);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_milter_add_header(struct rspamd_milter_session *session,
+ GString *name, GString *value)
+{
+ return rspamd_milter_send_action(session, RSPAMD_MILTER_ADDHEADER,
+ name, value);
+}
+
+gboolean
+rspamd_milter_del_header(struct rspamd_milter_session *session,
+ GString *name)
+{
+ GString value;
+ guint32 idx = 1;
+
+ value.str = (gchar *) "";
+ value.len = 0;
+
+ return rspamd_milter_send_action(session, RSPAMD_MILTER_CHGHEADER,
+ idx, name, &value);
+}
+
+void rspamd_milter_session_unref(struct rspamd_milter_session *session)
+{
+ REF_RELEASE(session);
+}
+
+struct rspamd_milter_session *
+rspamd_milter_session_ref(struct rspamd_milter_session *session)
+{
+ REF_RETAIN(session);
+
+ return session;
+}
+
+#define IF_MACRO(lit) \
+ RSPAMD_FTOK_ASSIGN(&srch, (lit)); \
+ found = g_hash_table_lookup(session->macros, &srch); \
+ if (found)
+
+static void
+rspamd_milter_macro_http(struct rspamd_milter_session *session,
+ struct rspamd_http_message *msg)
+{
+ rspamd_ftok_t *found, srch;
+ struct rspamd_milter_private *priv = session->priv;
+
+ /*
+ * We assume postfix macros here, sendmail ones might be slightly
+ * different
+ */
+
+ if (!session->macros) {
+ return;
+ }
+
+ IF_MACRO("{i}")
+ {
+ rspamd_http_message_add_header_len(msg, QUEUE_ID_HEADER,
+ found->begin, found->len);
+ }
+ else
+ {
+ IF_MACRO("i")
+ {
+ rspamd_http_message_add_header_len(msg, QUEUE_ID_HEADER,
+ found->begin, found->len);
+ }
+ }
+
+ IF_MACRO("{v}")
+ {
+ rspamd_http_message_add_header_len(msg, USER_AGENT_HEADER,
+ found->begin, found->len);
+ }
+ else
+ {
+ IF_MACRO("v")
+ {
+ rspamd_http_message_add_header_len(msg, USER_AGENT_HEADER,
+ found->begin, found->len);
+ }
+ }
+
+ IF_MACRO("{cipher}")
+ {
+ rspamd_http_message_add_header_len(msg, TLS_CIPHER_HEADER,
+ found->begin, found->len);
+ }
+
+ IF_MACRO("{tls_version}")
+ {
+ rspamd_http_message_add_header_len(msg, TLS_VERSION_HEADER,
+ found->begin, found->len);
+ }
+
+ IF_MACRO("{auth_authen}")
+ {
+ rspamd_http_message_add_header_len(msg, USER_HEADER,
+ found->begin, found->len);
+ }
+
+ IF_MACRO("{rcpt_mailer}")
+ {
+ rspamd_http_message_add_header_len(msg, MAILER_HEADER,
+ found->begin, found->len);
+ }
+
+ if (milter_ctx->client_ca_name) {
+ IF_MACRO("{cert_issuer}")
+ {
+ rspamd_http_message_add_header_len(msg, CERT_ISSUER_HEADER,
+ found->begin, found->len);
+
+ if (found->len == strlen(milter_ctx->client_ca_name) &&
+ rspamd_cryptobox_memcmp(found->begin,
+ milter_ctx->client_ca_name, found->len) == 0) {
+ msg_debug_milter("process certificate issued by %T", found);
+ IF_MACRO("{cert_subject}")
+ {
+ rspamd_http_message_add_header_len(msg, USER_HEADER,
+ found->begin, found->len);
+ }
+ }
+ else {
+ msg_debug_milter("skip certificate issued by %T", found);
+ }
+ }
+ }
+ else {
+ IF_MACRO("{cert_issuer}")
+ {
+ rspamd_http_message_add_header_len(msg, CERT_ISSUER_HEADER,
+ found->begin, found->len);
+ }
+ }
+
+ if (!session->hostname || session->hostname->len == 0) {
+ IF_MACRO("{client_name}")
+ {
+ if (!(found->len == sizeof("unknown") - 1 &&
+ memcmp(found->begin, "unknown",
+ sizeof("unknown") - 1) == 0)) {
+ rspamd_http_message_add_header_len(msg, HOSTNAME_HEADER,
+ found->begin, found->len);
+ }
+ else {
+ msg_debug_milter("skip unknown hostname from being added");
+ }
+ }
+ }
+
+ IF_MACRO("{daemon_name}")
+ {
+ /* Postfix style */
+ rspamd_http_message_add_header_len(msg, MTA_NAME_HEADER,
+ found->begin, found->len);
+ }
+ else
+ {
+ /* Sendmail style */
+ IF_MACRO("{j}")
+ {
+ rspamd_http_message_add_header_len(msg, MTA_NAME_HEADER,
+ found->begin, found->len);
+ }
+ else
+ {
+ IF_MACRO("j")
+ {
+ rspamd_http_message_add_header_len(msg, MTA_NAME_HEADER,
+ found->begin, found->len);
+ }
+ }
+ }
+}
+
+struct rspamd_http_message *
+rspamd_milter_to_http(struct rspamd_milter_session *session)
+{
+ struct rspamd_http_message *msg;
+ guint i;
+ struct rspamd_email_address *rcpt;
+ struct rspamd_milter_private *priv = session->priv;
+
+ g_assert(session != NULL);
+
+ msg = rspamd_http_new_message(HTTP_REQUEST);
+
+ msg->url = rspamd_fstring_assign(msg->url, "/" MSG_CMD_CHECK_V2,
+ sizeof("/" MSG_CMD_CHECK_V2) - 1);
+
+ if (session->message) {
+ rspamd_http_message_set_body_from_fstring_steal(msg, session->message);
+ session->message = NULL;
+ }
+
+ if (session->hostname && RSPAMD_FSTRING_LEN(session->hostname) > 0) {
+ if (!(session->hostname->len == sizeof("unknown") - 1 &&
+ memcmp(RSPAMD_FSTRING_DATA(session->hostname), "unknown",
+ sizeof("unknown") - 1) == 0)) {
+ rspamd_http_message_add_header_fstr(msg, HOSTNAME_HEADER,
+ session->hostname);
+ }
+ else {
+ msg_debug_milter("skip unknown hostname from being added");
+ }
+ }
+
+ if (session->helo && session->helo->len > 0) {
+ rspamd_http_message_add_header_fstr(msg, HELO_HEADER,
+ session->helo);
+ }
+
+ if (session->from) {
+ rspamd_http_message_add_header_len(msg, FROM_HEADER,
+ session->from->raw, session->from->raw_len);
+ }
+
+ if (session->rcpts) {
+ PTR_ARRAY_FOREACH(session->rcpts, i, rcpt)
+ {
+ rspamd_http_message_add_header_len(msg, RCPT_HEADER,
+ rcpt->raw, rcpt->raw_len);
+ }
+ }
+
+ if (session->addr) {
+ if (rspamd_inet_address_get_af(session->addr) != AF_UNIX) {
+ rspamd_http_message_add_header(msg, IP_ADDR_HEADER,
+ rspamd_inet_address_to_string_pretty(session->addr));
+ }
+ else {
+ rspamd_http_message_add_header(msg, IP_ADDR_HEADER,
+ rspamd_inet_address_to_string(session->addr));
+ }
+ }
+
+ rspamd_milter_macro_http(session, msg);
+ rspamd_http_message_add_header(msg, FLAGS_HEADER, "milter,body_block");
+
+ return msg;
+}
+
+void *
+rspamd_milter_update_userdata(struct rspamd_milter_session *session,
+ void *ud)
+{
+ struct rspamd_milter_private *priv = session->priv;
+ void *prev_ud;
+
+ prev_ud = priv->ud;
+ priv->ud = ud;
+
+ return prev_ud;
+}
+
+static void
+rspamd_milter_remove_header_safe(struct rspamd_milter_session *session,
+ const gchar *key, gint nhdr)
+{
+ gint i;
+ GString *hname, *hvalue;
+ struct rspamd_milter_private *priv = session->priv;
+ khiter_t k;
+ GArray *ar;
+
+ k = kh_get(milter_headers_hash_t, priv->headers, (char *) key);
+
+ if (k != kh_end(priv->headers)) {
+ ar = kh_val(priv->headers, k);
+
+ hname = g_string_new(key);
+ hvalue = g_string_new("");
+
+ if (nhdr > 0) {
+ if (ar->len >= nhdr) {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_CHGHEADER,
+ nhdr, hname, hvalue);
+ priv->cur_hdr--;
+ }
+ }
+ else if (nhdr == 0) {
+ /* We need to clear all headers */
+ for (i = ar->len; i > 0; i--) {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_CHGHEADER,
+ i, hname, hvalue);
+ priv->cur_hdr--;
+ }
+ }
+ else {
+ /* Remove from the end */
+ if (nhdr >= -(ar->len)) {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_CHGHEADER,
+ ar->len + nhdr + 1, hname, hvalue);
+ priv->cur_hdr--;
+ }
+ }
+
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+
+ if (priv->cur_hdr < 0) {
+ msg_err_milter("negative header count after removing %s", key);
+ priv->cur_hdr = 0;
+ }
+ }
+}
+
+static void
+rspamd_milter_extract_single_header(struct rspamd_milter_session *session,
+ const gchar *hdr, const ucl_object_t *obj)
+{
+ GString *hname, *hvalue;
+ struct rspamd_milter_private *priv = session->priv;
+ gint idx = -1;
+ const ucl_object_t *val;
+
+ val = ucl_object_lookup(obj, "value");
+
+ if (val && ucl_object_type(val) == UCL_STRING) {
+ const ucl_object_t *idx_obj;
+ gboolean has_idx = FALSE;
+
+ idx_obj = ucl_object_lookup_any(obj, "order",
+ "index", NULL);
+
+ if (idx_obj && (ucl_object_type(idx_obj) == UCL_INT || ucl_object_type(idx_obj) == UCL_FLOAT)) {
+ idx = ucl_object_toint(idx_obj);
+ has_idx = TRUE;
+ }
+
+ hname = g_string_new(hdr);
+ hvalue = g_string_new(ucl_object_tostring(val));
+
+ if (has_idx) {
+ if (idx >= 0) {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_INSHEADER,
+ idx,
+ hname, hvalue);
+ }
+ else {
+ /* Calculate negative offset */
+
+ if (idx == -1) {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_ADDHEADER,
+ hname, hvalue);
+ }
+ else if (-idx <= priv->cur_hdr) {
+ /*
+ * Note: We should account MTA's own "Received:" field
+ * which wasn't passed by Milter's header command.
+ */
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_INSHEADER,
+ priv->cur_hdr + idx + 2,
+ hname, hvalue);
+ }
+ else {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_INSHEADER,
+ 0,
+ hname, hvalue);
+ }
+ }
+ }
+ else {
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_ADDHEADER,
+ hname, hvalue);
+ }
+
+ priv->cur_hdr++;
+
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ }
+}
+
+/*
+ * Returns `TRUE` if action has been processed internally by this function
+ */
+static gboolean
+rspamd_milter_process_milter_block(struct rspamd_milter_session *session,
+ const ucl_object_t *obj, struct rspamd_action *action)
+{
+ const ucl_object_t *elt, *cur;
+ ucl_object_iter_t it;
+ struct rspamd_milter_private *priv = session->priv;
+ GString *hname, *hvalue;
+
+ if (obj && ucl_object_type(obj) == UCL_OBJECT) {
+ elt = ucl_object_lookup(obj, "remove_headers");
+ /*
+ * remove_headers: {"name": 1, ... }
+ * where number is the header's position starting from '1'
+ */
+ if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+ it = NULL;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+ if (ucl_object_type(cur) == UCL_INT) {
+ rspamd_milter_remove_header_safe(session,
+ ucl_object_key(cur),
+ ucl_object_toint(cur));
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "add_headers");
+ /*
+ * add_headers: {"name": "value", ... }
+ * name could have multiple values
+ * -or- (since 1.7)
+ * {"name": {"value": "val", "order": 0}, ... }
+ */
+ if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+ it = NULL;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+
+ const char *key_name = ucl_object_key(cur);
+
+ if (ucl_object_type(cur) == UCL_STRING) {
+ /*
+ * Legacy support of {"name": "value", ... } with
+ * multiple names under the same name
+ */
+ ucl_object_iter_t *elt_it;
+ const ucl_object_t *cur_elt;
+
+ elt_it = ucl_object_iterate_new(cur);
+ while ((cur_elt = ucl_object_iterate_safe(elt_it, false)) != NULL) {
+ if (ucl_object_type(cur_elt) == UCL_STRING) {
+ hname = g_string_new(key_name);
+ hvalue = g_string_new(ucl_object_tostring(cur_elt));
+
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_ADDHEADER,
+ hname, hvalue);
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ }
+ else {
+ msg_warn_milter("legacy header with name %s, that has not a string value: %s",
+ key_name, ucl_object_type_to_string(cur_elt->type));
+ }
+ }
+ ucl_object_iterate_free(elt_it);
+ }
+ else {
+ if (ucl_object_type(cur) == UCL_OBJECT) {
+ rspamd_milter_extract_single_header(session,
+ key_name, cur);
+ }
+ else if (ucl_object_type(cur) == UCL_ARRAY) {
+ /* Multiple values for the same key */
+ ucl_object_iter_t *array_it;
+ const ucl_object_t *array_elt;
+
+ array_it = ucl_object_iterate_new(cur);
+
+ while ((array_elt = ucl_object_iterate_safe(array_it,
+ true)) != NULL) {
+ rspamd_milter_extract_single_header(session,
+ key_name, array_elt);
+ }
+
+ ucl_object_iterate_free(array_it);
+ }
+ else {
+ msg_warn_milter("non-legacy header with name %s, that has unsupported value type: %s",
+ key_name, ucl_object_type_to_string(cur->type));
+ }
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "change_from");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ hvalue = g_string_new(ucl_object_tostring(elt));
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_CHGFROM,
+ hvalue);
+ g_string_free(hvalue, TRUE);
+ }
+
+ elt = ucl_object_lookup(obj, "add_rcpt");
+
+ if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+ it = NULL;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+ hvalue = g_string_new(ucl_object_tostring(cur));
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_ADDRCPT,
+ hvalue);
+ g_string_free(hvalue, TRUE);
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "del_rcpt");
+
+ if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+ it = NULL;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+ hvalue = g_string_new(ucl_object_tostring(cur));
+ rspamd_milter_send_action(session,
+ RSPAMD_MILTER_DELRCPT,
+ hvalue);
+ g_string_free(hvalue, TRUE);
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "reject");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ if (strcmp(ucl_object_tostring(elt), "discard") == 0) {
+ priv->discard_on_reject = TRUE;
+ msg_info_milter("discard message instead of rejection");
+ }
+ else if (strcmp(ucl_object_tostring(elt), "quarantine") == 0) {
+ priv->quarantine_on_reject = TRUE;
+ msg_info_milter("quarantine message instead of rejection");
+ }
+ else {
+ priv->discard_on_reject = FALSE;
+ priv->quarantine_on_reject = FALSE;
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "no_action");
+
+ if (elt && ucl_object_type(elt) == UCL_BOOLEAN) {
+ priv->no_action = ucl_object_toboolean(elt);
+ }
+ }
+
+ if (action->action_type == METRIC_ACTION_ADD_HEADER) {
+ elt = ucl_object_lookup(obj, "spam_header");
+
+ if (elt) {
+ if (ucl_object_type(elt) == UCL_STRING) {
+ rspamd_milter_remove_header_safe(session,
+ milter_ctx->spam_header,
+ 0);
+
+ hname = g_string_new(milter_ctx->spam_header);
+ hvalue = g_string_new(ucl_object_tostring(elt));
+ rspamd_milter_send_action(session, RSPAMD_MILTER_CHGHEADER,
+ (guint32) 1, hname, hvalue);
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+
+ return TRUE;
+ }
+ else if (ucl_object_type(elt) == UCL_OBJECT) {
+ it = NULL;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+ rspamd_milter_remove_header_safe(session,
+ ucl_object_key(cur),
+ 0);
+
+ hname = g_string_new(ucl_object_key(cur));
+ hvalue = g_string_new(ucl_object_tostring(cur));
+ rspamd_milter_send_action(session, RSPAMD_MILTER_CHGHEADER,
+ (guint32) 1, hname, hvalue);
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ }
+
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE;
+}
+
+void rspamd_milter_send_task_results(struct rspamd_milter_session *session,
+ const ucl_object_t *results,
+ const gchar *new_body,
+ gsize bodylen)
+{
+ const ucl_object_t *elt;
+ struct rspamd_milter_private *priv = session->priv;
+ const gchar *str_action;
+ struct rspamd_action *action;
+ rspamd_fstring_t *xcode = NULL, *rcode = NULL, *reply = NULL;
+ GString *hname, *hvalue;
+ gboolean processed = FALSE;
+
+ if (results == NULL) {
+ msg_err_milter("cannot find scan results, tempfail");
+ rspamd_milter_send_action(session, RSPAMD_MILTER_TEMPFAIL);
+
+ goto cleanup;
+ }
+
+ elt = ucl_object_lookup(results, "action");
+
+ if (!elt) {
+ msg_err_milter("cannot find action in results, tempfail");
+ rspamd_milter_send_action(session, RSPAMD_MILTER_TEMPFAIL);
+
+ goto cleanup;
+ }
+
+ str_action = ucl_object_tostring(elt);
+ action = rspamd_config_get_action(milter_ctx->cfg, str_action);
+
+ if (action == NULL) {
+ msg_err_milter("action %s has not been registered", str_action);
+ rspamd_milter_send_action(session, RSPAMD_MILTER_TEMPFAIL);
+
+ goto cleanup;
+ }
+
+ elt = ucl_object_lookup(results, "messages");
+ if (elt) {
+ const ucl_object_t *smtp_res;
+ const gchar *msg;
+ gsize len = 0;
+
+ smtp_res = ucl_object_lookup(elt, "smtp_message");
+
+ if (smtp_res) {
+ msg = ucl_object_tolstring(smtp_res, &len);
+ reply = rspamd_fstring_new_init(msg, len);
+ }
+ }
+
+ /* Deal with milter headers */
+ elt = ucl_object_lookup(results, "milter");
+
+ if (elt) {
+ processed = rspamd_milter_process_milter_block(session, elt, action);
+ }
+
+ /* DKIM-Signature */
+ elt = ucl_object_lookup(results, "dkim-signature");
+
+ if (elt) {
+ hname = g_string_new(RSPAMD_MILTER_DKIM_HEADER);
+
+ if (ucl_object_type(elt) == UCL_STRING) {
+ hvalue = g_string_new(ucl_object_tostring(elt));
+
+ rspamd_milter_send_action(session, RSPAMD_MILTER_INSHEADER,
+ 1, hname, hvalue);
+
+ g_string_free(hvalue, TRUE);
+ }
+ else {
+ ucl_object_iter_t it;
+ const ucl_object_t *cur;
+ int i = 1;
+
+ it = ucl_object_iterate_new(elt);
+
+ while ((cur = ucl_object_iterate_safe(it, true)) != NULL) {
+ hvalue = g_string_new(ucl_object_tostring(cur));
+
+ rspamd_milter_send_action(session, RSPAMD_MILTER_INSHEADER,
+ i++, hname, hvalue);
+
+ g_string_free(hvalue, TRUE);
+ }
+
+ ucl_object_iterate_free(it);
+ }
+
+ g_string_free(hname, TRUE);
+ }
+
+ if (processed) {
+ goto cleanup;
+ }
+
+ if (new_body) {
+ rspamd_milter_send_action(session, RSPAMD_MILTER_REPLBODY,
+ bodylen, new_body);
+ }
+
+ if (priv->no_action) {
+ msg_info_milter("do not apply action %s, no_action is set",
+ str_action);
+ hname = g_string_new(RSPAMD_MILTER_ACTION_HEADER);
+ hvalue = g_string_new(str_action);
+
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ADDHEADER,
+ hname, hvalue);
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+
+ goto cleanup;
+ }
+
+ switch (action->action_type) {
+ case METRIC_ACTION_REJECT:
+ if (priv->discard_on_reject) {
+ rspamd_milter_send_action(session, RSPAMD_MILTER_DISCARD);
+ }
+ else if (priv->quarantine_on_reject) {
+ /* TODO: be more flexible about SMTP messages */
+ rspamd_milter_send_action(session, RSPAMD_MILTER_QUARANTINE,
+ RSPAMD_MILTER_QUARANTINE_MESSAGE);
+
+ /* Quarantine also requires accept action, all hail Sendmail */
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+ }
+ else {
+ rcode = rspamd_fstring_new_init(RSPAMD_MILTER_RCODE_REJECT,
+ sizeof(RSPAMD_MILTER_RCODE_REJECT) - 1);
+ xcode = rspamd_fstring_new_init(RSPAMD_MILTER_XCODE_REJECT,
+ sizeof(RSPAMD_MILTER_XCODE_REJECT) - 1);
+
+ if (!reply) {
+ if (milter_ctx->reject_message == NULL) {
+ reply = rspamd_fstring_new_init(
+ RSPAMD_MILTER_REJECT_MESSAGE,
+ sizeof(RSPAMD_MILTER_REJECT_MESSAGE) - 1);
+ }
+ else {
+ reply = rspamd_fstring_new_init(milter_ctx->reject_message,
+ strlen(milter_ctx->reject_message));
+ }
+ }
+
+ rspamd_milter_set_reply(session, rcode, xcode, reply);
+ }
+ break;
+ case METRIC_ACTION_SOFT_REJECT:
+ rcode = rspamd_fstring_new_init(RSPAMD_MILTER_RCODE_TEMPFAIL,
+ sizeof(RSPAMD_MILTER_RCODE_TEMPFAIL) - 1);
+ xcode = rspamd_fstring_new_init(RSPAMD_MILTER_XCODE_TEMPFAIL,
+ sizeof(RSPAMD_MILTER_XCODE_TEMPFAIL) - 1);
+
+ if (!reply) {
+ reply = rspamd_fstring_new_init(RSPAMD_MILTER_TEMPFAIL_MESSAGE,
+ sizeof(RSPAMD_MILTER_TEMPFAIL_MESSAGE) - 1);
+ }
+
+ rspamd_milter_set_reply(session, rcode, xcode, reply);
+ break;
+
+ case METRIC_ACTION_REWRITE_SUBJECT:
+ elt = ucl_object_lookup(results, "subject");
+
+ if (elt) {
+ hname = g_string_new("Subject");
+ hvalue = g_string_new(ucl_object_tostring(elt));
+
+ rspamd_milter_send_action(session, RSPAMD_MILTER_CHGHEADER,
+ (guint32) 1, hname, hvalue);
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ }
+
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+ break;
+
+ case METRIC_ACTION_ADD_HEADER:
+ /* Remove existing headers */
+ rspamd_milter_remove_header_safe(session,
+ milter_ctx->spam_header,
+ 0);
+
+ hname = g_string_new(milter_ctx->spam_header);
+ hvalue = g_string_new("Yes");
+ rspamd_milter_send_action(session, RSPAMD_MILTER_CHGHEADER,
+ (guint32) 1, hname, hvalue);
+ g_string_free(hname, TRUE);
+ g_string_free(hvalue, TRUE);
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+ break;
+
+ case METRIC_ACTION_QUARANTINE:
+ /* TODO: be more flexible about SMTP messages */
+ rspamd_milter_send_action(session, RSPAMD_MILTER_QUARANTINE,
+ RSPAMD_MILTER_QUARANTINE_MESSAGE);
+
+ /* Quarantine also requires accept action, all hail Sendmail */
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+ break;
+ case METRIC_ACTION_DISCARD:
+ rspamd_milter_send_action(session, RSPAMD_MILTER_DISCARD);
+ break;
+ case METRIC_ACTION_GREYLIST:
+ case METRIC_ACTION_NOACTION:
+ default:
+ rspamd_milter_send_action(session, RSPAMD_MILTER_ACCEPT);
+ break;
+ }
+
+cleanup:
+ rspamd_fstring_free(rcode);
+ rspamd_fstring_free(xcode);
+ rspamd_fstring_free(reply);
+
+ rspamd_milter_session_reset(session, RSPAMD_MILTER_RESET_ABORT);
+}
+
+void rspamd_milter_init_library(const struct rspamd_milter_context *ctx)
+{
+ milter_ctx = ctx;
+}
+
+rspamd_mempool_t *
+rspamd_milter_get_session_pool(struct rspamd_milter_session *session)
+{
+ struct rspamd_milter_private *priv = session->priv;
+
+ return priv->pool;
+}
diff --git a/src/libserver/milter.h b/src/libserver/milter.h
new file mode 100644
index 0000000..096cda8
--- /dev/null
+++ b/src/libserver/milter.h
@@ -0,0 +1,188 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_MILTER_H
+#define RSPAMD_MILTER_H
+
+#include "config.h"
+#include "fstring.h"
+#include "addr.h"
+#include "contrib/libucl/ucl.h"
+#include "contrib/libev/ev.h"
+#include "ref.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rspamd_milter_reply {
+ RSPAMD_MILTER_ADDRCPT = '+',
+ RSPAMD_MILTER_DELRCPT = '-',
+ RSPAMD_MILTER_ACCEPT = 'a',
+ RSPAMD_MILTER_CONTINUE = 'c',
+ RSPAMD_MILTER_DISCARD = 'd',
+ RSPAMD_MILTER_CHGFROM = 'e',
+ RSPAMD_MILTER_ADDHEADER = 'h',
+ RSPAMD_MILTER_CHGHEADER = 'm',
+ RSPAMD_MILTER_INSHEADER = 'i',
+ RSPAMD_MILTER_REPLBODY = 'b',
+ RSPAMD_MILTER_REJECT = 'r',
+ RSPAMD_MILTER_TEMPFAIL = 't',
+ RSPAMD_MILTER_REPLYCODE = 'y',
+ RSPAMD_MILTER_OPTNEG = 'O',
+ RSPAMD_MILTER_PROGRESS = 'p',
+ RSPAMD_MILTER_QUARANTINE = 'q',
+};
+
+struct rspamd_email_address;
+struct ev_loop;
+struct rspamd_http_message;
+struct rspamd_config;
+
+struct rspamd_milter_context {
+ const gchar *spam_header;
+ const gchar *client_ca_name;
+ const gchar *reject_message;
+ void *sessions_cache;
+ struct rspamd_config *cfg;
+ gboolean discard_on_reject;
+ gboolean quarantine_on_reject;
+};
+
+struct rspamd_milter_session {
+ GHashTable *macros;
+ rspamd_inet_addr_t *addr;
+ struct rspamd_email_address *from;
+ GPtrArray *rcpts;
+ rspamd_fstring_t *helo;
+ rspamd_fstring_t *hostname;
+ rspamd_fstring_t *message;
+ void *priv;
+ ref_entry_t ref;
+};
+
+typedef void (*rspamd_milter_finish)(gint fd,
+ struct rspamd_milter_session *session, void *ud);
+
+typedef void (*rspamd_milter_error)(gint fd,
+ struct rspamd_milter_session *session,
+ void *ud, GError *err);
+
+/**
+ * Handles socket with milter protocol
+ * @param fd
+ * @param finish_cb
+ * @param error_cb
+ * @param ud
+ * @return
+ */
+gboolean rspamd_milter_handle_socket(gint fd, ev_tstamp timeout,
+ rspamd_mempool_t *pool,
+ struct ev_loop *ev_base, rspamd_milter_finish finish_cb,
+ rspamd_milter_error error_cb, void *ud);
+
+/**
+ * Updates userdata for a session, returns previous userdata
+ * @param session
+ * @param ud
+ * @return
+ */
+void *rspamd_milter_update_userdata(struct rspamd_milter_session *session,
+ void *ud);
+
+/**
+ * Sets SMTP reply string
+ * @param session
+ * @param rcode
+ * @param xcode
+ * @param reply
+ * @return
+ */
+gboolean rspamd_milter_set_reply(struct rspamd_milter_session *session,
+ rspamd_fstring_t *rcode,
+ rspamd_fstring_t *xcode,
+ rspamd_fstring_t *reply);
+
+/**
+ * Send some action to the MTA
+ * @param fd
+ * @param session
+ * @param act
+ * @return
+ */
+gboolean rspamd_milter_send_action(struct rspamd_milter_session *session,
+ enum rspamd_milter_reply act, ...);
+
+/**
+ * Adds some header
+ * @param session
+ * @param name
+ * @param value
+ * @return
+ */
+gboolean rspamd_milter_add_header(struct rspamd_milter_session *session,
+ GString *name, GString *value);
+
+/**
+ * Removes some header
+ * @param session
+ * @param name
+ * @return
+ */
+gboolean rspamd_milter_del_header(struct rspamd_milter_session *session,
+ GString *name);
+
+void rspamd_milter_session_unref(struct rspamd_milter_session *session);
+
+struct rspamd_milter_session *rspamd_milter_session_ref(
+ struct rspamd_milter_session *session);
+
+/**
+ * Converts milter session to HTTP session that is suitable for Rspamd
+ * @param session
+ * @return
+ */
+struct rspamd_http_message *rspamd_milter_to_http(
+ struct rspamd_milter_session *session);
+
+/**
+ * Sends task results to the
+ * @param session
+ * @param results
+ */
+void rspamd_milter_send_task_results(struct rspamd_milter_session *session,
+ const ucl_object_t *results,
+ const gchar *new_body,
+ gsize bodylen);
+
+/**
+ * Init internal milter context
+ * @param spam_header spam header name (must NOT be NULL)
+ */
+void rspamd_milter_init_library(const struct rspamd_milter_context *ctx);
+
+/**
+ * Returns pool for a session
+ * @param session
+ * @return
+ */
+rspamd_mempool_t *rspamd_milter_get_session_pool(
+ struct rspamd_milter_session *session);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/milter_internal.h b/src/libserver/milter_internal.h
new file mode 100644
index 0000000..bc292d3
--- /dev/null
+++ b/src/libserver/milter_internal.h
@@ -0,0 +1,176 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MILTER_INTERNAL_H
+#define RSPAMD_MILTER_INTERNAL_H
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "contrib/libev/ev.h"
+#include "khash.h"
+#include "libutil/str_util.h"
+#include "libutil/libev_helper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rspamd_milter_state {
+ st_len_1 = 0,
+ st_len_2,
+ st_len_3,
+ st_len_4,
+ st_read_cmd,
+ st_read_data
+};
+
+struct rspamd_milter_parser {
+ rspamd_fstring_t *buf;
+ goffset pos;
+ goffset cmd_start;
+ gsize datalen;
+ enum rspamd_milter_state state;
+ gchar cur_cmd;
+};
+
+struct rspamd_milter_outbuf {
+ rspamd_fstring_t *buf;
+ goffset pos;
+ struct rspamd_milter_outbuf *next, *prev;
+};
+
+enum rspamd_milter_io_state {
+ RSPAMD_MILTER_READ_MORE,
+ RSPAMD_MILTER_WRITE_REPLY,
+ RSPAMD_MILTER_WANNA_DIE,
+ RSPAMD_MILTER_WRITE_AND_DIE,
+ RSPAMD_MILTER_PONG_AND_DIE,
+};
+
+KHASH_INIT(milter_headers_hash_t, char *, GArray *, true,
+ rspamd_strcase_hash, rspamd_strcase_equal);
+
+struct rspamd_milter_private {
+ struct rspamd_milter_parser parser;
+ struct rspamd_io_ev ev;
+ struct rspamd_milter_outbuf *out_chain;
+ struct ev_loop *event_loop;
+ rspamd_mempool_t *pool;
+ khash_t(milter_headers_hash_t) * headers;
+ gint cur_hdr;
+ rspamd_milter_finish fin_cb;
+ rspamd_milter_error err_cb;
+ void *ud;
+ enum rspamd_milter_io_state state;
+ int fd;
+ gboolean discard_on_reject;
+ gboolean quarantine_on_reject;
+ gboolean no_action;
+};
+
+enum rspamd_milter_io_cmd {
+ RSPAMD_MILTER_CMD_ABORT = 'A', /* Abort */
+ RSPAMD_MILTER_CMD_BODY = 'B', /* Body chunk */
+ RSPAMD_MILTER_CMD_CONNECT = 'C', /* Connection information */
+ RSPAMD_MILTER_CMD_MACRO = 'D', /* Define macro */
+ RSPAMD_MILTER_CMD_BODYEOB = 'E', /* final body chunk (end of message) */
+ RSPAMD_MILTER_CMD_HELO = 'H', /* HELO/EHLO */
+ RSPAMD_MILTER_CMD_QUIT_NC = 'K', /* QUIT but new connection follows */
+ RSPAMD_MILTER_CMD_HEADER = 'L', /* Header */
+ RSPAMD_MILTER_CMD_MAIL = 'M', /* MAIL from */
+ RSPAMD_MILTER_CMD_EOH = 'N', /* EOH */
+ RSPAMD_MILTER_CMD_OPTNEG = 'O', /* Option negotiation */
+ RSPAMD_MILTER_CMD_QUIT = 'Q', /* QUIT */
+ RSPAMD_MILTER_CMD_RCPT = 'R', /* RCPT to */
+ RSPAMD_MILTER_CMD_DATA = 'T', /* DATA */
+ RSPAMD_MILTER_CMD_UNKNOWN = 'U' /* Any unknown command */
+};
+
+/*
+ * Protocol flags
+ */
+#define RSPAMD_MILTER_FLAG_NOUNKNOWN (1L << 8) /* filter does not want unknown cmd */
+#define RSPAMD_MILTER_FLAG_NODATA (1L << 9) /* filter does not want DATA */
+#define RSPAMD_MILTER_FLAG_NR_HDR (1L << 7) /* filter won't reply for header */
+#define RSPAMD_MILTER_FLAG_SKIP (1L << 10) /* MTA supports SMFIR_SKIP */
+#define RSPAMD_MILTER_FLAG_RCPT_REJ (1L << 11) /* filter wants rejected RCPTs */
+#define RSPAMD_MILTER_FLAG_NR_CONN (1L << 12) /* filter won't reply for connect */
+#define RSPAMD_MILTER_FLAG_NR_HELO (1L << 13) /* filter won't reply for HELO */
+#define RSPAMD_MILTER_FLAG_NR_MAIL (1L << 14) /* filter won't reply for MAIL */
+#define RSPAMD_MILTER_FLAG_NR_RCPT (1L << 15) /* filter won't reply for RCPT */
+#define RSPAMD_MILTER_FLAG_NR_DATA (1L << 16) /* filter won't reply for DATA */
+#define RSPAMD_MILTER_FLAG_NR_UNKN (1L << 17) /* filter won't reply for UNKNOWN */
+#define RSPAMD_MILTER_FLAG_NR_EOH (1L << 18) /* filter won't reply for eoh */
+#define RSPAMD_MILTER_FLAG_NR_BODY (1L << 19) /* filter won't reply for body chunk */
+
+/*
+ * For now, we specify that we want to reply just after EOM
+ */
+#define RSPAMD_MILTER_FLAG_NOREPLY_MASK \
+ (RSPAMD_MILTER_FLAG_NR_CONN | RSPAMD_MILTER_FLAG_NR_HELO | \
+ RSPAMD_MILTER_FLAG_NR_MAIL | RSPAMD_MILTER_FLAG_NR_RCPT | \
+ RSPAMD_MILTER_FLAG_NR_DATA | RSPAMD_MILTER_FLAG_NR_UNKN | \
+ RSPAMD_MILTER_FLAG_NR_HDR | RSPAMD_MILTER_FLAG_NR_EOH | \
+ RSPAMD_MILTER_FLAG_NR_BODY)
+
+/*
+ * Options that the filter may send at initial handshake time, and message
+ * modifications that the filter may request at the end of the message body.
+ */
+#define RSPAMD_MILTER_FLAG_ADDHDRS (1L << 0) /* filter may add headers */
+#define RSPAMD_MILTER_FLAG_CHGBODY (1L << 1) /* filter may replace body */
+#define RSPAMD_MILTER_FLAG_ADDRCPT (1L << 2) /* filter may add recipients */
+#define RSPAMD_MILTER_FLAG_DELRCPT (1L << 3) /* filter may delete recipients */
+#define RSPAMD_MILTER_FLAG_CHGHDRS (1L << 4) /* filter may change/delete headers */
+#define RSPAMD_MILTER_FLAG_QUARANTINE (1L << 5) /* filter may request quarantine */
+
+#define RSPAMD_MILTER_ACTIONS_MASK \
+ (RSPAMD_MILTER_FLAG_ADDHDRS | RSPAMD_MILTER_FLAG_ADDRCPT | \
+ RSPAMD_MILTER_FLAG_DELRCPT | RSPAMD_MILTER_FLAG_CHGHDRS | \
+ RSPAMD_MILTER_FLAG_CHGBODY | RSPAMD_MILTER_FLAG_QUARANTINE)
+
+enum rspamd_milter_connect_proto {
+ RSPAMD_MILTER_CONN_UNKNOWN = 'U',
+ RSPAMD_MILTER_CONN_UNIX = 'L',
+ RSPAMD_MILTER_CONN_INET = '4',
+ RSPAMD_MILTER_CONN_INET6 = '6',
+};
+
+/*
+ * Rspamd supports just version 6 of the protocol, failing all versions below
+ * this one
+ */
+#define RSPAMD_MILTER_PROTO_VER 6
+
+#define RSPAMD_MILTER_MESSAGE_CHUNK 65536
+
+#define RSPAMD_MILTER_RCODE_REJECT "554"
+#define RSPAMD_MILTER_RCODE_TEMPFAIL "451"
+#define RSPAMD_MILTER_RCODE_LATER "452"
+#define RSPAMD_MILTER_XCODE_REJECT "5.7.1"
+#define RSPAMD_MILTER_XCODE_TEMPFAIL "4.7.1"
+#define RSPAMD_MILTER_REJECT_MESSAGE "Spam message rejected"
+#define RSPAMD_MILTER_QUARANTINE_MESSAGE "Spam message quarantined"
+#define RSPAMD_MILTER_TEMPFAIL_MESSAGE "Try again later"
+#define RSPAMD_MILTER_SPAM_HEADER "X-Spam"
+#define RSPAMD_MILTER_DKIM_HEADER "DKIM-Signature"
+#define RSPAMD_MILTER_ACTION_HEADER "X-Rspamd-Action"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif \ No newline at end of file
diff --git a/src/libserver/monitored.c b/src/libserver/monitored.c
new file mode 100644
index 0000000..3aebaf6
--- /dev/null
+++ b/src/libserver/monitored.c
@@ -0,0 +1,735 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <contrib/librdns/rdns.h>
+#include "rdns.h"
+#include "mem_pool.h"
+#include "cfg_file.h"
+#include "cryptobox.h"
+#include "logger.h"
+#include "contrib/uthash/utlist.h"
+
+static const gdouble default_monitoring_interval = 60.0;
+static const guint default_max_errors = 2;
+static const gdouble default_max_monitored_mult = 32;
+static const gdouble default_min_monitored_mult = 0.1;
+static const gdouble default_initial_monitored_mult = default_min_monitored_mult;
+static const gdouble default_offline_monitored_mult = 8.0;
+
+struct rspamd_monitored_methods {
+ void *(*monitored_config)(struct rspamd_monitored *m,
+ struct rspamd_monitored_ctx *ctx,
+ const ucl_object_t *opts);
+ gboolean (*monitored_update)(struct rspamd_monitored *m,
+ struct rspamd_monitored_ctx *ctx, gpointer ud);
+ void (*monitored_dtor)(struct rspamd_monitored *m,
+ struct rspamd_monitored_ctx *ctx, gpointer ud);
+ gpointer ud;
+};
+
+struct rspamd_monitored_ctx {
+ struct rspamd_config *cfg;
+ struct rdns_resolver *resolver;
+ struct ev_loop *event_loop;
+ GPtrArray *elts;
+ GHashTable *helts;
+ mon_change_cb change_cb;
+ gpointer ud;
+ gdouble monitoring_interval;
+ gdouble max_monitored_mult;
+ gdouble min_monitored_mult;
+ gdouble initial_monitored_mult;
+ gdouble offline_monitored_mult;
+ guint max_errors;
+ gboolean initialized;
+};
+
+struct rspamd_monitored {
+ gchar *url;
+ gdouble monitoring_mult;
+ gdouble offline_time;
+ gdouble total_offline_time;
+ gdouble latency;
+ guint nchecks;
+ guint max_errors;
+ guint cur_errors;
+ gboolean alive;
+ enum rspamd_monitored_type type;
+ enum rspamd_monitored_flags flags;
+ struct rspamd_monitored_ctx *ctx;
+ struct rspamd_monitored_methods proc;
+ ev_timer periodic;
+ gchar tag[RSPAMD_MONITORED_TAG_LEN];
+};
+
+#define msg_err_mon(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "monitored", m->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_mon(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "monitored", m->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_mon(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "monitored", m->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice_mon(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE, \
+ "monitored", m->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_mon(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_monitored_log_id, "monitored", m->tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(monitored)
+
+static inline void
+rspamd_monitored_propagate_error(struct rspamd_monitored *m,
+ const gchar *error)
+{
+ if (m->alive) {
+ if (m->cur_errors < m->max_errors) {
+
+ m->cur_errors++;
+ /* Reduce timeout */
+ rspamd_monitored_stop(m);
+
+ if (m->monitoring_mult > m->ctx->min_monitored_mult) {
+ if (m->monitoring_mult < 1.0) {
+ m->monitoring_mult = 1.0;
+ }
+ else {
+ m->monitoring_mult /= 2.0;
+ }
+ }
+
+ msg_debug_mon("%s on resolving %s, %d retries left; next check in %.2f",
+ error, m->url, m->max_errors - m->cur_errors,
+ m->ctx->monitoring_interval * m->monitoring_mult);
+
+ rspamd_monitored_start(m);
+ }
+ else {
+ msg_notice_mon("%s on resolving %s, disable object",
+ error, m->url);
+ m->alive = FALSE;
+ m->offline_time = rspamd_get_calendar_ticks();
+ rspamd_monitored_stop(m);
+ m->monitoring_mult = 2.0;
+ rspamd_monitored_start(m);
+
+ if (m->ctx->change_cb) {
+ m->ctx->change_cb(m->ctx, m, FALSE, m->ctx->ud);
+ }
+ }
+ }
+ else {
+ if (m->monitoring_mult < m->ctx->offline_monitored_mult) {
+ /* Increase timeout */
+ rspamd_monitored_stop(m);
+ m->monitoring_mult *= 2.0;
+ rspamd_monitored_start(m);
+ }
+ else {
+ rspamd_monitored_stop(m);
+ m->monitoring_mult = m->ctx->offline_monitored_mult;
+ rspamd_monitored_start(m);
+ }
+ }
+}
+
+static inline void
+rspamd_monitored_propagate_success(struct rspamd_monitored *m, gdouble lat)
+{
+ gdouble t;
+
+ m->cur_errors = 0;
+
+ if (!m->alive) {
+ m->monitoring_mult = 1.0;
+ t = rspamd_get_calendar_ticks();
+ m->total_offline_time += t - m->offline_time;
+ m->alive = TRUE;
+ msg_notice_mon("restoring %s after %.1f seconds of downtime, "
+ "total downtime: %.1f",
+ m->url, t - m->offline_time, m->total_offline_time);
+ m->offline_time = 0;
+ m->nchecks = 1;
+ m->latency = lat;
+ rspamd_monitored_stop(m);
+ rspamd_monitored_start(m);
+
+ if (m->ctx->change_cb) {
+ m->ctx->change_cb(m->ctx, m, TRUE, m->ctx->ud);
+ }
+ }
+ else {
+ /* Increase monitored interval */
+ if (m->monitoring_mult < m->ctx->max_monitored_mult) {
+ if (m->monitoring_mult < 1.0) {
+ /* Upgrade fast from the initial mult */
+ m->monitoring_mult = 1.0;
+ }
+ else {
+ m->monitoring_mult *= 2.0;
+ }
+ }
+ else {
+ m->monitoring_mult = m->ctx->max_monitored_mult;
+ }
+ m->latency = (lat + m->latency * m->nchecks) / (m->nchecks + 1);
+ m->nchecks++;
+ }
+}
+
+static void
+rspamd_monitored_periodic(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_monitored *m = (struct rspamd_monitored *) w->data;
+ gdouble jittered;
+ gboolean ret = FALSE;
+
+ if (m->proc.monitored_update) {
+ ret = m->proc.monitored_update(m, m->ctx, m->proc.ud);
+ }
+
+ jittered = rspamd_time_jitter(m->ctx->monitoring_interval * m->monitoring_mult,
+ 0.0);
+
+ if (ret) {
+ m->periodic.repeat = jittered;
+ ev_timer_again(EV_A_ & m->periodic);
+ }
+}
+
+struct rspamd_dns_monitored_conf {
+ enum rdns_request_type rt;
+ GString *request;
+ radix_compressed_t *expected;
+ struct rspamd_monitored *m;
+ gint expected_code;
+ gdouble check_tm;
+};
+
+static void
+rspamd_monitored_dns_random(struct rspamd_monitored *m,
+ struct rspamd_dns_monitored_conf *conf)
+{
+ gchar random_prefix[32];
+ const gchar dns_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_";
+ gint len;
+
+ len = rspamd_random_uint64_fast() % sizeof(random_prefix);
+
+ if (len < 8) {
+ len = 8;
+ }
+
+ for (guint i = 0; i < len; i++) {
+ guint idx = rspamd_random_uint64_fast() % (sizeof(dns_chars) - 1);
+ random_prefix[i] = dns_chars[idx];
+ }
+
+ conf->request->len = 0;
+ rspamd_printf_gstring(conf->request, "%*.s.%s", len, random_prefix,
+ m->url);
+}
+
+static void *
+rspamd_monitored_dns_conf(struct rspamd_monitored *m,
+ struct rspamd_monitored_ctx *ctx,
+ const ucl_object_t *opts)
+{
+ struct rspamd_dns_monitored_conf *conf;
+ const ucl_object_t *elt;
+ gint rt;
+ GString *req = g_string_sized_new(127);
+
+ conf = g_malloc0(sizeof(*conf));
+ conf->rt = RDNS_REQUEST_A;
+ conf->m = m;
+ conf->expected_code = -1;
+
+ if (opts) {
+ elt = ucl_object_lookup(opts, "type");
+
+ if (elt) {
+ rt = rdns_type_fromstr(ucl_object_tostring(elt));
+
+ if (rt != -1) {
+ conf->rt = rt;
+ }
+ else {
+ msg_err_mon("invalid resolve type: %s",
+ ucl_object_tostring(elt));
+ }
+ }
+
+ if (!(m->flags & RSPAMD_MONITORED_RANDOM)) {
+ /* Prefix is useless for random monitored */
+ elt = ucl_object_lookup(opts, "prefix");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ rspamd_printf_gstring(req, "%s.", ucl_object_tostring(elt));
+ }
+ }
+
+ elt = ucl_object_lookup(opts, "ipnet");
+
+ if (elt) {
+ if (ucl_object_type(elt) == UCL_STRING) {
+ radix_add_generic_iplist(ucl_object_tostring(elt),
+ &conf->expected, FALSE, NULL);
+ }
+ else if (ucl_object_type(elt) == UCL_ARRAY) {
+ const ucl_object_t *cur;
+ ucl_object_iter_t it = NULL;
+
+ while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+ radix_add_generic_iplist(ucl_object_tostring(elt),
+ &conf->expected, FALSE, NULL);
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(opts, "rcode");
+ if (elt) {
+ rt = rdns_rcode_fromstr(ucl_object_tostring(elt));
+
+ if (rt != -1) {
+ conf->expected_code = rt;
+ }
+ else {
+ msg_err_mon("invalid resolve rcode: %s",
+ ucl_object_tostring(elt));
+ }
+ }
+ }
+
+ if (!(m->flags & RSPAMD_MONITORED_RANDOM)) {
+ rspamd_printf_gstring(req, "%s", m->url);
+ }
+
+ conf->request = req;
+
+ return conf;
+}
+
+static void
+rspamd_monitored_dns_cb(struct rdns_reply *reply, void *arg)
+{
+ struct rspamd_dns_monitored_conf *conf = arg;
+ struct rspamd_monitored *m;
+ struct rdns_reply_entry *cur;
+ gboolean is_special_reply = FALSE;
+ gdouble lat;
+
+ m = conf->m;
+ lat = rspamd_get_calendar_ticks() - conf->check_tm;
+ conf->check_tm = 0;
+ msg_debug_mon("dns callback for %s in %.2f: %s", m->url, lat,
+ rdns_strerror(reply->code));
+
+ if (reply->code == RDNS_RC_TIMEOUT) {
+ rspamd_monitored_propagate_error(m, "timeout");
+ }
+ else if (reply->code == RDNS_RC_SERVFAIL) {
+ rspamd_monitored_propagate_error(m, "servfail");
+ }
+ else if (reply->code == RDNS_RC_REFUSED) {
+ rspamd_monitored_propagate_error(m, "refused");
+ }
+ else {
+ if (conf->expected_code != -1) {
+ if (reply->code != conf->expected_code) {
+ if (reply->code == RDNS_RC_NOREC &&
+ conf->expected_code == RDNS_RC_NXDOMAIN) {
+ rspamd_monitored_propagate_success(m, lat);
+ }
+ else {
+ LL_FOREACH(reply->entries, cur)
+ {
+ if (cur->type == RDNS_REQUEST_A) {
+ if ((guint32) cur->content.a.addr.s_addr ==
+ htonl(INADDR_LOOPBACK)) {
+ is_special_reply = TRUE;
+ }
+ }
+ }
+
+ if (is_special_reply) {
+ msg_notice_mon("DNS query blocked on %s "
+ "(127.0.0.1 returned), "
+ "possibly due to high volume",
+ m->url);
+ }
+ else {
+ msg_notice_mon("DNS reply returned '%s' for %s while '%s' "
+ "was expected when querying for '%s'"
+ "(likely DNS spoofing or BL internal issues)",
+ rdns_strerror(reply->code),
+ m->url,
+ rdns_strerror(conf->expected_code),
+ conf->request->str);
+ }
+
+ rspamd_monitored_propagate_error(m, "invalid return");
+ }
+ }
+ else {
+ rspamd_monitored_propagate_success(m, lat);
+ }
+ }
+ else if (conf->expected) {
+ /* We also need to check IP */
+ if (reply->code != RDNS_RC_NOERROR) {
+ rspamd_monitored_propagate_error(m, "no record");
+ }
+ else {
+ rspamd_inet_addr_t *addr;
+
+ addr = rspamd_inet_address_from_rnds(reply->entries);
+
+ if (!addr) {
+ rspamd_monitored_propagate_error(m,
+ "unreadable address");
+ }
+ else if (radix_find_compressed_addr(conf->expected, addr)) {
+ msg_notice_mon("bad address %s is returned when monitoring %s",
+ rspamd_inet_address_to_string(addr),
+ conf->request->str);
+ rspamd_monitored_propagate_error(m,
+ "invalid address");
+
+ rspamd_inet_address_free(addr);
+ }
+ else {
+ rspamd_monitored_propagate_success(m, lat);
+ rspamd_inet_address_free(addr);
+ }
+ }
+ }
+ else {
+ rspamd_monitored_propagate_success(m, lat);
+ }
+ }
+}
+
+static gboolean
+rspamd_monitored_dns_mon(struct rspamd_monitored *m,
+ struct rspamd_monitored_ctx *ctx, gpointer ud)
+{
+ struct rspamd_dns_monitored_conf *conf = ud;
+
+ if (m->flags & RSPAMD_MONITORED_RANDOM) {
+ rspamd_monitored_dns_random(m, conf);
+ }
+
+ if (!rdns_make_request_full(ctx->resolver, rspamd_monitored_dns_cb,
+ conf, ctx->cfg->dns_timeout, ctx->cfg->dns_retransmits,
+ 1, conf->request->str, conf->rt)) {
+ msg_notice_mon("cannot make request to resolve %s (%s monitored url)",
+ conf->request->str, conf->m->url);
+
+ m->cur_errors++;
+ rspamd_monitored_propagate_error(m, "failed to make DNS request");
+
+ return FALSE;
+ }
+ else {
+ conf->check_tm = rspamd_get_calendar_ticks();
+ }
+
+ return TRUE;
+}
+
+void rspamd_monitored_dns_dtor(struct rspamd_monitored *m,
+ struct rspamd_monitored_ctx *ctx, gpointer ud)
+{
+ struct rspamd_dns_monitored_conf *conf = ud;
+
+ g_string_free(conf->request, TRUE);
+
+ if (conf->expected) {
+ radix_destroy_compressed(conf->expected);
+ }
+
+ g_free(conf);
+}
+
+struct rspamd_monitored_ctx *
+rspamd_monitored_ctx_init(void)
+{
+ struct rspamd_monitored_ctx *ctx;
+
+ ctx = g_malloc0(sizeof(*ctx));
+ ctx->monitoring_interval = default_monitoring_interval;
+ ctx->max_errors = default_max_errors;
+ ctx->offline_monitored_mult = default_offline_monitored_mult;
+ ctx->initial_monitored_mult = default_initial_monitored_mult;
+ ctx->max_monitored_mult = default_max_monitored_mult;
+ ctx->min_monitored_mult = default_min_monitored_mult;
+ ctx->elts = g_ptr_array_new();
+ ctx->helts = g_hash_table_new(g_str_hash, g_str_equal);
+
+ return ctx;
+}
+
+
+void rspamd_monitored_ctx_config(struct rspamd_monitored_ctx *ctx,
+ struct rspamd_config *cfg,
+ struct ev_loop *ev_base,
+ struct rdns_resolver *resolver,
+ mon_change_cb change_cb,
+ gpointer ud)
+{
+ struct rspamd_monitored *m;
+ guint i;
+
+ g_assert(ctx != NULL);
+ ctx->event_loop = ev_base;
+ ctx->resolver = resolver;
+ ctx->cfg = cfg;
+ ctx->initialized = TRUE;
+ ctx->change_cb = change_cb;
+ ctx->ud = ud;
+
+ if (cfg->monitored_interval != 0) {
+ ctx->monitoring_interval = cfg->monitored_interval;
+ }
+
+ /* Start all events */
+ for (i = 0; i < ctx->elts->len; i++) {
+ m = g_ptr_array_index(ctx->elts, i);
+ m->monitoring_mult = ctx->initial_monitored_mult;
+ rspamd_monitored_start(m);
+ m->monitoring_mult = 1.0;
+ }
+}
+
+
+struct ev_loop *
+rspamd_monitored_ctx_get_ev_base(struct rspamd_monitored_ctx *ctx)
+{
+ return ctx->event_loop;
+}
+
+
+struct rspamd_monitored *
+rspamd_monitored_create_(struct rspamd_monitored_ctx *ctx,
+ const gchar *line,
+ enum rspamd_monitored_type type,
+ enum rspamd_monitored_flags flags,
+ const ucl_object_t *opts,
+ const gchar *loc)
+{
+ struct rspamd_monitored *m;
+ rspamd_cryptobox_hash_state_t st;
+ gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES];
+
+ g_assert(ctx != NULL);
+
+ m = g_malloc0(sizeof(*m));
+ m->type = type;
+ m->flags = flags;
+
+ m->url = g_strdup(line);
+ m->ctx = ctx;
+ m->monitoring_mult = ctx->initial_monitored_mult;
+ m->max_errors = ctx->max_errors;
+ m->alive = TRUE;
+
+ if (type == RSPAMD_MONITORED_DNS) {
+ m->proc.monitored_update = rspamd_monitored_dns_mon;
+ m->proc.monitored_config = rspamd_monitored_dns_conf;
+ m->proc.monitored_dtor = rspamd_monitored_dns_dtor;
+ }
+ else {
+ g_free(m);
+
+ return NULL;
+ }
+
+ if (opts) {
+ const ucl_object_t *rnd_obj;
+
+ rnd_obj = ucl_object_lookup(opts, "random");
+
+ if (rnd_obj && ucl_object_type(rnd_obj) == UCL_BOOLEAN) {
+ if (ucl_object_toboolean(rnd_obj)) {
+ m->flags |= RSPAMD_MONITORED_RANDOM;
+ }
+ }
+ }
+
+ m->proc.ud = m->proc.monitored_config(m, ctx, opts);
+
+ if (m->proc.ud == NULL) {
+ g_free(m);
+
+ return NULL;
+ }
+
+ /* Create a persistent tag */
+ rspamd_cryptobox_hash_init(&st, NULL, 0);
+ rspamd_cryptobox_hash_update(&st, m->url, strlen(m->url));
+ rspamd_cryptobox_hash_update(&st, loc, strlen(loc));
+ rspamd_cryptobox_hash_final(&st, cksum);
+ cksum_encoded = rspamd_encode_base32(cksum, sizeof(cksum), RSPAMD_BASE32_DEFAULT);
+ rspamd_strlcpy(m->tag, cksum_encoded, sizeof(m->tag));
+
+ if (g_hash_table_lookup(ctx->helts, m->tag) != NULL) {
+ msg_err("monitored error: tag collision detected for %s; "
+ "url: %s",
+ m->tag, m->url);
+ }
+ else {
+ g_hash_table_insert(ctx->helts, m->tag, m);
+ }
+
+ g_free(cksum_encoded);
+
+ g_ptr_array_add(ctx->elts, m);
+
+ if (ctx->event_loop) {
+ rspamd_monitored_start(m);
+ }
+
+ return m;
+}
+
+gboolean
+rspamd_monitored_alive(struct rspamd_monitored *m)
+{
+ g_assert(m != NULL);
+
+ return m->alive;
+}
+
+gboolean
+rspamd_monitored_set_alive(struct rspamd_monitored *m, gboolean alive)
+{
+ gboolean st;
+
+ g_assert(m != NULL);
+ st = m->alive;
+ m->alive = alive;
+
+ return st;
+}
+
+gdouble
+rspamd_monitored_offline_time(struct rspamd_monitored *m)
+{
+ g_assert(m != NULL);
+
+ if (m->offline_time > 0) {
+ return rspamd_get_calendar_ticks() - m->offline_time;
+ }
+
+ return 0;
+}
+
+gdouble
+rspamd_monitored_total_offline_time(struct rspamd_monitored *m)
+{
+ g_assert(m != NULL);
+
+ if (m->offline_time > 0) {
+ return rspamd_get_calendar_ticks() - m->offline_time + m->total_offline_time;
+ }
+
+
+ return m->total_offline_time;
+}
+
+gdouble
+rspamd_monitored_latency(struct rspamd_monitored *m)
+{
+ g_assert(m != NULL);
+
+ return m->latency;
+}
+
+void rspamd_monitored_stop(struct rspamd_monitored *m)
+{
+ g_assert(m != NULL);
+
+ ev_timer_stop(m->ctx->event_loop, &m->periodic);
+}
+
+void rspamd_monitored_start(struct rspamd_monitored *m)
+{
+ gdouble jittered;
+
+ g_assert(m != NULL);
+ jittered = rspamd_time_jitter(m->ctx->monitoring_interval * m->monitoring_mult,
+ 0.0);
+
+ msg_debug_mon("started monitored object %s in %.2f seconds", m->url, jittered);
+
+ if (ev_can_stop(&m->periodic)) {
+ ev_timer_stop(m->ctx->event_loop, &m->periodic);
+ }
+
+ m->periodic.data = m;
+ ev_timer_init(&m->periodic, rspamd_monitored_periodic, jittered, 0.0);
+ ev_timer_start(m->ctx->event_loop, &m->periodic);
+}
+
+void rspamd_monitored_ctx_destroy(struct rspamd_monitored_ctx *ctx)
+{
+ struct rspamd_monitored *m;
+ guint i;
+
+ g_assert(ctx != NULL);
+
+ for (i = 0; i < ctx->elts->len; i++) {
+ m = g_ptr_array_index(ctx->elts, i);
+ rspamd_monitored_stop(m);
+ m->proc.monitored_dtor(m, m->ctx, m->proc.ud);
+ g_free(m->url);
+ g_free(m);
+ }
+
+ g_ptr_array_free(ctx->elts, TRUE);
+ g_hash_table_unref(ctx->helts);
+ g_free(ctx);
+}
+
+struct rspamd_monitored *
+rspamd_monitored_by_tag(struct rspamd_monitored_ctx *ctx,
+ guchar tag[RSPAMD_MONITORED_TAG_LEN])
+{
+ struct rspamd_monitored *res;
+ gchar rtag[RSPAMD_MONITORED_TAG_LEN];
+
+ rspamd_strlcpy(rtag, tag, sizeof(rtag));
+ res = g_hash_table_lookup(ctx->helts, rtag);
+
+ return res;
+}
+
+
+void rspamd_monitored_get_tag(struct rspamd_monitored *m,
+ guchar tag_out[RSPAMD_MONITORED_TAG_LEN])
+{
+ g_assert(m != NULL);
+
+ rspamd_strlcpy(tag_out, m->tag, RSPAMD_MONITORED_TAG_LEN);
+} \ No newline at end of file
diff --git a/src/libserver/monitored.h b/src/libserver/monitored.h
new file mode 100644
index 0000000..01f050a
--- /dev/null
+++ b/src/libserver/monitored.h
@@ -0,0 +1,161 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_MONITORED_H_
+#define SRC_LIBSERVER_MONITORED_H_
+
+#include "config.h"
+#include "rdns.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_monitored;
+struct rspamd_monitored_ctx;
+struct rspamd_config;
+
+#define RSPAMD_MONITORED_TAG_LEN 32
+
+enum rspamd_monitored_type {
+ RSPAMD_MONITORED_DNS = 0,
+};
+
+enum rspamd_monitored_flags {
+ RSPAMD_MONITORED_DEFAULT = 0u,
+ RSPAMD_MONITORED_RBL = (1u << 0u),
+ RSPAMD_MONITORED_RANDOM = (1u << 1u)
+};
+
+/**
+ * Initialize new monitored context
+ * @return opaque context pointer (should be configured)
+ */
+struct rspamd_monitored_ctx *rspamd_monitored_ctx_init(void);
+
+typedef void (*mon_change_cb)(struct rspamd_monitored_ctx *ctx,
+ struct rspamd_monitored *m, gboolean alive,
+ void *ud);
+
+/**
+ * Configure context for monitored objects
+ * @param ctx context
+ * @param cfg configuration
+ * @param ev_base events base
+ * @param resolver resolver object
+ */
+void rspamd_monitored_ctx_config(struct rspamd_monitored_ctx *ctx,
+ struct rspamd_config *cfg,
+ struct ev_loop *ev_base,
+ struct rdns_resolver *resolver,
+ mon_change_cb change_cb,
+ gpointer ud);
+
+struct ev_loop *rspamd_monitored_ctx_get_ev_base(struct rspamd_monitored_ctx *ctx);
+
+/**
+ * Create monitored object
+ * @param ctx context
+ * @param line string definition (e.g. hostname)
+ * @param type type of monitoring
+ * @param flags specific flags for monitoring
+ * @return new monitored object
+ */
+struct rspamd_monitored *rspamd_monitored_create_(
+ struct rspamd_monitored_ctx *ctx,
+ const gchar *line,
+ enum rspamd_monitored_type type,
+ enum rspamd_monitored_flags flags,
+ const ucl_object_t *opts,
+ const gchar *loc);
+
+#define rspamd_monitored_create(ctx, line, type, flags, opts) \
+ rspamd_monitored_create_(ctx, line, type, flags, opts, G_STRFUNC)
+
+/**
+ * Return monitored by its tag
+ * @param ctx
+ * @param tag
+ * @return
+ */
+struct rspamd_monitored *rspamd_monitored_by_tag(struct rspamd_monitored_ctx *ctx,
+ guchar tag[RSPAMD_MONITORED_TAG_LEN]);
+
+/**
+ * Sets `tag_out` to the monitored tag
+ * @param m
+ * @param tag_out
+ */
+void rspamd_monitored_get_tag(struct rspamd_monitored *m,
+ guchar tag_out[RSPAMD_MONITORED_TAG_LEN]);
+
+/**
+ * Return TRUE if monitored object is alive
+ * @param m monitored object
+ * @return TRUE or FALSE
+ */
+gboolean rspamd_monitored_alive(struct rspamd_monitored *m);
+
+/**
+ * Force alive flag for a monitored object
+ * @param m monitored object
+ * @return TRUE or FALSE
+ */
+gboolean rspamd_monitored_set_alive(struct rspamd_monitored *m, gboolean alive);
+
+/**
+ * Returns the current offline time for a monitored object
+ * @param m
+ * @return
+ */
+gdouble rspamd_monitored_offline_time(struct rspamd_monitored *m);
+
+/**
+ * Returns the total offline time for a monitored object
+ * @param m
+ * @return
+ */
+gdouble rspamd_monitored_total_offline_time(struct rspamd_monitored *m);
+
+/**
+ * Returns the latency for monitored object (in seconds)
+ * @param m
+ * @return
+ */
+gdouble rspamd_monitored_latency(struct rspamd_monitored *m);
+
+/**
+ * Explicitly disable monitored object
+ * @param m
+ */
+void rspamd_monitored_stop(struct rspamd_monitored *m);
+
+/**
+ * Explicitly enable monitored object
+ * @param m
+ */
+void rspamd_monitored_start(struct rspamd_monitored *m);
+
+/**
+ * Destroy monitored context and all monitored objects inside
+ * @param ctx
+ */
+void rspamd_monitored_ctx_destroy(struct rspamd_monitored_ctx *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_MONITORED_H_ */
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
new file mode 100644
index 0000000..8674557
--- /dev/null
+++ b/src/libserver/protocol.c
@@ -0,0 +1,2185 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "message.h"
+#include "utlist.h"
+#include "libserver/http/http_private.h"
+#include "worker_private.h"
+#include "libserver/cfg_file_private.h"
+#include "libmime/scan_result_private.h"
+#include "lua/lua_common.h"
+#include "unix-std.h"
+#include "protocol_internal.h"
+#include "libserver/mempool_vars_internal.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include "task.h"
+#include <math.h>
+
+#ifdef SYS_ZSTD
+#include "zstd.h"
+#else
+#include "contrib/zstd/zstd.h"
+#endif
+
+INIT_LOG_MODULE(protocol)
+
+#define msg_err_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "protocol", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "protocol", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "protocol", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_protocol(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_protocol_log_id, "protocol", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+static GQuark
+rspamd_protocol_quark(void)
+{
+ return g_quark_from_static_string("protocol-error");
+}
+
+/*
+ * Remove <> from the fixed string and copy it to the pool
+ */
+static gchar *
+rspamd_protocol_escape_braces(struct rspamd_task *task, rspamd_ftok_t *in)
+{
+ guint nchars = 0;
+ const gchar *p;
+ rspamd_ftok_t tok;
+ gboolean has_obrace = FALSE;
+
+ g_assert(in != NULL);
+ g_assert(in->len > 0);
+
+ p = in->begin;
+
+ while ((g_ascii_isspace(*p) || *p == '<') && nchars < in->len) {
+ if (*p == '<') {
+ has_obrace = TRUE;
+ }
+
+ p++;
+ nchars++;
+ }
+
+ tok.begin = p;
+
+ p = in->begin + in->len - 1;
+ tok.len = in->len - nchars;
+
+ while (g_ascii_isspace(*p) && tok.len > 0) {
+ p--;
+ tok.len--;
+ }
+
+ if (has_obrace && *p == '>') {
+ tok.len--;
+ }
+
+ return rspamd_mempool_ftokdup(task->task_pool, &tok);
+}
+
+#define COMPARE_CMD(str, cmd, len) (sizeof(cmd) - 1 == (len) && rspamd_lc_cmp((str), (cmd), (len)) == 0)
+
+static gboolean
+rspamd_protocol_handle_url(struct rspamd_task *task,
+ struct rspamd_http_message *msg)
+{
+ GHashTable *query_args;
+ GHashTableIter it;
+ struct http_parser_url u;
+ const gchar *p;
+ gsize pathlen;
+ rspamd_ftok_t *key, *value;
+ gpointer k, v;
+
+ if (msg->url == NULL || msg->url->len == 0) {
+ g_set_error(&task->err, rspamd_protocol_quark(), 400, "missing command");
+ return FALSE;
+ }
+
+ if (http_parser_parse_url(msg->url->str, msg->url->len, 0, &u) != 0) {
+ g_set_error(&task->err, rspamd_protocol_quark(), 400, "bad request URL");
+
+ return FALSE;
+ }
+
+ if (!(u.field_set & (1 << UF_PATH))) {
+ g_set_error(&task->err, rspamd_protocol_quark(), 400,
+ "bad request URL: missing path");
+
+ return FALSE;
+ }
+
+ p = msg->url->str + u.field_data[UF_PATH].off;
+ pathlen = u.field_data[UF_PATH].len;
+
+ if (*p == '/') {
+ p++;
+ pathlen--;
+ }
+
+ switch (*p) {
+ case 'c':
+ case 'C':
+ /* check */
+ if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) {
+ task->cmd = CMD_CHECK_V2;
+ msg_debug_protocol("got checkv2 command");
+ }
+ else if (COMPARE_CMD(p, MSG_CMD_CHECK, pathlen)) {
+ task->cmd = CMD_CHECK;
+ msg_debug_protocol("got check command");
+ }
+ else {
+ goto err;
+ }
+ break;
+ case 's':
+ case 'S':
+ /* symbols, skip */
+ if (COMPARE_CMD(p, MSG_CMD_SYMBOLS, pathlen)) {
+ task->cmd = CMD_CHECK;
+ msg_debug_protocol("got symbols -> old check command");
+ }
+ else if (COMPARE_CMD(p, MSG_CMD_SCAN, pathlen)) {
+ task->cmd = CMD_CHECK;
+ msg_debug_protocol("got scan -> old check command");
+ }
+ else if (COMPARE_CMD(p, MSG_CMD_SKIP, pathlen)) {
+ msg_debug_protocol("got skip command");
+ task->cmd = CMD_SKIP;
+ }
+ else {
+ goto err;
+ }
+ break;
+ case 'p':
+ case 'P':
+ /* ping, process */
+ if (COMPARE_CMD(p, MSG_CMD_PING, pathlen)) {
+ msg_debug_protocol("got ping command");
+ task->cmd = CMD_PING;
+ task->flags |= RSPAMD_TASK_FLAG_SKIP;
+ task->processed_stages |= RSPAMD_TASK_STAGE_DONE; /* Skip all */
+ }
+ else if (COMPARE_CMD(p, MSG_CMD_PROCESS, pathlen)) {
+ msg_debug_protocol("got process -> old check command");
+ task->cmd = CMD_CHECK;
+ }
+ else {
+ goto err;
+ }
+ break;
+ case 'r':
+ case 'R':
+ /* report, report_ifspam */
+ if (COMPARE_CMD(p, MSG_CMD_REPORT, pathlen)) {
+ msg_debug_protocol("got report -> old check command");
+ task->cmd = CMD_CHECK;
+ }
+ else if (COMPARE_CMD(p, MSG_CMD_REPORT_IFSPAM, pathlen)) {
+ msg_debug_protocol("got reportifspam -> old check command");
+ task->cmd = CMD_CHECK;
+ }
+ else {
+ goto err;
+ }
+ break;
+ default:
+ goto err;
+ }
+
+ if (u.field_set & (1u << UF_QUERY)) {
+ /* In case if we have a query, we need to store it somewhere */
+ query_args = rspamd_http_message_parse_query(msg);
+
+ /* Insert the rest of query params as HTTP headers */
+ g_hash_table_iter_init(&it, query_args);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ gchar *key_cpy;
+ key = k;
+ value = v;
+
+ key_cpy = rspamd_mempool_ftokdup(task->task_pool, key);
+
+ rspamd_http_message_add_header_len(msg, key_cpy,
+ value->begin, value->len);
+ msg_debug_protocol("added header \"%T\" -> \"%T\" from HTTP query",
+ key, value);
+ }
+
+ g_hash_table_unref(query_args);
+ }
+
+ return TRUE;
+
+err:
+ g_set_error(&task->err, rspamd_protocol_quark(), 400, "invalid command");
+
+ return FALSE;
+}
+
+static void
+rspamd_protocol_process_recipients(struct rspamd_task *task,
+ const rspamd_ftok_t *hdr)
+{
+ enum {
+ skip_spaces,
+ quoted_string,
+ normal_string,
+ } state = skip_spaces;
+ const gchar *p, *end, *start_addr;
+ struct rspamd_email_address *addr;
+
+ p = hdr->begin;
+ end = hdr->begin + hdr->len;
+ start_addr = NULL;
+
+ while (p < end) {
+ switch (state) {
+ case skip_spaces:
+ if (g_ascii_isspace(*p)) {
+ p++;
+ }
+ else if (*p == '"') {
+ start_addr = p;
+ p++;
+ state = quoted_string;
+ }
+ else {
+ state = normal_string;
+ start_addr = p;
+ }
+ break;
+ case quoted_string:
+ if (*p == '"') {
+ state = normal_string;
+ p++;
+ }
+ else if (*p == '\\') {
+ /* Quoted pair */
+ p += 2;
+ }
+ else {
+ p++;
+ }
+ break;
+ case normal_string:
+ if (*p == '"') {
+ state = quoted_string;
+ p++;
+ }
+ else if (*p == ',' && start_addr != NULL && p > start_addr) {
+ /* We have finished address, check what we have */
+ addr = rspamd_email_address_from_smtp(start_addr,
+ p - start_addr);
+
+ if (addr) {
+ if (task->rcpt_envelope == NULL) {
+ task->rcpt_envelope = g_ptr_array_sized_new(
+ 2);
+ }
+
+ g_ptr_array_add(task->rcpt_envelope, addr);
+ }
+ else {
+ msg_err_protocol("bad rcpt address: '%*s'",
+ (int) (p - start_addr), start_addr);
+ task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+ }
+ start_addr = NULL;
+ p++;
+ state = skip_spaces;
+ }
+ else {
+ p++;
+ }
+ break;
+ }
+ }
+
+ /* Check remainder */
+ if (start_addr && p > start_addr) {
+ switch (state) {
+ case normal_string:
+ addr = rspamd_email_address_from_smtp(start_addr, end - start_addr);
+
+ if (addr) {
+ if (task->rcpt_envelope == NULL) {
+ task->rcpt_envelope = g_ptr_array_sized_new(
+ 2);
+ }
+
+ g_ptr_array_add(task->rcpt_envelope, addr);
+ }
+ else {
+ msg_err_protocol("bad rcpt address: '%*s'",
+ (int) (end - start_addr), start_addr);
+ task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+ }
+ break;
+ case skip_spaces:
+ /* Do nothing */
+ break;
+ case quoted_string:
+ default:
+ msg_err_protocol("bad state when parsing rcpt address: '%*s'",
+ (int) (end - start_addr), start_addr);
+ task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+ }
+ }
+}
+
+#define COMPARE_FLAG_LIT(lit) (len == sizeof(lit) - 1 && memcmp((lit), str, len) == 0)
+#define CHECK_PROTOCOL_FLAG(lit, fl) \
+ do { \
+ if (!known && COMPARE_FLAG_LIT(lit)) { \
+ task->protocol_flags |= (fl); \
+ known = TRUE; \
+ msg_debug_protocol("add protocol flag %s", lit); \
+ } \
+ } while (0)
+#define CHECK_TASK_FLAG(lit, fl) \
+ do { \
+ if (!known && COMPARE_FLAG_LIT(lit)) { \
+ task->flags |= (fl); \
+ known = TRUE; \
+ msg_debug_protocol("add task flag %s", lit); \
+ } \
+ } while (0)
+
+static void
+rspamd_protocol_handle_flag(struct rspamd_task *task, const gchar *str,
+ gsize len)
+{
+ gboolean known = FALSE;
+
+ CHECK_TASK_FLAG("pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
+ CHECK_TASK_FLAG("no_log", RSPAMD_TASK_FLAG_NO_LOG);
+ CHECK_TASK_FLAG("skip", RSPAMD_TASK_FLAG_SKIP);
+ CHECK_TASK_FLAG("skip_process", RSPAMD_TASK_FLAG_SKIP_PROCESS);
+ CHECK_TASK_FLAG("no_stat", RSPAMD_TASK_FLAG_NO_STAT);
+ CHECK_TASK_FLAG("ssl", RSPAMD_TASK_FLAG_SSL);
+ CHECK_TASK_FLAG("profile", RSPAMD_TASK_FLAG_PROFILE);
+
+ CHECK_PROTOCOL_FLAG("milter", RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
+ CHECK_PROTOCOL_FLAG("zstd", RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED);
+ CHECK_PROTOCOL_FLAG("ext_urls", RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
+ CHECK_PROTOCOL_FLAG("body_block", RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK);
+ CHECK_PROTOCOL_FLAG("groups", RSPAMD_TASK_PROTOCOL_FLAG_GROUPS);
+
+ if (!known) {
+ msg_warn_protocol("unknown flag: %*s", (gint) len, str);
+ }
+}
+
+#undef COMPARE_FLAG
+#undef CHECK_PROTOCOL_FLAG
+
+static void
+rspamd_protocol_process_flags(struct rspamd_task *task, const rspamd_ftok_t *hdr)
+{
+ enum {
+ skip_spaces,
+ read_flag,
+ } state = skip_spaces;
+ const gchar *p, *end, *start;
+
+ p = hdr->begin;
+ end = hdr->begin + hdr->len;
+ start = NULL;
+
+ while (p < end) {
+ switch (state) {
+ case skip_spaces:
+ if (g_ascii_isspace(*p)) {
+ p++;
+ }
+ else {
+ state = read_flag;
+ start = p;
+ }
+ break;
+ case read_flag:
+ if (*p == ',') {
+ if (p > start) {
+ rspamd_protocol_handle_flag(task, start, p - start);
+ }
+ start = NULL;
+ state = skip_spaces;
+ p++;
+ }
+ else {
+ p++;
+ }
+ break;
+ }
+ }
+
+ /* Check remainder */
+ if (start && end > start && state == read_flag) {
+ rspamd_protocol_handle_flag(task, start, end - start);
+ }
+}
+
+#define IF_HEADER(name) \
+ srch.begin = (name); \
+ srch.len = sizeof(name) - 1; \
+ if (rspamd_ftok_casecmp(hn_tok, &srch) == 0)
+
+gboolean
+rspamd_protocol_handle_headers(struct rspamd_task *task,
+ struct rspamd_http_message *msg)
+{
+ rspamd_ftok_t *hn_tok, *hv_tok, srch;
+ gboolean has_ip = FALSE, seen_settings_header = FALSE;
+ struct rspamd_http_header *header, *h;
+ gchar *ntok;
+
+ kh_foreach_value (msg->headers, header, {
+ DL_FOREACH (header, h) {
+ ntok = rspamd_mempool_ftokdup (task->task_pool, &h->name);
+ hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok));
+ hn_tok->begin = ntok;
+ hn_tok->len = h->name.len;
+
+
+ ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value);
+ hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok));
+ hv_tok->begin = ntok;
+ hv_tok->len = h->value.len;
+
+ switch (*hn_tok->begin) {
+ case 'd':
+ case 'D':
+ IF_HEADER(DELIVER_TO_HEADER)
+ {
+ task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok);
+ msg_debug_protocol("read deliver-to header, value: %s",
+ task->deliver_to);
+ }
+ else
+ {
+ msg_debug_protocol("wrong header: %T", hn_tok);
+ }
+ break;
+ case 'h':
+ case 'H':
+ IF_HEADER(HELO_HEADER)
+ {
+ task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
+ msg_debug_protocol("read helo header, value: %s", task->helo);
+ }
+ IF_HEADER(HOSTNAME_HEADER)
+ {
+ task->hostname = rspamd_mempool_ftokdup(task->task_pool,
+ hv_tok);
+ msg_debug_protocol("read hostname header, value: %s", task->hostname);
+ }
+ break;
+ case 'f':
+ case 'F':
+ IF_HEADER(FROM_HEADER)
+ {
+ if (hv_tok->len == 0) {
+ /* Replace '' with '<>' to fix parsing issue */
+ RSPAMD_FTOK_ASSIGN(hv_tok, "<>");
+ }
+ task->from_envelope = rspamd_email_address_from_smtp(
+ hv_tok->begin,
+ hv_tok->len);
+ msg_debug_protocol("read from header, value: %T", hv_tok);
+
+ if (!task->from_envelope) {
+ msg_err_protocol("bad from header: '%T'", hv_tok);
+ task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+ }
+ }
+ IF_HEADER(FILENAME_HEADER)
+ {
+ task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool,
+ hv_tok);
+ msg_debug_protocol("read filename header, value: %s", task->msg.fpath);
+ }
+ IF_HEADER(FLAGS_HEADER)
+ {
+ msg_debug_protocol("read flags header, value: %T", hv_tok);
+ rspamd_protocol_process_flags(task, hv_tok);
+ }
+ break;
+ case 'q':
+ case 'Q':
+ IF_HEADER(QUEUE_ID_HEADER)
+ {
+ task->queue_id = rspamd_mempool_ftokdup(task->task_pool,
+ hv_tok);
+ msg_debug_protocol("read queue_id header, value: %s", task->queue_id);
+ }
+ else
+ {
+ msg_debug_protocol("wrong header: %T", hn_tok);
+ }
+ break;
+ case 'r':
+ case 'R':
+ IF_HEADER(RCPT_HEADER)
+ {
+ rspamd_protocol_process_recipients(task, hv_tok);
+ msg_debug_protocol("read rcpt header, value: %T", hv_tok);
+ }
+ IF_HEADER(RAW_DATA_HEADER)
+ {
+ srch.begin = "yes";
+ srch.len = 3;
+
+ msg_debug_protocol("read raw data header, value: %T", hv_tok);
+
+ if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
+ task->flags &= ~RSPAMD_TASK_FLAG_MIME;
+ msg_debug_protocol("disable mime parsing");
+ }
+ }
+ break;
+ case 'i':
+ case 'I':
+ IF_HEADER(IP_ADDR_HEADER)
+ {
+ if (!rspamd_parse_inet_address(&task->from_addr,
+ hv_tok->begin, hv_tok->len,
+ RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+ msg_err_protocol("bad ip header: '%T'", hv_tok);
+ }
+ else {
+ msg_debug_protocol("read IP header, value: %T", hv_tok);
+ has_ip = TRUE;
+ }
+ }
+ else
+ {
+ msg_debug_protocol("wrong header: %T", hn_tok);
+ }
+ break;
+ case 'p':
+ case 'P':
+ IF_HEADER(PASS_HEADER)
+ {
+ srch.begin = "all";
+ srch.len = 3;
+
+ msg_debug_protocol("read pass header, value: %T", hv_tok);
+
+ if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
+ task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
+ msg_debug_protocol("pass all filters");
+ }
+ }
+ IF_HEADER(PROFILE_HEADER)
+ {
+ msg_debug_protocol("read profile header, value: %T", hv_tok);
+ task->flags |= RSPAMD_TASK_FLAG_PROFILE;
+ }
+ break;
+ case 's':
+ case 'S':
+ IF_HEADER(SETTINGS_ID_HEADER)
+ {
+ msg_debug_protocol("read settings-id header, value: %T", hv_tok);
+ task->settings_elt = rspamd_config_find_settings_name_ref(
+ task->cfg, hv_tok->begin, hv_tok->len);
+
+ if (task->settings_elt == NULL) {
+ GString *known_ids = g_string_new(NULL);
+ struct rspamd_config_settings_elt *cur;
+
+ DL_FOREACH(task->cfg->setting_ids, cur)
+ {
+ rspamd_printf_gstring(known_ids, "%s(%ud);",
+ cur->name, cur->id);
+ }
+
+ msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v",
+ hv_tok,
+ rspamd_config_name_to_id(hv_tok->begin, hv_tok->len),
+ known_ids);
+
+ g_string_free(known_ids, TRUE);
+ }
+ else {
+ msg_debug_protocol("applied settings id %T -> %ud", hv_tok,
+ task->settings_elt->id);
+ }
+ }
+ IF_HEADER(SETTINGS_HEADER)
+ {
+ msg_debug_protocol("read settings header, value: %T", hv_tok);
+ seen_settings_header = TRUE;
+ }
+ break;
+ case 'u':
+ case 'U':
+ IF_HEADER(USER_HEADER)
+ {
+ /*
+ * We must ignore User header in case of spamc, as SA has
+ * different meaning of this header
+ */
+ msg_debug_protocol("read user header, value: %T", hv_tok);
+ if (!RSPAMD_TASK_IS_SPAMC(task)) {
+ task->auth_user = rspamd_mempool_ftokdup(task->task_pool,
+ hv_tok);
+ }
+ else {
+ msg_info_protocol("ignore user header: legacy SA protocol");
+ }
+ }
+ IF_HEADER(URLS_HEADER)
+ {
+ msg_debug_protocol("read urls header, value: %T", hv_tok);
+
+ srch.begin = "extended";
+ srch.len = 8;
+
+ if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS;
+ msg_debug_protocol("extended urls information");
+ }
+
+ /* TODO: add more formats there */
+ }
+ IF_HEADER(USER_AGENT_HEADER)
+ {
+ msg_debug_protocol("read user-agent header, value: %T", hv_tok);
+
+ if (hv_tok->len == 6 &&
+ rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) {
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT;
+ }
+ }
+ break;
+ case 'l':
+ case 'L':
+ IF_HEADER(NO_LOG_HEADER)
+ {
+ msg_debug_protocol("read log header, value: %T", hv_tok);
+ srch.begin = "no";
+ srch.len = 2;
+
+ if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
+ task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
+ }
+ }
+ break;
+ case 'm':
+ case 'M':
+ IF_HEADER(MLEN_HEADER)
+ {
+ msg_debug_protocol("read message length header, value: %T",
+ hv_tok);
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL;
+ }
+ IF_HEADER(MTA_TAG_HEADER)
+ {
+ gchar *mta_tag;
+ mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
+ rspamd_mempool_set_variable(task->task_pool,
+ RSPAMD_MEMPOOL_MTA_TAG,
+ mta_tag, NULL);
+ msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag);
+ }
+ IF_HEADER(MTA_NAME_HEADER)
+ {
+ gchar *mta_name;
+ mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
+ rspamd_mempool_set_variable(task->task_pool,
+ RSPAMD_MEMPOOL_MTA_NAME,
+ mta_name, NULL);
+ msg_debug_protocol("read MTA-Name header, value: %s", mta_name);
+ }
+ IF_HEADER(MILTER_HEADER)
+ {
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER;
+ msg_debug_protocol("read Milter header, value: %T", hv_tok);
+ }
+ break;
+ case 't':
+ case 'T':
+ IF_HEADER(TLS_CIPHER_HEADER)
+ {
+ task->flags |= RSPAMD_TASK_FLAG_SSL;
+ msg_debug_protocol("read TLS cipher header, value: %T", hv_tok);
+ }
+ break;
+ default:
+ msg_debug_protocol("generic header: %T", hn_tok);
+ break;
+ }
+
+ rspamd_task_add_request_header (task, hn_tok, hv_tok);
+}
+}); /* End of kh_foreach_value */
+
+if (seen_settings_header && task->settings_elt) {
+ msg_warn_task("ignore settings id %s as settings header is also presented",
+ task->settings_elt->name);
+ REF_RELEASE(task->settings_elt);
+
+ task->settings_elt = NULL;
+}
+
+if (!has_ip) {
+ task->flags |= RSPAMD_TASK_FLAG_NO_IP;
+}
+
+return TRUE;
+}
+
+#define BOOL_TO_FLAG(val, flags, flag) \
+ do { \
+ if ((val)) (flags) |= (flag); \
+ else \
+ (flags) &= ~(flag); \
+ } while (0)
+
+gboolean
+rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ struct rspamd_rcl_struct_parser *pd = ud;
+ gint *target;
+ const gchar *key;
+ gboolean value;
+
+ target = (gint *) (((gchar *) pd->user_struct) + pd->offset);
+ key = ucl_object_key(obj);
+ value = ucl_object_toboolean(obj);
+
+ if (key != NULL) {
+ if (g_ascii_strcasecmp(key, "pass_all") == 0) {
+ BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL);
+ }
+ else if (g_ascii_strcasecmp(key, "no_log") == 0) {
+ BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG);
+ }
+ }
+
+ return TRUE;
+}
+
+static struct rspamd_rcl_sections_map *control_parser = NULL;
+
+RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor)
+{
+
+ struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL,
+ "*",
+ NULL,
+ NULL,
+ UCL_OBJECT,
+ FALSE,
+ TRUE);
+ /* Default handlers */
+ rspamd_rcl_add_default_handler(sub,
+ "ip",
+ rspamd_rcl_parse_struct_addr,
+ G_STRUCT_OFFSET(struct rspamd_task, from_addr),
+ 0,
+ NULL);
+ rspamd_rcl_add_default_handler(sub,
+ "from",
+ rspamd_rcl_parse_struct_mime_addr,
+ G_STRUCT_OFFSET(struct rspamd_task, from_envelope),
+ 0,
+ NULL);
+ rspamd_rcl_add_default_handler(sub,
+ "rcpt",
+ rspamd_rcl_parse_struct_mime_addr,
+ G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope),
+ 0,
+ NULL);
+ rspamd_rcl_add_default_handler(sub,
+ "helo",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_task, helo),
+ 0,
+ NULL);
+ rspamd_rcl_add_default_handler(sub,
+ "user",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_task, auth_user),
+ 0,
+ NULL);
+ rspamd_rcl_add_default_handler(sub,
+ "pass_all",
+ rspamd_protocol_parse_task_flags,
+ G_STRUCT_OFFSET(struct rspamd_task, flags),
+ 0,
+ NULL);
+ rspamd_rcl_add_default_handler(sub,
+ "json",
+ rspamd_protocol_parse_task_flags,
+ G_STRUCT_OFFSET(struct rspamd_task, flags),
+ 0,
+ NULL);
+}
+
+RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor)
+{
+ rspamd_rcl_sections_free(control_parser);
+}
+
+gboolean
+rspamd_protocol_handle_control(struct rspamd_task *task,
+ const ucl_object_t *control)
+{
+ GError *err = NULL;
+
+ if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool,
+ control, &err)) {
+ msg_warn_protocol("cannot parse control block: %e", err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_protocol_handle_request(struct rspamd_task *task,
+ struct rspamd_http_message *msg)
+{
+ gboolean ret = TRUE;
+
+ if (msg->method == HTTP_SYMBOLS) {
+ msg_debug_protocol("got legacy SYMBOLS method, enable rspamc protocol workaround");
+ task->cmd = CMD_CHECK_RSPAMC;
+ }
+ else if (msg->method == HTTP_CHECK) {
+ msg_debug_protocol("got legacy CHECK method, enable rspamc protocol workaround");
+ task->cmd = CMD_CHECK_RSPAMC;
+ }
+ else {
+ ret = rspamd_protocol_handle_url(task, msg);
+ }
+
+ if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) {
+ msg_debug_protocol("got legacy SA input, enable spamc protocol workaround");
+ task->cmd = CMD_CHECK_SPAMC;
+ }
+
+ return ret;
+}
+
+/* Structure for writing tree data */
+struct tree_cb_data {
+ ucl_object_t *top;
+ khash_t(rspamd_url_host_hash) * seen;
+ struct rspamd_task *task;
+};
+
+static ucl_object_t *
+rspamd_protocol_extended_url(struct rspamd_task *task,
+ struct rspamd_url *url,
+ const gchar *encoded, gsize enclen)
+{
+ ucl_object_t *obj, *elt;
+
+ obj = ucl_object_typed_new(UCL_OBJECT);
+
+ elt = ucl_object_fromstring_common(encoded, enclen, 0);
+ ucl_object_insert_key(obj, elt, "url", 0, false);
+
+ if (url->tldlen > 0) {
+ elt = ucl_object_fromstring_common(rspamd_url_tld_unsafe(url),
+ url->tldlen, 0);
+ ucl_object_insert_key(obj, elt, "tld", 0, false);
+ }
+ if (url->hostlen > 0) {
+ elt = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
+ url->hostlen, 0);
+ ucl_object_insert_key(obj, elt, "host", 0, false);
+ }
+
+ ucl_object_t *flags = ucl_object_typed_new(UCL_ARRAY);
+
+ for (unsigned int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
+ if (url->flags & (1u << i)) {
+ ucl_object_t *fl = ucl_object_fromstring(rspamd_url_flag_to_string(1u << i));
+ ucl_array_append(flags, fl);
+ }
+ }
+
+ ucl_object_insert_key(obj, flags, "flags", 0, false);
+
+ if (url->ext && url->ext->linked_url) {
+ encoded = rspamd_url_encode(url->ext->linked_url, &enclen, task->task_pool);
+ elt = rspamd_protocol_extended_url(task, url->ext->linked_url, encoded,
+ enclen);
+ ucl_object_insert_key(obj, elt, "linked_url", 0, false);
+ }
+
+ return obj;
+}
+
+/*
+ * Callback for writing urls
+ */
+static void
+urls_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
+{
+ ucl_object_t *obj;
+ struct rspamd_task *task = cb->task;
+ const gchar *user_field = "unknown", *encoded = NULL;
+ gboolean has_user = FALSE;
+ guint len = 0;
+ gsize enclen = 0;
+
+ if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) {
+ if (url->hostlen > 0) {
+ if (rspamd_url_host_set_has(cb->seen, url)) {
+ return;
+ }
+
+ goffset err_offset;
+
+ if ((err_offset = rspamd_fast_utf8_validate(rspamd_url_host_unsafe(url),
+ url->hostlen)) == 0) {
+ obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
+ url->hostlen, 0);
+ }
+ else {
+ obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
+ err_offset - 1, 0);
+ }
+ }
+ else {
+ return;
+ }
+
+ rspamd_url_host_set_add(cb->seen, url);
+ }
+ else {
+ encoded = rspamd_url_encode(url, &enclen, task->task_pool);
+ obj = rspamd_protocol_extended_url(task, url, encoded, enclen);
+ }
+
+ ucl_array_append(cb->top, obj);
+
+ if (cb->task->cfg->log_urls) {
+ if (task->auth_user) {
+ user_field = task->auth_user;
+ len = strlen(task->auth_user);
+ has_user = TRUE;
+ }
+ else if (task->from_envelope) {
+ user_field = task->from_envelope->addr;
+ len = task->from_envelope->addr_len;
+ }
+
+ if (!encoded) {
+ encoded = rspamd_url_encode(url, &enclen, task->task_pool);
+ }
+
+ msg_notice_task_encrypted("<%s> %s: %*s; ip: %s; URL: %*s",
+ MESSAGE_FIELD_CHECK(task, message_id),
+ has_user ? "user" : "from",
+ len, user_field,
+ rspamd_inet_address_to_string(task->from_addr),
+ (gint) enclen, encoded);
+ }
+}
+
+static ucl_object_t *
+rspamd_urls_tree_ucl(khash_t(rspamd_url_hash) * set,
+ struct rspamd_task *task)
+{
+ struct tree_cb_data cb;
+ ucl_object_t *obj;
+ struct rspamd_url *u;
+
+ obj = ucl_object_typed_new(UCL_ARRAY);
+ cb.top = obj;
+ cb.task = task;
+ cb.seen = kh_init(rspamd_url_host_hash);
+
+ kh_foreach_key(set, u, {
+ if (!(u->protocol & PROTOCOL_MAILTO)) {
+ urls_protocol_cb(u, &cb);
+ }
+ });
+
+ kh_destroy(rspamd_url_host_hash, cb.seen);
+
+ return obj;
+}
+
+static void
+emails_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
+{
+ ucl_object_t *obj;
+
+ if (url->userlen > 0 && url->hostlen > 0) {
+ obj = ucl_object_fromlstring(rspamd_url_user_unsafe(url),
+ url->userlen + url->hostlen + 1);
+ ucl_array_append(cb->top, obj);
+ }
+}
+
+static ucl_object_t *
+rspamd_emails_tree_ucl(khash_t(rspamd_url_hash) * set,
+ struct rspamd_task *task)
+{
+ struct tree_cb_data cb;
+ ucl_object_t *obj;
+ struct rspamd_url *u;
+
+ obj = ucl_object_typed_new(UCL_ARRAY);
+ cb.top = obj;
+ cb.task = task;
+
+ kh_foreach_key(set, u, {
+ if ((u->protocol & PROTOCOL_MAILTO)) {
+ emails_protocol_cb(u, &cb);
+ }
+ });
+
+
+ return obj;
+}
+
+
+/* Write new subject */
+static const gchar *
+rspamd_protocol_rewrite_subject(struct rspamd_task *task)
+{
+ GString *subj_buf;
+ gchar *res;
+ const gchar *s, *c, *p;
+ gsize slen = 0;
+
+ c = rspamd_mempool_get_variable(task->task_pool, "metric_subject");
+
+ if (c == NULL) {
+ c = task->cfg->subject;
+ }
+
+ if (c == NULL) {
+ c = SPAM_SUBJECT;
+ }
+
+ p = c;
+ s = MESSAGE_FIELD_CHECK(task, subject);
+
+ if (s) {
+ slen = strlen(s);
+ }
+
+ subj_buf = g_string_sized_new(strlen(c) + slen);
+
+ while (*p) {
+ if (*p == '%') {
+ switch (p[1]) {
+ case 's':
+ g_string_append_len(subj_buf, c, p - c);
+
+ if (s) {
+ g_string_append_len(subj_buf, s, slen);
+ }
+ c = p + 2;
+ p += 2;
+ break;
+ case 'd':
+ g_string_append_len(subj_buf, c, p - c);
+ rspamd_printf_gstring(subj_buf, "%.2f", task->result->score);
+ c = p + 2;
+ p += 2;
+ break;
+ case '%':
+ g_string_append_len(subj_buf, c, p - c);
+ g_string_append_c(subj_buf, '%');
+ c = p + 2;
+ p += 2;
+ break;
+ default:
+ p++; /* Just % something unknown */
+ break;
+ }
+ }
+ else {
+ p++;
+ }
+ }
+
+ if (p > c) {
+ g_string_append_len(subj_buf, c, p - c);
+ }
+
+ res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len);
+
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) g_free,
+ res);
+ g_string_free(subj_buf, TRUE);
+
+ return res;
+}
+
+static ucl_object_t *
+rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result *sym)
+{
+ ucl_object_t *obj = NULL, *ar;
+ const gchar *description = NULL;
+ struct rspamd_symbol_option *opt;
+
+ if (sym->sym != NULL) {
+ description = sym->sym->description;
+ }
+
+ obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false);
+ ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false);
+
+ if (task->cmd == CMD_CHECK_V2) {
+ if (sym->sym) {
+ ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false);
+ }
+ else {
+ ucl_object_insert_key(obj, ucl_object_fromdouble(0.0),
+ "metric_score", 0, false);
+ }
+ }
+
+ if (description) {
+ ucl_object_insert_key(obj, ucl_object_fromstring(description),
+ "description", 0, false);
+ }
+
+ if (sym->options != NULL) {
+ ar = ucl_object_typed_new(UCL_ARRAY);
+
+ DL_FOREACH(sym->opts_head, opt)
+ {
+ ucl_array_append(ar, ucl_object_fromstring_common(opt->option,
+ opt->optlen, 0));
+ }
+
+ ucl_object_insert_key(obj, ar, "options", 0, false);
+ }
+
+ return obj;
+}
+
+static ucl_object_t *
+rspamd_metric_group_ucl(struct rspamd_task *task,
+ struct rspamd_symbols_group *gr, gdouble score)
+{
+ ucl_object_t *obj = NULL;
+
+ obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(obj, ucl_object_fromdouble(score),
+ "score", 0, false);
+
+ if (gr->description) {
+ ucl_object_insert_key(obj, ucl_object_fromstring(gr->description),
+ "description", 0, false);
+ }
+
+ return obj;
+}
+
+static ucl_object_t *
+rspamd_scan_result_ucl(struct rspamd_task *task,
+ struct rspamd_scan_result *mres, ucl_object_t *top)
+{
+ struct rspamd_symbol_result *sym;
+ gboolean is_spam;
+ struct rspamd_action *action;
+ ucl_object_t *obj = NULL, *sobj;
+ const gchar *subject;
+ struct rspamd_passthrough_result *pr = NULL;
+
+ action = rspamd_check_action_metric(task, &pr, NULL);
+ is_spam = !(action->flags & RSPAMD_ACTION_HAM);
+
+ if (task->cmd == CMD_CHECK) {
+ obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(obj,
+ ucl_object_frombool(is_spam),
+ "is_spam", 0, false);
+ }
+ else {
+ obj = top;
+ }
+
+ if (pr) {
+ if (pr->message && !(pr->flags & RSPAMD_PASSTHROUGH_NO_SMTP_MESSAGE)) {
+ /* Add smtp message if it does not exist: see #3269 for details */
+ if (ucl_object_lookup(task->messages, "smtp_message") == NULL) {
+ ucl_object_insert_key(task->messages,
+ ucl_object_fromstring_common(pr->message, 0, UCL_STRING_RAW),
+ "smtp_message", 0,
+ false);
+ }
+ }
+
+ ucl_object_insert_key(obj,
+ ucl_object_fromstring(pr->module),
+ "passthrough_module", 0, false);
+ }
+
+ ucl_object_insert_key(obj,
+ ucl_object_frombool(RSPAMD_TASK_IS_SKIPPED(task)),
+ "is_skipped", 0, false);
+
+ if (!isnan(mres->score)) {
+ ucl_object_insert_key(obj, ucl_object_fromdouble(mres->score),
+ "score", 0, false);
+ }
+ else {
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(0.0), "score", 0, false);
+ }
+
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(rspamd_task_get_required_score(task, mres)),
+ "required_score", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromstring(action->name),
+ "action", 0, false);
+
+ if (action->action_type == METRIC_ACTION_REWRITE_SUBJECT) {
+ subject = rspamd_protocol_rewrite_subject(task);
+
+ if (subject) {
+ ucl_object_insert_key(obj, ucl_object_fromstring(subject),
+ "subject", 0, false);
+ }
+ }
+ if (action->flags & RSPAMD_ACTION_MILTER) {
+ /* Treat milter action specially */
+ if (action->action_type == METRIC_ACTION_DISCARD) {
+ ucl_object_insert_key(obj, ucl_object_fromstring("discard"),
+ "reject", 0, false);
+ }
+ else if (action->action_type == METRIC_ACTION_QUARANTINE) {
+ ucl_object_insert_key(obj, ucl_object_fromstring("quarantine"),
+ "reject", 0, false);
+ }
+ }
+
+ /* Now handle symbols */
+ if (task->cmd != CMD_CHECK) {
+ /* Insert actions thresholds */
+ ucl_object_t *actions_obj = ucl_object_typed_new(UCL_OBJECT);
+
+ for (int i = task->result->nactions - 1; i >= 0; i--) {
+ struct rspamd_action_config *action_lim = &task->result->actions_config[i];
+
+ if (!isnan(action_lim->cur_limit) &&
+ !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
+ ucl_object_insert_key(actions_obj, ucl_object_fromdouble(action_lim->cur_limit),
+ action_lim->action->name, 0, true);
+ }
+ }
+
+ ucl_object_insert_key(obj, actions_obj, "thresholds", 0, false);
+
+ /* For checkv2 we insert symbols as a separate object */
+ obj = ucl_object_typed_new(UCL_OBJECT);
+ }
+
+ kh_foreach_value(mres->symbols, sym, {
+ if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
+ sobj = rspamd_metric_symbol_ucl(task, sym);
+ ucl_object_insert_key(obj, sobj, sym->name, 0, false);
+ }
+ })
+
+ if (task->cmd != CMD_CHECK)
+ {
+ /* For checkv2 we insert symbols as a separate object */
+ ucl_object_insert_key(top, obj, "symbols", 0, false);
+ }
+ else
+ {
+ /* For legacy check we just insert it as "default" all together */
+ ucl_object_insert_key(top, obj, DEFAULT_METRIC, 0, false);
+ }
+
+ /* Handle groups if needed */
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_GROUPS) {
+ struct rspamd_symbols_group *gr;
+ gdouble gr_score;
+
+ obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_reserve(obj, kh_size(mres->sym_groups));
+
+ kh_foreach(mres->sym_groups, gr, gr_score, {
+ if (task->cfg->public_groups_only &&
+ !(gr->flags & RSPAMD_SYMBOL_GROUP_PUBLIC)) {
+ continue;
+ }
+ sobj = rspamd_metric_group_ucl(task, gr, gr_score);
+ ucl_object_insert_key(obj, sobj, gr->name, 0, false);
+ });
+
+ ucl_object_insert_key(top, obj, "groups", 0, false);
+ }
+
+ return obj;
+}
+
+void rspamd_ucl_torspamc_output(const ucl_object_t *top,
+ rspamd_fstring_t **out)
+{
+ const ucl_object_t *symbols, *score,
+ *required_score, *is_spam, *elt, *cur;
+ ucl_object_iter_t iter = NULL;
+
+ score = ucl_object_lookup(top, "score");
+ required_score = ucl_object_lookup(top, "required_score");
+ is_spam = ucl_object_lookup(top, "is_spam");
+ rspamd_printf_fstring(out,
+ "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
+ ucl_object_toboolean(is_spam) ? "True" : "False",
+ ucl_object_todouble(score),
+ ucl_object_todouble(required_score));
+ elt = ucl_object_lookup(top, "action");
+ if (elt != NULL) {
+ rspamd_printf_fstring(out, "Action: %s\r\n",
+ ucl_object_tostring(elt));
+ }
+
+ elt = ucl_object_lookup(top, "subject");
+ if (elt != NULL) {
+ rspamd_printf_fstring(out, "Subject: %s\r\n",
+ ucl_object_tostring(elt));
+ }
+
+ symbols = ucl_object_lookup(top, "symbols");
+
+ if (symbols != NULL) {
+ iter = NULL;
+ while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
+ if (elt->type == UCL_OBJECT) {
+ const ucl_object_t *sym_score;
+ sym_score = ucl_object_lookup(elt, "score");
+ rspamd_printf_fstring(out, "Symbol: %s(%.2f)\r\n",
+ ucl_object_key(elt),
+ ucl_object_todouble(sym_score));
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(top, "messages");
+ if (elt != NULL) {
+ iter = NULL;
+ while ((cur = ucl_object_iterate(elt, &iter, true)) != NULL) {
+ if (cur->type == UCL_STRING) {
+ rspamd_printf_fstring(out, "Message: %s\r\n",
+ ucl_object_tostring(cur));
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(top, "message-id");
+ if (elt != NULL) {
+ rspamd_printf_fstring(out, "Message-ID: %s\r\n",
+ ucl_object_tostring(elt));
+ }
+}
+
+void rspamd_ucl_tospamc_output(const ucl_object_t *top,
+ rspamd_fstring_t **out)
+{
+ const ucl_object_t *symbols, *score,
+ *required_score, *is_spam, *elt;
+ ucl_object_iter_t iter = NULL;
+ rspamd_fstring_t *f;
+
+ score = ucl_object_lookup(top, "score");
+ required_score = ucl_object_lookup(top, "required_score");
+ is_spam = ucl_object_lookup(top, "is_spam");
+ rspamd_printf_fstring(out,
+ "Spam: %s ; %.2f / %.2f\r\n\r\n",
+ ucl_object_toboolean(is_spam) ? "True" : "False",
+ ucl_object_todouble(score),
+ ucl_object_todouble(required_score));
+
+ symbols = ucl_object_lookup(top, "symbols");
+
+ if (symbols != NULL) {
+ while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
+ if (elt->type == UCL_OBJECT) {
+ rspamd_printf_fstring(out, "%s,",
+ ucl_object_key(elt));
+ }
+ }
+ /* Ugly hack, but the whole spamc is ugly */
+ f = *out;
+ if (f->str[f->len - 1] == ',') {
+ f->len--;
+
+ *out = rspamd_fstring_append(*out, CRLF, 2);
+ }
+ }
+}
+
+static void
+rspamd_protocol_output_profiling(struct rspamd_task *task,
+ ucl_object_t *top)
+{
+ GHashTable *tbl;
+ GHashTableIter it;
+ gpointer k, v;
+ ucl_object_t *prof;
+ gdouble val;
+
+ prof = ucl_object_typed_new(UCL_OBJECT);
+ tbl = rspamd_mempool_get_variable(task->task_pool, "profile");
+
+ if (tbl) {
+ g_hash_table_iter_init(&it, tbl);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ val = *(gdouble *) v;
+ ucl_object_insert_key(prof, ucl_object_fromdouble(val),
+ (const char *) k, 0, false);
+ }
+ }
+
+ ucl_object_insert_key(top, prof, "profile", 0, false);
+}
+
+ucl_object_t *
+rspamd_protocol_write_ucl(struct rspamd_task *task,
+ enum rspamd_protocol_flags flags)
+{
+ ucl_object_t *top = NULL;
+ GString *dkim_sig;
+ GList *dkim_sigs;
+ const ucl_object_t *milter_reply;
+
+ rspamd_task_set_finish_time(task);
+ top = ucl_object_typed_new(UCL_OBJECT);
+
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) ucl_object_unref, top);
+
+ if (flags & RSPAMD_PROTOCOL_METRICS) {
+ rspamd_scan_result_ucl(task, task->result, top);
+ }
+
+ if (flags & RSPAMD_PROTOCOL_MESSAGES) {
+ if (G_UNLIKELY(task->cfg->compat_messages)) {
+ const ucl_object_t *cur;
+ ucl_object_t *msg_object;
+ ucl_object_iter_t iter = NULL;
+
+ msg_object = ucl_object_typed_new(UCL_ARRAY);
+
+ while ((cur = ucl_object_iterate(task->messages, &iter, true)) != NULL) {
+ if (cur->type == UCL_STRING) {
+ ucl_array_append(msg_object, ucl_object_ref(cur));
+ }
+ }
+
+ ucl_object_insert_key(top, msg_object, "messages", 0, false);
+ }
+ else {
+ ucl_object_insert_key(top, ucl_object_ref(task->messages),
+ "messages", 0, false);
+ }
+ }
+
+ if (flags & RSPAMD_PROTOCOL_URLS && task->message) {
+ if (kh_size(MESSAGE_FIELD(task, urls)) > 0) {
+ ucl_object_insert_key(top,
+ rspamd_urls_tree_ucl(MESSAGE_FIELD(task, urls), task),
+ "urls", 0, false);
+ ucl_object_insert_key(top,
+ rspamd_emails_tree_ucl(MESSAGE_FIELD(task, urls), task),
+ "emails", 0, false);
+ }
+ }
+
+ if (flags & RSPAMD_PROTOCOL_EXTRA) {
+ if (G_UNLIKELY(RSPAMD_TASK_IS_PROFILING(task))) {
+ rspamd_protocol_output_profiling(task, top);
+ }
+ }
+
+ if (flags & RSPAMD_PROTOCOL_BASIC) {
+ ucl_object_insert_key(top,
+ ucl_object_fromstring(MESSAGE_FIELD_CHECK(task, message_id)),
+ "message-id", 0, false);
+ ucl_object_insert_key(top,
+ ucl_object_fromdouble(task->time_real_finish - task->task_timestamp),
+ "time_real", 0, false);
+ }
+
+ if (flags & RSPAMD_PROTOCOL_DKIM) {
+ dkim_sigs = rspamd_mempool_get_variable(task->task_pool,
+ RSPAMD_MEMPOOL_DKIM_SIGNATURE);
+
+ if (dkim_sigs) {
+ if (dkim_sigs->next) {
+ /* Multiple DKIM signatures */
+ ucl_object_t *ar = ucl_object_typed_new(UCL_ARRAY);
+
+ for (; dkim_sigs != NULL; dkim_sigs = dkim_sigs->next) {
+ GString *folded_header;
+ dkim_sig = (GString *) dkim_sigs->data;
+
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER ||
+ !task->message) {
+
+ folded_header = rspamd_header_value_fold(
+ "DKIM-Signature", strlen("DKIM-Signature"),
+ dkim_sig->str, dkim_sig->len,
+ 80, RSPAMD_TASK_NEWLINES_LF, NULL);
+ }
+ else {
+ folded_header = rspamd_header_value_fold(
+ "DKIM-Signature", strlen("DKIM-Signature"),
+ dkim_sig->str, dkim_sig->len,
+ 80,
+ MESSAGE_FIELD(task, nlines_type),
+ NULL);
+ }
+
+ ucl_array_append(ar,
+ ucl_object_fromstring_common(folded_header->str,
+ folded_header->len, UCL_STRING_RAW));
+ g_string_free(folded_header, TRUE);
+ }
+
+ ucl_object_insert_key(top,
+ ar,
+ "dkim-signature", 0,
+ false);
+ }
+ else {
+ /* Single DKIM signature */
+ GString *folded_header;
+ dkim_sig = (GString *) dkim_sigs->data;
+
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
+ folded_header = rspamd_header_value_fold(
+ "DKIM-Signature", strlen("DKIM-Signature"),
+ dkim_sig->str, dkim_sig->len,
+ 80, RSPAMD_TASK_NEWLINES_LF, NULL);
+ }
+ else {
+ folded_header = rspamd_header_value_fold(
+ "DKIM-Signature", strlen("DKIM-Signature"),
+ dkim_sig->str, dkim_sig->len,
+ 80, MESSAGE_FIELD(task, nlines_type),
+ NULL);
+ }
+
+ ucl_object_insert_key(top,
+ ucl_object_fromstring_common(folded_header->str,
+ folded_header->len, UCL_STRING_RAW),
+ "dkim-signature", 0, false);
+ g_string_free(folded_header, TRUE);
+ }
+ }
+ }
+
+ if (flags & RSPAMD_PROTOCOL_RMILTER) {
+ milter_reply = rspamd_mempool_get_variable(task->task_pool,
+ RSPAMD_MEMPOOL_MILTER_REPLY);
+
+ if (milter_reply) {
+ if (task->cmd != CMD_CHECK) {
+ ucl_object_insert_key(top, ucl_object_ref(milter_reply),
+ "milter", 0, false);
+ }
+ else {
+ ucl_object_insert_key(top, ucl_object_ref(milter_reply),
+ "rmilter", 0, false);
+ }
+ }
+ }
+
+ return top;
+}
+
+void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
+ struct rspamd_task *task, ucl_object_t **pobj)
+{
+ struct rspamd_scan_result *metric_res;
+ const struct rspamd_re_cache_stat *restat;
+
+ ucl_object_t *top = NULL;
+ rspamd_fstring_t *reply;
+ gint flags = RSPAMD_PROTOCOL_DEFAULT;
+ struct rspamd_action *action;
+
+ /* Removed in 2.0 */
+#if 0
+ GHashTableIter hiter;
+ gpointer h, v;
+ /* Write custom headers */
+ g_hash_table_iter_init (&hiter, task->reply_headers);
+ while (g_hash_table_iter_next (&hiter, &h, &v)) {
+ rspamd_ftok_t *hn = h, *hv = v;
+
+ rspamd_http_message_add_header (msg, hn->begin, hv->begin);
+ }
+#endif
+
+ flags |= RSPAMD_PROTOCOL_URLS;
+
+ top = rspamd_protocol_write_ucl(task, flags);
+
+ if (pobj) {
+ *pobj = top;
+ }
+
+ if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
+ rspamd_roll_history_update(task->worker->srv->history, task);
+ }
+ else {
+ msg_debug_protocol("skip history update due to no log flag");
+ }
+
+ rspamd_task_write_log(task);
+
+ if (task->cfg->log_flags & RSPAMD_LOG_FLAG_RE_CACHE) {
+ restat = rspamd_re_cache_get_stat(task->re_rt);
+ g_assert(restat != NULL);
+ msg_notice_task(
+ "regexp statistics: %ud pcre regexps scanned, %ud regexps matched,"
+ " %ud regexps total, %ud regexps cached,"
+ " %HL scanned using pcre, %HL scanned total",
+ restat->regexp_checked,
+ restat->regexp_matched,
+ restat->regexp_total,
+ restat->regexp_fast_cached,
+ restat->bytes_scanned_pcre,
+ restat->bytes_scanned);
+ }
+
+ reply = rspamd_fstring_sized_new(1000);
+
+ if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC(task)) {
+ msg_debug_protocol("writing json reply");
+ rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply);
+ }
+ else {
+ if (RSPAMD_TASK_IS_SPAMC(task)) {
+ msg_debug_protocol("writing spamc legacy reply to client");
+ rspamd_ucl_tospamc_output(top, &reply);
+ }
+ else {
+ msg_debug_protocol("writing rspamc legacy reply to client");
+ rspamd_ucl_torspamc_output(top, &reply);
+ }
+ }
+
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK) {
+ /* Check if we need to insert a body block */
+ if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) {
+ GString *hdr_offset = g_string_sized_new(30);
+
+ rspamd_printf_gstring(hdr_offset, "%z", RSPAMD_FSTRING_LEN(reply));
+ rspamd_http_message_add_header(msg, MESSAGE_OFFSET_HEADER,
+ hdr_offset->str);
+ msg_debug_protocol("write body block at position %s",
+ hdr_offset->str);
+ g_string_free(hdr_offset, TRUE);
+
+ /* In case of milter, we append just body, otherwise - full message */
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
+ const gchar *start;
+ goffset len, hdr_off;
+
+ start = task->msg.begin;
+ len = task->msg.len;
+
+ hdr_off = MESSAGE_FIELD(task, raw_headers_content).len;
+
+ if (hdr_off < len) {
+ start += hdr_off;
+ len -= hdr_off;
+
+ /* The problem here is that we need not end of headers, we need
+ * start of body.
+ *
+ * Hence, we need to skip one \r\n till there is anything else in
+ * a line.
+ */
+
+ if (*start == '\r' && len > 0) {
+ start++;
+ len--;
+ }
+
+ if (*start == '\n' && len > 0) {
+ start++;
+ len--;
+ }
+
+ msg_debug_protocol("milter version of body block size %d",
+ (int) len);
+ reply = rspamd_fstring_append(reply, start, len);
+ }
+ }
+ else {
+ msg_debug_protocol("general version of body block size %d",
+ (int) task->msg.len);
+ reply = rspamd_fstring_append(reply,
+ task->msg.begin, task->msg.len);
+ }
+ }
+ }
+
+ if ((task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) &&
+ rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
+ /* We can compress output */
+ ZSTD_inBuffer zin;
+ ZSTD_outBuffer zout;
+ ZSTD_CStream *zstream;
+ rspamd_fstring_t *compressed_reply;
+ gsize r;
+
+ zstream = task->cfg->libs_ctx->out_zstream;
+ compressed_reply = rspamd_fstring_sized_new(ZSTD_compressBound(reply->len));
+ zin.pos = 0;
+ zin.src = reply->str;
+ zin.size = reply->len;
+ zout.pos = 0;
+ zout.dst = compressed_reply->str;
+ zout.size = compressed_reply->allocated;
+
+ while (zin.pos < zin.size) {
+ r = ZSTD_compressStream(zstream, &zout, &zin);
+
+ if (ZSTD_isError(r)) {
+ msg_err_protocol("cannot compress: %s", ZSTD_getErrorName(r));
+ rspamd_fstring_free(compressed_reply);
+ rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+
+ goto end;
+ }
+ }
+
+ ZSTD_flushStream(zstream, &zout);
+ r = ZSTD_endStream(zstream, &zout);
+
+ if (ZSTD_isError(r)) {
+ msg_err_protocol("cannot finalize compress: %s", ZSTD_getErrorName(r));
+ rspamd_fstring_free(compressed_reply);
+ rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+
+ goto end;
+ }
+
+ msg_info_protocol("writing compressed results: %z bytes before "
+ "%z bytes after",
+ zin.pos, zout.pos);
+ compressed_reply->len = zout.pos;
+ rspamd_fstring_free(reply);
+ rspamd_http_message_set_body_from_fstring_steal(msg, compressed_reply);
+ rspamd_http_message_add_header(msg, COMPRESSION_HEADER, "zstd");
+
+ if (task->cfg->libs_ctx->out_dict &&
+ task->cfg->libs_ctx->out_dict->id != 0) {
+ gchar dict_str[32];
+
+ rspamd_snprintf(dict_str, sizeof(dict_str), "%ud",
+ task->cfg->libs_ctx->out_dict->id);
+ rspamd_http_message_add_header(msg, "Dictionary", dict_str);
+ }
+ }
+ else {
+ rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+ }
+
+end:
+ if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) {
+ /* Update stat for default metric */
+
+ msg_debug_protocol("skip stats update due to no_stat flag");
+ metric_res = task->result;
+
+ if (metric_res != NULL) {
+
+ action = rspamd_check_action_metric(task, NULL, NULL);
+ /* TODO: handle custom actions in stats */
+ if (action->action_type == METRIC_ACTION_SOFT_REJECT &&
+ (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) {
+ /* Set stat action to greylist to display greylisted messages */
+#ifndef HAVE_ATOMIC_BUILTINS
+ task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++;
+#else
+ __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST],
+ 1, __ATOMIC_RELEASE);
+#endif
+ }
+ else if (action->action_type < METRIC_ACTION_MAX) {
+#ifndef HAVE_ATOMIC_BUILTINS
+ task->worker->srv->stat->actions_stat[action->action_type]++;
+#else
+ __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type],
+ 1, __ATOMIC_RELEASE);
+#endif
+ }
+ }
+
+ /* Increase counters */
+#ifndef HAVE_ATOMIC_BUILTINS
+ task->worker->srv->stat->messages_scanned++;
+#else
+ __atomic_add_fetch(&task->worker->srv->stat->messages_scanned,
+ 1, __ATOMIC_RELEASE);
+#endif
+
+ /* Set average processing time */
+ guint32 slot;
+ float processing_time = task->time_real_finish - task->task_timestamp;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+ slot = task->worker->srv->stat->avg_time.cur_slot++;
+#else
+ slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot,
+ 1, __ATOMIC_RELEASE);
+#endif
+ slot = slot % MAX_AVG_TIME_SLOTS;
+ /* TODO: this should be atomic but it is not supported in C */
+ task->worker->srv->stat->avg_time.avg_time[slot] = processing_time;
+ }
+}
+
+void rspamd_protocol_write_log_pipe(struct rspamd_task *task)
+{
+ struct rspamd_worker_log_pipe *lp;
+ struct rspamd_protocol_log_message_sum *ls;
+ lua_State *L = task->cfg->lua_state;
+ struct rspamd_scan_result *mres;
+ struct rspamd_symbol_result *sym;
+ gint id, i;
+ guint32 n = 0, nextra = 0;
+ gsize sz;
+ GArray *extra;
+ struct rspamd_protocol_log_symbol_result er;
+ struct rspamd_task **ptask;
+
+ /* Get extra results from lua plugins */
+ extra = g_array_new(FALSE, FALSE, sizeof(er));
+
+ lua_getglobal(L, "rspamd_plugins");
+ if (lua_istable(L, -1)) {
+ lua_pushnil(L);
+
+ while (lua_next(L, -2)) {
+ if (lua_istable(L, -1)) {
+ lua_pushvalue(L, -2);
+ /* stack:
+ * -1: copy of key
+ * -2: value (module table)
+ * -3: key (module name)
+ * -4: global
+ */
+ lua_pushstring(L, "log_callback");
+ lua_gettable(L, -3);
+ /* stack:
+ * -1: func
+ * -2: copy of key
+ * -3: value (module table)
+ * -3: key (module name)
+ * -4: global
+ */
+ if (lua_isfunction(L, -1)) {
+ ptask = lua_newuserdata(L, sizeof(*ptask));
+ *ptask = task;
+ rspamd_lua_setclass(L, "rspamd{task}", -1);
+ /* stack:
+ * -1: task
+ * -2: func
+ * -3: key copy
+ * -4: value (module table)
+ * -5: key (module name)
+ * -6: global
+ */
+ msg_debug_protocol("calling for %s", lua_tostring(L, -3));
+ if (lua_pcall(L, 1, 1, 0) != 0) {
+ msg_info_protocol("call to log callback %s failed: %s",
+ lua_tostring(L, -2), lua_tostring(L, -1));
+ lua_pop(L, 1);
+ /* stack:
+ * -1: key copy
+ * -2: value
+ * -3: key
+ */
+ }
+ else {
+ /* stack:
+ * -1: result
+ * -2: key copy
+ * -3: value
+ * -4: key
+ */
+ if (lua_istable(L, -1)) {
+ /* Another iteration */
+ lua_pushnil(L);
+
+ while (lua_next(L, -2)) {
+ /* stack:
+ * -1: value
+ * -2: key
+ * -3: result table (pcall)
+ * -4: key copy (parent)
+ * -5: value (parent)
+ * -6: key (parent)
+ */
+ if (lua_istable(L, -1)) {
+ er.id = 0;
+ er.score = 0.0;
+
+ lua_rawgeti(L, -1, 1);
+ if (lua_isnumber(L, -1)) {
+ er.id = lua_tonumber(L, -1);
+ }
+ lua_rawgeti(L, -2, 2);
+ if (lua_isnumber(L, -1)) {
+ er.score = lua_tonumber(L, -1);
+ }
+ /* stack:
+ * -1: value[2]
+ * -2: value[1]
+ * -3: values
+ * -4: key
+ * -5: result table (pcall)
+ * -6: key copy (parent)
+ * -7: value (parent)
+ * -8: key (parent)
+ */
+ lua_pop(L, 2); /* Values */
+ g_array_append_val(extra, er);
+ }
+
+ lua_pop(L, 1); /* Value for lua_next */
+ }
+
+ lua_pop(L, 1); /* Table result of pcall */
+ }
+ else {
+ msg_info_protocol("call to log callback %s returned "
+ "wrong type: %s",
+ lua_tostring(L, -2),
+ lua_typename(L, lua_type(L, -1)));
+ lua_pop(L, 1); /* Returned error */
+ }
+ }
+ }
+ else {
+ lua_pop(L, 1);
+ /* stack:
+ * -1: key copy
+ * -2: value
+ * -3: key
+ */
+ }
+ }
+
+ lua_pop(L, 2); /* Top table + key copy */
+ }
+
+ lua_pop(L, 1); /* rspamd_plugins global */
+ }
+ else {
+ lua_pop(L, 1);
+ }
+
+ nextra = extra->len;
+
+ LL_FOREACH(task->cfg->log_pipes, lp)
+ {
+ if (lp->fd != -1) {
+ switch (lp->type) {
+ case RSPAMD_LOG_PIPE_SYMBOLS:
+ mres = task->result;
+
+ if (mres) {
+ n = kh_size(mres->symbols);
+ sz = sizeof(*ls) +
+ sizeof(struct rspamd_protocol_log_symbol_result) *
+ (n + nextra);
+ ls = g_malloc0(sz);
+
+ /* Handle settings id */
+
+ if (task->settings_elt) {
+ ls->settings_id = task->settings_elt->id;
+ }
+ else {
+ ls->settings_id = 0;
+ }
+
+ ls->score = mres->score;
+ ls->required_score = rspamd_task_get_required_score(task,
+ mres);
+ ls->nresults = n;
+ ls->nextra = nextra;
+
+ i = 0;
+
+ kh_foreach_value(mres->symbols, sym, {
+ id = rspamd_symcache_find_symbol(task->cfg->cache,
+ sym->name);
+
+ if (id >= 0) {
+ ls->results[i].id = id;
+ ls->results[i].score = sym->score;
+ }
+ else {
+ ls->results[i].id = -1;
+ ls->results[i].score = 0.0;
+ }
+
+ i++;
+ });
+
+ memcpy(&ls->results[n], extra->data, nextra * sizeof(er));
+ }
+ else {
+ sz = sizeof(*ls);
+ ls = g_malloc0(sz);
+ ls->nresults = 0;
+ }
+
+ /* We don't really care about return value here */
+ if (write(lp->fd, ls, sz) == -1) {
+ msg_info_protocol("cannot write to log pipe: %s",
+ strerror(errno));
+ }
+
+ g_free(ls);
+ break;
+ default:
+ msg_err_protocol("unknown log format %d", lp->type);
+ break;
+ }
+ }
+ }
+
+ g_array_free(extra, TRUE);
+}
+
+void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout)
+{
+ struct rspamd_http_message *msg;
+ const gchar *ctype = "application/json";
+ rspamd_fstring_t *reply;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+
+ if (rspamd_http_connection_is_encrypted(task->http_conn)) {
+ msg_info_protocol("<%s> writing encrypted reply",
+ MESSAGE_FIELD_CHECK(task, message_id));
+ }
+
+ /* Compatibility */
+ if (task->cmd == CMD_CHECK_RSPAMC) {
+ msg->method = HTTP_SYMBOLS;
+ }
+ else if (task->cmd == CMD_CHECK_SPAMC) {
+ msg->method = HTTP_SYMBOLS;
+ msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
+ }
+
+ if (task->err != NULL) {
+ msg_debug_protocol("writing error reply to client");
+ ucl_object_t *top = NULL;
+
+ top = ucl_object_typed_new(UCL_OBJECT);
+ msg->code = 500 + task->err->code % 100;
+ msg->status = rspamd_fstring_new_init(task->err->message,
+ strlen(task->err->message));
+ ucl_object_insert_key(top, ucl_object_fromstring(task->err->message),
+ "error", 0, false);
+ ucl_object_insert_key(top,
+ ucl_object_fromstring(g_quark_to_string(task->err->domain)),
+ "error_domain", 0, false);
+ reply = rspamd_fstring_sized_new(256);
+ rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply);
+ ucl_object_unref(top);
+
+ /* We also need to validate utf8 */
+ if (rspamd_fast_utf8_validate(reply->str, reply->len) != 0) {
+ gsize valid_len;
+ gchar *validated;
+
+ /* We copy reply several times here but it should be a rare case */
+ validated = rspamd_str_make_utf_valid(reply->str, reply->len,
+ &valid_len, task->task_pool);
+ rspamd_http_message_set_body(msg, validated, valid_len);
+ rspamd_fstring_free(reply);
+ }
+ else {
+ rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+ }
+ }
+ else {
+ msg->status = rspamd_fstring_new_init("OK", 2);
+
+ switch (task->cmd) {
+ case CMD_CHECK:
+ case CMD_CHECK_RSPAMC:
+ case CMD_CHECK_SPAMC:
+ case CMD_SKIP:
+ case CMD_CHECK_V2:
+ rspamd_protocol_http_reply(msg, task, NULL);
+ rspamd_protocol_write_log_pipe(task);
+ break;
+ case CMD_PING:
+ msg_debug_protocol("writing pong to client");
+ rspamd_http_message_set_body(msg, "pong" CRLF, 6);
+ ctype = "text/plain";
+ break;
+ default:
+ msg_err_protocol("BROKEN");
+ break;
+ }
+ }
+
+ ev_now_update(task->event_loop);
+ msg->date = ev_time();
+
+ rspamd_http_connection_reset(task->http_conn);
+ rspamd_http_connection_write_message(task->http_conn, msg, NULL,
+ ctype, task, timeout);
+
+ task->processed_stages |= RSPAMD_TASK_STAGE_REPLIED;
+}
diff --git a/src/libserver/protocol.h b/src/libserver/protocol.h
new file mode 100644
index 0000000..0e3c187
--- /dev/null
+++ b/src/libserver/protocol.h
@@ -0,0 +1,130 @@
+/**
+ * @file protocol.h
+ * Rspamd protocol definition
+ */
+
+#ifndef RSPAMD_PROTOCOL_H
+#define RSPAMD_PROTOCOL_H
+
+#include "config.h"
+#include "scan_result.h"
+#include "libserver/http/http_connection.h"
+#include "task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RSPAMD_BASE_ERROR 500
+#define RSPAMD_FILTER_ERROR RSPAMD_BASE_ERROR + 1
+#define RSPAMD_NETWORK_ERROR RSPAMD_BASE_ERROR + 2
+#define RSPAMD_PROTOCOL_ERROR RSPAMD_BASE_ERROR + 3
+#define RSPAMD_LENGTH_ERROR RSPAMD_BASE_ERROR + 4
+#define RSPAMD_STATFILE_ERROR RSPAMD_BASE_ERROR + 5
+
+struct rspamd_protocol_log_symbol_result {
+ guint32 id;
+ float score;
+};
+struct rspamd_protocol_log_message_sum {
+ guint32 nresults;
+ guint32 nextra;
+ guint32 settings_id;
+ gdouble score;
+ gdouble required_score;
+ struct rspamd_protocol_log_symbol_result results[];
+};
+
+struct rspamd_metric;
+
+/**
+ * Process headers into HTTP message and set appropriate task fields
+ * @param task
+ * @param msg
+ * @return
+ */
+gboolean rspamd_protocol_handle_headers(struct rspamd_task *task,
+ struct rspamd_http_message *msg);
+
+/**
+ * Process control chunk and update task structure accordingly
+ * @param task
+ * @param control
+ * @return
+ */
+gboolean rspamd_protocol_handle_control(struct rspamd_task *task,
+ const ucl_object_t *control);
+
+/**
+ * Process HTTP request to the task structure
+ * @param task
+ * @param msg
+ * @return
+ */
+gboolean rspamd_protocol_handle_request(struct rspamd_task *task,
+ struct rspamd_http_message *msg);
+
+/**
+ * Write task results to http message
+ * @param msg
+ * @param task
+ */
+void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
+ struct rspamd_task *task, ucl_object_t **pobj);
+
+/**
+ * Write data to log pipes
+ * @param task
+ */
+void rspamd_protocol_write_log_pipe(struct rspamd_task *task);
+
+enum rspamd_protocol_flags {
+ RSPAMD_PROTOCOL_BASIC = 1 << 0,
+ RSPAMD_PROTOCOL_METRICS = 1 << 1,
+ RSPAMD_PROTOCOL_MESSAGES = 1 << 2,
+ RSPAMD_PROTOCOL_RMILTER = 1 << 3,
+ RSPAMD_PROTOCOL_DKIM = 1 << 4,
+ RSPAMD_PROTOCOL_URLS = 1 << 5,
+ RSPAMD_PROTOCOL_EXTRA = 1 << 6,
+};
+
+#define RSPAMD_PROTOCOL_DEFAULT (RSPAMD_PROTOCOL_BASIC | \
+ RSPAMD_PROTOCOL_METRICS | \
+ RSPAMD_PROTOCOL_MESSAGES | \
+ RSPAMD_PROTOCOL_RMILTER | \
+ RSPAMD_PROTOCOL_DKIM | \
+ RSPAMD_PROTOCOL_EXTRA)
+
+/**
+ * Write reply to ucl object filling log buffer
+ * @param task
+ * @param logbuf
+ * @return
+ */
+ucl_object_t *rspamd_protocol_write_ucl(struct rspamd_task *task,
+ enum rspamd_protocol_flags flags);
+
+/**
+ * Write reply for specified task command
+ * @param task task object
+ * @return 0 if we wrote reply and -1 if there was some error
+ */
+void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout);
+
+/**
+ * Convert rspamd output to legacy protocol reply
+ * @param task
+ * @param top
+ * @param out
+ */
+void rspamd_ucl_torspamc_output(const ucl_object_t *top,
+ rspamd_fstring_t **out);
+
+void rspamd_ucl_tospamc_output(const ucl_object_t *top,
+ rspamd_fstring_t **out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/protocol_internal.h b/src/libserver/protocol_internal.h
new file mode 100644
index 0000000..c604e96
--- /dev/null
+++ b/src/libserver/protocol_internal.h
@@ -0,0 +1,99 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_PROTOCOL_INTERNAL_H
+#define RSPAMD_PROTOCOL_INTERNAL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Just check if the passed message is spam or not and reply as
+ * described below
+ */
+#define MSG_CMD_CHECK "check"
+
+/*
+ * Modern check version
+ */
+#define MSG_CMD_CHECK_V2 "checkv2"
+#define MSG_CMD_SCAN "scan"
+
+/*
+ * Check if message is spam or not, and return score plus list
+ * of symbols hit
+ */
+#define MSG_CMD_SYMBOLS "symbols"
+/*
+ * Check if message is spam or not, and return score plus report
+ */
+#define MSG_CMD_REPORT "report"
+/*
+ * Check if message is spam or not, and return score plus report
+ * if the message is spam
+ */
+#define MSG_CMD_REPORT_IFSPAM "report_ifspam"
+/*
+ * Ignore this message -- client opened connection then changed
+ */
+#define MSG_CMD_SKIP "skip"
+/*
+ * Return a confirmation that spamd is alive
+ */
+#define MSG_CMD_PING "ping"
+/*
+ * Process this message as described above and return modified message
+ */
+#define MSG_CMD_PROCESS "process"
+/*
+ * Headers
+ */
+#define HELO_HEADER "Helo"
+#define FROM_HEADER "From"
+#define IP_ADDR_HEADER "IP"
+#define RCPT_HEADER "Rcpt"
+#define SUBJECT_HEADER "Subject"
+#define SETTINGS_ID_HEADER "Settings-ID"
+#define SETTINGS_HEADER "Settings"
+#define QUEUE_ID_HEADER "Queue-ID"
+#define USER_HEADER "User"
+#define URLS_HEADER "URL-Format"
+#define PASS_HEADER "Pass"
+#define HOSTNAME_HEADER "Hostname"
+#define DELIVER_TO_HEADER "Deliver-To"
+#define NO_LOG_HEADER "Log"
+#define MLEN_HEADER "Message-Length"
+#define USER_AGENT_HEADER "User-Agent"
+#define MTA_TAG_HEADER "MTA-Tag"
+#define PROFILE_HEADER "Profile"
+#define TLS_CIPHER_HEADER "TLS-Cipher"
+#define TLS_VERSION_HEADER "TLS-Version"
+#define MTA_NAME_HEADER "MTA-Name"
+#define MILTER_HEADER "Milter"
+#define FILENAME_HEADER "Filename"
+#define FLAGS_HEADER "Flags"
+#define CERT_ISSUER_HEADER "TLS-Cert-Issuer"
+#define MAILER_HEADER "Mailer"
+#define RAW_DATA_HEADER "Raw"
+#define COMPRESSION_HEADER "Compression"
+#define MESSAGE_OFFSET_HEADER "Message-Offset"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//RSPAMD_PROTOCOL_INTERNAL_H
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
new file mode 100644
index 0000000..d51dba6
--- /dev/null
+++ b/src/libserver/re_cache.c
@@ -0,0 +1,2712 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "libmime/message.h"
+#include "re_cache.h"
+#include "cryptobox.h"
+#include "ref.h"
+#include "libserver/url.h"
+#include "libserver/task.h"
+#include "libserver/cfg_file.h"
+#include "libutil/util.h"
+#include "libutil/regexp.h"
+#include "lua/lua_common.h"
+#include "libstat/stat_api.h"
+#include "contrib/uthash/utlist.h"
+
+#include "khash.h"
+
+#ifdef WITH_HYPERSCAN
+#include "hs.h"
+#include "hyperscan_tools.h"
+#endif
+
+#include "unix-std.h"
+#include <signal.h>
+#include <stdalign.h>
+#include <math.h>
+#include "contrib/libev/ev.h"
+
+#ifndef WITH_PCRE2
+#include <pcre.h>
+#else
+#include <pcre2.h>
+#endif
+
+#include "contrib/fastutf8/fastutf8.h"
+
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif
+
+#define msg_err_re_cache(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "re_cache", cache->hash, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_re_cache(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "re_cache", cache->hash, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_re_cache(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "re_cache", cache->hash, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#define msg_debug_re_task(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_re_cache_log_id, "re_cache", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_re_cache(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_re_cache_log_id, "re_cache", cache->hash, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(re_cache)
+
+#ifdef WITH_HYPERSCAN
+#define RSPAMD_HS_MAGIC_LEN (sizeof(rspamd_hs_magic))
+static const guchar rspamd_hs_magic[] = {'r', 's', 'h', 's', 'r', 'e', '1', '1'},
+ rspamd_hs_magic_vector[] = {'r', 's', 'h', 's', 'r', 'v', '1', '1'};
+#endif
+
+
+struct rspamd_re_class {
+ guint64 id;
+ enum rspamd_re_type type;
+ gboolean has_utf8; /* if there are any utf8 regexps */
+ gpointer type_data;
+ gsize type_len;
+ GHashTable *re;
+ rspamd_cryptobox_hash_state_t *st;
+
+ gchar hash[rspamd_cryptobox_HASHBYTES + 1];
+
+#ifdef WITH_HYPERSCAN
+ rspamd_hyperscan_t *hs_db;
+ hs_scratch_t *hs_scratch;
+ gint *hs_ids;
+ guint nhs;
+#endif
+};
+
+enum rspamd_re_cache_elt_match_type {
+ RSPAMD_RE_CACHE_PCRE = 0,
+ RSPAMD_RE_CACHE_HYPERSCAN,
+ RSPAMD_RE_CACHE_HYPERSCAN_PRE
+};
+
+struct rspamd_re_cache_elt {
+ rspamd_regexp_t *re;
+ gint lua_cbref;
+ enum rspamd_re_cache_elt_match_type match_type;
+};
+
+KHASH_INIT(lua_selectors_hash, gchar *, int, 1, kh_str_hash_func, kh_str_hash_equal);
+
+struct rspamd_re_cache {
+ GHashTable *re_classes;
+
+ GPtrArray *re;
+ khash_t(lua_selectors_hash) * selectors;
+ ref_entry_t ref;
+ guint nre;
+ guint max_re_data;
+ gchar hash[rspamd_cryptobox_HASHBYTES + 1];
+ lua_State *L;
+#ifdef WITH_HYPERSCAN
+ enum rspamd_hyperscan_status hyperscan_loaded;
+ gboolean disable_hyperscan;
+ hs_platform_info_t plt;
+#endif
+};
+
+struct rspamd_re_selector_result {
+ guchar **scvec;
+ guint *lenvec;
+ guint cnt;
+};
+
+KHASH_INIT(selectors_results_hash, int, struct rspamd_re_selector_result, 1,
+ kh_int_hash_func, kh_int_hash_equal);
+
+struct rspamd_re_runtime {
+ guchar *checked;
+ guchar *results;
+ khash_t(selectors_results_hash) * sel_cache;
+ struct rspamd_re_cache *cache;
+ struct rspamd_re_cache_stat stat;
+ gboolean has_hs;
+};
+
+static GQuark
+rspamd_re_cache_quark(void)
+{
+ return g_quark_from_static_string("re_cache");
+}
+
+static guint64
+rspamd_re_cache_class_id(enum rspamd_re_type type,
+ gconstpointer type_data,
+ gsize datalen)
+{
+ rspamd_cryptobox_fast_hash_state_t st;
+
+ rspamd_cryptobox_fast_hash_init(&st, 0xdeadbabe);
+ rspamd_cryptobox_fast_hash_update(&st, &type, sizeof(type));
+
+ if (datalen > 0) {
+ rspamd_cryptobox_fast_hash_update(&st, type_data, datalen);
+ }
+
+ return rspamd_cryptobox_fast_hash_final(&st);
+}
+
+static void
+rspamd_re_cache_destroy(struct rspamd_re_cache *cache)
+{
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_re_class *re_class;
+ gchar *skey;
+ gint sref;
+
+ g_assert(cache != NULL);
+ g_hash_table_iter_init(&it, cache->re_classes);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ re_class = v;
+ g_hash_table_iter_steal(&it);
+ g_hash_table_unref(re_class->re);
+
+ if (re_class->type_data) {
+ g_free(re_class->type_data);
+ }
+
+#ifdef WITH_HYPERSCAN
+ if (re_class->hs_db) {
+ rspamd_hyperscan_free(re_class->hs_db, false);
+ }
+ if (re_class->hs_scratch) {
+ hs_free_scratch(re_class->hs_scratch);
+ }
+ if (re_class->hs_ids) {
+ g_free(re_class->hs_ids);
+ }
+#endif
+ g_free(re_class);
+ }
+
+ if (cache->L) {
+ kh_foreach(cache->selectors, skey, sref, {
+ luaL_unref(cache->L, LUA_REGISTRYINDEX, sref);
+ g_free(skey);
+ });
+
+ struct rspamd_re_cache_elt *elt;
+ guint i;
+
+ PTR_ARRAY_FOREACH(cache->re, i, elt)
+ {
+ if (elt->lua_cbref != -1) {
+ luaL_unref(cache->L, LUA_REGISTRYINDEX, elt->lua_cbref);
+ }
+ }
+ }
+
+ kh_destroy(lua_selectors_hash, cache->selectors);
+
+ g_hash_table_unref(cache->re_classes);
+ g_ptr_array_free(cache->re, TRUE);
+ g_free(cache);
+}
+
+static void
+rspamd_re_cache_elt_dtor(gpointer e)
+{
+ struct rspamd_re_cache_elt *elt = e;
+
+ rspamd_regexp_unref(elt->re);
+ g_free(elt);
+}
+
+struct rspamd_re_cache *
+rspamd_re_cache_new(void)
+{
+ struct rspamd_re_cache *cache;
+
+ cache = g_malloc0(sizeof(*cache));
+ cache->re_classes = g_hash_table_new(g_int64_hash, g_int64_equal);
+ cache->nre = 0;
+ cache->re = g_ptr_array_new_full(256, rspamd_re_cache_elt_dtor);
+ cache->selectors = kh_init(lua_selectors_hash);
+#ifdef WITH_HYPERSCAN
+ cache->hyperscan_loaded = RSPAMD_HYPERSCAN_UNKNOWN;
+#endif
+ REF_INIT_RETAIN(cache, rspamd_re_cache_destroy);
+
+ return cache;
+}
+
+enum rspamd_hyperscan_status
+rspamd_re_cache_is_hs_loaded(struct rspamd_re_cache *cache)
+{
+ g_assert(cache != NULL);
+
+#ifdef WITH_HYPERSCAN
+ return cache->hyperscan_loaded;
+#else
+ return RSPAMD_HYPERSCAN_UNSUPPORTED;
+#endif
+}
+
+rspamd_regexp_t *
+rspamd_re_cache_add(struct rspamd_re_cache *cache,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gconstpointer type_data, gsize datalen,
+ gint lua_cbref)
+{
+ guint64 class_id;
+ struct rspamd_re_class *re_class;
+ rspamd_regexp_t *nre;
+ struct rspamd_re_cache_elt *elt;
+
+ g_assert(cache != NULL);
+ g_assert(re != NULL);
+
+ class_id = rspamd_re_cache_class_id(type, type_data, datalen);
+ re_class = g_hash_table_lookup(cache->re_classes, &class_id);
+
+ if (re_class == NULL) {
+ re_class = g_malloc0(sizeof(*re_class));
+ re_class->id = class_id;
+ re_class->type_len = datalen;
+ re_class->type = type;
+ re_class->re = g_hash_table_new_full(rspamd_regexp_hash,
+ rspamd_regexp_equal, NULL, (GDestroyNotify) rspamd_regexp_unref);
+
+ if (datalen > 0) {
+ re_class->type_data = g_malloc0(datalen);
+ memcpy(re_class->type_data, type_data, datalen);
+ }
+
+ g_hash_table_insert(cache->re_classes, &re_class->id, re_class);
+ }
+
+ if ((nre = g_hash_table_lookup(re_class->re, rspamd_regexp_get_id(re))) == NULL) {
+ /*
+ * We set re id based on the global position in the cache
+ */
+ elt = g_malloc0(sizeof(*elt));
+ /* One ref for re_class */
+ nre = rspamd_regexp_ref(re);
+ rspamd_regexp_set_cache_id(re, cache->nre++);
+ /* One ref for cache */
+ elt->re = rspamd_regexp_ref(re);
+ g_ptr_array_add(cache->re, elt);
+ rspamd_regexp_set_class(re, re_class);
+ elt->lua_cbref = lua_cbref;
+
+ g_hash_table_insert(re_class->re, rspamd_regexp_get_id(nre), nre);
+ }
+
+ if (rspamd_regexp_get_flags(re) & RSPAMD_REGEXP_FLAG_UTF) {
+ re_class->has_utf8 = TRUE;
+ }
+
+ return nre;
+}
+
+void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
+ rspamd_regexp_t *what,
+ rspamd_regexp_t *with)
+{
+ guint64 re_id;
+ struct rspamd_re_class *re_class;
+ rspamd_regexp_t *src;
+ struct rspamd_re_cache_elt *elt;
+
+ g_assert(cache != NULL);
+ g_assert(what != NULL);
+ g_assert(with != NULL);
+
+ re_class = rspamd_regexp_get_class(what);
+
+ if (re_class != NULL) {
+ re_id = rspamd_regexp_get_cache_id(what);
+
+ g_assert(re_id != RSPAMD_INVALID_ID);
+ src = g_hash_table_lookup(re_class->re, rspamd_regexp_get_id(what));
+ elt = g_ptr_array_index(cache->re, re_id);
+ g_assert(elt != NULL);
+ g_assert(src != NULL);
+
+ rspamd_regexp_set_cache_id(what, RSPAMD_INVALID_ID);
+ rspamd_regexp_set_class(what, NULL);
+ rspamd_regexp_set_cache_id(with, re_id);
+ rspamd_regexp_set_class(with, re_class);
+ /*
+ * On calling of this function, we actually unref old re (what)
+ */
+ g_hash_table_insert(re_class->re,
+ rspamd_regexp_get_id(what),
+ rspamd_regexp_ref(with));
+
+ rspamd_regexp_unref(elt->re);
+ elt->re = rspamd_regexp_ref(with);
+ /* XXX: do not touch match type here */
+ }
+}
+
+static gint
+rspamd_re_cache_sort_func(gconstpointer a, gconstpointer b)
+{
+ struct rspamd_re_cache_elt *const *re1 = a, *const *re2 = b;
+
+ return rspamd_regexp_cmp(rspamd_regexp_get_id((*re1)->re),
+ rspamd_regexp_get_id((*re2)->re));
+}
+
+void rspamd_re_cache_init(struct rspamd_re_cache *cache, struct rspamd_config *cfg)
+{
+ guint i, fl;
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_re_class *re_class;
+ rspamd_cryptobox_hash_state_t st_global;
+ rspamd_regexp_t *re;
+ struct rspamd_re_cache_elt *elt;
+ guchar hash_out[rspamd_cryptobox_HASHBYTES];
+
+ g_assert(cache != NULL);
+
+ rspamd_cryptobox_hash_init(&st_global, NULL, 0);
+ /* Resort all regexps */
+ g_ptr_array_sort(cache->re, rspamd_re_cache_sort_func);
+
+ for (i = 0; i < cache->re->len; i++) {
+ elt = g_ptr_array_index(cache->re, i);
+ re = elt->re;
+ re_class = rspamd_regexp_get_class(re);
+ g_assert(re_class != NULL);
+ rspamd_regexp_set_cache_id(re, i);
+
+ if (re_class->st == NULL) {
+ (void) !posix_memalign((void **) &re_class->st, RSPAMD_ALIGNOF(rspamd_cryptobox_hash_state_t),
+ sizeof(*re_class->st));
+ g_assert(re_class->st != NULL);
+ rspamd_cryptobox_hash_init(re_class->st, NULL, 0);
+ }
+
+ /* Update hashes */
+ /* Id of re class */
+ rspamd_cryptobox_hash_update(re_class->st, (gpointer) &re_class->id,
+ sizeof(re_class->id));
+ rspamd_cryptobox_hash_update(&st_global, (gpointer) &re_class->id,
+ sizeof(re_class->id));
+ /* Id of re expression */
+ rspamd_cryptobox_hash_update(re_class->st, rspamd_regexp_get_id(re),
+ rspamd_cryptobox_HASHBYTES);
+ rspamd_cryptobox_hash_update(&st_global, rspamd_regexp_get_id(re),
+ rspamd_cryptobox_HASHBYTES);
+ /* PCRE flags */
+ fl = rspamd_regexp_get_pcre_flags(re);
+ rspamd_cryptobox_hash_update(re_class->st, (const guchar *) &fl,
+ sizeof(fl));
+ rspamd_cryptobox_hash_update(&st_global, (const guchar *) &fl,
+ sizeof(fl));
+ /* Rspamd flags */
+ fl = rspamd_regexp_get_flags(re);
+ rspamd_cryptobox_hash_update(re_class->st, (const guchar *) &fl,
+ sizeof(fl));
+ rspamd_cryptobox_hash_update(&st_global, (const guchar *) &fl,
+ sizeof(fl));
+ /* Limit of hits */
+ fl = rspamd_regexp_get_maxhits(re);
+ rspamd_cryptobox_hash_update(re_class->st, (const guchar *) &fl,
+ sizeof(fl));
+ rspamd_cryptobox_hash_update(&st_global, (const guchar *) &fl,
+ sizeof(fl));
+ /* Numeric order */
+ rspamd_cryptobox_hash_update(re_class->st, (const guchar *) &i,
+ sizeof(i));
+ rspamd_cryptobox_hash_update(&st_global, (const guchar *) &i,
+ sizeof(i));
+ }
+
+ rspamd_cryptobox_hash_final(&st_global, hash_out);
+ rspamd_snprintf(cache->hash, sizeof(cache->hash), "%*xs",
+ (gint) rspamd_cryptobox_HASHBYTES, hash_out);
+
+ /* Now finalize all classes */
+ g_hash_table_iter_init(&it, cache->re_classes);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ re_class = v;
+
+ if (re_class->st) {
+ /*
+ * We finally update all classes with the number of expressions
+ * in the cache to ensure that if even a single re has been changed
+ * we won't be broken due to id mismatch
+ */
+ rspamd_cryptobox_hash_update(re_class->st,
+ (gpointer) &cache->re->len,
+ sizeof(cache->re->len));
+ rspamd_cryptobox_hash_final(re_class->st, hash_out);
+ rspamd_snprintf(re_class->hash, sizeof(re_class->hash), "%*xs",
+ (gint) rspamd_cryptobox_HASHBYTES, hash_out);
+ free(re_class->st); /* Due to posix_memalign */
+ re_class->st = NULL;
+ }
+ }
+
+ cache->L = cfg->lua_state;
+
+#ifdef WITH_HYPERSCAN
+ const gchar *platform = "generic";
+ rspamd_fstring_t *features = rspamd_fstring_new();
+
+ cache->disable_hyperscan = cfg->disable_hyperscan;
+
+ g_assert(hs_populate_platform(&cache->plt) == HS_SUCCESS);
+
+ /* Now decode what we do have */
+ switch (cache->plt.tune) {
+ case HS_TUNE_FAMILY_HSW:
+ platform = "haswell";
+ break;
+ case HS_TUNE_FAMILY_SNB:
+ platform = "sandy";
+ break;
+ case HS_TUNE_FAMILY_BDW:
+ platform = "broadwell";
+ break;
+ case HS_TUNE_FAMILY_IVB:
+ platform = "ivy";
+ break;
+ default:
+ break;
+ }
+
+ if (cache->plt.cpu_features & HS_CPU_FEATURES_AVX2) {
+ features = rspamd_fstring_append(features, "AVX2", 4);
+ }
+
+ hs_set_allocator(g_malloc, g_free);
+
+ msg_info_re_cache("loaded hyperscan engine with cpu tune '%s' and features '%V'",
+ platform, features);
+
+ rspamd_fstring_free(features);
+#endif
+}
+
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
+{
+ struct rspamd_re_runtime *rt;
+ g_assert(cache != NULL);
+
+ rt = g_malloc0(sizeof(*rt) + NBYTES(cache->nre) + cache->nre);
+ rt->cache = cache;
+ REF_RETAIN(cache);
+ rt->checked = ((guchar *) rt) + sizeof(*rt);
+ rt->results = rt->checked + NBYTES(cache->nre);
+ rt->stat.regexp_total = cache->nre;
+#ifdef WITH_HYPERSCAN
+ rt->has_hs = cache->hyperscan_loaded;
+#endif
+
+ return rt;
+}
+
+const struct rspamd_re_cache_stat *
+rspamd_re_cache_get_stat(struct rspamd_re_runtime *rt)
+{
+ g_assert(rt != NULL);
+
+ return &rt->stat;
+}
+
+static gboolean
+rspamd_re_cache_check_lua_condition(struct rspamd_task *task,
+ rspamd_regexp_t *re,
+ const guchar *in, gsize len,
+ goffset start, goffset end,
+ gint lua_cbref)
+{
+ lua_State *L = (lua_State *) task->cfg->lua_state;
+ GError *err = NULL;
+ struct rspamd_lua_text __attribute__((unused)) * t;
+ gint text_pos;
+
+ if (G_LIKELY(lua_cbref == -1)) {
+ return TRUE;
+ }
+
+ t = lua_new_text(L, in, len, FALSE);
+ text_pos = lua_gettop(L);
+
+ if (!rspamd_lua_universal_pcall(L, lua_cbref,
+ G_STRLOC, 1, "utii", &err,
+ "rspamd{task}", task,
+ text_pos, start, end)) {
+ msg_warn_task("cannot call for re_cache_check_lua_condition for re %s: %e",
+ rspamd_regexp_get_pattern(re), err);
+ g_error_free(err);
+ lua_settop(L, text_pos - 1);
+
+ return TRUE;
+ }
+
+ gboolean res = lua_toboolean(L, -1);
+
+ lua_settop(L, text_pos - 1);
+
+ return res;
+}
+
+static guint
+rspamd_re_cache_process_pcre(struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re, struct rspamd_task *task,
+ const guchar *in, gsize len,
+ gboolean is_raw,
+ gint lua_cbref)
+{
+ guint r = 0;
+ const gchar *start = NULL, *end = NULL;
+ guint max_hits = rspamd_regexp_get_maxhits(re);
+ guint64 id = rspamd_regexp_get_cache_id(re);
+ gdouble t1 = NAN, t2, pr;
+ const gdouble slow_time = 1e8;
+
+ if (in == NULL) {
+ return rt->results[id];
+ }
+
+ if (len == 0) {
+ return rt->results[id];
+ }
+
+ if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) {
+ len = rt->cache->max_re_data;
+ }
+
+ r = rt->results[id];
+
+ if (max_hits == 0 || r < max_hits) {
+ pr = rspamd_random_double_fast();
+
+ if (pr > 0.9) {
+ t1 = rspamd_get_ticks(TRUE);
+ }
+
+ while (rspamd_regexp_search(re,
+ in,
+ len,
+ &start,
+ &end,
+ is_raw,
+ NULL)) {
+ if (rspamd_re_cache_check_lua_condition(task, re, in, len,
+ start - (const gchar *) in, end - (const gchar *) in, lua_cbref)) {
+ r++;
+ msg_debug_re_task("found regexp /%s/, total hits: %d",
+ rspamd_regexp_get_pattern(re), r);
+ }
+
+ if (max_hits > 0 && r >= max_hits) {
+ break;
+ }
+ }
+
+ rt->results[id] += r;
+ rt->stat.regexp_checked++;
+ rt->stat.bytes_scanned_pcre += len;
+ rt->stat.bytes_scanned += len;
+
+ if (r > 0) {
+ rt->stat.regexp_matched += r;
+ }
+
+ if (!isnan(t1)) {
+ t2 = rspamd_get_ticks(TRUE);
+
+ if (t2 - t1 > slow_time) {
+ rspamd_symcache_enable_profile(task);
+ msg_info_task("regexp '%16s' took %.0f ticks to execute",
+ rspamd_regexp_get_pattern(re), t2 - t1);
+ }
+ }
+ }
+
+ return r;
+}
+
+#ifdef WITH_HYPERSCAN
+struct rspamd_re_hyperscan_cbdata {
+ struct rspamd_re_runtime *rt;
+ const guchar **ins;
+ const guint *lens;
+ guint count;
+ rspamd_regexp_t *re;
+ struct rspamd_task *task;
+};
+
+static gint
+rspamd_re_cache_hyperscan_cb(unsigned int id,
+ unsigned long long from,
+ unsigned long long to,
+ unsigned int flags,
+ void *ud)
+{
+ struct rspamd_re_hyperscan_cbdata *cbdata = ud;
+ struct rspamd_re_runtime *rt;
+ struct rspamd_re_cache_elt *cache_elt;
+ guint ret, maxhits, i, processed;
+ struct rspamd_task *task;
+
+ rt = cbdata->rt;
+ task = cbdata->task;
+ cache_elt = g_ptr_array_index(rt->cache->re, id);
+ maxhits = rspamd_regexp_get_maxhits(cache_elt->re);
+
+ if (cache_elt->match_type == RSPAMD_RE_CACHE_HYPERSCAN) {
+ if (rspamd_re_cache_check_lua_condition(task, cache_elt->re,
+ cbdata->ins[0], cbdata->lens[0], from, to, cache_elt->lua_cbref)) {
+ ret = 1;
+ setbit(rt->checked, id);
+
+ if (maxhits == 0 || rt->results[id] < maxhits) {
+ rt->results[id] += ret;
+ rt->stat.regexp_matched++;
+ }
+ msg_debug_re_task("found regexp /%s/ using hyperscan only, total hits: %d",
+ rspamd_regexp_get_pattern(cache_elt->re), rt->results[id]);
+ }
+ }
+ else {
+ if (!isset(rt->checked, id)) {
+
+ processed = 0;
+
+ for (i = 0; i < cbdata->count; i++) {
+ rspamd_re_cache_process_pcre(rt,
+ cache_elt->re,
+ cbdata->task,
+ cbdata->ins[i],
+ cbdata->lens[i],
+ FALSE,
+ cache_elt->lua_cbref);
+ setbit(rt->checked, id);
+
+ processed += cbdata->lens[i];
+
+ if (processed >= to) {
+ break;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+#endif
+
+static guint
+rspamd_re_cache_process_regexp_data(struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re, struct rspamd_task *task,
+ const guchar **in, guint *lens,
+ guint count,
+ gboolean is_raw,
+ gboolean *processed_hyperscan)
+{
+
+ guint64 re_id;
+ guint ret = 0;
+ guint i;
+ struct rspamd_re_cache_elt *cache_elt;
+
+ re_id = rspamd_regexp_get_cache_id(re);
+
+ if (count == 0 || in == NULL) {
+ /* We assume this as absence of the specified data */
+ setbit(rt->checked, re_id);
+ rt->results[re_id] = ret;
+ return ret;
+ }
+
+ cache_elt = (struct rspamd_re_cache_elt *) g_ptr_array_index(rt->cache->re, re_id);
+
+#ifndef WITH_HYPERSCAN
+ for (i = 0; i < count; i++) {
+ ret = rspamd_re_cache_process_pcre(rt,
+ re,
+ task,
+ in[i],
+ lens[i],
+ is_raw,
+ cache_elt->lua_cbref);
+ rt->results[re_id] = ret;
+ }
+
+ setbit(rt->checked, re_id);
+#else
+ struct rspamd_re_class *re_class;
+ struct rspamd_re_hyperscan_cbdata cbdata;
+
+ cache_elt = g_ptr_array_index(rt->cache->re, re_id);
+ re_class = rspamd_regexp_get_class(re);
+
+ if (rt->cache->disable_hyperscan || cache_elt->match_type == RSPAMD_RE_CACHE_PCRE ||
+ !rt->has_hs || (is_raw && re_class->has_utf8)) {
+ for (i = 0; i < count; i++) {
+ ret = rspamd_re_cache_process_pcre(rt,
+ re,
+ task,
+ in[i],
+ lens[i],
+ is_raw,
+ cache_elt->lua_cbref);
+ }
+
+ setbit(rt->checked, re_id);
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ /* For Hyperscan we can probably safely disable all those limits */
+#if 0
+ if (rt->cache->max_re_data > 0 && lens[i] > rt->cache->max_re_data) {
+ lens[i] = rt->cache->max_re_data;
+ }
+#endif
+ rt->stat.bytes_scanned += lens[i];
+ }
+
+ g_assert(re_class->hs_scratch != NULL);
+ g_assert(re_class->hs_db != NULL);
+
+ /* Go through hyperscan API */
+ for (i = 0; i < count; i++) {
+ cbdata.ins = &in[i];
+ cbdata.re = re;
+ cbdata.rt = rt;
+ cbdata.lens = &lens[i];
+ cbdata.count = 1;
+ cbdata.task = task;
+
+ if ((hs_scan(rspamd_hyperscan_get_database(re_class->hs_db),
+ in[i], lens[i], 0,
+ re_class->hs_scratch,
+ rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
+ ret = 0;
+ }
+ else {
+ ret = rt->results[re_id];
+ *processed_hyperscan = TRUE;
+ }
+ }
+ }
+#endif
+
+ return ret;
+}
+
+static void
+rspamd_re_cache_finish_class(struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ struct rspamd_re_class *re_class,
+ const gchar *class_name)
+{
+#ifdef WITH_HYPERSCAN
+ guint i;
+ guint64 re_id;
+ guint found = 0;
+
+ /* Set all bits that are not checked and included in hyperscan to 1 */
+ for (i = 0; i < re_class->nhs; i++) {
+ re_id = re_class->hs_ids[i];
+
+ if (!isset(rt->checked, re_id)) {
+ g_assert(rt->results[re_id] == 0);
+ rt->results[re_id] = 0;
+ setbit(rt->checked, re_id);
+ }
+ else {
+ found++;
+ }
+ }
+
+ msg_debug_re_task("finished hyperscan for class %s; %d "
+ "matches found; %d hyperscan supported regexps; %d total regexps",
+ class_name, found, re_class->nhs, (gint) g_hash_table_size(re_class->re));
+#endif
+}
+
+static gboolean
+rspamd_re_cache_process_selector(struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ const gchar *name,
+ guchar ***svec,
+ guint **lenvec,
+ guint *n)
+{
+ gint ref;
+ khiter_t k;
+ lua_State *L;
+ gint err_idx, ret;
+ struct rspamd_task **ptask;
+ gboolean result = FALSE;
+ struct rspamd_re_cache *cache = rt->cache;
+ struct rspamd_re_selector_result *sr;
+
+ L = cache->L;
+ k = kh_get(lua_selectors_hash, cache->selectors, (gchar *) name);
+
+ if (k == kh_end(cache->selectors)) {
+ msg_err_task("cannot find selector %s, not registered", name);
+
+ return FALSE;
+ }
+
+ ref = kh_value(cache->selectors, k);
+
+ /* First, search for the cached result */
+ if (rt->sel_cache) {
+ k = kh_get(selectors_results_hash, rt->sel_cache, ref);
+
+ if (k != kh_end(rt->sel_cache)) {
+ sr = &kh_value(rt->sel_cache, k);
+
+ *svec = sr->scvec;
+ *lenvec = sr->lenvec;
+ *n = sr->cnt;
+
+ return TRUE;
+ }
+ }
+ else {
+ rt->sel_cache = kh_init(selectors_results_hash);
+ }
+
+ lua_pushcfunction(L, &rspamd_lua_traceback);
+ err_idx = lua_gettop(L);
+
+ lua_rawgeti(L, LUA_REGISTRYINDEX, ref);
+ ptask = lua_newuserdata(L, sizeof(*ptask));
+ *ptask = task;
+ rspamd_lua_setclass(L, "rspamd{task}", -1);
+
+ if ((ret = lua_pcall(L, 1, 1, err_idx)) != 0) {
+ msg_err_task("call to selector %s "
+ "failed (%d): %s",
+ name, ret,
+ lua_tostring(L, -1));
+ }
+ else {
+ struct rspamd_lua_text *txt;
+ gsize slen;
+ const gchar *sel_data;
+
+ if (lua_type(L, -1) != LUA_TTABLE) {
+ txt = lua_check_text_or_string(L, -1);
+
+
+ if (txt) {
+ msg_debug_re_cache("re selector %s returned 1 element", name);
+ sel_data = txt->start;
+ slen = txt->len;
+ *n = 1;
+ *svec = g_malloc(sizeof(guchar *));
+ *lenvec = g_malloc(sizeof(guint));
+ (*svec)[0] = g_malloc(slen);
+ memcpy((*svec)[0], sel_data, slen);
+ (*lenvec)[0] = slen;
+ result = TRUE;
+ }
+ else {
+ msg_debug_re_cache("re selector %s returned NULL", name);
+ }
+ }
+ else {
+ *n = rspamd_lua_table_size(L, -1);
+
+ msg_debug_re_cache("re selector %s returned %d elements", name, *n);
+
+ if (*n > 0) {
+ *svec = g_malloc(sizeof(guchar *) * (*n));
+ *lenvec = g_malloc(sizeof(guint) * (*n));
+
+ for (int i = 0; i < *n; i++) {
+ lua_rawgeti(L, -1, i + 1);
+
+ txt = lua_check_text_or_string(L, -1);
+ if (txt && txt->len > 0) {
+ sel_data = txt->start;
+ slen = txt->len;
+ (*svec)[i] = g_malloc(slen);
+ memcpy((*svec)[i], sel_data, slen);
+ }
+ else {
+ /* A hack to avoid malloc(0) */
+ sel_data = "";
+ slen = 0;
+ (*svec)[i] = g_malloc(1);
+ memcpy((*svec)[i], sel_data, 1);
+ }
+
+ (*lenvec)[i] = slen;
+ lua_pop(L, 1);
+ }
+ }
+
+ /* Empty table is also a valid result */
+ result = TRUE;
+ }
+ }
+
+ lua_settop(L, err_idx - 1);
+
+ if (result) {
+ k = kh_put(selectors_results_hash, rt->sel_cache, ref, &ret);
+ sr = &kh_value(rt->sel_cache, k);
+
+ sr->cnt = *n;
+ sr->scvec = *svec;
+ sr->lenvec = *lenvec;
+ }
+
+ return result;
+}
+
+static inline guint
+rspamd_process_words_vector(GArray *words,
+ const guchar **scvec,
+ guint *lenvec,
+ struct rspamd_re_class *re_class,
+ guint cnt,
+ gboolean *raw)
+{
+ guint j;
+ rspamd_stat_token_t *tok;
+
+ if (words) {
+ for (j = 0; j < words->len; j++) {
+ tok = &g_array_index(words, rspamd_stat_token_t, j);
+
+ if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
+ if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_UTF)) {
+ if (!re_class->has_utf8) {
+ *raw = TRUE;
+ }
+ else {
+ continue; /* Skip */
+ }
+ }
+ }
+ else {
+ continue; /* Skip non text */
+ }
+
+ if (re_class->type == RSPAMD_RE_RAWWORDS) {
+ if (tok->original.len > 0) {
+ scvec[cnt] = tok->original.begin;
+ lenvec[cnt++] = tok->original.len;
+ }
+ }
+ else if (re_class->type == RSPAMD_RE_WORDS) {
+ if (tok->normalized.len > 0) {
+ scvec[cnt] = tok->normalized.begin;
+ lenvec[cnt++] = tok->normalized.len;
+ }
+ }
+ else {
+ /* Stemmed words */
+ if (tok->stemmed.len > 0) {
+ scvec[cnt] = tok->stemmed.begin;
+ lenvec[cnt++] = tok->stemmed.len;
+ }
+ }
+ }
+ }
+
+ return cnt;
+}
+
+static guint
+rspamd_re_cache_process_headers_list(struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re,
+ struct rspamd_re_class *re_class,
+ struct rspamd_mime_header *rh,
+ gboolean is_strong,
+ gboolean *processed_hyperscan)
+{
+ const guchar **scvec, *in;
+ gboolean raw = FALSE;
+ guint *lenvec;
+ struct rspamd_mime_header *cur;
+ guint cnt = 0, i = 0, ret = 0;
+
+ DL_COUNT(rh, cur, cnt);
+
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+
+ DL_FOREACH(rh, cur)
+ {
+
+ if (is_strong && strcmp(cur->name, re_class->type_data) != 0) {
+ /* Skip a different case */
+ continue;
+ }
+
+ if (re_class->type == RSPAMD_RE_RAWHEADER) {
+ in = (const guchar *) cur->value;
+ lenvec[i] = strlen(cur->value);
+
+ if (rspamd_fast_utf8_validate(in, lenvec[i]) != 0) {
+ raw = TRUE;
+ }
+ }
+ else {
+ in = (const guchar *) cur->decoded;
+ /* Validate input^W^WNo need to validate as it is already valid */
+ if (!in) {
+ lenvec[i] = 0;
+ scvec[i] = (guchar *) "";
+ continue;
+ }
+
+ lenvec[i] = strlen(in);
+ }
+
+ scvec[i] = in;
+
+ i++;
+ }
+
+ if (i > 0) {
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, i, raw, processed_hyperscan);
+ msg_debug_re_task("checking header %s regexp: %s=%*s -> %d",
+ re_class->type_data,
+ rspamd_regexp_get_pattern(re),
+ (int) lenvec[0], scvec[0], ret);
+ }
+
+ g_free(scvec);
+ g_free(lenvec);
+
+ return ret;
+}
+
+/*
+ * Calculates the specified regexp for the specified class if it's not calculated
+ */
+static guint
+rspamd_re_cache_exec_re(struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re,
+ struct rspamd_re_class *re_class,
+ gboolean is_strong)
+{
+ guint ret = 0, i, re_id;
+ struct rspamd_mime_header *rh;
+ const gchar *in;
+ const guchar **scvec = NULL;
+ guint *lenvec = NULL;
+ gboolean raw = FALSE, processed_hyperscan = FALSE;
+ struct rspamd_mime_text_part *text_part;
+ struct rspamd_mime_part *mime_part;
+ struct rspamd_url *url;
+ guint len = 0, cnt = 0;
+ const gchar *class_name;
+
+ class_name = rspamd_re_cache_type_to_string(re_class->type);
+ msg_debug_re_task("start check re type: %s: /%s/",
+ class_name,
+ rspamd_regexp_get_pattern(re));
+ re_id = rspamd_regexp_get_cache_id(re);
+
+ switch (re_class->type) {
+ case RSPAMD_RE_HEADER:
+ case RSPAMD_RE_RAWHEADER:
+ /* Get list of specified headers */
+ rh = rspamd_message_get_header_array(task,
+ re_class->type_data, FALSE);
+
+ if (rh) {
+ ret = rspamd_re_cache_process_headers_list(task, rt, re,
+ re_class, rh, is_strong, &processed_hyperscan);
+ msg_debug_re_task("checked header(%s) regexp: %s -> %d",
+ (const char *) re_class->type_data,
+ rspamd_regexp_get_pattern(re),
+ ret);
+ }
+ break;
+ case RSPAMD_RE_ALLHEADER:
+ raw = TRUE;
+ in = MESSAGE_FIELD(task, raw_headers_content).begin;
+ len = MESSAGE_FIELD(task, raw_headers_content).len;
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, (const guchar **) &in, &len, 1, raw, &processed_hyperscan);
+ msg_debug_re_task("checked allheader regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ break;
+ case RSPAMD_RE_MIMEHEADER:
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, mime_part)
+ {
+ if (mime_part->parent_part == NULL ||
+ !IS_PART_MULTIPART(mime_part->parent_part) ||
+ IS_PART_MESSAGE(mime_part)) {
+ /* We filter parts that have no multipart parent or are a messages here */
+ continue;
+ }
+ rh = rspamd_message_get_header_from_hash(mime_part->raw_headers,
+ re_class->type_data, FALSE);
+
+ if (rh) {
+ ret += rspamd_re_cache_process_headers_list(task, rt, re,
+ re_class, rh, is_strong, &processed_hyperscan);
+ }
+ msg_debug_re_task("checked mime header(%s) regexp: %s -> %d",
+ (const char *) re_class->type_data,
+ rspamd_regexp_get_pattern(re),
+ ret);
+ }
+ break;
+ case RSPAMD_RE_MIME:
+ case RSPAMD_RE_RAWMIME:
+ /* Iterate through text parts */
+ if (MESSAGE_FIELD(task, text_parts)->len > 0) {
+ cnt = MESSAGE_FIELD(task, text_parts)->len;
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
+ {
+ /* Select data for regexp */
+ if (re_class->type == RSPAMD_RE_RAWMIME) {
+ if (text_part->raw.len == 0) {
+ len = 0;
+ in = "";
+ }
+ else {
+ in = text_part->raw.begin;
+ len = text_part->raw.len;
+ }
+
+ raw = TRUE;
+ }
+ else {
+ /* Skip empty parts */
+ if (IS_TEXT_PART_EMPTY(text_part)) {
+ len = 0;
+ in = "";
+ }
+ else {
+ /* Check raw flags */
+ if (!IS_TEXT_PART_UTF(text_part)) {
+ raw = TRUE;
+ }
+
+ in = text_part->utf_content.begin;
+ len = text_part->utf_content.len;
+ }
+ }
+
+ scvec[i] = (guchar *) in;
+ lenvec[i] = len;
+ }
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, cnt, raw, &processed_hyperscan);
+ msg_debug_re_task("checked mime regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ g_free(scvec);
+ g_free(lenvec);
+ }
+ break;
+ case RSPAMD_RE_URL:
+ cnt = kh_size(MESSAGE_FIELD(task, urls));
+
+ if (cnt > 0) {
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+ i = 0;
+ raw = FALSE;
+
+ kh_foreach_key(MESSAGE_FIELD(task, urls), url, {
+ if ((url->protocol & PROTOCOL_MAILTO)) {
+ continue;
+ }
+ in = url->string;
+ len = url->urllen;
+
+ if (len > 0 && !(url->flags & RSPAMD_URL_FLAG_IMAGE)) {
+ scvec[i] = (guchar *) in;
+ lenvec[i++] = len;
+ }
+ });
+
+ /* URL regexps do not include emails, that's why the code below is commented */
+#if 0
+ g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
+
+ while (g_hash_table_iter_next (&it, &k, &v)) {
+ url = v;
+ in = url->string;
+ len = url->urllen;
+
+ if (len > 0 && !(url->flags & RSPAMD_URL_FLAG_IMAGE)) {
+ scvec[i] = (guchar *) in;
+ lenvec[i++] = len;
+ }
+ }
+#endif
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, i, raw, &processed_hyperscan);
+ msg_debug_re_task("checked url regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ g_free(scvec);
+ g_free(lenvec);
+ }
+ break;
+ case RSPAMD_RE_EMAIL:
+ cnt = kh_size(MESSAGE_FIELD(task, urls));
+
+ if (cnt > 0) {
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+ i = 0;
+ raw = FALSE;
+
+ kh_foreach_key(MESSAGE_FIELD(task, urls), url, {
+ if (!(url->protocol & PROTOCOL_MAILTO)) {
+ continue;
+ }
+ if (url->userlen == 0 || url->hostlen == 0) {
+ continue;
+ }
+
+ in = rspamd_url_user_unsafe(url);
+ len = url->userlen + 1 + url->hostlen;
+ scvec[i] = (guchar *) in;
+ lenvec[i++] = len;
+ });
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, i, raw, &processed_hyperscan);
+ msg_debug_re_task("checked email regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ g_free(scvec);
+ g_free(lenvec);
+ }
+ break;
+ case RSPAMD_RE_BODY:
+ raw = TRUE;
+ in = task->msg.begin;
+ len = task->msg.len;
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re, task,
+ (const guchar **) &in, &len, 1, raw, &processed_hyperscan);
+ msg_debug_re_task("checked rawbody regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ break;
+ case RSPAMD_RE_SABODY:
+ /* According to SA docs:
+ * The 'body' in this case is the textual parts of the message body;
+ * any non-text MIME parts are stripped, and the message decoded from
+ * Quoted-Printable or Base-64-encoded format if necessary. The message
+ * Subject header is considered part of the body and becomes the first
+ * paragraph when running the rules. All HTML tags and line breaks will
+ * be removed before matching.
+ */
+ cnt = MESSAGE_FIELD(task, text_parts)->len + 1;
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+
+ /*
+ * Body rules also include the Subject as the first line
+ * of the body content.
+ */
+
+ rh = rspamd_message_get_header_array(task, "Subject", FALSE);
+
+ if (rh) {
+ scvec[0] = (guchar *) rh->decoded;
+ lenvec[0] = strlen(rh->decoded);
+ }
+ else {
+ scvec[0] = (guchar *) "";
+ lenvec[0] = 0;
+ }
+
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
+ {
+ if (text_part->utf_stripped_content) {
+ scvec[i + 1] = (guchar *) text_part->utf_stripped_content->data;
+ lenvec[i + 1] = text_part->utf_stripped_content->len;
+
+ if (!IS_TEXT_PART_UTF(text_part)) {
+ raw = TRUE;
+ }
+ }
+ else {
+ scvec[i + 1] = (guchar *) "";
+ lenvec[i + 1] = 0;
+ }
+ }
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, cnt, raw, &processed_hyperscan);
+ msg_debug_re_task("checked sa body regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ g_free(scvec);
+ g_free(lenvec);
+ break;
+ case RSPAMD_RE_SARAWBODY:
+ /* According to SA docs:
+ * The 'raw body' of a message is the raw data inside all textual
+ * parts. The text will be decoded from base64 or quoted-printable
+ * encoding, but HTML tags and line breaks will still be present.
+ * Multiline expressions will need to be used to match strings that are
+ * broken by line breaks.
+ */
+ if (MESSAGE_FIELD(task, text_parts)->len > 0) {
+ cnt = MESSAGE_FIELD(task, text_parts)->len;
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+
+ for (i = 0; i < cnt; i++) {
+ text_part = g_ptr_array_index(MESSAGE_FIELD(task, text_parts), i);
+
+ if (text_part->parsed.len > 0) {
+ scvec[i] = (guchar *) text_part->parsed.begin;
+ lenvec[i] = text_part->parsed.len;
+
+ if (!IS_TEXT_PART_UTF(text_part)) {
+ raw = TRUE;
+ }
+ }
+ else {
+ scvec[i] = (guchar *) "";
+ lenvec[i] = 0;
+ }
+ }
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, cnt, raw, &processed_hyperscan);
+ msg_debug_re_task("checked sa rawbody regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ g_free(scvec);
+ g_free(lenvec);
+ }
+ break;
+ case RSPAMD_RE_WORDS:
+ case RSPAMD_RE_STEMWORDS:
+ case RSPAMD_RE_RAWWORDS:
+ if (MESSAGE_FIELD(task, text_parts)->len > 0) {
+ cnt = 0;
+ raw = FALSE;
+
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
+ {
+ if (text_part->utf_words) {
+ cnt += text_part->utf_words->len;
+ }
+ }
+
+ if (task->meta_words && task->meta_words->len > 0) {
+ cnt += task->meta_words->len;
+ }
+
+ if (cnt > 0) {
+ scvec = g_malloc(sizeof(*scvec) * cnt);
+ lenvec = g_malloc(sizeof(*lenvec) * cnt);
+
+ cnt = 0;
+
+ PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
+ {
+ if (text_part->utf_words) {
+ cnt = rspamd_process_words_vector(text_part->utf_words,
+ scvec, lenvec, re_class, cnt, &raw);
+ }
+ }
+
+ if (task->meta_words) {
+ cnt = rspamd_process_words_vector(task->meta_words,
+ scvec, lenvec, re_class, cnt, &raw);
+ }
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, cnt, raw, &processed_hyperscan);
+
+ msg_debug_re_task("checked sa words regexp: %s -> %d",
+ rspamd_regexp_get_pattern(re), ret);
+ g_free(scvec);
+ g_free(lenvec);
+ }
+ }
+ break;
+ case RSPAMD_RE_SELECTOR:
+ if (rspamd_re_cache_process_selector(task, rt,
+ re_class->type_data,
+ (guchar ***) &scvec,
+ &lenvec, &cnt)) {
+
+ ret = rspamd_re_cache_process_regexp_data(rt, re,
+ task, scvec, lenvec, cnt, raw, &processed_hyperscan);
+ msg_debug_re_task("checked selector(%s) regexp: %s -> %d",
+ re_class->type_data,
+ rspamd_regexp_get_pattern(re), ret);
+
+ /* Do not free vectors as they are managed by rt->sel_cache */
+ }
+ break;
+ case RSPAMD_RE_MAX:
+ msg_err_task("regexp of class invalid has been called: %s",
+ rspamd_regexp_get_pattern(re));
+ break;
+ }
+
+#if WITH_HYPERSCAN
+ if (processed_hyperscan) {
+ rspamd_re_cache_finish_class(task, rt, re_class, class_name);
+ }
+#endif
+
+ setbit(rt->checked, re_id);
+
+ return rt->results[re_id];
+}
+
+gint rspamd_re_cache_process(struct rspamd_task *task,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gconstpointer type_data,
+ gsize datalen,
+ gboolean is_strong)
+{
+ guint64 re_id;
+ struct rspamd_re_class *re_class;
+ struct rspamd_re_cache *cache;
+ struct rspamd_re_runtime *rt;
+
+ g_assert(task != NULL);
+ rt = task->re_rt;
+ g_assert(rt != NULL);
+ g_assert(re != NULL);
+
+ cache = rt->cache;
+ re_id = rspamd_regexp_get_cache_id(re);
+
+ if (re_id == RSPAMD_INVALID_ID || re_id > cache->nre) {
+ msg_err_task("re '%s' has no valid id for the cache",
+ rspamd_regexp_get_pattern(re));
+ return 0;
+ }
+
+ if (isset(rt->checked, re_id)) {
+ /* Fast path */
+ rt->stat.regexp_fast_cached++;
+ return rt->results[re_id];
+ }
+ else {
+ /* Slow path */
+ re_class = rspamd_regexp_get_class(re);
+
+ if (re_class == NULL) {
+ msg_err_task("cannot find re class for regexp '%s'",
+ rspamd_regexp_get_pattern(re));
+ return 0;
+ }
+
+ return rspamd_re_cache_exec_re(task, rt, re, re_class,
+ is_strong);
+ }
+
+ return 0;
+}
+
+int rspamd_re_cache_process_ffi(void *ptask,
+ void *pre,
+ int type,
+ void *type_data,
+ int is_strong)
+{
+ struct rspamd_lua_regexp **lua_re = pre;
+ struct rspamd_task **real_task = ptask;
+ gsize typelen = 0;
+
+ if (type_data) {
+ typelen = strlen(type_data);
+ }
+
+ return rspamd_re_cache_process(*real_task, (*lua_re)->re,
+ type, type_data, typelen, is_strong);
+}
+
+void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt)
+{
+ g_assert(rt != NULL);
+
+ if (rt->sel_cache) {
+ struct rspamd_re_selector_result sr;
+
+ kh_foreach_value(rt->sel_cache, sr, {
+ for (guint i = 0; i < sr.cnt; i++) {
+ g_free((gpointer) sr.scvec[i]);
+ }
+
+ g_free(sr.scvec);
+ g_free(sr.lenvec);
+ });
+ kh_destroy(selectors_results_hash, rt->sel_cache);
+ }
+
+ REF_RELEASE(rt->cache);
+ g_free(rt);
+}
+
+void rspamd_re_cache_unref(struct rspamd_re_cache *cache)
+{
+ if (cache) {
+ REF_RELEASE(cache);
+ }
+}
+
+struct rspamd_re_cache *
+rspamd_re_cache_ref(struct rspamd_re_cache *cache)
+{
+ if (cache) {
+ REF_RETAIN(cache);
+ }
+
+ return cache;
+}
+
+guint rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, guint limit)
+{
+ guint old;
+
+ g_assert(cache != NULL);
+
+ old = cache->max_re_data;
+ cache->max_re_data = limit;
+
+ return old;
+}
+
+const gchar *
+rspamd_re_cache_type_to_string(enum rspamd_re_type type)
+{
+ const gchar *ret = "unknown";
+
+ switch (type) {
+ case RSPAMD_RE_HEADER:
+ ret = "header";
+ break;
+ case RSPAMD_RE_RAWHEADER:
+ ret = "raw header";
+ break;
+ case RSPAMD_RE_MIMEHEADER:
+ ret = "mime header";
+ break;
+ case RSPAMD_RE_ALLHEADER:
+ ret = "all headers";
+ break;
+ case RSPAMD_RE_MIME:
+ ret = "part";
+ break;
+ case RSPAMD_RE_RAWMIME:
+ ret = "raw part";
+ break;
+ case RSPAMD_RE_BODY:
+ ret = "rawbody";
+ break;
+ case RSPAMD_RE_URL:
+ ret = "url";
+ break;
+ case RSPAMD_RE_EMAIL:
+ ret = "email";
+ break;
+ case RSPAMD_RE_SABODY:
+ ret = "sa body";
+ break;
+ case RSPAMD_RE_SARAWBODY:
+ ret = "sa raw body";
+ break;
+ case RSPAMD_RE_SELECTOR:
+ ret = "selector";
+ break;
+ case RSPAMD_RE_WORDS:
+ ret = "words";
+ break;
+ case RSPAMD_RE_RAWWORDS:
+ ret = "raw_words";
+ break;
+ case RSPAMD_RE_STEMWORDS:
+ ret = "stem_words";
+ break;
+ case RSPAMD_RE_MAX:
+ default:
+ ret = "invalid class";
+ break;
+ }
+
+ return ret;
+}
+
+enum rspamd_re_type
+rspamd_re_cache_type_from_string(const char *str)
+{
+ enum rspamd_re_type ret;
+ guint64 h;
+
+ /*
+ * To optimize this function, we apply hash to input string and
+ * pre-select it from the values
+ */
+
+ if (str != NULL) {
+ h = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64,
+ str, strlen(str), 0xdeadbabe);
+
+ switch (h) {
+ case G_GUINT64_CONSTANT(0x298b9c8a58887d44): /* header */
+ ret = RSPAMD_RE_HEADER;
+ break;
+ case G_GUINT64_CONSTANT(0x467bfb5cd7ddf890): /* rawheader */
+ ret = RSPAMD_RE_RAWHEADER;
+ break;
+ case G_GUINT64_CONSTANT(0xda081341fb600389): /* mime */
+ ret = RSPAMD_RE_MIME;
+ break;
+ case G_GUINT64_CONSTANT(0xc35831e067a8221d): /* rawmime */
+ ret = RSPAMD_RE_RAWMIME;
+ break;
+ case G_GUINT64_CONSTANT(0xc625e13dbe636de2): /* body */
+ case G_GUINT64_CONSTANT(0xCCDEBA43518F721C): /* message */
+ ret = RSPAMD_RE_BODY;
+ break;
+ case G_GUINT64_CONSTANT(0x286edbe164c791d2): /* url */
+ case G_GUINT64_CONSTANT(0x7D9ACDF6685661A1): /* uri */
+ ret = RSPAMD_RE_URL;
+ break;
+ case G_GUINT64_CONSTANT(0x7e232b0f60b571be): /* email */
+ ret = RSPAMD_RE_EMAIL;
+ break;
+ case G_GUINT64_CONSTANT(0x796d62205a8778c7): /* allheader */
+ ret = RSPAMD_RE_ALLHEADER;
+ break;
+ case G_GUINT64_CONSTANT(0xa3c6c153b3b00a5e): /* mimeheader */
+ ret = RSPAMD_RE_MIMEHEADER;
+ break;
+ case G_GUINT64_CONSTANT(0x7794501506e604e9): /* sabody */
+ ret = RSPAMD_RE_SABODY;
+ break;
+ case G_GUINT64_CONSTANT(0x28828962E7D2A05F): /* sarawbody */
+ ret = RSPAMD_RE_SARAWBODY;
+ break;
+ default:
+ ret = RSPAMD_RE_MAX;
+ break;
+ }
+ }
+ else {
+ ret = RSPAMD_RE_MAX;
+ }
+
+ return ret;
+}
+
+#ifdef WITH_HYPERSCAN
+static gchar *
+rspamd_re_cache_hs_pattern_from_pcre(rspamd_regexp_t *re)
+{
+ /*
+ * Workaround for bug in ragel 7.0.0.11
+ * https://github.com/intel/hyperscan/issues/133
+ */
+ const gchar *pat = rspamd_regexp_get_pattern(re);
+ guint flags = rspamd_regexp_get_flags(re), esc_flags = RSPAMD_REGEXP_ESCAPE_RE;
+ gchar *escaped;
+ gsize esc_len;
+
+ if (flags & RSPAMD_REGEXP_FLAG_UTF) {
+ esc_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
+ }
+
+ escaped = rspamd_str_regexp_escape(pat, strlen(pat), &esc_len, esc_flags);
+
+ return escaped;
+}
+
+static gboolean
+rspamd_re_cache_is_finite(struct rspamd_re_cache *cache,
+ rspamd_regexp_t *re, gint flags, gdouble max_time)
+{
+ pid_t cld;
+ gint status;
+ struct timespec ts;
+ hs_compile_error_t *hs_errors;
+ hs_database_t *test_db;
+ gdouble wait_time;
+ const gint max_tries = 10;
+ gint tries = 0, rc;
+ void (*old_hdl)(int);
+
+ wait_time = max_time / max_tries;
+ /* We need to restore SIGCHLD processing */
+ old_hdl = signal(SIGCHLD, SIG_DFL);
+ cld = fork();
+
+ if (cld == 0) {
+ /* Try to compile pattern */
+
+ gchar *pat = rspamd_re_cache_hs_pattern_from_pcre(re);
+
+ if (hs_compile(pat,
+ flags | HS_FLAG_PREFILTER,
+ HS_MODE_BLOCK,
+ &cache->plt,
+ &test_db,
+ &hs_errors) != HS_SUCCESS) {
+
+ msg_info_re_cache("cannot compile (prefilter mode) '%s' to hyperscan: '%s'",
+ pat,
+ hs_errors != NULL ? hs_errors->message : "unknown error");
+
+ hs_free_compile_error(hs_errors);
+ g_free(pat);
+
+ exit(EXIT_FAILURE);
+ }
+
+ g_free(pat);
+ exit(EXIT_SUCCESS);
+ }
+ else if (cld > 0) {
+ double_to_ts(wait_time, &ts);
+
+ while ((rc = waitpid(cld, &status, WNOHANG)) == 0 && tries++ < max_tries) {
+ (void) nanosleep(&ts, NULL);
+ }
+
+ /* Child has been terminated */
+ if (rc > 0) {
+ /* Forget about SIGCHLD after this point */
+ signal(SIGCHLD, old_hdl);
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SUCCESS) {
+ return TRUE;
+ }
+ else {
+ msg_err_re_cache(
+ "cannot approximate %s to hyperscan",
+ rspamd_regexp_get_pattern(re));
+
+ return FALSE;
+ }
+ }
+ else {
+ /* We consider that as timeout */
+ kill(cld, SIGKILL);
+ g_assert(waitpid(cld, &status, 0) != -1);
+ msg_err_re_cache(
+ "cannot approximate %s to hyperscan: timeout waiting",
+ rspamd_regexp_get_pattern(re));
+ signal(SIGCHLD, old_hdl);
+ }
+ }
+ else {
+ msg_err_re_cache(
+ "cannot approximate %s to hyperscan: fork failed: %s",
+ rspamd_regexp_get_pattern(re), strerror(errno));
+ signal(SIGCHLD, old_hdl);
+ }
+
+ return FALSE;
+}
+#endif
+
+#ifdef WITH_HYPERSCAN
+struct rspamd_re_cache_hs_compile_cbdata {
+ GHashTableIter it;
+ struct rspamd_re_cache *cache;
+ const char *cache_dir;
+ gdouble max_time;
+ gboolean silent;
+ guint total;
+ void (*cb)(guint ncompiled, GError *err, void *cbd);
+ void *cbd;
+};
+
+static void
+rspamd_re_cache_compile_err(EV_P_ ev_timer *w, GError *err,
+ struct rspamd_re_cache_hs_compile_cbdata *cbdata, bool is_fatal)
+{
+ cbdata->cb(cbdata->total, err, cbdata->cbd);
+
+ if (is_fatal) {
+ ev_timer_stop(EV_A_ w);
+ g_free(w);
+ g_free(cbdata);
+ }
+ else {
+ /* Continue compilation */
+ ev_timer_again(EV_A_ w);
+ }
+ g_error_free(err);
+}
+
+static void
+rspamd_re_cache_compile_timer_cb(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_re_cache_hs_compile_cbdata *cbdata =
+ (struct rspamd_re_cache_hs_compile_cbdata *) w->data;
+ GHashTableIter cit;
+ gpointer k, v;
+ struct rspamd_re_class *re_class;
+ gchar path[PATH_MAX], npath[PATH_MAX];
+ hs_database_t *test_db;
+ gint fd, i, n, *hs_ids = NULL, pcre_flags, re_flags;
+ rspamd_cryptobox_fast_hash_state_t crc_st;
+ guint64 crc;
+ rspamd_regexp_t *re;
+ hs_compile_error_t *hs_errors = NULL;
+ guint *hs_flags = NULL;
+ const hs_expr_ext_t **hs_exts = NULL;
+ gchar **hs_pats = NULL;
+ gchar *hs_serialized = NULL;
+ gsize serialized_len;
+ struct iovec iov[7];
+ struct rspamd_re_cache *cache;
+ GError *err;
+ pid_t our_pid = getpid();
+
+ cache = cbdata->cache;
+
+ if (!g_hash_table_iter_next(&cbdata->it, &k, &v)) {
+ /* All done */
+ ev_timer_stop(EV_A_ w);
+ cbdata->cb(cbdata->total, NULL, cbdata->cbd);
+ g_free(w);
+ g_free(cbdata);
+
+ return;
+ }
+
+ re_class = v;
+ rspamd_snprintf(path, sizeof(path), "%s%c%s.hs", cbdata->cache_dir,
+ G_DIR_SEPARATOR, re_class->hash);
+
+ if (rspamd_re_cache_is_valid_hyperscan_file(cache, path, TRUE, TRUE, NULL)) {
+
+ fd = open(path, O_RDONLY, 00600);
+
+ /* Read number of regexps */
+ g_assert(fd != -1);
+ g_assert(lseek(fd, RSPAMD_HS_MAGIC_LEN + sizeof(cache->plt), SEEK_SET) != -1);
+ g_assert(read(fd, &n, sizeof(n)) == sizeof(n));
+ close(fd);
+
+ if (re_class->type_len > 0) {
+ if (!cbdata->silent) {
+ msg_info_re_cache(
+ "skip already valid class %s(%*s) to cache %6s, %d regexps",
+ rspamd_re_cache_type_to_string(re_class->type),
+ (gint) re_class->type_len - 1,
+ re_class->type_data,
+ re_class->hash,
+ n);
+ }
+ }
+ else {
+ if (!cbdata->silent) {
+ msg_info_re_cache(
+ "skip already valid class %s to cache %6s, %d regexps",
+ rspamd_re_cache_type_to_string(re_class->type),
+ re_class->hash,
+ n);
+ }
+ }
+
+ ev_timer_again(EV_A_ w);
+ return;
+ }
+
+ rspamd_snprintf(path, sizeof(path), "%s%c%s%P-XXXXXXXXXX", cbdata->cache_dir,
+ G_DIR_SEPARATOR, re_class->hash, our_pid);
+ fd = g_mkstemp_full(path, O_CREAT | O_TRUNC | O_EXCL | O_WRONLY, 00600);
+
+ if (fd == -1) {
+ err = g_error_new(rspamd_re_cache_quark(), errno,
+ "cannot open file %s: %s", path, strerror(errno));
+ rspamd_re_cache_compile_err(EV_A_ w, err, cbdata, false);
+ return;
+ }
+
+ g_hash_table_iter_init(&cit, re_class->re);
+ n = g_hash_table_size(re_class->re);
+ hs_flags = g_new0(guint, n);
+ hs_ids = g_new0(guint, n);
+ hs_pats = g_new0(char *, n);
+ hs_exts = g_new0(const hs_expr_ext_t *, n);
+ i = 0;
+
+ while (g_hash_table_iter_next(&cit, &k, &v)) {
+ re = v;
+
+ pcre_flags = rspamd_regexp_get_pcre_flags(re);
+ re_flags = rspamd_regexp_get_flags(re);
+
+ if (re_flags & RSPAMD_REGEXP_FLAG_PCRE_ONLY) {
+ /* Do not try to compile bad regexp */
+ msg_info_re_cache(
+ "do not try compile %s to hyperscan as it is PCRE only",
+ rspamd_regexp_get_pattern(re));
+ continue;
+ }
+
+ hs_flags[i] = 0;
+ hs_exts[i] = NULL;
+#ifndef WITH_PCRE2
+ if (pcre_flags & PCRE_FLAG(UTF8)) {
+ hs_flags[i] |= HS_FLAG_UTF8;
+ }
+#else
+ if (pcre_flags & PCRE_FLAG(UTF)) {
+ hs_flags[i] |= HS_FLAG_UTF8;
+ }
+#endif
+ if (pcre_flags & PCRE_FLAG(CASELESS)) {
+ hs_flags[i] |= HS_FLAG_CASELESS;
+ }
+ if (pcre_flags & PCRE_FLAG(MULTILINE)) {
+ hs_flags[i] |= HS_FLAG_MULTILINE;
+ }
+ if (pcre_flags & PCRE_FLAG(DOTALL)) {
+ hs_flags[i] |= HS_FLAG_DOTALL;
+ }
+
+
+ if (re_flags & RSPAMD_REGEXP_FLAG_LEFTMOST) {
+ hs_flags[i] |= HS_FLAG_SOM_LEFTMOST;
+ }
+ else if (rspamd_regexp_get_maxhits(re) == 1) {
+ hs_flags[i] |= HS_FLAG_SINGLEMATCH;
+ }
+
+ gchar *pat = rspamd_re_cache_hs_pattern_from_pcre(re);
+
+ if (hs_compile(pat,
+ hs_flags[i],
+ HS_MODE_BLOCK,
+ &cache->plt,
+ &test_db,
+ &hs_errors) != HS_SUCCESS) {
+ msg_info_re_cache("cannot compile '%s' to hyperscan: '%s', try prefilter match",
+ pat,
+ hs_errors != NULL ? hs_errors->message : "unknown error");
+ hs_free_compile_error(hs_errors);
+
+ /* The approximation operation might take a significant
+ * amount of time, so we need to check if it's finite
+ */
+ if (rspamd_re_cache_is_finite(cache, re, hs_flags[i], cbdata->max_time)) {
+ hs_flags[i] |= HS_FLAG_PREFILTER;
+ hs_ids[i] = rspamd_regexp_get_cache_id(re);
+ hs_pats[i] = pat;
+ i++;
+ }
+ else {
+ g_free(pat); /* Avoid leak */
+ }
+ }
+ else {
+ hs_ids[i] = rspamd_regexp_get_cache_id(re);
+ hs_pats[i] = pat;
+ i++;
+ hs_free_database(test_db);
+ }
+ }
+ /* Adjust real re number */
+ n = i;
+
+#define CLEANUP_ALLOCATED(is_err) \
+ do { \
+ g_free(hs_flags); \
+ g_free(hs_ids); \
+ for (guint j = 0; j < i; j++) { \
+ g_free(hs_pats[j]); \
+ } \
+ g_free(hs_pats); \
+ g_free(hs_exts); \
+ if (is_err) { \
+ close(fd); \
+ unlink(path); \
+ if (hs_errors) hs_free_compile_error(hs_errors); \
+ } \
+ } while (0)
+
+ if (n > 0) {
+ /* Create the hs tree */
+ hs_errors = NULL;
+ if (hs_compile_ext_multi((const char **) hs_pats,
+ hs_flags,
+ hs_ids,
+ hs_exts,
+ n,
+ HS_MODE_BLOCK,
+ &cache->plt,
+ &test_db,
+ &hs_errors) != HS_SUCCESS) {
+
+ err = g_error_new(rspamd_re_cache_quark(), EINVAL,
+ "cannot create tree of regexp when processing '%s': %s",
+ hs_pats[hs_errors->expression], hs_errors->message);
+ CLEANUP_ALLOCATED(true);
+ rspamd_re_cache_compile_err(EV_A_ w, err, cbdata, false);
+
+ return;
+ }
+
+ if (hs_serialize_database(test_db, &hs_serialized,
+ &serialized_len) != HS_SUCCESS) {
+ err = g_error_new(rspamd_re_cache_quark(),
+ errno,
+ "cannot serialize tree of regexp for %s",
+ re_class->hash);
+
+ CLEANUP_ALLOCATED(true);
+ hs_free_database(test_db);
+ rspamd_re_cache_compile_err(EV_A_ w, err, cbdata, false);
+ return;
+ }
+
+ hs_free_database(test_db);
+
+ /*
+ * Magic - 8 bytes
+ * Platform - sizeof (platform)
+ * n - number of regexps
+ * n * <regexp ids>
+ * n * <regexp flags>
+ * crc - 8 bytes checksum
+ * <hyperscan blob>
+ */
+ rspamd_cryptobox_fast_hash_init(&crc_st, 0xdeadbabe);
+ /* IDs -> Flags -> Hs blob */
+ rspamd_cryptobox_fast_hash_update(&crc_st,
+ hs_ids, sizeof(*hs_ids) * n);
+ rspamd_cryptobox_fast_hash_update(&crc_st,
+ hs_flags, sizeof(*hs_flags) * n);
+ rspamd_cryptobox_fast_hash_update(&crc_st,
+ hs_serialized, serialized_len);
+ crc = rspamd_cryptobox_fast_hash_final(&crc_st);
+
+
+ iov[0].iov_base = (void *) rspamd_hs_magic;
+ iov[0].iov_len = RSPAMD_HS_MAGIC_LEN;
+ iov[1].iov_base = &cache->plt;
+ iov[1].iov_len = sizeof(cache->plt);
+ iov[2].iov_base = &n;
+ iov[2].iov_len = sizeof(n);
+ iov[3].iov_base = hs_ids;
+ iov[3].iov_len = sizeof(*hs_ids) * n;
+ iov[4].iov_base = hs_flags;
+ iov[4].iov_len = sizeof(*hs_flags) * n;
+ iov[5].iov_base = &crc;
+ iov[5].iov_len = sizeof(crc);
+ iov[6].iov_base = hs_serialized;
+ iov[6].iov_len = serialized_len;
+
+ if (writev(fd, iov, G_N_ELEMENTS(iov)) == -1) {
+ err = g_error_new(rspamd_re_cache_quark(),
+ errno,
+ "cannot serialize tree of regexp to %s: %s",
+ path, strerror(errno));
+
+ CLEANUP_ALLOCATED(true);
+ g_free(hs_serialized);
+
+ rspamd_re_cache_compile_err(EV_A_ w, err, cbdata, false);
+ return;
+ }
+
+ if (re_class->type_len > 0) {
+ msg_info_re_cache(
+ "compiled class %s(%*s) to cache %6s, %d/%d regexps",
+ rspamd_re_cache_type_to_string(re_class->type),
+ (gint) re_class->type_len - 1,
+ re_class->type_data,
+ re_class->hash,
+ n,
+ (gint) g_hash_table_size(re_class->re));
+ }
+ else {
+ msg_info_re_cache(
+ "compiled class %s to cache %6s, %d/%d regexps",
+ rspamd_re_cache_type_to_string(re_class->type),
+ re_class->hash,
+ n,
+ (gint) g_hash_table_size(re_class->re));
+ }
+
+ cbdata->total += n;
+ CLEANUP_ALLOCATED(false);
+
+ /* Now rename temporary file to the new .hs file */
+ rspamd_snprintf(npath, sizeof(npath), "%s%c%s.hs", cbdata->cache_dir,
+ G_DIR_SEPARATOR, re_class->hash);
+
+ if (rename(path, npath) == -1) {
+ err = g_error_new(rspamd_re_cache_quark(),
+ errno,
+ "cannot rename %s to %s: %s",
+ path, npath, strerror(errno));
+ unlink(path);
+ close(fd);
+
+ rspamd_re_cache_compile_err(EV_A_ w, err, cbdata, false);
+ return;
+ }
+
+ close(fd);
+ }
+ else {
+ err = g_error_new(rspamd_re_cache_quark(),
+ errno,
+ "no suitable regular expressions %s (%d original): "
+ "remove temporary file %s",
+ rspamd_re_cache_type_to_string(re_class->type),
+ (gint) g_hash_table_size(re_class->re),
+ path);
+
+ CLEANUP_ALLOCATED(true);
+ rspamd_re_cache_compile_err(EV_A_ w, err, cbdata, false);
+
+ return;
+ }
+
+ /* Continue process */
+ ev_timer_again(EV_A_ w);
+}
+
+#endif
+
+gint rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
+ const char *cache_dir,
+ gdouble max_time,
+ gboolean silent,
+ struct ev_loop *event_loop,
+ void (*cb)(guint ncompiled, GError *err, void *cbd),
+ void *cbd)
+{
+ g_assert(cache != NULL);
+ g_assert(cache_dir != NULL);
+
+#ifndef WITH_HYPERSCAN
+ return -1;
+#else
+ static ev_timer *timer;
+ static const ev_tstamp timer_interval = 0.1;
+ struct rspamd_re_cache_hs_compile_cbdata *cbdata;
+
+ cbdata = g_malloc0(sizeof(*cbdata));
+ g_hash_table_iter_init(&cbdata->it, cache->re_classes);
+ cbdata->cache = cache;
+ cbdata->cache_dir = cache_dir;
+ cbdata->cb = cb;
+ cbdata->cbd = cbd;
+ cbdata->max_time = max_time;
+ cbdata->silent = silent;
+ cbdata->total = 0;
+ timer = g_malloc0(sizeof(*timer));
+ timer->data = (void *) cbdata; /* static */
+
+ ev_timer_init(timer, rspamd_re_cache_compile_timer_cb,
+ timer_interval, timer_interval);
+ ev_timer_start(event_loop, timer);
+
+ return 0;
+#endif
+}
+
+gboolean
+rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
+ const char *path, gboolean silent, gboolean try_load, GError **err)
+{
+ g_assert(cache != NULL);
+ g_assert(path != NULL);
+
+#ifndef WITH_HYPERSCAN
+ return FALSE;
+#else
+ gint fd, n, ret;
+ guchar magicbuf[RSPAMD_HS_MAGIC_LEN];
+ const guchar *mb;
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_re_class *re_class;
+ gsize len;
+ const gchar *hash_pos;
+ hs_platform_info_t test_plt;
+ hs_database_t *test_db = NULL;
+ guchar *map, *p, *end;
+ rspamd_cryptobox_fast_hash_state_t crc_st;
+ guint64 crc, valid_crc;
+
+ len = strlen(path);
+
+ if (len < sizeof(rspamd_cryptobox_HASHBYTES + 3)) {
+ if (!silent) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: too short filename",
+ path);
+ }
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "too short filename");
+
+ return FALSE;
+ }
+
+ if (memcmp(path + len - 3, ".hs", 3) != 0) {
+ if (!silent) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: not ending with .hs",
+ path);
+ }
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "not ending with .hs");
+ return FALSE;
+ }
+
+ hash_pos = path + len - 3 - (sizeof(re_class->hash) - 1);
+ g_hash_table_iter_init(&it, cache->re_classes);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ re_class = v;
+
+ if (memcmp(hash_pos, re_class->hash, sizeof(re_class->hash) - 1) == 0) {
+ /* Open file and check magic */
+ gssize r;
+
+ fd = open(path, O_RDONLY);
+
+ if (fd == -1) {
+ if (errno != ENOENT || !silent) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: %s",
+ path, strerror(errno));
+ }
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "%s",
+ strerror(errno));
+ return FALSE;
+ }
+
+ if ((r = read(fd, magicbuf, sizeof(magicbuf))) != sizeof(magicbuf)) {
+ if (r == -1) {
+ msg_err_re_cache("cannot read magic from hyperscan "
+ "cache file %s: %s",
+ path, strerror(errno));
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "cannot read magic: %s",
+ strerror(errno));
+ }
+ else {
+ msg_err_re_cache("truncated read magic from hyperscan "
+ "cache file %s: %z, %z wanted",
+ path, r, (gsize) sizeof(magicbuf));
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "truncated read magic %zd, %zd wanted",
+ r, (gsize) sizeof(magicbuf));
+ }
+
+ close(fd);
+ return FALSE;
+ }
+
+ mb = rspamd_hs_magic;
+
+ if (memcmp(magicbuf, mb, sizeof(magicbuf)) != 0) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: "
+ "bad magic ('%*xs', '%*xs' expected)",
+ path, (int) RSPAMD_HS_MAGIC_LEN, magicbuf,
+ (int) RSPAMD_HS_MAGIC_LEN, mb);
+
+ close(fd);
+ g_set_error(err, rspamd_re_cache_quark(), 0, "invalid magic");
+ return FALSE;
+ }
+
+ if ((r = read(fd, &test_plt, sizeof(test_plt))) != sizeof(test_plt)) {
+ if (r == -1) {
+ msg_err_re_cache("cannot read platform data from hyperscan "
+ "cache file %s: %s",
+ path, strerror(errno));
+ }
+ else {
+ msg_err_re_cache("truncated read platform data from hyperscan "
+ "cache file %s: %z, %z wanted",
+ path, r, (gsize) sizeof(magicbuf));
+ }
+
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "cannot read platform data: %s", strerror(errno));
+
+ close(fd);
+ return FALSE;
+ }
+
+ if (test_plt.cpu_features != cache->plt.cpu_features) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: "
+ "compiled for a different platform",
+ path);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "compiled for a different platform");
+
+ close(fd);
+ return FALSE;
+ }
+
+ close(fd);
+
+ if (try_load) {
+ map = rspamd_file_xmap(path, PROT_READ, &len, TRUE);
+
+ if (map == NULL) {
+ msg_err_re_cache("cannot mmap hyperscan cache file %s: "
+ "%s",
+ path, strerror(errno));
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "mmap error: %s", strerror(errno));
+ return FALSE;
+ }
+
+ p = map + RSPAMD_HS_MAGIC_LEN + sizeof(test_plt);
+ end = map + len;
+ memcpy(&n, p, sizeof(n));
+ p += sizeof(gint);
+
+ if (n <= 0 || 2 * n * sizeof(gint) + /* IDs + flags */
+ sizeof(guint64) + /* crc */
+ RSPAMD_HS_MAGIC_LEN + /* header */
+ sizeof(cache->plt) >
+ len) {
+ /* Some wrong amount of regexps */
+ msg_err_re_cache("bad number of expressions in %s: %d",
+ path, n);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "bad number of expressions: %d", n);
+ munmap(map, len);
+ return FALSE;
+ }
+
+ /*
+ * Magic - 8 bytes
+ * Platform - sizeof (platform)
+ * n - number of regexps
+ * n * <regexp ids>
+ * n * <regexp flags>
+ * crc - 8 bytes checksum
+ * <hyperscan blob>
+ */
+
+ memcpy(&crc, p + n * 2 * sizeof(gint), sizeof(crc));
+ rspamd_cryptobox_fast_hash_init(&crc_st, 0xdeadbabe);
+ /* IDs */
+ rspamd_cryptobox_fast_hash_update(&crc_st, p, n * sizeof(gint));
+ /* Flags */
+ rspamd_cryptobox_fast_hash_update(&crc_st, p + n * sizeof(gint),
+ n * sizeof(gint));
+ /* HS database */
+ p += n * sizeof(gint) * 2 + sizeof(guint64);
+ rspamd_cryptobox_fast_hash_update(&crc_st, p, end - p);
+ valid_crc = rspamd_cryptobox_fast_hash_final(&crc_st);
+
+ if (crc != valid_crc) {
+ msg_warn_re_cache("outdated or invalid hs database in %s: "
+ "crc read %xL, crc expected %xL",
+ path, crc, valid_crc);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "outdated or invalid hs database, crc check failure");
+ munmap(map, len);
+
+ return FALSE;
+ }
+
+ if ((ret = hs_deserialize_database(p, end - p, &test_db)) != HS_SUCCESS) {
+ msg_err_re_cache("bad hs database in %s: %d", path, ret);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "deserialize error: %d", ret);
+ munmap(map, len);
+
+ return FALSE;
+ }
+
+ hs_free_database(test_db);
+ munmap(map, len);
+ }
+ /* XXX: add crc check */
+
+ return TRUE;
+ }
+ }
+
+ if (!silent) {
+ msg_warn_re_cache("unknown hyperscan cache file %s", path);
+ }
+
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "unknown hyperscan file");
+
+ return FALSE;
+#endif
+}
+
+
+enum rspamd_hyperscan_status
+rspamd_re_cache_load_hyperscan(struct rspamd_re_cache *cache,
+ const char *cache_dir, bool try_load)
+{
+ g_assert(cache != NULL);
+ g_assert(cache_dir != NULL);
+
+#ifndef WITH_HYPERSCAN
+ return RSPAMD_HYPERSCAN_UNSUPPORTED;
+#else
+ gchar path[PATH_MAX];
+ gint fd, i, n, *hs_ids = NULL, *hs_flags = NULL, total = 0, ret;
+ GHashTableIter it;
+ gpointer k, v;
+ guint8 *map, *p;
+ struct rspamd_re_class *re_class;
+ struct rspamd_re_cache_elt *elt;
+ struct stat st;
+ gboolean has_valid = FALSE, all_valid = FALSE;
+
+ g_hash_table_iter_init(&it, cache->re_classes);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ re_class = v;
+ rspamd_snprintf(path, sizeof(path), "%s%c%s.hs", cache_dir,
+ G_DIR_SEPARATOR, re_class->hash);
+
+ if (rspamd_re_cache_is_valid_hyperscan_file(cache, path, try_load, FALSE, NULL)) {
+ msg_debug_re_cache("load hyperscan database from '%s'",
+ re_class->hash);
+
+ fd = open(path, O_RDONLY);
+
+ /* Read number of regexps */
+ g_assert(fd != -1);
+ fstat(fd, &st);
+
+ map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+
+ if (map == MAP_FAILED) {
+ if (!try_load) {
+ msg_err_re_cache("cannot mmap %s: %s", path, strerror(errno));
+ }
+ else {
+ msg_debug_re_cache("cannot mmap %s: %s", path, strerror(errno));
+ }
+
+ close(fd);
+ all_valid = FALSE;
+ continue;
+ }
+
+ close(fd);
+ p = map + RSPAMD_HS_MAGIC_LEN + sizeof(cache->plt);
+ n = *(gint *) p;
+
+ if (n <= 0 || 2 * n * sizeof(gint) + /* IDs + flags */
+ sizeof(guint64) + /* crc */
+ RSPAMD_HS_MAGIC_LEN + /* header */
+ sizeof(cache->plt) >
+ (gsize) st.st_size) {
+ /* Some wrong amount of regexps */
+ if (!try_load) {
+ msg_err_re_cache("bad number of expressions in %s: %d",
+ path, n);
+ }
+ else {
+ msg_debug_re_cache("bad number of expressions in %s: %d",
+ path, n);
+ }
+
+ munmap(map, st.st_size);
+ all_valid = FALSE;
+ continue;
+ }
+
+ total += n;
+ p += sizeof(n);
+ hs_ids = g_malloc(n * sizeof(*hs_ids));
+ memcpy(hs_ids, p, n * sizeof(*hs_ids));
+ p += n * sizeof(*hs_ids);
+ hs_flags = g_malloc(n * sizeof(*hs_flags));
+ memcpy(hs_flags, p, n * sizeof(*hs_flags));
+
+ /* Skip crc */
+ p += n * sizeof(*hs_ids) + sizeof(guint64);
+
+ /* Cleanup */
+ if (re_class->hs_scratch != NULL) {
+ hs_free_scratch(re_class->hs_scratch);
+ }
+
+ if (re_class->hs_db != NULL) {
+ rspamd_hyperscan_free(re_class->hs_db, false);
+ }
+
+ if (re_class->hs_ids) {
+ g_free(re_class->hs_ids);
+ }
+
+ re_class->hs_ids = NULL;
+ re_class->hs_scratch = NULL;
+ re_class->hs_db = NULL;
+ munmap(map, st.st_size);
+
+ re_class->hs_db = rspamd_hyperscan_maybe_load(path, p - map);
+ if (re_class->hs_db == NULL) {
+ if (!try_load) {
+ msg_err_re_cache("bad hs database in %s", path);
+ }
+ else {
+ msg_debug_re_cache("bad hs database in %s", path);
+ }
+ g_free(hs_ids);
+ g_free(hs_flags);
+
+ re_class->hs_ids = NULL;
+ re_class->hs_scratch = NULL;
+ re_class->hs_db = NULL;
+ all_valid = FALSE;
+
+ continue;
+ }
+
+ if ((ret = hs_alloc_scratch(rspamd_hyperscan_get_database(re_class->hs_db),
+ &re_class->hs_scratch)) != HS_SUCCESS) {
+ if (!try_load) {
+ msg_err_re_cache("bad hs database in %s; error code: %d", path, ret);
+ }
+ else {
+ msg_debug_re_cache("bad hs database in %s; error code: %d", path, ret);
+ }
+ g_free(hs_ids);
+ g_free(hs_flags);
+
+ rspamd_hyperscan_free(re_class->hs_db, true);
+ re_class->hs_ids = NULL;
+ re_class->hs_scratch = NULL;
+ re_class->hs_db = NULL;
+ all_valid = FALSE;
+
+ continue;
+ }
+
+ /*
+ * Now find hyperscan elts that are successfully compiled and
+ * specify that they should be matched using hyperscan
+ */
+ for (i = 0; i < n; i++) {
+ g_assert((gint) cache->re->len > hs_ids[i] && hs_ids[i] >= 0);
+ elt = g_ptr_array_index(cache->re, hs_ids[i]);
+
+ if (hs_flags[i] & HS_FLAG_PREFILTER) {
+ elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN_PRE;
+ }
+ else {
+ elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN;
+ }
+ }
+
+ re_class->hs_ids = hs_ids;
+ g_free(hs_flags);
+ re_class->nhs = n;
+
+ if (!has_valid) {
+ has_valid = TRUE;
+ all_valid = TRUE;
+ }
+ }
+ else {
+ if (!try_load) {
+ msg_err_re_cache("invalid hyperscan hash file '%s'",
+ path);
+ }
+ else {
+ msg_debug_re_cache("invalid hyperscan hash file '%s'",
+ path);
+ }
+ all_valid = FALSE;
+ continue;
+ }
+ }
+
+ if (has_valid) {
+ if (all_valid) {
+ msg_info_re_cache("full hyperscan database of %d regexps has been loaded", total);
+ cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_FULL;
+ }
+ else {
+ msg_info_re_cache("partial hyperscan database of %d regexps has been loaded", total);
+ cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_PARTIAL;
+ }
+ }
+ else {
+ msg_info_re_cache("hyperscan database has NOT been loaded; no valid expressions");
+ cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOAD_ERROR;
+ }
+
+
+ return cache->hyperscan_loaded;
+#endif
+}
+
+void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
+ const gchar *sname,
+ gint ref)
+{
+ khiter_t k;
+
+ k = kh_get(lua_selectors_hash, cache->selectors, (gchar *) sname);
+
+ if (k == kh_end(cache->selectors)) {
+ gchar *cpy = g_strdup(sname);
+ gint res;
+
+ k = kh_put(lua_selectors_hash, cache->selectors, cpy, &res);
+
+ kh_value(cache->selectors, k) = ref;
+ }
+ else {
+ msg_warn_re_cache("replacing selector with name %s", sname);
+
+ if (cache->L) {
+ luaL_unref(cache->L, LUA_REGISTRYINDEX, kh_value(cache->selectors, k));
+ }
+
+ kh_value(cache->selectors, k) = ref;
+ }
+}
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
new file mode 100644
index 0000000..d6449a9
--- /dev/null
+++ b/src/libserver/re_cache.h
@@ -0,0 +1,212 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_RE_CACHE_H
+#define RSPAMD_RE_CACHE_H
+
+#include "config.h"
+#include "libutil/regexp.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_re_cache;
+struct rspamd_re_runtime;
+struct rspamd_task;
+struct rspamd_config;
+
+enum rspamd_re_type {
+ RSPAMD_RE_HEADER,
+ RSPAMD_RE_RAWHEADER,
+ RSPAMD_RE_ALLHEADER,
+ RSPAMD_RE_MIMEHEADER,
+ RSPAMD_RE_MIME,
+ RSPAMD_RE_RAWMIME,
+ RSPAMD_RE_URL,
+ RSPAMD_RE_EMAIL,
+ RSPAMD_RE_BODY, /* full in SA */
+ RSPAMD_RE_SABODY, /* body in SA */
+ RSPAMD_RE_SARAWBODY, /* rawbody in SA */
+ RSPAMD_RE_WORDS, /* normalized words */
+ RSPAMD_RE_RAWWORDS, /* raw words */
+ RSPAMD_RE_STEMWORDS, /* stemmed words */
+ RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */
+ RSPAMD_RE_MAX
+};
+
+struct rspamd_re_cache_stat {
+ guint64 bytes_scanned;
+ guint64 bytes_scanned_pcre;
+ guint regexp_checked;
+ guint regexp_matched;
+ guint regexp_total;
+ guint regexp_fast_cached;
+};
+
+/**
+ * Initialize re_cache persistent structure
+ */
+struct rspamd_re_cache *rspamd_re_cache_new(void);
+
+/**
+ * Add the existing regexp to the cache
+ * @param cache cache object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ * @param lua_cbref optional lua callback reference for matching purposes
+ */
+rspamd_regexp_t *
+rspamd_re_cache_add(struct rspamd_re_cache *cache, rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gconstpointer type_data, gsize datalen,
+ gint lua_cbref);
+
+/**
+ * Replace regexp in the cache with another regexp
+ * @param cache cache object
+ * @param what re to replace
+ * @param with regexp object to replace the origin
+ */
+void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
+ rspamd_regexp_t *what,
+ rspamd_regexp_t *with);
+
+/**
+ * Initialize and optimize re cache structure
+ */
+void rspamd_re_cache_init(struct rspamd_re_cache *cache,
+ struct rspamd_config *cfg);
+
+enum rspamd_hyperscan_status {
+ RSPAMD_HYPERSCAN_UNKNOWN = 0,
+ RSPAMD_HYPERSCAN_UNSUPPORTED,
+ RSPAMD_HYPERSCAN_LOADED_PARTIAL,
+ RSPAMD_HYPERSCAN_LOADED_FULL,
+ RSPAMD_HYPERSCAN_LOAD_ERROR,
+};
+
+/**
+ * Returns true when hyperscan is loaded
+ * @param cache
+ * @return
+ */
+enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded(struct rspamd_re_cache *cache);
+
+/**
+ * Get runtime data for a cache
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache);
+
+/**
+ * Get runtime statistics
+ */
+const struct rspamd_re_cache_stat *
+rspamd_re_cache_get_stat(struct rspamd_re_runtime *rt);
+
+/**
+ * Process regexp runtime and return the result for a specific regexp
+ * @param task task object
+ * @param rt cache runtime object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ * @param is_strong use case sensitive match when looking for headers
+ */
+gint rspamd_re_cache_process(struct rspamd_task *task,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gconstpointer type_data,
+ gsize datalen,
+ gboolean is_strong);
+
+int rspamd_re_cache_process_ffi(void *ptask,
+ void *pre,
+ int type,
+ void *type_data,
+ int is_strong);
+
+/**
+ * Destroy runtime data
+ */
+void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt);
+
+/**
+ * Unref re cache
+ */
+void rspamd_re_cache_unref(struct rspamd_re_cache *cache);
+
+/**
+ * Retain reference to re cache
+ */
+struct rspamd_re_cache *rspamd_re_cache_ref(struct rspamd_re_cache *cache);
+
+/**
+ * Set limit for all regular expressions in the cache, returns previous limit
+ */
+guint rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, guint limit);
+
+/**
+ * Convert re type to a human readable string (constant one)
+ */
+const gchar *rspamd_re_cache_type_to_string(enum rspamd_re_type type);
+
+/**
+ * Convert re type string to the type enum
+ */
+enum rspamd_re_type rspamd_re_cache_type_from_string(const char *str);
+
+struct ev_loop;
+/**
+ * Compile expressions to the hyperscan tree and store in the `cache_dir`
+ */
+gint rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
+ const char *cache_dir,
+ gdouble max_time,
+ gboolean silent,
+ struct ev_loop *event_loop,
+ void (*cb)(guint ncompiled, GError *err, void *cbd),
+ void *cbd);
+
+/**
+ * Returns TRUE if the specified file is valid hyperscan cache
+ */
+gboolean rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
+ const char *path,
+ gboolean silent,
+ gboolean try_load,
+ GError **err);
+
+/**
+ * Loads all hyperscan regexps precompiled
+ */
+enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan(
+ struct rspamd_re_cache *cache,
+ const char *cache_dir, bool try_load);
+
+/**
+ * Registers lua selector in the cache
+ */
+void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
+ const gchar *sname, gint ref);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/redis_pool.cxx b/src/libserver/redis_pool.cxx
new file mode 100644
index 0000000..9c2d6cf
--- /dev/null
+++ b/src/libserver/redis_pool.cxx
@@ -0,0 +1,663 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "contrib/libev/ev.h"
+#include "redis_pool.h"
+#include "cfg_file.h"
+#include "contrib/hiredis/hiredis.h"
+#include "contrib/hiredis/async.h"
+#include "contrib/hiredis/adapters/libev.h"
+#include "cryptobox.h"
+#include "logger.h"
+#include "contrib/ankerl/unordered_dense.h"
+
+#include <list>
+#include <unordered_map>
+
+namespace rspamd {
+class redis_pool_elt;
+class redis_pool;
+
+#define msg_debug_rpool(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_redis_pool_log_id, "redis_pool", conn->tag, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(redis_pool)
+
+enum class rspamd_redis_pool_connection_state : std::uint8_t {
+ RSPAMD_REDIS_POOL_CONN_INACTIVE = 0,
+ RSPAMD_REDIS_POOL_CONN_ACTIVE,
+ RSPAMD_REDIS_POOL_CONN_FINALISING
+};
+
+struct redis_pool_connection {
+ using redis_pool_connection_ptr = std::unique_ptr<redis_pool_connection>;
+ using conn_iter_t = std::list<redis_pool_connection_ptr>::iterator;
+ struct redisAsyncContext *ctx;
+ redis_pool_elt *elt;
+ redis_pool *pool;
+ conn_iter_t elt_pos;
+ ev_timer timeout;
+ gchar tag[MEMPOOL_UID_LEN];
+ rspamd_redis_pool_connection_state state;
+
+ auto schedule_timeout() -> void;
+ ~redis_pool_connection();
+
+ explicit redis_pool_connection(redis_pool *_pool,
+ redis_pool_elt *_elt,
+ const std::string &db,
+ const std::string &username,
+ const std::string &password,
+ struct redisAsyncContext *_ctx);
+
+private:
+ static auto redis_conn_timeout_cb(EV_P_ ev_timer *w, int revents) -> void;
+ static auto redis_quit_cb(redisAsyncContext *c, void *r, void *priv) -> void;
+ static auto redis_on_disconnect(const struct redisAsyncContext *ac, int status) -> auto;
+};
+
+
+using redis_pool_key_t = std::uint64_t;
+class redis_pool;
+
+class redis_pool_elt {
+ using redis_pool_connection_ptr = std::unique_ptr<redis_pool_connection>;
+ redis_pool *pool;
+ /*
+ * These lists owns connections, so if an element is removed from both
+ * lists, it is destructed
+ */
+ std::list<redis_pool_connection_ptr> active;
+ std::list<redis_pool_connection_ptr> inactive;
+ std::list<redis_pool_connection_ptr> terminating;
+ std::string ip;
+ std::string db;
+ std::string username;
+ std::string password;
+ int port;
+ redis_pool_key_t key;
+ bool is_unix;
+
+public:
+ /* Disable copy */
+ redis_pool_elt() = delete;
+ redis_pool_elt(const redis_pool_elt &) = delete;
+ /* Enable move */
+ redis_pool_elt(redis_pool_elt &&other) = default;
+
+ explicit redis_pool_elt(redis_pool *_pool,
+ const gchar *_db, const gchar *_username,
+ const gchar *_password,
+ const char *_ip, int _port)
+ : pool(_pool), ip(_ip), port(_port),
+ key(redis_pool_elt::make_key(_db, _username, _password, _ip, _port))
+ {
+ is_unix = ip[0] == '.' || ip[0] == '/';
+
+ if (_db) {
+ db = _db;
+ }
+ if (_username) {
+ username = _username;
+ }
+ if (_password) {
+ password = _password;
+ }
+ }
+
+ auto new_connection() -> redisAsyncContext *;
+
+ auto release_connection(const redis_pool_connection *conn) -> void
+ {
+ switch (conn->state) {
+ case rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE:
+ active.erase(conn->elt_pos);
+ break;
+ case rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_INACTIVE:
+ inactive.erase(conn->elt_pos);
+ break;
+ case rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_FINALISING:
+ terminating.erase(conn->elt_pos);
+ break;
+ }
+ }
+
+ auto move_to_inactive(redis_pool_connection *conn) -> void
+ {
+ inactive.splice(std::end(inactive), active, conn->elt_pos);
+ conn->elt_pos = std::prev(std::end(inactive));
+ }
+
+ auto move_to_terminating(redis_pool_connection *conn) -> void
+ {
+ terminating.splice(std::end(terminating), inactive, conn->elt_pos);
+ conn->elt_pos = std::prev(std::end(terminating));
+ }
+
+ inline static auto make_key(const gchar *db, const gchar *username,
+ const gchar *password, const char *ip, int port) -> redis_pool_key_t
+ {
+ rspamd_cryptobox_fast_hash_state_t st;
+
+ rspamd_cryptobox_fast_hash_init(&st, rspamd_hash_seed());
+
+ if (db) {
+ rspamd_cryptobox_fast_hash_update(&st, db, strlen(db));
+ }
+ if (username) {
+ rspamd_cryptobox_fast_hash_update(&st, username, strlen(username));
+ }
+ if (password) {
+ rspamd_cryptobox_fast_hash_update(&st, password, strlen(password));
+ }
+
+ rspamd_cryptobox_fast_hash_update(&st, ip, strlen(ip));
+ rspamd_cryptobox_fast_hash_update(&st, &port, sizeof(port));
+
+ return rspamd_cryptobox_fast_hash_final(&st);
+ }
+
+ auto num_active() const -> auto
+ {
+ return active.size();
+ }
+
+ ~redis_pool_elt()
+ {
+ rspamd_explicit_memzero(password.data(), password.size());
+ }
+
+private:
+ auto redis_async_new() -> redisAsyncContext *
+ {
+ struct redisAsyncContext *ctx;
+
+ if (is_unix) {
+ ctx = redisAsyncConnectUnix(ip.c_str());
+ }
+ else {
+ ctx = redisAsyncConnect(ip.c_str(), port);
+ }
+
+ if (ctx && ctx->err != REDIS_OK) {
+ msg_err("cannot connect to redis %s (port %d): %s", ip.c_str(), port,
+ ctx->errstr);
+ redisAsyncFree(ctx);
+
+ return nullptr;
+ }
+
+ return ctx;
+ }
+};
+
+class redis_pool final {
+ static constexpr const double default_timeout = 10.0;
+ static constexpr const unsigned default_max_conns = 100;
+
+ /* We want to have references integrity */
+ ankerl::unordered_dense::map<redisAsyncContext *,
+ redis_pool_connection *>
+ conns_by_ctx;
+ /*
+ * We store a pointer to the element in each connection, so this has to be
+ * a buckets map with pointers/references stability guarantees.
+ */
+ std::unordered_map<redis_pool_key_t, redis_pool_elt> elts_by_key;
+ bool wanna_die = false; /* Hiredis is 'clever' so we can call ourselves from destructor */
+public:
+ double timeout = default_timeout;
+ unsigned max_conns = default_max_conns;
+ struct ev_loop *event_loop;
+ struct rspamd_config *cfg;
+
+public:
+ explicit redis_pool()
+ : event_loop(nullptr), cfg(nullptr)
+ {
+ conns_by_ctx.reserve(max_conns);
+ }
+
+ /* Legacy stuff */
+ auto do_config(struct ev_loop *_loop, struct rspamd_config *_cfg) -> void
+ {
+ event_loop = _loop;
+ cfg = _cfg;
+ }
+
+ auto new_connection(const gchar *db, const gchar *username,
+ const gchar *password, const char *ip, int port) -> redisAsyncContext *;
+
+ auto release_connection(redisAsyncContext *ctx,
+ enum rspamd_redis_pool_release_type how) -> void;
+
+ auto unregister_context(redisAsyncContext *ctx) -> void
+ {
+ conns_by_ctx.erase(ctx);
+ }
+
+ auto register_context(redisAsyncContext *ctx, redis_pool_connection *conn)
+ {
+ conns_by_ctx.emplace(ctx, conn);
+ }
+
+ /* Hack to prevent Redis callbacks to be executed */
+ auto prepare_to_die() -> void
+ {
+ wanna_die = true;
+ }
+
+ ~redis_pool()
+ {
+ }
+};
+
+
+redis_pool_connection::~redis_pool_connection()
+{
+ const auto *conn = this; /* For debug */
+
+ if (state == rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE) {
+ msg_debug_rpool("active connection destructed: %p", ctx);
+
+ if (ctx) {
+ pool->unregister_context(ctx);
+
+ if (!(ctx->c.flags & REDIS_FREEING)) {
+ auto *ac = ctx;
+ ctx = nullptr;
+ ac->onDisconnect = nullptr;
+ redisAsyncFree(ac);
+ }
+ }
+ }
+ else {
+ msg_debug_rpool("inactive connection destructed: %p", ctx);
+
+ ev_timer_stop(pool->event_loop, &timeout);
+ if (ctx) {
+ pool->unregister_context(ctx);
+
+ if (!(ctx->c.flags & REDIS_FREEING)) {
+ auto *ac = ctx;
+ /* To prevent on_disconnect here */
+ ctx = nullptr;
+ ac->onDisconnect = nullptr;
+ redisAsyncFree(ac);
+ }
+ }
+ }
+}
+
+auto redis_pool_connection::redis_quit_cb(redisAsyncContext *c, void *r, void *priv) -> void
+{
+ struct redis_pool_connection *conn =
+ (struct redis_pool_connection *) priv;
+
+ msg_debug_rpool("quit command reply for the connection %p",
+ conn->ctx);
+ /*
+ * The connection will be freed by hiredis itself as we are here merely after
+ * quit command has succeeded and we have timer being set already.
+ * The problem is that when this callback is called, our connection is likely
+ * dead, so probably even on_disconnect callback has been already called...
+ *
+ * Hence, the connection might already be freed, so even (conn) pointer may be
+ * inaccessible.
+ *
+ * TODO: Use refcounts to prevent this stuff to happen, the problem is how
+ * to handle Redis timeout on `quit` command in fact... The good thing is that
+ * it will not likely happen.
+ */
+}
+
+/*
+ * Called for inactive connections that due to be removed
+ */
+auto redis_pool_connection::redis_conn_timeout_cb(EV_P_ ev_timer *w, int revents) -> void
+{
+ auto *conn = (struct redis_pool_connection *) w->data;
+
+ g_assert(conn->state != rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE);
+
+ if (conn->state == rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_INACTIVE) {
+ msg_debug_rpool("scheduled soft removal of connection %p",
+ conn->ctx);
+ conn->state = rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_FINALISING;
+ ev_timer_again(EV_A_ w);
+ redisAsyncCommand(conn->ctx, redis_pool_connection::redis_quit_cb, conn, "QUIT");
+ conn->elt->move_to_terminating(conn);
+ }
+ else {
+ /* Finalising by timeout */
+ ev_timer_stop(EV_A_ w);
+ msg_debug_rpool("final removal of connection %p, refcount: %d",
+ conn->ctx);
+
+ /* Erasure of shared pointer will cause it to be removed */
+ conn->elt->release_connection(conn);
+ }
+}
+
+auto redis_pool_connection::redis_on_disconnect(const struct redisAsyncContext *ac, int status) -> auto
+{
+ auto *conn = (struct redis_pool_connection *) ac->data;
+
+ /*
+ * Here, we know that redis itself will free this connection
+ * so, we need to do something very clever about it
+ */
+ if (conn->state != rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE) {
+ /* Do nothing for active connections as it is already handled somewhere */
+ if (conn->ctx) {
+ msg_debug_rpool("inactive connection terminated: %s",
+ conn->ctx->errstr);
+ }
+
+ /* Erasure of shared pointer will cause it to be removed */
+ conn->elt->release_connection(conn);
+ }
+}
+
+auto redis_pool_connection::schedule_timeout() -> void
+{
+ const auto *conn = this; /* For debug */
+ double real_timeout;
+ auto active_elts = elt->num_active();
+
+ if (active_elts > pool->max_conns) {
+ real_timeout = pool->timeout / 2.0;
+ real_timeout = rspamd_time_jitter(real_timeout, real_timeout / 4.0);
+ }
+ else {
+ real_timeout = pool->timeout;
+ real_timeout = rspamd_time_jitter(real_timeout, real_timeout / 2.0);
+ }
+
+ msg_debug_rpool("scheduled connection %p cleanup in %.1f seconds",
+ ctx, real_timeout);
+
+ timeout.data = this;
+ /* Restore in case if these fields have been modified externally */
+ ctx->data = this;
+ redisAsyncSetDisconnectCallback(ctx, redis_pool_connection::redis_on_disconnect);
+ ev_timer_init(&timeout,
+ redis_pool_connection::redis_conn_timeout_cb,
+ real_timeout, real_timeout / 2.0);
+ ev_timer_start(pool->event_loop, &timeout);
+}
+
+
+redis_pool_connection::redis_pool_connection(redis_pool *_pool,
+ redis_pool_elt *_elt,
+ const std::string &db,
+ const std::string &username,
+ const std::string &password,
+ struct redisAsyncContext *_ctx)
+ : ctx(_ctx), elt(_elt), pool(_pool)
+{
+
+ state = rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE;
+
+ pool->register_context(ctx, this);
+ ctx->data = this;
+ memset(tag, 0, sizeof(tag));
+ rspamd_random_hex(tag, sizeof(tag) - 1);
+
+ redisLibevAttach(pool->event_loop, ctx);
+ redisAsyncSetDisconnectCallback(ctx, redis_pool_connection::redis_on_disconnect);
+
+ if (!username.empty()) {
+ if (!password.empty()) {
+ redisAsyncCommand(ctx, nullptr, nullptr,
+ "AUTH %s %s", username.c_str(), password.c_str());
+ }
+ else {
+ msg_warn("Redis requires a password when username is supplied");
+ }
+ }
+ else if (!password.empty()) {
+ redisAsyncCommand(ctx, nullptr, nullptr,
+ "AUTH %s", password.c_str());
+ }
+ if (!db.empty()) {
+ redisAsyncCommand(ctx, nullptr, nullptr,
+ "SELECT %s", db.c_str());
+ }
+}
+
+auto redis_pool_elt::new_connection() -> redisAsyncContext *
+{
+ if (!inactive.empty()) {
+ decltype(inactive)::value_type conn;
+ conn.swap(inactive.back());
+ inactive.pop_back();
+
+ g_assert(conn->state != rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE);
+ if (conn->ctx->err == REDIS_OK) {
+ /* Also check SO_ERROR */
+ gint err;
+ socklen_t len = sizeof(gint);
+
+ if (getsockopt(conn->ctx->c.fd, SOL_SOCKET, SO_ERROR,
+ (void *) &err, &len) == -1) {
+ err = errno;
+ }
+
+ if (err != 0) {
+ /*
+ * We cannot reuse connection, so we just recursively call
+ * this function one more time
+ */
+ return new_connection();
+ }
+ else {
+ /* Reuse connection */
+ ev_timer_stop(pool->event_loop, &conn->timeout);
+ conn->state = rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE;
+ msg_debug_rpool("reused existing connection to %s:%d: %p",
+ ip.c_str(), port, conn->ctx);
+ active.emplace_front(std::move(conn));
+ active.front()->elt_pos = active.begin();
+
+ return active.front()->ctx;
+ }
+ }
+ else {
+ auto *nctx = redis_async_new();
+ if (nctx) {
+ active.emplace_front(std::make_unique<redis_pool_connection>(pool, this,
+ db.c_str(), username.c_str(), password.c_str(), nctx));
+ active.front()->elt_pos = active.begin();
+ }
+
+ return nctx;
+ }
+ }
+ else {
+ auto *nctx = redis_async_new();
+ if (nctx) {
+ active.emplace_front(std::make_unique<redis_pool_connection>(pool, this,
+ db.c_str(), username.c_str(), password.c_str(), nctx));
+ active.front()->elt_pos = active.begin();
+ }
+
+ return nctx;
+ }
+
+ RSPAMD_UNREACHABLE;
+}
+
+auto redis_pool::new_connection(const gchar *db, const gchar *username,
+ const gchar *password, const char *ip, int port) -> redisAsyncContext *
+{
+
+ if (!wanna_die) {
+ auto key = redis_pool_elt::make_key(db, username, password, ip, port);
+ auto found_elt = elts_by_key.find(key);
+
+ if (found_elt != elts_by_key.end()) {
+ auto &elt = found_elt->second;
+
+ return elt.new_connection();
+ }
+ else {
+ /* Need to create a pool */
+ auto nelt = elts_by_key.try_emplace(key,
+ this, db, username, password, ip, port);
+
+ return nelt.first->second.new_connection();
+ }
+ }
+
+ return nullptr;
+}
+
+auto redis_pool::release_connection(redisAsyncContext *ctx,
+ enum rspamd_redis_pool_release_type how) -> void
+{
+ if (!wanna_die) {
+ auto conn_it = conns_by_ctx.find(ctx);
+ if (conn_it != conns_by_ctx.end()) {
+ auto *conn = conn_it->second;
+ g_assert(conn->state == rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_ACTIVE);
+
+ if (ctx->err != REDIS_OK) {
+ /* We need to terminate connection forcefully */
+ msg_debug_rpool("closed connection %p due to an error", conn->ctx);
+ }
+ else {
+ if (how == RSPAMD_REDIS_RELEASE_DEFAULT) {
+ /* Ensure that there are no callbacks attached to this conn */
+ if (ctx->replies.head == nullptr && (ctx->c.flags & REDIS_CONNECTED)) {
+ /* Just move it to the inactive queue */
+ conn->state = rspamd_redis_pool_connection_state::RSPAMD_REDIS_POOL_CONN_INACTIVE;
+ conn->elt->move_to_inactive(conn);
+ conn->schedule_timeout();
+ msg_debug_rpool("mark connection %p inactive", conn->ctx);
+
+ return;
+ }
+ else {
+ msg_debug_rpool("closed connection %p due to callbacks left",
+ conn->ctx);
+ }
+ }
+ else {
+ if (how == RSPAMD_REDIS_RELEASE_FATAL) {
+ msg_debug_rpool("closed connection %p due to an fatal termination",
+ conn->ctx);
+ }
+ else {
+ msg_debug_rpool("closed connection %p due to explicit termination",
+ conn->ctx);
+ }
+ }
+ }
+
+ conn->elt->release_connection(conn);
+ }
+ else {
+ msg_err("fatal internal error, connection with ctx %p is not found in the Redis pool",
+ ctx);
+ RSPAMD_UNREACHABLE;
+ }
+ }
+}
+
+}// namespace rspamd
+
+void *
+rspamd_redis_pool_init(void)
+{
+ return new rspamd::redis_pool{};
+}
+
+void rspamd_redis_pool_config(void *p,
+ struct rspamd_config *cfg,
+ struct ev_loop *ev_base)
+{
+ g_assert(p != NULL);
+ auto *pool = reinterpret_cast<class rspamd::redis_pool *>(p);
+
+ pool->do_config(ev_base, cfg);
+}
+
+
+struct redisAsyncContext *
+rspamd_redis_pool_connect(void *p,
+ const gchar *db, const gchar *username,
+ const gchar *password, const char *ip, int port)
+{
+ g_assert(p != NULL);
+ auto *pool = reinterpret_cast<class rspamd::redis_pool *>(p);
+
+ return pool->new_connection(db, username, password, ip, port);
+}
+
+
+void rspamd_redis_pool_release_connection(void *p,
+ struct redisAsyncContext *ctx, enum rspamd_redis_pool_release_type how)
+{
+ g_assert(p != NULL);
+ g_assert(ctx != NULL);
+ auto *pool = reinterpret_cast<class rspamd::redis_pool *>(p);
+
+ pool->release_connection(ctx, how);
+}
+
+
+void rspamd_redis_pool_destroy(void *p)
+{
+ auto *pool = reinterpret_cast<class rspamd::redis_pool *>(p);
+
+ pool->prepare_to_die();
+ delete pool;
+}
+
+const gchar *
+rspamd_redis_type_to_string(int type)
+{
+ const gchar *ret = "unknown";
+
+ switch (type) {
+ case REDIS_REPLY_STRING:
+ ret = "string";
+ break;
+ case REDIS_REPLY_ARRAY:
+ ret = "array";
+ break;
+ case REDIS_REPLY_INTEGER:
+ ret = "int";
+ break;
+ case REDIS_REPLY_STATUS:
+ ret = "status";
+ break;
+ case REDIS_REPLY_NIL:
+ ret = "nil";
+ break;
+ case REDIS_REPLY_ERROR:
+ ret = "error";
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
diff --git a/src/libserver/redis_pool.h b/src/libserver/redis_pool.h
new file mode 100644
index 0000000..ecdaa0f
--- /dev/null
+++ b/src/libserver/redis_pool.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_REDIS_POOL_H_
+#define SRC_LIBSERVER_REDIS_POOL_H_
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+struct rspamd_config;
+struct redisAsyncContext;
+struct ev_loop;
+
+/**
+ * Creates new redis pool
+ * @return
+ */
+void *rspamd_redis_pool_init(void);
+
+/**
+ * Configure redis pool and binds it to a specific event base
+ * @param cfg
+ * @param ev_base
+ */
+void rspamd_redis_pool_config(void *pool,
+ struct rspamd_config *cfg,
+ struct ev_loop *ev_base);
+
+
+/**
+ * Create or reuse the specific redis connection
+ * @param pool
+ * @param db
+ * @param username
+ * @param password
+ * @param ip
+ * @param port
+ * @return
+ */
+struct redisAsyncContext *rspamd_redis_pool_connect(
+ void *pool,
+ const gchar *db, const gchar *username, const gchar *password,
+ const char *ip, int port);
+
+enum rspamd_redis_pool_release_type {
+ RSPAMD_REDIS_RELEASE_DEFAULT = 0,
+ RSPAMD_REDIS_RELEASE_FATAL = 1,
+ RSPAMD_REDIS_RELEASE_ENFORCE
+};
+
+/**
+ * Release a connection to the pool
+ * @param pool
+ * @param ctx
+ */
+void rspamd_redis_pool_release_connection(void *pool,
+ struct redisAsyncContext *ctx,
+ enum rspamd_redis_pool_release_type how);
+
+/**
+ * Stops redis pool and destroys it
+ * @param pool
+ */
+void rspamd_redis_pool_destroy(void *pool);
+
+/**
+ * Missing in hiredis
+ * @param type
+ * @return
+ */
+const gchar *rspamd_redis_type_to_string(int type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_REDIS_POOL_H_ */
diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c
new file mode 100644
index 0000000..f567b0b
--- /dev/null
+++ b/src/libserver/roll_history.c
@@ -0,0 +1,432 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "libmime/message.h"
+#include "lua/lua_common.h"
+#include "unix-std.h"
+#include "cfg_file_private.h"
+
+static const gchar rspamd_history_magic_old[] = {'r', 's', 'h', '1'};
+
+/**
+ * Returns new roll history
+ * @param pool pool for shared memory
+ * @return new structure
+ */
+struct roll_history *
+rspamd_roll_history_new(rspamd_mempool_t *pool, guint max_rows,
+ struct rspamd_config *cfg)
+{
+ struct roll_history *history;
+ lua_State *L = cfg->lua_state;
+
+ if (pool == NULL || max_rows == 0) {
+ return NULL;
+ }
+
+ history = rspamd_mempool_alloc0_shared(pool, sizeof(struct roll_history));
+
+ /*
+ * Here, we check if there is any plugin that handles history,
+ * in this case, we disable this code completely
+ */
+ lua_getglobal(L, "rspamd_plugins");
+ if (lua_istable(L, -1)) {
+ lua_pushstring(L, "history");
+ lua_gettable(L, -2);
+
+ if (lua_istable(L, -1)) {
+ history->disabled = TRUE;
+ }
+
+ lua_pop(L, 1);
+ }
+
+ lua_pop(L, 1);
+
+ if (!history->disabled) {
+ history->rows = rspamd_mempool_alloc0_shared(pool,
+ sizeof(struct roll_history_row) * max_rows);
+ history->nrows = max_rows;
+ }
+
+ return history;
+}
+
+struct history_metric_callback_data {
+ gchar *pos;
+ gint remain;
+};
+
+static void
+roll_history_symbols_callback(gpointer key, gpointer value, void *user_data)
+{
+ struct history_metric_callback_data *cb = user_data;
+ struct rspamd_symbol_result *s = value;
+ guint wr;
+
+ if (s->flags & RSPAMD_SYMBOL_RESULT_IGNORED) {
+ return;
+ }
+
+ if (cb->remain > 0) {
+ wr = rspamd_snprintf(cb->pos, cb->remain, "%s, ", s->name);
+ cb->pos += wr;
+ cb->remain -= wr;
+ }
+}
+
+/**
+ * Update roll history with data from task
+ * @param history roll history object
+ * @param task task object
+ */
+void rspamd_roll_history_update(struct roll_history *history,
+ struct rspamd_task *task)
+{
+ guint row_num;
+ struct roll_history_row *row;
+ struct rspamd_scan_result *metric_res;
+ struct history_metric_callback_data cbdata;
+ struct rspamd_action *action;
+
+ if (history->disabled) {
+ return;
+ }
+
+ /* First of all obtain check and obtain row number */
+ g_atomic_int_compare_and_exchange(&history->cur_row, history->nrows, 0);
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ row_num = g_atomic_int_add(&history->cur_row, 1);
+#else
+ row_num = g_atomic_int_exchange_and_add(&history->cur_row, 1);
+#endif
+
+ if (row_num < history->nrows) {
+ row = &history->rows[row_num];
+ g_atomic_int_set(&row->completed, FALSE);
+ }
+ else {
+ /* Race condition */
+ history->cur_row = 0;
+ return;
+ }
+
+ /* Add information from task to roll history */
+ if (task->from_addr) {
+ rspamd_strlcpy(row->from_addr,
+ rspamd_inet_address_to_string(task->from_addr),
+ sizeof(row->from_addr));
+ }
+ else {
+ rspamd_strlcpy(row->from_addr, "unknown", sizeof(row->from_addr));
+ }
+
+ row->timestamp = task->task_timestamp;
+
+ /* Strings */
+ if (task->message) {
+ rspamd_strlcpy(row->message_id, MESSAGE_FIELD(task, message_id),
+ sizeof(row->message_id));
+ }
+ if (task->auth_user) {
+ rspamd_strlcpy(row->user, task->auth_user, sizeof(row->user));
+ }
+ else {
+ row->user[0] = '\0';
+ }
+
+ /* Get default metric */
+ metric_res = task->result;
+
+ if (metric_res == NULL) {
+ row->symbols[0] = '\0';
+ row->action = METRIC_ACTION_NOACTION;
+ }
+ else {
+ row->score = metric_res->score;
+ action = rspamd_check_action_metric(task, NULL, NULL);
+ row->action = action->action_type;
+ row->required_score = rspamd_task_get_required_score(task, metric_res);
+ cbdata.pos = row->symbols;
+ cbdata.remain = sizeof(row->symbols);
+ rspamd_task_symbol_result_foreach(task, NULL,
+ roll_history_symbols_callback,
+ &cbdata);
+ if (cbdata.remain > 0) {
+ /* Remove last whitespace and comma */
+ *cbdata.pos-- = '\0';
+ *cbdata.pos-- = '\0';
+ *cbdata.pos = '\0';
+ }
+ }
+
+ row->scan_time = task->time_real_finish - task->task_timestamp;
+ row->len = task->msg.len;
+ g_atomic_int_set(&row->completed, TRUE);
+}
+
+/**
+ * Load previously saved history from file
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been loaded
+ */
+gboolean
+rspamd_roll_history_load(struct roll_history *history, const gchar *filename)
+{
+ gint fd;
+ struct stat st;
+ gchar magic[sizeof(rspamd_history_magic_old)];
+ ucl_object_t *top;
+ const ucl_object_t *cur, *elt;
+ struct ucl_parser *parser;
+ struct roll_history_row *row;
+ guint n, i;
+
+ g_assert(history != NULL);
+ if (history->disabled) {
+ return TRUE;
+ }
+
+ if (stat(filename, &st) == -1) {
+ msg_info("cannot load history from %s: %s", filename,
+ strerror(errno));
+ return FALSE;
+ }
+
+ if ((fd = open(filename, O_RDONLY)) == -1) {
+ msg_info("cannot load history from %s: %s", filename,
+ strerror(errno));
+ return FALSE;
+ }
+
+ /* Check for old format */
+ if (read(fd, magic, sizeof(magic)) == -1) {
+ close(fd);
+ msg_info("cannot read history from %s: %s", filename,
+ strerror(errno));
+ return FALSE;
+ }
+
+ if (memcmp(magic, rspamd_history_magic_old, sizeof(magic)) == 0) {
+ close(fd);
+ msg_warn("cannot read history from old format %s, "
+ "it will be replaced after restart",
+ filename);
+ return FALSE;
+ }
+
+ parser = ucl_parser_new(0);
+
+ if (!ucl_parser_add_fd(parser, fd)) {
+ msg_warn("cannot parse history file %s: %s", filename,
+ ucl_parser_get_error(parser));
+ ucl_parser_free(parser);
+ close(fd);
+
+ return FALSE;
+ }
+
+ top = ucl_parser_get_object(parser);
+ ucl_parser_free(parser);
+ close(fd);
+
+ if (top == NULL) {
+ msg_warn("cannot parse history file %s: no object", filename);
+
+ return FALSE;
+ }
+
+ if (ucl_object_type(top) != UCL_ARRAY) {
+ msg_warn("invalid object type read from: %s", filename);
+ ucl_object_unref(top);
+
+ return FALSE;
+ }
+
+ if (top->len > history->nrows) {
+ msg_warn("stored history is larger than the current one: %ud (file) vs "
+ "%ud (history)",
+ top->len, history->nrows);
+ n = history->nrows;
+ }
+ else if (top->len < history->nrows) {
+ msg_warn(
+ "stored history is smaller than the current one: %ud (file) vs "
+ "%ud (history)",
+ top->len, history->nrows);
+ n = top->len;
+ }
+ else {
+ n = top->len;
+ }
+
+ for (i = 0; i < n; i++) {
+ cur = ucl_array_find_index(top, i);
+
+ if (cur != NULL && ucl_object_type(cur) == UCL_OBJECT) {
+ row = &history->rows[i];
+ memset(row, 0, sizeof(*row));
+
+ elt = ucl_object_lookup(cur, "time");
+
+ if (elt && ucl_object_type(elt) == UCL_FLOAT) {
+ row->timestamp = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(cur, "id");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ rspamd_strlcpy(row->message_id, ucl_object_tostring(elt),
+ sizeof(row->message_id));
+ }
+
+ elt = ucl_object_lookup(cur, "symbols");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ rspamd_strlcpy(row->symbols, ucl_object_tostring(elt),
+ sizeof(row->symbols));
+ }
+
+ elt = ucl_object_lookup(cur, "user");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ rspamd_strlcpy(row->user, ucl_object_tostring(elt),
+ sizeof(row->user));
+ }
+
+ elt = ucl_object_lookup(cur, "from");
+
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ rspamd_strlcpy(row->from_addr, ucl_object_tostring(elt),
+ sizeof(row->from_addr));
+ }
+
+ elt = ucl_object_lookup(cur, "len");
+
+ if (elt && ucl_object_type(elt) == UCL_INT) {
+ row->len = ucl_object_toint(elt);
+ }
+
+ elt = ucl_object_lookup(cur, "scan_time");
+
+ if (elt && ucl_object_type(elt) == UCL_FLOAT) {
+ row->scan_time = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(cur, "score");
+
+ if (elt && ucl_object_type(elt) == UCL_FLOAT) {
+ row->score = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(cur, "required_score");
+
+ if (elt && ucl_object_type(elt) == UCL_FLOAT) {
+ row->required_score = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(cur, "action");
+
+ if (elt && ucl_object_type(elt) == UCL_INT) {
+ row->action = ucl_object_toint(elt);
+ }
+
+ row->completed = TRUE;
+ }
+ }
+
+ ucl_object_unref(top);
+
+ history->cur_row = n;
+
+ return TRUE;
+}
+
+/**
+ * Save history to file
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been saved
+ */
+gboolean
+rspamd_roll_history_save(struct roll_history *history, const gchar *filename)
+{
+ gint fd;
+ FILE *fp;
+ ucl_object_t *obj, *elt;
+ guint i;
+ struct roll_history_row *row;
+ struct ucl_emitter_functions *emitter_func;
+
+ g_assert(history != NULL);
+
+ if (history->disabled) {
+ return TRUE;
+ }
+
+ if ((fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) {
+ msg_info("cannot save history to %s: %s", filename, strerror(errno));
+ return FALSE;
+ }
+
+ fp = fdopen(fd, "w");
+ obj = ucl_object_typed_new(UCL_ARRAY);
+
+ for (i = 0; i < history->nrows; i++) {
+ row = &history->rows[i];
+
+ if (!row->completed) {
+ continue;
+ }
+
+ elt = ucl_object_typed_new(UCL_OBJECT);
+
+ ucl_object_insert_key(elt, ucl_object_fromdouble(row->timestamp),
+ "time", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromstring(row->message_id),
+ "id", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromstring(row->symbols),
+ "symbols", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromstring(row->user),
+ "user", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromstring(row->from_addr),
+ "from", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromint(row->len),
+ "len", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromdouble(row->scan_time),
+ "scan_time", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromdouble(row->score),
+ "score", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromdouble(row->required_score),
+ "required_score", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromint(row->action),
+ "action", 0, false);
+
+ ucl_array_append(obj, elt);
+ }
+
+ emitter_func = ucl_object_emit_file_funcs(fp);
+ ucl_object_emit_full(obj, UCL_EMIT_JSON_COMPACT, emitter_func, NULL);
+ ucl_object_emit_funcs_free(emitter_func);
+ ucl_object_unref(obj);
+
+ fclose(fp);
+
+ return TRUE;
+}
diff --git a/src/libserver/roll_history.h b/src/libserver/roll_history.h
new file mode 100644
index 0000000..62bce7f
--- /dev/null
+++ b/src/libserver/roll_history.h
@@ -0,0 +1,98 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ROLL_HISTORY_H_
+#define ROLL_HISTORY_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Roll history is a special cycled buffer for checked messages, it is designed for writing history messages
+ * and displaying them in webui
+ */
+
+#define HISTORY_MAX_ID 256
+#define HISTORY_MAX_SYMBOLS 256
+#define HISTORY_MAX_USER 32
+#define HISTORY_MAX_ADDR 32
+
+struct rspamd_task;
+struct rspamd_config;
+
+struct roll_history_row {
+ ev_tstamp timestamp;
+ gchar message_id[HISTORY_MAX_ID];
+ gchar symbols[HISTORY_MAX_SYMBOLS];
+ gchar user[HISTORY_MAX_USER];
+ gchar from_addr[HISTORY_MAX_ADDR];
+ gsize len;
+ gdouble scan_time;
+ gdouble score;
+ gdouble required_score;
+ gint action;
+ guint completed;
+};
+
+struct roll_history {
+ struct roll_history_row *rows;
+ gboolean disabled;
+ guint nrows;
+ guint cur_row;
+};
+
+/**
+ * Returns new roll history
+ * @param pool pool for shared memory
+ * @return new structure
+ */
+struct roll_history *rspamd_roll_history_new(rspamd_mempool_t *pool,
+ guint max_rows, struct rspamd_config *cfg);
+
+/**
+ * Update roll history with data from task
+ * @param history roll history object
+ * @param task task object
+ */
+void rspamd_roll_history_update(struct roll_history *history,
+ struct rspamd_task *task);
+
+/**
+ * Load previously saved history from file
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been loaded
+ */
+gboolean rspamd_roll_history_load(struct roll_history *history,
+ const gchar *filename);
+
+/**
+ * Save history to file
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been saved
+ */
+gboolean rspamd_roll_history_save(struct roll_history *history,
+ const gchar *filename);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ROLL_HISTORY_H_ */
diff --git a/src/libserver/rspamd_control.c b/src/libserver/rspamd_control.c
new file mode 100644
index 0000000..69af059
--- /dev/null
+++ b/src/libserver/rspamd_control.c
@@ -0,0 +1,1334 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "rspamd_control.h"
+#include "worker_util.h"
+#include "libserver/http/http_connection.h"
+#include "libserver/http/http_private.h"
+#include "libutil/libev_helper.h"
+#include "unix-std.h"
+#include "utlist.h"
+
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+
+#ifdef WITH_HYPERSCAN
+#include "hyperscan_tools.h"
+#endif
+
+static ev_tstamp io_timeout = 30.0;
+static ev_tstamp worker_io_timeout = 0.5;
+
+struct rspamd_control_session;
+
+struct rspamd_control_reply_elt {
+ struct rspamd_control_reply reply;
+ struct rspamd_io_ev ev;
+ struct ev_loop *event_loop;
+ GQuark wrk_type;
+ pid_t wrk_pid;
+ gpointer ud;
+ gint attached_fd;
+ GHashTable *pending_elts;
+ struct rspamd_control_reply_elt *prev, *next;
+};
+
+struct rspamd_control_session {
+ gint fd;
+ struct ev_loop *event_loop;
+ struct rspamd_main *rspamd_main;
+ struct rspamd_http_connection *conn;
+ struct rspamd_control_command cmd;
+ struct rspamd_control_reply_elt *replies;
+ rspamd_inet_addr_t *addr;
+ guint replies_remain;
+ gboolean is_reply;
+};
+
+static const struct rspamd_control_cmd_match {
+ rspamd_ftok_t name;
+ enum rspamd_control_type type;
+} cmd_matches[] = {
+ {.name = {
+ .begin = "/stat",
+ .len = sizeof("/stat") - 1},
+ .type = RSPAMD_CONTROL_STAT},
+ {.name = {.begin = "/reload", .len = sizeof("/reload") - 1}, .type = RSPAMD_CONTROL_RELOAD},
+ {.name = {.begin = "/reresolve", .len = sizeof("/reresolve") - 1}, .type = RSPAMD_CONTROL_RERESOLVE},
+ {.name = {.begin = "/recompile", .len = sizeof("/recompile") - 1}, .type = RSPAMD_CONTROL_RECOMPILE},
+ {.name = {.begin = "/fuzzystat", .len = sizeof("/fuzzystat") - 1}, .type = RSPAMD_CONTROL_FUZZY_STAT},
+ {.name = {.begin = "/fuzzysync", .len = sizeof("/fuzzysync") - 1}, .type = RSPAMD_CONTROL_FUZZY_SYNC},
+};
+
+static void rspamd_control_ignore_io_handler(int fd, short what, void *ud);
+
+static void
+rspamd_control_stop_pending(struct rspamd_control_reply_elt *elt)
+{
+ GHashTable *htb;
+ /* It stops event and frees hash */
+ htb = elt->pending_elts;
+ g_hash_table_remove(elt->pending_elts, elt);
+ /* Release hash reference */
+ g_hash_table_unref(htb);
+}
+
+void rspamd_control_send_error(struct rspamd_control_session *session,
+ gint code, const gchar *error_msg, ...)
+{
+ struct rspamd_http_message *msg;
+ rspamd_fstring_t *reply;
+ va_list args;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+
+ va_start(args, error_msg);
+ msg->status = rspamd_fstring_new();
+ rspamd_vprintf_fstring(&msg->status, error_msg, args);
+ va_end(args);
+
+ msg->date = time(NULL);
+ msg->code = code;
+ reply = rspamd_fstring_sized_new(msg->status->len + 16);
+ rspamd_printf_fstring(&reply, "{\"error\":\"%V\"}", msg->status);
+ rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+ rspamd_http_connection_reset(session->conn);
+ rspamd_http_connection_write_message(session->conn,
+ msg,
+ NULL,
+ "application/json",
+ session,
+ io_timeout);
+}
+
+static void
+rspamd_control_send_ucl(struct rspamd_control_session *session,
+ ucl_object_t *obj)
+{
+ struct rspamd_http_message *msg;
+ rspamd_fstring_t *reply;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+ msg->date = time(NULL);
+ msg->code = 200;
+ msg->status = rspamd_fstring_new_init("OK", 2);
+ reply = rspamd_fstring_sized_new(BUFSIZ);
+ rspamd_ucl_emit_fstring(obj, UCL_EMIT_JSON_COMPACT, &reply);
+ rspamd_http_message_set_body_from_fstring_steal(msg, reply);
+ rspamd_http_connection_reset(session->conn);
+ rspamd_http_connection_write_message(session->conn,
+ msg,
+ NULL,
+ "application/json",
+ session,
+ io_timeout);
+}
+
+static void
+rspamd_control_connection_close(struct rspamd_control_session *session)
+{
+ struct rspamd_control_reply_elt *elt, *telt;
+ struct rspamd_main *rspamd_main;
+
+ rspamd_main = session->rspamd_main;
+ msg_info_main("finished connection from %s",
+ rspamd_inet_address_to_string(session->addr));
+
+ DL_FOREACH_SAFE(session->replies, elt, telt)
+ {
+ rspamd_control_stop_pending(elt);
+ }
+
+ rspamd_inet_address_free(session->addr);
+ rspamd_http_connection_unref(session->conn);
+ close(session->fd);
+ g_free(session);
+}
+
+static void
+rspamd_control_write_reply(struct rspamd_control_session *session)
+{
+ ucl_object_t *rep, *cur, *workers;
+ struct rspamd_control_reply_elt *elt;
+ gchar tmpbuf[64];
+ gdouble total_utime = 0, total_systime = 0;
+ struct ucl_parser *parser;
+ guint total_conns = 0;
+
+ rep = ucl_object_typed_new(UCL_OBJECT);
+ workers = ucl_object_typed_new(UCL_OBJECT);
+
+ DL_FOREACH(session->replies, elt)
+ {
+ /* Skip incompatible worker for fuzzy_stat */
+ if ((session->cmd.type == RSPAMD_CONTROL_FUZZY_STAT ||
+ session->cmd.type == RSPAMD_CONTROL_FUZZY_SYNC) &&
+ elt->wrk_type != g_quark_from_static_string("fuzzy")) {
+ continue;
+ }
+
+ rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "%P", elt->wrk_pid);
+ cur = ucl_object_typed_new(UCL_OBJECT);
+
+ ucl_object_insert_key(cur, ucl_object_fromstring(g_quark_to_string(elt->wrk_type)), "type", 0, false);
+
+ switch (session->cmd.type) {
+ case RSPAMD_CONTROL_STAT:
+ ucl_object_insert_key(cur, ucl_object_fromint(elt->reply.reply.stat.conns), "conns", 0, false);
+ ucl_object_insert_key(cur, ucl_object_fromdouble(elt->reply.reply.stat.utime), "utime", 0, false);
+ ucl_object_insert_key(cur, ucl_object_fromdouble(elt->reply.reply.stat.systime), "systime", 0, false);
+ ucl_object_insert_key(cur, ucl_object_fromdouble(elt->reply.reply.stat.uptime), "uptime", 0, false);
+ ucl_object_insert_key(cur, ucl_object_fromint(elt->reply.reply.stat.maxrss), "maxrss", 0, false);
+
+ total_utime += elt->reply.reply.stat.utime;
+ total_systime += elt->reply.reply.stat.systime;
+ total_conns += elt->reply.reply.stat.conns;
+
+ break;
+
+ case RSPAMD_CONTROL_RELOAD:
+ ucl_object_insert_key(cur, ucl_object_fromint(elt->reply.reply.reload.status), "status", 0, false);
+ break;
+ case RSPAMD_CONTROL_RECOMPILE:
+ ucl_object_insert_key(cur, ucl_object_fromint(elt->reply.reply.recompile.status), "status", 0, false);
+ break;
+ case RSPAMD_CONTROL_RERESOLVE:
+ ucl_object_insert_key(cur, ucl_object_fromint(elt->reply.reply.reresolve.status), "status", 0, false);
+ break;
+ case RSPAMD_CONTROL_FUZZY_STAT:
+ if (elt->attached_fd != -1) {
+ /* We have some data to parse */
+ parser = ucl_parser_new(0);
+ ucl_object_insert_key(cur,
+ ucl_object_fromint(
+ elt->reply.reply.fuzzy_stat.status),
+ "status",
+ 0,
+ false);
+
+ if (ucl_parser_add_fd(parser, elt->attached_fd)) {
+ ucl_object_insert_key(cur, ucl_parser_get_object(parser),
+ "data", 0, false);
+ ucl_parser_free(parser);
+ }
+ else {
+
+ ucl_object_insert_key(cur, ucl_object_fromstring(ucl_parser_get_error(parser)), "error", 0, false);
+
+ ucl_parser_free(parser);
+ }
+
+ ucl_object_insert_key(cur,
+ ucl_object_fromlstring(
+ elt->reply.reply.fuzzy_stat.storage_id,
+ MEMPOOL_UID_LEN - 1),
+ "id",
+ 0,
+ false);
+ }
+ else {
+ ucl_object_insert_key(cur,
+ ucl_object_fromstring("missing file"),
+ "error",
+ 0,
+ false);
+ ucl_object_insert_key(cur,
+ ucl_object_fromint(
+ elt->reply.reply.fuzzy_stat.status),
+ "status",
+ 0,
+ false);
+ }
+ break;
+ case RSPAMD_CONTROL_FUZZY_SYNC:
+ ucl_object_insert_key(cur, ucl_object_fromint(elt->reply.reply.fuzzy_sync.status), "status", 0, false);
+ break;
+ default:
+ break;
+ }
+
+ if (elt->attached_fd != -1) {
+ close(elt->attached_fd);
+ elt->attached_fd = -1;
+ }
+
+ ucl_object_insert_key(workers, cur, tmpbuf, 0, true);
+ }
+
+ ucl_object_insert_key(rep, workers, "workers", 0, false);
+
+ if (session->cmd.type == RSPAMD_CONTROL_STAT) {
+ /* Total stats */
+ cur = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(cur, ucl_object_fromint(total_conns), "conns", 0, false);
+ ucl_object_insert_key(cur, ucl_object_fromdouble(total_utime), "utime", 0, false);
+ ucl_object_insert_key(cur, ucl_object_fromdouble(total_systime), "systime", 0, false);
+
+ ucl_object_insert_key(rep, cur, "total", 0, false);
+ }
+
+ rspamd_control_send_ucl(session, rep);
+ ucl_object_unref(rep);
+}
+
+static void
+rspamd_control_wrk_io(gint fd, short what, gpointer ud)
+{
+ struct rspamd_control_reply_elt *elt = ud;
+ struct rspamd_control_session *session;
+ guchar fdspace[CMSG_SPACE(sizeof(int))];
+ struct iovec iov;
+ struct msghdr msg;
+ gssize r;
+
+ session = elt->ud;
+ elt->attached_fd = -1;
+
+ if (what == EV_READ) {
+ iov.iov_base = &elt->reply;
+ iov.iov_len = sizeof(elt->reply);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = recvmsg(fd, &msg, 0);
+ if (r == -1) {
+ msg_err("cannot read reply from the worker %P (%s): %s",
+ elt->wrk_pid, g_quark_to_string(elt->wrk_type),
+ strerror(errno));
+ }
+ else if (r >= (gssize) sizeof(elt->reply)) {
+ if (msg.msg_controllen >= CMSG_LEN(sizeof(int))) {
+ elt->attached_fd = *(int *) CMSG_DATA(CMSG_FIRSTHDR(&msg));
+ }
+ }
+ }
+ else {
+ /* Timeout waiting */
+ msg_warn("timeout waiting reply from %P (%s)",
+ elt->wrk_pid, g_quark_to_string(elt->wrk_type));
+ }
+
+ session->replies_remain--;
+ rspamd_ev_watcher_stop(session->event_loop,
+ &elt->ev);
+
+ if (session->replies_remain == 0) {
+ rspamd_control_write_reply(session);
+ }
+}
+
+static void
+rspamd_control_error_handler(struct rspamd_http_connection *conn, GError *err)
+{
+ struct rspamd_control_session *session = conn->ud;
+ struct rspamd_main *rspamd_main;
+
+ rspamd_main = session->rspamd_main;
+
+ if (!session->is_reply) {
+ msg_info_main("abnormally closing control connection: %e", err);
+ session->is_reply = TRUE;
+ rspamd_control_send_error(session, err->code, "%s", err->message);
+ }
+ else {
+ rspamd_control_connection_close(session);
+ }
+}
+
+void rspamd_pending_control_free(gpointer p)
+{
+ struct rspamd_control_reply_elt *rep_elt = (struct rspamd_control_reply_elt *) p;
+
+ rspamd_ev_watcher_stop(rep_elt->event_loop, &rep_elt->ev);
+ g_free(rep_elt);
+}
+
+static struct rspamd_control_reply_elt *
+rspamd_control_broadcast_cmd(struct rspamd_main *rspamd_main,
+ struct rspamd_control_command *cmd,
+ gint attached_fd,
+ rspamd_ev_cb handler,
+ gpointer ud,
+ pid_t except_pid)
+{
+ GHashTableIter it;
+ struct rspamd_worker *wrk;
+ struct rspamd_control_reply_elt *rep_elt, *res = NULL;
+ gpointer k, v;
+ struct msghdr msg;
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ guchar fdspace[CMSG_SPACE(sizeof(int))];
+ gssize r;
+
+ g_hash_table_iter_init(&it, rspamd_main->workers);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ wrk = v;
+
+ /* No control pipe */
+ if (wrk->control_pipe[0] == -1) {
+ continue;
+ }
+
+ if (except_pid != 0 && wrk->pid == except_pid) {
+ continue;
+ }
+
+ /* Worker is terminating, do not bother sending stuff */
+ if (wrk->state == rspamd_worker_state_terminating) {
+ continue;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+
+ /* Attach fd to the message */
+ if (attached_fd != -1) {
+ memset(fdspace, 0, sizeof(fdspace));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &attached_fd, sizeof(int));
+ }
+
+ iov.iov_base = cmd;
+ iov.iov_len = sizeof(*cmd);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = sendmsg(wrk->control_pipe[0], &msg, 0);
+
+ if (r == sizeof(*cmd)) {
+ rep_elt = g_malloc0(sizeof(*rep_elt));
+ rep_elt->wrk_pid = wrk->pid;
+ rep_elt->wrk_type = wrk->type;
+ rep_elt->event_loop = rspamd_main->event_loop;
+ rep_elt->ud = ud;
+ rep_elt->pending_elts = g_hash_table_ref(wrk->control_events_pending);
+ rspamd_ev_watcher_init(&rep_elt->ev,
+ wrk->control_pipe[0],
+ EV_READ, handler,
+ rep_elt);
+ rspamd_ev_watcher_start(rspamd_main->event_loop,
+ &rep_elt->ev, worker_io_timeout);
+ g_hash_table_insert(wrk->control_events_pending, rep_elt, rep_elt);
+
+ DL_APPEND(res, rep_elt);
+ }
+ else {
+ msg_err_main("cannot write command %d(%z) to the worker %P(%s), fd: %d: %s",
+ (int) cmd->type, iov.iov_len,
+ wrk->pid,
+ g_quark_to_string(wrk->type),
+ wrk->control_pipe[0],
+ strerror(errno));
+ }
+ }
+
+ return res;
+}
+
+void rspamd_control_broadcast_srv_cmd(struct rspamd_main *rspamd_main,
+ struct rspamd_control_command *cmd,
+ pid_t except_pid)
+{
+ rspamd_control_broadcast_cmd(rspamd_main, cmd, -1,
+ rspamd_control_ignore_io_handler, NULL, except_pid);
+}
+
+static gint
+rspamd_control_finish_handler(struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg)
+{
+ struct rspamd_control_session *session = conn->ud;
+ rspamd_ftok_t srch;
+ guint i;
+ gboolean found = FALSE;
+ struct rspamd_control_reply_elt *cur;
+
+
+ if (!session->is_reply) {
+ if (msg->url == NULL) {
+ rspamd_control_connection_close(session);
+
+ return 0;
+ }
+
+ srch.begin = msg->url->str;
+ srch.len = msg->url->len;
+
+ session->is_reply = TRUE;
+
+ for (i = 0; i < G_N_ELEMENTS(cmd_matches); i++) {
+ if (rspamd_ftok_casecmp(&srch, &cmd_matches[i].name) == 0) {
+ session->cmd.type = cmd_matches[i].type;
+ found = TRUE;
+ break;
+ }
+ }
+
+ if (!found) {
+ rspamd_control_send_error(session, 404, "Command not defined");
+ }
+ else {
+ /* Send command to all workers */
+ session->replies = rspamd_control_broadcast_cmd(
+ session->rspamd_main, &session->cmd, -1,
+ rspamd_control_wrk_io, session, 0);
+
+ DL_FOREACH(session->replies, cur)
+ {
+ session->replies_remain++;
+ }
+ }
+ }
+ else {
+ rspamd_control_connection_close(session);
+ }
+
+
+ return 0;
+}
+
+void rspamd_control_process_client_socket(struct rspamd_main *rspamd_main,
+ gint fd, rspamd_inet_addr_t *addr)
+{
+ struct rspamd_control_session *session;
+
+ session = g_malloc0(sizeof(*session));
+
+ session->fd = fd;
+ session->conn = rspamd_http_connection_new_server(rspamd_main->http_ctx,
+ fd,
+ NULL,
+ rspamd_control_error_handler,
+ rspamd_control_finish_handler,
+ 0);
+ session->rspamd_main = rspamd_main;
+ session->addr = addr;
+ session->event_loop = rspamd_main->event_loop;
+ rspamd_http_connection_read_message(session->conn, session,
+ io_timeout);
+}
+
+struct rspamd_worker_control_data {
+ ev_io io_ev;
+ struct rspamd_worker *worker;
+ struct ev_loop *ev_base;
+ struct {
+ rspamd_worker_control_handler handler;
+ gpointer ud;
+ } handlers[RSPAMD_CONTROL_MAX];
+};
+
+static void
+rspamd_control_default_cmd_handler(gint fd,
+ gint attached_fd,
+ struct rspamd_worker_control_data *cd,
+ struct rspamd_control_command *cmd)
+{
+ struct rspamd_control_reply rep;
+ gssize r;
+ struct rusage rusg;
+ struct rspamd_config *cfg;
+ struct rspamd_main *rspamd_main;
+
+ memset(&rep, 0, sizeof(rep));
+ rep.type = cmd->type;
+ rspamd_main = cd->worker->srv;
+
+ switch (cmd->type) {
+ case RSPAMD_CONTROL_STAT:
+ if (getrusage(RUSAGE_SELF, &rusg) == -1) {
+ msg_err_main("cannot get rusage stats: %s",
+ strerror(errno));
+ }
+ else {
+ rep.reply.stat.utime = tv_to_double(&rusg.ru_utime);
+ rep.reply.stat.systime = tv_to_double(&rusg.ru_stime);
+ rep.reply.stat.maxrss = rusg.ru_maxrss;
+ }
+
+ rep.reply.stat.conns = cd->worker->nconns;
+ rep.reply.stat.uptime = rspamd_get_calendar_ticks() - cd->worker->start_time;
+ break;
+ case RSPAMD_CONTROL_RELOAD:
+ case RSPAMD_CONTROL_RECOMPILE:
+ case RSPAMD_CONTROL_HYPERSCAN_LOADED:
+ case RSPAMD_CONTROL_MONITORED_CHANGE:
+ case RSPAMD_CONTROL_FUZZY_STAT:
+ case RSPAMD_CONTROL_FUZZY_SYNC:
+ case RSPAMD_CONTROL_LOG_PIPE:
+ case RSPAMD_CONTROL_CHILD_CHANGE:
+ case RSPAMD_CONTROL_FUZZY_BLOCKED:
+ break;
+ case RSPAMD_CONTROL_RERESOLVE:
+ if (cd->worker->srv->cfg) {
+ REF_RETAIN(cd->worker->srv->cfg);
+ cfg = cd->worker->srv->cfg;
+
+ if (cfg->ups_ctx) {
+ msg_info_config("reresolving upstreams");
+ rspamd_upstream_reresolve(cfg->ups_ctx);
+ }
+
+ rep.reply.reresolve.status = 0;
+ REF_RELEASE(cfg);
+ }
+ else {
+ rep.reply.reresolve.status = EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ r = write(fd, &rep, sizeof(rep));
+
+ if (r != sizeof(rep)) {
+ msg_err_main("cannot write reply to the control socket: %s",
+ strerror(errno));
+ }
+
+ if (attached_fd != -1) {
+ close(attached_fd);
+ }
+}
+
+static void
+rspamd_control_default_worker_handler(EV_P_ ev_io *w, int revents)
+{
+ struct rspamd_worker_control_data *cd =
+ (struct rspamd_worker_control_data *) w->data;
+ static struct rspamd_control_command cmd;
+ static struct msghdr msg;
+ static struct iovec iov;
+ static guchar fdspace[CMSG_SPACE(sizeof(int))];
+ gint rfd = -1;
+ gssize r;
+
+ iov.iov_base = &cmd;
+ iov.iov_len = sizeof(cmd);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = recvmsg(w->fd, &msg, 0);
+
+ if (r == -1) {
+ if (errno != EAGAIN && errno != EINTR) {
+ if (errno != ECONNRESET) {
+ /*
+ * In case of connection reset it means that main process
+ * has died, so do not pollute logs
+ */
+ msg_err("cannot read request from the control socket: %s",
+ strerror(errno));
+ }
+ ev_io_stop(cd->ev_base, &cd->io_ev);
+ close(w->fd);
+ }
+ }
+ else if (r < (gint) sizeof(cmd)) {
+ msg_err("short read of control command: %d of %d", (gint) r,
+ (gint) sizeof(cmd));
+
+ if (r == 0) {
+ ev_io_stop(cd->ev_base, &cd->io_ev);
+ close(w->fd);
+ }
+ }
+ else if ((gint) cmd.type >= 0 && cmd.type < RSPAMD_CONTROL_MAX) {
+
+ if (msg.msg_controllen >= CMSG_LEN(sizeof(int))) {
+ rfd = *(int *) CMSG_DATA(CMSG_FIRSTHDR(&msg));
+ }
+
+ if (cd->handlers[cmd.type].handler) {
+ cd->handlers[cmd.type].handler(cd->worker->srv,
+ cd->worker,
+ w->fd,
+ rfd,
+ &cmd,
+ cd->handlers[cmd.type].ud);
+ }
+ else {
+ rspamd_control_default_cmd_handler(w->fd, rfd, cd, &cmd);
+ }
+ }
+ else {
+ msg_err("unknown command: %d", (gint) cmd.type);
+ }
+}
+
+void rspamd_control_worker_add_default_cmd_handlers(struct rspamd_worker *worker,
+ struct ev_loop *ev_base)
+{
+ struct rspamd_worker_control_data *cd;
+
+ cd = g_malloc0(sizeof(*cd));
+ cd->worker = worker;
+ cd->ev_base = ev_base;
+
+ cd->io_ev.data = cd;
+ ev_io_init(&cd->io_ev, rspamd_control_default_worker_handler,
+ worker->control_pipe[1], EV_READ);
+ ev_io_start(ev_base, &cd->io_ev);
+
+ worker->control_data = cd;
+}
+
+/**
+ * Register custom handler for a specific control command for this worker
+ */
+void rspamd_control_worker_add_cmd_handler(struct rspamd_worker *worker,
+ enum rspamd_control_type type,
+ rspamd_worker_control_handler handler,
+ gpointer ud)
+{
+ struct rspamd_worker_control_data *cd;
+
+ g_assert(type >= 0 && type < RSPAMD_CONTROL_MAX);
+ g_assert(handler != NULL);
+ g_assert(worker->control_data != NULL);
+
+ cd = worker->control_data;
+ cd->handlers[type].handler = handler;
+ cd->handlers[type].ud = ud;
+}
+
+struct rspamd_srv_reply_data {
+ struct rspamd_worker *worker;
+ struct rspamd_main *srv;
+ gint fd;
+ struct rspamd_srv_reply rep;
+};
+
+static void
+rspamd_control_ignore_io_handler(int fd, short what, void *ud)
+{
+ struct rspamd_control_reply_elt *elt =
+ (struct rspamd_control_reply_elt *) ud;
+
+ struct rspamd_control_reply rep;
+
+ /* At this point we just ignore replies from the workers */
+ if (read(fd, &rep, sizeof(rep)) == -1) {
+ msg_debug("cannot read %d bytes: %s", (int) sizeof(rep), strerror(errno));
+ }
+ rspamd_control_stop_pending(elt);
+}
+
+static void
+rspamd_control_log_pipe_io_handler(int fd, short what, void *ud)
+{
+ struct rspamd_control_reply_elt *elt =
+ (struct rspamd_control_reply_elt *) ud;
+ struct rspamd_control_reply rep;
+
+ /* At this point we just ignore replies from the workers */
+ (void) !read(fd, &rep, sizeof(rep));
+ rspamd_control_stop_pending(elt);
+}
+
+static void
+rspamd_control_handle_on_fork(struct rspamd_srv_command *cmd,
+ struct rspamd_main *srv)
+{
+ struct rspamd_worker *parent, *child;
+
+ parent = g_hash_table_lookup(srv->workers,
+ GSIZE_TO_POINTER(cmd->cmd.on_fork.ppid));
+
+ if (parent == NULL) {
+ msg_err("cannot find parent for a forked process %P (%P child)",
+ cmd->cmd.on_fork.ppid, cmd->cmd.on_fork.cpid);
+
+ return;
+ }
+
+ if (cmd->cmd.on_fork.state == child_dead) {
+ /* We need to remove stale worker */
+ child = g_hash_table_lookup(srv->workers,
+ GSIZE_TO_POINTER(cmd->cmd.on_fork.cpid));
+
+ if (child == NULL) {
+ msg_err("cannot find child for a forked process %P (%P parent)",
+ cmd->cmd.on_fork.cpid, cmd->cmd.on_fork.ppid);
+
+ return;
+ }
+
+ REF_RELEASE(child->cf);
+ g_hash_table_remove(srv->workers,
+ GSIZE_TO_POINTER(cmd->cmd.on_fork.cpid));
+ g_hash_table_unref(child->control_events_pending);
+ g_free(child);
+ }
+ else {
+ child = g_malloc0(sizeof(struct rspamd_worker));
+ child->srv = srv;
+ child->type = parent->type;
+ child->pid = cmd->cmd.on_fork.cpid;
+ child->srv_pipe[0] = -1;
+ child->srv_pipe[1] = -1;
+ child->control_pipe[0] = -1;
+ child->control_pipe[1] = -1;
+ child->cf = parent->cf;
+ child->ppid = parent->pid;
+ REF_RETAIN(child->cf);
+ child->control_events_pending = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, rspamd_pending_control_free);
+ g_hash_table_insert(srv->workers,
+ GSIZE_TO_POINTER(cmd->cmd.on_fork.cpid), child);
+ }
+}
+
+static void
+rspamd_fill_health_reply(struct rspamd_main *srv, struct rspamd_srv_reply *rep)
+{
+ GHashTableIter it;
+ gpointer k, v;
+
+ memset(&rep->reply.health, 0, sizeof(rep->reply));
+ g_hash_table_iter_init(&it, srv->workers);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ struct rspamd_worker *wrk = (struct rspamd_worker *) v;
+
+ if (wrk->hb.nbeats < 0) {
+ rep->reply.health.workers_hb_lost++;
+ }
+ else if (rspamd_worker_is_scanner(wrk)) {
+ rep->reply.health.scanners_count++;
+ }
+
+ rep->reply.health.workers_count++;
+ }
+
+ rep->reply.status = (g_hash_table_size(srv->workers) > 0);
+}
+
+
+static void
+rspamd_srv_handler(EV_P_ ev_io *w, int revents)
+{
+ struct rspamd_worker *worker;
+ static struct rspamd_srv_command cmd;
+ struct rspamd_main *rspamd_main;
+ struct rspamd_srv_reply_data *rdata;
+ struct msghdr msg;
+ struct cmsghdr *cmsg;
+ static struct iovec iov;
+ static guchar fdspace[CMSG_SPACE(sizeof(int))];
+ gint *spair, rfd = -1;
+ gchar *nid;
+ struct rspamd_control_command wcmd;
+ gssize r;
+
+ if (revents == EV_READ) {
+ worker = (struct rspamd_worker *) w->data;
+ rspamd_main = worker->srv;
+ iov.iov_base = &cmd;
+ iov.iov_len = sizeof(cmd);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = recvmsg(w->fd, &msg, 0);
+
+ if (r == -1) {
+ if (errno != EAGAIN) {
+ msg_err_main("cannot read from worker's srv pipe: %s",
+ strerror(errno));
+ }
+ else {
+ return;
+ }
+ }
+ else if (r == 0) {
+ /*
+ * Usually this means that a worker is dead, so do not try to read
+ * anything
+ */
+ msg_err_main("cannot read from worker's srv pipe connection closed; command = %s",
+ rspamd_srv_command_to_string(cmd.type));
+ ev_io_stop(EV_A_ w);
+ }
+ else if (r != sizeof(cmd)) {
+ msg_err_main("cannot read from worker's srv pipe incomplete command: %d != %d; command = %s",
+ (gint) r, (gint) sizeof(cmd), rspamd_srv_command_to_string(cmd.type));
+ }
+ else {
+ rdata = g_malloc0(sizeof(*rdata));
+ rdata->worker = worker;
+ rdata->srv = rspamd_main;
+ rdata->rep.id = cmd.id;
+ rdata->rep.type = cmd.type;
+ rdata->fd = -1;
+ worker->tmp_data = rdata;
+
+ if (msg.msg_controllen >= CMSG_LEN(sizeof(int))) {
+ rfd = *(int *) CMSG_DATA(CMSG_FIRSTHDR(&msg));
+ }
+
+ switch (cmd.type) {
+ case RSPAMD_SRV_SOCKETPAIR:
+ spair = g_hash_table_lookup(rspamd_main->spairs, cmd.cmd.spair.pair_id);
+ if (spair == NULL) {
+ spair = g_malloc(sizeof(gint) * 2);
+
+ if (rspamd_socketpair(spair, cmd.cmd.spair.af) == -1) {
+ rdata->rep.reply.spair.code = errno;
+ msg_err_main("cannot create socket pair: %s", strerror(errno));
+ }
+ else {
+ nid = g_malloc(sizeof(cmd.cmd.spair.pair_id));
+ memcpy(nid, cmd.cmd.spair.pair_id,
+ sizeof(cmd.cmd.spair.pair_id));
+ g_hash_table_insert(rspamd_main->spairs, nid, spair);
+ rdata->rep.reply.spair.code = 0;
+ rdata->fd = cmd.cmd.spair.pair_num ? spair[1] : spair[0];
+ }
+ }
+ else {
+ rdata->rep.reply.spair.code = 0;
+ rdata->fd = cmd.cmd.spair.pair_num ? spair[1] : spair[0];
+ }
+ break;
+ case RSPAMD_SRV_HYPERSCAN_LOADED:
+#ifdef WITH_HYPERSCAN
+ /* Load RE cache to provide it for new forks */
+ if (rspamd_re_cache_is_hs_loaded(rspamd_main->cfg->re_cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
+ cmd.cmd.hs_loaded.forced) {
+ rspamd_re_cache_load_hyperscan(
+ rspamd_main->cfg->re_cache,
+ cmd.cmd.hs_loaded.cache_dir,
+ false);
+ }
+
+ /* After getting this notice, we can clean up old hyperscan files */
+
+ rspamd_hyperscan_notice_loaded();
+
+ msg_info_main("received hyperscan cache loaded from %s",
+ cmd.cmd.hs_loaded.cache_dir);
+
+ /* Broadcast command to all workers */
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
+ rspamd_strlcpy(wcmd.cmd.hs_loaded.cache_dir,
+ cmd.cmd.hs_loaded.cache_dir,
+ sizeof(wcmd.cmd.hs_loaded.cache_dir));
+ wcmd.cmd.hs_loaded.forced = cmd.cmd.hs_loaded.forced;
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_ignore_io_handler, NULL, worker->pid);
+#endif
+ break;
+ case RSPAMD_SRV_MONITORED_CHANGE:
+ /* Broadcast command to all workers */
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_MONITORED_CHANGE;
+ rspamd_strlcpy(wcmd.cmd.monitored_change.tag,
+ cmd.cmd.monitored_change.tag,
+ sizeof(wcmd.cmd.monitored_change.tag));
+ wcmd.cmd.monitored_change.alive = cmd.cmd.monitored_change.alive;
+ wcmd.cmd.monitored_change.sender = cmd.cmd.monitored_change.sender;
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_ignore_io_handler, NULL, 0);
+ break;
+ case RSPAMD_SRV_LOG_PIPE:
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_LOG_PIPE;
+ wcmd.cmd.log_pipe.type = cmd.cmd.log_pipe.type;
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_log_pipe_io_handler, NULL, 0);
+ break;
+ case RSPAMD_SRV_ON_FORK:
+ rdata->rep.reply.on_fork.status = 0;
+ rspamd_control_handle_on_fork(&cmd, rspamd_main);
+ break;
+ case RSPAMD_SRV_HEARTBEAT:
+ worker->hb.last_event = ev_time();
+ rdata->rep.reply.heartbeat.status = 0;
+ break;
+ case RSPAMD_SRV_HEALTH:
+ rspamd_fill_health_reply(rspamd_main, &rdata->rep);
+ break;
+ case RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE:
+#ifdef WITH_HYPERSCAN
+ rspamd_hyperscan_notice_known(cmd.cmd.hyperscan_cache_file.path);
+#endif
+ rdata->rep.reply.hyperscan_cache_file.unused = 0;
+ break;
+ case RSPAMD_SRV_FUZZY_BLOCKED:
+ /* Broadcast command to all workers */
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_FUZZY_BLOCKED;
+ /* Ensure that memcpy is safe */
+ G_STATIC_ASSERT(sizeof(wcmd.cmd.fuzzy_blocked) == sizeof(cmd.cmd.fuzzy_blocked));
+ memcpy(&wcmd.cmd.fuzzy_blocked, &cmd.cmd.fuzzy_blocked, sizeof(wcmd.cmd.fuzzy_blocked));
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_ignore_io_handler, NULL, worker->pid);
+ break;
+ default:
+ msg_err_main("unknown command type: %d", cmd.type);
+ break;
+ }
+
+ if (rfd != -1) {
+ /* Close our copy to avoid descriptors leak */
+ close(rfd);
+ }
+
+ /* Now plan write event and send data back */
+ w->data = rdata;
+ ev_io_stop(EV_A_ w);
+ ev_io_set(w, worker->srv_pipe[0], EV_WRITE);
+ ev_io_start(EV_A_ w);
+ }
+ }
+ else if (revents == EV_WRITE) {
+ rdata = (struct rspamd_srv_reply_data *) w->data;
+ worker = rdata->worker;
+ worker->tmp_data = NULL; /* Avoid race */
+ rspamd_main = rdata->srv;
+
+ memset(&msg, 0, sizeof(msg));
+
+ /* Attach fd to the message */
+ if (rdata->fd != -1) {
+ memset(fdspace, 0, sizeof(fdspace));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &rdata->fd, sizeof(int));
+ }
+
+ iov.iov_base = &rdata->rep;
+ iov.iov_len = sizeof(rdata->rep);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = sendmsg(w->fd, &msg, 0);
+
+ if (r == -1) {
+ msg_err_main("cannot write to worker's srv pipe when writing reply: %s; command = %s",
+ strerror(errno), rspamd_srv_command_to_string(rdata->rep.type));
+ }
+ else if (r != sizeof(rdata->rep)) {
+ msg_err_main("cannot write to worker's srv pipe: %d != %d; command = %s",
+ (int) r, (int) sizeof(rdata->rep),
+ rspamd_srv_command_to_string(rdata->rep.type));
+ }
+
+ g_free(rdata);
+ w->data = worker;
+ ev_io_stop(EV_A_ w);
+ ev_io_set(w, worker->srv_pipe[0], EV_READ);
+ ev_io_start(EV_A_ w);
+ }
+}
+
+void rspamd_srv_start_watching(struct rspamd_main *srv,
+ struct rspamd_worker *worker,
+ struct ev_loop *ev_base)
+{
+ g_assert(worker != NULL);
+
+ worker->tmp_data = NULL;
+ worker->srv_ev.data = worker;
+ ev_io_init(&worker->srv_ev, rspamd_srv_handler, worker->srv_pipe[0], EV_READ);
+ ev_io_start(ev_base, &worker->srv_ev);
+}
+
+struct rspamd_srv_request_data {
+ struct rspamd_worker *worker;
+ struct rspamd_srv_command cmd;
+ gint attached_fd;
+ struct rspamd_srv_reply rep;
+ rspamd_srv_reply_handler handler;
+ ev_io io_ev;
+ gpointer ud;
+};
+
+static void
+rspamd_srv_request_handler(EV_P_ ev_io *w, int revents)
+{
+ struct rspamd_srv_request_data *rd = (struct rspamd_srv_request_data *) w->data;
+ struct msghdr msg;
+ struct iovec iov;
+ guchar fdspace[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+ gssize r;
+ gint rfd = -1;
+
+ if (revents == EV_WRITE) {
+ /* Send request to server */
+ memset(&msg, 0, sizeof(msg));
+
+ /* Attach fd to the message */
+ if (rd->attached_fd != -1) {
+ memset(fdspace, 0, sizeof(fdspace));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &rd->attached_fd, sizeof(int));
+ }
+
+ iov.iov_base = &rd->cmd;
+ iov.iov_len = sizeof(rd->cmd);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = sendmsg(w->fd, &msg, 0);
+
+ if (r == -1) {
+ if (r == ENOBUFS) {
+ /* On BSD derived systems we can have this error when trying to send
+ * requests too fast.
+ * It might be good to retry...
+ */
+ msg_info("cannot write to server pipe: %s; command = %s; retrying sending",
+ strerror(errno),
+ rspamd_srv_command_to_string(rd->cmd.type));
+ return;
+ }
+ msg_err("cannot write to server pipe: %s; command = %s", strerror(errno),
+ rspamd_srv_command_to_string(rd->cmd.type));
+ goto cleanup;
+ }
+ else if (r != sizeof(rd->cmd)) {
+ msg_err("incomplete write to the server pipe: %d != %d, command = %s",
+ (int) r, (int) sizeof(rd->cmd), rspamd_srv_command_to_string(rd->cmd.type));
+ goto cleanup;
+ }
+
+ ev_io_stop(EV_A_ w);
+ ev_io_set(w, rd->worker->srv_pipe[1], EV_READ);
+ ev_io_start(EV_A_ w);
+ }
+ else {
+ iov.iov_base = &rd->rep;
+ iov.iov_len = sizeof(rd->rep);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = fdspace;
+ msg.msg_controllen = sizeof(fdspace);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ r = recvmsg(w->fd, &msg, 0);
+
+ if (r == -1) {
+ msg_err("cannot read from server pipe: %s; command = %s", strerror(errno),
+ rspamd_srv_command_to_string(rd->cmd.type));
+ goto cleanup;
+ }
+
+ if (r != (gint) sizeof(rd->rep)) {
+ msg_err("cannot read from server pipe, invalid length: %d != %d; command = %s",
+ (gint) r, (int) sizeof(rd->rep), rspamd_srv_command_to_string(rd->cmd.type));
+ goto cleanup;
+ }
+
+ if (msg.msg_controllen >= CMSG_LEN(sizeof(int))) {
+ rfd = *(int *) CMSG_DATA(CMSG_FIRSTHDR(&msg));
+ }
+
+ /* Reply has been received */
+ if (rd->handler) {
+ rd->handler(rd->worker, &rd->rep, rfd, rd->ud);
+ }
+
+ goto cleanup;
+ }
+
+ return;
+
+
+cleanup:
+ ev_io_stop(EV_A_ w);
+ g_free(rd);
+}
+
+void rspamd_srv_send_command(struct rspamd_worker *worker,
+ struct ev_loop *ev_base,
+ struct rspamd_srv_command *cmd,
+ gint attached_fd,
+ rspamd_srv_reply_handler handler,
+ gpointer ud)
+{
+ struct rspamd_srv_request_data *rd;
+
+ g_assert(cmd != NULL);
+ g_assert(worker != NULL);
+
+ rd = g_malloc0(sizeof(*rd));
+ cmd->id = ottery_rand_uint64();
+ memcpy(&rd->cmd, cmd, sizeof(rd->cmd));
+ rd->handler = handler;
+ rd->ud = ud;
+ rd->worker = worker;
+ rd->rep.id = cmd->id;
+ rd->rep.type = cmd->type;
+ rd->attached_fd = attached_fd;
+
+ rd->io_ev.data = rd;
+ ev_io_init(&rd->io_ev, rspamd_srv_request_handler,
+ rd->worker->srv_pipe[1], EV_WRITE);
+ ev_io_start(ev_base, &rd->io_ev);
+}
+
+enum rspamd_control_type
+rspamd_control_command_from_string(const gchar *str)
+{
+ enum rspamd_control_type ret = RSPAMD_CONTROL_MAX;
+
+ if (!str) {
+ return ret;
+ }
+
+ if (g_ascii_strcasecmp(str, "hyperscan_loaded") == 0) {
+ ret = RSPAMD_CONTROL_HYPERSCAN_LOADED;
+ }
+ else if (g_ascii_strcasecmp(str, "stat") == 0) {
+ ret = RSPAMD_CONTROL_STAT;
+ }
+ else if (g_ascii_strcasecmp(str, "reload") == 0) {
+ ret = RSPAMD_CONTROL_RELOAD;
+ }
+ else if (g_ascii_strcasecmp(str, "reresolve") == 0) {
+ ret = RSPAMD_CONTROL_RERESOLVE;
+ }
+ else if (g_ascii_strcasecmp(str, "recompile") == 0) {
+ ret = RSPAMD_CONTROL_RECOMPILE;
+ }
+ else if (g_ascii_strcasecmp(str, "log_pipe") == 0) {
+ ret = RSPAMD_CONTROL_LOG_PIPE;
+ }
+ else if (g_ascii_strcasecmp(str, "fuzzy_stat") == 0) {
+ ret = RSPAMD_CONTROL_FUZZY_STAT;
+ }
+ else if (g_ascii_strcasecmp(str, "fuzzy_sync") == 0) {
+ ret = RSPAMD_CONTROL_FUZZY_SYNC;
+ }
+ else if (g_ascii_strcasecmp(str, "monitored_change") == 0) {
+ ret = RSPAMD_CONTROL_MONITORED_CHANGE;
+ }
+ else if (g_ascii_strcasecmp(str, "child_change") == 0) {
+ ret = RSPAMD_CONTROL_CHILD_CHANGE;
+ }
+
+ return ret;
+}
+
+const gchar *
+rspamd_control_command_to_string(enum rspamd_control_type cmd)
+{
+ const gchar *reply = "unknown";
+
+ switch (cmd) {
+ case RSPAMD_CONTROL_STAT:
+ reply = "stat";
+ break;
+ case RSPAMD_CONTROL_RELOAD:
+ reply = "reload";
+ break;
+ case RSPAMD_CONTROL_RERESOLVE:
+ reply = "reresolve";
+ break;
+ case RSPAMD_CONTROL_RECOMPILE:
+ reply = "recompile";
+ break;
+ case RSPAMD_CONTROL_HYPERSCAN_LOADED:
+ reply = "hyperscan_loaded";
+ break;
+ case RSPAMD_CONTROL_LOG_PIPE:
+ reply = "log_pipe";
+ break;
+ case RSPAMD_CONTROL_FUZZY_STAT:
+ reply = "fuzzy_stat";
+ break;
+ case RSPAMD_CONTROL_FUZZY_SYNC:
+ reply = "fuzzy_sync";
+ break;
+ case RSPAMD_CONTROL_MONITORED_CHANGE:
+ reply = "monitored_change";
+ break;
+ case RSPAMD_CONTROL_CHILD_CHANGE:
+ reply = "child_change";
+ break;
+ default:
+ break;
+ }
+
+ return reply;
+}
+
+const gchar *rspamd_srv_command_to_string(enum rspamd_srv_type cmd)
+{
+ const gchar *reply = "unknown";
+
+ switch (cmd) {
+ case RSPAMD_SRV_SOCKETPAIR:
+ reply = "socketpair";
+ break;
+ case RSPAMD_SRV_HYPERSCAN_LOADED:
+ reply = "hyperscan_loaded";
+ break;
+ case RSPAMD_SRV_MONITORED_CHANGE:
+ reply = "monitored_change";
+ break;
+ case RSPAMD_SRV_LOG_PIPE:
+ reply = "log_pipe";
+ break;
+ case RSPAMD_SRV_ON_FORK:
+ reply = "on_fork";
+ break;
+ case RSPAMD_SRV_HEARTBEAT:
+ reply = "heartbeat";
+ break;
+ case RSPAMD_SRV_HEALTH:
+ reply = "health";
+ break;
+ case RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE:
+ reply = "notice_hyperscan_cache";
+ break;
+ case RSPAMD_SRV_FUZZY_BLOCKED:
+ reply = "fuzzy_blocked";
+ break;
+ }
+
+ return reply;
+}
diff --git a/src/libserver/rspamd_control.h b/src/libserver/rspamd_control.h
new file mode 100644
index 0000000..c3c861f
--- /dev/null
+++ b/src/libserver/rspamd_control.h
@@ -0,0 +1,328 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_RSPAMD_CONTROL_H
+#define RSPAMD_RSPAMD_CONTROL_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "contrib/libev/ev.h"
+
+G_BEGIN_DECLS
+
+struct rspamd_main;
+struct rspamd_worker;
+
+enum rspamd_control_type {
+ RSPAMD_CONTROL_STAT = 0,
+ RSPAMD_CONTROL_RELOAD,
+ RSPAMD_CONTROL_RERESOLVE,
+ RSPAMD_CONTROL_RECOMPILE,
+ RSPAMD_CONTROL_HYPERSCAN_LOADED,
+ RSPAMD_CONTROL_LOG_PIPE,
+ RSPAMD_CONTROL_FUZZY_STAT,
+ RSPAMD_CONTROL_FUZZY_SYNC,
+ RSPAMD_CONTROL_MONITORED_CHANGE,
+ RSPAMD_CONTROL_CHILD_CHANGE,
+ RSPAMD_CONTROL_FUZZY_BLOCKED,
+ RSPAMD_CONTROL_MAX
+};
+
+enum rspamd_srv_type {
+ RSPAMD_SRV_SOCKETPAIR = 0,
+ RSPAMD_SRV_HYPERSCAN_LOADED,
+ RSPAMD_SRV_MONITORED_CHANGE,
+ RSPAMD_SRV_LOG_PIPE,
+ RSPAMD_SRV_ON_FORK,
+ RSPAMD_SRV_HEARTBEAT,
+ RSPAMD_SRV_HEALTH,
+ RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE,
+ RSPAMD_SRV_FUZZY_BLOCKED, /* Used to notify main process about a blocked ip */
+};
+
+enum rspamd_log_pipe_type {
+ RSPAMD_LOG_PIPE_SYMBOLS = 0,
+};
+#define CONTROL_PATHLEN MIN(PATH_MAX, PIPE_BUF - sizeof(int) * 2 - sizeof(gint64) * 2)
+struct rspamd_control_command {
+ enum rspamd_control_type type;
+ union {
+ struct {
+ guint unused;
+ } stat;
+ struct {
+ guint unused;
+ } reload;
+ struct {
+ guint unused;
+ } reresolve;
+ struct {
+ guint unused;
+ } recompile;
+ struct {
+ gboolean forced;
+ gchar cache_dir[CONTROL_PATHLEN];
+ } hs_loaded;
+ struct {
+ gchar tag[32];
+ gboolean alive;
+ pid_t sender;
+ } monitored_change;
+ struct {
+ enum rspamd_log_pipe_type type;
+ } log_pipe;
+ struct {
+ guint unused;
+ } fuzzy_stat;
+ struct {
+ guint unused;
+ } fuzzy_sync;
+ struct {
+ enum {
+ rspamd_child_offline,
+ rspamd_child_online,
+ rspamd_child_terminated,
+ } what;
+ pid_t pid;
+ guint additional;
+ } child_change;
+ struct {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in s4;
+ struct sockaddr_in6 s6;
+ } addr;
+ sa_family_t af;
+ } fuzzy_blocked;
+ } cmd;
+};
+
+struct rspamd_control_reply {
+ enum rspamd_control_type type;
+ union {
+ struct {
+ guint conns;
+ gdouble uptime;
+ gdouble utime;
+ gdouble systime;
+ gulong maxrss;
+ } stat;
+ struct {
+ guint status;
+ } reload;
+ struct {
+ guint status;
+ } reresolve;
+ struct {
+ guint status;
+ } recompile;
+ struct {
+ guint status;
+ } hs_loaded;
+ struct {
+ guint status;
+ } monitored_change;
+ struct {
+ guint status;
+ } log_pipe;
+ struct {
+ guint status;
+ gchar storage_id[MEMPOOL_UID_LEN];
+ } fuzzy_stat;
+ struct {
+ guint status;
+ } fuzzy_sync;
+ struct {
+ guint status;
+ } fuzzy_blocked;
+ } reply;
+};
+
+#define PAIR_ID_LEN 16
+
+struct rspamd_srv_command {
+ enum rspamd_srv_type type;
+ guint64 id;
+ union {
+ struct {
+ gint af;
+ gchar pair_id[PAIR_ID_LEN];
+ guint pair_num;
+ } spair;
+ struct {
+ gboolean forced;
+ gchar cache_dir[CONTROL_PATHLEN];
+ } hs_loaded;
+ struct {
+ gchar tag[32];
+ gboolean alive;
+ pid_t sender;
+ } monitored_change;
+ struct {
+ enum rspamd_log_pipe_type type;
+ } log_pipe;
+ struct {
+ pid_t ppid;
+ pid_t cpid;
+ enum {
+ child_create = 0,
+ child_dead,
+ } state;
+ } on_fork;
+ struct {
+ guint status;
+ /* TODO: add more fields */
+ } heartbeat;
+ struct {
+ guint status;
+ } health;
+ /* Used when a worker loads a valid hyperscan file */
+ struct {
+ char path[CONTROL_PATHLEN];
+ } hyperscan_cache_file;
+ /* Send when one worker has blocked some IP address */
+ struct {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in s4;
+ struct sockaddr_in6 s6;
+ } addr;
+ sa_family_t af;
+ } fuzzy_blocked;
+ } cmd;
+};
+
+struct rspamd_srv_reply {
+ enum rspamd_srv_type type;
+ guint64 id;
+ union {
+ struct {
+ gint code;
+ } spair;
+ struct {
+ gint forced;
+ } hs_loaded;
+ struct {
+ gint status;
+ };
+ struct {
+ enum rspamd_log_pipe_type type;
+ } log_pipe;
+ struct {
+ gint status;
+ } on_fork;
+ struct {
+ gint status;
+ } heartbeat;
+ struct {
+ guint status;
+ guint workers_count;
+ guint scanners_count;
+ guint workers_hb_lost;
+ } health;
+ struct {
+ int unused;
+ } hyperscan_cache_file;
+ struct {
+ int unused;
+ } fuzzy_blocked;
+ } reply;
+};
+
+typedef gboolean (*rspamd_worker_control_handler)(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *worker,
+ gint fd,
+ gint attached_fd,
+ struct rspamd_control_command *cmd,
+ gpointer ud);
+
+typedef void (*rspamd_srv_reply_handler)(struct rspamd_worker *worker,
+ struct rspamd_srv_reply *rep, gint rep_fd,
+ gpointer ud);
+
+/**
+ * Process client socket connection
+ */
+void rspamd_control_process_client_socket(struct rspamd_main *rspamd_main,
+ gint fd, rspamd_inet_addr_t *addr);
+
+/**
+ * Register default handlers for a worker
+ */
+void rspamd_control_worker_add_default_cmd_handlers(struct rspamd_worker *worker,
+ struct ev_loop *ev_base);
+
+/**
+ * Register custom handler for a specific control command for this worker
+ */
+void rspamd_control_worker_add_cmd_handler(struct rspamd_worker *worker,
+ enum rspamd_control_type type,
+ rspamd_worker_control_handler handler,
+ gpointer ud);
+
+/**
+ * Start watching on srv pipe
+ */
+void rspamd_srv_start_watching(struct rspamd_main *srv,
+ struct rspamd_worker *worker,
+ struct ev_loop *ev_base);
+
+
+/**
+ * Send command to srv pipe and read reply calling the specified callback at the
+ * end
+ */
+void rspamd_srv_send_command(struct rspamd_worker *worker,
+ struct ev_loop *ev_base,
+ struct rspamd_srv_command *cmd,
+ gint attached_fd,
+ rspamd_srv_reply_handler handler,
+ gpointer ud);
+
+/**
+ * Broadcast srv cmd from rspamd_main to workers
+ * @param rspamd_main
+ * @param cmd
+ * @param except_pid
+ */
+void rspamd_control_broadcast_srv_cmd(struct rspamd_main *rspamd_main,
+ struct rspamd_control_command *cmd,
+ pid_t except_pid);
+
+/**
+ * Returns command from a specified string (case insensitive)
+ * @param str
+ * @return
+ */
+enum rspamd_control_type rspamd_control_command_from_string(const gchar *str);
+
+/**
+ * Returns command name from it's type
+ * @param cmd
+ * @return
+ */
+const gchar *rspamd_control_command_to_string(enum rspamd_control_type cmd);
+
+const gchar *rspamd_srv_command_to_string(enum rspamd_srv_type cmd);
+
+/**
+ * Used to cleanup pending events
+ * @param p
+ */
+void rspamd_pending_control_free(gpointer p);
+
+G_END_DECLS
+
+#endif
diff --git a/src/libserver/rspamd_symcache.h b/src/libserver/rspamd_symcache.h
new file mode 100644
index 0000000..2c67cba
--- /dev/null
+++ b/src/libserver/rspamd_symcache.h
@@ -0,0 +1,578 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_SYMBOLS_CACHE_H
+#define RSPAMD_SYMBOLS_CACHE_H
+
+#include "config.h"
+#include "ucl.h"
+#include "cfg_file.h"
+#include "contrib/libev/ev.h"
+
+#include <lua.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_config;
+struct rspamd_symcache;
+struct rspamd_worker;
+struct rspamd_symcache_dynamic_item;
+struct rspamd_symcache_item;
+struct rspamd_config_settings_elt;
+
+typedef void (*symbol_func_t)(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ gpointer user_data);
+
+enum rspamd_symbol_type {
+ SYMBOL_TYPE_NORMAL = (1u << 0u),
+ SYMBOL_TYPE_VIRTUAL = (1u << 1u),
+ SYMBOL_TYPE_CALLBACK = (1u << 2u),
+ SYMBOL_TYPE_GHOST = (1u << 3u),
+ SYMBOL_TYPE_SKIPPED = (1u << 4u),
+ SYMBOL_TYPE_COMPOSITE = (1u << 5u),
+ SYMBOL_TYPE_CLASSIFIER = (1u << 6u),
+ SYMBOL_TYPE_FINE = (1u << 7u),
+ SYMBOL_TYPE_EMPTY = (1u << 8u), /* Allow execution on empty tasks */
+ SYMBOL_TYPE_CONNFILTER = (1u << 9u), /* Connection stage filter */
+ SYMBOL_TYPE_PREFILTER = (1u << 10u),
+ SYMBOL_TYPE_POSTFILTER = (1u << 11u),
+ SYMBOL_TYPE_NOSTAT = (1u << 12u), /* Skip as statistical symbol */
+ SYMBOL_TYPE_IDEMPOTENT = (1u << 13u), /* Symbol cannot change metric */
+ SYMBOL_TYPE_TRIVIAL = (1u << 14u), /* Symbol is trivial */
+ SYMBOL_TYPE_MIME_ONLY = (1u << 15u), /* Symbol is mime only */
+ SYMBOL_TYPE_EXPLICIT_DISABLE = (1u << 16u), /* Symbol should be disabled explicitly only */
+ SYMBOL_TYPE_IGNORE_PASSTHROUGH = (1u << 17u), /* Symbol ignores passthrough result */
+ SYMBOL_TYPE_EXPLICIT_ENABLE = (1u << 18u), /* Symbol should be enabled explicitly only */
+ SYMBOL_TYPE_USE_CORO = (1u << 19u), /* Symbol uses lua coroutines */
+};
+
+/**
+ * Abstract structure for saving callback data for symbols
+ */
+struct rspamd_abstract_callback_data {
+ guint64 magic;
+ char data[];
+};
+
+/**
+ * Shared memory block specific for each symbol
+ */
+struct rspamd_symcache_item_stat {
+ struct rspamd_counter_data time_counter;
+ gdouble avg_time;
+ gdouble weight;
+ guint hits;
+ guint64 total_hits;
+ struct rspamd_counter_data frequency_counter;
+ gdouble avg_frequency;
+ gdouble stddev_frequency;
+};
+
+/**
+ * Creates new cache structure
+ * @return
+ */
+struct rspamd_symcache *rspamd_symcache_new(struct rspamd_config *cfg);
+
+/**
+ * Remove the cache structure syncing data if needed
+ * @param cache
+ */
+void rspamd_symcache_destroy(struct rspamd_symcache *cache);
+
+/**
+ * Saves symbols cache to disk if possible
+ * @param cache
+ */
+void rspamd_symcache_save(struct rspamd_symcache *cache);
+
+/**
+ * Load symbols cache from file, must be called _after_ init_symbols_cache
+ */
+gboolean rspamd_symcache_init(struct rspamd_symcache *cache);
+
+/**
+ * Generic function to register a symbol
+ * @param cache
+ * @param name
+ * @param weight
+ * @param priority
+ * @param func
+ * @param user_data
+ * @param type
+ * @param parent
+ */
+gint rspamd_symcache_add_symbol(struct rspamd_symcache *cache,
+ const gchar *name,
+ gint priority,
+ symbol_func_t func,
+ gpointer user_data,
+ int type,
+ gint parent);
+
+/**
+ * Adds augmentation to the symbol
+ * @param cache
+ * @param sym_id
+ * @param augmentation
+ * @return
+ */
+bool rspamd_symcache_add_symbol_augmentation(struct rspamd_symcache *cache,
+ int sym_id,
+ const char *augmentation,
+ const char *value);
+
+/**
+ * Add callback to be executed whenever symbol has peak value
+ * @param cache
+ * @param cbref
+ */
+void rspamd_symcache_set_peak_callback(struct rspamd_symcache *cache,
+ gint cbref);
+
+/**
+ * Add delayed condition to the specific symbol in cache. So symbol can be absent
+ * to the moment of addition
+ * @param cache
+ * @param id id of symbol
+ * @param L lua state pointer
+ * @param cbref callback reference (returned by luaL_ref)
+ * @return TRUE if condition has been added
+ */
+gboolean rspamd_symcache_add_condition_delayed(struct rspamd_symcache *cache,
+ const gchar *sym,
+ lua_State *L, gint cbref);
+
+/**
+ * Find symbol in cache by id and returns its id resolving virtual symbols if
+ * applicable
+ * @param cache
+ * @param name
+ * @return id of symbol or (-1) if a symbol has not been found
+ */
+gint rspamd_symcache_find_symbol(struct rspamd_symcache *cache,
+ const gchar *name);
+
+/**
+ * Get statistics for a specific symbol
+ * @param cache
+ * @param name
+ * @param frequency
+ * @param tm
+ * @return
+ */
+gboolean rspamd_symcache_stat_symbol(struct rspamd_symcache *cache,
+ const gchar *name,
+ gdouble *frequency,
+ gdouble *freq_stddev,
+ gdouble *tm,
+ guint *nhits);
+
+/**
+ * Returns number of symbols registered in symbols cache
+ * @param cache
+ * @return number of symbols in the cache
+ */
+guint rspamd_symcache_stats_symbols_count(struct rspamd_symcache *cache);
+
+/**
+ * Validate cache items against theirs weights defined in metrics
+ * @param cache symbols cache
+ * @param cfg configuration
+ * @param strict do strict checks - symbols MUST be described in metrics
+ */
+gboolean rspamd_symcache_validate(struct rspamd_symcache *cache,
+ struct rspamd_config *cfg,
+ gboolean strict);
+
+/**
+ * Call function for cached symbol using saved callback
+ * @param task task object
+ * @param cache symbols cache
+ * @param saved_item pointer to currently saved item
+ */
+gboolean rspamd_symcache_process_symbols(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ guint stage);
+
+/**
+ * Return statistics about the cache as ucl object (array of objects one per item)
+ * @param cache
+ * @return
+ */
+ucl_object_t *rspamd_symcache_counters(struct rspamd_symcache *cache);
+
+/**
+ * Start cache reloading
+ * @param cache
+ * @param ev_base
+ */
+void *rspamd_symcache_start_refresh(struct rspamd_symcache *cache,
+ struct ev_loop *ev_base,
+ struct rspamd_worker *w);
+
+/**
+ * Increases counter for a specific symbol
+ * @param cache
+ * @param symbol
+ */
+void rspamd_symcache_inc_frequency(struct rspamd_symcache *_cache,
+ struct rspamd_symcache_item *item,
+ const gchar *sym_name);
+
+/**
+ * Add delayed dependency that is resolved on cache post-load routine
+ * @param cache
+ * @param from
+ * @param to
+ */
+void rspamd_symcache_add_delayed_dependency(struct rspamd_symcache *cache,
+ const gchar *from, const gchar *to);
+
+/**
+ * Get abstract callback data for a symbol (or its parent symbol)
+ * @param cache cache object
+ * @param symbol symbol name
+ * @return abstract callback data or NULL if symbol is absent or has no data attached
+ */
+struct rspamd_abstract_callback_data *rspamd_symcache_get_cbdata(
+ struct rspamd_symcache *cache, const gchar *symbol);
+
+/**
+ * Returns symbol's parent name (or symbol name itself)
+ * @param cache
+ * @param symbol
+ * @return
+ */
+const gchar *rspamd_symcache_get_parent(struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+guint rspamd_symcache_get_symbol_flags(struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+void rspamd_symcache_get_symbol_details(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ ucl_object_t *this_sym_ucl);
+
+
+/**
+ * Process settings for task
+ * @param task
+ * @param cache
+ * @return
+ */
+gboolean rspamd_symcache_process_settings(struct rspamd_task *task,
+ struct rspamd_symcache *cache);
+
+
+/**
+ * Checks if a symbol specified has been checked (or disabled)
+ * @param task
+ * @param cache
+ * @param symbol
+ * @return
+ */
+gboolean rspamd_symcache_is_checked(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+/**
+ * Returns checksum for all cache items
+ * @param cache
+ * @return
+ */
+guint64 rspamd_symcache_get_cksum(struct rspamd_symcache *cache);
+
+/**
+ * Checks if a symbols is enabled (not checked and conditions return true if present)
+ * @param task
+ * @param cache
+ * @param symbol
+ * @return
+ */
+gboolean rspamd_symcache_is_symbol_enabled(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+/**
+ * Enable this symbol for task
+ * @param task
+ * @param cache
+ * @param symbol
+ * @return TRUE if a symbol has been enabled (not executed before)
+ */
+gboolean rspamd_symcache_enable_symbol(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+/**
+ * Enable this symbol for task
+ * @param task
+ * @param cache
+ * @param symbol
+ * @return TRUE if a symbol has been disabled (not executed before)
+ */
+gboolean rspamd_symcache_disable_symbol(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+/**
+ * Disable execution of a symbol or a pattern (a string enclosed in `//`) permanently
+ * @param task
+ * @param cache
+ * @param symbol
+ * @return
+ */
+void rspamd_symcache_disable_symbol_static(struct rspamd_symcache *cache,
+ const gchar *symbol);
+/**
+ * Add a symbol or a pattern to the list of explicitly and statically enabled symbols
+ * @param cache
+ * @param symbol
+ * @return
+ */
+void rspamd_symcache_enable_symbol_static(struct rspamd_symcache *cache,
+ const gchar *symbol);
+
+/**
+ * Process specific function for each cache element (in order they are added)
+ * @param cache
+ * @param func
+ * @param ud
+ */
+void rspamd_symcache_foreach(struct rspamd_symcache *cache,
+ void (*func)(struct rspamd_symcache_item *item, gpointer /* userdata */),
+ gpointer ud);
+
+/**
+ * Returns the current item being processed (if any)
+ * @param task
+ * @return
+ */
+struct rspamd_symcache_dynamic_item *rspamd_symcache_get_cur_item(struct rspamd_task *task);
+
+/**
+ * Replaces the current item being processed.
+ * Returns the current item being processed (if any)
+ * @param task
+ * @param item
+ * @return
+ */
+struct rspamd_symcache_dynamic_item *rspamd_symcache_set_cur_item(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item);
+
+
+/**
+ * Finalize the current async element potentially calling its deps
+ */
+void rspamd_symcache_finalize_item(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item);
+
+/*
+ * Increase number of async events pending for an item
+ */
+guint rspamd_symcache_item_async_inc_full(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ const gchar *subsystem,
+ const gchar *loc);
+
+#define rspamd_symcache_item_async_inc(task, item, subsystem) \
+ rspamd_symcache_item_async_inc_full(task, item, subsystem, G_STRLOC)
+
+/*
+ * Decrease number of async events pending for an item, asserts if no events pending
+ */
+guint rspamd_symcache_item_async_dec_full(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ const gchar *subsystem,
+ const gchar *loc);
+
+#define rspamd_symcache_item_async_dec(task, item, subsystem) \
+ rspamd_symcache_item_async_dec_full(task, item, subsystem, G_STRLOC)
+
+/**
+ * Decrease number of async events pending for an item, asserts if no events pending
+ * If no events are left, this function calls `rspamd_symbols_cache_finalize_item` and returns TRUE
+ * @param task
+ * @param item
+ * @return
+ */
+gboolean rspamd_symcache_item_async_dec_check_full(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ const gchar *subsystem,
+ const gchar *loc);
+
+#define rspamd_symcache_item_async_dec_check(task, item, subsystem) \
+ rspamd_symcache_item_async_dec_check_full(task, item, subsystem, G_STRLOC)
+
+/**
+ * Disables execution of all symbols, excluding those specified in `skip_mask`
+ * @param task
+ * @param cache
+ * @param skip_mask
+ */
+void rspamd_symcache_disable_all_symbols(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ guint skip_mask);
+
+/**
+ * Iterates over the list of the enabled composites calling specified function
+ * @param task
+ * @param cache
+ * @param func
+ * @param fd
+ */
+void rspamd_symcache_composites_foreach(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ GHFunc func,
+ gpointer fd);
+
+/**
+ * Sets allowed settings ids for a symbol
+ * @param cache
+ * @param symbol
+ * @param ids
+ * @param nids
+ */
+bool rspamd_symcache_set_allowed_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ const guint32 *ids,
+ guint nids);
+/**
+ * Sets denied settings ids for a symbol
+ * @param cache
+ * @param symbol
+ * @param ids
+ * @param nids
+ */
+bool rspamd_symcache_set_forbidden_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ const guint32 *ids,
+ guint nids);
+
+/**
+ * Returns allowed ids for a symbol as a constant array
+ * @param cache
+ * @param symbol
+ * @param nids
+ * @return
+ */
+const guint32 *rspamd_symcache_get_allowed_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ guint *nids);
+
+/**
+ * Returns denied ids for a symbol as a constant array
+ * @param cache
+ * @param symbol
+ * @param nids
+ * @return
+ */
+const guint32 *rspamd_symcache_get_forbidden_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ guint *nids);
+
+
+/**
+ * Processes settings_elt in cache and converts it to a set of
+ * adjustments for forbidden/allowed settings_ids for each symbol
+ * @param cache
+ * @param elt
+ */
+void rspamd_symcache_process_settings_elt(struct rspamd_symcache *cache,
+ struct rspamd_config_settings_elt *elt);
+
+/**
+ * Check if a symbol is allowed for execution/insertion, this does not involve
+ * condition scripts to be checked (so it is intended to be fast).
+ * @param task
+ * @param item
+ * @param exec_only
+ * @return
+ */
+gboolean rspamd_symcache_is_item_allowed(struct rspamd_task *task,
+ struct rspamd_symcache_item *item,
+ gboolean exec_only);
+
+/**
+ * Returns symcache item flags
+ * @param item
+ * @return
+ */
+gint rspamd_symcache_dyn_item_flags(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *dyn_item);
+gint rspamd_symcache_item_flags(struct rspamd_symcache_item *item);
+
+/**
+ * Returns cache item name
+ * @param item
+ * @return
+ */
+const gchar *rspamd_symcache_dyn_item_name(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *dyn_item);
+const gchar *rspamd_symcache_item_name(struct rspamd_symcache_item *item);
+
+/**
+ * Returns the current item stat
+ * @param item
+ * @return
+ */
+const struct rspamd_symcache_item_stat *
+rspamd_symcache_item_stat(struct rspamd_symcache_item *item);
+
+/**
+ * Enable profiling for task (e.g. when a slow rule has been found)
+ * @param task
+ */
+void rspamd_symcache_enable_profile(struct rspamd_task *task);
+
+struct rspamd_symcache_timeout_item {
+ double timeout;
+ const struct rspamd_symcache_item *item;
+};
+
+struct rspamd_symcache_timeout_result {
+ double max_timeout;
+ struct rspamd_symcache_timeout_item *items;
+ size_t nitems;
+};
+/**
+ * Gets maximum timeout announced by symbols cache
+ * @param cache
+ * @return new symcache timeout_result structure, that should be freed by call
+ * `rspamd_symcache_timeout_result_free`
+ */
+struct rspamd_symcache_timeout_result *rspamd_symcache_get_max_timeout(struct rspamd_symcache *cache);
+
+/**
+ * Frees results obtained from the previous function
+ * @param res
+ */
+void rspamd_symcache_timeout_result_free(struct rspamd_symcache_timeout_result *res);
+
+/**
+ * Destroy internal state of the symcache runtime
+ * @param task
+ */
+void rspamd_symcache_runtime_destroy(struct rspamd_task *task);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/spf.c b/src/libserver/spf.c
new file mode 100644
index 0000000..72d8b99
--- /dev/null
+++ b/src/libserver/spf.c
@@ -0,0 +1,2799 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "dns.h"
+#include "spf.h"
+#include "rspamd.h"
+#include "message.h"
+#include "utlist.h"
+#include "libserver/mempool_vars_internal.h"
+#include "contrib/librdns/rdns.h"
+#include "contrib/mumhash/mum.h"
+
+#define SPF_VER1_STR "v=spf1"
+#define SPF_VER2_STR "spf2."
+#define SPF_SCOPE_PRA "pra"
+#define SPF_SCOPE_MFROM "mfrom"
+#define SPF_ALL "all"
+#define SPF_A "a"
+#define SPF_IP4 "ip4"
+#define SPF_IP4_ALT "ipv4"
+#define SPF_IP6 "ip6"
+#define SPF_IP6_ALT "ipv6"
+#define SPF_PTR "ptr"
+#define SPF_MX "mx"
+#define SPF_EXISTS "exists"
+#define SPF_INCLUDE "include"
+#define SPF_REDIRECT "redirect"
+#define SPF_EXP "exp"
+
+struct spf_resolved_element {
+ GPtrArray *elts;
+ gchar *cur_domain;
+ gboolean redirected; /* Ignore level, it's redirected */
+};
+
+struct spf_record {
+ gint nested;
+ gint dns_requests;
+ gint requests_inflight;
+
+ guint ttl;
+ GPtrArray *resolved;
+ /* Array of struct spf_resolved_element */
+ const gchar *sender;
+ const gchar *sender_domain;
+ const gchar *top_record;
+ gchar *local_part;
+ struct rspamd_task *task;
+ spf_cb_t callback;
+ gpointer cbdata;
+ gboolean done;
+};
+
+struct rspamd_spf_library_ctx {
+ guint max_dns_nesting;
+ guint max_dns_requests;
+ guint min_cache_ttl;
+ gboolean disable_ipv6;
+ rspamd_lru_hash_t *spf_hash;
+};
+
+struct rspamd_spf_library_ctx *spf_lib_ctx = NULL;
+
+/**
+ * BNF for SPF record:
+ *
+ * spf_mech ::= +|-|~|?
+ *
+ * spf_body ::= spf=v1 <spf_command> [<spf_command>]
+ * spf_command ::= [spf_mech]all|a|<ip4>|<ip6>|ptr|mx|<exists>|<include>|<redirect>
+ *
+ * spf_domain ::= [:domain][/mask]
+ * spf_ip4 ::= ip[/mask]
+ * ip4 ::= ip4:<spf_ip4>
+ * mx ::= mx<spf_domain>
+ * a ::= a<spf_domain>
+ * ptr ::= ptr[:domain]
+ * exists ::= exists:domain
+ * include ::= include:domain
+ * redirect ::= redirect:domain
+ * exp ::= exp:domain
+ *
+ */
+
+#undef SPF_DEBUG
+
+#define msg_err_spf(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "spf", rec->task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_spf(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "spf", rec->task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_spf(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "spf", rec->task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice_spf(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE, \
+ "spf", rec->task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_spf(...) rspamd_conditional_debug_fast(NULL, rec->task->from_addr, \
+ rspamd_spf_log_id, "spf", rec->task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_spf_flatten(...) rspamd_conditional_debug_fast_num_id(NULL, NULL, \
+ rspamd_spf_log_id, "spf", (flat)->digest, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(spf)
+
+struct spf_dns_cb {
+ struct spf_record *rec;
+ struct spf_addr *addr;
+ struct spf_resolved_element *resolved;
+ const gchar *ptr_host;
+ spf_action_t cur_action;
+ gboolean in_include;
+};
+
+#define CHECK_REC(rec) \
+ do { \
+ if (spf_lib_ctx->max_dns_nesting > 0 && \
+ (rec)->nested > spf_lib_ctx->max_dns_nesting) { \
+ msg_warn_spf("spf nesting limit: %d > %d is reached, domain: %s", \
+ (rec)->nested, spf_lib_ctx->max_dns_nesting, \
+ (rec)->sender_domain); \
+ return FALSE; \
+ } \
+ if (spf_lib_ctx->max_dns_requests > 0 && \
+ (rec)->dns_requests > spf_lib_ctx->max_dns_requests) { \
+ msg_warn_spf("spf dns requests limit: %d > %d is reached, domain: %s", \
+ (rec)->dns_requests, spf_lib_ctx->max_dns_requests, \
+ (rec)->sender_domain); \
+ return FALSE; \
+ } \
+ } while (0)
+
+RSPAMD_CONSTRUCTOR(rspamd_spf_lib_ctx_ctor)
+{
+ spf_lib_ctx = g_malloc0(sizeof(*spf_lib_ctx));
+ spf_lib_ctx->max_dns_nesting = SPF_MAX_NESTING;
+ spf_lib_ctx->max_dns_requests = SPF_MAX_DNS_REQUESTS;
+ spf_lib_ctx->min_cache_ttl = SPF_MIN_CACHE_TTL;
+ spf_lib_ctx->disable_ipv6 = FALSE;
+}
+
+RSPAMD_DESTRUCTOR(rspamd_spf_lib_ctx_dtor)
+{
+ if (spf_lib_ctx->spf_hash) {
+ rspamd_lru_hash_destroy(spf_lib_ctx->spf_hash);
+ }
+ g_free(spf_lib_ctx);
+ spf_lib_ctx = NULL;
+}
+
+static void
+spf_record_cached_unref_dtor(gpointer p)
+{
+ struct spf_resolved *flat = (struct spf_resolved *) p;
+
+ _spf_record_unref(flat, "LRU cache");
+}
+
+void spf_library_config(const ucl_object_t *obj)
+{
+ const ucl_object_t *value;
+ gint64 ival;
+ bool bval;
+
+ if (obj == NULL) {
+ /* No specific config */
+ return;
+ }
+
+ if ((value = ucl_object_find_key(obj, "min_cache_ttl")) != NULL) {
+ if (ucl_object_toint_safe(value, &ival) && ival >= 0) {
+ spf_lib_ctx->min_cache_ttl = ival;
+ }
+ }
+
+ if ((value = ucl_object_find_key(obj, "max_dns_nesting")) != NULL) {
+ if (ucl_object_toint_safe(value, &ival) && ival >= 0) {
+ spf_lib_ctx->max_dns_nesting = ival;
+ }
+ }
+
+ if ((value = ucl_object_find_key(obj, "max_dns_requests")) != NULL) {
+ if (ucl_object_toint_safe(value, &ival) && ival >= 0) {
+ spf_lib_ctx->max_dns_requests = ival;
+ }
+ }
+ if ((value = ucl_object_find_key(obj, "disable_ipv6")) != NULL) {
+ if (ucl_object_toboolean_safe(value, &bval)) {
+ spf_lib_ctx->disable_ipv6 = bval;
+ }
+ }
+
+ if (spf_lib_ctx->spf_hash) {
+ rspamd_lru_hash_destroy(spf_lib_ctx->spf_hash);
+ spf_lib_ctx->spf_hash = NULL;
+ }
+
+ if ((value = ucl_object_find_key(obj, "spf_cache_size")) != NULL) {
+ if (ucl_object_toint_safe(value, &ival) && ival > 0) {
+ spf_lib_ctx->spf_hash = rspamd_lru_hash_new(
+ ival,
+ g_free,
+ spf_record_cached_unref_dtor);
+ }
+ }
+ else {
+ /* Preserve compatibility */
+ spf_lib_ctx->spf_hash = rspamd_lru_hash_new(
+ 2048,
+ g_free,
+ spf_record_cached_unref_dtor);
+ }
+}
+
+static gboolean start_spf_parse(struct spf_record *rec,
+ struct spf_resolved_element *resolved, gchar *begin);
+
+/* Determine spf mech */
+static spf_mech_t
+check_spf_mech(const gchar *elt, gboolean *need_shift)
+{
+ g_assert(elt != NULL);
+
+ *need_shift = TRUE;
+
+ switch (*elt) {
+ case '-':
+ return SPF_FAIL;
+ case '~':
+ return SPF_SOFT_FAIL;
+ case '+':
+ return SPF_PASS;
+ case '?':
+ return SPF_NEUTRAL;
+ default:
+ *need_shift = FALSE;
+ return SPF_PASS;
+ }
+}
+
+static const gchar *
+rspamd_spf_dns_action_to_str(spf_action_t act)
+{
+ const char *ret = "unknown";
+
+ switch (act) {
+ case SPF_RESOLVE_MX:
+ ret = "MX";
+ break;
+ case SPF_RESOLVE_A:
+ ret = "A";
+ break;
+ case SPF_RESOLVE_PTR:
+ ret = "PTR";
+ break;
+ case SPF_RESOLVE_AAA:
+ ret = "AAAA";
+ break;
+ case SPF_RESOLVE_REDIRECT:
+ ret = "REDIRECT";
+ break;
+ case SPF_RESOLVE_INCLUDE:
+ ret = "INCLUDE";
+ break;
+ case SPF_RESOLVE_EXISTS:
+ ret = "EXISTS";
+ break;
+ case SPF_RESOLVE_EXP:
+ ret = "EXP";
+ break;
+ }
+
+ return ret;
+}
+
+static struct spf_addr *
+rspamd_spf_new_addr(struct spf_record *rec,
+ struct spf_resolved_element *resolved, const gchar *elt)
+{
+ gboolean need_shift = FALSE;
+ struct spf_addr *naddr;
+
+ naddr = g_malloc0(sizeof(*naddr));
+ naddr->mech = check_spf_mech(elt, &need_shift);
+
+ if (need_shift) {
+ naddr->spf_string = g_strdup(elt + 1);
+ }
+ else {
+ naddr->spf_string = g_strdup(elt);
+ }
+
+ g_ptr_array_add(resolved->elts, naddr);
+ naddr->prev = naddr;
+ naddr->next = NULL;
+
+ return naddr;
+}
+
+static void
+rspamd_spf_free_addr(gpointer a)
+{
+ struct spf_addr *addr = a, *tmp, *cur;
+
+ if (addr) {
+ g_free(addr->spf_string);
+ DL_FOREACH_SAFE(addr, cur, tmp)
+ {
+ g_free(cur);
+ }
+ }
+}
+
+static struct spf_resolved_element *
+rspamd_spf_new_addr_list(struct spf_record *rec, const gchar *domain)
+{
+ struct spf_resolved_element *resolved;
+
+ resolved = g_malloc0(sizeof(*resolved));
+ resolved->redirected = FALSE;
+ resolved->cur_domain = g_strdup(domain);
+ resolved->elts = g_ptr_array_new_full(8, rspamd_spf_free_addr);
+
+ g_ptr_array_add(rec->resolved, resolved);
+
+ return g_ptr_array_index(rec->resolved, rec->resolved->len - 1);
+}
+
+/*
+ * Destructor for spf record
+ */
+static void
+spf_record_destructor(gpointer r)
+{
+ struct spf_record *rec = r;
+ struct spf_resolved_element *elt;
+ guint i;
+
+ if (rec) {
+ for (i = 0; i < rec->resolved->len; i++) {
+ elt = g_ptr_array_index(rec->resolved, i);
+ g_ptr_array_free(elt->elts, TRUE);
+ g_free(elt->cur_domain);
+ g_free(elt);
+ }
+
+ g_ptr_array_free(rec->resolved, TRUE);
+ }
+}
+
+static void
+rspamd_flatten_record_dtor(struct spf_resolved *r)
+{
+ struct spf_addr *addr;
+ guint i;
+
+ for (i = 0; i < r->elts->len; i++) {
+ addr = &g_array_index(r->elts, struct spf_addr, i);
+ g_free(addr->spf_string);
+ }
+
+ g_free(r->top_record);
+ g_free(r->domain);
+ g_array_free(r->elts, TRUE);
+ g_free(r);
+}
+
+static void
+rspamd_spf_process_reference(struct spf_resolved *target,
+ struct spf_addr *addr, struct spf_record *rec, gboolean top)
+{
+ struct spf_resolved_element *elt, *relt;
+ struct spf_addr *cur = NULL, taddr, *cur_addr;
+ guint i;
+
+ if (addr) {
+ g_assert(addr->m.idx < rec->resolved->len);
+
+ elt = g_ptr_array_index(rec->resolved, addr->m.idx);
+ }
+ else {
+ elt = g_ptr_array_index(rec->resolved, 0);
+ }
+
+ if (rec->ttl < target->ttl) {
+ msg_debug_spf("reducing ttl from %d to %d after subrecord processing %s",
+ target->ttl, rec->ttl, rec->sender_domain);
+ target->ttl = rec->ttl;
+ }
+
+ if (elt->redirected) {
+ g_assert(elt->elts->len > 0);
+
+ for (i = 0; i < elt->elts->len; i++) {
+ cur = g_ptr_array_index(elt->elts, i);
+ if (cur->flags & RSPAMD_SPF_FLAG_REDIRECT) {
+ break;
+ }
+ }
+
+ g_assert(cur != NULL);
+ if (!(cur->flags & (RSPAMD_SPF_FLAG_PARSED | RSPAMD_SPF_FLAG_RESOLVED))) {
+ /* Unresolved redirect */
+ msg_info_spf("redirect to %s cannot be resolved for domain %s", cur->spf_string, rec->sender_domain);
+ }
+ else {
+ g_assert(cur->flags & RSPAMD_SPF_FLAG_REFERENCE);
+ g_assert(cur->m.idx < rec->resolved->len);
+ relt = g_ptr_array_index(rec->resolved, cur->m.idx);
+ msg_debug_spf("domain %s is redirected to %s", elt->cur_domain,
+ relt->cur_domain);
+ }
+ }
+
+ for (i = 0; i < elt->elts->len; i++) {
+ cur = g_ptr_array_index(elt->elts, i);
+
+ if (cur->flags & RSPAMD_SPF_FLAG_TEMPFAIL) {
+ target->flags |= RSPAMD_SPF_RESOLVED_TEMP_FAILED;
+ continue;
+ }
+ if (cur->flags & RSPAMD_SPF_FLAG_PERMFAIL) {
+ if (cur->flags & RSPAMD_SPF_FLAG_REDIRECT) {
+ target->flags |= RSPAMD_SPF_RESOLVED_PERM_FAILED;
+ }
+ continue;
+ }
+ if (cur->flags & RSPAMD_SPF_FLAG_NA) {
+ target->flags |= RSPAMD_SPF_RESOLVED_NA;
+ continue;
+ }
+ if (cur->flags & RSPAMD_SPF_FLAG_INVALID) {
+ /* Ignore invalid elements */
+ continue;
+ }
+ if ((cur->flags & (RSPAMD_SPF_FLAG_PARSED | RSPAMD_SPF_FLAG_RESOLVED)) !=
+ (RSPAMD_SPF_FLAG_RESOLVED | RSPAMD_SPF_FLAG_PARSED)) {
+ /* Ignore unparsed addrs */
+ continue;
+ }
+ if (cur->flags & RSPAMD_SPF_FLAG_REFERENCE) {
+ /* Process reference */
+ if (cur->flags & RSPAMD_SPF_FLAG_REDIRECT) {
+ /* Stop on redirected domain */
+ rspamd_spf_process_reference(target, cur, rec, top);
+ break;
+ }
+ else {
+ rspamd_spf_process_reference(target, cur, rec, FALSE);
+ }
+ }
+ else {
+ if ((cur->flags & RSPAMD_SPF_FLAG_ANY) && !top) {
+ /* Ignore wide policies in includes */
+ continue;
+ }
+
+ DL_FOREACH(cur, cur_addr)
+ {
+ memcpy(&taddr, cur_addr, sizeof(taddr));
+ taddr.spf_string = g_strdup(cur_addr->spf_string);
+ g_array_append_val(target->elts, taddr);
+ }
+ }
+ }
+}
+
+/*
+ * Parse record and flatten it to a simple structure
+ */
+static struct spf_resolved *
+rspamd_spf_record_flatten(struct spf_record *rec)
+{
+ struct spf_resolved *res;
+
+ g_assert(rec != NULL);
+
+ res = g_malloc0(sizeof(*res));
+ res->domain = g_strdup(rec->sender_domain);
+ res->ttl = rec->ttl;
+ /* Not precise but okay */
+ res->timestamp = rec->task->task_timestamp;
+ res->digest = mum_hash_init(0xa4aa40bbeec59e2bULL);
+ res->top_record = g_strdup(rec->top_record);
+ REF_INIT_RETAIN(res, rspamd_flatten_record_dtor);
+
+ if (rec->resolved) {
+ res->elts = g_array_sized_new(FALSE, FALSE, sizeof(struct spf_addr),
+ rec->resolved->len);
+
+ if (rec->resolved->len > 0) {
+ rspamd_spf_process_reference(res, NULL, rec, TRUE);
+ }
+ }
+ else {
+ res->elts = g_array_new(FALSE, FALSE, sizeof(struct spf_addr));
+ }
+
+ return res;
+}
+
+static gint
+rspamd_spf_elts_cmp(gconstpointer a, gconstpointer b)
+{
+ struct spf_addr *addr_a, *addr_b;
+
+ addr_a = (struct spf_addr *) a;
+ addr_b = (struct spf_addr *) b;
+
+ if (addr_a->flags == addr_b->flags) {
+ if (addr_a->flags & RSPAMD_SPF_FLAG_ANY) {
+ return 0;
+ }
+ else if (addr_a->flags & RSPAMD_SPF_FLAG_IPV4) {
+ return (addr_a->m.dual.mask_v4 - addr_b->m.dual.mask_v4) ||
+ memcmp(addr_a->addr4, addr_b->addr4, sizeof(addr_a->addr4));
+ }
+ else if (addr_a->flags & RSPAMD_SPF_FLAG_IPV6) {
+ return (addr_a->m.dual.mask_v6 - addr_b->m.dual.mask_v6) ||
+ memcmp(addr_a->addr6, addr_b->addr6, sizeof(addr_a->addr6));
+ }
+ else {
+ return 0;
+ }
+ }
+ else {
+ if (addr_a->flags & RSPAMD_SPF_FLAG_ANY) {
+ return 1;
+ }
+ else if (addr_b->flags & RSPAMD_SPF_FLAG_ANY) {
+ return -1;
+ }
+ else if (addr_a->flags & RSPAMD_SPF_FLAG_IPV4) {
+ return -1;
+ }
+
+ return 1;
+ }
+}
+
+static void
+rspamd_spf_record_postprocess(struct spf_resolved *rec, struct rspamd_task *task)
+{
+ g_array_sort(rec->elts, rspamd_spf_elts_cmp);
+
+ for (guint i = 0; i < rec->elts->len; i++) {
+ struct spf_addr *cur_addr = &g_array_index(rec->elts, struct spf_addr, i);
+
+ if (cur_addr->flags & RSPAMD_SPF_FLAG_IPV6) {
+ guint64 t[3];
+
+ /*
+ * Fill hash entry for ipv6 addr with 2 int64 from ipv6 address,
+ * the remaining int64 has mech + mask
+ */
+ memcpy(t, cur_addr->addr6, sizeof(guint64) * 2);
+ t[2] = ((guint64) (cur_addr->mech)) << 48u;
+ t[2] |= cur_addr->m.dual.mask_v6;
+
+ for (guint j = 0; j < G_N_ELEMENTS(t); j++) {
+ rec->digest = mum_hash_step(rec->digest, t[j]);
+ }
+ }
+ else if (cur_addr->flags & RSPAMD_SPF_FLAG_IPV4) {
+ guint64 t = 0;
+
+ memcpy(&t, cur_addr->addr4, sizeof(guint32));
+ t |= ((guint64) (cur_addr->mech)) << 48u;
+ t |= ((guint64) cur_addr->m.dual.mask_v4) << 32u;
+
+ rec->digest = mum_hash_step(rec->digest, t);
+ }
+ }
+
+ if (spf_lib_ctx->min_cache_ttl > 0) {
+ if (rec->ttl != 0 && rec->ttl < spf_lib_ctx->min_cache_ttl) {
+ msg_info_task("increasing ttl from %d to %d as it lower than a limit",
+ rec->ttl, spf_lib_ctx->min_cache_ttl);
+ rec->ttl = spf_lib_ctx->min_cache_ttl;
+ }
+ }
+}
+
+static void
+rspamd_spf_maybe_return(struct spf_record *rec)
+{
+ struct spf_resolved *flat;
+ struct rspamd_task *task = rec->task;
+ bool cached = false;
+
+ if (rec->requests_inflight == 0 && !rec->done) {
+ flat = rspamd_spf_record_flatten(rec);
+ rspamd_spf_record_postprocess(flat, rec->task);
+
+ if (flat->ttl > 0 && flat->flags == 0) {
+
+ if (spf_lib_ctx->spf_hash) {
+ rspamd_lru_hash_insert(spf_lib_ctx->spf_hash,
+ g_strdup(flat->domain),
+ spf_record_ref(flat),
+ flat->timestamp, flat->ttl);
+
+ msg_info_task("stored SPF record for %s (0x%xuL) in LRU cache for %d seconds, "
+ "%d/%d elements in the cache",
+ flat->domain,
+ flat->digest,
+ flat->ttl,
+ rspamd_lru_hash_size(spf_lib_ctx->spf_hash),
+ rspamd_lru_hash_capacity(spf_lib_ctx->spf_hash));
+ cached = true;
+ }
+ }
+
+ if (!cached) {
+ /* Still write a log line */
+ msg_info_task("not stored SPF record for %s (0x%xuL) in LRU cache; flags=%d; ttl=%d",
+ flat->domain,
+ flat->digest,
+ flat->flags,
+ flat->ttl);
+ }
+
+ rec->callback(flat, rec->task, rec->cbdata);
+ spf_record_unref(flat);
+ rec->done = TRUE;
+ }
+}
+
+static gboolean
+spf_check_ptr_host(struct spf_dns_cb *cb, const char *name)
+{
+ const char *dend, *nend, *dstart, *nstart;
+ struct spf_record *rec = cb->rec;
+
+ if (cb->ptr_host != NULL) {
+ dstart = cb->ptr_host;
+ }
+ else {
+ dstart = cb->resolved->cur_domain;
+ }
+
+ if (name == NULL || dstart == NULL) {
+ return FALSE;
+ }
+
+ msg_debug_spf("check ptr %s vs %s", name, dstart);
+
+ /* We need to check whether `cur_domain` is a subdomain for `name` */
+ dend = dstart + strlen(dstart) - 1;
+ nstart = name;
+ nend = nstart + strlen(nstart) - 1;
+
+ if (nend <= nstart || dend <= dstart) {
+ return FALSE;
+ }
+ /* Strip last '.' from names */
+ if (*nend == '.') {
+ nend--;
+ }
+ if (*dend == '.') {
+ dend--;
+ }
+ if (nend <= nstart || dend <= dstart) {
+ return FALSE;
+ }
+
+ /* Now compare from end to start */
+ for (;;) {
+ if (g_ascii_tolower(*dend) != g_ascii_tolower(*nend)) {
+ msg_debug_spf("ptr records mismatch: %s and %s", dend, nend);
+ return FALSE;
+ }
+
+ if (dend == dstart) {
+ break;
+ }
+ if (nend == nstart) {
+ /* Name is shorter than cur_domain */
+ return FALSE;
+ }
+ nend--;
+ dend--;
+ }
+
+ if (nend > nstart && *(nend - 1) != '.') {
+ /* Not a subdomain */
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static void
+spf_record_process_addr(struct spf_record *rec, struct spf_addr *addr, struct rdns_reply_entry *reply)
+{
+ struct spf_addr *naddr;
+
+ if (!(addr->flags & RSPAMD_SPF_FLAG_PROCESSED)) {
+ /* That's the first address */
+ if (reply->type == RDNS_REQUEST_AAAA) {
+ memcpy(addr->addr6,
+ &reply->content.aaa.addr,
+ sizeof(addr->addr6));
+ addr->flags |= RSPAMD_SPF_FLAG_IPV6;
+ }
+ else if (reply->type == RDNS_REQUEST_A) {
+ memcpy(addr->addr4, &reply->content.a.addr, sizeof(addr->addr4));
+ addr->flags |= RSPAMD_SPF_FLAG_IPV4;
+ }
+ else {
+ msg_err_spf(
+ "internal error, bad DNS reply is treated as address: %s; domain: %s",
+ rdns_strtype(reply->type),
+ rec->sender_domain);
+ }
+
+ addr->flags |= RSPAMD_SPF_FLAG_PROCESSED;
+ }
+ else {
+ /* We need to create a new address */
+ naddr = g_malloc0(sizeof(*naddr));
+ memcpy(naddr, addr, sizeof(*naddr));
+ naddr->next = NULL;
+ naddr->prev = NULL;
+
+ if (reply->type == RDNS_REQUEST_AAAA) {
+ memcpy(naddr->addr6,
+ &reply->content.aaa.addr,
+ sizeof(addr->addr6));
+ naddr->flags |= RSPAMD_SPF_FLAG_IPV6;
+ }
+ else if (reply->type == RDNS_REQUEST_A) {
+ memcpy(naddr->addr4, &reply->content.a.addr, sizeof(addr->addr4));
+ naddr->flags |= RSPAMD_SPF_FLAG_IPV4;
+ }
+ else {
+ msg_err_spf(
+ "internal error, bad DNS reply is treated as address: %s; domain: %s",
+ rdns_strtype(reply->type),
+ rec->sender_domain);
+ }
+
+ DL_APPEND(addr, naddr);
+ }
+}
+
+static void
+spf_record_addr_set(struct spf_addr *addr, gboolean allow_any)
+{
+ guchar fill;
+
+ if (!(addr->flags & RSPAMD_SPF_FLAG_PROCESSED)) {
+ if (allow_any) {
+ fill = 0;
+ addr->m.dual.mask_v4 = 0;
+ addr->m.dual.mask_v6 = 0;
+ }
+ else {
+ fill = 0xff;
+ }
+
+ memset(addr->addr4, fill, sizeof(addr->addr4));
+ memset(addr->addr6, fill, sizeof(addr->addr6));
+
+
+ addr->flags |= RSPAMD_SPF_FLAG_IPV4;
+ addr->flags |= RSPAMD_SPF_FLAG_IPV6;
+ }
+}
+
+static gboolean
+spf_process_txt_record(struct spf_record *rec, struct spf_resolved_element *resolved,
+ struct rdns_reply *reply, struct rdns_reply_entry **pselected)
+{
+ struct rdns_reply_entry *elt, *selected = NULL;
+ gboolean ret = FALSE;
+
+ /*
+ * We prefer spf version 1 as other records are mostly likely garbage
+ * or incorrect records (e.g. spf2 records)
+ */
+ LL_FOREACH(reply->entries, elt)
+ {
+ if (elt->type == RDNS_REQUEST_TXT) {
+ if (strncmp(elt->content.txt.data, "v=spf1", sizeof("v=spf1") - 1) == 0) {
+ selected = elt;
+
+ if (pselected != NULL) {
+ *pselected = selected;
+ }
+
+ break;
+ }
+ }
+ }
+
+ if (!selected) {
+ LL_FOREACH(reply->entries, elt)
+ {
+ /*
+ * Rubbish spf record? Let's still try to process it, but merely for
+ * TXT RRs
+ */
+ if (elt->type == RDNS_REQUEST_TXT) {
+ if (start_spf_parse(rec, resolved, elt->content.txt.data)) {
+ ret = TRUE;
+ if (pselected != NULL) {
+ *pselected = elt;
+ }
+ break;
+ }
+ }
+ }
+ }
+ else {
+ ret = start_spf_parse(rec, resolved, selected->content.txt.data);
+ }
+
+ return ret;
+}
+
+static void
+spf_record_dns_callback(struct rdns_reply *reply, gpointer arg)
+{
+ struct spf_dns_cb *cb = arg;
+ struct rdns_reply_entry *elt_data;
+ struct rspamd_task *task;
+ struct spf_addr *addr;
+ struct spf_record *rec;
+ const struct rdns_request_name *req_name;
+ bool truncated = false;
+
+ rec = cb->rec;
+ task = rec->task;
+
+ cb->rec->requests_inflight--;
+ addr = cb->addr;
+ req_name = rdns_request_get_name(reply->request, NULL);
+
+ if (reply->flags & RDNS_TRUNCATED) {
+ /* Do not process truncated DNS replies */
+ truncated = true;
+
+ if (req_name) {
+ msg_notice_spf("got a truncated record when trying to resolve %s (%s type) for SPF domain %s",
+ req_name->name, rdns_str_from_type(req_name->type),
+ rec->sender_domain);
+ }
+ else {
+ msg_notice_spf("got a truncated record when trying to resolve ??? "
+ "(internal error) for SPF domain %s",
+ rec->sender_domain);
+ }
+ }
+
+ if (reply->code == RDNS_RC_NOERROR && !truncated) {
+
+ LL_FOREACH(reply->entries, elt_data)
+ {
+ /* Adjust ttl if a resolved record has lower ttl than spf record itself */
+ if ((guint) elt_data->ttl < rec->ttl) {
+ msg_debug_spf("reducing ttl from %d to %d after DNS resolving",
+ rec->ttl, elt_data->ttl);
+ rec->ttl = elt_data->ttl;
+ }
+
+ if (elt_data->type == RDNS_REQUEST_CNAME) {
+ /* Skip cname aliases - it must be handled by a recursor */
+ continue;
+ }
+
+ switch (cb->cur_action) {
+ case SPF_RESOLVE_MX:
+ if (elt_data->type == RDNS_REQUEST_MX) {
+ /* Now resolve A record for this MX */
+ msg_debug_spf("resolve %s after resolving of MX",
+ elt_data->content.mx.name);
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb,
+ RDNS_REQUEST_A,
+ elt_data->content.mx.name)) {
+ cb->rec->requests_inflight++;
+ }
+
+ if (!spf_lib_ctx->disable_ipv6) {
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb,
+ RDNS_REQUEST_AAAA,
+ elt_data->content.mx.name)) {
+ cb->rec->requests_inflight++;
+ }
+ }
+ else {
+ msg_debug_spf("skip AAAA request for MX resolution");
+ }
+ }
+ else {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ cb->addr->flags &= ~RSPAMD_SPF_FLAG_PERMFAIL;
+ msg_debug_spf("resolved MX addr");
+ spf_record_process_addr(rec, addr, elt_data);
+ }
+ break;
+ case SPF_RESOLVE_A:
+ case SPF_RESOLVE_AAA:
+ cb->addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ cb->addr->flags &= ~RSPAMD_SPF_FLAG_PERMFAIL;
+ spf_record_process_addr(rec, addr, elt_data);
+ break;
+ case SPF_RESOLVE_PTR:
+ if (elt_data->type == RDNS_REQUEST_PTR) {
+ /* Validate returned records prior to making A requests */
+ if (spf_check_ptr_host(cb,
+ elt_data->content.ptr.name)) {
+ msg_debug_spf("resolve PTR %s after resolving of PTR",
+ elt_data->content.ptr.name);
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb,
+ RDNS_REQUEST_A,
+ elt_data->content.ptr.name)) {
+ cb->rec->requests_inflight++;
+ }
+
+ if (!spf_lib_ctx->disable_ipv6) {
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb,
+ RDNS_REQUEST_AAAA,
+ elt_data->content.ptr.name)) {
+ cb->rec->requests_inflight++;
+ }
+ }
+ else {
+ msg_debug_spf("skip AAAA request for PTR resolution");
+ }
+ }
+ else {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ cb->addr->flags &= ~RSPAMD_SPF_FLAG_PERMFAIL;
+ }
+ }
+ else {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ cb->addr->flags &= ~RSPAMD_SPF_FLAG_PERMFAIL;
+ spf_record_process_addr(rec, addr, elt_data);
+ }
+ break;
+ case SPF_RESOLVE_REDIRECT:
+ if (elt_data->type == RDNS_REQUEST_TXT) {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ if (reply->entries) {
+ msg_debug_spf("got redirection record for %s: '%s'",
+ req_name->name,
+ reply->entries[0].content.txt.data);
+ }
+
+ if (!spf_process_txt_record(rec, cb->resolved, reply, NULL)) {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ }
+ }
+
+ goto end;
+ break;
+ case SPF_RESOLVE_INCLUDE:
+ if (elt_data->type == RDNS_REQUEST_TXT) {
+ struct rdns_reply_entry *selected = NULL;
+
+ cb->addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ spf_process_txt_record(rec, cb->resolved, reply, &selected);
+ if (selected) {
+ msg_debug_spf("got include record for %s: '%s'",
+ req_name->name,
+ selected->content.txt.data);
+ }
+ else {
+ msg_debug_spf("no include record for %s",
+ req_name->name);
+ }
+ }
+ goto end;
+
+ break;
+ case SPF_RESOLVE_EXP:
+ break;
+ case SPF_RESOLVE_EXISTS:
+ if (elt_data->type == RDNS_REQUEST_A ||
+ elt_data->type == RDNS_REQUEST_AAAA) {
+ /*
+ * If specified address resolves, we can accept
+ * connection from every IP
+ */
+ addr->flags |= RSPAMD_SPF_FLAG_RESOLVED;
+ spf_record_addr_set(addr, TRUE);
+ }
+ break;
+ }
+ }
+ }
+ else if (reply->code == RDNS_RC_NXDOMAIN || reply->code == RDNS_RC_NOREC) {
+ switch (cb->cur_action) {
+ case SPF_RESOLVE_MX:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ msg_info_spf(
+ "spf error for domain %s: cannot find MX"
+ " record for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+ spf_record_addr_set(addr, FALSE);
+ }
+ break;
+ case SPF_RESOLVE_A:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ msg_info_spf(
+ "spf error for domain %s: cannot resolve A"
+ " record for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+
+ if (rdns_request_has_type(reply->request, RDNS_REQUEST_A)) {
+ spf_record_addr_set(addr, FALSE);
+ }
+ }
+ break;
+ case SPF_RESOLVE_AAA:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ msg_info_spf(
+ "spf error for domain %s: cannot resolve AAAA"
+ " record for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+ if (rdns_request_has_type(reply->request, RDNS_REQUEST_AAAA)) {
+ spf_record_addr_set(addr, FALSE);
+ }
+ }
+ break;
+ case SPF_RESOLVE_PTR:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ msg_info_spf(
+ "spf error for domain %s: cannot resolve PTR"
+ " record for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+
+ spf_record_addr_set(addr, FALSE);
+ }
+ break;
+ case SPF_RESOLVE_REDIRECT:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ msg_info_spf(
+ "spf error for domain %s: cannot resolve REDIRECT"
+ " record for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+ }
+
+ break;
+ case SPF_RESOLVE_INCLUDE:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ msg_info_spf(
+ "spf error for domain %s: cannot resolve INCLUDE"
+ " record for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+
+ cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ }
+ break;
+ case SPF_RESOLVE_EXP:
+ break;
+ case SPF_RESOLVE_EXISTS:
+ if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) {
+ msg_debug_spf(
+ "spf macro resolution for domain %s: cannot resolve EXISTS"
+ " macro for %s: %s",
+ cb->rec->sender_domain,
+ cb->resolved->cur_domain,
+ rdns_strerror(reply->code));
+ spf_record_addr_set(addr, FALSE);
+ }
+ break;
+ }
+ }
+ else {
+ cb->addr->flags |= RSPAMD_SPF_FLAG_TEMPFAIL;
+ msg_info_spf(
+ "spf error for domain %s: cannot resolve %s DNS record for"
+ " %s: %s",
+ cb->rec->sender_domain,
+ rspamd_spf_dns_action_to_str(cb->cur_action),
+ cb->ptr_host,
+ rdns_strerror(reply->code));
+ }
+
+end:
+ rspamd_spf_maybe_return(cb->rec);
+}
+
+/*
+ * The syntax defined by the following BNF:
+ * [ ":" domain-spec ] [ dual-cidr-length ]
+ * ip4-cidr-length = "/" 1*DIGIT
+ * ip6-cidr-length = "/" 1*DIGIT
+ * dual-cidr-length = [ ip4-cidr-length ] [ "/" ip6-cidr-length ]
+ */
+static const gchar *
+parse_spf_domain_mask(struct spf_record *rec, struct spf_addr *addr,
+ struct spf_resolved_element *resolved,
+ gboolean allow_mask)
+{
+ struct rspamd_task *task = rec->task;
+ enum {
+ parse_spf_elt = 0,
+ parse_semicolon,
+ parse_domain,
+ parse_slash,
+ parse_ipv4_mask,
+ parse_second_slash,
+ parse_ipv6_mask,
+ skip_garbage
+ } state = 0;
+ const gchar *p = addr->spf_string, *host, *c;
+ gchar *hostbuf;
+ gchar t;
+ guint16 cur_mask = 0;
+
+ host = resolved->cur_domain;
+ c = p;
+
+ while (*p) {
+ t = *p;
+
+ switch (state) {
+ case parse_spf_elt:
+ if (t == ':' || t == '=') {
+ state = parse_semicolon;
+ }
+ else if (t == '/') {
+ /* No domain but mask */
+ state = parse_slash;
+ }
+ p++;
+ break;
+ case parse_semicolon:
+ if (t == '/') {
+ /* Empty domain, technically an error */
+ state = parse_slash;
+ }
+ else {
+ c = p;
+ state = parse_domain;
+ }
+ break;
+ case parse_domain:
+ if (t == '/') {
+ hostbuf = rspamd_mempool_alloc(task->task_pool, p - c + 1);
+ rspamd_strlcpy(hostbuf, c, p - c + 1);
+ host = hostbuf;
+ state = parse_slash;
+ }
+ p++;
+ break;
+ case parse_slash:
+ c = p;
+ if (allow_mask) {
+ state = parse_ipv4_mask;
+ }
+ else {
+ state = skip_garbage;
+ }
+ cur_mask = 0;
+ break;
+ case parse_ipv4_mask:
+ if (g_ascii_isdigit(t)) {
+ /* Ignore errors here */
+ cur_mask = cur_mask * 10 + (t - '0');
+ }
+ else if (t == '/') {
+ if (cur_mask <= 32) {
+ addr->m.dual.mask_v4 = cur_mask;
+ }
+ else {
+ msg_notice_spf("bad ipv4 mask for %s: %d",
+ rec->sender_domain, cur_mask);
+ }
+ state = parse_second_slash;
+ }
+ p++;
+ break;
+ case parse_second_slash:
+ c = p;
+ state = parse_ipv6_mask;
+ cur_mask = 0;
+ break;
+ case parse_ipv6_mask:
+ if (g_ascii_isdigit(t)) {
+ /* Ignore errors here */
+ cur_mask = cur_mask * 10 + (t - '0');
+ }
+ p++;
+ break;
+ case skip_garbage:
+ p++;
+ break;
+ }
+ }
+
+ /* Process end states */
+ if (state == parse_ipv4_mask) {
+ if (cur_mask <= 32) {
+ addr->m.dual.mask_v4 = cur_mask;
+ }
+ else {
+ msg_notice_spf("bad ipv4 mask for %s: %d", rec->sender_domain, cur_mask);
+ }
+ }
+ else if (state == parse_ipv6_mask) {
+ if (cur_mask <= 128) {
+ addr->m.dual.mask_v6 = cur_mask;
+ }
+ else {
+ msg_notice_spf("bad ipv6 mask: %d", cur_mask);
+ }
+ }
+ else if (state == parse_domain && p - c > 0) {
+ hostbuf = rspamd_mempool_alloc(task->task_pool, p - c + 1);
+ rspamd_strlcpy(hostbuf, c, p - c + 1);
+ host = hostbuf;
+ }
+
+ if (cur_mask == 0) {
+ addr->m.dual.mask_v4 = 32;
+ addr->m.dual.mask_v6 = 64;
+ }
+
+ return host;
+}
+
+static gboolean
+parse_spf_a(struct spf_record *rec,
+ struct spf_resolved_element *resolved, struct spf_addr *addr)
+{
+ struct spf_dns_cb *cb;
+ const gchar *host = NULL;
+ struct rspamd_task *task = rec->task;
+
+ CHECK_REC(rec);
+
+ host = parse_spf_domain_mask(rec, addr, resolved, TRUE);
+
+ if (host == NULL) {
+ return FALSE;
+ }
+
+ rec->dns_requests++;
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ cb->rec = rec;
+ cb->ptr_host = host;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_A;
+ cb->resolved = resolved;
+ msg_debug_spf("resolve a %s", host);
+
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_A, host)) {
+ rec->requests_inflight++;
+
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ cb->rec = rec;
+ cb->ptr_host = host;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_AAA;
+ cb->resolved = resolved;
+
+ if (!spf_lib_ctx->disable_ipv6) {
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_AAAA, host)) {
+ rec->requests_inflight++;
+ }
+ }
+ else {
+ msg_debug_spf("skip AAAA request for a record resolution");
+ }
+
+ return TRUE;
+ }
+ else {
+ msg_notice_spf("unresolvable A element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ }
+
+ return FALSE;
+}
+
+static gboolean
+parse_spf_ptr(struct spf_record *rec,
+ struct spf_resolved_element *resolved, struct spf_addr *addr)
+{
+ struct spf_dns_cb *cb;
+ const gchar *host;
+ gchar *ptr;
+ struct rspamd_task *task = rec->task;
+
+ CHECK_REC(rec);
+
+ host = parse_spf_domain_mask(rec, addr, resolved, FALSE);
+
+ rec->dns_requests++;
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ cb->rec = rec;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_PTR;
+ cb->resolved = resolved;
+ cb->ptr_host = rspamd_mempool_strdup(task->task_pool, host);
+ ptr =
+ rdns_generate_ptr_from_str(rspamd_inet_address_to_string(
+ task->from_addr));
+
+ if (ptr == NULL) {
+ return FALSE;
+ }
+
+ rspamd_mempool_add_destructor(task->task_pool, free, ptr);
+ msg_debug_spf("resolve ptr %s for %s", ptr, host);
+
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_PTR, ptr)) {
+ rec->requests_inflight++;
+ rec->ttl = 0;
+ msg_debug_spf("disable SPF caching as there is PTR expansion");
+
+ return TRUE;
+ }
+ else {
+ msg_notice_spf("unresolvable PTR element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ }
+
+ return FALSE;
+}
+
+static gboolean
+parse_spf_mx(struct spf_record *rec,
+ struct spf_resolved_element *resolved, struct spf_addr *addr)
+{
+ struct spf_dns_cb *cb;
+ const gchar *host;
+ struct rspamd_task *task = rec->task;
+
+ CHECK_REC(rec);
+
+ host = parse_spf_domain_mask(rec, addr, resolved, TRUE);
+
+ if (host == NULL) {
+ return FALSE;
+ }
+
+ rec->dns_requests++;
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ cb->rec = rec;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_MX;
+ cb->ptr_host = host;
+ cb->resolved = resolved;
+
+ msg_debug_spf("resolve mx for %s", host);
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_MX, host)) {
+ rec->requests_inflight++;
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gboolean
+parse_spf_all(struct spf_record *rec, struct spf_addr *addr)
+{
+ /* All is 0/0 */
+ memset(&addr->addr4, 0, sizeof(addr->addr4));
+ memset(&addr->addr6, 0, sizeof(addr->addr6));
+ /* Here we set all masks to 0 */
+ addr->m.idx = 0;
+ addr->flags |= RSPAMD_SPF_FLAG_ANY | RSPAMD_SPF_FLAG_RESOLVED;
+ msg_debug_spf("parsed all elt");
+
+ /* Disallow +all */
+ if (addr->mech == SPF_PASS) {
+ addr->flags |= RSPAMD_SPF_FLAG_INVALID;
+ msg_notice_spf("domain %s allows any SPF (+all), ignore SPF record completely",
+ rec->sender_domain);
+ }
+
+ return TRUE;
+}
+
+static gboolean
+parse_spf_ip4(struct spf_record *rec, struct spf_addr *addr)
+{
+ /* ip4:addr[/mask] */
+ const gchar *semicolon, *slash;
+ gsize len;
+ gchar ipbuf[INET_ADDRSTRLEN + 1];
+ guint32 mask;
+ static const guint32 min_valid_mask = 8;
+
+ semicolon = strchr(addr->spf_string, ':');
+
+ if (semicolon == NULL) {
+ semicolon = strchr(addr->spf_string, '=');
+
+ if (semicolon == NULL) {
+ msg_notice_spf("invalid ip4 element for %s: %s, no '=' or ':'", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+ }
+
+ semicolon++;
+ slash = strchr(semicolon, '/');
+
+ if (slash) {
+ len = slash - semicolon;
+ }
+ else {
+ len = strlen(semicolon);
+ }
+
+ rspamd_strlcpy(ipbuf, semicolon, MIN(len + 1, sizeof(ipbuf)));
+
+ if (inet_pton(AF_INET, ipbuf, addr->addr4) != 1) {
+ msg_notice_spf("invalid ip4 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+
+ if (slash) {
+ gchar *end = NULL;
+
+ mask = strtoul(slash + 1, &end, 10);
+ if (mask > 32) {
+ msg_notice_spf("invalid mask for ip4 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+
+ if (end != NULL && !g_ascii_isspace(*end) && *end != '\0') {
+ /* Invalid mask definition */
+ msg_notice_spf("invalid mask for ip4 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+
+ addr->m.dual.mask_v4 = mask;
+
+ if (mask < min_valid_mask) {
+ addr->flags |= RSPAMD_SPF_FLAG_INVALID;
+ msg_notice_spf("too wide SPF record for %s: %s/%d",
+ rec->sender_domain,
+ ipbuf, addr->m.dual.mask_v4);
+ }
+ }
+ else {
+ addr->m.dual.mask_v4 = 32;
+ }
+
+ addr->flags |= RSPAMD_SPF_FLAG_IPV4 | RSPAMD_SPF_FLAG_RESOLVED;
+ msg_debug_spf("parsed ipv4 record %s/%d", ipbuf, addr->m.dual.mask_v4);
+
+ return TRUE;
+}
+
+static gboolean
+parse_spf_ip6(struct spf_record *rec, struct spf_addr *addr)
+{
+ /* ip6:addr[/mask] */
+ const gchar *semicolon, *slash;
+ gsize len;
+ gchar ipbuf[INET6_ADDRSTRLEN + 1];
+ guint32 mask;
+ static const guint32 min_valid_mask = 8;
+
+ semicolon = strchr(addr->spf_string, ':');
+
+ if (semicolon == NULL) {
+ semicolon = strchr(addr->spf_string, '=');
+
+ if (semicolon == NULL) {
+ msg_notice_spf("invalid ip6 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+ }
+
+ semicolon++;
+ slash = strchr(semicolon, '/');
+
+ if (slash) {
+ len = slash - semicolon;
+ }
+ else {
+ len = strlen(semicolon);
+ }
+
+ rspamd_strlcpy(ipbuf, semicolon, MIN(len + 1, sizeof(ipbuf)));
+
+ if (inet_pton(AF_INET6, ipbuf, addr->addr6) != 1) {
+ msg_notice_spf("invalid ip6 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+
+ if (slash) {
+ gchar *end = NULL;
+ mask = strtoul(slash + 1, &end, 10);
+ if (mask > 128) {
+ msg_notice_spf("invalid mask for ip6 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+
+ if (end != NULL && !g_ascii_isspace(*end) && *end != '\0') {
+ /* Invalid mask definition */
+ msg_notice_spf("invalid mask for ip4 element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+
+ addr->m.dual.mask_v6 = mask;
+
+ if (mask < min_valid_mask) {
+ addr->flags |= RSPAMD_SPF_FLAG_INVALID;
+ msg_notice_spf("too wide SPF record for %s: %s/%d",
+ rec->sender_domain,
+ ipbuf, addr->m.dual.mask_v6);
+ }
+ }
+ else {
+ addr->m.dual.mask_v6 = 128;
+ }
+
+ addr->flags |= RSPAMD_SPF_FLAG_IPV6 | RSPAMD_SPF_FLAG_RESOLVED;
+ msg_debug_spf("parsed ipv6 record %s/%d", ipbuf, addr->m.dual.mask_v6);
+
+ return TRUE;
+}
+
+
+static gboolean
+parse_spf_include(struct spf_record *rec, struct spf_addr *addr)
+{
+ struct spf_dns_cb *cb;
+ const gchar *domain;
+ struct rspamd_task *task = rec->task;
+
+ CHECK_REC(rec);
+ domain = strchr(addr->spf_string, ':');
+
+ if (domain == NULL) {
+ /* Common mistake */
+ domain = strchr(addr->spf_string, '=');
+
+ if (domain == NULL) {
+ msg_notice_spf("invalid include element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+ }
+
+ domain++;
+
+ rec->dns_requests++;
+
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ cb->rec = rec;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_INCLUDE;
+ addr->m.idx = rec->resolved->len;
+ cb->resolved = rspamd_spf_new_addr_list(rec, domain);
+ cb->ptr_host = domain;
+ /* Set reference */
+ addr->flags |= RSPAMD_SPF_FLAG_REFERENCE;
+ msg_debug_spf("resolve include %s", domain);
+
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_TXT, domain)) {
+ rec->requests_inflight++;
+
+ return TRUE;
+ }
+ else {
+ msg_notice_spf("unresolvable include element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ }
+
+
+ return FALSE;
+}
+
+static gboolean
+parse_spf_exp(struct spf_record *rec, struct spf_addr *addr)
+{
+ msg_info_spf("exp record is ignored");
+ return TRUE;
+}
+
+static gboolean
+parse_spf_redirect(struct spf_record *rec,
+ struct spf_resolved_element *resolved, struct spf_addr *addr)
+{
+ struct spf_dns_cb *cb;
+ const gchar *domain;
+ struct rspamd_task *task = rec->task;
+
+ CHECK_REC(rec);
+
+ domain = strchr(addr->spf_string, '=');
+
+ if (domain == NULL) {
+ /* Common mistake */
+ domain = strchr(addr->spf_string, ':');
+
+ if (domain == NULL) {
+ msg_notice_spf("invalid redirect element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+ }
+
+ domain++;
+
+ rec->dns_requests++;
+ resolved->redirected = TRUE;
+
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ /* Set reference */
+ addr->flags |= RSPAMD_SPF_FLAG_REFERENCE | RSPAMD_SPF_FLAG_REDIRECT;
+ addr->m.idx = rec->resolved->len;
+
+ cb->rec = rec;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_REDIRECT;
+ cb->resolved = rspamd_spf_new_addr_list(rec, domain);
+ cb->ptr_host = domain;
+ msg_debug_spf("resolve redirect %s", domain);
+
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_TXT, domain)) {
+ rec->requests_inflight++;
+
+ return TRUE;
+ }
+ else {
+ msg_notice_spf("unresolvable redirect element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ }
+
+ return FALSE;
+}
+
+static gboolean
+parse_spf_exists(struct spf_record *rec, struct spf_addr *addr)
+{
+ struct spf_dns_cb *cb;
+ const gchar *host;
+ struct rspamd_task *task = rec->task;
+ struct spf_resolved_element *resolved;
+
+ resolved = g_ptr_array_index(rec->resolved, rec->resolved->len - 1);
+ CHECK_REC(rec);
+
+ host = strchr(addr->spf_string, ':');
+ if (host == NULL) {
+ host = strchr(addr->spf_string, '=');
+
+ if (host == NULL) {
+ msg_notice_spf("invalid exists element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ return FALSE;
+ }
+ }
+
+ host++;
+ rec->dns_requests++;
+
+ cb = rspamd_mempool_alloc(task->task_pool, sizeof(struct spf_dns_cb));
+ cb->rec = rec;
+ cb->addr = addr;
+ cb->cur_action = SPF_RESOLVE_EXISTS;
+ cb->resolved = resolved;
+ cb->ptr_host = host;
+
+ msg_debug_spf("resolve exists %s", host);
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_record_dns_callback, (void *) cb, RDNS_REQUEST_A, host)) {
+ rec->requests_inflight++;
+
+ return TRUE;
+ }
+ else {
+ msg_notice_spf("unresolvable exists element for %s: %s", addr->spf_string,
+ rec->sender_domain);
+ }
+
+ return FALSE;
+}
+
+static gsize
+rspamd_spf_split_elt(const gchar *val, gsize len, gint *pos,
+ gsize poslen, gchar delim)
+{
+ const gchar *p, *end;
+ guint cur_pos = 0, cur_st = 0, nsub = 0;
+
+ p = val;
+ end = val + len;
+
+ while (p < end && cur_pos + 2 < poslen) {
+ if (*p == delim) {
+ if (p - val > cur_st) {
+ pos[cur_pos] = cur_st;
+ pos[cur_pos + 1] = p - val;
+ cur_st = p - val + 1;
+ cur_pos += 2;
+ nsub++;
+ }
+
+ p++;
+ }
+ else {
+ p++;
+ }
+ }
+
+ if (cur_pos + 2 < poslen) {
+ if (end - val > cur_st) {
+ pos[cur_pos] = cur_st;
+ pos[cur_pos + 1] = end - val;
+ nsub++;
+ }
+ }
+ else {
+ pos[cur_pos] = p - val;
+ pos[cur_pos + 1] = end - val;
+ nsub++;
+ }
+
+ return nsub;
+}
+
+static gsize
+rspamd_spf_process_substitution(const gchar *macro_value,
+ gsize macro_len, guint ndelim, gchar delim, gboolean reversed,
+ gchar *dest)
+{
+ gchar *d = dest;
+ const gchar canon_delim = '.';
+ guint vlen, i;
+ gint pos[49 * 2], tlen;
+
+ if (!reversed && ndelim == 0 && delim == canon_delim) {
+ /* Trivial case */
+ memcpy(dest, macro_value, macro_len);
+
+ return macro_len;
+ }
+
+ vlen = rspamd_spf_split_elt(macro_value, macro_len,
+ pos, G_N_ELEMENTS(pos), delim);
+
+ if (vlen > 0) {
+ if (reversed) {
+ for (i = vlen - 1;; i--) {
+ tlen = pos[i * 2 + 1] - pos[i * 2];
+
+ if (i != 0) {
+ memcpy(d, &macro_value[pos[i * 2]], tlen);
+ d += tlen;
+ *d++ = canon_delim;
+ }
+ else {
+ memcpy(d, &macro_value[pos[i * 2]], tlen);
+ d += tlen;
+ break;
+ }
+ }
+ }
+ else {
+ for (i = 0; i < vlen; i++) {
+ tlen = pos[i * 2 + 1] - pos[i * 2];
+
+ if (i != vlen - 1) {
+ memcpy(d, &macro_value[pos[i * 2]], tlen);
+ d += tlen;
+ *d++ = canon_delim;
+ }
+ else {
+ memcpy(d, &macro_value[pos[i * 2]], tlen);
+ d += tlen;
+ }
+ }
+ }
+ }
+ else {
+ /* Trivial case */
+ memcpy(dest, macro_value, macro_len);
+
+ return macro_len;
+ }
+
+ return (d - dest);
+}
+
+static const gchar *
+expand_spf_macro(struct spf_record *rec, struct spf_resolved_element *resolved,
+ const gchar *begin)
+{
+ const gchar *p, *macro_value = NULL;
+ gchar *c, *new, *tmp, delim = '.';
+ gsize len = 0, macro_len = 0;
+ gint state = 0, ndelim = 0;
+ gchar ip_buf[64 + 1]; /* cannot use INET6_ADDRSTRLEN as we use ptr lookup */
+ gboolean need_expand = FALSE, reversed;
+ struct rspamd_task *task;
+
+ g_assert(rec != NULL);
+ g_assert(begin != NULL);
+
+ task = rec->task;
+ p = begin;
+ /* Calculate length */
+ while (*p) {
+ switch (state) {
+ case 0:
+ /* Skip any character and wait for % in input */
+ if (*p == '%') {
+ state = 1;
+ }
+ else {
+ len++;
+ }
+
+ p++;
+ break;
+ case 1:
+ /* We got % sign, so we should whether wait for { or for - or for _ or for % */
+ if (*p == '%' || *p == '_') {
+ /* Just a single % sign or space */
+ len++;
+ state = 0;
+ }
+ else if (*p == '-') {
+ /* %20 */
+ len += sizeof("%20") - 1;
+ state = 0;
+ }
+ else if (*p == '{') {
+ state = 2;
+ }
+ else {
+ /* Something unknown */
+ msg_notice_spf(
+ "spf error for domain %s: unknown spf element",
+ rec->sender_domain);
+ return begin;
+ }
+ p++;
+
+ break;
+ case 2:
+ /* Read macro name */
+ switch (g_ascii_tolower(*p)) {
+ case 'i':
+ len += sizeof(ip_buf) - 1;
+ break;
+ case 's':
+ if (rec->sender) {
+ len += strlen(rec->sender);
+ }
+ else {
+ len += sizeof("unknown") - 1;
+ }
+ break;
+ case 'l':
+ if (rec->local_part) {
+ len += strlen(rec->local_part);
+ }
+ else {
+ len += sizeof("unknown") - 1;
+ }
+ break;
+ case 'o':
+ if (rec->sender_domain) {
+ len += strlen(rec->sender_domain);
+ }
+ else {
+ len += sizeof("unknown") - 1;
+ }
+ break;
+ case 'd':
+ if (resolved->cur_domain) {
+ len += strlen(resolved->cur_domain);
+ }
+ else {
+ len += sizeof("unknown") - 1;
+ }
+ break;
+ case 'v':
+ len += sizeof("in-addr") - 1;
+ break;
+ case 'h':
+ if (task->helo) {
+ len += strlen(task->helo);
+ }
+ else {
+ len += sizeof("unknown") - 1;
+ }
+ break;
+ default:
+ msg_notice_spf(
+ "spf error for domain %s: unknown or "
+ "unsupported spf macro %c in %s",
+ rec->sender_domain,
+ *p,
+ begin);
+ return begin;
+ }
+ p++;
+ state = 3;
+ break;
+ case 3:
+ /* Read modifier */
+ if (*p == '}') {
+ state = 0;
+ need_expand = TRUE;
+ }
+ p++;
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ if (!need_expand) {
+ /* No expansion needed */
+ return begin;
+ }
+
+ new = rspamd_mempool_alloc(task->task_pool, len + 1);
+
+ /* Reduce TTL to avoid caching of records with macros */
+ if (rec->ttl != 0) {
+ rec->ttl = 0;
+ msg_debug_spf("disable SPF caching as there is macro expansion");
+ }
+
+ c = new;
+ p = begin;
+ state = 0;
+ /* Begin macro expansion */
+
+ while (*p) {
+ switch (state) {
+ case 0:
+ /* Skip any character and wait for % in input */
+ if (*p == '%') {
+ state = 1;
+ }
+ else {
+ *c = *p;
+ c++;
+ }
+
+ p++;
+ break;
+ case 1:
+ /* We got % sign, so we should whether wait for { or for - or for _ or for % */
+ if (*p == '%') {
+ /* Just a single % sign or space */
+ *c++ = '%';
+ state = 0;
+ }
+ else if (*p == '_') {
+ *c++ = ' ';
+ state = 0;
+ }
+ else if (*p == '-') {
+ /* %20 */
+ *c++ = '%';
+ *c++ = '2';
+ *c++ = '0';
+ state = 0;
+ }
+ else if (*p == '{') {
+ state = 2;
+ }
+ else {
+ /* Something unknown */
+ msg_info_spf(
+ "spf error for domain %s: unknown spf element",
+ rec->sender_domain);
+ return begin;
+ }
+ p++;
+ break;
+ case 2:
+ /* Read macro name */
+ switch (g_ascii_tolower(*p)) {
+ case 'i':
+ if (task->from_addr) {
+ if (rspamd_inet_address_get_af(task->from_addr) == AF_INET) {
+ macro_len = rspamd_strlcpy(ip_buf,
+ rspamd_inet_address_to_string(task->from_addr),
+ sizeof(ip_buf));
+ macro_value = ip_buf;
+ }
+ else if (rspamd_inet_address_get_af(task->from_addr) == AF_INET6) {
+ /* See #3625 for details */
+ socklen_t slen;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+ rspamd_inet_address_get_sa(task->from_addr, &slen);
+
+ /* Expand IPv6 address */
+#define IPV6_OCTET(x) bytes[(x)] >> 4, bytes[(x)] & 0xF
+ unsigned char *bytes = (unsigned char *) &sin6->sin6_addr;
+ macro_len = rspamd_snprintf(ip_buf, sizeof(ip_buf),
+ "%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd."
+ "%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd.%xd",
+ IPV6_OCTET(0), IPV6_OCTET(1),
+ IPV6_OCTET(2), IPV6_OCTET(3),
+ IPV6_OCTET(4), IPV6_OCTET(5),
+ IPV6_OCTET(6), IPV6_OCTET(7),
+ IPV6_OCTET(8), IPV6_OCTET(9),
+ IPV6_OCTET(10), IPV6_OCTET(11),
+ IPV6_OCTET(12), IPV6_OCTET(13),
+ IPV6_OCTET(14), IPV6_OCTET(15));
+ macro_value = ip_buf;
+#undef IPV6_OCTET
+ }
+ else {
+ macro_len = rspamd_snprintf(ip_buf, sizeof(ip_buf),
+ "127.0.0.1");
+ macro_value = ip_buf;
+ }
+ }
+ else {
+ macro_len = rspamd_snprintf(ip_buf, sizeof(ip_buf),
+ "127.0.0.1");
+ macro_value = ip_buf;
+ }
+ break;
+ case 's':
+ if (rec->sender) {
+ macro_len = strlen(rec->sender);
+ macro_value = rec->sender;
+ }
+ else {
+ macro_len = sizeof("unknown") - 1;
+ macro_value = "unknown";
+ }
+ break;
+ case 'l':
+ if (rec->local_part) {
+ macro_len = strlen(rec->local_part);
+ macro_value = rec->local_part;
+ }
+ else {
+ macro_len = sizeof("unknown") - 1;
+ macro_value = "unknown";
+ }
+ break;
+ case 'o':
+ if (rec->sender_domain) {
+ macro_len = strlen(rec->sender_domain);
+ macro_value = rec->sender_domain;
+ }
+ else {
+ macro_len = sizeof("unknown") - 1;
+ macro_value = "unknown";
+ }
+ break;
+ case 'd':
+ if (resolved && resolved->cur_domain) {
+ macro_len = strlen(resolved->cur_domain);
+ macro_value = resolved->cur_domain;
+ }
+ else {
+ macro_len = sizeof("unknown") - 1;
+ macro_value = "unknown";
+ }
+ break;
+ case 'v':
+ if (task->from_addr) {
+ if (rspamd_inet_address_get_af(task->from_addr) == AF_INET) {
+ macro_len = sizeof("in-addr") - 1;
+ macro_value = "in-addr";
+ }
+ else {
+ macro_len = sizeof("ip6") - 1;
+ macro_value = "ip6";
+ }
+ }
+ else {
+ macro_len = sizeof("in-addr") - 1;
+ macro_value = "in-addr";
+ }
+ break;
+ case 'h':
+ if (task->helo) {
+ tmp = strchr(task->helo, '@');
+ if (tmp) {
+ macro_len = strlen(tmp + 1);
+ macro_value = tmp + 1;
+ }
+ else {
+ macro_len = strlen(task->helo);
+ macro_value = task->helo;
+ }
+ }
+ else {
+ macro_len = sizeof("unknown") - 1;
+ macro_value = "unknown";
+ }
+ break;
+ default:
+ msg_info_spf(
+ "spf error for domain %s: unknown or "
+ "unsupported spf macro %c in %s",
+ rec->sender_domain,
+ *p,
+ begin);
+ return begin;
+ }
+
+ p++;
+ state = 3;
+ ndelim = 0;
+ delim = '.';
+ reversed = FALSE;
+ break;
+
+ case 3:
+ /* Read modifier */
+ if (*p == '}') {
+ state = 0;
+ len = rspamd_spf_process_substitution(macro_value,
+ macro_len, ndelim, delim, reversed, c);
+ c += len;
+ }
+ else if (*p == 'r' && len != 0) {
+ reversed = TRUE;
+ }
+ else if (g_ascii_isdigit(*p)) {
+ ndelim = strtoul(p, &tmp, 10);
+
+ if (tmp == NULL || tmp == p) {
+ p++;
+ }
+ else {
+ p = tmp;
+
+ continue;
+ }
+ }
+ else if (*p == '+' || *p == '-' ||
+ *p == '.' || *p == ',' || *p == '/' || *p == '_' ||
+ *p == '=') {
+ delim = *p;
+ }
+ else {
+ msg_info_spf("spf error for domain %s: unknown or "
+ "unsupported spf macro %c in %s",
+ rec->sender_domain,
+ *p,
+ begin);
+ return begin;
+ }
+ p++;
+ break;
+ }
+ }
+ /* Null terminate */
+ *c = '\0';
+
+ return new;
+}
+
+/* Read current element and try to parse record */
+static gboolean
+spf_process_element(struct spf_record *rec,
+ struct spf_resolved_element *resolved,
+ const gchar *elt,
+ const gchar **elts)
+{
+ struct spf_addr *addr = NULL;
+ gboolean res = FALSE;
+ const gchar *begin;
+ gchar t;
+
+ g_assert(elt != NULL);
+ g_assert(rec != NULL);
+
+ if (*elt == '\0' || resolved->redirected) {
+ return TRUE;
+ }
+
+ begin = expand_spf_macro(rec, resolved, elt);
+ addr = rspamd_spf_new_addr(rec, resolved, begin);
+ g_assert(addr != NULL);
+ t = g_ascii_tolower(addr->spf_string[0]);
+ begin = addr->spf_string;
+
+ /* Now check what we have */
+ switch (t) {
+ case 'a':
+ /* all or a */
+ if (g_ascii_strncasecmp(begin, SPF_ALL,
+ sizeof(SPF_ALL) - 1) == 0) {
+ res = parse_spf_all(rec, addr);
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_A,
+ sizeof(SPF_A) - 1) == 0) {
+ res = parse_spf_a(rec, resolved, addr);
+ }
+ else {
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ }
+ break;
+ case 'i':
+ /* include or ip4 */
+ if (g_ascii_strncasecmp(begin, SPF_IP4, sizeof(SPF_IP4) - 1) == 0) {
+ res = parse_spf_ip4(rec, addr);
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_INCLUDE, sizeof(SPF_INCLUDE) - 1) == 0) {
+ res = parse_spf_include(rec, addr);
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_IP6, sizeof(SPF_IP6) - 1) == 0) {
+ res = parse_spf_ip6(rec, addr);
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_IP4_ALT, sizeof(SPF_IP4_ALT) - 1) == 0) {
+ res = parse_spf_ip4(rec, addr);
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_IP6_ALT, sizeof(SPF_IP6_ALT) - 1) == 0) {
+ res = parse_spf_ip6(rec, addr);
+ }
+ else {
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ }
+ break;
+ case 'm':
+ /* mx */
+ if (g_ascii_strncasecmp(begin, SPF_MX, sizeof(SPF_MX) - 1) == 0) {
+ res = parse_spf_mx(rec, resolved, addr);
+ }
+ else {
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ }
+ break;
+ case 'p':
+ /* ptr */
+ if (g_ascii_strncasecmp(begin, SPF_PTR,
+ sizeof(SPF_PTR) - 1) == 0) {
+ res = parse_spf_ptr(rec, resolved, addr);
+ }
+ else {
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ }
+ break;
+ case 'e':
+ /* exp or exists */
+ if (g_ascii_strncasecmp(begin, SPF_EXP,
+ sizeof(SPF_EXP) - 1) == 0) {
+ res = parse_spf_exp(rec, addr);
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_EXISTS,
+ sizeof(SPF_EXISTS) - 1) == 0) {
+ res = parse_spf_exists(rec, addr);
+ }
+ else {
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ }
+ break;
+ case 'r':
+ /* redirect */
+ if (g_ascii_strncasecmp(begin, SPF_REDIRECT,
+ sizeof(SPF_REDIRECT) - 1) == 0) {
+ /*
+ * According to https://tools.ietf.org/html/rfc7208#section-6.1
+ * There must be no ALL element anywhere in the record,
+ * redirect must be ignored
+ */
+ gboolean ignore_redirect = FALSE;
+
+ for (const gchar **tmp = elts; *tmp != NULL; tmp++) {
+ if (g_ascii_strcasecmp((*tmp) + 1, "all") == 0) {
+ ignore_redirect = TRUE;
+ break;
+ }
+ }
+
+ if (!ignore_redirect) {
+ res = parse_spf_redirect(rec, resolved, addr);
+ }
+ else {
+ msg_notice_spf("ignore SPF redirect (%s) for domain %s as there is also all element",
+ begin, rec->sender_domain);
+
+ /* Pop the current addr as it is ignored */
+ g_ptr_array_remove_index_fast(resolved->elts,
+ resolved->elts->len - 1);
+
+ return TRUE;
+ }
+ }
+ else {
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ }
+ break;
+ case 'v':
+ if (g_ascii_strncasecmp(begin, "v=spf",
+ sizeof("v=spf") - 1) == 0) {
+ /* Skip this element till the end of record */
+ while (*begin && !g_ascii_isspace(*begin)) {
+ begin++;
+ }
+ }
+ break;
+ default:
+ msg_notice_spf("spf error for domain %s: bad spf command %s",
+ rec->sender_domain, begin);
+ break;
+ }
+
+ if (res) {
+ addr->flags |= RSPAMD_SPF_FLAG_PARSED;
+ }
+
+ return res;
+}
+
+static void
+parse_spf_scopes(struct spf_record *rec, gchar **begin)
+{
+ for (;;) {
+ if (g_ascii_strncasecmp(*begin, SPF_SCOPE_PRA, sizeof(SPF_SCOPE_PRA) - 1) == 0) {
+ *begin += sizeof(SPF_SCOPE_PRA) - 1;
+ /* XXX: Implement actual PRA check */
+ /* extract_pra_info (rec); */
+ continue;
+ }
+ else if (g_ascii_strncasecmp(*begin, SPF_SCOPE_MFROM,
+ sizeof(SPF_SCOPE_MFROM) - 1) == 0) {
+ /* mfrom is standard spf1 check */
+ *begin += sizeof(SPF_SCOPE_MFROM) - 1;
+ continue;
+ }
+ else if (**begin != ',') {
+ break;
+ }
+ (*begin)++;
+ }
+}
+
+static gboolean
+start_spf_parse(struct spf_record *rec, struct spf_resolved_element *resolved,
+ gchar *begin)
+{
+ gchar **elts, **cur_elt;
+ gsize len;
+
+ /* Skip spaces */
+ while (g_ascii_isspace(*begin)) {
+ begin++;
+ }
+
+ len = strlen(begin);
+
+ if (g_ascii_strncasecmp(begin, SPF_VER1_STR, sizeof(SPF_VER1_STR) - 1) ==
+ 0) {
+ begin += sizeof(SPF_VER1_STR) - 1;
+
+ while (g_ascii_isspace(*begin) && *begin) {
+ begin++;
+ }
+ }
+ else if (g_ascii_strncasecmp(begin, SPF_VER2_STR, sizeof(SPF_VER2_STR) - 1) == 0) {
+ /* Skip one number of record, so no we are here spf2.0/ */
+ begin += sizeof(SPF_VER2_STR);
+ if (*begin != '/') {
+ msg_notice_spf("spf error for domain %s: sender id is invalid",
+ rec->sender_domain);
+ }
+ else {
+ begin++;
+ parse_spf_scopes(rec, &begin);
+ }
+ /* Now common spf record */
+ }
+ else {
+ msg_debug_spf(
+ "spf error for domain %s: bad spf record start: %*s",
+ rec->sender_domain,
+ (gint) len,
+ begin);
+
+ return FALSE;
+ }
+
+ while (g_ascii_isspace(*begin) && *begin) {
+ begin++;
+ }
+
+ elts = g_strsplit_set(begin, " ", 0);
+
+ if (elts) {
+ cur_elt = elts;
+
+ while (*cur_elt) {
+ spf_process_element(rec, resolved, *cur_elt, (const gchar **) elts);
+ cur_elt++;
+ }
+
+ g_strfreev(elts);
+ }
+
+ rspamd_spf_maybe_return(rec);
+
+ return TRUE;
+}
+
+static void
+spf_dns_callback(struct rdns_reply *reply, gpointer arg)
+{
+ struct spf_record *rec = arg;
+ struct spf_resolved_element *resolved = NULL;
+ struct spf_addr *addr;
+
+ rec->requests_inflight--;
+
+ if (reply->flags & RDNS_TRUNCATED) {
+ msg_warn_spf("got a truncated record when trying to resolve TXT record for %s",
+ rec->sender_domain);
+ resolved = rspamd_spf_new_addr_list(rec, rec->sender_domain);
+ addr = g_malloc0(sizeof(*addr));
+ addr->flags |= RSPAMD_SPF_FLAG_TEMPFAIL;
+ g_ptr_array_insert(resolved->elts, 0, addr);
+
+ rspamd_spf_maybe_return(rec);
+
+ return;
+ }
+ else {
+ if (reply->code == RDNS_RC_NOERROR) {
+ resolved = rspamd_spf_new_addr_list(rec, rec->sender_domain);
+ if (rec->resolved->len == 1) {
+ /* Top level resolved element */
+ rec->ttl = reply->entries->ttl;
+ }
+ }
+ else if ((reply->code == RDNS_RC_NOREC || reply->code == RDNS_RC_NXDOMAIN) && rec->dns_requests == 0) {
+ resolved = rspamd_spf_new_addr_list(rec, rec->sender_domain);
+ addr = g_malloc0(sizeof(*addr));
+ addr->flags |= RSPAMD_SPF_FLAG_NA;
+ g_ptr_array_insert(resolved->elts, 0, addr);
+ }
+ else if (reply->code != RDNS_RC_NOREC && reply->code != RDNS_RC_NXDOMAIN && rec->dns_requests == 0) {
+ resolved = rspamd_spf_new_addr_list(rec, rec->sender_domain);
+ addr = g_malloc0(sizeof(*addr));
+ addr->flags |= RSPAMD_SPF_FLAG_TEMPFAIL;
+ g_ptr_array_insert(resolved->elts, 0, addr);
+ }
+ }
+
+ if (resolved) {
+ struct rdns_reply_entry *selected = NULL;
+
+ if (!spf_process_txt_record(rec, resolved, reply, &selected)) {
+ resolved = g_ptr_array_index(rec->resolved, 0);
+
+ if (rec->resolved->len > 1) {
+ addr = g_ptr_array_index(resolved->elts, 0);
+ if ((reply->code == RDNS_RC_NOREC || reply->code == RDNS_RC_NXDOMAIN) && (addr->flags & RSPAMD_SPF_FLAG_REDIRECT)) {
+ addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL;
+ }
+ else {
+ addr->flags |= RSPAMD_SPF_FLAG_TEMPFAIL;
+ }
+ }
+ else {
+ addr = g_malloc0(sizeof(*addr));
+
+ if (reply->code == RDNS_RC_NOREC || reply->code == RDNS_RC_NXDOMAIN || reply->code == RDNS_RC_NOERROR) {
+ addr->flags |= RSPAMD_SPF_FLAG_NA;
+ }
+ else {
+ addr->flags |= RSPAMD_SPF_FLAG_TEMPFAIL;
+ }
+ g_ptr_array_insert(resolved->elts, 0, addr);
+ }
+ }
+ else {
+ rec->top_record = rspamd_mempool_strdup(rec->task->task_pool,
+ selected->content.txt.data);
+ rspamd_mempool_set_variable(rec->task->task_pool,
+ RSPAMD_MEMPOOL_SPF_RECORD,
+ (gpointer) rec->top_record, NULL);
+ }
+ }
+
+ rspamd_spf_maybe_return(rec);
+}
+
+static struct rspamd_spf_cred *
+rspamd_spf_cache_domain(struct rspamd_task *task)
+{
+ struct rspamd_email_address *addr;
+ struct rspamd_spf_cred *cred = NULL;
+
+ addr = rspamd_task_get_sender(task);
+ if (!addr || (addr->flags & RSPAMD_EMAIL_ADDR_EMPTY)) {
+ /* Get domain from helo */
+
+ if (task->helo) {
+ GString *fs = g_string_new("");
+
+ cred = rspamd_mempool_alloc(task->task_pool, sizeof(*cred));
+ cred->domain = task->helo;
+ cred->local_part = "postmaster";
+ rspamd_printf_gstring(fs, "postmaster@%s", cred->domain);
+ cred->sender = fs->str;
+ rspamd_mempool_add_destructor(task->task_pool,
+ rspamd_gstring_free_hard, fs);
+ }
+ }
+ else {
+ rspamd_ftok_t tok;
+
+ cred = rspamd_mempool_alloc(task->task_pool, sizeof(*cred));
+ tok.begin = addr->domain;
+ tok.len = addr->domain_len;
+ cred->domain = rspamd_mempool_ftokdup(task->task_pool, &tok);
+ tok.begin = addr->user;
+ tok.len = addr->user_len;
+ cred->local_part = rspamd_mempool_ftokdup(task->task_pool, &tok);
+ tok.begin = addr->addr;
+ tok.len = addr->addr_len;
+ cred->sender = rspamd_mempool_ftokdup(task->task_pool, &tok);
+ }
+
+ if (cred) {
+ rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_SPF_DOMAIN,
+ cred, NULL);
+ }
+
+ return cred;
+}
+
+struct rspamd_spf_cred *
+rspamd_spf_get_cred(struct rspamd_task *task)
+{
+ struct rspamd_spf_cred *cred;
+
+ cred = rspamd_mempool_get_variable(task->task_pool,
+ RSPAMD_MEMPOOL_SPF_DOMAIN);
+
+ if (!cred) {
+ cred = rspamd_spf_cache_domain(task);
+ }
+
+ return cred;
+}
+
+const gchar *
+rspamd_spf_get_domain(struct rspamd_task *task)
+{
+ gchar *domain = NULL;
+ struct rspamd_spf_cred *cred;
+
+ cred = rspamd_spf_get_cred(task);
+
+ if (cred) {
+ domain = cred->domain;
+ }
+
+ return domain;
+}
+
+gboolean
+rspamd_spf_resolve(struct rspamd_task *task, spf_cb_t callback,
+ gpointer cbdata, struct rspamd_spf_cred *cred)
+{
+ struct spf_record *rec;
+
+ if (!cred || !cred->domain) {
+ return FALSE;
+ }
+
+ /* First lookup in the hash */
+ if (spf_lib_ctx->spf_hash) {
+ struct spf_resolved *cached;
+
+ cached = rspamd_lru_hash_lookup(spf_lib_ctx->spf_hash, cred->domain,
+ task->task_timestamp);
+
+ if (cached) {
+ cached->flags |= RSPAMD_SPF_FLAG_CACHED;
+
+ if (cached->top_record) {
+ rspamd_mempool_set_variable(task->task_pool,
+ RSPAMD_MEMPOOL_SPF_RECORD,
+ rspamd_mempool_strdup(task->task_pool,
+ cached->top_record),
+ NULL);
+ }
+ callback(cached, task, cbdata);
+
+ return TRUE;
+ }
+ }
+
+
+ rec = rspamd_mempool_alloc0(task->task_pool, sizeof(struct spf_record));
+ rec->task = task;
+ rec->callback = callback;
+ rec->cbdata = cbdata;
+
+ rec->resolved = g_ptr_array_sized_new(8);
+
+ /* Add destructor */
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) spf_record_destructor,
+ rec);
+
+ /* Extract from data */
+ rec->sender = cred->sender;
+ rec->local_part = cred->local_part;
+ rec->sender_domain = cred->domain;
+
+ if (rspamd_dns_resolver_request_task_forced(task,
+ spf_dns_callback,
+ (void *) rec, RDNS_REQUEST_TXT, rec->sender_domain)) {
+ rec->requests_inflight++;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+struct spf_resolved *
+_spf_record_ref(struct spf_resolved *flat, const gchar *loc)
+{
+ REF_RETAIN(flat);
+ return flat;
+}
+
+void _spf_record_unref(struct spf_resolved *flat, const gchar *loc)
+{
+ REF_RELEASE(flat);
+}
+
+gchar *
+spf_addr_mask_to_string(struct spf_addr *addr)
+{
+ GString *res;
+ gchar *s, ipstr[INET6_ADDRSTRLEN + 1];
+
+ if (addr->flags & RSPAMD_SPF_FLAG_ANY) {
+ res = g_string_new("any");
+ }
+ else if (addr->flags & RSPAMD_SPF_FLAG_IPV4) {
+ (void) inet_ntop(AF_INET, addr->addr4, ipstr, sizeof(ipstr));
+ res = g_string_sized_new(sizeof(ipstr));
+ rspamd_printf_gstring(res, "%s/%d", ipstr, addr->m.dual.mask_v4);
+ }
+ else if (addr->flags & RSPAMD_SPF_FLAG_IPV6) {
+ (void) inet_ntop(AF_INET6, addr->addr6, ipstr, sizeof(ipstr));
+ res = g_string_sized_new(sizeof(ipstr));
+ rspamd_printf_gstring(res, "%s/%d", ipstr, addr->m.dual.mask_v6);
+ }
+ else {
+ res = g_string_new(NULL);
+ rspamd_printf_gstring(res, "unknown, flags = %d", addr->flags);
+ }
+
+ s = res->str;
+ g_string_free(res, FALSE);
+
+
+ return s;
+}
+
+struct spf_addr *
+spf_addr_match_task(struct rspamd_task *task, struct spf_resolved *rec)
+{
+ const guint8 *s, *d;
+ guint af, mask, bmask, addrlen;
+ struct spf_addr *selected = NULL, *addr, *any_addr = NULL;
+ guint i;
+
+ if (task->from_addr == NULL) {
+ return FALSE;
+ }
+
+ for (i = 0; i < rec->elts->len; i++) {
+ addr = &g_array_index(rec->elts, struct spf_addr, i);
+ if (addr->flags & RSPAMD_SPF_FLAG_TEMPFAIL) {
+ continue;
+ }
+
+ af = rspamd_inet_address_get_af(task->from_addr);
+ /* Basic comparing algorithm */
+ if (((addr->flags & RSPAMD_SPF_FLAG_IPV6) && af == AF_INET6) ||
+ ((addr->flags & RSPAMD_SPF_FLAG_IPV4) && af == AF_INET)) {
+ d = rspamd_inet_address_get_hash_key(task->from_addr, &addrlen);
+
+ if (af == AF_INET6) {
+ s = (const guint8 *) addr->addr6;
+ mask = addr->m.dual.mask_v6;
+ }
+ else {
+ s = (const guint8 *) addr->addr4;
+ mask = addr->m.dual.mask_v4;
+ }
+
+ /* Compare the first bytes */
+ bmask = mask / CHAR_BIT;
+ if (mask > addrlen * CHAR_BIT) {
+ msg_info_task("bad mask length: %d", mask);
+ }
+ else if (memcmp(s, d, bmask) == 0) {
+ if (bmask * CHAR_BIT < mask) {
+ /* Compare the remaining bits */
+ s += bmask;
+ d += bmask;
+ mask = (0xffu << (CHAR_BIT - (mask - bmask * 8u))) & 0xffu;
+
+ if ((*s & mask) == (*d & mask)) {
+ selected = addr;
+ break;
+ }
+ }
+ else {
+ selected = addr;
+ break;
+ }
+ }
+ }
+ else {
+ if (addr->flags & RSPAMD_SPF_FLAG_ANY) {
+ any_addr = addr;
+ }
+ }
+ }
+
+ if (selected) {
+ return selected;
+ }
+
+ return any_addr;
+} \ No newline at end of file
diff --git a/src/libserver/spf.h b/src/libserver/spf.h
new file mode 100644
index 0000000..871ed29
--- /dev/null
+++ b/src/libserver/spf.h
@@ -0,0 +1,159 @@
+#ifndef RSPAMD_SPF_H
+#define RSPAMD_SPF_H
+
+#include "config.h"
+#include "ref.h"
+#include "addr.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct spf_resolved;
+
+typedef void (*spf_cb_t)(struct spf_resolved *record,
+ struct rspamd_task *task, gpointer cbdata);
+
+typedef enum spf_mech_e {
+ SPF_FAIL,
+ SPF_SOFT_FAIL,
+ SPF_PASS,
+ SPF_NEUTRAL
+} spf_mech_t;
+
+static inline gchar spf_mech_char(spf_mech_t mech)
+{
+ switch (mech) {
+ case SPF_FAIL:
+ return '-';
+ case SPF_SOFT_FAIL:
+ return '~';
+ case SPF_PASS:
+ return '+';
+ case SPF_NEUTRAL:
+ default:
+ return '?';
+ }
+}
+
+typedef enum spf_action_e {
+ SPF_RESOLVE_MX,
+ SPF_RESOLVE_A,
+ SPF_RESOLVE_PTR,
+ SPF_RESOLVE_AAA,
+ SPF_RESOLVE_REDIRECT,
+ SPF_RESOLVE_INCLUDE,
+ SPF_RESOLVE_EXISTS,
+ SPF_RESOLVE_EXP
+} spf_action_t;
+
+#define RSPAMD_SPF_FLAG_IPV6 (1u << 0u)
+#define RSPAMD_SPF_FLAG_IPV4 (1u << 1u)
+#define RSPAMD_SPF_FLAG_PROCESSED (1u << 2u)
+#define RSPAMD_SPF_FLAG_ANY (1u << 3u)
+#define RSPAMD_SPF_FLAG_PARSED (1u << 4u)
+#define RSPAMD_SPF_FLAG_INVALID (1u << 5u)
+#define RSPAMD_SPF_FLAG_REFERENCE (1u << 6u)
+#define RSPAMD_SPF_FLAG_REDIRECT (1u << 7u)
+#define RSPAMD_SPF_FLAG_TEMPFAIL (1u << 8u)
+#define RSPAMD_SPF_FLAG_NA (1u << 9u)
+#define RSPAMD_SPF_FLAG_PERMFAIL (1u << 10u)
+#define RSPAMD_SPF_FLAG_RESOLVED (1u << 11u)
+#define RSPAMD_SPF_FLAG_CACHED (1u << 12u)
+
+/** Default SPF limits for avoiding abuse **/
+#define SPF_MAX_NESTING 10
+#define SPF_MAX_DNS_REQUESTS 30
+#define SPF_MIN_CACHE_TTL (60 * 5) /* 5 minutes */
+
+struct spf_addr {
+ guchar addr6[sizeof(struct in6_addr)];
+ guchar addr4[sizeof(struct in_addr)];
+ union {
+ struct {
+ guint16 mask_v4;
+ guint16 mask_v6;
+ } dual;
+ guint32 idx;
+ } m;
+ guint flags;
+ spf_mech_t mech;
+ gchar *spf_string;
+ struct spf_addr *prev, *next;
+};
+
+enum rspamd_spf_resolved_flags {
+ RSPAMD_SPF_RESOLVED_NORMAL = 0,
+ RSPAMD_SPF_RESOLVED_TEMP_FAILED = (1u << 0u),
+ RSPAMD_SPF_RESOLVED_PERM_FAILED = (1u << 1u),
+ RSPAMD_SPF_RESOLVED_NA = (1u << 2u),
+};
+
+struct spf_resolved {
+ gchar *domain;
+ gchar *top_record;
+ guint ttl;
+ gint flags;
+ gdouble timestamp;
+ guint64 digest;
+ GArray *elts; /* Flat list of struct spf_addr */
+ ref_entry_t ref; /* Refcounting */
+};
+
+struct rspamd_spf_cred {
+ gchar *local_part;
+ gchar *domain;
+ gchar *sender;
+};
+
+/*
+ * Resolve spf record for specified task and call a callback after resolution fails/succeed
+ */
+gboolean rspamd_spf_resolve(struct rspamd_task *task,
+ spf_cb_t callback,
+ gpointer cbdata,
+ struct rspamd_spf_cred *cred);
+
+/*
+ * Get a domain for spf for specified task
+ */
+const gchar *rspamd_spf_get_domain(struct rspamd_task *task);
+
+struct rspamd_spf_cred *rspamd_spf_get_cred(struct rspamd_task *task);
+/*
+ * Increase refcount
+ */
+struct spf_resolved *_spf_record_ref(struct spf_resolved *rec, const gchar *loc);
+#define spf_record_ref(rec) \
+ _spf_record_ref((rec), G_STRLOC)
+/*
+ * Decrease refcount
+ */
+void _spf_record_unref(struct spf_resolved *rec, const gchar *loc);
+#define spf_record_unref(rec) \
+ _spf_record_unref((rec), G_STRLOC)
+
+/**
+ * Prints address + mask in a freshly allocated string (must be freed)
+ * @param addr
+ * @return
+ */
+gchar *spf_addr_mask_to_string(struct spf_addr *addr);
+
+/**
+ * Returns spf address that matches the specific task (or nil if not matched)
+ * @param task
+ * @param rec
+ * @return
+ */
+struct spf_addr *spf_addr_match_task(struct rspamd_task *task,
+ struct spf_resolved *rec);
+
+void spf_library_config(const ucl_object_t *obj);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/ssl_util.c b/src/libserver/ssl_util.c
new file mode 100644
index 0000000..8ee53b0
--- /dev/null
+++ b/src/libserver/ssl_util.c
@@ -0,0 +1,1133 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/util.h"
+#include "libutil/hash.h"
+#include "libserver/logger.h"
+#include "libserver/cfg_file.h"
+#include "ssl_util.h"
+#include "unix-std.h"
+#include "cryptobox.h"
+#include "contrib/libottery/ottery.h"
+
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <openssl/conf.h>
+#include <openssl/evp.h>
+#include <openssl/engine.h>
+#include <openssl/x509v3.h>
+
+enum rspamd_ssl_state {
+ ssl_conn_reset = 0,
+ ssl_conn_init,
+ ssl_conn_connected,
+ ssl_next_read,
+ ssl_next_write,
+ ssl_next_shutdown,
+};
+
+enum rspamd_ssl_shutdown {
+ ssl_shut_default = 0,
+ ssl_shut_unclean,
+};
+
+struct rspamd_ssl_ctx {
+ SSL_CTX *s;
+ rspamd_lru_hash_t *sessions;
+};
+
+struct rspamd_ssl_connection {
+ gint fd;
+ enum rspamd_ssl_state state;
+ enum rspamd_ssl_shutdown shut;
+ gboolean verify_peer;
+ SSL *ssl;
+ struct rspamd_ssl_ctx *ssl_ctx;
+ gchar *hostname;
+ struct rspamd_io_ev *ev;
+ struct rspamd_io_ev *shut_ev;
+ struct ev_loop *event_loop;
+ rspamd_ssl_handler_t handler;
+ rspamd_ssl_error_handler_t err_handler;
+ gpointer handler_data;
+ gchar log_tag[8];
+};
+
+#define msg_debug_ssl(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_ssl_log_id, "ssl", conn->log_tag, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+static void rspamd_ssl_event_handler(gint fd, short what, gpointer ud);
+
+INIT_LOG_MODULE(ssl)
+
+static GQuark
+rspamd_ssl_quark(void)
+{
+ return g_quark_from_static_string("rspamd-ssl");
+}
+
+#if (OPENSSL_VERSION_NUMBER >= 0x10100000L) && !defined(LIBRESSL_VERSION_NUMBER)
+#ifndef X509_get_notBefore
+#define X509_get_notBefore(x) X509_get0_notBefore(x)
+#endif
+#ifndef X509_get_notAfter
+#define X509_get_notAfter(x) X509_get0_notAfter(x)
+#endif
+#ifndef ASN1_STRING_data
+#define ASN1_STRING_data(x) ASN1_STRING_get0_data(x)
+#endif
+#endif
+
+/* $OpenBSD: tls_verify.c,v 1.14 2015/09/29 10:17:04 deraadt Exp $ */
+/*
+ * Copyright (c) 2014 Jeremie Courreges-Anglas <jca@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+static gboolean
+rspamd_tls_match_name(const char *cert_name, const char *name)
+{
+ const char *cert_domain, *domain, *next_dot;
+
+ if (g_ascii_strcasecmp(cert_name, name) == 0) {
+ return TRUE;
+ }
+
+ /* Wildcard match? */
+ if (cert_name[0] == '*') {
+ /*
+ * Valid wildcards:
+ * - "*.domain.tld"
+ * - "*.sub.domain.tld"
+ * - etc.
+ * Reject "*.tld".
+ * No attempt to prevent the use of eg. "*.co.uk".
+ */
+ cert_domain = &cert_name[1];
+ /* Disallow "*" */
+ if (cert_domain[0] == '\0') {
+ return FALSE;
+ }
+
+ /* Disallow "*foo" */
+ if (cert_domain[0] != '.') {
+ return FALSE;
+ }
+ /* Disallow "*.." */
+ if (cert_domain[1] == '.') {
+ return FALSE;
+ }
+ next_dot = strchr(&cert_domain[1], '.');
+ /* Disallow "*.bar" */
+ if (next_dot == NULL) {
+ return FALSE;
+ }
+ /* Disallow "*.bar.." */
+ if (next_dot[1] == '.') {
+ return FALSE;
+ }
+
+ domain = strchr(name, '.');
+
+ /* No wildcard match against a name with no host part. */
+ if (name[0] == '.') {
+ return FALSE;
+ }
+ /* No wildcard match against a name with no domain part. */
+ if (domain == NULL || strlen(domain) == 1) {
+ return FALSE;
+ }
+
+ if (g_ascii_strcasecmp(cert_domain, domain) == 0) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+/* See RFC 5280 section 4.2.1.6 for SubjectAltName details. */
+static gboolean
+rspamd_tls_check_subject_altname(X509 *cert, const char *name)
+{
+ STACK_OF(GENERAL_NAME) *altname_stack = NULL;
+ int addrlen, type;
+ int count, i;
+ union {
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ } addrbuf;
+ gboolean ret = FALSE;
+
+ altname_stack = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL);
+
+ if (altname_stack == NULL) {
+ return FALSE;
+ }
+
+ if (inet_pton(AF_INET, name, &addrbuf) == 1) {
+ type = GEN_IPADD;
+ addrlen = 4;
+ }
+ else if (inet_pton(AF_INET6, name, &addrbuf) == 1) {
+ type = GEN_IPADD;
+ addrlen = 16;
+ }
+ else {
+ type = GEN_DNS;
+ addrlen = 0;
+ }
+
+ count = sk_GENERAL_NAME_num(altname_stack);
+
+ for (i = 0; i < count; i++) {
+ GENERAL_NAME *altname;
+
+ altname = sk_GENERAL_NAME_value(altname_stack, i);
+
+ if (altname->type != type) {
+ continue;
+ }
+
+ if (type == GEN_DNS) {
+ const char *data;
+ int format, len;
+
+ format = ASN1_STRING_type(altname->d.dNSName);
+
+ if (format == V_ASN1_IA5STRING) {
+ data = (const char *) ASN1_STRING_data(altname->d.dNSName);
+ len = ASN1_STRING_length(altname->d.dNSName);
+
+ if (len < 0 || len != (gint) strlen(data)) {
+ ret = FALSE;
+ break;
+ }
+
+ /*
+ * Per RFC 5280 section 4.2.1.6:
+ * " " is a legal domain name, but that
+ * dNSName must be rejected.
+ */
+ if (strcmp(data, " ") == 0) {
+ ret = FALSE;
+ break;
+ }
+
+ if (rspamd_tls_match_name(data, name)) {
+ ret = TRUE;
+ break;
+ }
+ }
+ }
+ else if (type == GEN_IPADD) {
+ const char *data;
+ int datalen;
+
+ datalen = ASN1_STRING_length(altname->d.iPAddress);
+ data = (const char *) ASN1_STRING_data(altname->d.iPAddress);
+
+ if (datalen < 0) {
+ ret = FALSE;
+ break;
+ }
+
+ /*
+ * Per RFC 5280 section 4.2.1.6:
+ * IPv4 must use 4 octets and IPv6 must use 16 octets.
+ */
+ if (datalen == addrlen && memcmp(data, &addrbuf, addrlen) == 0) {
+ ret = TRUE;
+ break;
+ }
+ }
+ }
+
+ sk_GENERAL_NAME_pop_free(altname_stack, GENERAL_NAME_free);
+ return ret;
+}
+
+static gboolean
+rspamd_tls_check_common_name(X509 *cert, const char *name)
+{
+ X509_NAME *subject_name;
+ char *common_name = NULL;
+ union {
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ } addrbuf;
+ int common_name_len;
+ gboolean ret = FALSE;
+
+ subject_name = X509_get_subject_name(cert);
+ if (subject_name == NULL) {
+ goto out;
+ }
+
+ common_name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName, NULL, 0);
+
+ if (common_name_len < 0) {
+ goto out;
+ }
+
+ common_name = g_malloc0(common_name_len + 1);
+ X509_NAME_get_text_by_NID(subject_name, NID_commonName, common_name,
+ common_name_len + 1);
+
+ /* NUL bytes in CN? */
+ if (common_name_len != (gint) strlen(common_name)) {
+ goto out;
+ }
+
+ if (inet_pton(AF_INET, name, &addrbuf) == 1 || inet_pton(AF_INET6, name, &addrbuf) == 1) {
+ /*
+ * We don't want to attempt wildcard matching against IP
+ * addresses, so perform a simple comparison here.
+ */
+ if (strcmp(common_name, name) == 0) {
+ ret = TRUE;
+ }
+ else {
+ ret = FALSE;
+ }
+
+ goto out;
+ }
+
+ if (rspamd_tls_match_name(common_name, name)) {
+ ret = TRUE;
+ }
+
+out:
+ g_free(common_name);
+
+ return ret;
+}
+
+static gboolean
+rspamd_tls_check_name(X509 *cert, const char *name)
+{
+ gboolean ret;
+
+ ret = rspamd_tls_check_subject_altname(cert, name);
+ if (ret) {
+ return ret;
+ }
+
+ return rspamd_tls_check_common_name(cert, name);
+}
+
+static gboolean
+rspamd_ssl_peer_verify(struct rspamd_ssl_connection *c)
+{
+ X509 *server_cert;
+ glong ver_err;
+ GError *err = NULL;
+
+ ver_err = SSL_get_verify_result(c->ssl);
+
+ if (ver_err != X509_V_OK) {
+ g_set_error(&err, rspamd_ssl_quark(), 400, "certificate validation "
+ "failed: %s",
+ X509_verify_cert_error_string(ver_err));
+ c->err_handler(c->handler_data, err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+
+ /* Get server's certificate */
+ server_cert = SSL_get_peer_certificate(c->ssl);
+ if (server_cert == NULL) {
+ g_set_error(&err, rspamd_ssl_quark(), 401, "peer certificate is absent");
+ c->err_handler(c->handler_data, err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+
+ if (c->hostname) {
+ if (!rspamd_tls_check_name(server_cert, c->hostname)) {
+ X509_free(server_cert);
+ g_set_error(&err, rspamd_ssl_quark(), 403, "peer certificate fails "
+ "hostname verification for %s",
+ c->hostname);
+ c->err_handler(c->handler_data, err);
+ g_error_free(err);
+
+ return FALSE;
+ }
+ }
+
+ X509_free(server_cert);
+
+ return TRUE;
+}
+
+static void
+rspamd_tls_set_error(gint retcode, const gchar *stage, GError **err)
+{
+ GString *reason;
+ gchar buf[120];
+ gint err_code = 0;
+
+ reason = g_string_sized_new(sizeof(buf));
+
+ if (retcode == SSL_ERROR_SYSCALL) {
+ rspamd_printf_gstring(reason, "syscall fail: %s", strerror(errno));
+ err_code = 500;
+ }
+ else {
+ while ((err_code = ERR_get_error()) != 0) {
+ ERR_error_string(err_code, buf);
+ rspamd_printf_gstring(reason, "ssl error: %s,", buf);
+ }
+
+ err_code = 400;
+
+ if (reason->len > 0 && reason->str[reason->len - 1] == ',') {
+ reason->str[reason->len - 1] = '\0';
+ reason->len--;
+ }
+ }
+
+ g_set_error(err, rspamd_ssl_quark(), err_code,
+ "ssl %s error: %s", stage, reason->str);
+ g_string_free(reason, TRUE);
+}
+
+static void
+rspamd_ssl_connection_dtor(struct rspamd_ssl_connection *conn)
+{
+ msg_debug_ssl("closing SSL connection %p; %d sessions in the cache",
+ conn->ssl, rspamd_lru_hash_size(conn->ssl_ctx->sessions));
+ SSL_free(conn->ssl);
+
+ if (conn->hostname) {
+ g_free(conn->hostname);
+ }
+
+ /*
+ * Try to workaround for the race between timeout and ssl error
+ */
+ if (conn->shut_ev != conn->ev && ev_can_stop(&conn->ev->tm)) {
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ }
+
+ if (conn->shut_ev) {
+ rspamd_ev_watcher_stop(conn->event_loop, conn->shut_ev);
+ g_free(conn->shut_ev);
+ }
+
+ close(conn->fd);
+ g_free(conn);
+}
+
+static void
+rspamd_ssl_shutdown(struct rspamd_ssl_connection *conn)
+{
+ gint ret = 0, nret, retries;
+ static const gint max_retries = 5;
+
+ /*
+ * Fucking openssl...
+ * From the manual, 0 means: "The shutdown is not yet finished.
+ * Call SSL_shutdown() for a second time,
+ * if a bidirectional shutdown shall be performed.
+ * The output of SSL_get_error(3) may be misleading,
+ * as an erroneous SSL_ERROR_SYSCALL may be flagged
+ * even though no error occurred."
+ *
+ * What is `second`, what if `second` also returns 0?
+ * What a retarded behaviour!
+ */
+ for (retries = 0; retries < max_retries; retries++) {
+ ret = SSL_shutdown(conn->ssl);
+
+ if (ret != 0) {
+ break;
+ }
+ }
+
+ if (ret == 1) {
+ /* All done */
+ msg_debug_ssl("ssl shutdown: all done");
+ rspamd_ssl_connection_dtor(conn);
+ }
+ else if (ret < 0) {
+ short what;
+
+ nret = SSL_get_error(conn->ssl, ret);
+ conn->state = ssl_next_shutdown;
+
+ if (nret == SSL_ERROR_WANT_READ) {
+ msg_debug_ssl("ssl shutdown: need read");
+ what = EV_READ;
+ }
+ else if (nret == SSL_ERROR_WANT_WRITE) {
+ msg_debug_ssl("ssl shutdown: need write");
+ what = EV_WRITE;
+ }
+ else {
+ /* Cannot do anything else, fatal error */
+ GError *err = NULL;
+
+ rspamd_tls_set_error(nret, "final shutdown", &err);
+ msg_debug_ssl("ssl shutdown: fatal error: %e; retries=%d; ret=%d",
+ err, retries, ret);
+ g_error_free(err);
+ rspamd_ssl_connection_dtor(conn);
+
+ return;
+ }
+
+ /* As we own fd, we can try to perform shutdown one more time */
+ /* BUGON: but we DO NOT own conn->ev, and it's a big issue */
+ static const ev_tstamp shutdown_time = 5.0;
+
+ if (conn->shut_ev == NULL) {
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ conn->shut_ev = g_malloc0(sizeof(*conn->shut_ev));
+ rspamd_ev_watcher_init(conn->shut_ev, conn->fd, what,
+ rspamd_ssl_event_handler, conn);
+ rspamd_ev_watcher_start(conn->event_loop, conn->shut_ev, shutdown_time);
+ /* XXX: can it be done safely ? */
+ conn->ev = conn->shut_ev;
+ }
+ else {
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->shut_ev, what);
+ }
+
+ conn->state = ssl_next_shutdown;
+ }
+ else if (ret == 0) {
+ /* What can we do here?? */
+ msg_debug_ssl("ssl shutdown: openssl failed to initiate shutdown after "
+ "%d attempts!",
+ max_retries);
+ rspamd_ssl_connection_dtor(conn);
+ }
+}
+
+static void
+rspamd_ssl_event_handler(gint fd, short what, gpointer ud)
+{
+ struct rspamd_ssl_connection *conn = ud;
+ gint ret;
+ GError *err = NULL;
+
+ if (what == EV_TIMER) {
+ if (conn->state == ssl_next_shutdown) {
+ /* No way to restore, just terminate */
+ rspamd_ssl_connection_dtor(conn);
+ }
+ else {
+ conn->shut = ssl_shut_unclean;
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ g_set_error(&err, rspamd_ssl_quark(), 408,
+ "ssl connection timed out");
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ }
+
+ return;
+ }
+
+ msg_debug_ssl("ssl event; what=%d; c->state=%d", (int) what,
+ (int) conn->state);
+
+ switch (conn->state) {
+ case ssl_conn_init:
+ /* Continue connection */
+ ret = SSL_connect(conn->ssl);
+
+ if (ret == 1) {
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ /* Verify certificate */
+ if ((!conn->verify_peer) || rspamd_ssl_peer_verify(conn)) {
+ msg_debug_ssl("ssl connect: connected");
+ conn->state = ssl_conn_connected;
+ conn->handler(fd, EV_WRITE, conn->handler_data);
+ }
+ else {
+ return;
+ }
+ }
+ else {
+ ret = SSL_get_error(conn->ssl, ret);
+
+ if (ret == SSL_ERROR_WANT_READ) {
+ msg_debug_ssl("ssl connect: need read");
+ what = EV_READ;
+ }
+ else if (ret == SSL_ERROR_WANT_WRITE) {
+ msg_debug_ssl("ssl connect: need write");
+ what = EV_WRITE;
+ }
+ else {
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ rspamd_tls_set_error(ret, "connect", &err);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ return;
+ }
+
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->ev, what);
+ }
+ break;
+ case ssl_next_read:
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->ev, EV_READ);
+ conn->state = ssl_conn_connected;
+ conn->handler(fd, EV_READ, conn->handler_data);
+ break;
+ case ssl_next_write:
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->ev, EV_WRITE);
+ conn->state = ssl_conn_connected;
+ conn->handler(fd, EV_WRITE, conn->handler_data);
+ break;
+ case ssl_conn_connected:
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->ev, what);
+ conn->state = ssl_conn_connected;
+ conn->handler(fd, what, conn->handler_data);
+ break;
+ case ssl_next_shutdown:
+ rspamd_ssl_shutdown(conn);
+ break;
+ default:
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ g_set_error(&err, rspamd_ssl_quark(), 500,
+ "ssl bad state error: %d", conn->state);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ break;
+ }
+}
+
+struct rspamd_ssl_connection *
+rspamd_ssl_connection_new(gpointer ssl_ctx, struct ev_loop *ev_base,
+ gboolean verify_peer, const gchar *log_tag)
+{
+ struct rspamd_ssl_connection *conn;
+ struct rspamd_ssl_ctx *ctx = (struct rspamd_ssl_ctx *) ssl_ctx;
+
+ g_assert(ssl_ctx != NULL);
+ conn = g_malloc0(sizeof(*conn));
+ conn->ssl_ctx = ctx;
+ conn->event_loop = ev_base;
+ conn->verify_peer = verify_peer;
+
+ if (log_tag) {
+ rspamd_strlcpy(conn->log_tag, log_tag, sizeof(conn->log_tag));
+ }
+ else {
+ rspamd_random_hex(conn->log_tag, sizeof(log_tag) - 1);
+ conn->log_tag[sizeof(log_tag) - 1] = '\0';
+ }
+
+ return conn;
+}
+
+
+gboolean
+rspamd_ssl_connect_fd(struct rspamd_ssl_connection *conn, gint fd,
+ const gchar *hostname, struct rspamd_io_ev *ev, ev_tstamp timeout,
+ rspamd_ssl_handler_t handler, rspamd_ssl_error_handler_t err_handler,
+ gpointer handler_data)
+{
+ gint ret;
+ SSL_SESSION *session = NULL;
+
+ g_assert(conn != NULL);
+
+ /* Ensure that we start from the empty SSL errors stack */
+ ERR_clear_error();
+ conn->ssl = SSL_new(conn->ssl_ctx->s);
+
+ if (hostname) {
+ session = rspamd_lru_hash_lookup(conn->ssl_ctx->sessions, hostname,
+ ev_now(conn->event_loop));
+ }
+
+ if (session) {
+ SSL_set_session(conn->ssl, session);
+ }
+
+ SSL_set_app_data(conn->ssl, conn);
+ msg_debug_ssl("new ssl connection %p; session reused=%s",
+ conn->ssl, SSL_session_reused(conn->ssl) ? "true" : "false");
+
+ if (conn->state != ssl_conn_reset) {
+ return FALSE;
+ }
+
+ /* We dup fd to allow graceful closing */
+ gint nfd = dup(fd);
+
+ if (nfd == -1) {
+ return FALSE;
+ }
+
+ conn->fd = nfd;
+ conn->ev = ev;
+ conn->handler = handler;
+ conn->err_handler = err_handler;
+ conn->handler_data = handler_data;
+
+ if (SSL_set_fd(conn->ssl, conn->fd) != 1) {
+ close(conn->fd);
+
+ return FALSE;
+ }
+
+ if (hostname) {
+ conn->hostname = g_strdup(hostname);
+#ifdef HAVE_SSL_TLSEXT_HOSTNAME
+ SSL_set_tlsext_host_name(conn->ssl, conn->hostname);
+#endif
+ }
+
+ conn->state = ssl_conn_init;
+
+ ret = SSL_connect(conn->ssl);
+
+ if (ret == 1) {
+ conn->state = ssl_conn_connected;
+
+ msg_debug_ssl("connected, start write event");
+ rspamd_ev_watcher_stop(conn->event_loop, ev);
+ rspamd_ev_watcher_init(ev, nfd, EV_WRITE, rspamd_ssl_event_handler, conn);
+ rspamd_ev_watcher_start(conn->event_loop, ev, timeout);
+ }
+ else {
+ ret = SSL_get_error(conn->ssl, ret);
+
+ if (ret == SSL_ERROR_WANT_READ) {
+ msg_debug_ssl("not connected, want read");
+ }
+ else if (ret == SSL_ERROR_WANT_WRITE) {
+ msg_debug_ssl("not connected, want write");
+ }
+ else {
+ GError *err = NULL;
+
+ conn->shut = ssl_shut_unclean;
+ rspamd_tls_set_error(ret, "initial connect", &err);
+ msg_debug_ssl("not connected, fatal error %e", err);
+ g_error_free(err);
+
+
+ return FALSE;
+ }
+
+ rspamd_ev_watcher_stop(conn->event_loop, ev);
+ rspamd_ev_watcher_init(ev, nfd, EV_WRITE | EV_READ,
+ rspamd_ssl_event_handler, conn);
+ rspamd_ev_watcher_start(conn->event_loop, ev, timeout);
+ }
+
+ return TRUE;
+}
+
+void rspamd_ssl_connection_restore_handlers(struct rspamd_ssl_connection *conn,
+ rspamd_ssl_handler_t handler,
+ rspamd_ssl_error_handler_t err_handler,
+ gpointer handler_data,
+ short ev_what)
+{
+ conn->handler = handler;
+ conn->err_handler = err_handler;
+ conn->handler_data = handler_data;
+
+ rspamd_ev_watcher_stop(conn->event_loop, conn->ev);
+ rspamd_ev_watcher_init(conn->ev, conn->fd, ev_what, rspamd_ssl_event_handler, conn);
+ rspamd_ev_watcher_start(conn->event_loop, conn->ev, conn->ev->timeout);
+}
+
+gssize
+rspamd_ssl_read(struct rspamd_ssl_connection *conn, gpointer buf,
+ gsize buflen)
+{
+ gint ret;
+ short what;
+ GError *err = NULL;
+
+ g_assert(conn != NULL);
+
+ if (conn->state != ssl_conn_connected && conn->state != ssl_next_read) {
+ errno = EINVAL;
+ g_set_error(&err, rspamd_ssl_quark(), 400,
+ "ssl state error: cannot read data");
+ conn->shut = ssl_shut_unclean;
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+
+ return -1;
+ }
+
+ ret = SSL_read(conn->ssl, buf, buflen);
+ msg_debug_ssl("ssl read: %d", ret);
+
+ if (ret > 0) {
+ conn->state = ssl_conn_connected;
+ return ret;
+ }
+ else if (ret == 0) {
+ ret = SSL_get_error(conn->ssl, ret);
+
+ if (ret == SSL_ERROR_ZERO_RETURN || ret == SSL_ERROR_SYSCALL) {
+ conn->state = ssl_conn_reset;
+ return 0;
+ }
+ else {
+ conn->shut = ssl_shut_unclean;
+ rspamd_tls_set_error(ret, "read", &err);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ errno = EINVAL;
+
+ return -1;
+ }
+ }
+ else {
+ ret = SSL_get_error(conn->ssl, ret);
+ conn->state = ssl_next_read;
+ what = 0;
+
+ if (ret == SSL_ERROR_WANT_READ) {
+ msg_debug_ssl("ssl read: need read");
+ what |= EV_READ;
+ }
+ else if (ret == SSL_ERROR_WANT_WRITE) {
+ msg_debug_ssl("ssl read: need write");
+ what |= EV_WRITE;
+ }
+ else {
+ conn->shut = ssl_shut_unclean;
+ rspamd_tls_set_error(ret, "read", &err);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ errno = EINVAL;
+
+ return -1;
+ }
+
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->ev, what);
+ errno = EAGAIN;
+ }
+
+ return -1;
+}
+
+gssize
+rspamd_ssl_write(struct rspamd_ssl_connection *conn, gconstpointer buf,
+ gsize buflen)
+{
+ gint ret;
+ short what;
+ GError *err = NULL;
+
+ g_assert(conn != NULL);
+
+ if (conn->state != ssl_conn_connected && conn->state != ssl_next_write) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ ret = SSL_write(conn->ssl, buf, buflen);
+ msg_debug_ssl("ssl write: ret=%d, buflen=%z", ret, buflen);
+
+ if (ret > 0) {
+ conn->state = ssl_conn_connected;
+ return ret;
+ }
+ else if (ret == 0) {
+ ret = SSL_get_error(conn->ssl, ret);
+
+ if (ret == SSL_ERROR_ZERO_RETURN) {
+ rspamd_tls_set_error(ret, "write", &err);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ errno = ECONNRESET;
+ conn->state = ssl_conn_reset;
+
+ return -1;
+ }
+ else {
+ conn->shut = ssl_shut_unclean;
+ rspamd_tls_set_error(ret, "write", &err);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ errno = EINVAL;
+
+ return -1;
+ }
+ }
+ else {
+ ret = SSL_get_error(conn->ssl, ret);
+ conn->state = ssl_next_write;
+
+ if (ret == SSL_ERROR_WANT_READ) {
+ msg_debug_ssl("ssl write: need read");
+ what = EV_READ;
+ }
+ else if (ret == SSL_ERROR_WANT_WRITE) {
+ msg_debug_ssl("ssl write: need write");
+ what = EV_WRITE;
+ }
+ else {
+ conn->shut = ssl_shut_unclean;
+ rspamd_tls_set_error(ret, "write", &err);
+ conn->err_handler(conn->handler_data, err);
+ g_error_free(err);
+ errno = EINVAL;
+
+ return -1;
+ }
+
+ rspamd_ev_watcher_reschedule(conn->event_loop, conn->ev, what);
+ errno = EAGAIN;
+ }
+
+ return -1;
+}
+
+gssize
+rspamd_ssl_writev(struct rspamd_ssl_connection *conn, struct iovec *iov,
+ gsize iovlen)
+{
+ /*
+ * Static is needed to avoid issue:
+ * https://github.com/openssl/openssl/issues/6865
+ */
+ static guchar ssl_buf[16384];
+ guchar *p;
+ struct iovec *cur;
+ gsize i, remain;
+
+ remain = sizeof(ssl_buf);
+ p = ssl_buf;
+
+ for (i = 0; i < iovlen; i++) {
+ cur = &iov[i];
+
+ if (cur->iov_len > 0) {
+ if (remain >= cur->iov_len) {
+ memcpy(p, cur->iov_base, cur->iov_len);
+ p += cur->iov_len;
+ remain -= cur->iov_len;
+ }
+ else {
+ memcpy(p, cur->iov_base, remain);
+ p += remain;
+ remain = 0;
+ break;
+ }
+ }
+ }
+
+ return rspamd_ssl_write(conn, ssl_buf, p - ssl_buf);
+}
+
+/**
+ * Removes connection data
+ * @param conn
+ */
+void rspamd_ssl_connection_free(struct rspamd_ssl_connection *conn)
+{
+ if (conn) {
+ if (conn->shut == ssl_shut_unclean) {
+ /* Ignore return result and close socket */
+ msg_debug_ssl("unclean shutdown");
+ SSL_set_quiet_shutdown(conn->ssl, 1);
+ (void) SSL_shutdown(conn->ssl);
+ rspamd_ssl_connection_dtor(conn);
+ }
+ else {
+ msg_debug_ssl("normal shutdown");
+ rspamd_ssl_shutdown(conn);
+ }
+ }
+}
+
+static int
+rspamd_ssl_new_client_session(SSL *ssl, SSL_SESSION *sess)
+{
+ struct rspamd_ssl_connection *conn;
+
+ conn = SSL_get_app_data(ssl);
+
+ if (conn->hostname) {
+ rspamd_lru_hash_insert(conn->ssl_ctx->sessions,
+ g_strdup(conn->hostname), SSL_get1_session(ssl),
+ ev_now(conn->event_loop), SSL_CTX_get_timeout(conn->ssl_ctx->s));
+ msg_debug_ssl("saved new session for %s: %p", conn->hostname, conn);
+ }
+
+ return 0;
+}
+
+static struct rspamd_ssl_ctx *
+rspamd_init_ssl_ctx_common(void)
+{
+ struct rspamd_ssl_ctx *ret;
+ SSL_CTX *ssl_ctx;
+ gint ssl_options;
+ static const guint client_cache_size = 1024;
+
+ rspamd_openssl_maybe_init();
+
+ ret = g_malloc0(sizeof(*ret));
+ ssl_options = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3;
+ ssl_ctx = SSL_CTX_new(SSLv23_method());
+
+#ifdef SSL_OP_NO_COMPRESSION
+ ssl_options |= SSL_OP_NO_COMPRESSION;
+#elif OPENSSL_VERSION_NUMBER >= 0x00908000L
+ sk_SSL_COMP_zero(SSL_COMP_get_compression_methods());
+#endif
+
+ SSL_CTX_set_options(ssl_ctx, ssl_options);
+
+#ifdef TLS1_3_VERSION
+ SSL_CTX_set_min_proto_version(ssl_ctx, 0);
+ SSL_CTX_set_max_proto_version(ssl_ctx, TLS1_3_VERSION);
+#endif
+
+#ifdef SSL_SESS_CACHE_CLIENT
+ SSL_CTX_set_session_cache_mode(ssl_ctx, SSL_SESS_CACHE_CLIENT | SSL_SESS_CACHE_NO_INTERNAL_STORE);
+#endif
+
+ ret->s = ssl_ctx;
+ ret->sessions = rspamd_lru_hash_new_full(client_cache_size,
+ g_free, (GDestroyNotify) SSL_SESSION_free, rspamd_str_hash,
+ rspamd_str_equal);
+ SSL_CTX_set_app_data(ssl_ctx, ret);
+ SSL_CTX_sess_set_new_cb(ssl_ctx, rspamd_ssl_new_client_session);
+
+ return ret;
+}
+
+gpointer
+rspamd_init_ssl_ctx(void)
+{
+ struct rspamd_ssl_ctx *ssl_ctx = rspamd_init_ssl_ctx_common();
+
+ SSL_CTX_set_verify(ssl_ctx->s, SSL_VERIFY_PEER, NULL);
+ SSL_CTX_set_verify_depth(ssl_ctx->s, 4);
+
+ return ssl_ctx;
+}
+
+gpointer rspamd_init_ssl_ctx_noverify(void)
+{
+ struct rspamd_ssl_ctx *ssl_ctx_noverify = rspamd_init_ssl_ctx_common();
+
+ SSL_CTX_set_verify(ssl_ctx_noverify->s, SSL_VERIFY_NONE, NULL);
+
+ return ssl_ctx_noverify;
+}
+
+void rspamd_openssl_maybe_init(void)
+{
+ static gboolean openssl_initialized = FALSE;
+
+ if (!openssl_initialized) {
+ ERR_load_crypto_strings();
+ SSL_load_error_strings();
+
+ OpenSSL_add_all_algorithms();
+ OpenSSL_add_all_digests();
+ OpenSSL_add_all_ciphers();
+
+#if OPENSSL_VERSION_NUMBER >= 0x1000104fL && OPENSSL_VERSION_NUMBER < 0x30000000L && !defined(LIBRESSL_VERSION_NUMBER)
+ ENGINE_load_builtin_engines();
+#endif
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ SSL_library_init();
+#else
+ OPENSSL_init_ssl(0, NULL);
+#endif
+
+#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)
+ OPENSSL_config(NULL);
+#endif
+ if (RAND_status() == 0) {
+ guchar seed[128];
+
+ /* Try to use ottery to seed rand */
+ ottery_rand_bytes(seed, sizeof(seed));
+ RAND_seed(seed, sizeof(seed));
+ rspamd_explicit_memzero(seed, sizeof(seed));
+ }
+
+ openssl_initialized = TRUE;
+ }
+}
+
+void rspamd_ssl_ctx_config(struct rspamd_config *cfg, gpointer ssl_ctx)
+{
+ struct rspamd_ssl_ctx *ctx = (struct rspamd_ssl_ctx *) ssl_ctx;
+ static const char default_secure_ciphers[] = "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4";
+
+ if (cfg->ssl_ca_path) {
+ if (SSL_CTX_load_verify_locations(ctx->s, cfg->ssl_ca_path,
+ NULL) != 1) {
+ msg_err_config("cannot load CA certs from %s: %s",
+ cfg->ssl_ca_path,
+ ERR_error_string(ERR_get_error(), NULL));
+ }
+ }
+ else {
+ msg_debug_config("ssl_ca_path is not set, using default CA path");
+ SSL_CTX_set_default_verify_paths(ctx->s);
+ }
+
+ if (cfg->ssl_ciphers) {
+ if (SSL_CTX_set_cipher_list(ctx->s, cfg->ssl_ciphers) != 1) {
+ msg_err_config(
+ "cannot set ciphers set to %s: %s; fallback to %s",
+ cfg->ssl_ciphers,
+ ERR_error_string(ERR_get_error(), NULL),
+ default_secure_ciphers);
+ /* Default settings */
+ SSL_CTX_set_cipher_list(ctx->s, default_secure_ciphers);
+ }
+ }
+}
+
+void rspamd_ssl_ctx_free(gpointer ssl_ctx)
+{
+ struct rspamd_ssl_ctx *ctx = (struct rspamd_ssl_ctx *) ssl_ctx;
+
+ rspamd_lru_hash_destroy(ctx->sessions);
+ SSL_CTX_free(ctx->s);
+ g_free(ssl_ctx);
+} \ No newline at end of file
diff --git a/src/libserver/ssl_util.h b/src/libserver/ssl_util.h
new file mode 100644
index 0000000..cde7d47
--- /dev/null
+++ b/src/libserver/ssl_util.h
@@ -0,0 +1,120 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_SSL_UTIL_H_
+#define SRC_LIBUTIL_SSL_UTIL_H_
+
+#include "config.h"
+#include "libutil/addr.h"
+#include "libutil/libev_helper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_ssl_connection;
+
+typedef void (*rspamd_ssl_handler_t)(gint fd, short what, gpointer d);
+
+typedef void (*rspamd_ssl_error_handler_t)(gpointer d, GError *err);
+
+/**
+ * Creates a new ssl connection data structure
+ * @param ssl_ctx initialized SSL_CTX structure
+ * @return opaque connection data
+ */
+struct rspamd_ssl_connection *rspamd_ssl_connection_new(gpointer ssl_ctx,
+ struct ev_loop *ev_base,
+ gboolean verify_peer,
+ const gchar *log_tag);
+
+/**
+ * Connects SSL session using the specified (connected) FD
+ * @param conn connection
+ * @param fd fd to use
+ * @param hostname hostname for SNI
+ * @param ev event to use
+ * @param tv timeout for connection
+ * @param handler connected session handler
+ * @param handler_data opaque data
+ * @return TRUE if a session has been connected
+ */
+gboolean rspamd_ssl_connect_fd(struct rspamd_ssl_connection *conn, gint fd,
+ const gchar *hostname, struct rspamd_io_ev *ev, ev_tstamp timeout,
+ rspamd_ssl_handler_t handler, rspamd_ssl_error_handler_t err_handler,
+ gpointer handler_data);
+
+/**
+ * Restores SSL handlers for the existing ssl connection (e.g. after keepalive)
+ * @param conn
+ * @param handler
+ * @param err_handler
+ * @param handler_data
+ */
+void rspamd_ssl_connection_restore_handlers(struct rspamd_ssl_connection *conn,
+ rspamd_ssl_handler_t handler,
+ rspamd_ssl_error_handler_t err_handler,
+ gpointer handler_data,
+ short ev_what);
+
+/**
+ * Perform async read from SSL socket
+ * @param conn
+ * @param buf
+ * @param buflen
+ * @return
+ */
+gssize rspamd_ssl_read(struct rspamd_ssl_connection *conn, gpointer buf,
+ gsize buflen);
+
+/**
+ * Perform async write to ssl buffer
+ * @param conn
+ * @param buf
+ * @param buflen
+ * @param ev
+ * @param tv
+ * @return
+ */
+gssize rspamd_ssl_write(struct rspamd_ssl_connection *conn, gconstpointer buf,
+ gsize buflen);
+
+/**
+ * Emulate writev by copying iovec to a temporary buffer
+ * @param conn
+ * @param buf
+ * @param buflen
+ * @return
+ */
+gssize rspamd_ssl_writev(struct rspamd_ssl_connection *conn, struct iovec *iov,
+ gsize iovlen);
+
+/**
+ * Removes connection data
+ * @param conn
+ */
+void rspamd_ssl_connection_free(struct rspamd_ssl_connection *conn);
+
+gpointer rspamd_init_ssl_ctx(void);
+gpointer rspamd_init_ssl_ctx_noverify(void);
+void rspamd_ssl_ctx_config(struct rspamd_config *cfg, gpointer ssl_ctx);
+void rspamd_ssl_ctx_free(gpointer ssl_ctx);
+void rspamd_openssl_maybe_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_SSL_UTIL_H_ */
diff --git a/src/libserver/symcache/symcache_c.cxx b/src/libserver/symcache/symcache_c.cxx
new file mode 100644
index 0000000..6a7e41c
--- /dev/null
+++ b/src/libserver/symcache/symcache_c.cxx
@@ -0,0 +1,715 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "symcache_internal.hxx"
+#include "symcache_periodic.hxx"
+#include "symcache_item.hxx"
+#include "symcache_runtime.hxx"
+
+/**
+ * C API for symcache
+ */
+
+#define C_API_SYMCACHE(ptr) (reinterpret_cast<rspamd::symcache::symcache *>(ptr))
+#define C_API_SYMCACHE_RUNTIME(ptr) (reinterpret_cast<rspamd::symcache::symcache_runtime *>(ptr))
+#define C_API_SYMCACHE_ITEM(ptr) (reinterpret_cast<rspamd::symcache::cache_item *>(ptr))
+#define C_API_SYMCACHE_DYN_ITEM(ptr) (reinterpret_cast<rspamd::symcache::cache_dynamic_item *>(ptr))
+
+void rspamd_symcache_destroy(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ delete real_cache;
+}
+
+struct rspamd_symcache *
+rspamd_symcache_new(struct rspamd_config *cfg)
+{
+ auto *ncache = new rspamd::symcache::symcache(cfg);
+
+ return (struct rspamd_symcache *) ncache;
+}
+
+gboolean
+rspamd_symcache_init(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ return real_cache->init();
+}
+
+void rspamd_symcache_save(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->save_items();
+}
+
+gint rspamd_symcache_add_symbol(struct rspamd_symcache *cache,
+ const gchar *name,
+ gint priority,
+ symbol_func_t func,
+ gpointer user_data,
+ int type,
+ gint parent)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ /* Legacy stuff */
+ if (name == nullptr) {
+ name = "";
+ }
+
+ if (parent == -1) {
+ return real_cache->add_symbol_with_callback(name, priority, func, user_data, type);
+ }
+ else {
+ return real_cache->add_virtual_symbol(name, parent, type);
+ }
+}
+
+bool rspamd_symcache_add_symbol_augmentation(struct rspamd_symcache *cache,
+ int sym_id,
+ const char *augmentation,
+ const char *value)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ auto log_tag = [&]() { return real_cache->log_tag(); };
+
+ if (augmentation == nullptr) {
+ msg_err_cache("null augmentation is not allowed for item %d", sym_id);
+ return false;
+ }
+
+
+ auto *item = real_cache->get_item_by_id_mut(sym_id, false);
+
+ if (item == nullptr) {
+ msg_err_cache("item %d is not found", sym_id);
+ return false;
+ }
+
+ /* Handle empty or absent strings equally */
+ if (value == nullptr || value[0] == '\0') {
+ return item->add_augmentation(*real_cache, augmentation, std::nullopt);
+ }
+
+ return item->add_augmentation(*real_cache, augmentation, value);
+}
+
+void rspamd_symcache_set_peak_callback(struct rspamd_symcache *cache, gint cbref)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->set_peak_cb(cbref);
+}
+
+gboolean
+rspamd_symcache_add_condition_delayed(struct rspamd_symcache *cache,
+ const gchar *sym, lua_State *L, gint cbref)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->add_delayed_condition(sym, cbref);
+
+ return TRUE;
+}
+
+gint rspamd_symcache_find_symbol(struct rspamd_symcache *cache,
+ const gchar *name)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ /* Legacy stuff but used */
+ if (name == nullptr) {
+ return -1;
+ }
+
+ auto sym_maybe = real_cache->get_item_by_name(name, false);
+
+ if (sym_maybe != nullptr) {
+ return sym_maybe->id;
+ }
+
+ return -1;
+}
+
+gboolean
+rspamd_symcache_stat_symbol(struct rspamd_symcache *cache,
+ const gchar *name,
+ gdouble *frequency,
+ gdouble *freq_stddev,
+ gdouble *tm,
+ guint *nhits)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto sym_maybe = real_cache->get_item_by_name(name, false);
+
+ if (sym_maybe != nullptr) {
+ *frequency = sym_maybe->st->avg_frequency;
+ *freq_stddev = sqrt(sym_maybe->st->stddev_frequency);
+ *tm = sym_maybe->st->time_counter.mean;
+
+ if (nhits) {
+ *nhits = sym_maybe->st->hits;
+ }
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+guint rspamd_symcache_stats_symbols_count(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ return real_cache->get_stats_symbols_count();
+}
+
+guint64
+rspamd_symcache_get_cksum(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ return real_cache->get_cksum();
+}
+
+gboolean
+rspamd_symcache_validate(struct rspamd_symcache *cache,
+ struct rspamd_config *cfg,
+ gboolean strict)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ return real_cache->validate(strict);
+}
+
+ucl_object_t *
+rspamd_symcache_counters(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ return real_cache->counters();
+}
+
+void *
+rspamd_symcache_start_refresh(struct rspamd_symcache *cache,
+ struct ev_loop *ev_base, struct rspamd_worker *w)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ return new rspamd::symcache::cache_refresh_cbdata{real_cache, ev_base, w};
+}
+
+void rspamd_symcache_inc_frequency(struct rspamd_symcache *cache, struct rspamd_symcache_item *item,
+ const char *sym_name)
+{
+ auto *real_item = C_API_SYMCACHE_ITEM(item);
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (real_item) {
+ real_item->inc_frequency(sym_name, *real_cache);
+ }
+}
+
+void rspamd_symcache_add_delayed_dependency(struct rspamd_symcache *cache,
+ const gchar *from, const gchar *to)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ real_cache->add_delayed_dependency(from, to);
+}
+
+const gchar *
+rspamd_symcache_get_parent(struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto *sym = real_cache->get_item_by_name(symbol, false);
+
+ if (sym && sym->is_virtual()) {
+ auto *parent = sym->get_parent(*real_cache);
+
+ if (parent) {
+ return parent->get_name().c_str();
+ }
+ }
+
+ return nullptr;
+}
+
+const gchar *
+rspamd_symcache_item_name(struct rspamd_symcache_item *item)
+{
+ auto *real_item = C_API_SYMCACHE_ITEM(item);
+
+ if (real_item == nullptr) {
+ return nullptr;
+ }
+
+ return real_item->get_name().c_str();
+}
+
+gint rspamd_symcache_item_flags(struct rspamd_symcache_item *item)
+{
+ auto *real_item = C_API_SYMCACHE_ITEM(item);
+
+ if (real_item == nullptr) {
+ return 0;
+ }
+
+ return real_item->get_flags();
+}
+
+
+const gchar *
+rspamd_symcache_dyn_item_name(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *dyn_item)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_dyn_item = C_API_SYMCACHE_DYN_ITEM(dyn_item);
+
+ if (cache_runtime == nullptr || real_dyn_item == nullptr) {
+ return nullptr;
+ }
+
+ auto static_item = cache_runtime->get_item_by_dynamic_item(real_dyn_item);
+
+ return static_item->get_name().c_str();
+}
+
+gint rspamd_symcache_item_flags(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *dyn_item)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_dyn_item = C_API_SYMCACHE_DYN_ITEM(dyn_item);
+
+ if (cache_runtime == nullptr || real_dyn_item == nullptr) {
+ return 0;
+ }
+
+ auto static_item = cache_runtime->get_item_by_dynamic_item(real_dyn_item);
+
+ return static_item->get_flags();
+}
+
+guint rspamd_symcache_get_symbol_flags(struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto *sym = real_cache->get_item_by_name(symbol, false);
+
+ if (sym) {
+ return sym->get_flags();
+ }
+
+ return 0;
+}
+
+const struct rspamd_symcache_item_stat *
+rspamd_symcache_item_stat(struct rspamd_symcache_item *item)
+{
+ auto *real_item = C_API_SYMCACHE_ITEM(item);
+ return real_item->st;
+}
+
+void rspamd_symcache_get_symbol_details(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ ucl_object_t *this_sym_ucl)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto *sym = real_cache->get_item_by_name(symbol, false);
+
+ if (sym) {
+ ucl_object_insert_key(this_sym_ucl,
+ ucl_object_fromstring(sym->get_type_str()),
+ "type", strlen("type"), false);
+ }
+}
+
+void rspamd_symcache_foreach(struct rspamd_symcache *cache,
+ void (*func)(struct rspamd_symcache_item *item, gpointer /* userdata */),
+ gpointer ud)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->symbols_foreach([&](const rspamd::symcache::cache_item *item) {
+ func((struct rspamd_symcache_item *) item, ud);
+ });
+}
+
+void rspamd_symcache_process_settings_elt(struct rspamd_symcache *cache,
+ struct rspamd_config_settings_elt *elt)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->process_settings_elt(elt);
+}
+
+bool rspamd_symcache_set_allowed_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ const guint32 *ids,
+ guint nids)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto *item = real_cache->get_item_by_name_mut(symbol, false);
+
+ if (item == nullptr) {
+ return false;
+ }
+
+ item->allowed_ids.set_ids(ids, nids);
+ return true;
+}
+
+bool rspamd_symcache_set_forbidden_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ const guint32 *ids,
+ guint nids)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto *item = real_cache->get_item_by_name_mut(symbol, false);
+
+ if (item == nullptr) {
+ return false;
+ }
+
+ item->forbidden_ids.set_ids(ids, nids);
+ return true;
+}
+
+const guint32 *
+rspamd_symcache_get_allowed_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ guint *nids)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ const auto *item = real_cache->get_item_by_name(symbol, false);
+ return item->allowed_ids.get_ids(*nids);
+}
+
+const guint32 *
+rspamd_symcache_get_forbidden_settings_ids(struct rspamd_symcache *cache,
+ const gchar *symbol,
+ guint *nids)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ const auto *item = real_cache->get_item_by_name(symbol, false);
+ return item->forbidden_ids.get_ids(*nids);
+}
+
+void rspamd_symcache_disable_all_symbols(struct rspamd_task *task,
+ struct rspamd_symcache *_cache,
+ guint skip_mask)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+
+ cache_runtime->disable_all_symbols(skip_mask);
+}
+
+gboolean
+rspamd_symcache_disable_symbol(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (cache_runtime == nullptr) {
+ return FALSE;
+ }
+
+ return cache_runtime->disable_symbol(task, *real_cache, symbol);
+}
+
+gboolean
+rspamd_symcache_enable_symbol(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (cache_runtime == nullptr) {
+ return FALSE;
+ }
+
+ return cache_runtime->enable_symbol(task, *real_cache, symbol);
+}
+
+void rspamd_symcache_disable_symbol_static(struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->disable_symbol_delayed(symbol);
+}
+
+void rspamd_symcache_enable_symbol_static(struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->enable_symbol_delayed(symbol);
+}
+
+/* A real structure to match C results without extra copying */
+struct rspamd_symcache_real_timeout_result {
+ struct rspamd_symcache_timeout_result c_api_result;
+ std::vector<std::pair<double, const rspamd::symcache::cache_item *>> elts;
+};
+
+struct rspamd_symcache_timeout_result *
+rspamd_symcache_get_max_timeout(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ auto *res = new rspamd_symcache_real_timeout_result;
+
+ res->c_api_result.max_timeout = real_cache->get_max_timeout(res->elts);
+ res->c_api_result.items = reinterpret_cast<struct rspamd_symcache_timeout_item *>(res->elts.data());
+ res->c_api_result.nitems = res->elts.size();
+
+ return &res->c_api_result;
+}
+
+void rspamd_symcache_timeout_result_free(struct rspamd_symcache_timeout_result *res)
+{
+ auto *real_result = reinterpret_cast<rspamd_symcache_real_timeout_result *>(res);
+ delete real_result;
+}
+
+gboolean
+rspamd_symcache_is_checked(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (cache_runtime == nullptr) {
+ return FALSE;
+ }
+
+ return cache_runtime->is_symbol_checked(*real_cache, symbol);
+}
+
+gboolean
+rspamd_symcache_process_settings(struct rspamd_task *task,
+ struct rspamd_symcache *cache)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (cache_runtime == nullptr) {
+ return FALSE;
+ }
+
+ return cache_runtime->process_settings(task, *real_cache);
+}
+
+gboolean
+rspamd_symcache_is_item_allowed(struct rspamd_task *task,
+ struct rspamd_symcache_item *item,
+ gboolean exec_only)
+{
+ auto *real_item = C_API_SYMCACHE_ITEM(item);
+
+ if (real_item == nullptr) {
+ return TRUE;
+ }
+
+ return real_item->is_allowed(task, exec_only);
+}
+
+gboolean
+rspamd_symcache_is_symbol_enabled(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (!cache_runtime) {
+ return TRUE;
+ }
+
+ return cache_runtime->is_symbol_enabled(task, *real_cache, symbol);
+}
+
+struct rspamd_symcache_dynamic_item *
+rspamd_symcache_get_cur_item(struct rspamd_task *task)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+
+ if (!cache_runtime) {
+ return nullptr;
+ }
+
+ return (struct rspamd_symcache_dynamic_item *) cache_runtime->get_cur_item();
+}
+
+struct rspamd_symcache_dynamic_item *
+rspamd_symcache_set_cur_item(struct rspamd_task *task, struct rspamd_symcache_dynamic_item *item)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_dyn_item = C_API_SYMCACHE_DYN_ITEM(item);
+
+ if (!cache_runtime || !real_dyn_item) {
+ return nullptr;
+ }
+
+ return (struct rspamd_symcache_dynamic_item *) cache_runtime->set_cur_item(real_dyn_item);
+}
+
+void rspamd_symcache_enable_profile(struct rspamd_task *task)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ if (!cache_runtime) {
+ return;
+ }
+
+ cache_runtime->set_profile_mode(true);
+}
+
+guint rspamd_symcache_item_async_inc_full(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ const gchar *subsystem,
+ const gchar *loc)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_dyn_item = C_API_SYMCACHE_DYN_ITEM(item);
+
+ auto *static_item = cache_runtime->get_item_by_dynamic_item(real_dyn_item);
+ msg_debug_cache_task("increase async events counter for %s(%d) = %d + 1; "
+ "subsystem %s (%s)",
+ static_item->symbol.c_str(), static_item->id,
+ real_dyn_item->async_events, subsystem, loc);
+
+ return ++real_dyn_item->async_events;
+}
+
+guint rspamd_symcache_item_async_dec_full(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ const gchar *subsystem,
+ const gchar *loc)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_dyn_item = C_API_SYMCACHE_DYN_ITEM(item);
+
+ auto *static_item = cache_runtime->get_item_by_dynamic_item(real_dyn_item);
+ msg_debug_cache_task("decrease async events counter for %s(%d) = %d - 1; "
+ "subsystem %s (%s)",
+ static_item->symbol.c_str(), static_item->id,
+ real_dyn_item->async_events, subsystem, loc);
+
+ if (G_UNLIKELY(real_dyn_item->async_events == 0)) {
+ msg_err_cache_task("INTERNAL ERROR: trying decrease async events counter for %s(%d) that is already zero; "
+ "subsystem %s (%s)",
+ static_item->symbol.c_str(), static_item->id,
+ real_dyn_item->async_events, subsystem, loc);
+ g_abort();
+ g_assert_not_reached();
+ }
+
+ return --real_dyn_item->async_events;
+}
+
+gboolean
+rspamd_symcache_item_async_dec_check_full(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item,
+ const gchar *subsystem,
+ const gchar *loc)
+{
+ if (rspamd_symcache_item_async_dec_full(task, item, subsystem, loc) == 0) {
+ rspamd_symcache_finalize_item(task, item);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+struct rspamd_abstract_callback_data *
+rspamd_symcache_get_cbdata(struct rspamd_symcache *cache,
+ const gchar *symbol)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ auto *item = real_cache->get_item_by_name(symbol, true);
+
+ if (item) {
+ return (struct rspamd_abstract_callback_data *) item->get_cbdata();
+ }
+
+ return nullptr;
+}
+
+void rspamd_symcache_composites_foreach(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ GHFunc func,
+ gpointer fd)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+
+ real_cache->composites_foreach([&](const auto *item) {
+ auto *dyn_item = cache_runtime->get_dynamic_item(item->id);
+
+ if (dyn_item && !dyn_item->started) {
+ auto *old_item = cache_runtime->set_cur_item(dyn_item);
+ func((void *) item->get_name().c_str(), item->get_cbdata(), fd);
+ dyn_item->finished = true;
+ cache_runtime->set_cur_item(old_item);
+ }
+ });
+
+ cache_runtime->set_cur_item(nullptr);
+}
+
+gboolean
+rspamd_symcache_process_symbols(struct rspamd_task *task,
+ struct rspamd_symcache *cache,
+ guint stage)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ if (task->symcache_runtime == nullptr) {
+ task->symcache_runtime = rspamd::symcache::symcache_runtime::create(task, *real_cache);
+ }
+
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ return cache_runtime->process_symbols(task, *real_cache, stage);
+}
+
+void rspamd_symcache_finalize_item(struct rspamd_task *task,
+ struct rspamd_symcache_dynamic_item *item)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ auto *real_dyn_item = C_API_SYMCACHE_DYN_ITEM(item);
+
+ cache_runtime->finalize_item(task, real_dyn_item);
+}
+
+void rspamd_symcache_runtime_destroy(struct rspamd_task *task)
+{
+ auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
+ cache_runtime->savepoint_dtor();
+} \ No newline at end of file
diff --git a/src/libserver/symcache/symcache_id_list.hxx b/src/libserver/symcache/symcache_id_list.hxx
new file mode 100644
index 0000000..bef4fa9
--- /dev/null
+++ b/src/libserver/symcache/symcache_id_list.hxx
@@ -0,0 +1,95 @@
+/*-
+ * Copyright 2022 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_SYMCACHE_ID_LIST_HXX
+#define RSPAMD_SYMCACHE_ID_LIST_HXX
+#pragma once
+
+#include <cstdint>
+#include <cstring> // for memset
+#include <algorithm>// for sort/bsearch
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "contrib/ankerl/svector.h"
+
+namespace rspamd::symcache {
+/*
+ * This structure is optimised to store ids list:
+ * - If the first element is -1 then use dynamic part, else use static part
+ * There is no std::variant to save space
+ */
+
+constexpr const auto id_capacity = 4;
+constexpr const auto id_sort_threshold = 32;
+
+struct id_list {
+ ankerl::svector<std::uint32_t, id_capacity> data;
+
+ id_list() = default;
+
+ auto reset()
+ {
+ data.clear();
+ }
+
+ /**
+ * Returns ids from a compressed list, accepting a mutable reference for number of elements
+ * @param nids output of the number of elements
+ * @return
+ */
+ auto get_ids(unsigned &nids) const -> const std::uint32_t *
+ {
+ nids = data.size();
+
+ return data.data();
+ }
+
+ auto add_id(std::uint32_t id) -> void
+ {
+ data.push_back(id);
+
+ /* Check sort threshold */
+ if (data.size() > id_sort_threshold) {
+ std::sort(data.begin(), data.end());
+ }
+ }
+
+ auto set_ids(const std::uint32_t *ids, std::size_t nids) -> void
+ {
+ data.resize(nids);
+
+ for (auto &id: data) {
+ id = *ids++;
+ }
+
+ if (data.size() > id_sort_threshold) {
+ std::sort(data.begin(), data.end());
+ }
+ }
+
+ auto check_id(unsigned int id) const -> bool
+ {
+ if (data.size() > id_sort_threshold) {
+ return std::binary_search(data.begin(), data.end(), id);
+ }
+ return std::find(data.begin(), data.end(), id) != data.end();
+ }
+};
+
+}// namespace rspamd::symcache
+
+#endif//RSPAMD_SYMCACHE_ID_LIST_HXX
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
new file mode 100644
index 0000000..93675ac
--- /dev/null
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -0,0 +1,1316 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lua/lua_common.h"
+#include "symcache_internal.hxx"
+#include "symcache_item.hxx"
+#include "symcache_runtime.hxx"
+#include "unix-std.h"
+#include "libutil/cxx/file_util.hxx"
+#include "libutil/cxx/util.hxx"
+#include "fmt/core.h"
+#include "contrib/t1ha/t1ha.h"
+
+#ifdef __has_include
+#if __has_include(<version>)
+#include <version>
+#endif
+#endif
+#include <cmath>
+
+namespace rspamd::symcache {
+
+INIT_LOG_MODULE_PUBLIC(symcache)
+
+auto symcache::init() -> bool
+{
+ auto res = true;
+ reload_time = cfg->cache_reload_time;
+
+ if (cfg->cache_filename != nullptr) {
+ msg_debug_cache("loading symcache saved data from %s", cfg->cache_filename);
+ load_items();
+ }
+
+ ankerl::unordered_dense::set<int> disabled_ids;
+ /* Process enabled/disabled symbols */
+ for (const auto &[id, it]: items_by_id) {
+ if (disabled_symbols) {
+ /*
+ * Due to the ability to add patterns, this is now O(N^2), but it is done
+ * once on configuration and the amount of static patterns is usually low
+ * The possible optimization is to store non patterns in a different set to check it
+ * quickly. However, it is unlikely that this would be used to something really heavy.
+ */
+ for (const auto &disable_pat: *disabled_symbols) {
+ if (disable_pat.matches(it->get_name())) {
+ msg_debug_cache("symbol %s matches %*s disable pattern", it->get_name().c_str(),
+ (int) disable_pat.to_string_view().size(), disable_pat.to_string_view().data());
+ auto need_disable = true;
+
+ if (enabled_symbols) {
+ for (const auto &enable_pat: *enabled_symbols) {
+ if (enable_pat.matches(it->get_name())) {
+ msg_debug_cache("symbol %s matches %*s enable pattern; skip disabling", it->get_name().c_str(),
+ (int) enable_pat.to_string_view().size(), enable_pat.to_string_view().data());
+ need_disable = false;
+ break;
+ }
+ }
+ }
+
+ if (need_disable) {
+ disabled_ids.insert(it->id);
+
+ if (it->is_virtual()) {
+ auto real_elt = it->get_parent(*this);
+
+ if (real_elt) {
+ disabled_ids.insert(real_elt->id);
+
+ const auto *children = real_elt->get_children();
+ if (children != nullptr) {
+ for (const auto &cld: *children) {
+ msg_debug_cache("symbol %s is a virtual sibling of the disabled symbol %s",
+ cld->get_name().c_str(), it->get_name().c_str());
+ disabled_ids.insert(cld->id);
+ }
+ }
+ }
+ }
+ else {
+ /* Also disable all virtual children of this element */
+ const auto *children = it->get_children();
+
+ if (children != nullptr) {
+ for (const auto &cld: *children) {
+ msg_debug_cache("symbol %s is a virtual child of the disabled symbol %s",
+ cld->get_name().c_str(), it->get_name().c_str());
+ disabled_ids.insert(cld->id);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Deal with the delayed dependencies */
+ msg_debug_cache("resolving delayed dependencies: %d in list", (int) delayed_deps->size());
+ for (const auto &delayed_dep: *delayed_deps) {
+ auto virt_item = get_item_by_name(delayed_dep.from, false);
+ auto real_item = get_item_by_name(delayed_dep.from, true);
+
+ if (virt_item == nullptr || real_item == nullptr) {
+ msg_err_cache("cannot register delayed dependency between %s and %s: "
+ "%s is missing",
+ delayed_dep.from.data(),
+ delayed_dep.to.data(), delayed_dep.from.data());
+ }
+ else {
+
+ if (!disabled_ids.contains(real_item->id)) {
+ msg_debug_cache("delayed between %s(%d:%d) -> %s",
+ delayed_dep.from.data(),
+ real_item->id, virt_item->id,
+ delayed_dep.to.data());
+ add_dependency(real_item->id, delayed_dep.to,
+ virt_item != real_item ? virt_item->id : -1);
+ }
+ else {
+ msg_debug_cache("no delayed between %s(%d:%d) -> %s; %s is disabled",
+ delayed_dep.from.data(),
+ real_item->id, virt_item->id,
+ delayed_dep.to.data(),
+ delayed_dep.from.data());
+ }
+ }
+ }
+
+ /* Remove delayed dependencies, as they are no longer needed at this point */
+ delayed_deps.reset();
+
+ /* Physically remove ids that are disabled statically */
+ for (auto id_to_disable: disabled_ids) {
+ /*
+ * This erasure is inefficient, we can swap the last element with the removed id
+ * But in this way, our ids are still sorted by addition
+ */
+
+ /* Preserve refcount here */
+ auto deleted_element_refcount = items_by_id[id_to_disable];
+ items_by_id.erase(id_to_disable);
+ items_by_symbol.erase(deleted_element_refcount->get_name());
+
+ auto &additional_vec = get_item_specific_vector(*deleted_element_refcount);
+#if defined(__cpp_lib_erase_if)
+ std::erase_if(additional_vec, [id_to_disable](cache_item *elt) {
+ return elt->id == id_to_disable;
+ });
+#else
+ auto it = std::remove_if(additional_vec.begin(),
+ additional_vec.end(), [id_to_disable](cache_item *elt) {
+ return elt->id == id_to_disable;
+ });
+ additional_vec.erase(it, additional_vec.end());
+#endif
+
+ /* Refcount is dropped, so the symbol should be freed, ensure that nothing else owns this symbol */
+ g_assert(deleted_element_refcount.use_count() == 1);
+ }
+
+ /* Remove no longer used stuff */
+ enabled_symbols.reset();
+ disabled_symbols.reset();
+
+ /* Deal with the delayed conditions */
+ msg_debug_cache("resolving delayed conditions: %d in list", (int) delayed_conditions->size());
+ for (const auto &delayed_cond: *delayed_conditions) {
+ auto it = get_item_by_name_mut(delayed_cond.sym, true);
+
+ if (it == nullptr) {
+ msg_err_cache(
+ "cannot register delayed condition for %s",
+ delayed_cond.sym.c_str());
+ luaL_unref(delayed_cond.L, LUA_REGISTRYINDEX, delayed_cond.cbref);
+ }
+ else {
+ if (!it->add_condition(delayed_cond.L, delayed_cond.cbref)) {
+ msg_err_cache(
+ "cannot register delayed condition for %s: virtual parent; qed",
+ delayed_cond.sym.c_str());
+ g_abort();
+ }
+
+ msg_debug_cache("added a condition to the symbol %s", it->symbol.c_str());
+ }
+ }
+ delayed_conditions.reset();
+
+ msg_debug_cache("process dependencies");
+ for (const auto &[_id, it]: items_by_id) {
+ it->process_deps(*this);
+ }
+
+ /* Sorting stuff */
+ constexpr auto postfilters_cmp = [](const auto &it1, const auto &it2) -> bool {
+ return it1->priority < it2->priority;
+ };
+ constexpr auto prefilters_cmp = [](const auto &it1, const auto &it2) -> bool {
+ return it1->priority > it2->priority;
+ };
+
+ msg_debug_cache("sorting stuff");
+ std::stable_sort(std::begin(connfilters), std::end(connfilters), prefilters_cmp);
+ std::stable_sort(std::begin(prefilters), std::end(prefilters), prefilters_cmp);
+ std::stable_sort(std::begin(postfilters), std::end(postfilters), postfilters_cmp);
+ std::stable_sort(std::begin(idempotent), std::end(idempotent), postfilters_cmp);
+
+ resort();
+
+ /* Connect metric symbols with symcache symbols */
+ if (cfg->symbols) {
+ msg_debug_cache("connect metrics");
+ g_hash_table_foreach(cfg->symbols,
+ symcache::metric_connect_cb,
+ (void *) this);
+ }
+
+ return res;
+}
+
+auto symcache::load_items() -> bool
+{
+ auto cached_map = util::raii_mmaped_file::mmap_shared(cfg->cache_filename,
+ O_RDONLY, PROT_READ);
+
+ if (!cached_map.has_value()) {
+ if (cached_map.error().category == util::error_category::CRITICAL) {
+ msg_err_cache("%s", cached_map.error().error_message.data());
+ }
+ else {
+ msg_info_cache("%s", cached_map.error().error_message.data());
+ }
+ return false;
+ }
+
+
+ if (cached_map->get_size() < (gint) sizeof(symcache_header)) {
+ msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename,
+ errno, strerror(errno));
+ return false;
+ }
+
+ const auto *hdr = (struct symcache_header *) cached_map->get_map();
+
+ if (memcmp(hdr->magic, symcache_magic,
+ sizeof(symcache_magic)) != 0) {
+ msg_info_cache("cannot use file %s, bad magic", cfg->cache_filename);
+
+ return false;
+ }
+
+ auto *parser = ucl_parser_new(0);
+ const auto *p = (const std::uint8_t *) (hdr + 1);
+
+ if (!ucl_parser_add_chunk(parser, p, cached_map->get_size() - sizeof(*hdr))) {
+ msg_info_cache("cannot use file %s, cannot parse: %s", cfg->cache_filename,
+ ucl_parser_get_error(parser));
+ ucl_parser_free(parser);
+
+ return false;
+ }
+
+ auto *top = ucl_parser_get_object(parser);
+ ucl_parser_free(parser);
+
+ if (top == nullptr || ucl_object_type(top) != UCL_OBJECT) {
+ msg_info_cache("cannot use file %s, bad object", cfg->cache_filename);
+ ucl_object_unref(top);
+
+ return false;
+ }
+
+ auto it = ucl_object_iterate_new(top);
+ const ucl_object_t *cur;
+ while ((cur = ucl_object_iterate_safe(it, true)) != nullptr) {
+ auto item_it = items_by_symbol.find(ucl_object_key(cur));
+
+ if (item_it != items_by_symbol.end()) {
+ auto item = item_it->second;
+ /* Copy saved info */
+ /*
+ * XXX: don't save or load weight, it should be obtained from the
+ * metric
+ */
+#if 0
+ elt = ucl_object_lookup (cur, "weight");
+
+ if (elt) {
+ w = ucl_object_todouble (elt);
+ if (w != 0) {
+ item->weight = w;
+ }
+ }
+#endif
+ const auto *elt = ucl_object_lookup(cur, "time");
+ if (elt) {
+ item->st->avg_time = ucl_object_todouble(elt);
+ }
+
+ elt = ucl_object_lookup(cur, "count");
+ if (elt) {
+ item->st->total_hits = ucl_object_toint(elt);
+ item->last_count = item->st->total_hits;
+ }
+
+ elt = ucl_object_lookup(cur, "frequency");
+ if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+ const ucl_object_t *freq_elt;
+
+ freq_elt = ucl_object_lookup(elt, "avg");
+
+ if (freq_elt) {
+ item->st->avg_frequency = ucl_object_todouble(freq_elt);
+ }
+ freq_elt = ucl_object_lookup(elt, "stddev");
+
+ if (freq_elt) {
+ item->st->stddev_frequency = ucl_object_todouble(freq_elt);
+ }
+ }
+
+ if (item->is_virtual() && !item->is_ghost()) {
+ const auto &parent = item->get_parent(*this);
+
+ if (parent) {
+ if (parent->st->weight < item->st->weight) {
+ parent->st->weight = item->st->weight;
+ }
+ }
+ /*
+ * We maintain avg_time for virtual symbols equal to the
+ * parent item avg_time
+ */
+ item->st->avg_time = parent->st->avg_time;
+ }
+
+ total_weight += fabs(item->st->weight);
+ total_hits += item->st->total_hits;
+ }
+ }
+
+ ucl_object_iterate_free(it);
+ ucl_object_unref(top);
+
+ return true;
+}
+
+template<typename T>
+static constexpr auto round_to_hundreds(T x)
+{
+ return (::floor(x) * 100.0) / 100.0;
+}
+
+bool symcache::save_items() const
+{
+ if (cfg->cache_filename == nullptr) {
+ return false;
+ }
+
+ auto file_sink = util::raii_file_sink::create(cfg->cache_filename,
+ O_WRONLY | O_TRUNC, 00644);
+
+ if (!file_sink.has_value()) {
+ if (errno == EEXIST) {
+ /* Some other process is already writing data, give up silently */
+ return false;
+ }
+
+ msg_err_cache("%s", file_sink.error().error_message.data());
+
+ return false;
+ }
+
+ struct symcache_header hdr;
+ memset(&hdr, 0, sizeof(hdr));
+ memcpy(hdr.magic, symcache_magic, sizeof(symcache_magic));
+
+ if (write(file_sink->get_fd(), &hdr, sizeof(hdr)) == -1) {
+ msg_err_cache("cannot write to file %s, error %d, %s", cfg->cache_filename,
+ errno, strerror(errno));
+
+ return false;
+ }
+
+ auto *top = ucl_object_typed_new(UCL_OBJECT);
+
+ for (const auto &it: items_by_symbol) {
+ auto item = it.second;
+ auto elt = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(elt,
+ ucl_object_fromdouble(round_to_hundreds(item->st->weight)),
+ "weight", 0, false);
+ ucl_object_insert_key(elt,
+ ucl_object_fromdouble(round_to_hundreds(item->st->time_counter.mean)),
+ "time", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromint(item->st->total_hits),
+ "count", 0, false);
+
+ auto *freq = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(freq,
+ ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.mean)),
+ "avg", 0, false);
+ ucl_object_insert_key(freq,
+ ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.stddev)),
+ "stddev", 0, false);
+ ucl_object_insert_key(elt, freq, "frequency", 0, false);
+
+ ucl_object_insert_key(top, elt, it.first.data(), 0, true);
+ }
+
+ auto fp = fdopen(file_sink->get_fd(), "a");
+ auto *efunc = ucl_object_emit_file_funcs(fp);
+ auto ret = ucl_object_emit_full(top, UCL_EMIT_JSON_COMPACT, efunc, nullptr);
+ ucl_object_emit_funcs_free(efunc);
+ ucl_object_unref(top);
+ fclose(fp);
+
+ return ret;
+}
+
+auto symcache::metric_connect_cb(void *k, void *v, void *ud) -> void
+{
+ auto *cache = (symcache *) ud;
+ const auto *sym = (const char *) k;
+ auto *s = (struct rspamd_symbol *) v;
+ auto weight = *s->weight_ptr;
+ auto *item = cache->get_item_by_name_mut(sym, false);
+
+ if (item) {
+ item->st->weight = weight;
+ s->cache_item = (void *) item;
+ }
+}
+
+
+auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item *
+{
+ if (id < 0 || id >= items_by_id.size()) {
+ msg_err_cache("internal error: requested item with id %d, when we have just %d items in the cache",
+ id, (int) items_by_id.size());
+ return nullptr;
+ }
+
+ const auto &maybe_item = rspamd::find_map(items_by_id, id);
+
+ if (!maybe_item.has_value()) {
+ msg_err_cache("internal error: requested item with id %d but it is empty; qed",
+ id);
+ return nullptr;
+ }
+
+ const auto &item = maybe_item.value().get();
+
+ if (resolve_parent && item->is_virtual()) {
+ return item->get_parent(*this);
+ }
+
+ return item.get();
+}
+
+auto symcache::get_item_by_id_mut(int id, bool resolve_parent) const -> cache_item *
+{
+ if (id < 0 || id >= items_by_id.size()) {
+ msg_err_cache("internal error: requested item with id %d, when we have just %d items in the cache",
+ id, (int) items_by_id.size());
+ return nullptr;
+ }
+
+ const auto &maybe_item = rspamd::find_map(items_by_id, id);
+
+ if (!maybe_item.has_value()) {
+ msg_err_cache("internal error: requested item with id %d but it is empty; qed",
+ id);
+ return nullptr;
+ }
+
+ const auto &item = maybe_item.value().get();
+
+ if (resolve_parent && item->is_virtual()) {
+ return const_cast<cache_item *>(item->get_parent(*this));
+ }
+
+ return item.get();
+}
+
+auto symcache::get_item_by_name(std::string_view name, bool resolve_parent) const -> const cache_item *
+{
+ auto it = items_by_symbol.find(name);
+
+ if (it == items_by_symbol.end()) {
+ return nullptr;
+ }
+
+ if (resolve_parent && it->second->is_virtual()) {
+ it->second->resolve_parent(*this);
+ return it->second->get_parent(*this);
+ }
+
+ return it->second;
+}
+
+auto symcache::get_item_by_name_mut(std::string_view name, bool resolve_parent) const -> cache_item *
+{
+ auto it = items_by_symbol.find(name);
+
+ if (it == items_by_symbol.end()) {
+ return nullptr;
+ }
+
+ if (resolve_parent && it->second->is_virtual()) {
+ return (cache_item *) it->second->get_parent(*this);
+ }
+
+ return it->second;
+}
+
+auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_from) -> void
+{
+ g_assert(id_from >= 0 && id_from < (gint) items_by_id.size());
+ const auto &source = items_by_id[id_from];
+ g_assert(source.get() != nullptr);
+
+ source->deps.emplace_back(nullptr,
+ std::string(to),
+ id_from,
+ -1);
+
+
+ if (virtual_id_from >= 0) {
+ g_assert(virtual_id_from < (gint) items_by_id.size());
+ /* We need that for settings id propagation */
+ const auto &vsource = items_by_id[virtual_id_from];
+ g_assert(vsource.get() != nullptr);
+ vsource->deps.emplace_back(nullptr,
+ std::string(to),
+ -1,
+ virtual_id_from);
+ }
+}
+
+auto symcache::resort() -> void
+{
+ auto log_func = RSPAMD_LOG_FUNC;
+ auto ord = std::make_shared<order_generation>(filters.size() +
+ prefilters.size() +
+ composites.size() +
+ postfilters.size() +
+ idempotent.size() +
+ connfilters.size() +
+ classifiers.size(),
+ cur_order_gen);
+
+ for (auto &it: filters) {
+ if (it) {
+ total_hits += it->st->total_hits;
+ /* Unmask topological order */
+ it->order = 0;
+ ord->d.emplace_back(it->getptr());
+ }
+ }
+
+ enum class tsort_mask {
+ PERM,
+ TEMP
+ };
+
+ constexpr auto tsort_unmask = [](cache_item *it) -> auto {
+ return (it->order & ~((1u << 31) | (1u << 30)));
+ };
+
+ /* Recursive topological sort helper */
+ const auto tsort_visit = [&](cache_item *it, unsigned cur_order, auto &&rec) {
+ constexpr auto tsort_mark = [](cache_item *it, tsort_mask how) {
+ switch (how) {
+ case tsort_mask::PERM:
+ it->order |= (1u << 31);
+ break;
+ case tsort_mask::TEMP:
+ it->order |= (1u << 30);
+ break;
+ }
+ };
+ constexpr auto tsort_is_marked = [](cache_item *it, tsort_mask how) {
+ switch (how) {
+ case tsort_mask::PERM:
+ return (it->order & (1u << 31));
+ case tsort_mask::TEMP:
+ return (it->order & (1u << 30));
+ }
+
+ return 100500u; /* Because fuck compilers, that's why */
+ };
+
+ if (tsort_is_marked(it, tsort_mask::PERM)) {
+ if (cur_order > tsort_unmask(it)) {
+ /* Need to recalculate the whole chain */
+ it->order = cur_order; /* That also removes all masking */
+ }
+ else {
+ /* We are fine, stop DFS */
+ return;
+ }
+ }
+ else if (tsort_is_marked(it, tsort_mask::TEMP)) {
+ msg_err_cache_lambda("cyclic dependencies found when checking '%s'!",
+ it->symbol.c_str());
+ return;
+ }
+
+ tsort_mark(it, tsort_mask::TEMP);
+ msg_debug_cache_lambda("visiting node: %s (%d)", it->symbol.c_str(), cur_order);
+
+ for (const auto &dep: it->deps) {
+ msg_debug_cache_lambda("visiting dep: %s (%d)", dep.item->symbol.c_str(), cur_order + 1);
+ rec(dep.item, cur_order + 1, rec);
+ }
+
+ it->order = cur_order;
+ tsort_mark(it, tsort_mask::PERM);
+ };
+ /*
+ * Topological sort
+ */
+ total_hits = 0;
+ auto used_items = ord->d.size();
+
+ for (const auto &it: ord->d) {
+ if (it->order == 0) {
+ tsort_visit(it.get(), 0, tsort_visit);
+ }
+ }
+
+
+ /* Main sorting comparator */
+ constexpr auto score_functor = [](auto w, auto f, auto t) -> auto {
+ auto time_alpha = 1.0, weight_alpha = 0.1, freq_alpha = 0.01;
+
+ return ((w > 0.0 ? w : weight_alpha) * (f > 0.0 ? f : freq_alpha) /
+ (t > time_alpha ? t : time_alpha));
+ };
+
+ auto cache_order_cmp = [&](const auto &it1, const auto &it2) -> auto {
+ constexpr const auto topology_mult = 1e7,
+ priority_mult = 1e6,
+ augmentations1_mult = 1e5;
+ auto w1 = tsort_unmask(it1.get()) * topology_mult,
+ w2 = tsort_unmask(it2.get()) * topology_mult;
+
+ w1 += it1->priority * priority_mult;
+ w2 += it2->priority * priority_mult;
+ w1 += it1->get_augmentation_weight() * augmentations1_mult;
+ w2 += it2->get_augmentation_weight() * augmentations1_mult;
+
+ auto avg_freq = ((double) total_hits / used_items);
+ auto avg_weight = (total_weight / used_items);
+ auto f1 = (double) it1->st->total_hits / avg_freq;
+ auto f2 = (double) it2->st->total_hits / avg_freq;
+ auto weight1 = std::fabs(it1->st->weight) / avg_weight;
+ auto weight2 = std::fabs(it2->st->weight) / avg_weight;
+ auto t1 = it1->st->avg_time;
+ auto t2 = it2->st->avg_time;
+ w1 += score_functor(weight1, f1, t1);
+ w2 += score_functor(weight2, f2, t2);
+
+ return w1 > w2;
+ };
+
+ std::stable_sort(std::begin(ord->d), std::end(ord->d), cache_order_cmp);
+ /*
+ * Here lives some ugly legacy!
+ * We have several filters classes, connfilters, prefilters, filters... etc
+ *
+ * Our order is meaningful merely for filters, but we have to add other classes
+ * to understand if those symbols are checked or disabled.
+ * We can disable symbols for almost everything but not for virtual symbols.
+ * The rule of thumb is that if a symbol has explicit parent, then it is a
+ * virtual symbol that follows it's special rules
+ */
+
+ /*
+ * We enrich ord with all other symbol types without any sorting,
+ * as it is done in another place
+ */
+ constexpr auto append_items_vec = [](const auto &vec, auto &out) {
+ for (const auto &it: vec) {
+ if (it) {
+ out.emplace_back(it->getptr());
+ }
+ }
+ };
+
+ append_items_vec(connfilters, ord->d);
+ append_items_vec(prefilters, ord->d);
+ append_items_vec(postfilters, ord->d);
+ append_items_vec(idempotent, ord->d);
+ append_items_vec(composites, ord->d);
+ append_items_vec(classifiers, ord->d);
+
+ /* After sorting is done, we can assign all elements in the by_symbol hash */
+ for (const auto [i, it]: rspamd::enumerate(ord->d)) {
+ ord->by_symbol.emplace(it->get_name(), i);
+ ord->by_cache_id[it->id] = i;
+ }
+ /* Finally set the current order */
+ std::swap(ord, items_by_order);
+}
+
+auto symcache::add_symbol_with_callback(std::string_view name,
+ int priority,
+ symbol_func_t func,
+ void *user_data,
+ int flags_and_type) -> int
+{
+ auto real_type_pair_maybe = item_type_from_c(flags_and_type);
+
+ if (!real_type_pair_maybe.has_value()) {
+ msg_err_cache("incompatible flags when adding %s: %s", name.data(),
+ real_type_pair_maybe.error().c_str());
+ return -1;
+ }
+
+ auto real_type_pair = real_type_pair_maybe.value();
+
+ if (real_type_pair.first != symcache_item_type::FILTER) {
+ real_type_pair.second |= SYMBOL_TYPE_NOSTAT;
+ }
+ if (real_type_pair.second & (SYMBOL_TYPE_GHOST | SYMBOL_TYPE_CALLBACK)) {
+ real_type_pair.second |= SYMBOL_TYPE_NOSTAT;
+ }
+
+ if (real_type_pair.first == symcache_item_type::VIRTUAL) {
+ msg_err_cache("trying to add virtual symbol %s as real (no parent)", name.data());
+ return -1;
+ }
+
+ std::string static_string_name;
+
+ if (name.empty()) {
+ static_string_name = fmt::format("AUTO_{}_{}", (void *) func, user_data);
+ msg_warn_cache("trying to add an empty symbol name, convert it to %s",
+ static_string_name.c_str());
+ }
+ else {
+ static_string_name = name;
+ }
+
+ if (real_type_pair.first == symcache_item_type::IDEMPOTENT && priority != 0) {
+ msg_warn_cache("priority has been set for idempotent symbol %s: %d",
+ static_string_name.c_str(), priority);
+ }
+
+ if ((real_type_pair.second & SYMBOL_TYPE_FINE) && priority == 0) {
+ /* Adjust priority for negative weighted symbols */
+ priority = 1;
+ }
+
+ if (items_by_symbol.contains(static_string_name)) {
+ msg_err_cache("duplicate symbol name: %s", static_string_name.data());
+ return -1;
+ }
+
+ auto id = items_by_id.size();
+
+ auto item = cache_item::create_with_function(static_pool, id,
+ std::move(static_string_name),
+ priority, func, user_data,
+ real_type_pair.first, real_type_pair.second);
+
+ items_by_symbol.emplace(item->get_name(), item.get());
+ get_item_specific_vector(*item).push_back(item.get());
+ items_by_id.emplace(id, std::move(item));// Takes ownership
+
+ if (!(real_type_pair.second & SYMBOL_TYPE_NOSTAT)) {
+ cksum = t1ha(name.data(), name.size(), cksum);
+ stats_symbols_count++;
+ }
+
+ return id;
+}
+
+auto symcache::add_virtual_symbol(std::string_view name, int parent_id, int flags_and_type) -> int
+{
+ if (name.empty()) {
+ msg_err_cache("cannot register a virtual symbol with no name; qed");
+ return -1;
+ }
+
+ auto real_type_pair_maybe = item_type_from_c(flags_and_type);
+
+ if (!real_type_pair_maybe.has_value()) {
+ msg_err_cache("incompatible flags when adding %s: %s", name.data(),
+ real_type_pair_maybe.error().c_str());
+ return -1;
+ }
+
+ auto real_type_pair = real_type_pair_maybe.value();
+
+ if (items_by_symbol.contains(name)) {
+ msg_err_cache("duplicate symbol name: %s", name.data());
+ return -1;
+ }
+
+ if (items_by_id.size() < parent_id) {
+ msg_err_cache("parent id %d is out of bounds for virtual symbol %s", parent_id, name.data());
+ return -1;
+ }
+
+ auto id = items_by_id.size();
+
+ auto item = cache_item::create_with_virtual(static_pool,
+ id,
+ std::string{name},
+ parent_id, real_type_pair.first, real_type_pair.second);
+ const auto &parent = items_by_id[parent_id].get();
+ parent->add_child(item.get());
+ items_by_symbol.emplace(item->get_name(), item.get());
+ get_item_specific_vector(*item).push_back(item.get());
+ items_by_id.emplace(id, std::move(item));// Takes ownership
+
+ return id;
+}
+
+auto symcache::set_peak_cb(int cbref) -> void
+{
+ if (peak_cb != -1) {
+ luaL_unref(L, LUA_REGISTRYINDEX, peak_cb);
+ }
+
+ peak_cb = cbref;
+ msg_info_cache("registered peak callback");
+}
+
+auto symcache::add_delayed_condition(std::string_view sym, int cbref) -> void
+{
+ delayed_conditions->emplace_back(sym, cbref, (lua_State *) cfg->lua_state);
+}
+
+auto symcache::validate(bool strict) -> bool
+{
+ total_weight = 1.0;
+
+ for (auto &pair: items_by_symbol) {
+ auto &item = pair.second;
+ auto ghost = item->st->weight == 0 ? true : false;
+ auto skipped = !ghost;
+
+ if (item->is_scoreable() && g_hash_table_lookup(cfg->symbols, item->symbol.c_str()) == nullptr) {
+ if (!std::isnan(cfg->unknown_weight)) {
+ item->st->weight = cfg->unknown_weight;
+ auto *s = rspamd_mempool_alloc0_type(static_pool,
+ struct rspamd_symbol);
+ /* Legit as we actually never modify this data */
+ s->name = (char *) item->symbol.c_str();
+ s->weight_ptr = &item->st->weight;
+ g_hash_table_insert(cfg->symbols, (void *) s->name, (void *) s);
+
+ msg_info_cache("adding unknown symbol %s with weight: %.2f",
+ item->symbol.c_str(), cfg->unknown_weight);
+ ghost = false;
+ skipped = false;
+ }
+ else {
+ skipped = true;
+ }
+ }
+ else {
+ skipped = false;
+ }
+
+ if (!ghost && skipped) {
+ if (!(item->flags & SYMBOL_TYPE_SKIPPED)) {
+ item->flags |= SYMBOL_TYPE_SKIPPED;
+ msg_warn_cache("symbol %s has no score registered, skip its check",
+ item->symbol.c_str());
+ }
+ }
+
+ if (ghost) {
+ msg_debug_cache("symbol %s is registered as ghost symbol, it won't be inserted "
+ "to any metric",
+ item->symbol.c_str());
+ }
+
+ if (item->st->weight < 0 && item->priority == 0) {
+ item->priority++;
+ }
+
+ if (item->is_virtual()) {
+ if (!(item->flags & SYMBOL_TYPE_GHOST)) {
+ auto *parent = const_cast<cache_item *>(item->get_parent(*this));
+
+ if (parent == nullptr) {
+ item->resolve_parent(*this);
+ parent = const_cast<cache_item *>(item->get_parent(*this));
+ }
+
+ if (::fabs(parent->st->weight) < ::fabs(item->st->weight)) {
+ parent->st->weight = item->st->weight;
+ }
+
+ auto p1 = ::abs(item->priority);
+ auto p2 = ::abs(parent->priority);
+
+ if (p1 != p2) {
+ parent->priority = MAX(p1, p2);
+ item->priority = parent->priority;
+ }
+ }
+ }
+
+ total_weight += fabs(item->st->weight);
+ }
+
+ /* Now check each metric item and find corresponding symbol in a cache */
+ auto ret = true;
+ GHashTableIter it;
+ void *k, *v;
+ g_hash_table_iter_init(&it, cfg->symbols);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ auto ignore_symbol = false;
+ auto sym_def = (struct rspamd_symbol *) v;
+
+ if (sym_def && (sym_def->flags &
+ (RSPAMD_SYMBOL_FLAG_IGNORE_METRIC | RSPAMD_SYMBOL_FLAG_DISABLED))) {
+ ignore_symbol = true;
+ }
+
+ if (!ignore_symbol) {
+ if (!items_by_symbol.contains((const char *) k)) {
+ msg_debug_cache(
+ "symbol '%s' has its score defined but there is no "
+ "corresponding rule registered",
+ k);
+ }
+ }
+ else if (sym_def->flags & RSPAMD_SYMBOL_FLAG_DISABLED) {
+ auto item = get_item_by_name_mut((const char *) k, false);
+
+ if (item) {
+ item->enabled = FALSE;
+ }
+ }
+ }
+
+ return ret;
+}
+
+auto symcache::counters() const -> ucl_object_t *
+{
+ auto *top = ucl_object_typed_new(UCL_ARRAY);
+ constexpr const auto round_float = [](const auto x, const int digits) -> auto {
+ const auto power10 = ::pow(10, digits);
+ return (::floor(x * power10) / power10);
+ };
+
+ for (auto &pair: items_by_symbol) {
+ auto &item = pair.second;
+ auto symbol = pair.first;
+
+ auto *obj = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(obj, ucl_object_fromlstring(symbol.data(), symbol.size()),
+ "symbol", 0, false);
+
+ if (item->is_virtual()) {
+ if (!(item->flags & SYMBOL_TYPE_GHOST)) {
+ const auto *parent = item->get_parent(*this);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(item->st->weight, 3)),
+ "weight", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(parent->st->avg_frequency, 3)),
+ "frequency", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromint(parent->st->total_hits),
+ "hits", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(parent->st->avg_time, 3)),
+ "time", 0, false);
+ }
+ else {
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(item->st->weight, 3)),
+ "weight", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(0.0),
+ "frequency", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(0.0),
+ "hits", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(0.0),
+ "time", 0, false);
+ }
+ }
+ else {
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(item->st->weight, 3)),
+ "weight", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(item->st->avg_frequency, 3)),
+ "frequency", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromint(item->st->total_hits),
+ "hits", 0, false);
+ ucl_object_insert_key(obj,
+ ucl_object_fromdouble(round_float(item->st->avg_time, 3)),
+ "time", 0, false);
+ }
+
+ ucl_array_append(top, obj);
+ }
+
+ return top;
+}
+
+auto symcache::periodic_resort(struct ev_loop *ev_loop, double cur_time, double last_resort) -> void
+{
+ for (const auto &item: filters) {
+
+ if (item->update_counters_check_peak(L, ev_loop, cur_time, last_resort)) {
+ auto cur_value = (item->st->total_hits - item->last_count) /
+ (cur_time - last_resort);
+ auto cur_err = (item->st->avg_frequency - cur_value);
+ cur_err *= cur_err;
+ msg_debug_cache("peak found for %s is %.2f, avg: %.2f, "
+ "stddev: %.2f, error: %.2f, peaks: %d",
+ item->symbol.c_str(), cur_value,
+ item->st->avg_frequency,
+ item->st->stddev_frequency,
+ cur_err,
+ item->frequency_peaks);
+
+ if (peak_cb != -1) {
+ struct ev_loop **pbase;
+
+ lua_rawgeti(L, LUA_REGISTRYINDEX, peak_cb);
+ pbase = (struct ev_loop **) lua_newuserdata(L, sizeof(*pbase));
+ *pbase = ev_loop;
+ rspamd_lua_setclass(L, "rspamd{ev_base}", -1);
+ lua_pushlstring(L, item->symbol.c_str(), item->symbol.size());
+ lua_pushnumber(L, item->st->avg_frequency);
+ lua_pushnumber(L, ::sqrt(item->st->stddev_frequency));
+ lua_pushnumber(L, cur_value);
+ lua_pushnumber(L, cur_err);
+
+ if (lua_pcall(L, 6, 0, 0) != 0) {
+ msg_info_cache("call to peak function for %s failed: %s",
+ item->symbol.c_str(), lua_tostring(L, -1));
+ lua_pop(L, 1);
+ }
+ }
+ }
+ }
+}
+
+symcache::~symcache()
+{
+ if (peak_cb != -1) {
+ luaL_unref(L, LUA_REGISTRYINDEX, peak_cb);
+ }
+}
+
+auto symcache::maybe_resort() -> bool
+{
+ if (items_by_order->generation_id != cur_order_gen) {
+ /*
+ * Cache has been modified, need to resort it
+ */
+ msg_info_cache("symbols cache has been modified since last check:"
+ " old id: %ud, new id: %ud",
+ items_by_order->generation_id, cur_order_gen);
+ resort();
+
+ return true;
+ }
+
+ return false;
+}
+
+auto symcache::get_item_specific_vector(const cache_item &it) -> symcache::items_ptr_vec &
+{
+ switch (it.get_type()) {
+ case symcache_item_type::CONNFILTER:
+ return connfilters;
+ case symcache_item_type::FILTER:
+ return filters;
+ case symcache_item_type::IDEMPOTENT:
+ return idempotent;
+ case symcache_item_type::PREFILTER:
+ return prefilters;
+ case symcache_item_type::POSTFILTER:
+ return postfilters;
+ case symcache_item_type::COMPOSITE:
+ return composites;
+ case symcache_item_type::CLASSIFIER:
+ return classifiers;
+ case symcache_item_type::VIRTUAL:
+ return virtual_symbols;
+ }
+
+ RSPAMD_UNREACHABLE;
+}
+
+auto symcache::process_settings_elt(struct rspamd_config_settings_elt *elt) -> void
+{
+
+ auto id = elt->id;
+
+ if (elt->symbols_disabled) {
+ /* Process denied symbols */
+ ucl_object_iter_t iter = nullptr;
+ const ucl_object_t *cur;
+
+ while ((cur = ucl_object_iterate(elt->symbols_disabled, &iter, true)) != NULL) {
+ const auto *sym = ucl_object_key(cur);
+ auto *item = get_item_by_name_mut(sym, false);
+
+ if (item != nullptr) {
+ if (item->is_virtual()) {
+ /*
+ * Virtual symbols are special:
+ * we ignore them in symcache but prevent them from being
+ * inserted.
+ */
+ item->forbidden_ids.add_id(id);
+ msg_debug_cache("deny virtual symbol %s for settings %ud (%s); "
+ "parent can still be executed",
+ sym, id, elt->name);
+ }
+ else {
+ /* Normal symbol, disable it */
+ item->forbidden_ids.add_id(id);
+ msg_debug_cache("deny symbol %s for settings %ud (%s)",
+ sym, id, elt->name);
+ }
+ }
+ else {
+ msg_warn_cache("cannot find a symbol to disable %s "
+ "when processing settings %ud (%s)",
+ sym, id, elt->name);
+ }
+ }
+ }
+
+ if (elt->symbols_enabled) {
+ ucl_object_iter_t iter = nullptr;
+ const ucl_object_t *cur;
+
+ while ((cur = ucl_object_iterate(elt->symbols_enabled, &iter, true)) != nullptr) {
+ /* Here, we resolve parent and explicitly allow it */
+ const auto *sym = ucl_object_key(cur);
+
+ auto *item = get_item_by_name_mut(sym, false);
+
+ if (item != nullptr) {
+ if (item->is_virtual()) {
+ auto *parent = get_item_by_name_mut(sym, true);
+
+ if (parent) {
+ if (elt->symbols_disabled &&
+ ucl_object_lookup(elt->symbols_disabled, parent->symbol.data())) {
+ msg_err_cache("conflict in %s: cannot enable disabled symbol %s, "
+ "wanted to enable symbol %s",
+ elt->name, parent->symbol.data(), sym);
+ continue;
+ }
+
+ parent->exec_only_ids.add_id(id);
+ msg_debug_cache("allow just execution of symbol %s for settings %ud (%s)",
+ parent->symbol.data(), id, elt->name);
+ }
+ }
+
+ item->allowed_ids.add_id(id);
+ msg_debug_cache("allow execution of symbol %s for settings %ud (%s)",
+ sym, id, elt->name);
+ }
+ else {
+ msg_warn_cache("cannot find a symbol to enable %s "
+ "when processing settings %ud (%s)",
+ sym, id, elt->name);
+ }
+ }
+ }
+}
+
+auto symcache::get_max_timeout(std::vector<std::pair<double, const cache_item *>> &elts) const -> double
+{
+ auto accumulated_timeout = 0.0;
+ auto log_func = RSPAMD_LOG_FUNC;
+ ankerl::unordered_dense::set<const cache_item *> seen_items;
+
+ auto get_item_timeout = [](cache_item *it) {
+ return it->get_numeric_augmentation("timeout").value_or(0.0);
+ };
+
+ /* This function returns the timeout for an item and all it's dependencies */
+ auto get_filter_timeout = [&](cache_item *it, auto self) -> double {
+ auto own_timeout = get_item_timeout(it);
+ auto max_child_timeout = 0.0;
+
+ for (const auto &dep: it->deps) {
+ auto cld_timeout = self(dep.item, self);
+
+ if (cld_timeout > max_child_timeout) {
+ max_child_timeout = cld_timeout;
+ }
+ }
+
+ return own_timeout + max_child_timeout;
+ };
+
+ /* For prefilters and postfilters, we just care about priorities */
+ auto pre_postfilter_iter = [&](const items_ptr_vec &vec) -> double {
+ auto saved_priority = -1;
+ auto max_timeout = 0.0, added_timeout = 0.0;
+ const cache_item *max_elt = nullptr;
+ for (const auto &it: vec) {
+ if (it->priority != saved_priority && max_elt != nullptr && max_timeout > 0) {
+ if (!seen_items.contains(max_elt)) {
+ accumulated_timeout += max_timeout;
+ added_timeout += max_timeout;
+
+ msg_debug_cache_lambda("added %.2f to the timeout (%.2f) as the priority has changed (%d -> %d); "
+ "symbol: %s",
+ max_timeout, accumulated_timeout, saved_priority, it->priority,
+ max_elt->symbol.c_str());
+ elts.emplace_back(max_timeout, max_elt);
+ seen_items.insert(max_elt);
+ }
+ max_timeout = 0;
+ saved_priority = it->priority;
+ max_elt = nullptr;
+ }
+
+ auto timeout = get_item_timeout(it);
+
+ if (timeout > max_timeout) {
+ max_timeout = timeout;
+ max_elt = it;
+ }
+ }
+
+ if (max_elt != nullptr && max_timeout > 0) {
+ if (!seen_items.contains(max_elt)) {
+ accumulated_timeout += max_timeout;
+ added_timeout += max_timeout;
+
+ msg_debug_cache_lambda("added %.2f to the timeout (%.2f) end of processing; "
+ "symbol: %s",
+ max_timeout, accumulated_timeout,
+ max_elt->symbol.c_str());
+ elts.emplace_back(max_timeout, max_elt);
+ seen_items.insert(max_elt);
+ }
+ }
+
+ return added_timeout;
+ };
+
+ auto prefilters_timeout = pre_postfilter_iter(this->prefilters);
+
+ /* For normal filters, we check the maximum chain of the dependencies
+ * This function might have O(N^2) complexity if all symbols are in a single
+ * dependencies chain. But it is not the case in practice
+ */
+ double max_filters_timeout = 0;
+ for (const auto &it: this->filters) {
+ auto timeout = get_filter_timeout(it, get_filter_timeout);
+
+ if (timeout > max_filters_timeout) {
+ max_filters_timeout = timeout;
+ if (!seen_items.contains(it)) {
+ elts.emplace_back(timeout, it);
+ seen_items.insert(it);
+ }
+ }
+ }
+
+ accumulated_timeout += max_filters_timeout;
+
+ auto postfilters_timeout = pre_postfilter_iter(this->postfilters);
+ auto idempotent_timeout = pre_postfilter_iter(this->idempotent);
+
+ /* Sort in decreasing order by timeout */
+ std::stable_sort(std::begin(elts), std::end(elts),
+ [](const auto &p1, const auto &p2) {
+ return p1.first > p2.first;
+ });
+
+ msg_debug_cache("overall cache timeout: %.2f, %.2f from prefilters,"
+ " %.2f from postfilters, %.2f from idempotent filters,"
+ " %.2f from normal filters",
+ accumulated_timeout, prefilters_timeout, postfilters_timeout,
+ idempotent_timeout, max_filters_timeout);
+
+ return accumulated_timeout;
+}
+
+}// namespace rspamd::symcache \ No newline at end of file
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
new file mode 100644
index 0000000..255a4b1
--- /dev/null
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -0,0 +1,652 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal C++ structures and classes for symcache
+ */
+
+#ifndef RSPAMD_SYMCACHE_INTERNAL_HXX
+#define RSPAMD_SYMCACHE_INTERNAL_HXX
+#pragma once
+
+#include <cmath>
+#include <cstdlib>
+#include <cstdint>
+#include <utility>
+#include <vector>
+#include <string>
+#include <string_view>
+#include <memory>
+#include <variant>
+
+#include "rspamd_symcache.h"
+#include "contrib/libev/ev.h"
+#include "contrib/ankerl/unordered_dense.h"
+#include "contrib/expected/expected.hpp"
+#include "cfg_file.h"
+
+#include "symcache_id_list.hxx"
+
+#define msg_err_cache(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "symcache", log_tag(), \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_err_cache_lambda(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "symcache", log_tag(), \
+ log_func, \
+ __VA_ARGS__)
+#define msg_err_cache_task(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "symcache", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_cache(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "symcache", log_tag(), \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_cache(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "symcache", log_tag(), \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_cache(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_cache_lambda(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
+ log_func, \
+ __VA_ARGS__)
+#define msg_debug_cache_task(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_debug_cache_task_lambda(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
+ log_func, \
+ __VA_ARGS__)
+
+struct lua_State;
+
+namespace rspamd::symcache {
+
+/* Defined in symcache_impl.cxx */
+extern int rspamd_symcache_log_id;
+
+static const std::uint8_t symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0};
+
+struct symcache_header {
+ std::uint8_t magic[8];
+ unsigned int nitems;
+ std::uint8_t checksum[64];
+ std::uint8_t unused[128];
+};
+
+struct cache_item;
+using cache_item_ptr = std::shared_ptr<cache_item>;
+
+/**
+ * This structure is intended to keep the current ordering for all symbols
+ * It is designed to be shared among all tasks and keep references to the real
+ * symbols.
+ * If some symbol has been added or removed to the symbol cache, it will not affect
+ * the current order, and it will only be regenerated for the subsequent tasks.
+ * This allows safe and no copy sharing and keeping track of all symbols in the
+ * cache runtime.
+ */
+struct order_generation {
+ /* All items ordered */
+ std::vector<cache_item_ptr> d;
+ /* Mapping from symbol name to the position in the order array */
+ ankerl::unordered_dense::map<std::string_view, unsigned int> by_symbol;
+ /* Mapping from symbol id to the position in the order array */
+ ankerl::unordered_dense::map<unsigned int, unsigned int> by_cache_id;
+ /* It matches cache->generation_id; if not, a fresh ordering is required */
+ unsigned int generation_id;
+
+ explicit order_generation(std::size_t nelts, unsigned id)
+ : generation_id(id)
+ {
+ d.reserve(nelts);
+ by_symbol.reserve(nelts);
+ by_cache_id.reserve(nelts);
+ }
+
+ auto size() const -> auto
+ {
+ return d.size();
+ }
+};
+
+using order_generation_ptr = std::shared_ptr<order_generation>;
+
+
+struct delayed_cache_dependency {
+ std::string from;
+ std::string to;
+
+ delayed_cache_dependency(std::string_view _from, std::string_view _to)
+ : from(_from), to(_to)
+ {
+ }
+};
+
+struct delayed_cache_condition {
+ std::string sym;
+ int cbref;
+ lua_State *L;
+
+public:
+ delayed_cache_condition(std::string_view sym, int cbref, lua_State *L)
+ : sym(sym), cbref(cbref), L(L)
+ {
+ }
+};
+
+class delayed_symbol_elt {
+private:
+ std::variant<std::string, rspamd_regexp_t *> content;
+
+public:
+ /* Disable copy */
+ delayed_symbol_elt() = delete;
+ delayed_symbol_elt(const delayed_symbol_elt &) = delete;
+ delayed_symbol_elt &operator=(const delayed_symbol_elt &) = delete;
+ /* Enable move */
+ delayed_symbol_elt(delayed_symbol_elt &&other) noexcept = default;
+ delayed_symbol_elt &operator=(delayed_symbol_elt &&other) noexcept = default;
+
+ explicit delayed_symbol_elt(std::string_view elt) noexcept
+ {
+ if (!elt.empty() && elt[0] == '/') {
+ /* Possibly regexp */
+ auto *re = rspamd_regexp_new_len(elt.data(), elt.size(), nullptr, nullptr);
+
+ if (re != nullptr) {
+ std::get<rspamd_regexp_t *>(content) = re;
+ }
+ else {
+ std::get<std::string>(content) = elt;
+ }
+ }
+ else {
+ std::get<std::string>(content) = elt;
+ }
+ }
+
+ ~delayed_symbol_elt()
+ {
+ if (std::holds_alternative<rspamd_regexp_t *>(content)) {
+ rspamd_regexp_unref(std::get<rspamd_regexp_t *>(content));
+ }
+ }
+
+ auto matches(std::string_view what) const -> bool
+ {
+ return std::visit([&](auto &elt) {
+ using T = typeof(elt);
+ if constexpr (std::is_same_v<T, rspamd_regexp_t *>) {
+ if (rspamd_regexp_match(elt, what.data(), what.size(), false)) {
+ return true;
+ }
+ }
+ else if constexpr (std::is_same_v<T, std::string>) {
+ return elt == what;
+ }
+
+ return false;
+ },
+ content);
+ }
+
+ auto to_string_view() const -> std::string_view
+ {
+ return std::visit([&](auto &elt) {
+ using T = typeof(elt);
+ if constexpr (std::is_same_v<T, rspamd_regexp_t *>) {
+ return std::string_view{rspamd_regexp_get_pattern(elt)};
+ }
+ else if constexpr (std::is_same_v<T, std::string>) {
+ return std::string_view{elt};
+ }
+
+ return std::string_view{};
+ },
+ content);
+ }
+};
+
+struct delayed_symbol_elt_equal {
+ using is_transparent = void;
+ auto operator()(const delayed_symbol_elt &a, const delayed_symbol_elt &b) const
+ {
+ return a.to_string_view() == b.to_string_view();
+ }
+ auto operator()(const delayed_symbol_elt &a, const std::string_view &b) const
+ {
+ return a.to_string_view() == b;
+ }
+ auto operator()(const std::string_view &a, const delayed_symbol_elt &b) const
+ {
+ return a == b.to_string_view();
+ }
+};
+
+struct delayed_symbol_elt_hash {
+ using is_transparent = void;
+ auto operator()(const delayed_symbol_elt &a) const
+ {
+ return ankerl::unordered_dense::hash<std::string_view>()(a.to_string_view());
+ }
+ auto operator()(const std::string_view &a) const
+ {
+ return ankerl::unordered_dense::hash<std::string_view>()(a);
+ }
+};
+
+class symcache {
+private:
+ using items_ptr_vec = std::vector<cache_item *>;
+ /* Map indexed by symbol name: all symbols must have unique names, so this map holds ownership */
+ ankerl::unordered_dense::map<std::string_view, cache_item *> items_by_symbol;
+ ankerl::unordered_dense::map<int, cache_item_ptr> items_by_id;
+
+ /* Items sorted into some order */
+ order_generation_ptr items_by_order;
+ unsigned int cur_order_gen;
+
+ /* Specific vectors for execution/iteration */
+ items_ptr_vec connfilters;
+ items_ptr_vec prefilters;
+ items_ptr_vec filters;
+ items_ptr_vec postfilters;
+ items_ptr_vec composites;
+ items_ptr_vec idempotent;
+ items_ptr_vec classifiers;
+ items_ptr_vec virtual_symbols;
+
+ /* These are stored within pointer to clean up after init */
+ std::unique_ptr<std::vector<delayed_cache_dependency>> delayed_deps;
+ std::unique_ptr<std::vector<delayed_cache_condition>> delayed_conditions;
+ /* Delayed statically enabled or disabled symbols */
+ using delayed_symbol_names = ankerl::unordered_dense::set<delayed_symbol_elt,
+ delayed_symbol_elt_hash, delayed_symbol_elt_equal>;
+ std::unique_ptr<delayed_symbol_names> disabled_symbols;
+ std::unique_ptr<delayed_symbol_names> enabled_symbols;
+
+ rspamd_mempool_t *static_pool;
+ std::uint64_t cksum;
+ double total_weight;
+ std::size_t stats_symbols_count;
+
+private:
+ std::uint64_t total_hits;
+
+ struct rspamd_config *cfg;
+ lua_State *L;
+ double reload_time;
+ double last_profile;
+
+private:
+ int peak_cb;
+ int cache_id;
+
+private:
+ /* Internal methods */
+ auto load_items() -> bool;
+ auto resort() -> void;
+ auto get_item_specific_vector(const cache_item &) -> items_ptr_vec &;
+ /* Helper for g_hash_table_foreach */
+ static auto metric_connect_cb(void *k, void *v, void *ud) -> void;
+
+public:
+ explicit symcache(struct rspamd_config *cfg)
+ : cfg(cfg)
+ {
+ /* XXX: do we need a special pool for symcache? I don't think so */
+ static_pool = cfg->cfg_pool;
+ reload_time = cfg->cache_reload_time;
+ total_hits = 1;
+ total_weight = 1.0;
+ cksum = 0xdeadbabe;
+ peak_cb = -1;
+ cache_id = rspamd_random_uint64_fast();
+ L = (lua_State *) cfg->lua_state;
+ delayed_conditions = std::make_unique<std::vector<delayed_cache_condition>>();
+ delayed_deps = std::make_unique<std::vector<delayed_cache_dependency>>();
+ }
+
+ virtual ~symcache();
+
+ /**
+ * Saves items on disk (if possible)
+ * @return
+ */
+ auto save_items() const -> bool;
+
+ /**
+ * Get an item by ID
+ * @param id
+ * @param resolve_parent
+ * @return
+ */
+ auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *;
+ auto get_item_by_id_mut(int id, bool resolve_parent) const -> cache_item *;
+ /**
+ * Get an item by it's name
+ * @param name
+ * @param resolve_parent
+ * @return
+ */
+ auto get_item_by_name(std::string_view name, bool resolve_parent) const -> const cache_item *;
+ /**
+ * Get an item by it's name, mutable pointer
+ * @param name
+ * @param resolve_parent
+ * @return
+ */
+ auto get_item_by_name_mut(std::string_view name, bool resolve_parent) const -> cache_item *;
+
+ /**
+ * Add a direct dependency
+ * @param id_from
+ * @param to
+ * @param virtual_id_from
+ * @return
+ */
+ auto add_dependency(int id_from, std::string_view to, int virtual_id_from) -> void;
+
+ /**
+ * Add a delayed dependency between symbols that will be resolved on the init stage
+ * @param from
+ * @param to
+ */
+ auto add_delayed_dependency(std::string_view from, std::string_view to) -> void
+ {
+ if (!delayed_deps) {
+ delayed_deps = std::make_unique<std::vector<delayed_cache_dependency>>();
+ }
+
+ delayed_deps->emplace_back(from, to);
+ }
+
+ /**
+ * Adds a symbol to the list of the disabled symbols
+ * @param sym
+ * @return
+ */
+ auto disable_symbol_delayed(std::string_view sym) -> bool
+ {
+ if (!disabled_symbols) {
+ disabled_symbols = std::make_unique<delayed_symbol_names>();
+ }
+
+ if (!disabled_symbols->contains(sym)) {
+ disabled_symbols->emplace(sym);
+
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * Adds a symbol to the list of the enabled symbols
+ * @param sym
+ * @return
+ */
+ auto enable_symbol_delayed(std::string_view sym) -> bool
+ {
+ if (!enabled_symbols) {
+ enabled_symbols = std::make_unique<delayed_symbol_names>();
+ }
+
+ if (!enabled_symbols->contains(sym)) {
+ enabled_symbols->emplace(sym);
+
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * Initialises the symbols cache, must be called after all symbols are added
+ * and the config file is loaded
+ */
+ auto init() -> bool;
+
+ /**
+ * Log helper that returns cfg checksum
+ * @return
+ */
+ auto log_tag() const -> const char *
+ {
+ return cfg->checksum;
+ }
+
+ /**
+ * Helper to return a memory pool associated with the cache
+ * @return
+ */
+ auto get_pool() const
+ {
+ return static_pool;
+ }
+
+ /**
+ * A method to add a generic symbol with a callback to couple with C API
+ * @param name name of the symbol, unlike C API it must be "" for callback only (compat) symbols, in this case an automatic name is generated
+ * @param priority
+ * @param func
+ * @param user_data
+ * @param flags_and_type mix of flags and type in a messy C enum
+ * @return id of a new symbol or -1 in case of failure
+ */
+ auto add_symbol_with_callback(std::string_view name,
+ int priority,
+ symbol_func_t func,
+ void *user_data,
+ int flags_and_type) -> int;
+ /**
+ * A method to add a generic virtual symbol with no function associated
+ * @param name must have some value, or a fatal error will strike you
+ * @param parent_id if this param is -1 then this symbol is associated with nothing
+ * @param flags_and_type mix of flags and type in a messy C enum
+ * @return id of a new symbol or -1 in case of failure
+ */
+ auto add_virtual_symbol(std::string_view name, int parent_id,
+ int flags_and_type) -> int;
+
+ /**
+ * Sets a lua callback to be called on peaks in execution time
+ * @param cbref
+ */
+ auto set_peak_cb(int cbref) -> void;
+
+ /**
+ * Add a delayed condition for a symbol that might not be registered yet
+ * @param sym
+ * @param cbref
+ */
+ auto add_delayed_condition(std::string_view sym, int cbref) -> void;
+
+ /**
+ * Returns number of symbols that needs to be checked in statistical algorithm
+ * @return
+ */
+ auto get_stats_symbols_count() const
+ {
+ return stats_symbols_count;
+ }
+
+ /**
+ * Returns a checksum for the cache
+ * @return
+ */
+ auto get_cksum() const
+ {
+ return cksum;
+ }
+
+ /**
+ * Validate symbols in the cache
+ * @param strict
+ * @return
+ */
+ auto validate(bool strict) -> bool;
+
+ /**
+ * Returns counters for the cache
+ * @return
+ */
+ auto counters() const -> ucl_object_t *;
+
+ /**
+ * Adjusts stats of the cache for the periodic counter
+ */
+ auto periodic_resort(struct ev_loop *ev_loop, double cur_time, double last_resort) -> void;
+
+ /**
+ * A simple helper to get the reload time
+ * @return
+ */
+ auto get_reload_time() const
+ {
+ return reload_time;
+ };
+
+ /**
+ * Iterate over all symbols using a specific functor
+ * @tparam Functor
+ * @param f
+ */
+ template<typename Functor>
+ auto symbols_foreach(Functor f) -> void
+ {
+ for (const auto &sym_it: items_by_symbol) {
+ f(sym_it.second);
+ }
+ }
+
+ /**
+ * Iterate over all composites using a specific functor
+ * @tparam Functor
+ * @param f
+ */
+ template<typename Functor>
+ auto composites_foreach(Functor f) -> void
+ {
+ for (const auto &sym_it: composites) {
+ f(sym_it);
+ }
+ }
+
+ /**
+ * Iterate over all composites using a specific functor
+ * @tparam Functor
+ * @param f
+ */
+ template<typename Functor>
+ auto connfilters_foreach(Functor f) -> bool
+ {
+ return std::all_of(std::begin(connfilters), std::end(connfilters),
+ [&](const auto &sym_it) {
+ return f(sym_it);
+ });
+ }
+ template<typename Functor>
+ auto prefilters_foreach(Functor f) -> bool
+ {
+ return std::all_of(std::begin(prefilters), std::end(prefilters),
+ [&](const auto &sym_it) {
+ return f(sym_it);
+ });
+ }
+ template<typename Functor>
+ auto postfilters_foreach(Functor f) -> bool
+ {
+ return std::all_of(std::begin(postfilters), std::end(postfilters),
+ [&](const auto &sym_it) {
+ return f(sym_it);
+ });
+ }
+ template<typename Functor>
+ auto idempotent_foreach(Functor f) -> bool
+ {
+ return std::all_of(std::begin(idempotent), std::end(idempotent),
+ [&](const auto &sym_it) {
+ return f(sym_it);
+ });
+ }
+ template<typename Functor>
+ auto filters_foreach(Functor f) -> bool
+ {
+ return std::all_of(std::begin(filters), std::end(filters),
+ [&](const auto &sym_it) {
+ return f(sym_it);
+ });
+ }
+
+ /**
+ * Resort cache if anything has been changed since last time
+ * @return
+ */
+ auto maybe_resort() -> bool;
+
+ /**
+ * Returns current set of items ordered for sharing ownership
+ * @return
+ */
+ auto get_cache_order() const -> auto
+ {
+ return items_by_order;
+ }
+
+ /**
+ * Get last profile timestamp
+ * @return
+ */
+ auto get_last_profile() const -> auto
+ {
+ return last_profile;
+ }
+
+ /**
+ * Sets last profile timestamp
+ * @param last_profile
+ * @return
+ */
+ auto set_last_profile(double last_profile)
+ {
+ symcache::last_profile = last_profile;
+ }
+
+ /**
+ * Process settings elt identified by id
+ * @param elt
+ */
+ auto process_settings_elt(struct rspamd_config_settings_elt *elt) -> void;
+
+ /**
+ * Returns maximum timeout that is requested by all rules
+ * @return
+ */
+ auto get_max_timeout(std::vector<std::pair<double, const cache_item *>> &elts) const -> double;
+};
+
+
+}// namespace rspamd::symcache
+
+#endif//RSPAMD_SYMCACHE_INTERNAL_HXX
diff --git a/src/libserver/symcache/symcache_item.cxx b/src/libserver/symcache/symcache_item.cxx
new file mode 100644
index 0000000..ac901f5
--- /dev/null
+++ b/src/libserver/symcache/symcache_item.cxx
@@ -0,0 +1,652 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lua/lua_common.h"
+#include "symcache_internal.hxx"
+#include "symcache_item.hxx"
+#include "fmt/core.h"
+#include "libserver/task.h"
+#include "libutil/cxx/util.hxx"
+#include <numeric>
+#include <functional>
+
+namespace rspamd::symcache {
+
+enum class augmentation_value_type {
+ NO_VALUE,
+ STRING_VALUE,
+ NUMBER_VALUE,
+};
+
+struct augmentation_info {
+ int weight = 0;
+ int implied_flags = 0;
+ augmentation_value_type value_type = augmentation_value_type::NO_VALUE;
+};
+
+/* A list of internal augmentations that are known to Rspamd with their weight */
+static const auto known_augmentations =
+ ankerl::unordered_dense::map<std::string, augmentation_info, rspamd::smart_str_hash, rspamd::smart_str_equal>{
+ {"passthrough", {.weight = 10, .implied_flags = SYMBOL_TYPE_IGNORE_PASSTHROUGH}},
+ {"single_network", {.weight = 1, .implied_flags = 0}},
+ {"no_network", {.weight = 0, .implied_flags = 0}},
+ {"many_network", {.weight = 1, .implied_flags = 0}},
+ {"important", {.weight = 5, .implied_flags = SYMBOL_TYPE_FINE}},
+ {"timeout", {
+ .weight = 0,
+ .implied_flags = 0,
+ .value_type = augmentation_value_type::NUMBER_VALUE,
+ }}};
+
+auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
+{
+ if (is_virtual()) {
+ const auto &virtual_sp = std::get<virtual_item>(specific);
+
+ return virtual_sp.get_parent(cache);
+ }
+
+ return nullptr;
+}
+
+auto cache_item::get_parent_mut(const symcache &cache) -> cache_item *
+{
+ if (is_virtual()) {
+ auto &virtual_sp = std::get<virtual_item>(specific);
+
+ return virtual_sp.get_parent_mut(cache);
+ }
+
+ return nullptr;
+}
+
+auto cache_item::process_deps(const symcache &cache) -> void
+{
+ /* Allow logging macros to work */
+ auto log_tag = [&]() { return cache.log_tag(); };
+
+ for (auto &dep: deps) {
+ msg_debug_cache("process real dependency %s on %s", symbol.c_str(), dep.sym.c_str());
+ auto *dit = cache.get_item_by_name_mut(dep.sym, true);
+
+ if (dep.vid >= 0) {
+ /* Case of the virtual symbol that depends on another (maybe virtual) symbol */
+ const auto *vdit = cache.get_item_by_name(dep.sym, false);
+
+ if (!vdit) {
+ if (dit) {
+ msg_err_cache("cannot add dependency from %s on %s: no dependency symbol registered",
+ dep.sym.c_str(), dit->symbol.c_str());
+ }
+ }
+ else {
+ msg_debug_cache("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(),
+ dep.vid, vdit->symbol.c_str(), vdit->id);
+
+ unsigned nids = 0;
+
+ /* Propagate ids */
+ msg_debug_cache("check id propagation for dependency %s from %s",
+ symbol.c_str(), dit->symbol.c_str());
+
+ const auto *ids = dit->allowed_ids.get_ids(nids);
+
+ if (nids > 0) {
+ msg_debug_cache("propagate allowed ids from %s to %s",
+ dit->symbol.c_str(), symbol.c_str());
+
+ allowed_ids.set_ids(ids, nids);
+ }
+
+ ids = dit->forbidden_ids.get_ids(nids);
+
+ if (nids > 0) {
+ msg_debug_cache("propagate forbidden ids from %s to %s",
+ dit->symbol.c_str(), symbol.c_str());
+
+ forbidden_ids.set_ids(ids, nids);
+ }
+ }
+ }
+
+ if (dit != nullptr) {
+ if (!dit->is_filter()) {
+ /*
+ * Check sanity:
+ * - filters -> prefilter dependency is OK and always satisfied
+ * - postfilter -> (filter, prefilter) dep is ok
+ * - idempotent -> (any) dep is OK
+ *
+ * Otherwise, emit error
+ * However, even if everything is fine this dep is useless ¯\_(ツ)_/¯
+ */
+ auto ok_dep = false;
+
+ if (dit->get_type() == type) {
+ ok_dep = true;
+ }
+ else if (type < dit->get_type()) {
+ ok_dep = true;
+ }
+
+ if (!ok_dep) {
+ msg_err_cache("cannot add dependency from %s on %s: invalid symbol types",
+ dep.sym.c_str(), symbol.c_str());
+
+ continue;
+ }
+ }
+ else {
+ if (dit->id == id) {
+ msg_err_cache("cannot add dependency on self: %s -> %s "
+ "(resolved to %s)",
+ symbol.c_str(), dep.sym.c_str(), dit->symbol.c_str());
+ }
+ else {
+ /* Create a reverse dep */
+ if (is_virtual()) {
+ auto *parent = get_parent_mut(cache);
+
+ if (parent) {
+ dit->rdeps.emplace_back(parent, parent->symbol, parent->id, -1);
+ dep.item = dit;
+ dep.id = dit->id;
+
+ msg_debug_cache("added reverse dependency from %d on %d", parent->id,
+ dit->id);
+ }
+ }
+ else {
+ dep.item = dit;
+ dep.id = dit->id;
+ dit->rdeps.emplace_back(this, symbol, id, -1);
+ msg_debug_cache("added reverse dependency from %d on %d", id,
+ dit->id);
+ }
+ }
+ }
+ }
+ else if (dep.id >= 0) {
+ msg_err_cache("cannot find dependency on symbol %s for symbol %s",
+ dep.sym.c_str(), symbol.c_str());
+
+ continue;
+ }
+ }
+
+ // Remove empty deps
+ deps.erase(std::remove_if(std::begin(deps), std::end(deps),
+ [](const auto &dep) { return !dep.item; }),
+ std::end(deps));
+}
+
+auto cache_item::resolve_parent(const symcache &cache) -> bool
+{
+ auto log_tag = [&]() { return cache.log_tag(); };
+
+ if (is_virtual()) {
+ auto &virt = std::get<virtual_item>(specific);
+
+ if (virt.get_parent(cache)) {
+ msg_debug_cache("trying to resolve parent twice for %s", symbol.c_str());
+
+ return false;
+ }
+
+ return virt.resolve_parent(cache);
+ }
+ else {
+ msg_warn_cache("trying to resolve a parent for non-virtual symbol %s", symbol.c_str());
+ }
+
+ return false;
+}
+
+auto cache_item::update_counters_check_peak(lua_State *L,
+ struct ev_loop *ev_loop,
+ double cur_time,
+ double last_resort) -> bool
+{
+ auto ret = false;
+ static const double decay_rate = 0.25;
+
+ st->total_hits += st->hits;
+ g_atomic_int_set(&st->hits, 0);
+
+ if (last_count > 0) {
+ auto cur_value = (st->total_hits - last_count) /
+ (cur_time - last_resort);
+ rspamd_set_counter_ema(&st->frequency_counter,
+ cur_value, decay_rate);
+ st->avg_frequency = st->frequency_counter.mean;
+ st->stddev_frequency = st->frequency_counter.stddev;
+
+ auto cur_err = (st->avg_frequency - cur_value);
+ cur_err *= cur_err;
+
+ if (st->frequency_counter.number > 10 &&
+ cur_err > ::sqrt(st->stddev_frequency) * 3) {
+ frequency_peaks++;
+ ret = true;
+ }
+ }
+
+ last_count = st->total_hits;
+
+ if (cd->number > 0) {
+ if (!is_virtual()) {
+ st->avg_time = cd->mean;
+ rspamd_set_counter_ema(&st->time_counter,
+ st->avg_time, decay_rate);
+ st->avg_time = st->time_counter.mean;
+ memset(cd, 0, sizeof(*cd));
+ }
+ }
+
+ return ret;
+}
+
+auto cache_item::inc_frequency(const char *sym_name, symcache &cache) -> void
+{
+ if (sym_name && symbol != sym_name) {
+ if (is_filter()) {
+ const auto *children = get_children();
+ if (children) {
+ /* Likely a callback symbol with some virtual symbol that needs to be adjusted */
+ for (const auto &cld: *children) {
+ if (cld->get_name() == sym_name) {
+ cld->inc_frequency(sym_name, cache);
+ }
+ }
+ }
+ }
+ else {
+ /* Name not equal to symbol name, so we need to find the proper name */
+ auto *another_item = cache.get_item_by_name_mut(sym_name, false);
+ if (another_item != nullptr) {
+ another_item->inc_frequency(sym_name, cache);
+ }
+ }
+ }
+ else {
+ /* Symbol and sym name are the same */
+ g_atomic_int_inc(&st->hits);
+ }
+}
+
+auto cache_item::get_type_str() const -> const char *
+{
+ switch (type) {
+ case symcache_item_type::CONNFILTER:
+ return "connfilter";
+ case symcache_item_type::FILTER:
+ return "filter";
+ case symcache_item_type::IDEMPOTENT:
+ return "idempotent";
+ case symcache_item_type::PREFILTER:
+ return "prefilter";
+ case symcache_item_type::POSTFILTER:
+ return "postfilter";
+ case symcache_item_type::COMPOSITE:
+ return "composite";
+ case symcache_item_type::CLASSIFIER:
+ return "classifier";
+ case symcache_item_type::VIRTUAL:
+ return "virtual";
+ }
+
+ RSPAMD_UNREACHABLE;
+}
+
+auto cache_item::is_allowed(struct rspamd_task *task, bool exec_only) const -> bool
+{
+ const auto *what = "execution";
+
+ if (!exec_only) {
+ what = "symbol insertion";
+ }
+
+ /* Static checks */
+ if (!enabled ||
+ (RSPAMD_TASK_IS_EMPTY(task) && !(flags & SYMBOL_TYPE_EMPTY)) ||
+ (flags & SYMBOL_TYPE_MIME_ONLY && !RSPAMD_TASK_IS_MIME(task))) {
+
+ if (!enabled) {
+ msg_debug_cache_task("skipping %s of %s as it is permanently disabled",
+ what, symbol.c_str());
+
+ return false;
+ }
+ else {
+ /*
+ * If we check merely execution (not insertion), then we disallow
+ * mime symbols for non mime tasks and vice versa
+ */
+ if (exec_only) {
+ msg_debug_cache_task("skipping check of %s as it cannot be "
+ "executed for this task type",
+ symbol.c_str());
+
+ return FALSE;
+ }
+ }
+ }
+
+ /* Settings checks */
+ if (task->settings_elt != nullptr) {
+ if (forbidden_ids.check_id(task->settings_elt->id)) {
+ msg_debug_cache_task("deny %s of %s as it is forbidden for "
+ "settings id %ud",
+ what,
+ symbol.c_str(),
+ task->settings_elt->id);
+
+ return false;
+ }
+
+ if (!(flags & SYMBOL_TYPE_EXPLICIT_DISABLE)) {
+ if (!allowed_ids.check_id(task->settings_elt->id)) {
+
+ if (task->settings_elt->policy == RSPAMD_SETTINGS_POLICY_IMPLICIT_ALLOW) {
+ msg_debug_cache_task("allow execution of %s settings id %ud "
+ "allows implicit execution of the symbols;",
+ symbol.c_str(),
+ id);
+
+ return true;
+ }
+
+ if (exec_only) {
+ /*
+ * Special case if any of our virtual children are enabled
+ */
+ if (exec_only_ids.check_id(task->settings_elt->id)) {
+ return true;
+ }
+ }
+
+ msg_debug_cache_task("deny %s of %s as it is not listed "
+ "as allowed for settings id %ud",
+ what,
+ symbol.c_str(),
+ task->settings_elt->id);
+ return false;
+ }
+ }
+ else {
+ msg_debug_cache_task("allow %s of %s for "
+ "settings id %ud as it can be only disabled explicitly",
+ what,
+ symbol.c_str(),
+ task->settings_elt->id);
+ }
+ }
+ else if (flags & SYMBOL_TYPE_EXPLICIT_ENABLE) {
+ msg_debug_cache_task("deny %s of %s as it must be explicitly enabled",
+ what,
+ symbol.c_str());
+ return false;
+ }
+
+ /* Allow all symbols with no settings id */
+ return true;
+}
+
+auto cache_item::add_augmentation(const symcache &cache, std::string_view augmentation,
+ std::optional<std::string_view> value) -> bool
+{
+ auto log_tag = [&]() { return cache.log_tag(); };
+
+ if (augmentations.contains(augmentation)) {
+ msg_warn_cache("duplicate augmentation: %s", augmentation.data());
+
+ return false;
+ }
+
+ auto maybe_known = rspamd::find_map(known_augmentations, augmentation);
+
+ if (maybe_known.has_value()) {
+ auto &known_info = maybe_known.value().get();
+
+ if (known_info.implied_flags) {
+ if ((known_info.implied_flags & flags) == 0) {
+ msg_info_cache("added implied flags (%bd) for symbol %s as it has %s augmentation",
+ known_info.implied_flags, symbol.data(), augmentation.data());
+ flags |= known_info.implied_flags;
+ }
+ }
+
+ if (known_info.value_type == augmentation_value_type::NO_VALUE) {
+ if (value.has_value()) {
+ msg_err_cache("value specified for augmentation %s, that has no value",
+ augmentation.data());
+
+ return false;
+ }
+ return augmentations.try_emplace(augmentation, known_info.weight).second;
+ }
+ else {
+ if (!value.has_value()) {
+ msg_err_cache("value is not specified for augmentation %s, that requires explicit value",
+ augmentation.data());
+
+ return false;
+ }
+
+ if (known_info.value_type == augmentation_value_type::STRING_VALUE) {
+ return augmentations.try_emplace(augmentation, std::string{value.value()},
+ known_info.weight)
+ .second;
+ }
+ else if (known_info.value_type == augmentation_value_type::NUMBER_VALUE) {
+ /* I wish it was supported properly */
+ //auto conv_res = std::from_chars(value->data(), value->size(), num);
+ char numbuf[128], *endptr = nullptr;
+ rspamd_strlcpy(numbuf, value->data(), MIN(value->size(), sizeof(numbuf)));
+ auto num = g_ascii_strtod(numbuf, &endptr);
+
+ if (fabs(num) >= G_MAXFLOAT || std::isnan(num)) {
+ msg_err_cache("value for augmentation %s is not numeric: %*s",
+ augmentation.data(),
+ (int) value->size(), value->data());
+ return false;
+ }
+
+ return augmentations.try_emplace(augmentation, num,
+ known_info.weight)
+ .second;
+ }
+ }
+ }
+ else {
+ msg_debug_cache("added unknown augmentation %s for symbol %s",
+ "unknown", augmentation.data(), symbol.data());
+ return augmentations.try_emplace(augmentation, 0).second;
+ }
+
+ // Should not be reached
+ return false;
+}
+
+auto cache_item::get_augmentation_weight() const -> int
+{
+ return std::accumulate(std::begin(augmentations), std::end(augmentations),
+ 0, [](int acc, const auto &map_pair) {
+ return acc + map_pair.second.weight;
+ });
+}
+
+auto cache_item::get_numeric_augmentation(std::string_view name) const -> std::optional<double>
+{
+ const auto augmentation_value_maybe = rspamd::find_map(this->augmentations, name);
+
+ if (augmentation_value_maybe.has_value()) {
+ const auto &augmentation = augmentation_value_maybe.value().get();
+
+ if (std::holds_alternative<double>(augmentation.value)) {
+ return std::get<double>(augmentation.value);
+ }
+ }
+
+ return std::nullopt;
+}
+
+
+auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
+{
+ if (parent) {
+ return parent;
+ }
+
+ return cache.get_item_by_id(parent_id, false);
+}
+
+auto virtual_item::get_parent_mut(const symcache &cache) -> cache_item *
+{
+ if (parent) {
+ return parent;
+ }
+
+ return const_cast<cache_item *>(cache.get_item_by_id(parent_id, false));
+}
+
+auto virtual_item::resolve_parent(const symcache &cache) -> bool
+{
+ if (parent) {
+ return false;
+ }
+
+ auto item_ptr = cache.get_item_by_id(parent_id, true);
+
+ if (item_ptr) {
+ parent = const_cast<cache_item *>(item_ptr);
+
+ return true;
+ }
+
+ return false;
+}
+
+auto item_type_from_c(int type) -> tl::expected<std::pair<symcache_item_type, int>, std::string>
+{
+ constexpr const auto trivial_types = SYMBOL_TYPE_CONNFILTER | SYMBOL_TYPE_PREFILTER | SYMBOL_TYPE_POSTFILTER | SYMBOL_TYPE_IDEMPOTENT | SYMBOL_TYPE_COMPOSITE | SYMBOL_TYPE_CLASSIFIER | SYMBOL_TYPE_VIRTUAL;
+
+ constexpr auto all_but_one_ty = [&](int type, int exclude_bit) -> auto {
+ return (type & trivial_types) & (trivial_types & ~exclude_bit);
+ };
+
+ if (type & trivial_types) {
+ auto check_trivial = [&](auto flag,
+ symcache_item_type ty) -> tl::expected<std::pair<symcache_item_type, int>, std::string> {
+ if (all_but_one_ty(type, flag)) {
+ return tl::make_unexpected(fmt::format("invalid flags for a symbol: {}", (int) type));
+ }
+
+ return std::make_pair(ty, type & ~flag);
+ };
+ if (type & SYMBOL_TYPE_CONNFILTER) {
+ return check_trivial(SYMBOL_TYPE_CONNFILTER, symcache_item_type::CONNFILTER);
+ }
+ else if (type & SYMBOL_TYPE_PREFILTER) {
+ return check_trivial(SYMBOL_TYPE_PREFILTER, symcache_item_type::PREFILTER);
+ }
+ else if (type & SYMBOL_TYPE_POSTFILTER) {
+ return check_trivial(SYMBOL_TYPE_POSTFILTER, symcache_item_type::POSTFILTER);
+ }
+ else if (type & SYMBOL_TYPE_IDEMPOTENT) {
+ return check_trivial(SYMBOL_TYPE_IDEMPOTENT, symcache_item_type::IDEMPOTENT);
+ }
+ else if (type & SYMBOL_TYPE_COMPOSITE) {
+ return check_trivial(SYMBOL_TYPE_COMPOSITE, symcache_item_type::COMPOSITE);
+ }
+ else if (type & SYMBOL_TYPE_CLASSIFIER) {
+ return check_trivial(SYMBOL_TYPE_CLASSIFIER, symcache_item_type::CLASSIFIER);
+ }
+ else if (type & SYMBOL_TYPE_VIRTUAL) {
+ return check_trivial(SYMBOL_TYPE_VIRTUAL, symcache_item_type::VIRTUAL);
+ }
+
+ return tl::make_unexpected(fmt::format("internal error: impossible flags combination: {}", (int) type));
+ }
+
+ /* Maybe check other flags combination here? */
+ return std::make_pair(symcache_item_type::FILTER, type);
+}
+
+bool operator<(symcache_item_type lhs, symcache_item_type rhs)
+{
+ auto ret = false;
+ switch (lhs) {
+ case symcache_item_type::CONNFILTER:
+ break;
+ case symcache_item_type::PREFILTER:
+ if (rhs == symcache_item_type::CONNFILTER) {
+ ret = true;
+ }
+ break;
+ case symcache_item_type::FILTER:
+ if (rhs == symcache_item_type::CONNFILTER || rhs == symcache_item_type::PREFILTER) {
+ ret = true;
+ }
+ break;
+ case symcache_item_type::POSTFILTER:
+ if (rhs != symcache_item_type::IDEMPOTENT) {
+ ret = true;
+ }
+ break;
+ case symcache_item_type::IDEMPOTENT:
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+item_condition::~item_condition()
+{
+ if (cb != -1 && L != nullptr) {
+ luaL_unref(L, LUA_REGISTRYINDEX, cb);
+ }
+}
+
+auto item_condition::check(std::string_view sym_name, struct rspamd_task *task) const -> bool
+{
+ if (cb != -1 && L != nullptr) {
+ auto ret = false;
+
+ lua_pushcfunction(L, &rspamd_lua_traceback);
+ auto err_idx = lua_gettop(L);
+
+ lua_rawgeti(L, LUA_REGISTRYINDEX, cb);
+ rspamd_lua_task_push(L, task);
+
+ if (lua_pcall(L, 1, 1, err_idx) != 0) {
+ msg_info_task("call to condition for %s failed: %s",
+ sym_name.data(), lua_tostring(L, -1));
+ }
+ else {
+ ret = lua_toboolean(L, -1);
+ }
+
+ lua_settop(L, err_idx - 1);
+
+ return ret;
+ }
+
+ return true;
+}
+
+}// namespace rspamd::symcache
diff --git a/src/libserver/symcache/symcache_item.hxx b/src/libserver/symcache/symcache_item.hxx
new file mode 100644
index 0000000..a60213a
--- /dev/null
+++ b/src/libserver/symcache/symcache_item.hxx
@@ -0,0 +1,561 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_SYMCACHE_ITEM_HXX
+#define RSPAMD_SYMCACHE_ITEM_HXX
+
+#pragma once
+
+#include <utility>
+#include <vector>
+#include <string>
+#include <string_view>
+#include <memory>
+#include <variant>
+#include <algorithm>
+#include <optional>
+
+#include "rspamd_symcache.h"
+#include "symcache_id_list.hxx"
+#include "contrib/expected/expected.hpp"
+#include "contrib/libev/ev.h"
+#include "symcache_runtime.hxx"
+#include "libutil/cxx/hash_util.hxx"
+
+namespace rspamd::symcache {
+
+class symcache;
+struct cache_item;
+using cache_item_ptr = std::shared_ptr<cache_item>;
+
+enum class symcache_item_type {
+ CONNFILTER, /* Executed on connection stage */
+ PREFILTER, /* Executed before all filters */
+ FILTER, /* Normal symbol with a callback */
+ POSTFILTER, /* Executed after all filters */
+ IDEMPOTENT, /* Executed after postfilters, cannot change results */
+ CLASSIFIER, /* A virtual classifier symbol */
+ COMPOSITE, /* A virtual composite symbol */
+ VIRTUAL, /* A virtual symbol... */
+};
+
+/*
+ * Compare item types: earlier stages symbols are > than later stages symbols
+ * Order for virtual stuff is not defined.
+ */
+bool operator<(symcache_item_type lhs, symcache_item_type rhs);
+
+constexpr static auto item_type_to_str(symcache_item_type t) -> const char *
+{
+ switch (t) {
+ case symcache_item_type::CONNFILTER:
+ return "connfilter";
+ case symcache_item_type::PREFILTER:
+ return "prefilter";
+ case symcache_item_type::FILTER:
+ return "filter";
+ case symcache_item_type::POSTFILTER:
+ return "postfilter";
+ case symcache_item_type::IDEMPOTENT:
+ return "idempotent";
+ case symcache_item_type::CLASSIFIER:
+ return "classifier";
+ case symcache_item_type::COMPOSITE:
+ return "composite";
+ case symcache_item_type::VIRTUAL:
+ return "virtual";
+ }
+}
+
+/**
+ * This is a public helper to convert a legacy C type to a more static type
+ * @param type input type as a C enum
+ * @return pair of type safe symcache_item_type + the remaining flags or an error
+ */
+auto item_type_from_c(int type) -> tl::expected<std::pair<symcache_item_type, int>, std::string>;
+
+struct item_condition {
+private:
+ lua_State *L = nullptr;
+ int cb = -1;
+
+public:
+ explicit item_condition(lua_State *L_, int cb_) noexcept
+ : L(L_), cb(cb_)
+ {
+ }
+ item_condition(item_condition &&other) noexcept
+ {
+ *this = std::move(other);
+ }
+ /* Make it move only */
+ item_condition(const item_condition &) = delete;
+ item_condition &operator=(item_condition &&other) noexcept
+ {
+ std::swap(other.L, L);
+ std::swap(other.cb, cb);
+ return *this;
+ }
+ ~item_condition();
+
+ auto check(std::string_view sym_name, struct rspamd_task *task) const -> bool;
+};
+
+class normal_item {
+private:
+ symbol_func_t func = nullptr;
+ void *user_data = nullptr;
+ std::vector<cache_item *> virtual_children;
+ std::vector<item_condition> conditions;
+
+public:
+ explicit normal_item(symbol_func_t _func, void *_user_data)
+ : func(_func), user_data(_user_data)
+ {
+ }
+
+ auto add_condition(lua_State *L, int cbref) -> void
+ {
+ conditions.emplace_back(L, cbref);
+ }
+
+ auto call(struct rspamd_task *task, struct rspamd_symcache_dynamic_item *item) const -> void
+ {
+ func(task, item, user_data);
+ }
+
+ auto check_conditions(std::string_view sym_name, struct rspamd_task *task) const -> bool
+ {
+ return std::all_of(std::begin(conditions), std::end(conditions),
+ [&](const auto &cond) { return cond.check(sym_name, task); });
+ }
+
+ auto get_cbdata() const -> auto
+ {
+ return user_data;
+ }
+
+ auto add_child(cache_item *ptr) -> void
+ {
+ virtual_children.push_back(ptr);
+ }
+
+ auto get_childen() const -> const std::vector<cache_item *> &
+ {
+ return virtual_children;
+ }
+};
+
+class virtual_item {
+private:
+ int parent_id = -1;
+ cache_item *parent = nullptr;
+
+public:
+ explicit virtual_item(int _parent_id)
+ : parent_id(_parent_id)
+ {
+ }
+
+ auto get_parent(const symcache &cache) const -> const cache_item *;
+ auto get_parent_mut(const symcache &cache) -> cache_item *;
+
+ auto resolve_parent(const symcache &cache) -> bool;
+};
+
+struct cache_dependency {
+ cache_item *item; /* Real dependency */
+ std::string sym; /* Symbolic dep name */
+ int id; /* Real from */
+ int vid; /* Virtual from */
+public:
+ /* Default piecewise constructor */
+ explicit cache_dependency(cache_item *_item, std::string _sym, int _id, int _vid)
+ : item(_item), sym(std::move(_sym)), id(_id), vid(_vid)
+ {
+ }
+};
+
+/*
+ * Used to store augmentation values
+ */
+struct item_augmentation {
+ std::variant<std::monostate, std::string, double> value;
+ int weight;
+
+ explicit item_augmentation(int weight)
+ : value(std::monostate{}), weight(weight)
+ {
+ }
+ explicit item_augmentation(std::string str_value, int weight)
+ : value(str_value), weight(weight)
+ {
+ }
+ explicit item_augmentation(double double_value, int weight)
+ : value(double_value), weight(weight)
+ {
+ }
+};
+
+struct cache_item : std::enable_shared_from_this<cache_item> {
+ /* The following fields will live in shared memory */
+ struct rspamd_symcache_item_stat *st = nullptr;
+ struct rspamd_counter_data *cd = nullptr;
+
+ /* Unique id - counter */
+ int id;
+ std::uint64_t last_count = 0;
+ std::string symbol;
+ symcache_item_type type;
+ int flags;
+
+ /* Condition of execution */
+ bool enabled = true;
+
+ /* Priority */
+ int priority = 0;
+ /* Topological order */
+ unsigned int order = 0;
+ int frequency_peaks = 0;
+
+ /* Specific data for virtual and callback symbols */
+ std::variant<normal_item, virtual_item> specific;
+
+ /* Settings ids */
+ id_list allowed_ids;
+ /* Allows execution but not symbols insertion */
+ id_list exec_only_ids;
+ id_list forbidden_ids;
+
+ /* Set of augmentations */
+ ankerl::unordered_dense::map<std::string, item_augmentation,
+ rspamd::smart_str_hash, rspamd::smart_str_equal>
+ augmentations;
+
+ /* Dependencies */
+ std::vector<cache_dependency> deps;
+ /* Reverse dependencies */
+ std::vector<cache_dependency> rdeps;
+
+public:
+ /**
+ * Create a normal item with a callback
+ * @param name
+ * @param priority
+ * @param func
+ * @param user_data
+ * @param type
+ * @param flags
+ * @return
+ */
+ template<typename T>
+ static auto create_with_function(rspamd_mempool_t *pool,
+ int id,
+ T &&name,
+ int priority,
+ symbol_func_t func,
+ void *user_data,
+ symcache_item_type type,
+ int flags) -> cache_item_ptr
+ {
+ return std::shared_ptr<cache_item>(new cache_item(pool,
+ id, std::forward<T>(name), priority,
+ func, user_data,
+ type, flags));
+ }
+
+ /**
+ * Create a virtual item
+ * @param name
+ * @param priority
+ * @param parent
+ * @param type
+ * @param flags
+ * @return
+ */
+ template<typename T>
+ static auto create_with_virtual(rspamd_mempool_t *pool,
+ int id,
+ T &&name,
+ int parent,
+ symcache_item_type type,
+ int flags) -> cache_item_ptr
+ {
+ return std::shared_ptr<cache_item>(new cache_item(pool, id, std::forward<T>(name),
+ parent, type, flags));
+ }
+
+ /**
+ * Share ownership on the item
+ * @return
+ */
+ auto getptr() -> cache_item_ptr
+ {
+ return shared_from_this();
+ }
+
+ /**
+ * Process and resolve dependencies for the item
+ * @param cache
+ */
+ auto process_deps(const symcache &cache) -> void;
+
+ auto is_virtual() const -> bool
+ {
+ return std::holds_alternative<virtual_item>(specific);
+ }
+
+ auto is_filter() const -> bool
+ {
+ return std::holds_alternative<normal_item>(specific) &&
+ (type == symcache_item_type::FILTER);
+ }
+
+ /**
+ * Returns true if a symbol should have some score defined
+ * @return
+ */
+ auto is_scoreable() const -> bool
+ {
+ return !(flags & SYMBOL_TYPE_CALLBACK) &&
+ ((type == symcache_item_type::FILTER) ||
+ is_virtual() ||
+ (type == symcache_item_type::COMPOSITE) ||
+ (type == symcache_item_type::CLASSIFIER));
+ }
+
+ auto is_ghost() const -> bool
+ {
+ return flags & SYMBOL_TYPE_GHOST;
+ }
+
+ auto get_parent(const symcache &cache) const -> const cache_item *;
+ auto get_parent_mut(const symcache &cache) -> cache_item *;
+
+ auto resolve_parent(const symcache &cache) -> bool;
+
+ auto get_type() const -> auto
+ {
+ return type;
+ }
+
+ auto get_type_str() const -> const char *;
+
+ auto get_name() const -> const std::string &
+ {
+ return symbol;
+ }
+
+ auto get_flags() const -> auto
+ {
+ return flags;
+ };
+
+ auto add_condition(lua_State *L, int cbref) -> bool
+ {
+ if (!is_virtual()) {
+ auto &normal = std::get<normal_item>(specific);
+ normal.add_condition(L, cbref);
+
+ return true;
+ }
+
+ return false;
+ }
+
+ auto update_counters_check_peak(lua_State *L,
+ struct ev_loop *ev_loop,
+ double cur_time,
+ double last_resort) -> bool;
+
+ /**
+ * Increase frequency for a symbol
+ */
+ auto inc_frequency(const char *sym_name, symcache &cache) -> void;
+
+ /**
+ * Check if an item is allowed to be executed not checking item conditions
+ * @param task
+ * @param exec_only
+ * @return
+ */
+ auto is_allowed(struct rspamd_task *task, bool exec_only) const -> bool;
+
+ /**
+ * Returns callback data
+ * @return
+ */
+ auto get_cbdata() const -> void *
+ {
+ if (std::holds_alternative<normal_item>(specific)) {
+ const auto &filter_data = std::get<normal_item>(specific);
+
+ return filter_data.get_cbdata();
+ }
+
+ return nullptr;
+ }
+
+ /**
+ * Check all conditions for an item
+ * @param task
+ * @return
+ */
+ auto check_conditions(struct rspamd_task *task) const -> auto
+ {
+ if (std::holds_alternative<normal_item>(specific)) {
+ const auto &filter_data = std::get<normal_item>(specific);
+
+ return filter_data.check_conditions(symbol, task);
+ }
+
+ return false;
+ }
+
+ auto call(struct rspamd_task *task, cache_dynamic_item *dyn_item) const -> void
+ {
+ if (std::holds_alternative<normal_item>(specific)) {
+ const auto &filter_data = std::get<normal_item>(specific);
+
+ filter_data.call(task, (struct rspamd_symcache_dynamic_item *) dyn_item);
+ }
+ }
+
+ /**
+ * Add an augmentation to the item, returns `true` if augmentation is known and unique, false otherwise
+ * @param augmentation
+ * @return
+ */
+ auto add_augmentation(const symcache &cache, std::string_view augmentation,
+ std::optional<std::string_view> value) -> bool;
+
+ /**
+ * Return sum weight of all known augmentations
+ * @return
+ */
+ auto get_augmentation_weight() const -> int;
+
+ /**
+ * Returns numeric augmentation value
+ * @param name
+ * @return
+ */
+ auto get_numeric_augmentation(std::string_view name) const -> std::optional<double>;
+
+ /**
+ * Returns string augmentation value
+ * @param name
+ * @return
+ */
+ auto get_string_augmentation(std::string_view name) const -> std::optional<std::string_view>;
+
+ /**
+ * Add a virtual symbol as a child of some normal symbol
+ * @param ptr
+ */
+ auto add_child(cache_item *ptr) -> void
+ {
+ if (std::holds_alternative<normal_item>(specific)) {
+ auto &filter_data = std::get<normal_item>(specific);
+
+ filter_data.add_child(ptr);
+ }
+ else {
+ g_assert("add child is called for a virtual symbol!");
+ }
+ }
+
+ /**
+ * Returns virtual children for a normal item
+ * @param ptr
+ * @return
+ */
+ auto get_children() const -> const std::vector<cache_item *> *
+ {
+ if (std::holds_alternative<normal_item>(specific)) {
+ const auto &filter_data = std::get<normal_item>(specific);
+
+ return &filter_data.get_childen();
+ }
+
+ return nullptr;
+ }
+
+private:
+ /**
+ * Constructor for a normal symbols with callback
+ * @param name
+ * @param _priority
+ * @param func
+ * @param user_data
+ * @param _type
+ * @param _flags
+ */
+ cache_item(rspamd_mempool_t *pool,
+ int _id,
+ std::string &&name,
+ int _priority,
+ symbol_func_t func,
+ void *user_data,
+ symcache_item_type _type,
+ int _flags)
+ : id(_id),
+ symbol(std::move(name)),
+ type(_type),
+ flags(_flags),
+ priority(_priority),
+ specific(normal_item{func, user_data})
+ {
+ /* These structures are kept trivial, so they need to be explicitly reset */
+ forbidden_ids.reset();
+ allowed_ids.reset();
+ exec_only_ids.reset();
+ st = rspamd_mempool_alloc0_shared_type(pool, std::remove_pointer_t<decltype(st)>);
+ cd = rspamd_mempool_alloc0_shared_type(pool, std::remove_pointer_t<decltype(cd)>);
+ }
+
+ /**
+ * Constructor for a virtual symbol
+ * @param name
+ * @param _priority
+ * @param parent
+ * @param _type
+ * @param _flags
+ */
+ cache_item(rspamd_mempool_t *pool,
+ int _id,
+ std::string &&name,
+ int parent,
+ symcache_item_type _type,
+ int _flags)
+ : id(_id),
+ symbol(std::move(name)),
+ type(_type),
+ flags(_flags),
+ specific(virtual_item{parent})
+ {
+ /* These structures are kept trivial, so they need to be explicitly reset */
+ forbidden_ids.reset();
+ allowed_ids.reset();
+ exec_only_ids.reset();
+ st = rspamd_mempool_alloc0_shared_type(pool, std::remove_pointer_t<decltype(st)>);
+ cd = rspamd_mempool_alloc0_shared_type(pool, std::remove_pointer_t<decltype(cd)>);
+ }
+};
+
+}// namespace rspamd::symcache
+
+#endif//RSPAMD_SYMCACHE_ITEM_HXX
diff --git a/src/libserver/symcache/symcache_periodic.hxx b/src/libserver/symcache/symcache_periodic.hxx
new file mode 100644
index 0000000..535956b
--- /dev/null
+++ b/src/libserver/symcache/symcache_periodic.hxx
@@ -0,0 +1,89 @@
+/*-
+ * Copyright 2022 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef RSPAMD_SYMCACHE_PERIODIC_HXX
+#define RSPAMD_SYMCACHE_PERIODIC_HXX
+
+#pragma once
+
+#include "config.h"
+#include "contrib/libev/ev.h"
+#include "symcache_internal.hxx"
+#include "worker_util.h"
+
+namespace rspamd::symcache {
+struct cache_refresh_cbdata {
+private:
+ symcache *cache;
+ struct ev_loop *event_loop;
+ struct rspamd_worker *w;
+ double reload_time;
+ double last_resort;
+ ev_timer resort_ev;
+
+public:
+ explicit cache_refresh_cbdata(symcache *_cache,
+ struct ev_loop *_ev_base,
+ struct rspamd_worker *_w)
+ : cache(_cache), event_loop(_ev_base), w(_w)
+ {
+ auto log_tag = [&]() { return cache->log_tag(); };
+ last_resort = rspamd_get_ticks(TRUE);
+ reload_time = cache->get_reload_time();
+ auto tm = rspamd_time_jitter(reload_time, 0);
+ msg_debug_cache("next reload in %.2f seconds", tm);
+ ev_timer_init(&resort_ev, cache_refresh_cbdata::resort_cb,
+ tm, tm);
+ resort_ev.data = (void *) this;
+ ev_timer_start(event_loop, &resort_ev);
+ rspamd_mempool_add_destructor(cache->get_pool(),
+ cache_refresh_cbdata::refresh_dtor, (void *) this);
+ }
+
+ static void refresh_dtor(void *d)
+ {
+ auto *cbdata = (struct cache_refresh_cbdata *) d;
+ delete cbdata;
+ }
+
+ static void resort_cb(EV_P_ ev_timer *w, int _revents)
+ {
+ auto *cbdata = (struct cache_refresh_cbdata *) w->data;
+
+ auto log_tag = [&]() { return cbdata->cache->log_tag(); };
+
+ if (rspamd_worker_is_primary_controller(cbdata->w)) {
+ /* Plan new event */
+ auto tm = rspamd_time_jitter(cbdata->reload_time, 0);
+ msg_debug_cache("resort symbols cache, next reload in %.2f seconds", tm);
+ cbdata->resort_ev.repeat = tm;
+ ev_timer_again(EV_A_ w);
+ auto cur_time = rspamd_get_ticks(FALSE);
+ cbdata->cache->periodic_resort(cbdata->event_loop, cur_time, cbdata->last_resort);
+ cbdata->last_resort = cur_time;
+ }
+ }
+
+private:
+ ~cache_refresh_cbdata()
+ {
+ ev_timer_stop(event_loop, &resort_ev);
+ }
+};
+}// namespace rspamd::symcache
+
+#endif//RSPAMD_SYMCACHE_PERIODIC_HXX
diff --git a/src/libserver/symcache/symcache_runtime.cxx b/src/libserver/symcache/symcache_runtime.cxx
new file mode 100644
index 0000000..d9622d8
--- /dev/null
+++ b/src/libserver/symcache/symcache_runtime.cxx
@@ -0,0 +1,823 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "symcache_internal.hxx"
+#include "symcache_item.hxx"
+#include "symcache_runtime.hxx"
+#include "libutil/cxx/util.hxx"
+#include "libserver/task.h"
+#include "libmime/scan_result.h"
+#include "utlist.h"
+#include "libserver/worker_util.h"
+#include <limits>
+#include <cmath>
+
+namespace rspamd::symcache {
+
+/* At least once per minute */
+constexpr static const auto PROFILE_MAX_TIME = 60.0;
+/* For messages larger than 2Mb enable profiling */
+constexpr static const auto PROFILE_MESSAGE_SIZE_THRESHOLD = 1024ul * 1024 * 2;
+/* Enable profile at least once per this amount of messages processed */
+constexpr static const auto PROFILE_PROBABILITY = 0.01;
+
+auto symcache_runtime::create(struct rspamd_task *task, symcache &cache) -> symcache_runtime *
+{
+ cache.maybe_resort();
+
+ auto &&cur_order = cache.get_cache_order();
+ auto *checkpoint = (symcache_runtime *) rspamd_mempool_alloc0(task->task_pool,
+ sizeof(symcache_runtime) +
+ sizeof(struct cache_dynamic_item) * cur_order->size());
+
+ checkpoint->order = cache.get_cache_order();
+
+ /* Calculate profile probability */
+ ev_now_update_if_cheap(task->event_loop);
+ ev_tstamp now = ev_now(task->event_loop);
+ checkpoint->profile_start = now;
+ checkpoint->lim = rspamd_task_get_required_score(task, task->result);
+
+ if ((cache.get_last_profile() == 0.0 || now > cache.get_last_profile() + PROFILE_MAX_TIME) ||
+ (task->msg.len >= PROFILE_MESSAGE_SIZE_THRESHOLD) ||
+ (rspamd_random_double_fast() >= (1 - PROFILE_PROBABILITY))) {
+ msg_debug_cache_task("enable profiling of symbols for task");
+ checkpoint->profile = true;
+ cache.set_last_profile(now);
+ }
+
+ task->symcache_runtime = (void *) checkpoint;
+
+ return checkpoint;
+}
+
+auto symcache_runtime::process_settings(struct rspamd_task *task, const symcache &cache) -> bool
+{
+ if (!task->settings) {
+ msg_err_task("`process_settings` is called with no settings");
+ return false;
+ }
+
+ const auto *wl = ucl_object_lookup(task->settings, "whitelist");
+
+ if (wl != nullptr) {
+ msg_info_task("task is whitelisted");
+ task->flags |= RSPAMD_TASK_FLAG_SKIP;
+ return true;
+ }
+
+ auto already_disabled = false;
+
+ auto process_group = [&](const ucl_object_t *gr_obj, auto functor) -> void {
+ ucl_object_iter_t it = nullptr;
+ const ucl_object_t *cur;
+
+ if (gr_obj) {
+ while ((cur = ucl_iterate_object(gr_obj, &it, true)) != nullptr) {
+ if (ucl_object_type(cur) == UCL_STRING) {
+ auto *gr = (struct rspamd_symbols_group *)
+ g_hash_table_lookup(task->cfg->groups,
+ ucl_object_tostring(cur));
+
+ if (gr) {
+ GHashTableIter gr_it;
+ void *k, *v;
+ g_hash_table_iter_init(&gr_it, gr->symbols);
+
+ while (g_hash_table_iter_next(&gr_it, &k, &v)) {
+ functor((const char *) k);
+ }
+ }
+ }
+ }
+ }
+ };
+
+ ucl_object_iter_t it = nullptr;
+ const ucl_object_t *cur;
+
+ const auto *enabled = ucl_object_lookup(task->settings, "symbols_enabled");
+
+ if (enabled) {
+ msg_debug_cache_task("disable all symbols as `symbols_enabled` is found");
+ /* Disable all symbols but selected */
+ disable_all_symbols(SYMBOL_TYPE_EXPLICIT_DISABLE);
+ already_disabled = true;
+ it = nullptr;
+
+ while ((cur = ucl_iterate_object(enabled, &it, true)) != nullptr) {
+ enable_symbol(task, cache, ucl_object_tostring(cur));
+ }
+ }
+
+ /* Enable groups of symbols */
+ enabled = ucl_object_lookup(task->settings, "groups_enabled");
+ if (enabled && !already_disabled) {
+ disable_all_symbols(SYMBOL_TYPE_EXPLICIT_DISABLE);
+ }
+ process_group(enabled, [&](const char *sym) {
+ enable_symbol(task, cache, sym);
+ });
+
+ const auto *disabled = ucl_object_lookup(task->settings, "symbols_disabled");
+
+ if (disabled) {
+ it = nullptr;
+
+ while ((cur = ucl_iterate_object(disabled, &it, true)) != nullptr) {
+ disable_symbol(task, cache, ucl_object_tostring(cur));
+ }
+ }
+
+ /* Disable groups of symbols */
+ disabled = ucl_object_lookup(task->settings, "groups_disabled");
+ process_group(disabled, [&](const char *sym) {
+ disable_symbol(task, cache, sym);
+ });
+
+ /* Update required limit */
+ lim = rspamd_task_get_required_score(task, task->result);
+
+ return false;
+}
+
+auto symcache_runtime::disable_all_symbols(int skip_mask) -> void
+{
+ for (auto [i, item]: rspamd::enumerate(order->d)) {
+ auto *dyn_item = &dynamic_items[i];
+
+ if (!(item->get_flags() & skip_mask)) {
+ dyn_item->finished = true;
+ dyn_item->started = true;
+ }
+ }
+}
+
+auto symcache_runtime::disable_symbol(struct rspamd_task *task, const symcache &cache, std::string_view name) -> bool
+{
+ const auto *item = cache.get_item_by_name(name, true);
+
+ if (item != nullptr) {
+
+ auto *dyn_item = get_dynamic_item(item->id);
+
+ if (dyn_item) {
+ dyn_item->finished = true;
+ dyn_item->started = true;
+ msg_debug_cache_task("disable execution of %s", name.data());
+
+ return true;
+ }
+ else {
+ msg_debug_cache_task("cannot disable %s: id not found %d", name.data(), item->id);
+ }
+ }
+ else {
+ msg_debug_cache_task("cannot disable %s: symbol not found", name.data());
+ }
+
+ return false;
+}
+
+auto symcache_runtime::enable_symbol(struct rspamd_task *task, const symcache &cache, std::string_view name) -> bool
+{
+ const auto *item = cache.get_item_by_name(name, true);
+
+ if (item != nullptr) {
+
+ auto *dyn_item = get_dynamic_item(item->id);
+
+ if (dyn_item) {
+ dyn_item->finished = false;
+ dyn_item->started = false;
+ msg_debug_cache_task("enable execution of %s", name.data());
+
+ return true;
+ }
+ else {
+ msg_debug_cache_task("cannot enable %s: id not found %d", name.data(), item->id);
+ }
+ }
+ else {
+ msg_debug_cache_task("cannot enable %s: symbol not found", name.data());
+ }
+
+ return false;
+}
+
+auto symcache_runtime::is_symbol_checked(const symcache &cache, std::string_view name) -> bool
+{
+ const auto *item = cache.get_item_by_name(name, true);
+
+ if (item != nullptr) {
+
+ auto *dyn_item = get_dynamic_item(item->id);
+
+ if (dyn_item) {
+ return dyn_item->started;
+ }
+ }
+
+ return false;
+}
+
+auto symcache_runtime::is_symbol_enabled(struct rspamd_task *task, const symcache &cache, std::string_view name) -> bool
+{
+
+ const auto *item = cache.get_item_by_name(name, true);
+ if (item) {
+
+ if (!item->is_allowed(task, true)) {
+ return false;
+ }
+ else {
+ auto *dyn_item = get_dynamic_item(item->id);
+
+ if (dyn_item) {
+ if (dyn_item->started) {
+ /* Already started */
+ return false;
+ }
+
+ if (!item->is_virtual()) {
+ return std::get<normal_item>(item->specific).check_conditions(item->symbol, task);
+ }
+ }
+ else {
+ /* Unknown item */
+ msg_debug_cache_task("cannot enable %s: symbol not found", name.data());
+ }
+ }
+ }
+
+ return true;
+}
+
+auto symcache_runtime::get_dynamic_item(int id) const -> cache_dynamic_item *
+{
+
+ /* Not found in the cache, do a hash lookup */
+ auto our_id_maybe = rspamd::find_map(order->by_cache_id, id);
+
+ if (our_id_maybe) {
+ return &dynamic_items[our_id_maybe.value()];
+ }
+
+ return nullptr;
+}
+
+auto symcache_runtime::process_symbols(struct rspamd_task *task, symcache &cache, unsigned int stage) -> bool
+{
+ msg_debug_cache_task("symbols processing stage at pass: %d", stage);
+
+ if (RSPAMD_TASK_IS_SKIPPED(task)) {
+ return true;
+ }
+
+ switch (stage) {
+ case RSPAMD_TASK_STAGE_CONNFILTERS:
+ case RSPAMD_TASK_STAGE_PRE_FILTERS:
+ case RSPAMD_TASK_STAGE_POST_FILTERS:
+ case RSPAMD_TASK_STAGE_IDEMPOTENT:
+ return process_pre_postfilters(task, cache,
+ rspamd_session_events_pending(task->s), stage);
+ break;
+
+ case RSPAMD_TASK_STAGE_FILTERS:
+ return process_filters(task, cache, rspamd_session_events_pending(task->s));
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+auto symcache_runtime::process_pre_postfilters(struct rspamd_task *task,
+ symcache &cache,
+ int start_events,
+ unsigned int stage) -> bool
+{
+ auto saved_priority = std::numeric_limits<int>::min();
+ auto all_done = true;
+ auto log_func = RSPAMD_LOG_FUNC;
+ auto compare_functor = +[](int a, int b) { return a < b; };
+
+ auto proc_func = [&](cache_item *item) {
+ /*
+ * We can safely ignore all pre/postfilters except idempotent ones and
+ * those that are marked as ignore passthrough result
+ */
+ if (stage != RSPAMD_TASK_STAGE_IDEMPOTENT &&
+ !(item->flags & SYMBOL_TYPE_IGNORE_PASSTHROUGH)) {
+ if (check_metric_limit(task)) {
+ msg_debug_cache_task_lambda("task has already the result being set, ignore further checks");
+
+ return true;
+ }
+ }
+
+ auto dyn_item = get_dynamic_item(item->id);
+
+ if (!dyn_item->started && !dyn_item->finished) {
+ if (has_slow) {
+ /* Delay */
+ has_slow = false;
+
+ return false;
+ }
+
+ if (saved_priority == std::numeric_limits<int>::min()) {
+ saved_priority = item->priority;
+ }
+ else {
+ if (compare_functor(item->priority, saved_priority) &&
+ rspamd_session_events_pending(task->s) > start_events) {
+ /*
+ * Delay further checks as we have higher
+ * priority filters to be processed
+ */
+ return false;
+ }
+ }
+
+ return process_symbol(task, cache, item, dyn_item);
+ }
+
+ /* Continue processing */
+ return true;
+ };
+
+ switch (stage) {
+ case RSPAMD_TASK_STAGE_CONNFILTERS:
+ all_done = cache.connfilters_foreach(proc_func);
+ break;
+ case RSPAMD_TASK_STAGE_PRE_FILTERS:
+ all_done = cache.prefilters_foreach(proc_func);
+ break;
+ case RSPAMD_TASK_STAGE_POST_FILTERS:
+ compare_functor = +[](int a, int b) { return a > b; };
+ all_done = cache.postfilters_foreach(proc_func);
+ break;
+ case RSPAMD_TASK_STAGE_IDEMPOTENT:
+ compare_functor = +[](int a, int b) { return a > b; };
+ all_done = cache.idempotent_foreach(proc_func);
+ break;
+ default:
+ g_error("invalid invocation");
+ break;
+ }
+
+ return all_done;
+}
+
+auto symcache_runtime::process_filters(struct rspamd_task *task, symcache &cache, int start_events) -> bool
+{
+ auto all_done = true;
+ auto log_func = RSPAMD_LOG_FUNC;
+ auto has_passtrough = false;
+
+ for (const auto [idx, item]: rspamd::enumerate(order->d)) {
+ /* Exclude all non filters */
+ if (item->type != symcache_item_type::FILTER) {
+ /*
+ * We use breaking the loop as we append non-filters to the end of the list
+ * so, it is safe to stop processing immediately
+ */
+ break;
+ }
+
+ if (!(item->flags & (SYMBOL_TYPE_FINE | SYMBOL_TYPE_IGNORE_PASSTHROUGH))) {
+ if (has_passtrough || check_metric_limit(task)) {
+ msg_debug_cache_task_lambda("task has already the result being set, ignore further checks");
+ has_passtrough = true;
+ /* Skip this item */
+ continue;
+ }
+ }
+
+ auto dyn_item = &dynamic_items[idx];
+
+ if (!dyn_item->started) {
+ all_done = false;
+
+ if (!check_item_deps(task, cache, item.get(),
+ dyn_item, false)) {
+ msg_debug_cache_task("blocked execution of %d(%s) unless deps are "
+ "resolved",
+ item->id, item->symbol.c_str());
+
+ continue;
+ }
+
+ process_symbol(task, cache, item.get(), dyn_item);
+
+ if (has_slow) {
+ /* Delay */
+ has_slow = false;
+
+ return false;
+ }
+ }
+ }
+
+ return all_done;
+}
+
+auto symcache_runtime::process_symbol(struct rspamd_task *task, symcache &cache, cache_item *item,
+ cache_dynamic_item *dyn_item) -> bool
+{
+ if (item->type == symcache_item_type::CLASSIFIER || item->type == symcache_item_type::COMPOSITE) {
+ /* Classifiers are special :( */
+ return true;
+ }
+
+ if (rspamd_session_blocked(task->s)) {
+ /*
+ * We cannot add new events as session is either destroyed or
+ * being cleaned up.
+ */
+ return true;
+ }
+
+ g_assert(!item->is_virtual());
+ if (dyn_item->started) {
+ /*
+ * This can actually happen when deps span over different layers
+ */
+ return dyn_item->finished;
+ }
+
+ /* Check has been started */
+ dyn_item->started = true;
+ auto check = true;
+
+ if (!item->is_allowed(task, true) || !item->check_conditions(task)) {
+ check = false;
+ }
+
+ if (check) {
+ msg_debug_cache_task("execute %s, %d; symbol type = %s", item->symbol.data(),
+ item->id, item_type_to_str(item->type));
+
+ if (profile) {
+ ev_now_update_if_cheap(task->event_loop);
+ dyn_item->start_msec = (ev_now(task->event_loop) -
+ profile_start) *
+ 1e3;
+ }
+ dyn_item->async_events = 0;
+ cur_item = dyn_item;
+ items_inflight++;
+ /* Callback now must finalize itself */
+ item->call(task, dyn_item);
+ cur_item = nullptr;
+
+ if (items_inflight == 0) {
+ return true;
+ }
+
+ if (dyn_item->async_events == 0 && !dyn_item->finished) {
+ msg_err_cache_task("critical error: item %s has no async events pending, "
+ "but it is not finalised",
+ item->symbol.data());
+ g_assert_not_reached();
+ }
+
+ return false;
+ }
+ else {
+ dyn_item->finished = true;
+ }
+
+ return true;
+}
+
+auto symcache_runtime::check_metric_limit(struct rspamd_task *task) -> bool
+{
+ if (task->flags & RSPAMD_TASK_FLAG_PASS_ALL) {
+ return false;
+ }
+
+ /* Check score limit */
+ if (!std::isnan(lim)) {
+ if (task->result->score > lim) {
+ return true;
+ }
+ }
+
+ if (task->result->passthrough_result != nullptr) {
+ /* We also need to check passthrough results */
+ auto *pr = task->result->passthrough_result;
+ DL_FOREACH(task->result->passthrough_result, pr)
+ {
+ struct rspamd_action_config *act_config =
+ rspamd_find_action_config_for_action(task->result, pr->action);
+
+ /* Skip least results */
+ if (pr->flags & RSPAMD_PASSTHROUGH_LEAST) {
+ continue;
+ }
+
+ /* Skip disabled actions */
+ if (act_config && (act_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
+ continue;
+ }
+
+ /* Immediately stop on non least passthrough action */
+ return true;
+ }
+ }
+
+ return false;
+}
+
+auto symcache_runtime::check_item_deps(struct rspamd_task *task, symcache &cache, cache_item *item,
+ cache_dynamic_item *dyn_item, bool check_only) -> bool
+{
+ constexpr const auto max_recursion = 20;
+ auto log_func = RSPAMD_LOG_FUNC;
+
+ auto inner_functor = [&](int recursion, cache_item *item, cache_dynamic_item *dyn_item, auto rec_functor) -> bool {
+ if (recursion > max_recursion) {
+ msg_err_task_lambda("cyclic dependencies: maximum check level %ud exceed when "
+ "checking dependencies for %s",
+ max_recursion, item->symbol.c_str());
+
+ return true;
+ }
+
+ auto ret = true;
+
+ for (const auto &dep: item->deps) {
+ if (!dep.item) {
+ /* Assume invalid deps as done */
+ msg_debug_cache_task_lambda("symbol %d(%s) has invalid dependencies on %d(%s)",
+ item->id, item->symbol.c_str(), dep.id, dep.sym.c_str());
+ continue;
+ }
+
+ auto *dep_dyn_item = get_dynamic_item(dep.item->id);
+
+ if (!dep_dyn_item->finished) {
+ if (!dep_dyn_item->started) {
+ /* Not started */
+ if (!check_only) {
+ if (!rec_functor(recursion + 1,
+ dep.item,
+ dep_dyn_item,
+ rec_functor)) {
+
+ ret = false;
+ msg_debug_cache_task_lambda("delayed dependency %d(%s) for "
+ "symbol %d(%s)",
+ dep.id, dep.sym.c_str(), item->id, item->symbol.c_str());
+ }
+ else if (!process_symbol(task, cache, dep.item, dep_dyn_item)) {
+ /* Now started, but has events pending */
+ ret = false;
+ msg_debug_cache_task_lambda("started check of %d(%s) symbol "
+ "as dep for "
+ "%d(%s)",
+ dep.id, dep.sym.c_str(), item->id, item->symbol.c_str());
+ }
+ else {
+ msg_debug_cache_task_lambda("dependency %d(%s) for symbol %d(%s) is "
+ "already processed",
+ dep.id, dep.sym.c_str(), item->id, item->symbol.c_str());
+ }
+ }
+ else {
+ msg_debug_cache_task_lambda("dependency %d(%s) for symbol %d(%s) "
+ "cannot be started now",
+ dep.id, dep.sym.c_str(), item->id, item->symbol.c_str());
+ ret = false;
+ }
+ }
+ else {
+ /* Started but not finished */
+ msg_debug_cache_task_lambda("dependency %d(%s) for symbol %d(%s) is "
+ "still executing",
+ dep.id, dep.sym.c_str(), item->id, item->symbol.c_str());
+ ret = false;
+ }
+ }
+ else {
+ msg_debug_cache_task_lambda("dependency %d(%s) for symbol %d(%s) is already "
+ "checked",
+ dep.id, dep.sym.c_str(), item->id, item->symbol.c_str());
+ }
+ }
+
+ return ret;
+ };
+
+ return inner_functor(0, item, dyn_item, inner_functor);
+}
+
+
+struct rspamd_symcache_delayed_cbdata {
+ cache_item *item;
+ struct rspamd_task *task;
+ symcache_runtime *runtime;
+ struct rspamd_async_event *event;
+ struct ev_timer tm;
+};
+
+static void
+rspamd_symcache_delayed_item_fin(gpointer ud)
+{
+ auto *cbd = (struct rspamd_symcache_delayed_cbdata *) ud;
+
+ cbd->event = nullptr;
+ cbd->runtime->unset_slow();
+ ev_timer_stop(cbd->task->event_loop, &cbd->tm);
+}
+
+static void
+rspamd_symcache_delayed_item_cb(EV_P_ ev_timer *w, int what)
+{
+ auto *cbd = (struct rspamd_symcache_delayed_cbdata *) w->data;
+
+ if (cbd->event) {
+ cbd->event = nullptr;
+
+ /* Timer will be stopped here */
+ rspamd_session_remove_event(cbd->task->s,
+ rspamd_symcache_delayed_item_fin, cbd);
+
+ cbd->runtime->process_item_rdeps(cbd->task, cbd->item);
+ }
+}
+
+static void
+rspamd_delayed_timer_dtor(gpointer d)
+{
+ auto *cbd = (struct rspamd_symcache_delayed_cbdata *) d;
+
+ if (cbd->event) {
+ /* Event has not been executed, this will also stop a timer */
+ rspamd_session_remove_event(cbd->task->s,
+ rspamd_symcache_delayed_item_fin, cbd);
+ cbd->event = nullptr;
+ }
+}
+
+auto symcache_runtime::finalize_item(struct rspamd_task *task, cache_dynamic_item *dyn_item) -> void
+{
+ /* Limit to consider a rule as slow (in milliseconds) */
+ constexpr const gdouble slow_diff_limit = 300;
+ auto *item = get_item_by_dynamic_item(dyn_item);
+ /* Sanity checks */
+ g_assert(items_inflight > 0);
+ g_assert(item != nullptr);
+
+ if (dyn_item->async_events > 0) {
+ /*
+ * XXX: Race condition
+ *
+ * It is possible that some async event is still in flight, but we
+ * already know its result, however, it is the responsibility of that
+ * event to decrease async events count and call this function
+ * one more time
+ */
+ msg_debug_cache_task("postpone finalisation of %s(%d) as there are %d "
+ "async events pending",
+ item->symbol.c_str(), item->id, dyn_item->async_events);
+
+ return;
+ }
+
+ msg_debug_cache_task("process finalize for item %s(%d)", item->symbol.c_str(), item->id);
+ dyn_item->finished = true;
+ items_inflight--;
+ cur_item = nullptr;
+
+ auto enable_slow_timer = [&]() -> bool {
+ auto *cbd = rspamd_mempool_alloc0_type(task->task_pool, rspamd_symcache_delayed_cbdata);
+ /* Add timer to allow something else to be executed */
+ ev_timer *tm = &cbd->tm;
+
+ cbd->event = rspamd_session_add_event(task->s,
+ rspamd_symcache_delayed_item_fin, cbd,
+ "symcache");
+ cbd->runtime = this;
+
+ /*
+ * If no event could be added, then we are already in the destruction
+ * phase. So the main issue is to deal with has slow here
+ */
+ if (cbd->event) {
+ ev_timer_init(tm, rspamd_symcache_delayed_item_cb, 0.1, 0.0);
+ ev_set_priority(tm, EV_MINPRI);
+ rspamd_mempool_add_destructor(task->task_pool,
+ rspamd_delayed_timer_dtor, cbd);
+
+ cbd->task = task;
+ cbd->item = item;
+ tm->data = cbd;
+ ev_timer_start(task->event_loop, tm);
+ }
+ else {
+ /* Just reset as no timer is added */
+ has_slow = FALSE;
+ return false;
+ }
+
+ return true;
+ };
+
+ if (profile) {
+ ev_now_update_if_cheap(task->event_loop);
+ auto diff = ((ev_now(task->event_loop) - profile_start) * 1e3 -
+ dyn_item->start_msec);
+
+ if (diff > slow_diff_limit) {
+
+ if (!has_slow) {
+ has_slow = true;
+
+ msg_info_task("slow rule: %s(%d): %.2f ms; enable slow timer delay",
+ item->symbol.c_str(), item->id,
+ diff);
+
+ if (enable_slow_timer()) {
+ /* Allow network execution */
+ return;
+ }
+ }
+ else {
+ msg_info_task("slow rule: %s(%d): %.2f ms",
+ item->symbol.c_str(), item->id,
+ diff);
+ }
+ }
+
+ if (G_UNLIKELY(RSPAMD_TASK_IS_PROFILING(task))) {
+ rspamd_task_profile_set(task, item->symbol.c_str(), diff);
+ }
+
+ if (rspamd_worker_is_scanner(task->worker)) {
+ rspamd_set_counter(item->cd, diff);
+ }
+ }
+
+ process_item_rdeps(task, item);
+}
+
+auto symcache_runtime::process_item_rdeps(struct rspamd_task *task, cache_item *item) -> void
+{
+ auto *cache_ptr = reinterpret_cast<symcache *>(task->cfg->cache);
+
+ // Avoid race condition with the runtime destruction and the delay timer
+ if (!order) {
+ return;
+ }
+
+ for (const auto &rdep: item->rdeps) {
+ if (rdep.item) {
+ auto *dyn_item = get_dynamic_item(rdep.item->id);
+ if (!dyn_item->started) {
+ msg_debug_cache_task("check item %d(%s) rdep of %s ",
+ rdep.item->id, rdep.item->symbol.c_str(), item->symbol.c_str());
+
+ if (!check_item_deps(task, *cache_ptr, rdep.item, dyn_item, false)) {
+ msg_debug_cache_task("blocked execution of %d(%s) rdep of %s "
+ "unless deps are resolved",
+ rdep.item->id, rdep.item->symbol.c_str(), item->symbol.c_str());
+ }
+ else {
+ process_symbol(task, *cache_ptr, rdep.item,
+ dyn_item);
+ }
+ }
+ }
+ }
+}
+
+auto symcache_runtime::get_item_by_dynamic_item(cache_dynamic_item *dyn_item) const -> cache_item *
+{
+ auto idx = dyn_item - dynamic_items;
+
+ if (idx >= 0 && idx < order->size()) {
+ return order->d[idx].get();
+ }
+
+ msg_err("internal error: invalid index to get: %d", (int) idx);
+
+ return nullptr;
+}
+
+}// namespace rspamd::symcache
diff --git a/src/libserver/symcache/symcache_runtime.hxx b/src/libserver/symcache/symcache_runtime.hxx
new file mode 100644
index 0000000..aa8f66c
--- /dev/null
+++ b/src/libserver/symcache/symcache_runtime.hxx
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Symcache runtime is produced for each task and it consists of symbols
+ * being executed, being dynamically disabled/enabled and it also captures
+ * the current order of the symbols (produced by resort periodic)
+ */
+
+#ifndef RSPAMD_SYMCACHE_RUNTIME_HXX
+#define RSPAMD_SYMCACHE_RUNTIME_HXX
+#pragma once
+
+#include "symcache_internal.hxx"
+
+struct rspamd_scan_result;
+
+namespace rspamd::symcache {
+/**
+ * These items are saved within task structure and are used to track
+ * symbols execution.
+ * Each symcache item occupies a single dynamic item, that currently has 8 bytes
+ * length
+ */
+struct cache_dynamic_item {
+ std::uint16_t start_msec; /* Relative to task time */
+ bool started;
+ bool finished;
+ std::uint32_t async_events;
+};
+
+static_assert(sizeof(cache_dynamic_item) == sizeof(std::uint64_t));
+static_assert(std::is_trivial_v<cache_dynamic_item>);
+
+class symcache_runtime {
+ unsigned items_inflight;
+ bool profile;
+ bool has_slow;
+
+ double profile_start;
+ double lim;
+
+ struct cache_dynamic_item *cur_item;
+ order_generation_ptr order;
+ /* Dynamically expanded as needed */
+ mutable struct cache_dynamic_item dynamic_items[];
+ /* We allocate this structure merely in memory pool, so destructor is absent */
+ ~symcache_runtime() = delete;
+
+ auto process_symbol(struct rspamd_task *task, symcache &cache, cache_item *item,
+ cache_dynamic_item *dyn_item) -> bool;
+ /* Specific stages of the processing */
+ auto process_pre_postfilters(struct rspamd_task *task, symcache &cache, int start_events, unsigned int stage) -> bool;
+ auto process_filters(struct rspamd_task *task, symcache &cache, int start_events) -> bool;
+ auto check_metric_limit(struct rspamd_task *task) -> bool;
+ auto check_item_deps(struct rspamd_task *task, symcache &cache, cache_item *item,
+ cache_dynamic_item *dyn_item, bool check_only) -> bool;
+
+public:
+ /* Dropper for a shared ownership */
+ auto savepoint_dtor() -> void
+ {
+
+ /* Drop shared ownership */
+ order.reset();
+ }
+ /**
+ * Creates a cache runtime using task mempool
+ * @param task
+ * @param cache
+ * @return
+ */
+ static auto create(struct rspamd_task *task, symcache &cache) -> symcache_runtime *;
+ /**
+ * Process task settings
+ * @param task
+ * @return
+ */
+ auto process_settings(struct rspamd_task *task, const symcache &cache) -> bool;
+
+ /**
+ * Disable all symbols but not touching ones that are in the specific mask
+ * @param skip_mask
+ */
+ auto disable_all_symbols(int skip_mask) -> void;
+
+ /**
+ * Disable a symbol (or it's parent)
+ * @param name
+ * @return
+ */
+ auto disable_symbol(struct rspamd_task *task, const symcache &cache, std::string_view name) -> bool;
+
+ /**
+ * Enable a symbol (or it's parent)
+ * @param name
+ * @return
+ */
+ auto enable_symbol(struct rspamd_task *task, const symcache &cache, std::string_view name) -> bool;
+
+ /**
+ * Checks if an item has been checked/disabled
+ * @param cache
+ * @param name
+ * @return
+ */
+ auto is_symbol_checked(const symcache &cache, std::string_view name) -> bool;
+
+ /**
+ * Checks if a symbol is enabled for execution, checking all pending conditions
+ * @param task
+ * @param cache
+ * @param name
+ * @return
+ */
+ auto is_symbol_enabled(struct rspamd_task *task, const symcache &cache, std::string_view name) -> bool;
+
+ /**
+ * Get the current processed item
+ * @return
+ */
+ auto get_cur_item() const -> auto
+ {
+ return cur_item;
+ }
+
+ /**
+ * Set the current processed item
+ * @param item
+ * @return
+ */
+ auto set_cur_item(cache_dynamic_item *item) -> auto
+ {
+ std::swap(item, cur_item);
+ return item;
+ }
+
+ /**
+ * Set profile mode for the runtime
+ * @param enable
+ * @return
+ */
+ auto set_profile_mode(bool enable) -> auto
+ {
+ std::swap(profile, enable);
+ return enable;
+ }
+
+ /**
+ * Returns the dynamic item by static item id
+ * @param id
+ * @return
+ */
+ auto get_dynamic_item(int id) const -> cache_dynamic_item *;
+
+ /**
+ * Returns static cache item by dynamic cache item
+ * @return
+ */
+ auto get_item_by_dynamic_item(cache_dynamic_item *) const -> cache_item *;
+
+ /**
+ * Process symbols in the cache
+ * @param task
+ * @param cache
+ * @param stage
+ * @return
+ */
+ auto process_symbols(struct rspamd_task *task, symcache &cache, unsigned int stage) -> bool;
+
+ /**
+ * Finalize execution of some item in the cache
+ * @param task
+ * @param item
+ */
+ auto finalize_item(struct rspamd_task *task, cache_dynamic_item *item) -> void;
+
+ /**
+ * Process unblocked reverse dependencies of the specific item
+ * @param task
+ * @param item
+ */
+ auto process_item_rdeps(struct rspamd_task *task, cache_item *item) -> void;
+
+ /* XXX: a helper to allow hiding internal implementation of the slow timer structure */
+ auto unset_slow() -> void
+ {
+ has_slow = false;
+ }
+};
+
+
+}// namespace rspamd::symcache
+
+#endif//RSPAMD_SYMCACHE_RUNTIME_HXX
diff --git a/src/libserver/task.c b/src/libserver/task.c
new file mode 100644
index 0000000..9763d1e
--- /dev/null
+++ b/src/libserver/task.c
@@ -0,0 +1,1975 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "task.h"
+#include "rspamd.h"
+#include "scan_result.h"
+#include "libserver/protocol.h"
+#include "libserver/protocol_internal.h"
+#include "message.h"
+#include "lua/lua_common.h"
+#include "email_addr.h"
+#include "src/libserver/composites/composites.h"
+#include "stat_api.h"
+#include "unix-std.h"
+#include "utlist.h"
+#include "libserver/mempool_vars_internal.h"
+#include "libserver/cfg_file_private.h"
+#include "libmime/lang_detection.h"
+#include "libmime/scan_result_private.h"
+
+#ifdef WITH_JEMALLOC
+#include <jemalloc/jemalloc.h>
+#else
+#if defined(__GLIBC__) && defined(_GNU_SOURCE)
+#include <malloc.h>
+#endif
+#endif
+
+#include <math.h>
+
+#ifdef SYS_ZSTD
+#include "zstd.h"
+#else
+#include "contrib/zstd/zstd.h"
+#endif
+
+__KHASH_IMPL(rspamd_req_headers_hash, static inline,
+ rspamd_ftok_t *, struct rspamd_request_header_chain *, 1,
+ rspamd_ftok_icase_hash, rspamd_ftok_icase_equal)
+
+static GQuark
+rspamd_task_quark(void)
+{
+ return g_quark_from_static_string("task-error");
+}
+
+/*
+ * Create new task
+ */
+struct rspamd_task *
+rspamd_task_new(struct rspamd_worker *worker,
+ struct rspamd_config *cfg,
+ rspamd_mempool_t *pool,
+ struct rspamd_lang_detector *lang_det,
+ struct ev_loop *event_loop,
+ gboolean debug_mem)
+{
+ struct rspamd_task *new_task;
+ rspamd_mempool_t *task_pool;
+ guint flags = 0;
+
+ if (pool == NULL) {
+ task_pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "task", debug_mem ? RSPAMD_MEMPOOL_DEBUG : 0);
+ flags |= RSPAMD_TASK_FLAG_OWN_POOL;
+ }
+ else {
+ task_pool = pool;
+ }
+
+ new_task = rspamd_mempool_alloc0(task_pool, sizeof(struct rspamd_task));
+ new_task->task_pool = task_pool;
+ new_task->flags = flags;
+ new_task->worker = worker;
+ new_task->lang_det = lang_det;
+
+ if (cfg) {
+ new_task->cfg = cfg;
+ REF_RETAIN(cfg);
+
+ if (cfg->check_all_filters) {
+ new_task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
+ }
+
+
+ if (cfg->re_cache) {
+ new_task->re_rt = rspamd_re_cache_runtime_new(cfg->re_cache);
+ }
+
+ if (new_task->lang_det == NULL && cfg->lang_det != NULL) {
+ new_task->lang_det = cfg->lang_det;
+ }
+ }
+
+ new_task->event_loop = event_loop;
+ new_task->task_timestamp = ev_time();
+ new_task->time_real_finish = NAN;
+
+ new_task->request_headers = kh_init(rspamd_req_headers_hash);
+ new_task->sock = -1;
+ new_task->flags |= (RSPAMD_TASK_FLAG_MIME);
+ /* Default results chain */
+ rspamd_create_metric_result(new_task, NULL, -1);
+
+ new_task->queue_id = "undef";
+ new_task->messages = ucl_object_typed_new(UCL_OBJECT);
+ kh_static_init(rspamd_task_lua_cache, &new_task->lua_cache);
+
+ return new_task;
+}
+
+
+static void
+rspamd_task_reply(struct rspamd_task *task)
+{
+ const ev_tstamp write_timeout = 5.0;
+
+ if (task->fin_callback) {
+ task->fin_callback(task, task->fin_arg);
+ }
+ else {
+ if (!(task->processed_stages & RSPAMD_TASK_STAGE_REPLIED)) {
+ rspamd_protocol_write_reply(task, write_timeout);
+ }
+ }
+}
+
+/*
+ * Called if all filters are processed
+ * @return TRUE if session should be terminated
+ */
+gboolean
+rspamd_task_fin(void *arg)
+{
+ struct rspamd_task *task = (struct rspamd_task *) arg;
+
+ /* Task is already finished or skipped */
+ if (RSPAMD_TASK_IS_PROCESSED(task)) {
+ rspamd_task_reply(task);
+ return TRUE;
+ }
+
+ if (!rspamd_task_process(task, RSPAMD_TASK_PROCESS_ALL)) {
+ rspamd_task_reply(task);
+ return TRUE;
+ }
+
+ if (RSPAMD_TASK_IS_PROCESSED(task)) {
+ rspamd_task_reply(task);
+ return TRUE;
+ }
+
+ /* One more iteration */
+ return FALSE;
+}
+
+/*
+ * Free all structures of worker_task
+ */
+void rspamd_task_free(struct rspamd_task *task)
+{
+ struct rspamd_email_address *addr;
+ static guint free_iters = 0;
+ guint i;
+
+ if (task) {
+ debug_task("free pointer %p", task);
+
+ if (task->rcpt_envelope) {
+ for (i = 0; i < task->rcpt_envelope->len; i++) {
+ addr = g_ptr_array_index(task->rcpt_envelope, i);
+ rspamd_email_address_free(addr);
+ }
+
+ g_ptr_array_free(task->rcpt_envelope, TRUE);
+ }
+
+ if (task->from_envelope) {
+ rspamd_email_address_free(task->from_envelope);
+ }
+
+ if (task->from_envelope_orig) {
+ rspamd_email_address_free(task->from_envelope_orig);
+ }
+
+ if (task->meta_words) {
+ g_array_free(task->meta_words, TRUE);
+ }
+
+ ucl_object_unref(task->messages);
+
+ if (task->re_rt) {
+ rspamd_re_cache_runtime_destroy(task->re_rt);
+ }
+
+ if (task->http_conn != NULL) {
+ rspamd_http_connection_reset(task->http_conn);
+ rspamd_http_connection_unref(task->http_conn);
+ }
+
+ if (task->settings != NULL) {
+ ucl_object_unref(task->settings);
+ }
+
+ if (task->settings_elt != NULL) {
+ REF_RELEASE(task->settings_elt);
+ }
+
+ if (task->client_addr) {
+ rspamd_inet_address_free(task->client_addr);
+ }
+
+ if (task->from_addr) {
+ rspamd_inet_address_free(task->from_addr);
+ }
+
+ if (task->err) {
+ g_error_free(task->err);
+ }
+
+ ev_timer_stop(task->event_loop, &task->timeout_ev);
+ ev_io_stop(task->event_loop, &task->guard_ev);
+
+ if (task->sock != -1) {
+ close(task->sock);
+ }
+
+ if (task->cfg) {
+
+
+ struct rspamd_lua_cached_entry entry;
+
+ kh_foreach_value(&task->lua_cache, entry, {
+ luaL_unref(task->cfg->lua_state,
+ LUA_REGISTRYINDEX, entry.ref);
+ });
+ kh_static_destroy(rspamd_task_lua_cache, &task->lua_cache);
+
+ if (task->cfg->full_gc_iters && (++free_iters > task->cfg->full_gc_iters)) {
+ /* Perform more expensive cleanup cycle */
+ gsize allocated = 0, active = 0, metadata = 0,
+ resident = 0, mapped = 0, old_lua_mem = 0;
+ gdouble t1, t2;
+
+ old_lua_mem = lua_gc(task->cfg->lua_state, LUA_GCCOUNT, 0);
+ t1 = rspamd_get_ticks(FALSE);
+
+#ifdef WITH_JEMALLOC
+ gsize sz = sizeof(gsize);
+ mallctl("stats.allocated", &allocated, &sz, NULL, 0);
+ mallctl("stats.active", &active, &sz, NULL, 0);
+ mallctl("stats.metadata", &metadata, &sz, NULL, 0);
+ mallctl("stats.resident", &resident, &sz, NULL, 0);
+ mallctl("stats.mapped", &mapped, &sz, NULL, 0);
+#else
+#if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ malloc_trim(0);
+#endif
+#endif
+ lua_gc(task->cfg->lua_state, LUA_GCCOLLECT, 0);
+ t2 = rspamd_get_ticks(FALSE);
+
+ msg_notice_task("perform full gc cycle; memory stats: "
+ "%Hz allocated, %Hz active, %Hz metadata, %Hz resident, %Hz mapped;"
+ " lua memory: %z kb -> %d kb; %f ms for gc iter",
+ allocated, active, metadata, resident, mapped,
+ old_lua_mem, lua_gc(task->cfg->lua_state, LUA_GCCOUNT, 0),
+ (t2 - t1) * 1000.0);
+ free_iters = rspamd_time_jitter(0,
+ (gdouble) task->cfg->full_gc_iters / 2);
+ }
+
+ REF_RELEASE(task->cfg);
+ }
+
+ kh_destroy(rspamd_req_headers_hash, task->request_headers);
+ rspamd_message_unref(task->message);
+
+ if (task->flags & RSPAMD_TASK_FLAG_OWN_POOL) {
+ rspamd_mempool_destructors_enforce(task->task_pool);
+
+ if (task->symcache_runtime) {
+ rspamd_symcache_runtime_destroy(task);
+ }
+
+ rspamd_mempool_delete(task->task_pool);
+ }
+ else if (task->symcache_runtime) {
+ rspamd_symcache_runtime_destroy(task);
+ }
+ }
+}
+
+struct rspamd_task_map {
+ gpointer begin;
+ gulong len;
+ gint fd;
+};
+
+static void
+rspamd_task_unmapper(gpointer ud)
+{
+ struct rspamd_task_map *m = ud;
+
+ munmap(m->begin, m->len);
+ close(m->fd);
+}
+
+gboolean
+rspamd_task_load_message(struct rspamd_task *task,
+ struct rspamd_http_message *msg, const gchar *start, gsize len)
+{
+ guint control_len, r;
+ struct ucl_parser *parser;
+ ucl_object_t *control_obj;
+ gchar filepath[PATH_MAX], *fp;
+ gint fd, flen;
+ gulong offset = 0, shmem_size = 0;
+ rspamd_ftok_t *tok;
+ gpointer map;
+ struct stat st;
+ struct rspamd_task_map *m;
+ const gchar *ft;
+
+#ifdef HAVE_SANE_SHMEM
+ ft = "shm";
+#else
+ ft = "file";
+#endif
+
+ if (msg) {
+ rspamd_protocol_handle_headers(task, msg);
+ }
+
+ tok = rspamd_task_get_request_header(task, "shm");
+
+ if (tok) {
+ /* Shared memory part */
+ r = rspamd_strlcpy(filepath, tok->begin,
+ MIN(sizeof(filepath), tok->len + 1));
+
+ rspamd_url_decode(filepath, filepath, r + 1);
+ flen = strlen(filepath);
+
+ if (filepath[0] == '"' && flen > 2) {
+ /* We need to unquote filepath */
+ fp = &filepath[1];
+ fp[flen - 2] = '\0';
+ }
+ else {
+ fp = &filepath[0];
+ }
+#ifdef HAVE_SANE_SHMEM
+ fd = shm_open(fp, O_RDONLY, 00600);
+#else
+ fd = open(fp, O_RDONLY, 00600);
+#endif
+ if (fd == -1) {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Cannot open %s segment (%s): %s", ft, fp, strerror(errno));
+ return FALSE;
+ }
+
+ if (fstat(fd, &st) == -1) {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Cannot stat %s segment (%s): %s", ft, fp, strerror(errno));
+ close(fd);
+
+ return FALSE;
+ }
+
+ map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+
+ if (map == MAP_FAILED) {
+ close(fd);
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Cannot mmap %s (%s): %s", ft, fp, strerror(errno));
+ return FALSE;
+ }
+
+ tok = rspamd_task_get_request_header(task, "shm-offset");
+
+ if (tok) {
+ rspamd_strtoul(tok->begin, tok->len, &offset);
+
+ if (offset > (gulong) st.st_size) {
+ msg_err_task("invalid offset %ul (%ul available) for shm "
+ "segment %s",
+ offset, (gulong) st.st_size, fp);
+ munmap(map, st.st_size);
+ close(fd);
+
+ return FALSE;
+ }
+ }
+
+ tok = rspamd_task_get_request_header(task, "shm-length");
+ shmem_size = st.st_size;
+
+
+ if (tok) {
+ rspamd_strtoul(tok->begin, tok->len, &shmem_size);
+
+ if (shmem_size > (gulong) st.st_size) {
+ msg_err_task("invalid length %ul (%ul available) for %s "
+ "segment %s",
+ shmem_size, (gulong) st.st_size, ft, fp);
+ munmap(map, st.st_size);
+ close(fd);
+
+ return FALSE;
+ }
+ }
+
+ task->msg.begin = ((guchar *) map) + offset;
+ task->msg.len = shmem_size;
+ m = rspamd_mempool_alloc(task->task_pool, sizeof(*m));
+ m->begin = map;
+ m->len = st.st_size;
+ m->fd = fd;
+
+ msg_info_task("loaded message from shared memory %s (%ul size, %ul offset), fd=%d",
+ fp, shmem_size, offset, fd);
+
+ rspamd_mempool_add_destructor(task->task_pool, rspamd_task_unmapper, m);
+
+ return TRUE;
+ }
+
+ tok = rspamd_task_get_request_header(task, "file");
+
+ if (tok == NULL) {
+ tok = rspamd_task_get_request_header(task, "path");
+ }
+
+ if (tok) {
+ debug_task("want to scan file %T", tok);
+
+ r = rspamd_strlcpy(filepath, tok->begin,
+ MIN(sizeof(filepath), tok->len + 1));
+
+ rspamd_url_decode(filepath, filepath, r + 1);
+ flen = strlen(filepath);
+
+ if (filepath[0] == '"' && flen > 2) {
+ /* We need to unquote filepath */
+ fp = &filepath[1];
+ fp[flen - 2] = '\0';
+ }
+ else {
+ fp = &filepath[0];
+ }
+
+ if (stat(fp, &st) == -1) {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Invalid file (%s): %s", fp, strerror(errno));
+ return FALSE;
+ }
+
+ if (G_UNLIKELY(st.st_size == 0)) {
+ /* Empty file */
+ task->flags |= RSPAMD_TASK_FLAG_EMPTY;
+ task->msg.begin = rspamd_mempool_strdup(task->task_pool, "");
+ task->msg.len = 0;
+ }
+ else {
+ fd = open(fp, O_RDONLY);
+
+ if (fd == -1) {
+ g_set_error(&task->err, rspamd_task_quark(),
+ RSPAMD_PROTOCOL_ERROR,
+ "Cannot open file (%s): %s", fp, strerror(errno));
+ return FALSE;
+ }
+
+ map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+
+
+ if (map == MAP_FAILED) {
+ close(fd);
+ g_set_error(&task->err, rspamd_task_quark(),
+ RSPAMD_PROTOCOL_ERROR,
+ "Cannot mmap file (%s): %s", fp, strerror(errno));
+ return FALSE;
+ }
+
+ task->msg.begin = map;
+ task->msg.len = st.st_size;
+ m = rspamd_mempool_alloc(task->task_pool, sizeof(*m));
+ m->begin = map;
+ m->len = st.st_size;
+ m->fd = fd;
+
+ rspamd_mempool_add_destructor(task->task_pool, rspamd_task_unmapper, m);
+ }
+
+ task->msg.fpath = rspamd_mempool_strdup(task->task_pool, fp);
+ task->flags |= RSPAMD_TASK_FLAG_FILE;
+
+ msg_info_task("loaded message from file %s", fp);
+
+ return TRUE;
+ }
+
+ /* Plain data */
+ debug_task("got input of length %z", task->msg.len);
+
+ /* Check compression */
+ tok = rspamd_task_get_request_header(task, "compression");
+
+ if (tok) {
+ /* Need to uncompress */
+ rspamd_ftok_t t;
+
+ t.begin = "zstd";
+ t.len = 4;
+
+ if (rspamd_ftok_casecmp(tok, &t) == 0) {
+ ZSTD_DStream *zstream;
+ ZSTD_inBuffer zin;
+ ZSTD_outBuffer zout;
+ guchar *out;
+ gsize outlen, r;
+ gulong dict_id;
+
+ if (!rspamd_libs_reset_decompression(task->cfg->libs_ctx)) {
+ g_set_error(&task->err, rspamd_task_quark(),
+ RSPAMD_PROTOCOL_ERROR,
+ "Cannot decompress, decompressor init failed");
+
+ return FALSE;
+ }
+
+ tok = rspamd_task_get_request_header(task, "dictionary");
+
+ if (tok != NULL) {
+ /* We need to use custom dictionary */
+ if (!rspamd_strtoul(tok->begin, tok->len, &dict_id)) {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Non numeric dictionary");
+
+ return FALSE;
+ }
+
+ if (!task->cfg->libs_ctx->in_dict) {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Unknown dictionary, undefined locally");
+
+ return FALSE;
+ }
+
+ if (task->cfg->libs_ctx->in_dict->id != dict_id) {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Unknown dictionary, invalid dictionary id");
+
+ return FALSE;
+ }
+ }
+
+ zstream = task->cfg->libs_ctx->in_zstream;
+
+ zin.pos = 0;
+ zin.src = start;
+ zin.size = len;
+
+ if ((outlen = ZSTD_getDecompressedSize(start, len)) == 0) {
+ outlen = ZSTD_DStreamOutSize();
+ }
+
+ out = g_malloc(outlen);
+ zout.dst = out;
+ zout.pos = 0;
+ zout.size = outlen;
+
+ while (zin.pos < zin.size) {
+ r = ZSTD_decompressStream(zstream, &zout, &zin);
+
+ if (ZSTD_isError(r)) {
+ g_set_error(&task->err, rspamd_task_quark(),
+ RSPAMD_PROTOCOL_ERROR,
+ "Decompression error: %s", ZSTD_getErrorName(r));
+
+ return FALSE;
+ }
+
+ if (zout.pos == zout.size) {
+ /* We need to extend output buffer */
+ zout.size = zout.size * 2 + 1;
+ zout.dst = g_realloc(zout.dst, zout.size);
+ }
+ }
+
+ rspamd_mempool_add_destructor(task->task_pool, g_free, zout.dst);
+ task->msg.begin = zout.dst;
+ task->msg.len = zout.pos;
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED;
+
+ msg_info_task("loaded message from zstd compressed stream; "
+ "compressed: %ul; uncompressed: %ul",
+ (gulong) zin.size, (gulong) zout.pos);
+ }
+ else {
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Invalid compression method");
+ return FALSE;
+ }
+ }
+ else {
+ task->msg.begin = start;
+ task->msg.len = len;
+ }
+
+ if (task->msg.len == 0) {
+ task->flags |= RSPAMD_TASK_FLAG_EMPTY;
+ }
+
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL) {
+ rspamd_ftok_t *hv = rspamd_task_get_request_header(task, MLEN_HEADER);
+ gulong message_len = 0;
+
+ if (!hv || !rspamd_strtoul(hv->begin, hv->len, &message_len) ||
+ task->msg.len < message_len) {
+ msg_warn_task("message has invalid message length: %ul and total len: %ul",
+ message_len, task->msg.len);
+ g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+ "Invalid length");
+ return FALSE;
+ }
+
+ control_len = task->msg.len - message_len;
+
+ if (control_len > 0) {
+ parser = ucl_parser_new(UCL_PARSER_KEY_LOWERCASE);
+
+ if (!ucl_parser_add_chunk(parser, task->msg.begin, control_len)) {
+ msg_warn_task("processing of control chunk failed: %s",
+ ucl_parser_get_error(parser));
+ ucl_parser_free(parser);
+ }
+ else {
+ control_obj = ucl_parser_get_object(parser);
+ ucl_parser_free(parser);
+ rspamd_protocol_handle_control(task, control_obj);
+ ucl_object_unref(control_obj);
+ }
+
+ task->msg.begin += control_len;
+ task->msg.len -= control_len;
+ }
+ }
+
+ return TRUE;
+}
+
+static guint
+rspamd_task_select_processing_stage(struct rspamd_task *task, guint stages)
+{
+ guint st, mask;
+
+ mask = task->processed_stages;
+
+ if (mask == 0) {
+ st = 0;
+ }
+ else {
+ for (st = 1; mask != 1; st++) {
+ mask = mask >> 1u;
+ }
+ }
+
+ st = 1 << st;
+
+ if (stages & st) {
+ return st;
+ }
+ else if (st < RSPAMD_TASK_STAGE_DONE) {
+ /* We assume that the stage that was not requested is done */
+ task->processed_stages |= st;
+ return rspamd_task_select_processing_stage(task, stages);
+ }
+
+ /* We are done */
+ return RSPAMD_TASK_STAGE_DONE;
+}
+
+gboolean
+rspamd_task_process(struct rspamd_task *task, guint stages)
+{
+ guint st;
+ gboolean ret = TRUE, all_done = TRUE;
+ GError *stat_error = NULL;
+
+ /* Avoid nested calls */
+ if (task->flags & RSPAMD_TASK_FLAG_PROCESSING) {
+ return TRUE;
+ }
+
+ if (RSPAMD_TASK_IS_PROCESSED(task)) {
+ return TRUE;
+ }
+
+ task->flags |= RSPAMD_TASK_FLAG_PROCESSING;
+
+ st = rspamd_task_select_processing_stage(task, stages);
+
+ switch (st) {
+ case RSPAMD_TASK_STAGE_CONNFILTERS:
+ all_done = rspamd_symcache_process_symbols(task, task->cfg->cache, st);
+ break;
+
+ case RSPAMD_TASK_STAGE_READ_MESSAGE:
+ if (!rspamd_message_parse(task)) {
+ ret = FALSE;
+ }
+ break;
+
+ case RSPAMD_TASK_STAGE_PROCESS_MESSAGE:
+ if (!(task->flags & RSPAMD_TASK_FLAG_SKIP_PROCESS)) {
+ rspamd_message_process(task);
+ }
+ break;
+
+ case RSPAMD_TASK_STAGE_PRE_FILTERS:
+ case RSPAMD_TASK_STAGE_FILTERS:
+ all_done = rspamd_symcache_process_symbols(task, task->cfg->cache, st);
+ break;
+
+ case RSPAMD_TASK_STAGE_CLASSIFIERS:
+ case RSPAMD_TASK_STAGE_CLASSIFIERS_PRE:
+ case RSPAMD_TASK_STAGE_CLASSIFIERS_POST:
+ if (!RSPAMD_TASK_IS_EMPTY(task)) {
+ if (rspamd_stat_classify(task, task->cfg->lua_state, st, &stat_error) ==
+ RSPAMD_STAT_PROCESS_ERROR) {
+ msg_err_task("classify error: %e", stat_error);
+ g_error_free(stat_error);
+ }
+ }
+ break;
+
+ case RSPAMD_TASK_STAGE_COMPOSITES:
+ rspamd_composites_process_task(task);
+ task->result->nresults_postfilters = task->result->nresults;
+ break;
+
+ case RSPAMD_TASK_STAGE_POST_FILTERS:
+ all_done = rspamd_symcache_process_symbols(task, task->cfg->cache,
+ st);
+
+ if (all_done && (task->flags & RSPAMD_TASK_FLAG_LEARN_AUTO) &&
+ !RSPAMD_TASK_IS_EMPTY(task) &&
+ !(task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM | RSPAMD_TASK_FLAG_LEARN_HAM))) {
+ rspamd_stat_check_autolearn(task);
+ }
+ break;
+
+ case RSPAMD_TASK_STAGE_LEARN:
+ case RSPAMD_TASK_STAGE_LEARN_PRE:
+ case RSPAMD_TASK_STAGE_LEARN_POST:
+ if (task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM | RSPAMD_TASK_FLAG_LEARN_HAM)) {
+ if (task->err == NULL) {
+ if (!rspamd_stat_learn(task,
+ task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM,
+ task->cfg->lua_state, task->classifier,
+ st, &stat_error)) {
+
+ if (stat_error == NULL) {
+ g_set_error(&stat_error,
+ g_quark_from_static_string("stat"), 500,
+ "Unknown statistics error, found on stage %s;"
+ " classifier: %s",
+ rspamd_task_stage_name(st), task->classifier);
+ }
+
+ if (stat_error->code >= 400) {
+ msg_err_task("learn error: %e", stat_error);
+ }
+ else {
+ msg_notice_task("skip learning: %e", stat_error);
+ }
+
+ if (!(task->flags & RSPAMD_TASK_FLAG_LEARN_AUTO)) {
+ task->err = stat_error;
+ task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
+ }
+ else {
+ /* Do not skip idempotent in case of learn error */
+ if (stat_error) {
+ g_error_free(stat_error);
+ }
+
+ task->processed_stages |= RSPAMD_TASK_STAGE_LEARN |
+ RSPAMD_TASK_STAGE_LEARN_PRE |
+ RSPAMD_TASK_STAGE_LEARN_POST;
+ }
+ }
+ }
+ }
+ break;
+ case RSPAMD_TASK_STAGE_COMPOSITES_POST:
+ /* Second run of composites processing before idempotent filters (if needed) */
+ if (task->result->nresults_postfilters != task->result->nresults) {
+ rspamd_composites_process_task(task);
+ }
+ else {
+ msg_debug_task("skip second run of composites as the result has not been changed");
+ }
+ break;
+
+ case RSPAMD_TASK_STAGE_IDEMPOTENT:
+ /* Stop task timeout */
+ if (ev_can_stop(&task->timeout_ev)) {
+ ev_timer_stop(task->event_loop, &task->timeout_ev);
+ }
+
+ all_done = rspamd_symcache_process_symbols(task, task->cfg->cache, st);
+ break;
+
+ case RSPAMD_TASK_STAGE_DONE:
+ task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
+ break;
+
+ default:
+ /* TODO: not implemented stage */
+ break;
+ }
+
+ if (RSPAMD_TASK_IS_SKIPPED(task)) {
+ /* Set all bits except idempotent filters */
+ task->processed_stages |= 0x7FFF;
+ }
+
+ task->flags &= ~RSPAMD_TASK_FLAG_PROCESSING;
+
+ if (!ret || RSPAMD_TASK_IS_PROCESSED(task)) {
+ if (!ret) {
+ /* Set processed flags */
+ task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
+ }
+
+ msg_debug_task("task is processed");
+
+ return ret;
+ }
+
+ if (ret) {
+ if (rspamd_session_events_pending(task->s) != 0) {
+ /* We have events pending, so we consider this stage as incomplete */
+ msg_debug_task("need more work on stage %d", st);
+ }
+ else {
+ if (all_done) {
+ /* Mark the current stage as done and go to the next stage */
+ msg_debug_task("completed stage %d", st);
+ task->processed_stages |= st;
+ }
+ else {
+ msg_debug_task("need more processing on stage %d", st);
+ }
+
+ /* Tail recursion */
+ return rspamd_task_process(task, stages);
+ }
+ }
+
+ return ret;
+}
+
+struct rspamd_email_address *
+rspamd_task_get_sender(struct rspamd_task *task)
+{
+ return task->from_envelope;
+}
+
+static const gchar *
+rspamd_task_cache_principal_recipient(struct rspamd_task *task,
+ const gchar *rcpt, gsize len)
+{
+ gchar *rcpt_lc;
+
+ if (rcpt == NULL) {
+ return NULL;
+ }
+
+ rcpt_lc = rspamd_mempool_alloc(task->task_pool, len + 1);
+ rspamd_strlcpy(rcpt_lc, rcpt, len + 1);
+ rspamd_str_lc(rcpt_lc, len);
+
+ rspamd_mempool_set_variable(task->task_pool,
+ RSPAMD_MEMPOOL_PRINCIPAL_RECIPIENT, rcpt_lc, NULL);
+
+ return rcpt_lc;
+}
+
+const gchar *
+rspamd_task_get_principal_recipient(struct rspamd_task *task)
+{
+ const gchar *val;
+ struct rspamd_email_address *addr;
+ guint i;
+
+ val = rspamd_mempool_get_variable(task->task_pool,
+ RSPAMD_MEMPOOL_PRINCIPAL_RECIPIENT);
+
+ if (val) {
+ return val;
+ }
+
+ if (task->deliver_to) {
+ return rspamd_task_cache_principal_recipient(task, task->deliver_to,
+ strlen(task->deliver_to));
+ }
+ if (task->rcpt_envelope != NULL) {
+
+ PTR_ARRAY_FOREACH(task->rcpt_envelope, i, addr)
+ {
+ if (addr->addr && !(addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL)) {
+ return rspamd_task_cache_principal_recipient(task, addr->addr,
+ addr->addr_len);
+ }
+ }
+ }
+
+ GPtrArray *rcpt_mime = MESSAGE_FIELD_CHECK(task, rcpt_mime);
+ if (rcpt_mime != NULL && rcpt_mime->len > 0) {
+ PTR_ARRAY_FOREACH(rcpt_mime, i, addr)
+ {
+ if (addr->addr && !(addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL)) {
+ return rspamd_task_cache_principal_recipient(task, addr->addr,
+ addr->addr_len);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+gboolean
+rspamd_learn_task_spam(struct rspamd_task *task,
+ gboolean is_spam,
+ const gchar *classifier,
+ GError **err)
+{
+ if (is_spam) {
+ task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
+ }
+ else {
+ task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
+ }
+
+ task->classifier = classifier;
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_task_log_check_condition(struct rspamd_task *task,
+ struct rspamd_log_format *lf)
+{
+ gboolean ret = FALSE;
+
+ switch (lf->type) {
+ case RSPAMD_LOG_MID:
+ if (MESSAGE_FIELD_CHECK(task, message_id) &&
+ strcmp(MESSAGE_FIELD(task, message_id), "undef") != 0) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_QID:
+ if (task->queue_id && strcmp(task->queue_id, "undef") != 0) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_USER:
+ if (task->auth_user) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_IP:
+ if (task->from_addr && rspamd_ip_is_valid(task->from_addr)) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_SMTP_RCPT:
+ case RSPAMD_LOG_SMTP_RCPTS:
+ if (task->rcpt_envelope && task->rcpt_envelope->len > 0) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_MIME_RCPT:
+ case RSPAMD_LOG_MIME_RCPTS:
+ if (MESSAGE_FIELD_CHECK(task, rcpt_mime) &&
+ MESSAGE_FIELD(task, rcpt_mime)->len > 0) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_SMTP_FROM:
+ if (task->from_envelope) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_MIME_FROM:
+ if (MESSAGE_FIELD_CHECK(task, from_mime) &&
+ MESSAGE_FIELD(task, from_mime)->len > 0) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_FILENAME:
+ if (task->msg.fpath) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_FORCED_ACTION:
+ if (task->result->passthrough_result) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_LOG_SETTINGS_ID:
+ if (task->settings_elt) {
+ ret = TRUE;
+ }
+ break;
+ default:
+ ret = TRUE;
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Sort by symbol's score -> name
+ */
+static gint
+rspamd_task_compare_log_sym(gconstpointer a, gconstpointer b)
+{
+ const struct rspamd_symbol_result *s1 = *(const struct rspamd_symbol_result **) a,
+ *s2 = *(const struct rspamd_symbol_result **) b;
+ gdouble w1, w2;
+
+
+ w1 = fabs(s1->score);
+ w2 = fabs(s2->score);
+
+ if (w1 == w2 && s1->name && s2->name) {
+ return strcmp(s1->name, s2->name);
+ }
+
+ return (w2 - w1) * 1000.0;
+}
+
+static gint
+rspamd_task_compare_log_group(gconstpointer a, gconstpointer b)
+{
+ const struct rspamd_symbols_group *s1 = *(const struct rspamd_symbols_group **) a,
+ *s2 = *(const struct rspamd_symbols_group **) b;
+
+ return strcmp(s1->name, s2->name);
+}
+
+
+static rspamd_ftok_t
+rspamd_task_log_metric_res(struct rspamd_task *task,
+ struct rspamd_log_format *lf)
+{
+ static gchar scorebuf[32];
+ rspamd_ftok_t res = {.begin = NULL, .len = 0};
+ struct rspamd_scan_result *mres;
+ gboolean first = TRUE;
+ rspamd_fstring_t *symbuf;
+ struct rspamd_symbol_result *sym;
+ GPtrArray *sorted_symbols;
+ struct rspamd_action *act;
+ struct rspamd_symbols_group *gr;
+ guint i, j;
+ khiter_t k;
+ guint max_log_elts = task->cfg->log_task_max_elts;
+
+ mres = task->result;
+ act = rspamd_check_action_metric(task, NULL, NULL);
+
+ if (mres != NULL) {
+ switch (lf->type) {
+ case RSPAMD_LOG_ISSPAM:
+ if (RSPAMD_TASK_IS_SKIPPED(task)) {
+ res.begin = "S";
+ }
+ else if (!(act->flags & RSPAMD_ACTION_HAM)) {
+ res.begin = "T";
+ }
+ else {
+ res.begin = "F";
+ }
+
+ res.len = 1;
+ break;
+ case RSPAMD_LOG_ACTION:
+ res.begin = act->name;
+ res.len = strlen(res.begin);
+ break;
+ case RSPAMD_LOG_SCORES:
+ res.len = rspamd_snprintf(scorebuf, sizeof(scorebuf), "%.2f/%.2f",
+ mres->score, rspamd_task_get_required_score(task, mres));
+ res.begin = scorebuf;
+ break;
+ case RSPAMD_LOG_SYMBOLS:
+ symbuf = rspamd_fstring_sized_new(128);
+ sorted_symbols = g_ptr_array_sized_new(kh_size(mres->symbols));
+
+ kh_foreach_value(mres->symbols, sym, {
+ if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
+ g_ptr_array_add(sorted_symbols, (gpointer) sym);
+ }
+ });
+
+ g_ptr_array_sort(sorted_symbols, rspamd_task_compare_log_sym);
+
+ for (i = 0; i < sorted_symbols->len; i++) {
+ sym = g_ptr_array_index(sorted_symbols, i);
+
+ if (first) {
+ rspamd_printf_fstring(&symbuf, "%s", sym->name);
+ }
+ else {
+ rspamd_printf_fstring(&symbuf, ",%s", sym->name);
+ }
+
+ if (lf->flags & RSPAMD_LOG_FMT_FLAG_SYMBOLS_SCORES) {
+ rspamd_printf_fstring(&symbuf, "(%.2f)", sym->score);
+ }
+
+ if (lf->flags & RSPAMD_LOG_FMT_FLAG_SYMBOLS_PARAMS) {
+ rspamd_printf_fstring(&symbuf, "{");
+
+ if (sym->options) {
+ struct rspamd_symbol_option *opt;
+
+ j = 0;
+
+ DL_FOREACH(sym->opts_head, opt)
+ {
+ rspamd_printf_fstring(&symbuf, "%*s;",
+ (gint) opt->optlen, opt->option);
+
+ if (j >= max_log_elts && opt->next) {
+ rspamd_printf_fstring(&symbuf, "...;");
+ break;
+ }
+
+ j++;
+ }
+ }
+
+ rspamd_printf_fstring(&symbuf, "}");
+ }
+
+ first = FALSE;
+ }
+
+ g_ptr_array_free(sorted_symbols, TRUE);
+
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) rspamd_fstring_free,
+ symbuf);
+ rspamd_mempool_notify_alloc(task->task_pool, symbuf->len);
+ res.begin = symbuf->str;
+ res.len = symbuf->len;
+ break;
+
+ case RSPAMD_LOG_GROUPS:
+ case RSPAMD_LOG_PUBLIC_GROUPS:
+
+ symbuf = rspamd_fstring_sized_new(128);
+ sorted_symbols = g_ptr_array_sized_new(kh_size(mres->sym_groups));
+
+ kh_foreach_key(mres->sym_groups, gr, {
+ if (!(gr->flags & RSPAMD_SYMBOL_GROUP_PUBLIC)) {
+ if (lf->type == RSPAMD_LOG_PUBLIC_GROUPS) {
+ continue;
+ }
+ }
+
+ g_ptr_array_add(sorted_symbols, gr);
+ });
+
+ g_ptr_array_sort(sorted_symbols, rspamd_task_compare_log_group);
+
+ for (i = 0; i < sorted_symbols->len; i++) {
+ gr = g_ptr_array_index(sorted_symbols, i);
+
+ if (first) {
+ rspamd_printf_fstring(&symbuf, "%s", gr->name);
+ }
+ else {
+ rspamd_printf_fstring(&symbuf, ",%s", gr->name);
+ }
+
+ k = kh_get(rspamd_symbols_group_hash, mres->sym_groups, gr);
+
+ rspamd_printf_fstring(&symbuf, "(%.2f)",
+ kh_value(mres->sym_groups, k));
+
+ first = FALSE;
+ }
+
+ g_ptr_array_free(sorted_symbols, TRUE);
+
+ rspamd_mempool_add_destructor(task->task_pool,
+ (rspamd_mempool_destruct_t) rspamd_fstring_free,
+ symbuf);
+ rspamd_mempool_notify_alloc(task->task_pool, symbuf->len);
+ res.begin = symbuf->str;
+ res.len = symbuf->len;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return res;
+}
+
+static rspamd_fstring_t *
+rspamd_task_log_write_var(struct rspamd_task *task, rspamd_fstring_t *logbuf,
+ const rspamd_ftok_t *var, const rspamd_ftok_t *content)
+{
+ rspamd_fstring_t *res = logbuf;
+ const gchar *p, *c, *end;
+
+ if (content == NULL) {
+ /* Just output variable */
+ res = rspamd_fstring_append(res, var->begin, var->len);
+ }
+ else {
+ /* Replace $ with variable value */
+ p = content->begin;
+ c = p;
+ end = p + content->len;
+
+ while (p < end) {
+ if (*p == '$') {
+ if (p > c) {
+ res = rspamd_fstring_append(res, c, p - c);
+ }
+
+ res = rspamd_fstring_append(res, var->begin, var->len);
+ p++;
+ c = p;
+ }
+ else {
+ p++;
+ }
+ }
+
+ if (p > c) {
+ res = rspamd_fstring_append(res, c, p - c);
+ }
+ }
+
+ return res;
+}
+
+static rspamd_fstring_t *
+rspamd_task_write_ialist(struct rspamd_task *task,
+ GPtrArray *addrs, gint lim,
+ struct rspamd_log_format *lf,
+ rspamd_fstring_t *logbuf)
+{
+ rspamd_fstring_t *res = logbuf, *varbuf;
+ rspamd_ftok_t var = {.begin = NULL, .len = 0};
+ struct rspamd_email_address *addr;
+ gint i, nchars = 0, wr = 0, cur_chars;
+ gboolean has_orig = FALSE;
+ guint max_log_elts = task->cfg->log_task_max_elts;
+
+ if (addrs && lim <= 0) {
+ lim = addrs->len;
+ }
+
+ PTR_ARRAY_FOREACH(addrs, i, addr)
+ {
+ if (addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL) {
+ has_orig = TRUE;
+ break;
+ }
+ }
+
+ varbuf = rspamd_fstring_new();
+
+ PTR_ARRAY_FOREACH(addrs, i, addr)
+ {
+ if (wr >= lim) {
+ break;
+ }
+
+ if (has_orig) {
+ /* Report merely original addresses */
+ if (!(addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL)) {
+ continue;
+ }
+ }
+
+ bool last = i == lim - 1;
+
+ cur_chars = addr->addr_len;
+ varbuf = rspamd_fstring_append(varbuf, addr->addr,
+ cur_chars);
+ nchars += cur_chars;
+ wr++;
+
+ if (varbuf->len > 0 && !last) {
+ varbuf = rspamd_fstring_append(varbuf, ",", 1);
+ }
+
+ if (!last && (wr >= max_log_elts || nchars >= max_log_elts * 16)) {
+ varbuf = rspamd_fstring_append(varbuf, "...", 3);
+ break;
+ }
+ }
+
+ if (varbuf->len > 0) {
+ var.begin = varbuf->str;
+ var.len = varbuf->len;
+ res = rspamd_task_log_write_var(task, logbuf,
+ &var, (const rspamd_ftok_t *) lf->data);
+ }
+
+ rspamd_fstring_free(varbuf);
+
+ return res;
+}
+
+static rspamd_fstring_t *
+rspamd_task_write_addr_list(struct rspamd_task *task,
+ GPtrArray *addrs, gint lim,
+ struct rspamd_log_format *lf,
+ rspamd_fstring_t *logbuf)
+{
+ rspamd_fstring_t *res = logbuf, *varbuf;
+ rspamd_ftok_t var = {.begin = NULL, .len = 0};
+ struct rspamd_email_address *addr;
+ guint max_log_elts = task->cfg->log_task_max_elts;
+ guint i;
+
+ if (lim <= 0) {
+ lim = addrs->len;
+ }
+
+ varbuf = rspamd_fstring_new();
+
+ for (i = 0; i < lim; i++) {
+ addr = g_ptr_array_index(addrs, i);
+ bool last = i == lim - 1;
+
+ if (addr->addr) {
+ varbuf = rspamd_fstring_append(varbuf, addr->addr, addr->addr_len);
+ }
+
+ if (varbuf->len > 0 && !last) {
+ varbuf = rspamd_fstring_append(varbuf, ",", 1);
+ }
+
+ if (!last && i >= max_log_elts) {
+ varbuf = rspamd_fstring_append(varbuf, "...", 3);
+ break;
+ }
+ }
+
+ if (varbuf->len > 0) {
+ var.begin = varbuf->str;
+ var.len = varbuf->len;
+ res = rspamd_task_log_write_var(task, logbuf,
+ &var, (const rspamd_ftok_t *) lf->data);
+ }
+
+ rspamd_fstring_free(varbuf);
+
+ return res;
+}
+
+static rspamd_fstring_t *
+rspamd_task_log_variable(struct rspamd_task *task,
+ struct rspamd_log_format *lf, rspamd_fstring_t *logbuf)
+{
+ rspamd_fstring_t *res = logbuf;
+ rspamd_ftok_t var = {.begin = NULL, .len = 0};
+ static gchar numbuf[128];
+ static const gchar undef[] = "undef";
+
+ switch (lf->type) {
+ /* String vars */
+ case RSPAMD_LOG_MID:
+ if (MESSAGE_FIELD_CHECK(task, message_id)) {
+ var.begin = MESSAGE_FIELD(task, message_id);
+ var.len = strlen(var.begin);
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_QID:
+ if (task->queue_id) {
+ var.begin = task->queue_id;
+ var.len = strlen(var.begin);
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_USER:
+ if (task->auth_user) {
+ var.begin = task->auth_user;
+ var.len = strlen(var.begin);
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_IP:
+ if (task->from_addr && rspamd_ip_is_valid(task->from_addr)) {
+ var.begin = rspamd_inet_address_to_string(task->from_addr);
+ var.len = strlen(var.begin);
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ /* Numeric vars */
+ case RSPAMD_LOG_LEN:
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf), "%uz",
+ task->msg.len);
+ var.begin = numbuf;
+ break;
+ case RSPAMD_LOG_DNS_REQ:
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf), "%uD",
+ task->dns_requests);
+ var.begin = numbuf;
+ break;
+ case RSPAMD_LOG_TIME_REAL:
+ case RSPAMD_LOG_TIME_VIRTUAL:
+ var.begin = rspamd_log_check_time(task->task_timestamp,
+ task->time_real_finish,
+ task->cfg->clock_res);
+ var.len = strlen(var.begin);
+ break;
+ /* InternetAddress vars */
+ case RSPAMD_LOG_SMTP_FROM:
+ if (task->from_envelope) {
+ var.begin = task->from_envelope->addr;
+ var.len = task->from_envelope->addr_len;
+ }
+ break;
+ case RSPAMD_LOG_MIME_FROM:
+ if (MESSAGE_FIELD_CHECK(task, from_mime)) {
+ return rspamd_task_write_ialist(task,
+ MESSAGE_FIELD(task, from_mime),
+ 1,
+ lf,
+ logbuf);
+ }
+ break;
+ case RSPAMD_LOG_SMTP_RCPT:
+ if (task->rcpt_envelope) {
+ return rspamd_task_write_addr_list(task, task->rcpt_envelope, 1, lf,
+ logbuf);
+ }
+ break;
+ case RSPAMD_LOG_MIME_RCPT:
+ if (MESSAGE_FIELD_CHECK(task, rcpt_mime)) {
+ return rspamd_task_write_ialist(task,
+ MESSAGE_FIELD(task, rcpt_mime),
+ 1,
+ lf,
+ logbuf);
+ }
+ break;
+ case RSPAMD_LOG_SMTP_RCPTS:
+ if (task->rcpt_envelope) {
+ return rspamd_task_write_addr_list(task, task->rcpt_envelope, -1, lf,
+ logbuf);
+ }
+ break;
+ case RSPAMD_LOG_MIME_RCPTS:
+ if (MESSAGE_FIELD_CHECK(task, rcpt_mime)) {
+ return rspamd_task_write_ialist(task,
+ MESSAGE_FIELD(task, rcpt_mime),
+ -1, /* All addresses */
+ lf,
+ logbuf);
+ }
+ break;
+ case RSPAMD_LOG_DIGEST:
+ if (task->message) {
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf), "%*xs",
+ (gint) sizeof(MESSAGE_FIELD(task, digest)),
+ MESSAGE_FIELD(task, digest));
+ var.begin = numbuf;
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_FILENAME:
+ if (task->msg.fpath) {
+ var.len = strlen(task->msg.fpath);
+ var.begin = task->msg.fpath;
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_FORCED_ACTION:
+ if (task->result->passthrough_result) {
+ struct rspamd_passthrough_result *pr = task->result->passthrough_result;
+
+ if (!isnan(pr->target_score)) {
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf),
+ "%s \"%s\"; score=%.2f (set by %s)",
+ pr->action->name,
+ pr->message,
+ pr->target_score,
+ pr->module);
+ }
+ else {
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf),
+ "%s \"%s\"; score=nan (set by %s)",
+ pr->action->name,
+ pr->message,
+ pr->module);
+ }
+ var.begin = numbuf;
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_SETTINGS_ID:
+ if (task->settings_elt) {
+ var.begin = task->settings_elt->name;
+ var.len = strlen(task->settings_elt->name);
+ }
+ else {
+ var.begin = undef;
+ var.len = sizeof(undef) - 1;
+ }
+ break;
+ case RSPAMD_LOG_MEMPOOL_SIZE:
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf),
+ "%Hz",
+ rspamd_mempool_get_used_size(task->task_pool));
+ var.begin = numbuf;
+ break;
+ case RSPAMD_LOG_MEMPOOL_WASTE:
+ var.len = rspamd_snprintf(numbuf, sizeof(numbuf),
+ "%Hz",
+ rspamd_mempool_get_wasted_size(task->task_pool));
+ var.begin = numbuf;
+ break;
+ default:
+ var = rspamd_task_log_metric_res(task, lf);
+ break;
+ }
+
+ if (var.len > 0) {
+ res = rspamd_task_log_write_var(task, logbuf,
+ &var, (const rspamd_ftok_t *) lf->data);
+ }
+
+ return res;
+}
+
+void rspamd_task_write_log(struct rspamd_task *task)
+{
+ rspamd_fstring_t *logbuf;
+ struct rspamd_log_format *lf;
+ struct rspamd_task **ptask;
+ const gchar *lua_str;
+ gsize lua_str_len;
+ lua_State *L;
+
+ g_assert(task != NULL);
+
+ if (task->cfg->log_format == NULL ||
+ (task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
+ msg_debug_task("skip logging due to no log flag");
+ return;
+ }
+
+ logbuf = rspamd_fstring_sized_new(1000);
+
+ DL_FOREACH(task->cfg->log_format, lf)
+ {
+ switch (lf->type) {
+ case RSPAMD_LOG_STRING:
+ logbuf = rspamd_fstring_append(logbuf, lf->data, lf->len);
+ break;
+ case RSPAMD_LOG_LUA:
+ L = task->cfg->lua_state;
+ lua_rawgeti(L, LUA_REGISTRYINDEX, GPOINTER_TO_INT(lf->data));
+ ptask = lua_newuserdata(L, sizeof(*ptask));
+ rspamd_lua_setclass(L, "rspamd{task}", -1);
+ *ptask = task;
+
+ if (lua_pcall(L, 1, 1, 0) != 0) {
+ msg_err_task("call to log function failed: %s",
+ lua_tostring(L, -1));
+ lua_pop(L, 1);
+ }
+ else {
+ lua_str = lua_tolstring(L, -1, &lua_str_len);
+
+ if (lua_str != NULL) {
+ logbuf = rspamd_fstring_append(logbuf, lua_str, lua_str_len);
+ }
+ lua_pop(L, 1);
+ }
+ break;
+ default:
+ /* We have a variable in log format */
+ if (lf->flags & RSPAMD_LOG_FMT_FLAG_CONDITION) {
+ if (!rspamd_task_log_check_condition(task, lf)) {
+ continue;
+ }
+ }
+
+ logbuf = rspamd_task_log_variable(task, lf, logbuf);
+ break;
+ }
+ }
+
+ msg_notice_task("%V", logbuf);
+
+ rspamd_fstring_free(logbuf);
+}
+
+gdouble
+rspamd_task_get_required_score(struct rspamd_task *task, struct rspamd_scan_result *m)
+{
+ if (m == NULL) {
+ m = task->result;
+
+ if (m == NULL) {
+ return NAN;
+ }
+ }
+
+ for (guint i = m->nactions; i-- > 0;) {
+ struct rspamd_action_config *action_lim = &m->actions_config[i];
+
+
+ if (!isnan(action_lim->cur_limit) &&
+ !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
+ return m->actions_config[i].cur_limit;
+ }
+ }
+
+ return NAN;
+}
+
+rspamd_ftok_t *
+rspamd_task_get_request_header(struct rspamd_task *task,
+ const gchar *name)
+{
+ struct rspamd_request_header_chain *ret =
+ rspamd_task_get_request_header_multiple(task, name);
+
+ if (ret) {
+ return ret->hdr;
+ }
+
+ return NULL;
+}
+
+struct rspamd_request_header_chain *
+rspamd_task_get_request_header_multiple(struct rspamd_task *task,
+ const gchar *name)
+{
+ struct rspamd_request_header_chain *ret = NULL;
+ rspamd_ftok_t srch;
+ khiter_t k;
+
+ srch.begin = (gchar *) name;
+ srch.len = strlen(name);
+
+ k = kh_get(rspamd_req_headers_hash, task->request_headers,
+ &srch);
+
+ if (k != kh_end(task->request_headers)) {
+ ret = kh_value(task->request_headers, k);
+ }
+
+ return ret;
+}
+
+
+void rspamd_task_add_request_header(struct rspamd_task *task,
+ rspamd_ftok_t *name, rspamd_ftok_t *value)
+{
+
+ khiter_t k;
+ gint res;
+ struct rspamd_request_header_chain *chain, *nchain;
+
+ k = kh_put(rspamd_req_headers_hash, task->request_headers,
+ name, &res);
+
+ if (res == 0) {
+ /* Existing name */
+ nchain = rspamd_mempool_alloc(task->task_pool, sizeof(*nchain));
+ nchain->hdr = value;
+ nchain->next = NULL;
+ chain = kh_value(task->request_headers, k);
+
+ /* Slow but OK here */
+ LL_APPEND(chain, nchain);
+ }
+ else {
+ nchain = rspamd_mempool_alloc(task->task_pool, sizeof(*nchain));
+ nchain->hdr = value;
+ nchain->next = NULL;
+
+ kh_value(task->request_headers, k) = nchain;
+ }
+}
+
+
+void rspamd_task_profile_set(struct rspamd_task *task, const gchar *key,
+ gdouble value)
+{
+ GHashTable *tbl;
+ gdouble *pval;
+
+ if (key == NULL) {
+ return;
+ }
+
+ tbl = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_PROFILE);
+
+ if (tbl == NULL) {
+ tbl = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+ rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_PROFILE,
+ tbl, (rspamd_mempool_destruct_t) g_hash_table_unref);
+ }
+
+ pval = g_hash_table_lookup(tbl, key);
+
+ if (pval == NULL) {
+ pval = rspamd_mempool_alloc(task->task_pool, sizeof(*pval));
+ *pval = value;
+ g_hash_table_insert(tbl, (void *) key, pval);
+ }
+ else {
+ *pval = value;
+ }
+}
+
+gdouble *
+rspamd_task_profile_get(struct rspamd_task *task, const gchar *key)
+{
+ GHashTable *tbl;
+ gdouble *pval = NULL;
+
+ tbl = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_PROFILE);
+
+ if (tbl != NULL) {
+ pval = g_hash_table_lookup(tbl, key);
+ }
+
+ return pval;
+}
+
+
+gboolean
+rspamd_task_set_finish_time(struct rspamd_task *task)
+{
+ if (isnan(task->time_real_finish)) {
+ task->time_real_finish = ev_time();
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+const gchar *
+rspamd_task_stage_name(enum rspamd_task_stage stg)
+{
+ const gchar *ret = "unknown stage";
+
+ switch (stg) {
+ case RSPAMD_TASK_STAGE_CONNECT:
+ ret = "connect";
+ break;
+ case RSPAMD_TASK_STAGE_CONNFILTERS:
+ ret = "connection_filter";
+ break;
+ case RSPAMD_TASK_STAGE_READ_MESSAGE:
+ ret = "read_message";
+ break;
+ case RSPAMD_TASK_STAGE_PRE_FILTERS:
+ ret = "prefilters";
+ break;
+ case RSPAMD_TASK_STAGE_PROCESS_MESSAGE:
+ ret = "process_message";
+ break;
+ case RSPAMD_TASK_STAGE_FILTERS:
+ ret = "filters";
+ break;
+ case RSPAMD_TASK_STAGE_CLASSIFIERS_PRE:
+ ret = "classifiers_pre";
+ break;
+ case RSPAMD_TASK_STAGE_CLASSIFIERS:
+ ret = "classifiers";
+ break;
+ case RSPAMD_TASK_STAGE_CLASSIFIERS_POST:
+ ret = "classifiers_post";
+ break;
+ case RSPAMD_TASK_STAGE_COMPOSITES:
+ ret = "composites";
+ break;
+ case RSPAMD_TASK_STAGE_POST_FILTERS:
+ ret = "postfilters";
+ break;
+ case RSPAMD_TASK_STAGE_LEARN_PRE:
+ ret = "learn_pre";
+ break;
+ case RSPAMD_TASK_STAGE_LEARN:
+ ret = "learn";
+ break;
+ case RSPAMD_TASK_STAGE_LEARN_POST:
+ ret = "learn_post";
+ break;
+ case RSPAMD_TASK_STAGE_COMPOSITES_POST:
+ ret = "composites_post";
+ break;
+ case RSPAMD_TASK_STAGE_IDEMPOTENT:
+ ret = "idempotent";
+ break;
+ case RSPAMD_TASK_STAGE_DONE:
+ ret = "done";
+ break;
+ case RSPAMD_TASK_STAGE_REPLIED:
+ ret = "replied";
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+void rspamd_task_timeout(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_task *task = (struct rspamd_task *) w->data;
+
+ if (!(task->processed_stages & RSPAMD_TASK_STAGE_FILTERS)) {
+ ev_now_update_if_cheap(task->event_loop);
+ msg_info_task("processing of task time out: %.1fs spent; %.1fs limit; "
+ "forced processing",
+ ev_now(task->event_loop) - task->task_timestamp,
+ w->repeat);
+
+ if (task->cfg->soft_reject_on_timeout) {
+ struct rspamd_action *action, *soft_reject;
+
+ action = rspamd_check_action_metric(task, NULL, NULL);
+
+ if (action->action_type != METRIC_ACTION_REJECT) {
+ soft_reject = rspamd_config_get_action_by_type(task->cfg,
+ METRIC_ACTION_SOFT_REJECT);
+ rspamd_add_passthrough_result(task,
+ soft_reject,
+ 0,
+ NAN,
+ "timeout processing message",
+ "task timeout",
+ 0, NULL);
+ }
+ }
+
+ ev_timer_again(EV_A_ w);
+ task->processed_stages |= RSPAMD_TASK_STAGE_FILTERS;
+ rspamd_session_cleanup(task->s, true);
+ rspamd_task_process(task, RSPAMD_TASK_PROCESS_ALL);
+ rspamd_session_pending(task->s);
+ }
+ else {
+ /* Postprocessing timeout */
+ msg_info_task("post-processing of task time out: %.1f second spent; forced processing",
+ ev_now(task->event_loop) - task->task_timestamp);
+
+ if (task->cfg->soft_reject_on_timeout) {
+ struct rspamd_action *action, *soft_reject;
+
+ action = rspamd_check_action_metric(task, NULL, NULL);
+
+ if (action->action_type != METRIC_ACTION_REJECT) {
+ soft_reject = rspamd_config_get_action_by_type(task->cfg,
+ METRIC_ACTION_SOFT_REJECT);
+ rspamd_add_passthrough_result(task,
+ soft_reject,
+ 0,
+ NAN,
+ "timeout post-processing message",
+ "task timeout",
+ 0, NULL);
+ }
+ }
+
+ ev_timer_stop(EV_A_ w);
+ task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
+ rspamd_session_cleanup(task->s, true);
+ rspamd_task_process(task, RSPAMD_TASK_PROCESS_ALL);
+ rspamd_session_pending(task->s);
+ }
+}
+
+void rspamd_worker_guard_handler(EV_P_ ev_io *w, int revents)
+{
+ struct rspamd_task *task = (struct rspamd_task *) w->data;
+ gchar fake_buf[1024];
+ gssize r;
+
+ r = read(w->fd, fake_buf, sizeof(fake_buf));
+
+ if (r > 0) {
+ msg_warn_task("received extra data after task is loaded, ignoring");
+ }
+ else {
+ if (r == 0) {
+ /*
+ * Poor man approach, that might break things in case of
+ * shutdown (SHUT_WR) but sockets are so bad that there's no
+ * reliable way to distinguish between shutdown(SHUT_WR) and
+ * close.
+ */
+ if (task->cmd != CMD_CHECK_V2 && task->cfg->enable_shutdown_workaround) {
+ msg_info_task("workaround for shutdown enabled, please update "
+ "your client, this support might be removed in future");
+ shutdown(w->fd, SHUT_RD);
+ ev_io_stop(task->event_loop, &task->guard_ev);
+ }
+ else {
+ msg_err_task("the peer has closed connection unexpectedly");
+ rspamd_session_destroy(task->s);
+ }
+ }
+ else if (errno != EAGAIN) {
+ msg_err_task("the peer has closed connection unexpectedly: %s",
+ strerror(errno));
+ rspamd_session_destroy(task->s);
+ }
+ else {
+ return;
+ }
+ }
+}
diff --git a/src/libserver/task.h b/src/libserver/task.h
new file mode 100644
index 0000000..5404a11
--- /dev/null
+++ b/src/libserver/task.h
@@ -0,0 +1,392 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TASK_H_
+#define TASK_H_
+
+#include "config.h"
+#include "libserver/http/http_connection.h"
+#include "async_session.h"
+#include "util.h"
+#include "mem_pool.h"
+#include "dns.h"
+#include "re_cache.h"
+#include "khash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rspamd_command {
+ CMD_SKIP = 0,
+ CMD_PING,
+ CMD_CHECK_SPAMC, /* Legacy spamassassin format */
+ CMD_CHECK_RSPAMC, /* Legacy rspamc format (like SA one) */
+ CMD_CHECK, /* Legacy check - metric json reply */
+ CMD_CHECK_V2, /* Modern check - symbols in json reply */
+};
+
+enum rspamd_task_stage {
+ RSPAMD_TASK_STAGE_CONNECT = (1u << 0u),
+ RSPAMD_TASK_STAGE_CONNFILTERS = (1u << 1u),
+ RSPAMD_TASK_STAGE_READ_MESSAGE = (1u << 2u),
+ RSPAMD_TASK_STAGE_PROCESS_MESSAGE = (1u << 3u),
+ RSPAMD_TASK_STAGE_PRE_FILTERS = (1u << 4u),
+ RSPAMD_TASK_STAGE_FILTERS = (1u << 5u),
+ RSPAMD_TASK_STAGE_CLASSIFIERS_PRE = (1u << 6u),
+ RSPAMD_TASK_STAGE_CLASSIFIERS = (1u << 7u),
+ RSPAMD_TASK_STAGE_CLASSIFIERS_POST = (1u << 8u),
+ RSPAMD_TASK_STAGE_COMPOSITES = (1u << 9u),
+ RSPAMD_TASK_STAGE_POST_FILTERS = (1u << 10u),
+ RSPAMD_TASK_STAGE_LEARN_PRE = (1u << 11u),
+ RSPAMD_TASK_STAGE_LEARN = (1u << 12u),
+ RSPAMD_TASK_STAGE_LEARN_POST = (1u << 13u),
+ RSPAMD_TASK_STAGE_COMPOSITES_POST = (1u << 14u),
+ RSPAMD_TASK_STAGE_IDEMPOTENT = (1u << 15u),
+ RSPAMD_TASK_STAGE_DONE = (1u << 16u),
+ RSPAMD_TASK_STAGE_REPLIED = (1u << 17u)
+};
+
+#define RSPAMD_TASK_PROCESS_ALL (RSPAMD_TASK_STAGE_CONNECT | \
+ RSPAMD_TASK_STAGE_CONNFILTERS | \
+ RSPAMD_TASK_STAGE_READ_MESSAGE | \
+ RSPAMD_TASK_STAGE_PRE_FILTERS | \
+ RSPAMD_TASK_STAGE_PROCESS_MESSAGE | \
+ RSPAMD_TASK_STAGE_FILTERS | \
+ RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
+ RSPAMD_TASK_STAGE_CLASSIFIERS | \
+ RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
+ RSPAMD_TASK_STAGE_COMPOSITES | \
+ RSPAMD_TASK_STAGE_POST_FILTERS | \
+ RSPAMD_TASK_STAGE_LEARN_PRE | \
+ RSPAMD_TASK_STAGE_LEARN | \
+ RSPAMD_TASK_STAGE_LEARN_POST | \
+ RSPAMD_TASK_STAGE_COMPOSITES_POST | \
+ RSPAMD_TASK_STAGE_IDEMPOTENT | \
+ RSPAMD_TASK_STAGE_DONE)
+#define RSPAMD_TASK_PROCESS_LEARN (RSPAMD_TASK_STAGE_CONNECT | \
+ RSPAMD_TASK_STAGE_READ_MESSAGE | \
+ RSPAMD_TASK_STAGE_PROCESS_MESSAGE | \
+ RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
+ RSPAMD_TASK_STAGE_CLASSIFIERS | \
+ RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
+ RSPAMD_TASK_STAGE_LEARN_PRE | \
+ RSPAMD_TASK_STAGE_LEARN | \
+ RSPAMD_TASK_STAGE_LEARN_POST | \
+ RSPAMD_TASK_STAGE_DONE)
+
+#define RSPAMD_TASK_FLAG_MIME (1u << 0u)
+#define RSPAMD_TASK_FLAG_SKIP_PROCESS (1u << 1u)
+#define RSPAMD_TASK_FLAG_SKIP (1u << 2u)
+#define RSPAMD_TASK_FLAG_PASS_ALL (1u << 3u)
+#define RSPAMD_TASK_FLAG_NO_LOG (1u << 4u)
+#define RSPAMD_TASK_FLAG_NO_IP (1u << 5u)
+#define RSPAMD_TASK_FLAG_PROCESSING (1u << 6u)
+#define RSPAMD_TASK_FLAG_GTUBE (1u << 7u)
+#define RSPAMD_TASK_FLAG_FILE (1u << 8u)
+#define RSPAMD_TASK_FLAG_NO_STAT (1u << 9u)
+#define RSPAMD_TASK_FLAG_UNLEARN (1u << 10u)
+#define RSPAMD_TASK_FLAG_ALREADY_LEARNED (1u << 11u)
+#define RSPAMD_TASK_FLAG_LEARN_SPAM (1u << 12u)
+#define RSPAMD_TASK_FLAG_LEARN_HAM (1u << 13u)
+#define RSPAMD_TASK_FLAG_LEARN_AUTO (1u << 14u)
+#define RSPAMD_TASK_FLAG_BROKEN_HEADERS (1u << 15u)
+#define RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS (1u << 16u)
+#define RSPAMD_TASK_FLAG_HAS_HAM_TOKENS (1u << 17u)
+#define RSPAMD_TASK_FLAG_EMPTY (1u << 18u)
+#define RSPAMD_TASK_FLAG_PROFILE (1u << 19u)
+#define RSPAMD_TASK_FLAG_GREYLISTED (1u << 20u)
+#define RSPAMD_TASK_FLAG_OWN_POOL (1u << 21u)
+#define RSPAMD_TASK_FLAG_SSL (1u << 22u)
+#define RSPAMD_TASK_FLAG_BAD_UNICODE (1u << 23u)
+#define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 24u)
+#define RSPAMD_TASK_FLAG_MAX_SHIFT (24u)
+
+
+/* Request has a JSON control block */
+#define RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL (1u << 0u)
+/* Request has been done by a local client */
+#define RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT (1u << 1u)
+/* Request has been sent via milter */
+#define RSPAMD_TASK_PROTOCOL_FLAG_MILTER (1u << 2u)
+/* Compress protocol reply */
+#define RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED (1u << 3u)
+/* Include all URLs */
+#define RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS (1u << 4u)
+/* Client allows body block (including headers in no FLAG_MILTER) */
+#define RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK (1u << 5u)
+/* Emit groups information */
+#define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS (1u << 6u)
+#define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (6u)
+
+#define RSPAMD_TASK_IS_SKIPPED(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_SKIP))
+#define RSPAMD_TASK_IS_SPAMC(task) (G_UNLIKELY((task)->cmd == CMD_CHECK_SPAMC))
+#define RSPAMD_TASK_IS_PROCESSED(task) (G_UNLIKELY((task)->processed_stages & RSPAMD_TASK_STAGE_DONE))
+#define RSPAMD_TASK_IS_CLASSIFIED(task) (((task)->processed_stages & RSPAMD_TASK_STAGE_CLASSIFIERS))
+#define RSPAMD_TASK_IS_EMPTY(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_EMPTY))
+#define RSPAMD_TASK_IS_PROFILING(task) (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_PROFILE))
+#define RSPAMD_TASK_IS_MIME(task) (G_LIKELY((task)->flags & RSPAMD_TASK_FLAG_MIME))
+
+struct rspamd_email_address;
+struct rspamd_lang_detector;
+enum rspamd_newlines_type;
+struct rspamd_message;
+
+struct rspamd_task_data_storage {
+ const gchar *begin;
+ gsize len;
+ gchar *fpath;
+};
+
+struct rspamd_request_header_chain {
+ rspamd_ftok_t *hdr;
+ struct rspamd_request_header_chain *next;
+};
+
+__KHASH_TYPE(rspamd_req_headers_hash, rspamd_ftok_t *, struct rspamd_request_header_chain *);
+
+struct rspamd_lua_cached_entry {
+ gint ref;
+ guint id;
+};
+
+KHASH_INIT(rspamd_task_lua_cache, char *, struct rspamd_lua_cached_entry, 1, kh_str_hash_func, kh_str_hash_equal);
+
+/**
+ * Worker task structure
+ */
+struct rspamd_task {
+ struct rspamd_worker *worker; /**< pointer to worker object */
+ enum rspamd_command cmd; /**< command */
+ gint sock; /**< socket descriptor */
+ guint32 dns_requests; /**< number of DNS requests per this task */
+ guint32 flags; /**< Bit flags */
+ guint32 protocol_flags;
+ guint32 processed_stages; /**< bits of stages that are processed */
+ gchar *helo; /**< helo header value */
+ gchar *queue_id; /**< queue id if specified */
+ rspamd_inet_addr_t *from_addr; /**< from addr for a task */
+ rspamd_inet_addr_t *client_addr; /**< address of connected socket */
+ gchar *deliver_to; /**< address to deliver */
+ gchar *auth_user; /**< SMTP authenticated user */
+ const gchar *hostname; /**< hostname reported by MTA */
+ khash_t(rspamd_req_headers_hash) * request_headers; /**< HTTP headers in a request */
+ struct rspamd_task_data_storage msg; /**< message buffer */
+ struct rspamd_http_connection *http_conn; /**< HTTP server connection */
+ struct rspamd_async_session *s; /**< async session object */
+ struct rspamd_scan_result *result; /**< Metric result */
+ khash_t(rspamd_task_lua_cache) lua_cache; /**< cache of lua objects */
+ GPtrArray *tokens; /**< statistics tokens */
+ GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers
+ (e.g. Subject) */
+
+ GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */
+ struct rspamd_email_address *from_envelope;
+ struct rspamd_email_address *from_envelope_orig;
+
+ ucl_object_t *messages; /**< list of messages that would be reported */
+ struct rspamd_re_runtime *re_rt; /**< regexp runtime */
+ GPtrArray *stat_runtimes; /**< backend runtime */
+ struct rspamd_config *cfg; /**< pointer to config object */
+ GError *err;
+ rspamd_mempool_t *task_pool; /**< memory pool for task */
+ double time_real_finish;
+ ev_tstamp task_timestamp;
+
+ gboolean (*fin_callback)(struct rspamd_task *task, void *arg);
+ /**< callback for filters finalizing */
+ void *fin_arg; /**< argument for fin callback */
+
+ struct rspamd_dns_resolver *resolver; /**< DNS resolver */
+ struct ev_loop *event_loop; /**< Event base */
+ struct ev_timer timeout_ev; /**< Global task timeout */
+ struct ev_io guard_ev; /**< Event for input sanity guard */
+
+ gpointer symcache_runtime; /**< Opaque checkpoint data */
+ ucl_object_t *settings; /**< Settings applied to task */
+ struct rspamd_config_settings_elt *settings_elt; /**< preprocessed settings id elt */
+
+ const gchar *classifier; /**< Classifier to learn (if needed) */
+ struct rspamd_lang_detector *lang_det; /**< Languages detector */
+ struct rspamd_message *message;
+};
+
+/**
+ * Construct new task for worker
+ */
+struct rspamd_task *rspamd_task_new(struct rspamd_worker *worker,
+ struct rspamd_config *cfg,
+ rspamd_mempool_t *pool,
+ struct rspamd_lang_detector *lang_det,
+ struct ev_loop *event_loop,
+ gboolean debug_mem);
+
+/**
+ * Destroy task object and remove its IO dispatcher if it exists
+ */
+void rspamd_task_free(struct rspamd_task *task);
+
+/**
+ * Called if all filters are processed
+ * @return TRUE if session should be terminated
+ */
+gboolean rspamd_task_fin(void *arg);
+
+/**
+ * Load HTTP message with body in `msg` to an rspamd_task
+ * @param task
+ * @param msg
+ * @param start
+ * @param len
+ * @return
+ */
+gboolean rspamd_task_load_message(struct rspamd_task *task,
+ struct rspamd_http_message *msg,
+ const gchar *start, gsize len);
+
+/**
+ * Process task
+ * @param task task to process
+ * @return task has been successfully parsed and processed
+ */
+gboolean rspamd_task_process(struct rspamd_task *task, guint stages);
+
+/**
+ * Return address of sender or NULL
+ * @param task
+ * @return
+ */
+struct rspamd_email_address *rspamd_task_get_sender(struct rspamd_task *task);
+
+/**
+ * Return addresses in the following precedence:
+ * - deliver to
+ * - the first smtp recipient
+ * - the first mime recipient
+ * @param task
+ * @return
+ */
+const gchar *rspamd_task_get_principal_recipient(struct rspamd_task *task);
+
+/**
+ * Add a recipient for a task
+ * @param task task object
+ * @param rcpt string representation of recipient address
+ * @return TRUE if an address has been parsed and added
+ */
+gboolean rspamd_task_add_recipient(struct rspamd_task *task, const gchar *rcpt);
+
+/**
+ * Learn specified statfile with message in a task
+ * @param task worker's task object
+ * @param classifier classifier to learn (or NULL to learn all)
+ * @param err pointer to GError
+ * @return true if learn succeed
+ */
+gboolean rspamd_learn_task_spam(struct rspamd_task *task,
+ gboolean is_spam,
+ const gchar *classifier,
+ GError **err);
+
+/**
+ * Returns required score for a message (usually reject score)
+ * @param task
+ * @param m
+ * @return
+ */
+struct rspamd_scan_result;
+
+gdouble rspamd_task_get_required_score(struct rspamd_task *task,
+ struct rspamd_scan_result *m);
+
+/**
+ * Returns the first header as value for a header
+ * @param task
+ * @param name
+ * @return
+ */
+rspamd_ftok_t *rspamd_task_get_request_header(struct rspamd_task *task,
+ const gchar *name);
+
+/**
+ * Returns all headers with the specific name
+ * @param task
+ * @param name
+ * @return
+ */
+struct rspamd_request_header_chain *rspamd_task_get_request_header_multiple(
+ struct rspamd_task *task,
+ const gchar *name);
+
+/**
+ * Adds a new request header to task (name and value should be mapped to fstring)
+ * @param task
+ * @param name
+ * @param value
+ */
+void rspamd_task_add_request_header(struct rspamd_task *task,
+ rspamd_ftok_t *name, rspamd_ftok_t *value);
+
+/**
+ * Write log line about the specified task if needed
+ */
+void rspamd_task_write_log(struct rspamd_task *task);
+
+/**
+ * Set profiling value for a specific key
+ * @param task
+ * @param key
+ * @param value
+ */
+void rspamd_task_profile_set(struct rspamd_task *task, const gchar *key,
+ gdouble value);
+
+/**
+ * Get value for a specific profiling key
+ * @param task
+ * @param key
+ * @return
+ */
+gdouble *rspamd_task_profile_get(struct rspamd_task *task, const gchar *key);
+
+/**
+ * Sets finishing time for a task if not yet set
+ * @param task
+ * @return
+ */
+gboolean rspamd_task_set_finish_time(struct rspamd_task *task);
+
+/**
+ * Returns task processing stage name
+ * @param stg
+ * @return
+ */
+const gchar *rspamd_task_stage_name(enum rspamd_task_stage stg);
+
+/*
+ * Called on forced timeout
+ */
+void rspamd_task_timeout(EV_P_ ev_timer *w, int revents);
+
+/*
+ * Called on unexpected IO error (e.g. ECONNRESET)
+ */
+void rspamd_worker_guard_handler(EV_P_ ev_io *w, int revents);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* TASK_H_ */
diff --git a/src/libserver/url.c b/src/libserver/url.c
new file mode 100644
index 0000000..0842a1e
--- /dev/null
+++ b/src/libserver/url.c
@@ -0,0 +1,4365 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "url.h"
+#include "util.h"
+#include "rspamd.h"
+#include "message.h"
+#include "multipattern.h"
+#include "contrib/uthash/utlist.h"
+#include "contrib/http-parser/http_parser.h"
+#include <unicode/utf8.h>
+#include <unicode/uchar.h>
+#include <unicode/usprep.h>
+#include <unicode/ucnv.h>
+
+typedef struct url_match_s {
+ const gchar *m_begin;
+ gsize m_len;
+ const gchar *pattern;
+ const gchar *prefix;
+ const gchar *newline_pos;
+ const gchar *prev_newline_pos;
+ gboolean add_prefix;
+ gchar st;
+} url_match_t;
+
+#define URL_MATCHER_FLAG_NOHTML (1u << 0u)
+#define URL_MATCHER_FLAG_TLD_MATCH (1u << 1u)
+#define URL_MATCHER_FLAG_STAR_MATCH (1u << 2u)
+#define URL_MATCHER_FLAG_REGEXP (1u << 3u)
+
+struct url_callback_data;
+
+static const struct {
+ enum rspamd_url_protocol proto;
+ const gchar *name;
+ gsize len;
+} rspamd_url_protocols[] = {
+ {.proto = PROTOCOL_FILE,
+ .name = "file",
+ .len = 4},
+ {.proto = PROTOCOL_FTP,
+ .name = "ftp",
+ .len = 3},
+ {.proto = PROTOCOL_HTTP,
+ .name = "http",
+ .len = 4},
+ {.proto = PROTOCOL_HTTPS,
+ .name = "https",
+ .len = 5},
+ {.proto = PROTOCOL_MAILTO,
+ .name = "mailto",
+ .len = 6},
+ {.proto = PROTOCOL_TELEPHONE,
+ .name = "tel",
+ .len = 3},
+ {.proto = PROTOCOL_TELEPHONE,
+ .name = "callto",
+ .len = 3},
+ {.proto = PROTOCOL_UNKNOWN,
+ .name = NULL,
+ .len = 0}};
+struct url_matcher {
+ const gchar *pattern;
+ const gchar *prefix;
+
+ gboolean (*start)(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+ gboolean (*end)(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+ gint flags;
+};
+
+static gboolean url_file_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_file_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_web_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_web_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tld_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tld_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_email_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_email_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tel_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tel_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+struct url_matcher static_matchers[] = {
+ /* Common prefixes */
+ {"file://", "", url_file_start, url_file_end,
+ 0},
+ {"file:\\\\", "", url_file_start, url_file_end,
+ 0},
+ {"ftp://", "", url_web_start, url_web_end,
+ 0},
+ {"ftp:\\\\", "", url_web_start, url_web_end,
+ 0},
+ {"sftp://", "", url_web_start, url_web_end,
+ 0},
+ {"http:", "", url_web_start, url_web_end,
+ 0},
+ {"https:", "", url_web_start, url_web_end,
+ 0},
+ {"news://", "", url_web_start, url_web_end,
+ 0},
+ {"nntp://", "", url_web_start, url_web_end,
+ 0},
+ {"telnet://", "", url_web_start, url_web_end,
+ 0},
+ {"tel:", "", url_tel_start, url_tel_end,
+ 0},
+ {"webcal://", "", url_web_start, url_web_end,
+ 0},
+ {"mailto:", "", url_email_start, url_email_end,
+ 0},
+ {"callto:", "", url_tel_start, url_tel_end,
+ 0},
+ {"h323:", "", url_web_start, url_web_end,
+ 0},
+ {"sip:", "", url_web_start, url_web_end,
+ 0},
+ {"www\\.[0-9a-z]", "http://", url_web_start, url_web_end,
+ URL_MATCHER_FLAG_REGEXP},
+ {"ftp.", "ftp://", url_web_start, url_web_end,
+ 0},
+ /* Likely emails */
+ {
+ "@", "mailto://", url_email_start, url_email_end,
+ 0}};
+
+struct rspamd_url_flag_name {
+ const gchar *name;
+ gint flag;
+ gint hash;
+} url_flag_names[] = {
+ {"phished", RSPAMD_URL_FLAG_PHISHED, -1},
+ {"numeric", RSPAMD_URL_FLAG_NUMERIC, -1},
+ {"obscured", RSPAMD_URL_FLAG_OBSCURED, -1},
+ {"redirected", RSPAMD_URL_FLAG_REDIRECTED, -1},
+ {"html_displayed", RSPAMD_URL_FLAG_HTML_DISPLAYED, -1},
+ {"text", RSPAMD_URL_FLAG_FROM_TEXT, -1},
+ {"subject", RSPAMD_URL_FLAG_SUBJECT, -1},
+ {"host_encoded", RSPAMD_URL_FLAG_HOSTENCODED, -1},
+ {"schema_encoded", RSPAMD_URL_FLAG_SCHEMAENCODED, -1},
+ {"path_encoded", RSPAMD_URL_FLAG_PATHENCODED, -1},
+ {"query_encoded", RSPAMD_URL_FLAG_QUERYENCODED, -1},
+ {"missing_slashes", RSPAMD_URL_FLAG_MISSINGSLASHES, -1},
+ {"idn", RSPAMD_URL_FLAG_IDN, -1},
+ {"has_port", RSPAMD_URL_FLAG_HAS_PORT, -1},
+ {"has_user", RSPAMD_URL_FLAG_HAS_USER, -1},
+ {"schemaless", RSPAMD_URL_FLAG_SCHEMALESS, -1},
+ {"unnormalised", RSPAMD_URL_FLAG_UNNORMALISED, -1},
+ {"zw_spaces", RSPAMD_URL_FLAG_ZW_SPACES, -1},
+ {"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1},
+ {"image", RSPAMD_URL_FLAG_IMAGE, -1},
+ {"query", RSPAMD_URL_FLAG_QUERY, -1},
+ {"content", RSPAMD_URL_FLAG_CONTENT, -1},
+ {"no_tld", RSPAMD_URL_FLAG_NO_TLD, -1},
+ {"truncated", RSPAMD_URL_FLAG_TRUNCATED, -1},
+ {"redirect_target", RSPAMD_URL_FLAG_REDIRECT_TARGET, -1},
+ {"invisible", RSPAMD_URL_FLAG_INVISIBLE, -1},
+ {"special", RSPAMD_URL_FLAG_SPECIAL, -1},
+};
+
+
+static inline khint_t rspamd_url_hash(struct rspamd_url *u);
+
+static inline khint_t rspamd_url_host_hash(struct rspamd_url *u);
+static inline bool rspamd_urls_cmp(struct rspamd_url *a, struct rspamd_url *b);
+static inline bool rspamd_urls_host_cmp(struct rspamd_url *a, struct rspamd_url *b);
+
+/* Hash table implementation */
+__KHASH_IMPL(rspamd_url_hash, kh_inline, struct rspamd_url *, char, false,
+ rspamd_url_hash, rspamd_urls_cmp);
+__KHASH_IMPL(rspamd_url_host_hash, kh_inline, struct rspamd_url *, char, false,
+ rspamd_url_host_hash, rspamd_urls_host_cmp);
+
+struct url_callback_data {
+ const gchar *begin;
+ gchar *url_str;
+ rspamd_mempool_t *pool;
+ gint len;
+ enum rspamd_url_find_type how;
+ gboolean prefix_added;
+ guint newline_idx;
+ GArray *matchers;
+ GPtrArray *newlines;
+ const gchar *start;
+ const gchar *fin;
+ const gchar *end;
+ const gchar *last_at;
+ url_insert_function func;
+ void *funcd;
+};
+
+struct url_match_scanner {
+ GArray *matchers_full;
+ GArray *matchers_strict;
+ struct rspamd_multipattern *search_trie_full;
+ struct rspamd_multipattern *search_trie_strict;
+ bool has_tld_file;
+};
+
+struct url_match_scanner *url_scanner = NULL;
+
+enum {
+ IS_LWSP = (1 << 0),
+ IS_DOMAIN = (1 << 1),
+ IS_URLSAFE = (1 << 2),
+ IS_MAILSAFE = (1 << 3),
+ IS_DOMAIN_END = (1 << 4)
+};
+
+static const unsigned int url_scanner_table[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_LWSP, IS_LWSP, IS_LWSP, IS_LWSP, IS_LWSP, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_LWSP /* */,
+ IS_MAILSAFE /* ! */, IS_URLSAFE | IS_DOMAIN_END | IS_MAILSAFE /* " */,
+ IS_MAILSAFE /* # */, IS_MAILSAFE /* $ */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* % */, 0 /* & */, IS_MAILSAFE /* ' */,
+ 0 /* ( */, 0 /* ) */, IS_MAILSAFE /* * */,
+ IS_MAILSAFE /* + */, IS_MAILSAFE /* , */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* - */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* . */, IS_DOMAIN_END | IS_MAILSAFE /* / */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 0 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 1 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 2 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 3 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 4 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 5 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 6 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 7 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 8 */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* 9 */, IS_DOMAIN_END /* : */,
+ 0 /* ; */, IS_URLSAFE | IS_DOMAIN_END /* < */, 0 /* = */,
+ IS_URLSAFE | IS_DOMAIN_END /* > */, IS_DOMAIN_END /* ? */, 0 /* @ */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* A */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* B */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* C */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* D */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* E */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* F */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* G */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* H */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* I */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* J */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* K */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* L */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* M */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* N */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* O */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* P */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* Q */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* R */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* S */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* T */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* U */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* V */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* W */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* X */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* Y */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* Z */, 0 /* [ */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* \ */, 0 /* ] */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* ^ */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* _ */,
+ IS_URLSAFE | IS_DOMAIN_END /* ` */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* a */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* b */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* c */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* d */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* e */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* f */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* g */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* h */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* i */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* j */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* k */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* l */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* m */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* n */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* o */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* p */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* q */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* r */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* s */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* t */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* u */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* v */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* w */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* x */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* y */,
+ IS_URLSAFE | IS_DOMAIN | IS_MAILSAFE /* z */,
+ IS_URLSAFE | IS_DOMAIN_END | IS_MAILSAFE /* { */,
+ IS_URLSAFE | IS_DOMAIN_END | IS_MAILSAFE /* | */,
+ IS_URLSAFE | IS_DOMAIN_END | IS_MAILSAFE /* } */,
+ IS_URLSAFE | IS_DOMAIN_END | IS_MAILSAFE /* ~ */, 0, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN, IS_URLSAFE | IS_DOMAIN,
+ IS_URLSAFE | IS_DOMAIN};
+
+#define is_lwsp(x) ((url_scanner_table[(guchar) (x)] & IS_LWSP) != 0)
+#define is_mailsafe(x) ((url_scanner_table[(guchar) (x)] & (IS_MAILSAFE)) != 0)
+#define is_domain(x) ((url_scanner_table[(guchar) (x)] & IS_DOMAIN) != 0)
+#define is_urlsafe(x) ((url_scanner_table[(guchar) (x)] & (IS_URLSAFE)) != 0)
+
+const gchar *
+rspamd_url_strerror(int err)
+{
+ switch (err) {
+ case URI_ERRNO_OK:
+ return "Parsing went well";
+ case URI_ERRNO_EMPTY:
+ return "The URI string was empty";
+ case URI_ERRNO_INVALID_PROTOCOL:
+ return "No protocol was found";
+ case URI_ERRNO_BAD_FORMAT:
+ return "Bad URL format";
+ case URI_ERRNO_BAD_ENCODING:
+ return "Invalid symbols encoded";
+ case URI_ERRNO_INVALID_PORT:
+ return "Port number is bad";
+ case URI_ERRNO_TLD_MISSING:
+ return "TLD part is not detected";
+ case URI_ERRNO_HOST_MISSING:
+ return "Host part is missing";
+ case URI_ERRNO_TOO_LONG:
+ return "URL is too long";
+ }
+
+ return NULL;
+}
+
+static gboolean
+rspamd_url_parse_tld_file(const gchar *fname,
+ struct url_match_scanner *scanner)
+{
+ FILE *f;
+ struct url_matcher m;
+ gchar *linebuf = NULL, *p;
+ gsize buflen = 0;
+ gssize r;
+ gint flags;
+
+ f = fopen(fname, "r");
+
+ if (f == NULL) {
+ msg_err("cannot open TLD file %s: %s", fname, strerror(errno));
+ return FALSE;
+ }
+
+ m.end = url_tld_end;
+ m.start = url_tld_start;
+ m.prefix = "http://";
+
+ while ((r = getline(&linebuf, &buflen, f)) > 0) {
+ if (linebuf[0] == '/' || g_ascii_isspace(linebuf[0])) {
+ /* Skip comment or empty line */
+ continue;
+ }
+
+ g_strchomp(linebuf);
+
+ /* TODO: add support for ! patterns */
+ if (linebuf[0] == '!') {
+ msg_debug("skip '!' patterns from parsing for now: %s", linebuf);
+ continue;
+ }
+
+ flags = URL_MATCHER_FLAG_NOHTML | URL_MATCHER_FLAG_TLD_MATCH;
+
+ if (linebuf[0] == '*') {
+ flags |= URL_MATCHER_FLAG_STAR_MATCH;
+ p = strchr(linebuf, '.');
+
+ if (p == NULL) {
+ msg_err("got bad star line, skip it: %s", linebuf);
+ continue;
+ }
+ p++;
+ }
+ else {
+ p = linebuf;
+ }
+
+ m.flags = flags;
+ rspamd_multipattern_add_pattern(url_scanner->search_trie_full, p,
+ RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8);
+ m.pattern = rspamd_multipattern_get_pattern(url_scanner->search_trie_full,
+ rspamd_multipattern_get_npatterns(url_scanner->search_trie_full) - 1);
+
+ g_array_append_val(url_scanner->matchers_full, m);
+ }
+
+ free(linebuf);
+ fclose(f);
+
+ return TRUE;
+}
+
+static void
+rspamd_url_add_static_matchers(struct url_match_scanner *sc)
+{
+ gint n = G_N_ELEMENTS(static_matchers), i;
+
+ for (i = 0; i < n; i++) {
+ if (static_matchers[i].flags & URL_MATCHER_FLAG_REGEXP) {
+ rspamd_multipattern_add_pattern(url_scanner->search_trie_strict,
+ static_matchers[i].pattern,
+ RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
+ RSPAMD_MULTIPATTERN_RE);
+ }
+ else {
+ rspamd_multipattern_add_pattern(url_scanner->search_trie_strict,
+ static_matchers[i].pattern,
+ RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8);
+ }
+ }
+
+ g_array_append_vals(sc->matchers_strict, static_matchers, n);
+
+ if (sc->matchers_full) {
+ for (i = 0; i < n; i++) {
+ if (static_matchers[i].flags & URL_MATCHER_FLAG_REGEXP) {
+ rspamd_multipattern_add_pattern(url_scanner->search_trie_full,
+ static_matchers[i].pattern,
+ RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
+ RSPAMD_MULTIPATTERN_RE);
+ }
+ else {
+ rspamd_multipattern_add_pattern(url_scanner->search_trie_full,
+ static_matchers[i].pattern,
+ RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8);
+ }
+ }
+ g_array_append_vals(sc->matchers_full, static_matchers, n);
+ }
+}
+
+void rspamd_url_deinit(void)
+{
+ if (url_scanner != NULL) {
+ if (url_scanner->search_trie_full) {
+ rspamd_multipattern_destroy(url_scanner->search_trie_full);
+ g_array_free(url_scanner->matchers_full, TRUE);
+ }
+
+ rspamd_multipattern_destroy(url_scanner->search_trie_strict);
+ g_array_free(url_scanner->matchers_strict, TRUE);
+ g_free(url_scanner);
+
+ url_scanner = NULL;
+ }
+}
+
+void rspamd_url_init(const gchar *tld_file)
+{
+ GError *err = NULL;
+ gboolean ret = TRUE;
+
+ if (url_scanner != NULL) {
+ rspamd_url_deinit();
+ }
+
+ url_scanner = g_malloc(sizeof(struct url_match_scanner));
+
+ url_scanner->matchers_strict = g_array_sized_new(FALSE, TRUE,
+ sizeof(struct url_matcher), G_N_ELEMENTS(static_matchers));
+ url_scanner->search_trie_strict = rspamd_multipattern_create_sized(
+ G_N_ELEMENTS(static_matchers),
+ RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8);
+
+ if (tld_file) {
+ /* Reserve larger multipattern */
+ url_scanner->matchers_full = g_array_sized_new(FALSE, TRUE,
+ sizeof(struct url_matcher), 13000);
+ url_scanner->search_trie_full = rspamd_multipattern_create_sized(13000,
+ RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8);
+ url_scanner->has_tld_file = true;
+ }
+ else {
+ url_scanner->matchers_full = NULL;
+ url_scanner->search_trie_full = NULL;
+ url_scanner->has_tld_file = false;
+ }
+
+ rspamd_url_add_static_matchers(url_scanner);
+
+ if (tld_file != NULL) {
+ ret = rspamd_url_parse_tld_file(tld_file, url_scanner);
+ }
+
+ if (url_scanner->matchers_full && url_scanner->matchers_full->len > 1000) {
+ msg_info("start compiling of %d TLD suffixes; it might take a long time",
+ url_scanner->matchers_full->len);
+ }
+
+ if (!rspamd_multipattern_compile(url_scanner->search_trie_strict, &err)) {
+ msg_err("cannot compile url matcher static patterns, fatal error: %e", err);
+ abort();
+ }
+
+ if (url_scanner->search_trie_full) {
+ if (!rspamd_multipattern_compile(url_scanner->search_trie_full, &err)) {
+ msg_err("cannot compile tld patterns, url matching will be "
+ "incomplete: %e",
+ err);
+ g_error_free(err);
+ ret = FALSE;
+ }
+ }
+
+ if (tld_file != NULL) {
+ if (ret) {
+ msg_info("initialized %ud url match suffixes from '%s'",
+ url_scanner->matchers_full->len - url_scanner->matchers_strict->len,
+ tld_file);
+ }
+ else {
+ msg_err("failed to initialize url tld suffixes from '%s', "
+ "use %ud internal match suffixes",
+ tld_file,
+ url_scanner->matchers_strict->len);
+ }
+ }
+
+ /* Generate hashes for flags */
+ for (gint i = 0; i < G_N_ELEMENTS(url_flag_names); i++) {
+ url_flag_names[i].hash =
+ rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+ url_flag_names[i].name,
+ strlen(url_flag_names[i].name), 0);
+ }
+ /* Ensure that we have no hashes collisions O(N^2) but this array is small */
+ for (gint i = 0; i < G_N_ELEMENTS(url_flag_names) - 1; i++) {
+ for (gint j = i + 1; j < G_N_ELEMENTS(url_flag_names); j++) {
+ if (url_flag_names[i].hash == url_flag_names[j].hash) {
+ msg_err("collision: both %s and %s map to %d",
+ url_flag_names[i].name, url_flag_names[j].name,
+ url_flag_names[i].hash);
+ abort();
+ }
+ }
+ }
+}
+
+#define SET_U(u, field) \
+ do { \
+ if ((u) != NULL) { \
+ (u)->field_set |= 1 << (field); \
+ (u)->field_data[(field)].len = p - c; \
+ (u)->field_data[(field)].off = c - str; \
+ } \
+ } while (0)
+
+static bool
+is_url_start(gchar c)
+{
+ if (c == '(' ||
+ c == '{' ||
+ c == '[' ||
+ c == '<' ||
+ c == '\'') {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static bool
+is_url_end(gchar c)
+{
+ if (c == ')' ||
+ c == '}' ||
+ c == ']' ||
+ c == '>' ||
+ c == '\'') {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static bool
+is_domain_start(int p)
+{
+ if (g_ascii_isalnum(p) ||
+ p == '[' ||
+ p == '%' ||
+ p == '_' ||
+ (p & 0x80)) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static const guint max_domain_length = 253;
+static const guint max_dns_label = 63;
+static const guint max_email_user = 64;
+
+static gint
+rspamd_mailto_parse(struct http_parser_url *u,
+ const gchar *str, gsize len,
+ gchar const **end,
+ enum rspamd_url_parse_flags parse_flags, guint *flags)
+{
+ const gchar *p = str, *c = str, *last = str + len;
+ gchar t;
+ gint ret = 1;
+ enum {
+ parse_mailto,
+ parse_slash,
+ parse_slash_slash,
+ parse_semicolon,
+ parse_prefix_question,
+ parse_destination,
+ parse_equal,
+ parse_user,
+ parse_at,
+ parse_domain,
+ parse_suffix_question,
+ parse_query
+ } st = parse_mailto;
+
+ if (u != NULL) {
+ memset(u, 0, sizeof(*u));
+ }
+
+ while (p < last) {
+ t = *p;
+
+ if (p - str > max_email_user + max_domain_length + 1) {
+ goto out;
+ }
+
+ switch (st) {
+ case parse_mailto:
+ if (t == ':') {
+ st = parse_semicolon;
+ SET_U(u, UF_SCHEMA);
+ }
+ p++;
+ break;
+ case parse_semicolon:
+ if (t == '/' || t == '\\') {
+ st = parse_slash;
+ p++;
+ }
+ else {
+ *flags |= RSPAMD_URL_FLAG_MISSINGSLASHES;
+ st = parse_slash_slash;
+ }
+ break;
+ case parse_slash:
+ if (t == '/' || t == '\\') {
+ st = parse_slash_slash;
+ }
+ else {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_slash_slash:
+ if (t == '?') {
+ st = parse_prefix_question;
+ p++;
+ }
+ else if (t != '/' && t != '\\') {
+ c = p;
+ st = parse_user;
+ }
+ else {
+ /* Skip multiple slashes */
+ p++;
+ }
+ break;
+ case parse_prefix_question:
+ if (t == 't') {
+ /* XXX: accept only to= */
+ st = parse_destination;
+ }
+ else {
+ goto out;
+ }
+ break;
+ case parse_destination:
+ if (t == '=') {
+ st = parse_equal;
+ }
+ p++;
+ break;
+ case parse_equal:
+ c = p;
+ st = parse_user;
+ break;
+ case parse_user:
+ if (t == '@') {
+ if (p - c == 0) {
+ goto out;
+ }
+ SET_U(u, UF_USERINFO);
+ st = parse_at;
+ }
+ else if (!is_mailsafe(t)) {
+ goto out;
+ }
+ else if (p - c > max_email_user) {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_at:
+ c = p;
+ st = parse_domain;
+ break;
+ case parse_domain:
+ if (t == '?') {
+ SET_U(u, UF_HOST);
+ st = parse_suffix_question;
+ }
+ else if (!is_domain(t) && t != '.' && t != '_') {
+ goto out;
+ }
+ else if (p - c > max_domain_length) {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_suffix_question:
+ c = p;
+ st = parse_query;
+ break;
+ case parse_query:
+ if (t == '#') {
+ if (p - c != 0) {
+ SET_U(u, UF_QUERY);
+ }
+ c = p + 1;
+ ret = 0;
+
+ goto out;
+ }
+ else if (!(parse_flags & RSPAMD_URL_PARSE_HREF) && is_url_end(t)) {
+ ret = 0;
+ goto out;
+ }
+ else if (is_lwsp(t)) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ if (g_ascii_isspace(t)) {
+ ret = 0;
+ }
+ goto out;
+ }
+ else {
+ goto out;
+ }
+ }
+ p++;
+ break;
+ }
+ }
+
+ if (st == parse_domain) {
+ if (p - c != 0) {
+ SET_U(u, UF_HOST);
+ ret = 0;
+ }
+ }
+ else if (st == parse_query) {
+ if (p - c > 0) {
+ SET_U(u, UF_QUERY);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (end != NULL) {
+ *end = p;
+ }
+
+ if ((parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ return 0;
+ }
+
+ return ret;
+}
+
+static gint
+rspamd_telephone_parse(struct http_parser_url *u,
+ const gchar *str, gsize len,
+ gchar const **end,
+ enum rspamd_url_parse_flags parse_flags,
+ guint *flags)
+{
+ enum {
+ parse_protocol,
+ parse_semicolon,
+ parse_slash,
+ parse_slash_slash,
+ parse_spaces,
+ parse_plus,
+ parse_phone_start,
+ parse_phone,
+ } st = parse_protocol;
+
+ const gchar *p = str, *c = str, *last = str + len;
+ gchar t;
+ gint ret = 1, i;
+ UChar32 uc;
+
+ if (u != NULL) {
+ memset(u, 0, sizeof(*u));
+ }
+
+ while (p < last) {
+ t = *p;
+
+ if (p - str > max_email_user) {
+ goto out;
+ }
+
+ switch (st) {
+ case parse_protocol:
+ if (t == ':') {
+ st = parse_semicolon;
+ SET_U(u, UF_SCHEMA);
+ }
+ p++;
+ break;
+ case parse_semicolon:
+ if (t == '/' || t == '\\') {
+ st = parse_slash;
+ p++;
+ }
+ else {
+ st = parse_slash_slash;
+ }
+ break;
+ case parse_slash:
+ if (t == '/' || t == '\\') {
+ st = parse_slash_slash;
+ }
+ else {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_slash_slash:
+ if (g_ascii_isspace(t)) {
+ st = parse_spaces;
+ p++;
+ }
+ else if (t == '+') {
+ c = p;
+ st = parse_plus;
+ }
+ else if (t == '/') {
+ /* Skip multiple slashes */
+ p++;
+ }
+ else {
+ st = parse_phone_start;
+ c = p;
+ }
+ break;
+ case parse_spaces:
+ if (t == '+') {
+ c = p;
+ st = parse_plus;
+ }
+ else if (!g_ascii_isspace(t)) {
+ st = parse_phone_start;
+ c = p;
+ }
+ else {
+ p++;
+ }
+ break;
+ case parse_plus:
+ c = p;
+ p++;
+ st = parse_phone_start;
+ break;
+ case parse_phone_start:
+ if (*p == '%' || *p == '(' || g_ascii_isdigit(*p)) {
+ st = parse_phone;
+ p++;
+ }
+ else {
+ goto out;
+ }
+ break;
+ case parse_phone:
+ i = p - str;
+ U8_NEXT(str, i, len, uc);
+ p = str + i;
+
+ if (u_isdigit(uc) || uc == '(' || uc == ')' || uc == '[' || uc == ']' || u_isspace(uc) || uc == '%') {
+ /* p is already incremented by U8_NEXT! */
+ }
+ else if (uc <= 0 || is_url_end(uc)) {
+ ret = 0;
+ goto set;
+ }
+ break;
+ }
+ }
+
+set:
+ if (st == parse_phone) {
+ if (p - c != 0) {
+ SET_U(u, UF_HOST);
+ ret = 0;
+ }
+ }
+
+out:
+ if (end != NULL) {
+ *end = p;
+ }
+
+ if ((parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ return 0;
+ }
+
+ return ret;
+}
+
+static gint
+rspamd_web_parse(struct http_parser_url *u, const gchar *str, gsize len,
+ gchar const **end,
+ enum rspamd_url_parse_flags parse_flags,
+ guint *flags)
+{
+ const gchar *p = str, *c = str, *last = str + len, *slash = NULL,
+ *password_start = NULL, *user_start = NULL;
+ gchar t = 0;
+ UChar32 uc;
+ glong pt;
+ gint ret = 1;
+ gboolean user_seen = FALSE;
+ enum {
+ parse_protocol,
+ parse_slash,
+ parse_slash_slash,
+ parse_semicolon,
+ parse_user,
+ parse_at,
+ parse_multiple_at,
+ parse_password_start,
+ parse_password,
+ parse_domain_start,
+ parse_domain,
+ parse_ipv6,
+ parse_port_password,
+ parse_port,
+ parse_suffix_slash,
+ parse_path,
+ parse_query,
+ parse_part
+ } st = parse_protocol;
+
+ if (u != NULL) {
+ memset(u, 0, sizeof(*u));
+ }
+
+ while (p < last) {
+ t = *p;
+
+ switch (st) {
+ case parse_protocol:
+ if (t == ':') {
+ st = parse_semicolon;
+ SET_U(u, UF_SCHEMA);
+ }
+ else if (!g_ascii_isalnum(t) && t != '+' && t != '-') {
+ if ((parse_flags & RSPAMD_URL_PARSE_CHECK) && p > c) {
+ /* We might have some domain, but no protocol */
+ st = parse_domain_start;
+ p = c;
+ slash = c;
+ break;
+ }
+ else {
+ goto out;
+ }
+ }
+ p++;
+ break;
+ case parse_semicolon:
+ if (t == '/' || t == '\\') {
+ st = parse_slash;
+ p++;
+ }
+ else {
+ st = parse_slash_slash;
+ *(flags) |= RSPAMD_URL_FLAG_MISSINGSLASHES;
+ }
+ break;
+ case parse_slash:
+ if (t == '/' || t == '\\') {
+ st = parse_slash_slash;
+ }
+ else {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_slash_slash:
+
+ if (t != '/' && t != '\\') {
+ c = p;
+ slash = p;
+ st = parse_domain_start;
+
+ /*
+ * Unfortunately, due to brain damage of the RFC 3986 authors,
+ * we have to distinguish two possibilities here:
+ * authority = [ userinfo "@" ] host [ ":" port ]
+ * So if we have @ somewhere before hostname then we must process
+ * with the username state. Otherwise, we have to process via
+ * the hostname state. Unfortunately, there is no way to distinguish
+ * them aside of running NFA or two DFA or performing lookahead.
+ * Lookahead approach looks easier to implement.
+ */
+
+ const char *tp = p;
+ while (tp < last) {
+ if (*tp == '@') {
+ user_seen = TRUE;
+ st = parse_user;
+ break;
+ }
+ else if (*tp == '/' || *tp == '#' || *tp == '?') {
+ st = parse_domain_start;
+ break;
+ }
+
+ tp++;
+ }
+
+ if (st == parse_domain_start && *p == '[') {
+ st = parse_ipv6;
+ p++;
+ c = p;
+ }
+ }
+ else {
+ /* Skip multiple slashes */
+ p++;
+ }
+ break;
+ case parse_ipv6:
+ if (t == ']') {
+ if (p - c == 0) {
+ goto out;
+ }
+ SET_U(u, UF_HOST);
+ p++;
+
+ if (*p == ':') {
+ st = parse_port;
+ c = p + 1;
+ }
+ else if (*p == '/' || *p == '\\') {
+ st = parse_path;
+ c = p + 1;
+ }
+ else if (*p == '?') {
+ st = parse_query;
+ c = p + 1;
+ }
+ else if (*p == '#') {
+ st = parse_part;
+ c = p + 1;
+ }
+ else if (p != last) {
+ goto out;
+ }
+ }
+ else if (!g_ascii_isxdigit(t) && t != ':' && t != '.') {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_user:
+ if (t == ':') {
+ if (p - c == 0) {
+ goto out;
+ }
+ user_start = c;
+ st = parse_password_start;
+ }
+ else if (t == '@') {
+ /* No password */
+ if (p - c == 0) {
+ /* We have multiple at in fact */
+ st = parse_multiple_at;
+ user_seen = TRUE;
+ *flags |= RSPAMD_URL_FLAG_OBSCURED;
+
+ continue;
+ }
+
+ SET_U(u, UF_USERINFO);
+ *flags |= RSPAMD_URL_FLAG_HAS_USER;
+ st = parse_at;
+ }
+ else if (!g_ascii_isgraph(t)) {
+ goto out;
+ }
+ else if (p - c > max_email_user) {
+ goto out;
+ }
+
+ p++;
+ break;
+ case parse_multiple_at:
+ if (t != '@') {
+ if (p - c == 0) {
+ goto out;
+ }
+
+ /* For now, we ignore all that stuff as it is bogus */
+ /* Off by one */
+ p--;
+ SET_U(u, UF_USERINFO);
+ p++;
+ *flags |= RSPAMD_URL_FLAG_HAS_USER;
+ st = parse_at;
+ }
+ else {
+ p++;
+ }
+ break;
+ case parse_password_start:
+ if (t == '@') {
+ /* Empty password */
+ SET_U(u, UF_USERINFO);
+ if (u != NULL && u->field_data[UF_USERINFO].len > 0) {
+ /* Eat semicolon */
+ u->field_data[UF_USERINFO].len--;
+ }
+ *flags |= RSPAMD_URL_FLAG_HAS_USER;
+ st = parse_at;
+ }
+ else {
+ c = p;
+ password_start = p;
+ st = parse_password;
+ }
+ p++;
+ break;
+ case parse_password:
+ if (t == '@') {
+ /* XXX: password is not stored */
+ if (u != NULL) {
+ if (u->field_data[UF_USERINFO].len == 0 && password_start && user_start && password_start > user_start + 1) {
+ *flags |= RSPAMD_URL_FLAG_HAS_USER;
+ u->field_set |= 1u << (UF_USERINFO);
+ u->field_data[UF_USERINFO].len =
+ password_start - user_start - 1;
+ u->field_data[UF_USERINFO].off =
+ user_start - str;
+ }
+ }
+ st = parse_at;
+ }
+ else if (!g_ascii_isgraph(t)) {
+ goto out;
+ }
+ else if (p - c > max_domain_length) {
+ goto out;
+ }
+ p++;
+ break;
+ case parse_at:
+ c = p;
+
+ if (t == '@') {
+ *flags |= RSPAMD_URL_FLAG_OBSCURED;
+ p++;
+ }
+ else if (t == '[') {
+ st = parse_ipv6;
+ p++;
+ c = p;
+ }
+ else {
+ st = parse_domain_start;
+ }
+ break;
+ case parse_domain_start:
+ if (is_domain_start(t)) {
+ st = parse_domain;
+ }
+ else {
+ goto out;
+ }
+ break;
+ case parse_domain:
+ if (p - c > max_domain_length) {
+ /* Too large domain */
+ goto out;
+ }
+ if (t == '/' || t == '\\' || t == ':' || t == '?' || t == '#') {
+ if (p - c == 0) {
+ goto out;
+ }
+ if (t == '/' || t == '\\') {
+ SET_U(u, UF_HOST);
+ st = parse_suffix_slash;
+ }
+ else if (t == '?') {
+ SET_U(u, UF_HOST);
+ st = parse_query;
+ c = p + 1;
+ }
+ else if (t == '#') {
+ SET_U(u, UF_HOST);
+ st = parse_part;
+ c = p + 1;
+ }
+ else if (t == ':' && !user_seen) {
+ /*
+ * Here we can have both port and password, hence we need
+ * to apply some heuristic here
+ */
+ st = parse_port_password;
+ }
+ else {
+ /*
+ * We can go only for parsing port here
+ */
+ SET_U(u, UF_HOST);
+ st = parse_port;
+ c = p + 1;
+ }
+ p++;
+ }
+ else {
+ if (is_url_end(t) || is_url_start(t)) {
+ goto set;
+ }
+ else if (*p == '@' && !user_seen) {
+ /* We need to fallback and test user */
+ p = slash;
+ user_seen = TRUE;
+ st = parse_user;
+ }
+ else if (*p != '.' && *p != '-' && *p != '_' && *p != '%') {
+ if (*p & 0x80) {
+ guint i = 0;
+
+ U8_NEXT(((const guchar *) p), i, last - p, uc);
+
+ if (uc < 0) {
+ /* Bad utf8 */
+ goto out;
+ }
+
+ if (!u_isalnum(uc)) {
+ /* Bad symbol */
+ if (IS_ZERO_WIDTH_SPACE(uc)) {
+ (*flags) |= RSPAMD_URL_FLAG_ZW_SPACES;
+ }
+ else {
+ if (!u_isgraph(uc)) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ goto out;
+ }
+ else {
+ goto set;
+ }
+ }
+ }
+ }
+ else {
+ (*flags) |= RSPAMD_URL_FLAG_IDN;
+ }
+
+ p = p + i;
+ }
+ else if (is_urlsafe(*p)) {
+ p++;
+ }
+ else {
+ if (parse_flags & RSPAMD_URL_PARSE_HREF) {
+ /* We have to use all shit we are given here */
+ p++;
+ (*flags) |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+ else {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ goto out;
+ }
+ else {
+ goto set;
+ }
+ }
+ }
+ }
+ else {
+ p++;
+ }
+ }
+ break;
+ case parse_port_password:
+ if (g_ascii_isdigit(t)) {
+ const gchar *tmp = p;
+
+ while (tmp < last) {
+ if (!g_ascii_isdigit(*tmp)) {
+ if (*tmp == '/' || *tmp == '#' || *tmp == '?' ||
+ is_url_end(*tmp) || g_ascii_isspace(*tmp)) {
+ /* Port + something */
+ st = parse_port;
+ c = slash;
+ p--;
+ SET_U(u, UF_HOST);
+ p++;
+ c = p;
+ break;
+ }
+ else {
+ /* Not a port, bad character at the end */
+ break;
+ }
+ }
+ tmp++;
+ }
+
+ if (tmp == last) {
+ /* Host + port only */
+ st = parse_port;
+ c = slash;
+ p--;
+ SET_U(u, UF_HOST);
+ p++;
+ c = p;
+ }
+
+ if (st != parse_port) {
+ /* Fallback to user:password */
+ p = slash;
+ c = slash;
+ user_seen = TRUE;
+ st = parse_user;
+ }
+ }
+ else {
+ /* Rewind back */
+ p = slash;
+ c = slash;
+ user_seen = TRUE;
+ st = parse_user;
+ }
+ break;
+ case parse_port:
+ if (t == '/' || t == '\\') {
+ pt = strtoul(c, NULL, 10);
+ if (pt == 0 || pt > 65535) {
+ goto out;
+ }
+ if (u != NULL) {
+ u->port = pt;
+ *flags |= RSPAMD_URL_FLAG_HAS_PORT;
+ }
+ st = parse_suffix_slash;
+ }
+ else if (t == '?') {
+ pt = strtoul(c, NULL, 10);
+ if (pt == 0 || pt > 65535) {
+ goto out;
+ }
+ if (u != NULL) {
+ u->port = pt;
+ *flags |= RSPAMD_URL_FLAG_HAS_PORT;
+ }
+
+ c = p + 1;
+ st = parse_query;
+ }
+ else if (t == '#') {
+ pt = strtoul(c, NULL, 10);
+ if (pt == 0 || pt > 65535) {
+ goto out;
+ }
+ if (u != NULL) {
+ u->port = pt;
+ *flags |= RSPAMD_URL_FLAG_HAS_PORT;
+ }
+
+ c = p + 1;
+ st = parse_part;
+ }
+ else if (is_url_end(t)) {
+ goto set;
+ }
+ else if (!g_ascii_isdigit(t)) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK) ||
+ !g_ascii_isspace(t)) {
+ goto out;
+ }
+ else {
+ goto set;
+ }
+ }
+ p++;
+ break;
+ case parse_suffix_slash:
+ if (t != '/' && t != '\\') {
+ c = p;
+ st = parse_path;
+ }
+ else {
+ /* Skip extra slashes */
+ p++;
+ }
+ break;
+ case parse_path:
+ if (t == '?') {
+ if (p - c != 0) {
+ SET_U(u, UF_PATH);
+ }
+ c = p + 1;
+ st = parse_query;
+ }
+ else if (t == '#') {
+ /* No query, just fragment */
+ if (p - c != 0) {
+ SET_U(u, UF_PATH);
+ }
+ c = p + 1;
+ st = parse_part;
+ }
+ else if (!(parse_flags & RSPAMD_URL_PARSE_HREF) && is_url_end(t)) {
+ goto set;
+ }
+ else if (is_lwsp(t)) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ if (g_ascii_isspace(t)) {
+ goto set;
+ }
+ goto out;
+ }
+ else {
+ goto set;
+ }
+ }
+ p++;
+ break;
+ case parse_query:
+ if (t == '#') {
+ if (p - c != 0) {
+ SET_U(u, UF_QUERY);
+ }
+ c = p + 1;
+ st = parse_part;
+ }
+ else if (!(parse_flags & RSPAMD_URL_PARSE_HREF) && is_url_end(t)) {
+ goto set;
+ }
+ else if (is_lwsp(t)) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ if (g_ascii_isspace(t)) {
+ goto set;
+ }
+ goto out;
+ }
+ else {
+ goto set;
+ }
+ }
+ p++;
+ break;
+ case parse_part:
+ if (!(parse_flags & RSPAMD_URL_PARSE_HREF) && is_url_end(t)) {
+ goto set;
+ }
+ else if (is_lwsp(t)) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_CHECK)) {
+ if (g_ascii_isspace(t)) {
+ goto set;
+ }
+ goto out;
+ }
+ else {
+ goto set;
+ }
+ }
+ p++;
+ break;
+ }
+ }
+
+set:
+ /* Parse remaining */
+ switch (st) {
+ case parse_domain:
+ if (p - c == 0 || !is_domain(*(p - 1)) || !is_domain(*c)) {
+ goto out;
+ }
+ SET_U(u, UF_HOST);
+ ret = 0;
+
+ break;
+ case parse_port:
+ pt = strtoul(c, NULL, 10);
+ if (pt == 0 || pt > 65535) {
+ goto out;
+ }
+ if (u != NULL) {
+ u->port = pt;
+ }
+
+ ret = 0;
+ break;
+ case parse_suffix_slash:
+ /* Url ends with '/' */
+ ret = 0;
+ break;
+ case parse_path:
+ if (p - c > 0) {
+ SET_U(u, UF_PATH);
+ }
+ ret = 0;
+ break;
+ case parse_query:
+ if (p - c > 0) {
+ SET_U(u, UF_QUERY);
+ }
+ ret = 0;
+ break;
+ case parse_part:
+ if (p - c > 0) {
+ SET_U(u, UF_FRAGMENT);
+ }
+ ret = 0;
+ break;
+ case parse_ipv6:
+ if (t != ']') {
+ ret = 1;
+ }
+ else {
+ /* e.g. http://[::] */
+ ret = 0;
+ }
+ break;
+ default:
+ /* Error state */
+ ret = 1;
+ break;
+ }
+out:
+ if (end != NULL) {
+ *end = p;
+ }
+
+ return ret;
+}
+
+#undef SET_U
+
+static gint
+rspamd_tld_trie_callback(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context)
+{
+ struct url_matcher *matcher;
+ const gchar *start, *pos, *p;
+ struct rspamd_url *url = context;
+ gint ndots;
+
+ matcher = &g_array_index(url_scanner->matchers_full, struct url_matcher,
+ strnum);
+ ndots = 1;
+
+ if (matcher->flags & URL_MATCHER_FLAG_STAR_MATCH) {
+ /* Skip one more tld component */
+ ndots++;
+ }
+
+ pos = text + match_start;
+ p = pos - 1;
+ start = rspamd_url_host_unsafe(url);
+
+ if (*pos != '.' || match_pos != (gint) url->hostlen) {
+ /* Something weird has been found */
+ if (match_pos == (gint) url->hostlen - 1) {
+ pos = rspamd_url_host_unsafe(url) + match_pos;
+ if (*pos == '.') {
+ /* This is dot at the end of domain */
+ url->hostlen--;
+ }
+ else {
+ return 0;
+ }
+ }
+ else {
+ return 0;
+ }
+ }
+
+ /* Now we need to find top level domain */
+ pos = start;
+ while (p >= start && ndots > 0) {
+ if (*p == '.') {
+ ndots--;
+ pos = p + 1;
+ }
+ else {
+ pos = p;
+ }
+
+ p--;
+ }
+
+ if ((ndots == 0 || p == start - 1) &&
+ url->tldlen < rspamd_url_host_unsafe(url) + url->hostlen - pos) {
+ url->tldshift = (pos - url->string);
+ url->tldlen = rspamd_url_host_unsafe(url) + url->hostlen - pos;
+ }
+
+ return 0;
+}
+
+static void
+rspamd_url_regen_from_inet_addr(struct rspamd_url *uri, const void *addr, int af,
+ rspamd_mempool_t *pool)
+{
+ gchar *strbuf, *p;
+ const gchar *start_offset;
+ gsize slen = uri->urllen - uri->hostlen;
+ goffset r = 0;
+
+ if (af == AF_INET) {
+ slen += INET_ADDRSTRLEN;
+ }
+ else {
+ slen += INET6_ADDRSTRLEN;
+ }
+
+ if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) {
+ slen += sizeof("65535") - 1;
+ }
+
+ /* Allocate new string to build it from IP */
+ strbuf = rspamd_mempool_alloc(pool, slen + 1);
+ r += rspamd_snprintf(strbuf + r, slen - r, "%*s",
+ (gint) (uri->hostshift),
+ uri->string);
+
+ uri->hostshift = r;
+ uri->tldshift = r;
+ start_offset = strbuf + r;
+ inet_ntop(af, addr, strbuf + r, slen - r + 1);
+ uri->hostlen = strlen(start_offset);
+ r += uri->hostlen;
+ uri->tldlen = uri->hostlen;
+ uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
+
+ /* Reconstruct URL */
+ if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) {
+ p = strbuf + r;
+ start_offset = p + 1;
+ r += rspamd_snprintf(strbuf + r, slen - r, ":%ud",
+ (unsigned int) uri->ext->port);
+ }
+ if (uri->datalen > 0) {
+ p = strbuf + r;
+ start_offset = p + 1;
+ r += rspamd_snprintf(strbuf + r, slen - r, "/%*s",
+ (gint) uri->datalen,
+ rspamd_url_data_unsafe(uri));
+ uri->datashift = start_offset - strbuf;
+ }
+ else {
+ /* Add trailing slash if needed */
+ if (uri->hostlen + uri->hostshift < uri->urllen &&
+ *(rspamd_url_host_unsafe(uri) + uri->hostlen) == '/') {
+ r += rspamd_snprintf(strbuf + r, slen - r, "/");
+ }
+ }
+
+ if (uri->querylen > 0) {
+ p = strbuf + r;
+ start_offset = p + 1;
+ r += rspamd_snprintf(strbuf + r, slen - r, "?%*s",
+ (gint) uri->querylen,
+ rspamd_url_query_unsafe(uri));
+ uri->queryshift = start_offset - strbuf;
+ }
+ if (uri->fragmentlen > 0) {
+ p = strbuf + r;
+ start_offset = p + 1;
+ r += rspamd_snprintf(strbuf + r, slen - r, "#%*s",
+ (gint) uri->fragmentlen,
+ rspamd_url_fragment_unsafe(uri));
+ uri->fragmentshift = start_offset - strbuf;
+ }
+
+ uri->string = strbuf;
+ uri->urllen = r;
+}
+
+static gboolean
+rspamd_url_maybe_regenerate_from_ip(struct rspamd_url *uri, rspamd_mempool_t *pool)
+{
+ const gchar *p, *end, *c;
+ gchar *errstr;
+ struct in_addr in4;
+ struct in6_addr in6;
+ gboolean ret = FALSE, check_num = TRUE;
+ guint32 n, dots, t = 0, i = 0, shift, nshift;
+
+ p = rspamd_url_host_unsafe(uri);
+ end = p + uri->hostlen;
+
+ if (*p == '[' && *(end - 1) == ']') {
+ p++;
+ end--;
+ }
+
+ while (*(end - 1) == '.' && end > p) {
+ end--;
+ }
+
+ if (end - p == 0 || end - p > INET6_ADDRSTRLEN) {
+ return FALSE;
+ }
+
+ if (rspamd_str_has_8bit(p, end - p)) {
+ return FALSE;
+ }
+
+ if (rspamd_parse_inet_address_ip4(p, end - p, &in4)) {
+ rspamd_url_regen_from_inet_addr(uri, &in4, AF_INET, pool);
+ ret = TRUE;
+ }
+ else if (rspamd_parse_inet_address_ip6(p, end - p, &in6)) {
+ rspamd_url_regen_from_inet_addr(uri, &in6, AF_INET6, pool);
+ ret = TRUE;
+ }
+ else {
+ /* Heuristics for broken urls */
+ gchar buf[INET6_ADDRSTRLEN + 1];
+ /* Try also numeric notation */
+ c = p;
+ n = 0;
+ dots = 0;
+ shift = 0;
+
+ while (p <= end && check_num) {
+ if (shift < 32 &&
+ ((*p == '.' && dots < 3) || (p == end && dots <= 3))) {
+ if (p - c + 1 >= (gint) sizeof(buf)) {
+ msg_debug_pool("invalid numeric url %*.s...: too long",
+ INET6_ADDRSTRLEN, c);
+ return FALSE;
+ }
+
+ rspamd_strlcpy(buf, c, p - c + 1);
+ c = p + 1;
+
+ if (p < end && *p == '.') {
+ dots++;
+ }
+
+ glong long_n = strtol(buf, &errstr, 0);
+
+ if ((errstr == NULL || *errstr == '\0') && long_n >= 0) {
+
+ t = long_n; /* Truncate as windows does */
+ /*
+ * Even if we have zero, we need to shift by 1 octet
+ */
+ nshift = (t == 0 ? shift + 8 : shift);
+
+ /*
+ * Here we count number of octets encoded in this element
+ */
+ for (i = 0; i < 4; i++) {
+ if ((t >> (8 * i)) > 0) {
+ nshift += 8;
+ }
+ else {
+ break;
+ }
+ }
+ /*
+ * Here we need to find the proper shift of the previous
+ * components, so we check possible cases:
+ * 1) 1 octet - just use it applying shift
+ * 2) 2 octets - convert to big endian 16 bit number
+ * 3) 3 octets - convert to big endian 24 bit number
+ * 4) 4 octets - convert to big endian 32 bit number
+ */
+ switch (i) {
+ case 4:
+ t = GUINT32_TO_BE(t);
+ break;
+ case 3:
+ t = (GUINT32_TO_BE(t & 0xFFFFFFU)) >> 8;
+ break;
+ case 2:
+ t = GUINT16_TO_BE(t & 0xFFFFU);
+ break;
+ default:
+ t = t & 0xFF;
+ break;
+ }
+
+ if (p != end) {
+ n |= t << shift;
+
+ shift = nshift;
+ }
+ }
+ else {
+ check_num = FALSE;
+ }
+ }
+
+ p++;
+ }
+
+ /* The last component should be last according to url normalization:
+ * 192.168.1 -> 192.168.0.1
+ * 192 -> 0.0.0.192
+ * 192.168 -> 192.0.0.168
+ */
+ shift = 8 * (4 - i);
+
+ if (shift < 32) {
+ n |= t << shift;
+ }
+
+ if (check_num) {
+ if (dots <= 4) {
+ memcpy(&in4, &n, sizeof(in4));
+ rspamd_url_regen_from_inet_addr(uri, &in4, AF_INET, pool);
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ ret = TRUE;
+ }
+ else if (end - c > (gint) sizeof(buf) - 1) {
+ rspamd_strlcpy(buf, c, end - c + 1);
+
+ if (inet_pton(AF_INET6, buf, &in6) == 1) {
+ rspamd_url_regen_from_inet_addr(uri, &in6, AF_INET6, pool);
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ ret = TRUE;
+ }
+ }
+ }
+ }
+
+ return ret;
+}
+
+static void
+rspamd_url_shift(struct rspamd_url *uri, gsize nlen,
+ enum http_parser_url_fields field)
+{
+ guint old_shift, shift = 0;
+ gint remain;
+
+ /* Shift remaining data */
+ switch (field) {
+ case UF_SCHEMA:
+ if (nlen >= uri->protocollen) {
+ return;
+ }
+ else {
+ shift = uri->protocollen - nlen;
+ }
+
+ old_shift = uri->protocollen;
+ uri->protocollen -= shift;
+ remain = uri->urllen - uri->protocollen;
+ g_assert(remain >= 0);
+ memmove(uri->string + uri->protocollen, uri->string + old_shift,
+ remain);
+ uri->urllen -= shift;
+ uri->flags |= RSPAMD_URL_FLAG_SCHEMAENCODED;
+ break;
+ case UF_HOST:
+ if (nlen >= uri->hostlen) {
+ return;
+ }
+ else {
+ shift = uri->hostlen - nlen;
+ }
+
+ old_shift = uri->hostlen;
+ uri->hostlen -= shift;
+ remain = (uri->urllen - (uri->hostshift)) - old_shift;
+ g_assert(remain >= 0);
+ memmove(rspamd_url_host_unsafe(uri) + uri->hostlen,
+ rspamd_url_host_unsafe(uri) + old_shift,
+ remain);
+ uri->urllen -= shift;
+ uri->flags |= RSPAMD_URL_FLAG_HOSTENCODED;
+ break;
+ case UF_PATH:
+ if (nlen >= uri->datalen) {
+ return;
+ }
+ else {
+ shift = uri->datalen - nlen;
+ }
+
+ old_shift = uri->datalen;
+ uri->datalen -= shift;
+ remain = (uri->urllen - (uri->datashift)) - old_shift;
+ g_assert(remain >= 0);
+ memmove(rspamd_url_data_unsafe(uri) + uri->datalen,
+ rspamd_url_data_unsafe(uri) + old_shift,
+ remain);
+ uri->urllen -= shift;
+ uri->flags |= RSPAMD_URL_FLAG_PATHENCODED;
+ break;
+ case UF_QUERY:
+ if (nlen >= uri->querylen) {
+ return;
+ }
+ else {
+ shift = uri->querylen - nlen;
+ }
+
+ old_shift = uri->querylen;
+ uri->querylen -= shift;
+ remain = (uri->urllen - (uri->queryshift)) - old_shift;
+ g_assert(remain >= 0);
+ memmove(rspamd_url_query_unsafe(uri) + uri->querylen,
+ rspamd_url_query_unsafe(uri) + old_shift,
+ remain);
+ uri->urllen -= shift;
+ uri->flags |= RSPAMD_URL_FLAG_QUERYENCODED;
+ break;
+ case UF_FRAGMENT:
+ if (nlen >= uri->fragmentlen) {
+ return;
+ }
+ else {
+ shift = uri->fragmentlen - nlen;
+ }
+
+ uri->fragmentlen -= shift;
+ uri->urllen -= shift;
+ break;
+ default:
+ break;
+ }
+
+ /* Now adjust lengths and offsets */
+ switch (field) {
+ case UF_SCHEMA:
+ if (uri->userlen > 0) {
+ uri->usershift -= shift;
+ }
+ if (uri->hostlen > 0) {
+ uri->hostshift -= shift;
+ }
+ /* Go forward */
+ /* FALLTHRU */
+ case UF_HOST:
+ if (uri->datalen > 0) {
+ uri->datashift -= shift;
+ }
+ /* Go forward */
+ /* FALLTHRU */
+ case UF_PATH:
+ if (uri->querylen > 0) {
+ uri->queryshift -= shift;
+ }
+ /* Go forward */
+ /* FALLTHRU */
+ case UF_QUERY:
+ if (uri->fragmentlen > 0) {
+ uri->fragmentshift -= shift;
+ }
+ /* Go forward */
+ /* FALLTHRU */
+ case UF_FRAGMENT:
+ default:
+ break;
+ }
+}
+
+static void
+rspamd_telephone_normalise_inplace(struct rspamd_url *uri)
+{
+ gchar *t, *h, *end;
+ gint i = 0, w, orig_len;
+ UChar32 uc;
+
+ t = rspamd_url_host_unsafe(uri);
+ h = t;
+ end = t + uri->hostlen;
+ orig_len = uri->hostlen;
+
+ if (*h == '+') {
+ h++;
+ t++;
+ }
+
+ while (h < end) {
+ i = 0;
+ U8_NEXT(h, i, end - h, uc);
+
+ if (u_isdigit(uc)) {
+ w = 0;
+ U8_APPEND_UNSAFE(t, w, uc);
+ t += w;
+ }
+
+ h += i;
+ }
+
+ uri->hostlen = t - rspamd_url_host_unsafe(uri);
+ uri->urllen -= (orig_len - uri->hostlen);
+}
+
+static inline bool
+is_idna_label_dot(UChar ch)
+{
+ switch (ch) {
+ case 0x3002:
+ case 0xFF0E:
+ case 0xFF61:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * All credits for this investigation should go to
+ * Dr. Hajime Shimada and Mr. Shirakura as they have revealed this case in their
+ * research.
+ */
+
+/*
+ * This function replaces unsafe IDNA dots in host labels. Unfortunately,
+ * IDNA extends dot definition from '.' to multiple other characters that
+ * should be treated equally.
+ * This function replaces such dots and returns `true` if these dots are found.
+ * In this case, it should be treated as obfuscation attempt.
+ */
+static bool
+rspamd_url_remove_dots(struct rspamd_url *uri)
+{
+ const gchar *hstart = rspamd_url_host_unsafe(uri);
+ gchar *t;
+ UChar32 uc;
+ gint i = 0, hlen;
+ bool ret = false;
+
+ if (uri->hostlen == 0) {
+ return false;
+ }
+
+ hlen = uri->hostlen;
+ t = rspamd_url_host_unsafe(uri);
+
+ while (i < hlen) {
+ gint prev_i = i;
+ U8_NEXT(hstart, i, hlen, uc);
+
+ if (is_idna_label_dot(uc)) {
+ *t++ = '.';
+ ret = true;
+ }
+ else {
+ if (ret) {
+ /* We have to shift the remaining stuff */
+ while (prev_i < i) {
+ *t++ = *(hstart + prev_i);
+ prev_i++;
+ }
+ }
+ else {
+ t += (i - prev_i);
+ }
+ }
+ }
+
+ if (ret) {
+ rspamd_url_shift(uri, t - hstart, UF_HOST);
+ }
+
+ return ret;
+}
+
+enum uri_errno
+rspamd_url_parse(struct rspamd_url *uri,
+ gchar *uristring, gsize len,
+ rspamd_mempool_t *pool,
+ enum rspamd_url_parse_flags parse_flags)
+{
+ struct http_parser_url u;
+ gchar *p;
+ const gchar *end;
+ guint complen, ret, flags = 0;
+ gsize unquoted_len = 0;
+
+ memset(uri, 0, sizeof(*uri));
+ memset(&u, 0, sizeof(u));
+ uri->count = 1;
+ /* Undefine order */
+ uri->order = -1;
+ uri->part_order = -1;
+
+ if (*uristring == '\0') {
+ return URI_ERRNO_EMPTY;
+ }
+
+ if (len >= G_MAXUINT16 / 2) {
+ flags |= RSPAMD_URL_FLAG_TRUNCATED;
+ len = G_MAXUINT16 / 2;
+ }
+
+ p = uristring;
+ uri->protocol = PROTOCOL_UNKNOWN;
+
+ if (len > sizeof("mailto:") - 1) {
+ /* For mailto: urls we also need to add slashes to make it a valid URL */
+ if (g_ascii_strncasecmp(p, "mailto:", sizeof("mailto:") - 1) == 0) {
+ ret = rspamd_mailto_parse(&u, uristring, len, &end, parse_flags,
+ &flags);
+ }
+ else if (g_ascii_strncasecmp(p, "tel:", sizeof("tel:") - 1) == 0 ||
+ g_ascii_strncasecmp(p, "callto:", sizeof("callto:") - 1) == 0) {
+ ret = rspamd_telephone_parse(&u, uristring, len, &end, parse_flags,
+ &flags);
+ uri->protocol = PROTOCOL_TELEPHONE;
+ }
+ else {
+ ret = rspamd_web_parse(&u, uristring, len, &end, parse_flags,
+ &flags);
+ }
+ }
+ else {
+ ret = rspamd_web_parse(&u, uristring, len, &end, parse_flags, &flags);
+ }
+
+ if (ret != 0) {
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ if (end > uristring && (guint) (end - uristring) != len) {
+ len = end - uristring;
+ }
+
+ uri->raw = p;
+ uri->rawlen = len;
+
+ if (flags & RSPAMD_URL_FLAG_MISSINGSLASHES) {
+ len += 2;
+ uri->string = rspamd_mempool_alloc(pool, len + 1);
+ memcpy(uri->string, p, u.field_data[UF_SCHEMA].len);
+ memcpy(uri->string + u.field_data[UF_SCHEMA].len, "://", 3);
+ rspamd_strlcpy(uri->string + u.field_data[UF_SCHEMA].len + 3,
+ p + u.field_data[UF_SCHEMA].len + 1,
+ len - 2 - u.field_data[UF_SCHEMA].len);
+ /* Compensate slashes added */
+ for (int i = UF_SCHEMA + 1; i < UF_MAX; i++) {
+ if (u.field_set & (1 << i)) {
+ u.field_data[i].off += 2;
+ }
+ }
+ }
+ else {
+ uri->string = rspamd_mempool_alloc(pool, len + 1);
+ rspamd_strlcpy(uri->string, p, len + 1);
+ }
+
+ uri->urllen = len;
+ uri->flags = flags;
+
+ for (guint i = 0; i < UF_MAX; i++) {
+ if (u.field_set & (1 << i)) {
+ guint shift = u.field_data[i].off;
+ complen = u.field_data[i].len;
+
+ if (complen >= G_MAXUINT16) {
+ /* Too large component length */
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ switch (i) {
+ case UF_SCHEMA:
+ uri->protocollen = u.field_data[i].len;
+ break;
+ case UF_HOST:
+ uri->hostshift = shift;
+ uri->hostlen = complen;
+ break;
+ case UF_PATH:
+ uri->datashift = shift;
+ uri->datalen = complen;
+ break;
+ case UF_QUERY:
+ uri->queryshift = shift;
+ uri->querylen = complen;
+ break;
+ case UF_FRAGMENT:
+ uri->fragmentshift = shift;
+ uri->fragmentlen = complen;
+ break;
+ case UF_USERINFO:
+ uri->usershift = shift;
+ uri->userlen = complen;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /* Port is 'special' in case of url_parser as it is not a part of UF_* macro logic */
+ if (u.port != 0) {
+ if (!uri->ext) {
+ uri->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+ }
+ uri->flags |= RSPAMD_URL_FLAG_HAS_PORT;
+ uri->ext->port = u.port;
+ }
+
+ if (!uri->hostlen) {
+ return URI_ERRNO_HOST_MISSING;
+ }
+
+ /* Now decode url symbols */
+ unquoted_len = rspamd_url_decode(uri->string,
+ uri->string,
+ uri->protocollen);
+ rspamd_url_shift(uri, unquoted_len, UF_SCHEMA);
+ unquoted_len = rspamd_url_decode(rspamd_url_host_unsafe(uri),
+ rspamd_url_host_unsafe(uri), uri->hostlen);
+
+ rspamd_url_normalise_propagate_flags(pool, rspamd_url_host_unsafe(uri),
+ &unquoted_len, uri->flags);
+
+ rspamd_url_shift(uri, unquoted_len, UF_HOST);
+
+ if (rspamd_url_remove_dots(uri)) {
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+
+ if (uri->protocol & (PROTOCOL_HTTP | PROTOCOL_HTTPS | PROTOCOL_MAILTO | PROTOCOL_FTP | PROTOCOL_FILE)) {
+ /* Ensure that hostname starts with something sane (exclude numeric urls) */
+ const gchar *host = rspamd_url_host_unsafe(uri);
+
+ if (!(is_domain_start(host[0]) || host[0] == ':')) {
+ return URI_ERRNO_BAD_FORMAT;
+ }
+ }
+
+ /* Apply nameprep algorithm */
+ static UStringPrepProfile *nameprep = NULL;
+ UErrorCode uc_err = U_ZERO_ERROR;
+
+ if (nameprep == NULL) {
+ /* Open and cache profile */
+ nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &uc_err);
+
+ g_assert(U_SUCCESS(uc_err));
+ }
+
+ UChar *utf16_hostname, *norm_utf16;
+ gint32 utf16_len, norm_utf16_len, norm_utf8_len;
+ UParseError parse_error;
+
+ utf16_hostname = rspamd_mempool_alloc(pool, uri->hostlen * sizeof(UChar));
+ struct UConverter *utf8_conv = rspamd_get_utf8_converter();
+
+ utf16_len = ucnv_toUChars(utf8_conv, utf16_hostname, uri->hostlen,
+ rspamd_url_host_unsafe(uri), uri->hostlen, &uc_err);
+
+ if (!U_SUCCESS(uc_err)) {
+
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ norm_utf16 = rspamd_mempool_alloc(pool, utf16_len * sizeof(UChar));
+ norm_utf16_len = usprep_prepare(nameprep, utf16_hostname, utf16_len,
+ norm_utf16, utf16_len, USPREP_DEFAULT, &parse_error, &uc_err);
+
+ if (!U_SUCCESS(uc_err)) {
+
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ /* Convert back to utf8, sigh... */
+ norm_utf8_len = ucnv_fromUChars(utf8_conv,
+ rspamd_url_host_unsafe(uri), uri->hostlen,
+ norm_utf16, norm_utf16_len, &uc_err);
+
+ if (!U_SUCCESS(uc_err)) {
+
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ /* Final shift of lengths */
+ rspamd_url_shift(uri, norm_utf8_len, UF_HOST);
+
+ /* Process data part */
+ if (uri->datalen) {
+ unquoted_len = rspamd_url_decode(rspamd_url_data_unsafe(uri),
+ rspamd_url_data_unsafe(uri), uri->datalen);
+
+ rspamd_url_normalise_propagate_flags(pool, rspamd_url_data_unsafe(uri),
+ &unquoted_len, uri->flags);
+
+ rspamd_url_shift(uri, unquoted_len, UF_PATH);
+ /* We now normalize path */
+ rspamd_normalize_path_inplace(rspamd_url_data_unsafe(uri),
+ uri->datalen, &unquoted_len);
+ rspamd_url_shift(uri, unquoted_len, UF_PATH);
+ }
+
+ if (uri->querylen) {
+ unquoted_len = rspamd_url_decode(rspamd_url_query_unsafe(uri),
+ rspamd_url_query_unsafe(uri),
+ uri->querylen);
+
+ rspamd_url_normalise_propagate_flags(pool, rspamd_url_query_unsafe(uri),
+ &unquoted_len, uri->flags);
+ rspamd_url_shift(uri, unquoted_len, UF_QUERY);
+ }
+
+ if (uri->fragmentlen) {
+ unquoted_len = rspamd_url_decode(rspamd_url_fragment_unsafe(uri),
+ rspamd_url_fragment_unsafe(uri),
+ uri->fragmentlen);
+
+ rspamd_url_normalise_propagate_flags(pool, rspamd_url_fragment_unsafe(uri),
+ &unquoted_len, uri->flags);
+ rspamd_url_shift(uri, unquoted_len, UF_FRAGMENT);
+ }
+
+ rspamd_str_lc(uri->string, uri->protocollen);
+ unquoted_len = rspamd_str_lc_utf8(rspamd_url_host_unsafe(uri), uri->hostlen);
+ rspamd_url_shift(uri, unquoted_len, UF_HOST);
+
+ if (uri->protocol == PROTOCOL_UNKNOWN) {
+ for (int i = 0; i < G_N_ELEMENTS(rspamd_url_protocols); i++) {
+ if (uri->protocollen == rspamd_url_protocols[i].len) {
+ if (memcmp(uri->string,
+ rspamd_url_protocols[i].name, uri->protocollen) == 0) {
+ uri->protocol = rspamd_url_protocols[i].proto;
+ break;
+ }
+ }
+ }
+ }
+
+ if (uri->protocol & (PROTOCOL_HTTP | PROTOCOL_HTTPS | PROTOCOL_MAILTO | PROTOCOL_FTP | PROTOCOL_FILE)) {
+ /* Find TLD part */
+ if (url_scanner->search_trie_full) {
+ rspamd_multipattern_lookup(url_scanner->search_trie_full,
+ rspamd_url_host_unsafe(uri), uri->hostlen,
+ rspamd_tld_trie_callback, uri, NULL);
+ }
+
+ if (uri->tldlen == 0) {
+ /*
+ * If we have not detected eSLD, but there are no dots in the hostname,
+ * then we should treat the whole hostname as eSLD - a rule of thumb
+ *
+ * We also check that a hostname ends with a permitted character, and all characters are forming
+ * DNS label. We also need to check for a numeric IP within this check.
+ */
+ const char *dot_pos = memchr(rspamd_url_host_unsafe(uri), '.', uri->hostlen);
+ bool is_whole_hostname_tld = false;
+
+ if (uri->hostlen > 0 && (dot_pos == NULL || dot_pos == rspamd_url_host_unsafe(uri) + uri->hostlen - 1)) {
+ bool all_chars_domain = true;
+
+ for (int i = 0; i < uri->hostlen; i++) {
+ if (!is_domain(rspamd_url_host_unsafe(uri)[i])) {
+ all_chars_domain = false;
+ break;
+ }
+ }
+
+ char last_c = rspamd_url_host_unsafe(uri)[uri->hostlen - 1];
+
+ if (all_chars_domain) {
+ /* Also check the last character to be either a dot or alphanumeric character */
+ if (last_c != '.' && !g_ascii_isalnum(last_c)) {
+ all_chars_domain = false;
+ }
+ }
+
+ if (all_chars_domain) {
+ /* Additionally check for a numeric IP as we can have some number here... */
+ rspamd_url_maybe_regenerate_from_ip(uri, pool);
+
+ if (last_c == '.' && uri->hostlen > 1) {
+ /* Skip the last dot */
+ uri->tldlen = uri->hostlen - 1;
+ }
+ else {
+ uri->tldlen = uri->hostlen;
+ }
+
+ uri->tldshift = uri->hostshift;
+ is_whole_hostname_tld = true;
+ }
+ }
+
+ if (!is_whole_hostname_tld) {
+ if (uri->protocol != PROTOCOL_MAILTO) {
+ if (url_scanner->has_tld_file && !(parse_flags & RSPAMD_URL_PARSE_HREF)) {
+ /* Ignore URL's without TLD if it is not a numeric URL */
+ if (!rspamd_url_maybe_regenerate_from_ip(uri, pool)) {
+ return URI_ERRNO_TLD_MISSING;
+ }
+ }
+ else {
+ if (!rspamd_url_maybe_regenerate_from_ip(uri, pool)) {
+ /* Assume tld equal to host */
+ uri->tldshift = uri->hostshift;
+ uri->tldlen = uri->hostlen;
+ }
+ else if (uri->flags & RSPAMD_URL_FLAG_SCHEMALESS) {
+ /* Ignore urls with both no schema and no tld */
+ return URI_ERRNO_TLD_MISSING;
+ }
+
+ uri->flags |= RSPAMD_URL_FLAG_NO_TLD;
+ }
+ }
+ else {
+ /* Ignore IP like domains for mailto, as it is really never supported */
+ return URI_ERRNO_TLD_MISSING;
+ }
+ }
+ }
+
+ /* Replace stupid '\' with '/' after schema */
+ if (uri->protocol & (PROTOCOL_HTTP | PROTOCOL_HTTPS | PROTOCOL_FTP) &&
+ uri->protocollen > 0 && uri->urllen > uri->protocollen + 2) {
+
+ gchar *pos = &uri->string[uri->protocollen],
+ *host_start = rspamd_url_host_unsafe(uri);
+
+ while (pos < host_start) {
+ if (*pos == '\\') {
+ *pos = '/';
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+ pos++;
+ }
+ }
+ }
+ else if (uri->protocol & PROTOCOL_TELEPHONE) {
+ /* We need to normalise phone number: remove all spaces and braces */
+ rspamd_telephone_normalise_inplace(uri);
+
+ if (rspamd_url_host_unsafe(uri)[0] == '+') {
+ uri->tldshift = uri->hostshift + 1;
+ uri->tldlen = uri->hostlen - 1;
+ }
+ else {
+ uri->tldshift = uri->hostshift;
+ uri->tldlen = uri->hostlen;
+ }
+ }
+
+ if (uri->protocol == PROTOCOL_UNKNOWN) {
+ if (!(parse_flags & RSPAMD_URL_PARSE_HREF)) {
+ return URI_ERRNO_INVALID_PROTOCOL;
+ }
+ else {
+ /* Hack, hack, hack */
+ uri->protocol = PROTOCOL_UNKNOWN;
+ }
+ }
+
+ return URI_ERRNO_OK;
+}
+
+struct tld_trie_cbdata {
+ const gchar *begin;
+ gsize len;
+ rspamd_ftok_t *out;
+};
+
+static gint
+rspamd_tld_trie_find_callback(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context)
+{
+ struct url_matcher *matcher;
+ const gchar *start, *pos, *p;
+ struct tld_trie_cbdata *cbdata = context;
+ gint ndots = 1;
+
+ matcher = &g_array_index(url_scanner->matchers_full, struct url_matcher,
+ strnum);
+
+ if (matcher->flags & URL_MATCHER_FLAG_STAR_MATCH) {
+ /* Skip one more tld component */
+ ndots = 2;
+ }
+
+ pos = text + match_start;
+ p = pos - 1;
+ start = text;
+
+ if (*pos != '.' || match_pos != (gint) cbdata->len) {
+ /* Something weird has been found */
+ if (match_pos != (gint) cbdata->len - 1) {
+ /* Search more */
+ return 0;
+ }
+ }
+
+ /* Now we need to find top level domain */
+ pos = start;
+
+ while (p >= start && ndots > 0) {
+ if (*p == '.') {
+ ndots--;
+ pos = p + 1;
+ }
+ else {
+ pos = p;
+ }
+
+ p--;
+ }
+
+ if (ndots == 0 || p == start - 1) {
+ if (cbdata->begin + cbdata->len - pos > cbdata->out->len) {
+ cbdata->out->begin = pos;
+ cbdata->out->len = cbdata->begin + cbdata->len - pos;
+ }
+ }
+
+ return 0;
+}
+
+gboolean
+rspamd_url_find_tld(const gchar *in, gsize inlen, rspamd_ftok_t *out)
+{
+ struct tld_trie_cbdata cbdata;
+
+ g_assert(in != NULL);
+ g_assert(out != NULL);
+ g_assert(url_scanner != NULL);
+
+ cbdata.begin = in;
+ cbdata.len = inlen;
+ cbdata.out = out;
+ out->len = 0;
+
+ if (url_scanner->search_trie_full) {
+ rspamd_multipattern_lookup(url_scanner->search_trie_full, in, inlen,
+ rspamd_tld_trie_find_callback, &cbdata, NULL);
+ }
+
+ if (out->len > 0) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static const gchar url_braces[] = {
+ '(', ')',
+ '{', '}',
+ '[', ']',
+ '<', '>',
+ '|', '|',
+ '\'', '\''};
+
+
+static gboolean
+url_file_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ match->m_begin = pos;
+
+ if (pos > cb->begin) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+
+ return TRUE;
+}
+
+static gboolean
+url_file_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ const gchar *p;
+ gchar stop;
+ guint i;
+
+ p = pos + strlen(match->pattern);
+ stop = *p;
+ if (*p == '/') {
+ p++;
+ }
+
+ for (i = 0; i < G_N_ELEMENTS(url_braces) / 2; i += 2) {
+ if (*p == url_braces[i]) {
+ stop = url_braces[i + 1];
+ break;
+ }
+ }
+
+ while (p < cb->end && *p != stop && is_urlsafe(*p)) {
+ p++;
+ }
+
+ if (p == cb->begin) {
+ return FALSE;
+ }
+ match->m_len = p - match->m_begin;
+
+ return TRUE;
+}
+
+static gboolean
+url_tld_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ const gchar *p = pos;
+ guint processed = 0;
+ static const guint max_shift = 253 + sizeof("https://");
+
+ /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */
+ while (p >= cb->begin) {
+ if (!is_domain(*p) || g_ascii_isspace(*p) || is_url_start(*p) ||
+ p == match->prev_newline_pos) {
+ if (!is_url_start(*p) && !g_ascii_isspace(*p) &&
+ p != match->prev_newline_pos) {
+ return FALSE;
+ }
+
+ if (p != match->prev_newline_pos) {
+ match->st = *p;
+
+ p++;
+ }
+ else {
+ match->st = '\n';
+ }
+
+ if (!g_ascii_isalnum(*p)) {
+ /* Urls cannot start with strange symbols */
+ return FALSE;
+ }
+
+ match->m_begin = p;
+ return TRUE;
+ }
+ else if (p == cb->begin && p != pos) {
+ match->st = '\0';
+ match->m_begin = p;
+
+ return TRUE;
+ }
+ else if (*p == '.') {
+ if (p == cb->begin) {
+ /* Urls cannot start with a dot */
+ return FALSE;
+ }
+ if (!g_ascii_isalnum(p[1])) {
+ /* Wrong we have an invalid character after dot */
+ return FALSE;
+ }
+ }
+ else if (*p == '/') {
+ /* Urls cannot contain '/' in their body */
+ return FALSE;
+ }
+
+ p--;
+ processed++;
+
+ if (processed > max_shift) {
+ /* Too long */
+ return FALSE;
+ }
+ }
+
+ return FALSE;
+}
+
+static gboolean
+url_tld_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ const gchar *p;
+ gboolean ret = FALSE;
+
+ p = pos + match->m_len;
+
+ if (p == cb->end) {
+ match->m_len = p - match->m_begin;
+ return TRUE;
+ }
+ else if (*p == '/' || *p == ':' || is_url_end(*p) || is_lwsp(*p) ||
+ (match->st != '<' && p == match->newline_pos)) {
+ /* Parse arguments, ports by normal way by url default function */
+ p = match->m_begin;
+ /* Check common prefix */
+ if (g_ascii_strncasecmp(p, "http://", sizeof("http://") - 1) == 0) {
+ ret = url_web_end(cb,
+ match->m_begin + sizeof("http://") - 1,
+ match);
+ }
+ else {
+ ret = url_web_end(cb, match->m_begin, match);
+ }
+ }
+ else if (*p == '.') {
+ p++;
+ if (p < cb->end) {
+ if (g_ascii_isspace(*p) || *p == '/' ||
+ *p == '?' || *p == ':') {
+ ret = url_web_end(cb, match->m_begin, match);
+ }
+ }
+ }
+
+ if (ret) {
+ /* Check sanity of match found */
+ if (match->m_begin + match->m_len <= pos) {
+ return FALSE;
+ }
+ }
+
+ return ret;
+}
+
+static gboolean
+url_web_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ /* Check what we have found */
+ if (pos > cb->begin) {
+ if (g_ascii_strncasecmp(pos, "www", 3) == 0) {
+
+ if (!(is_url_start(*(pos - 1)) ||
+ g_ascii_isspace(*(pos - 1)) ||
+ pos - 1 == match->prev_newline_pos ||
+ (*(pos - 1) & 0x80))) { /* Chinese trick */
+ return FALSE;
+ }
+ }
+ else {
+ guchar prev = *(pos - 1);
+
+ if (g_ascii_isalnum(prev)) {
+ /* Part of another url */
+ return FALSE;
+ }
+ }
+ }
+
+ if (*pos == '.') {
+ /* Urls cannot start with . */
+ return FALSE;
+ }
+
+ if (pos > cb->begin) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+
+ match->m_begin = pos;
+
+ return TRUE;
+}
+
+static gboolean
+url_web_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ const gchar *last = NULL;
+ gint len = cb->end - pos;
+ guint flags = 0;
+
+ if (match->newline_pos && match->st != '<') {
+ /* We should also limit our match end to the newline */
+ len = MIN(len, match->newline_pos - pos);
+ }
+
+ if (rspamd_web_parse(NULL, pos, len, &last,
+ RSPAMD_URL_PARSE_CHECK, &flags) != 0) {
+ return FALSE;
+ }
+
+ if (last < cb->end && (*last == '>' && last != match->newline_pos)) {
+ /* We need to ensure that url also starts with '>' */
+ if (match->st != '<') {
+ if (last + 1 < cb->end) {
+ if (g_ascii_isspace(last[1])) {
+ return FALSE;
+ }
+ }
+ else {
+ return FALSE;
+ }
+ }
+ }
+
+ match->m_len = (last - pos);
+ cb->fin = last + 1;
+
+ return TRUE;
+}
+
+
+static gboolean
+url_email_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ if (!match->prefix || match->prefix[0] == '\0') {
+ /* We have mailto:// at the beginning */
+ match->m_begin = pos;
+
+ if (pos >= cb->begin + 1) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+ }
+ else {
+ /* Just '@' */
+
+ /* Check if this match is a part of the previous mailto: email */
+ if (cb->last_at != NULL && cb->last_at == pos) {
+ cb->last_at = NULL;
+ return FALSE;
+ }
+ else if (pos == cb->begin) {
+ /* Just @ at the start of input */
+ return FALSE;
+ }
+
+ match->st = '\0';
+ }
+
+ return TRUE;
+}
+
+static gboolean
+url_email_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ const gchar *last = NULL;
+ struct http_parser_url u;
+ gint len = cb->end - pos;
+ guint flags = 0;
+
+ if (match->newline_pos && match->st != '<') {
+ /* We should also limit our match end to the newline */
+ len = MIN(len, match->newline_pos - pos);
+ }
+
+ if (!match->prefix || match->prefix[0] == '\0') {
+ /* We have mailto:// at the beginning */
+ if (rspamd_mailto_parse(&u, pos, len, &last,
+ RSPAMD_URL_PARSE_CHECK, &flags) != 0) {
+ return FALSE;
+ }
+
+ if (!(u.field_set & (1 << UF_USERINFO))) {
+ return FALSE;
+ }
+
+ cb->last_at = match->m_begin + u.field_data[UF_USERINFO].off +
+ u.field_data[UF_USERINFO].len;
+
+ g_assert(*cb->last_at == '@');
+ match->m_len = (last - pos);
+
+ return TRUE;
+ }
+ else {
+ const gchar *c, *p;
+ /*
+ * Here we have just '@', so we need to find both start and end of the
+ * pattern
+ */
+ g_assert(*pos == '@');
+
+ if (pos >= cb->end - 2 || pos < cb->begin + 1) {
+ /* Boundary violation */
+ return FALSE;
+ }
+
+ /* Check the next character after `@` */
+ if (!g_ascii_isalnum(pos[1]) || !g_ascii_isalnum(*(pos - 1))) {
+ return FALSE;
+ }
+
+
+ c = pos - 1;
+ while (c > cb->begin) {
+ if (!is_mailsafe(*c)) {
+ break;
+ }
+ if (c == match->prev_newline_pos) {
+ break;
+ }
+
+ c--;
+ }
+ /* Rewind to the first alphanumeric character */
+ while (c < pos && !g_ascii_isalnum(*c)) {
+ c++;
+ }
+
+ /* Find the end of email */
+ p = pos + 1;
+ while (p < cb->end && is_domain(*p)) {
+ if (p == match->newline_pos) {
+ break;
+ }
+
+ p++;
+ }
+
+ /* Rewind it again to avoid bad emails to be detected */
+ while (p > pos && p < cb->end && !g_ascii_isalnum(*p)) {
+ p--;
+ }
+
+ if (p < cb->end && g_ascii_isalnum(*p) &&
+ (match->newline_pos == NULL || p < match->newline_pos)) {
+ p++;
+ }
+
+ if (p > c) {
+ match->m_begin = c;
+ match->m_len = p - c;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static gboolean
+url_tel_start(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ match->m_begin = pos;
+
+ if (pos >= cb->begin + 1) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+
+ return TRUE;
+}
+
+static gboolean
+url_tel_end(struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match)
+{
+ const gchar *last = NULL;
+ struct http_parser_url u;
+ gint len = cb->end - pos;
+ guint flags = 0;
+
+ if (match->newline_pos && match->st != '<') {
+ /* We should also limit our match end to the newline */
+ len = MIN(len, match->newline_pos - pos);
+ }
+
+ if (rspamd_telephone_parse(&u, pos, len, &last,
+ RSPAMD_URL_PARSE_CHECK, &flags) != 0) {
+ return FALSE;
+ }
+
+ if (!(u.field_set & (1 << UF_HOST))) {
+ return FALSE;
+ }
+
+ match->m_len = (last - pos);
+
+ return TRUE;
+}
+
+
+static gboolean
+rspamd_url_trie_is_match(struct url_matcher *matcher, const gchar *pos,
+ const gchar *end, const gchar *newline_pos)
+{
+ if (matcher->flags & URL_MATCHER_FLAG_TLD_MATCH) {
+ /* Immediately check pos for valid chars */
+ if (pos < end) {
+ if (pos != newline_pos && !g_ascii_isspace(*pos) && *pos != '/' && *pos != '?' &&
+ *pos != ':' && !is_url_end(*pos)) {
+ if (*pos == '.') {
+ /* We allow . at the end of the domain however */
+ pos++;
+ if (pos < end) {
+ if (!g_ascii_isspace(*pos) && *pos != '/' &&
+ *pos != '?' && *pos != ':' && !is_url_end(*pos)) {
+ return FALSE;
+ }
+ }
+ }
+ else {
+ return FALSE;
+ }
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+static gint
+rspamd_url_trie_callback(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context)
+{
+ struct url_matcher *matcher;
+ url_match_t m;
+ const gchar *pos, *newline_pos = NULL;
+ struct url_callback_data *cb = context;
+
+ pos = text + match_pos;
+
+ if (cb->fin > pos) {
+ /* Already seen */
+ return 0;
+ }
+
+ matcher = &g_array_index(cb->matchers, struct url_matcher,
+ strnum);
+
+ if ((matcher->flags & URL_MATCHER_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
+ /* Do not try to match non-html like urls in html texts */
+ return 0;
+ }
+
+ memset(&m, 0, sizeof(m));
+ m.m_begin = text + match_start;
+ m.m_len = match_pos - match_start;
+
+ if (cb->newlines && cb->newlines->len > 0) {
+ newline_pos = g_ptr_array_index(cb->newlines, cb->newline_idx);
+
+ while (pos > newline_pos && cb->newline_idx < cb->newlines->len) {
+ cb->newline_idx++;
+ newline_pos = g_ptr_array_index(cb->newlines, cb->newline_idx);
+ }
+
+ if (pos > newline_pos) {
+ newline_pos = NULL;
+ }
+
+ if (cb->newline_idx > 0) {
+ m.prev_newline_pos = g_ptr_array_index(cb->newlines,
+ cb->newline_idx - 1);
+ }
+ }
+
+ if (!rspamd_url_trie_is_match(matcher, pos, cb->end, newline_pos)) {
+ return 0;
+ }
+
+ m.pattern = matcher->pattern;
+ m.prefix = matcher->prefix;
+ m.add_prefix = FALSE;
+ m.newline_pos = newline_pos;
+ pos = cb->begin + match_start;
+
+ if (matcher->start(cb, pos, &m) &&
+ matcher->end(cb, pos, &m)) {
+ if (m.add_prefix || matcher->prefix[0] != '\0') {
+ cb->len = m.m_len + strlen(matcher->prefix);
+ cb->url_str = rspamd_mempool_alloc(cb->pool, cb->len + 1);
+ cb->len = rspamd_snprintf(cb->url_str,
+ cb->len + 1,
+ "%s%*s",
+ m.prefix,
+ (gint) m.m_len,
+ m.m_begin);
+ cb->prefix_added = TRUE;
+ }
+ else {
+ cb->url_str = rspamd_mempool_alloc(cb->pool, m.m_len + 1);
+ rspamd_strlcpy(cb->url_str, m.m_begin, m.m_len + 1);
+ }
+
+ cb->start = m.m_begin;
+
+ if (pos > cb->fin) {
+ cb->fin = pos;
+ }
+
+ return 1;
+ }
+ else {
+ cb->url_str = NULL;
+ }
+
+ /* Continue search */
+ return 0;
+}
+
+gboolean
+rspamd_url_find(rspamd_mempool_t *pool,
+ const gchar *begin, gsize len,
+ gchar **url_str,
+ enum rspamd_url_find_type how,
+ goffset *url_pos,
+ gboolean *prefix_added)
+{
+ struct url_callback_data cb;
+ gint ret;
+
+ memset(&cb, 0, sizeof(cb));
+ cb.begin = begin;
+ cb.end = begin + len;
+ cb.how = how;
+ cb.pool = pool;
+
+ if (how == RSPAMD_URL_FIND_ALL) {
+ if (url_scanner->search_trie_full) {
+ cb.matchers = url_scanner->matchers_full;
+ ret = rspamd_multipattern_lookup(url_scanner->search_trie_full,
+ begin, len,
+ rspamd_url_trie_callback, &cb, NULL);
+ }
+ else {
+ cb.matchers = url_scanner->matchers_strict;
+ ret = rspamd_multipattern_lookup(url_scanner->search_trie_strict,
+ begin, len,
+ rspamd_url_trie_callback, &cb, NULL);
+ }
+ }
+ else {
+ cb.matchers = url_scanner->matchers_strict;
+ ret = rspamd_multipattern_lookup(url_scanner->search_trie_strict,
+ begin, len,
+ rspamd_url_trie_callback, &cb, NULL);
+ }
+
+ if (ret) {
+ if (url_str) {
+ *url_str = cb.url_str;
+ }
+
+ if (url_pos) {
+ *url_pos = cb.start - begin;
+ }
+
+ if (prefix_added) {
+ *prefix_added = cb.prefix_added;
+ }
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gint
+rspamd_url_trie_generic_callback_common(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context,
+ gboolean multiple)
+{
+ struct rspamd_url *url;
+ struct url_matcher *matcher;
+ url_match_t m;
+ const gchar *pos, *newline_pos = NULL;
+ struct url_callback_data *cb = context;
+ gint rc;
+ rspamd_mempool_t *pool;
+
+ pos = text + match_pos;
+
+ if (cb->fin > pos) {
+ /* Already seen */
+ return 0;
+ }
+
+ matcher = &g_array_index(cb->matchers, struct url_matcher,
+ strnum);
+ pool = cb->pool;
+
+ if ((matcher->flags & URL_MATCHER_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
+ /* Do not try to match non-html like urls in html texts, continue matching */
+ return 0;
+ }
+
+ memset(&m, 0, sizeof(m));
+
+
+ /* Find the next newline after our pos */
+ if (cb->newlines && cb->newlines->len > 0) {
+ newline_pos = g_ptr_array_index(cb->newlines, cb->newline_idx);
+
+ while (pos > newline_pos && cb->newline_idx < cb->newlines->len - 1) {
+ cb->newline_idx++;
+ newline_pos = g_ptr_array_index(cb->newlines, cb->newline_idx);
+ }
+
+ if (pos > newline_pos) {
+ newline_pos = NULL;
+ }
+ if (cb->newline_idx > 0) {
+ m.prev_newline_pos = g_ptr_array_index(cb->newlines,
+ cb->newline_idx - 1);
+ }
+ }
+
+ if (!rspamd_url_trie_is_match(matcher, pos, text + len, newline_pos)) {
+ /* Mismatch, continue */
+ return 0;
+ }
+
+ pos = cb->begin + match_start;
+ m.pattern = matcher->pattern;
+ m.prefix = matcher->prefix;
+ m.add_prefix = FALSE;
+ m.m_begin = text + match_start;
+ m.m_len = match_pos - match_start;
+ m.newline_pos = newline_pos;
+
+ if (matcher->start(cb, pos, &m) &&
+ matcher->end(cb, pos, &m)) {
+ if (m.add_prefix || matcher->prefix[0] != '\0') {
+ cb->len = m.m_len + strlen(matcher->prefix);
+ cb->url_str = rspamd_mempool_alloc(cb->pool, cb->len + 1);
+ cb->len = rspamd_snprintf(cb->url_str,
+ cb->len + 1,
+ "%s%*s",
+ m.prefix,
+ (gint) m.m_len,
+ m.m_begin);
+ cb->prefix_added = TRUE;
+ }
+ else {
+ cb->url_str = rspamd_mempool_alloc(cb->pool, m.m_len + 1);
+ cb->len = rspamd_strlcpy(cb->url_str, m.m_begin, m.m_len + 1);
+ }
+
+ cb->start = m.m_begin;
+
+ if (pos > cb->fin) {
+ cb->fin = pos;
+ }
+
+ url = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_url));
+ g_strstrip(cb->url_str);
+ rc = rspamd_url_parse(url, cb->url_str,
+ strlen(cb->url_str), pool,
+ RSPAMD_URL_PARSE_TEXT);
+
+ if (rc == URI_ERRNO_OK && url->hostlen > 0) {
+ if (cb->prefix_added) {
+ url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+ cb->prefix_added = FALSE;
+ }
+
+ if (cb->func) {
+ if (!cb->func(url, cb->start - text, (m.m_begin + m.m_len) - text,
+ cb->funcd)) {
+ /* We need to stop here in any case! */
+ return -1;
+ }
+ }
+ }
+ else if (rc != URI_ERRNO_OK) {
+ msg_debug_pool_check("extract of url '%s' failed: %s",
+ cb->url_str,
+ rspamd_url_strerror(rc));
+ }
+ }
+ else {
+ cb->url_str = NULL;
+ /* Continue search if no pattern has been found */
+ return 0;
+ }
+
+ /* Continue search if required (return 0 means continue) */
+ return !multiple;
+}
+
+static gint
+rspamd_url_trie_generic_callback_multiple(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context)
+{
+ return rspamd_url_trie_generic_callback_common(mp, strnum, match_start,
+ match_pos, text, len, context, TRUE);
+}
+
+static gint
+rspamd_url_trie_generic_callback_single(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context)
+{
+ return rspamd_url_trie_generic_callback_common(mp, strnum, match_start,
+ match_pos, text, len, context, FALSE);
+}
+
+struct rspamd_url_mimepart_cbdata {
+ struct rspamd_task *task;
+ struct rspamd_mime_text_part *part;
+ gsize url_len;
+ uint16_t *cur_url_order; /* Global ordering */
+ uint16_t cur_part_order; /* Per part ordering */
+};
+
+static gboolean
+rspamd_url_query_callback(struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud)
+{
+ struct rspamd_url_mimepart_cbdata *cbd =
+ (struct rspamd_url_mimepart_cbdata *) ud;
+ struct rspamd_task *task;
+
+ task = cbd->task;
+
+ if (url->protocol == PROTOCOL_MAILTO) {
+ if (url->userlen == 0) {
+ return FALSE;
+ }
+ }
+ /* Also check max urls */
+ if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
+ if (kh_size(MESSAGE_FIELD(task, urls)) > cbd->task->cfg->max_urls) {
+ msg_err_task("part has too many URLs, we cannot process more: "
+ "%d urls extracted ",
+ (guint) kh_size(MESSAGE_FIELD(task, urls)));
+
+ return FALSE;
+ }
+ }
+
+ url->flags |= RSPAMD_URL_FLAG_QUERY;
+
+
+ if (rspamd_url_set_add_or_increase(MESSAGE_FIELD(task, urls), url, false)) {
+ if (cbd->part && cbd->part->mime_part->urls) {
+ g_ptr_array_add(cbd->part->mime_part->urls, url);
+ }
+
+ url->part_order = cbd->cur_part_order++;
+
+ if (cbd->cur_url_order) {
+ url->order = (*cbd->cur_url_order)++;
+ }
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_url_text_part_callback(struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud)
+{
+ struct rspamd_url_mimepart_cbdata *cbd =
+ (struct rspamd_url_mimepart_cbdata *) ud;
+ struct rspamd_process_exception *ex;
+ struct rspamd_task *task;
+
+ task = cbd->task;
+ ex = rspamd_mempool_alloc0(task->task_pool, sizeof(struct rspamd_process_exception));
+
+ ex->pos = start_offset;
+ ex->len = end_offset - start_offset;
+ ex->type = RSPAMD_EXCEPTION_URL;
+ ex->ptr = url;
+
+ cbd->url_len += ex->len;
+
+ if (cbd->part->utf_stripped_content &&
+ cbd->url_len > cbd->part->utf_stripped_content->len * 10) {
+ /* Absurd case, stop here now */
+ msg_err_task("part has too many URLs, we cannot process more: %z url len; "
+ "%d stripped content length",
+ cbd->url_len, cbd->part->utf_stripped_content->len);
+
+ return FALSE;
+ }
+
+ if (url->protocol == PROTOCOL_MAILTO) {
+ if (url->userlen == 0) {
+ return FALSE;
+ }
+ }
+ /* Also check max urls */
+ if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
+ if (kh_size(MESSAGE_FIELD(task, urls)) > cbd->task->cfg->max_urls) {
+ msg_err_task("part has too many URLs, we cannot process more: "
+ "%d urls extracted ",
+ (guint) kh_size(MESSAGE_FIELD(task, urls)));
+
+ return FALSE;
+ }
+ }
+
+ url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
+
+ if (rspamd_url_set_add_or_increase(MESSAGE_FIELD(task, urls), url, false) &&
+ cbd->part->mime_part->urls) {
+ url->part_order = cbd->cur_part_order++;
+
+ if (cbd->cur_url_order) {
+ url->order = (*cbd->cur_url_order)++;
+ }
+ g_ptr_array_add(cbd->part->mime_part->urls, url);
+ }
+
+ cbd->part->exceptions = g_list_prepend(
+ cbd->part->exceptions,
+ ex);
+
+ /* We also search the query for additional url inside */
+ if (url->querylen > 0) {
+ rspamd_url_find_multiple(task->task_pool,
+ rspamd_url_query_unsafe(url), url->querylen,
+ RSPAMD_URL_FIND_ALL, NULL,
+ rspamd_url_query_callback, cbd);
+ }
+
+ return TRUE;
+}
+
+void rspamd_url_text_extract(rspamd_mempool_t *pool,
+ struct rspamd_task *task,
+ struct rspamd_mime_text_part *part,
+ uint16_t *cur_url_order,
+ enum rspamd_url_find_type how)
+{
+ struct rspamd_url_mimepart_cbdata mcbd;
+
+ if (part->utf_stripped_content == NULL || part->utf_stripped_content->len == 0) {
+ msg_warn_task("got empty text part");
+ return;
+ }
+
+ mcbd.task = task;
+ mcbd.part = part;
+ mcbd.url_len = 0;
+ mcbd.cur_url_order = cur_url_order;
+ mcbd.cur_part_order = 0;
+
+ rspamd_url_find_multiple(task->task_pool, part->utf_stripped_content->data,
+ part->utf_stripped_content->len, how, part->newlines,
+ rspamd_url_text_part_callback, &mcbd);
+}
+
+void rspamd_url_find_multiple(rspamd_mempool_t *pool,
+ const gchar *in,
+ gsize inlen,
+ enum rspamd_url_find_type how,
+ GPtrArray *nlines,
+ url_insert_function func,
+ gpointer ud)
+{
+ struct url_callback_data cb;
+
+ g_assert(in != NULL);
+
+ if (inlen == 0) {
+ inlen = strlen(in);
+ }
+
+ memset(&cb, 0, sizeof(cb));
+ cb.begin = in;
+ cb.end = in + inlen;
+ cb.how = how;
+ cb.pool = pool;
+
+ cb.funcd = ud;
+ cb.func = func;
+ cb.newlines = nlines;
+
+ if (how == RSPAMD_URL_FIND_ALL) {
+ if (url_scanner->search_trie_full) {
+ cb.matchers = url_scanner->matchers_full;
+ rspamd_multipattern_lookup(url_scanner->search_trie_full,
+ in, inlen,
+ rspamd_url_trie_generic_callback_multiple, &cb, NULL);
+ }
+ else {
+ cb.matchers = url_scanner->matchers_strict;
+ rspamd_multipattern_lookup(url_scanner->search_trie_strict,
+ in, inlen,
+ rspamd_url_trie_generic_callback_multiple, &cb, NULL);
+ }
+ }
+ else {
+ cb.matchers = url_scanner->matchers_strict;
+ rspamd_multipattern_lookup(url_scanner->search_trie_strict,
+ in, inlen,
+ rspamd_url_trie_generic_callback_multiple, &cb, NULL);
+ }
+}
+
+void rspamd_url_find_single(rspamd_mempool_t *pool,
+ const gchar *in,
+ gsize inlen,
+ enum rspamd_url_find_type how,
+ url_insert_function func,
+ gpointer ud)
+{
+ struct url_callback_data cb;
+
+ g_assert(in != NULL);
+
+ if (inlen == 0) {
+ inlen = strlen(in);
+ }
+
+ /*
+ * We might have a situation when we need to parse URLs on config file
+ * parsing, but there is no valid url_scanner loaded. Hence, we just load
+ * some defaults and it should be fine...
+ */
+ if (url_scanner == NULL) {
+ rspamd_url_init(NULL);
+ }
+
+ memset(&cb, 0, sizeof(cb));
+ cb.begin = in;
+ cb.end = in + inlen;
+ cb.how = how;
+ cb.pool = pool;
+
+ cb.funcd = ud;
+ cb.func = func;
+
+ if (how == RSPAMD_URL_FIND_ALL) {
+ if (url_scanner->search_trie_full) {
+ cb.matchers = url_scanner->matchers_full;
+ rspamd_multipattern_lookup(url_scanner->search_trie_full,
+ in, inlen,
+ rspamd_url_trie_generic_callback_single, &cb, NULL);
+ }
+ else {
+ cb.matchers = url_scanner->matchers_strict;
+ rspamd_multipattern_lookup(url_scanner->search_trie_strict,
+ in, inlen,
+ rspamd_url_trie_generic_callback_single, &cb, NULL);
+ }
+ }
+ else {
+ cb.matchers = url_scanner->matchers_strict;
+ rspamd_multipattern_lookup(url_scanner->search_trie_strict,
+ in, inlen,
+ rspamd_url_trie_generic_callback_single, &cb, NULL);
+ }
+}
+
+
+gboolean
+rspamd_url_task_subject_callback(struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud)
+{
+ struct rspamd_task *task = ud;
+ gchar *url_str = NULL;
+ struct rspamd_url *query_url;
+ gint rc;
+ gboolean prefix_added;
+
+ /* It is just a displayed URL, we should not check it for certain things */
+ url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED | RSPAMD_URL_FLAG_SUBJECT;
+
+ if (url->protocol == PROTOCOL_MAILTO) {
+ if (url->userlen == 0) {
+ return FALSE;
+ }
+ }
+
+ rspamd_url_set_add_or_increase(MESSAGE_FIELD(task, urls), url, false);
+
+ /* We also search the query for additional url inside */
+ if (url->querylen > 0) {
+ if (rspamd_url_find(task->task_pool, rspamd_url_query_unsafe(url), url->querylen,
+ &url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
+
+ query_url = rspamd_mempool_alloc0(task->task_pool,
+ sizeof(struct rspamd_url));
+ rc = rspamd_url_parse(query_url,
+ url_str,
+ strlen(url_str),
+ task->task_pool,
+ RSPAMD_URL_PARSE_TEXT);
+
+ if (rc == URI_ERRNO_OK &&
+ url->hostlen > 0) {
+ msg_debug_task("found url %s in query of url"
+ " %*s",
+ url_str, url->querylen, rspamd_url_query_unsafe(url));
+
+ if (prefix_added) {
+ query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+ }
+
+ if (query_url->protocol == PROTOCOL_MAILTO) {
+ if (query_url->userlen == 0) {
+ return TRUE;
+ }
+ }
+
+ rspamd_url_set_add_or_increase(MESSAGE_FIELD(task, urls),
+ query_url, false);
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+static inline khint_t
+rspamd_url_hash(struct rspamd_url *url)
+{
+ if (url->urllen > 0) {
+ return (khint_t) rspamd_cryptobox_fast_hash(url->string, url->urllen,
+ rspamd_hash_seed());
+ }
+
+ return 0;
+}
+
+static inline khint_t
+rspamd_url_host_hash(struct rspamd_url *url)
+{
+ if (url->hostlen > 0) {
+ return (khint_t) rspamd_cryptobox_fast_hash(rspamd_url_host_unsafe(url),
+ url->hostlen,
+ rspamd_hash_seed());
+ }
+
+ return 0;
+}
+
+/* Compare two emails for building emails tree */
+static inline bool
+rspamd_emails_cmp(struct rspamd_url *u1, struct rspamd_url *u2)
+{
+ gint r;
+
+ if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
+ return FALSE;
+ }
+ else {
+ if ((r = rspamd_lc_cmp(rspamd_url_host_unsafe(u1),
+ rspamd_url_host_unsafe(u2), u1->hostlen)) == 0) {
+ if (u1->userlen != u2->userlen || u1->userlen == 0) {
+ return FALSE;
+ }
+ else {
+ return (rspamd_lc_cmp(rspamd_url_user_unsafe(u1),
+ rspamd_url_user_unsafe(u2),
+ u1->userlen) == 0);
+ }
+ }
+ else {
+ return r == 0;
+ }
+ }
+
+ return FALSE;
+}
+
+static inline bool
+rspamd_urls_cmp(struct rspamd_url *u1, struct rspamd_url *u2)
+{
+ int r = 0;
+
+ if (u1->protocol != u2->protocol || u1->urllen != u2->urllen) {
+ return false;
+ }
+ else {
+ if (u1->protocol & PROTOCOL_MAILTO) {
+ return rspamd_emails_cmp(u1, u2);
+ }
+
+ r = memcmp(u1->string, u2->string, u1->urllen);
+ }
+
+ return r == 0;
+}
+
+static inline bool
+rspamd_urls_host_cmp(struct rspamd_url *u1, struct rspamd_url *u2)
+{
+ int r = 0;
+
+ if (u1->hostlen != u2->hostlen) {
+ return false;
+ }
+ else {
+ r = memcmp(rspamd_url_host_unsafe(u1), rspamd_url_host_unsafe(u2),
+ u1->hostlen);
+ }
+
+ return r == 0;
+}
+
+gsize rspamd_url_decode(gchar *dst, const gchar *src, gsize size)
+{
+ gchar *d, ch, c, decoded;
+ const gchar *s;
+ enum {
+ sw_usual = 0,
+ sw_quoted,
+ sw_quoted_second
+ } state;
+
+ d = dst;
+ s = src;
+
+ state = 0;
+ decoded = 0;
+
+ while (size--) {
+
+ ch = *s++;
+
+ switch (state) {
+ case sw_usual:
+
+ if (ch == '%') {
+ state = sw_quoted;
+ break;
+ }
+ else if (ch == '+') {
+ *d++ = ' ';
+ }
+ else {
+ *d++ = ch;
+ }
+ break;
+
+ case sw_quoted:
+
+ if (ch >= '0' && ch <= '9') {
+ decoded = (ch - '0');
+ state = sw_quoted_second;
+ break;
+ }
+
+ c = (ch | 0x20);
+ if (c >= 'a' && c <= 'f') {
+ decoded = (c - 'a' + 10);
+ state = sw_quoted_second;
+ break;
+ }
+
+ /* the invalid quoted character */
+
+ state = sw_usual;
+
+ *d++ = ch;
+
+ break;
+
+ case sw_quoted_second:
+
+ state = sw_usual;
+
+ if (ch >= '0' && ch <= '9') {
+ ch = ((decoded << 4) + ch - '0');
+ *d++ = ch;
+
+ break;
+ }
+
+ c = (u_char) (ch | 0x20);
+ if (c >= 'a' && c <= 'f') {
+ ch = ((decoded << 4) + c - 'a' + 10);
+
+ *d++ = ch;
+ break;
+ }
+
+ /* the invalid quoted character */
+ break;
+ }
+ }
+
+ return (d - dst);
+}
+
+enum rspamd_url_char_class {
+ RSPAMD_URL_UNRESERVED = (1 << 0),
+ RSPAMD_URL_SUBDELIM = (1 << 1),
+ RSPAMD_URL_PATHSAFE = (1 << 2),
+ RSPAMD_URL_QUERYSAFE = (1 << 3),
+ RSPAMD_URL_FRAGMENTSAFE = (1 << 4),
+ RSPAMD_URL_HOSTSAFE = (1 << 5),
+ RSPAMD_URL_USERSAFE = (1 << 6),
+};
+
+#define RSPAMD_URL_FLAGS_HOSTSAFE (RSPAMD_URL_UNRESERVED | RSPAMD_URL_HOSTSAFE | RSPAMD_URL_SUBDELIM)
+#define RSPAMD_URL_FLAGS_USERSAFE (RSPAMD_URL_UNRESERVED | RSPAMD_URL_USERSAFE | RSPAMD_URL_SUBDELIM)
+#define RSPAMD_URL_FLAGS_PATHSAFE (RSPAMD_URL_UNRESERVED | RSPAMD_URL_PATHSAFE | RSPAMD_URL_SUBDELIM)
+#define RSPAMD_URL_FLAGS_QUERYSAFE (RSPAMD_URL_UNRESERVED | RSPAMD_URL_QUERYSAFE | RSPAMD_URL_SUBDELIM)
+#define RSPAMD_URL_FLAGS_FRAGMENTSAFE (RSPAMD_URL_UNRESERVED | RSPAMD_URL_FRAGMENTSAFE | RSPAMD_URL_SUBDELIM)
+
+static const unsigned char rspamd_url_encoding_classes[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0 /* */, RSPAMD_URL_SUBDELIM /* ! */, 0 /* " */, 0 /* # */,
+ RSPAMD_URL_SUBDELIM /* $ */, 0 /* % */, RSPAMD_URL_SUBDELIM /* & */,
+ RSPAMD_URL_SUBDELIM /* ' */, RSPAMD_URL_SUBDELIM /* ( */,
+ RSPAMD_URL_SUBDELIM /* ) */, RSPAMD_URL_SUBDELIM /* * */,
+ RSPAMD_URL_SUBDELIM /* + */, RSPAMD_URL_SUBDELIM /* , */,
+ RSPAMD_URL_UNRESERVED /* - */, RSPAMD_URL_UNRESERVED /* . */,
+ RSPAMD_URL_PATHSAFE | RSPAMD_URL_QUERYSAFE | RSPAMD_URL_FRAGMENTSAFE /* / */,
+ RSPAMD_URL_UNRESERVED /* 0 */, RSPAMD_URL_UNRESERVED /* 1 */,
+ RSPAMD_URL_UNRESERVED /* 2 */, RSPAMD_URL_UNRESERVED /* 3 */,
+ RSPAMD_URL_UNRESERVED /* 4 */, RSPAMD_URL_UNRESERVED /* 5 */,
+ RSPAMD_URL_UNRESERVED /* 6 */, RSPAMD_URL_UNRESERVED /* 7 */,
+ RSPAMD_URL_UNRESERVED /* 8 */, RSPAMD_URL_UNRESERVED /* 9 */,
+ RSPAMD_URL_USERSAFE | RSPAMD_URL_HOSTSAFE | RSPAMD_URL_PATHSAFE | RSPAMD_URL_QUERYSAFE | RSPAMD_URL_FRAGMENTSAFE /* : */,
+ RSPAMD_URL_SUBDELIM /* ; */, 0 /* < */, RSPAMD_URL_SUBDELIM /* = */, 0 /* > */,
+ RSPAMD_URL_QUERYSAFE | RSPAMD_URL_FRAGMENTSAFE /* ? */,
+ RSPAMD_URL_PATHSAFE | RSPAMD_URL_QUERYSAFE | RSPAMD_URL_FRAGMENTSAFE /* @ */,
+ RSPAMD_URL_UNRESERVED /* A */, RSPAMD_URL_UNRESERVED /* B */,
+ RSPAMD_URL_UNRESERVED /* C */, RSPAMD_URL_UNRESERVED /* D */,
+ RSPAMD_URL_UNRESERVED /* E */, RSPAMD_URL_UNRESERVED /* F */,
+ RSPAMD_URL_UNRESERVED /* G */, RSPAMD_URL_UNRESERVED /* H */,
+ RSPAMD_URL_UNRESERVED /* I */, RSPAMD_URL_UNRESERVED /* J */,
+ RSPAMD_URL_UNRESERVED /* K */, RSPAMD_URL_UNRESERVED /* L */,
+ RSPAMD_URL_UNRESERVED /* M */, RSPAMD_URL_UNRESERVED /* N */,
+ RSPAMD_URL_UNRESERVED /* O */, RSPAMD_URL_UNRESERVED /* P */,
+ RSPAMD_URL_UNRESERVED /* Q */, RSPAMD_URL_UNRESERVED /* R */,
+ RSPAMD_URL_UNRESERVED /* S */, RSPAMD_URL_UNRESERVED /* T */,
+ RSPAMD_URL_UNRESERVED /* U */, RSPAMD_URL_UNRESERVED /* V */,
+ RSPAMD_URL_UNRESERVED /* W */, RSPAMD_URL_UNRESERVED /* X */,
+ RSPAMD_URL_UNRESERVED /* Y */, RSPAMD_URL_UNRESERVED /* Z */,
+ RSPAMD_URL_HOSTSAFE /* [ */, 0 /* \ */, RSPAMD_URL_HOSTSAFE /* ] */, 0 /* ^ */,
+ RSPAMD_URL_UNRESERVED /* _ */, 0 /* ` */, RSPAMD_URL_UNRESERVED /* a */,
+ RSPAMD_URL_UNRESERVED /* b */, RSPAMD_URL_UNRESERVED /* c */,
+ RSPAMD_URL_UNRESERVED /* d */, RSPAMD_URL_UNRESERVED /* e */,
+ RSPAMD_URL_UNRESERVED /* f */, RSPAMD_URL_UNRESERVED /* g */,
+ RSPAMD_URL_UNRESERVED /* h */, RSPAMD_URL_UNRESERVED /* i */,
+ RSPAMD_URL_UNRESERVED /* j */, RSPAMD_URL_UNRESERVED /* k */,
+ RSPAMD_URL_UNRESERVED /* l */, RSPAMD_URL_UNRESERVED /* m */,
+ RSPAMD_URL_UNRESERVED /* n */, RSPAMD_URL_UNRESERVED /* o */,
+ RSPAMD_URL_UNRESERVED /* p */, RSPAMD_URL_UNRESERVED /* q */,
+ RSPAMD_URL_UNRESERVED /* r */, RSPAMD_URL_UNRESERVED /* s */,
+ RSPAMD_URL_UNRESERVED /* t */, RSPAMD_URL_UNRESERVED /* u */,
+ RSPAMD_URL_UNRESERVED /* v */, RSPAMD_URL_UNRESERVED /* w */,
+ RSPAMD_URL_UNRESERVED /* x */, RSPAMD_URL_UNRESERVED /* y */,
+ RSPAMD_URL_UNRESERVED /* z */, 0 /* { */, 0 /* | */, 0 /* } */,
+ RSPAMD_URL_UNRESERVED /* ~ */, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+#define CHECK_URL_COMPONENT(beg, len, flags) \
+ do { \
+ for (i = 0; i < (len); i++) { \
+ if ((rspamd_url_encoding_classes[(guchar) (beg)[i]] & (flags)) == 0) { \
+ dlen += 2; \
+ } \
+ } \
+ } while (0)
+
+#define ENCODE_URL_COMPONENT(beg, len, flags) \
+ do { \
+ for (i = 0; i < (len) && dend > d; i++) { \
+ if ((rspamd_url_encoding_classes[(guchar) (beg)[i]] & (flags)) == 0) { \
+ *d++ = '%'; \
+ *d++ = hexdigests[(guchar) ((beg)[i] >> 4) & 0xf]; \
+ *d++ = hexdigests[(guchar) (beg)[i] & 0xf]; \
+ } \
+ else { \
+ *d++ = (beg)[i]; \
+ } \
+ } \
+ } while (0)
+
+const gchar *
+rspamd_url_encode(struct rspamd_url *url, gsize *pdlen,
+ rspamd_mempool_t *pool)
+{
+ guchar *dest, *d, *dend;
+ static const gchar hexdigests[16] = "0123456789ABCDEF";
+ guint i;
+ gsize dlen = 0;
+
+ g_assert(pdlen != NULL && url != NULL && pool != NULL);
+
+ CHECK_URL_COMPONENT(rspamd_url_host_unsafe(url), url->hostlen,
+ RSPAMD_URL_FLAGS_HOSTSAFE);
+ CHECK_URL_COMPONENT(rspamd_url_user_unsafe(url), url->userlen,
+ RSPAMD_URL_FLAGS_USERSAFE);
+ CHECK_URL_COMPONENT(rspamd_url_data_unsafe(url), url->datalen,
+ RSPAMD_URL_FLAGS_PATHSAFE);
+ CHECK_URL_COMPONENT(rspamd_url_query_unsafe(url), url->querylen,
+ RSPAMD_URL_FLAGS_QUERYSAFE);
+ CHECK_URL_COMPONENT(rspamd_url_fragment_unsafe(url), url->fragmentlen,
+ RSPAMD_URL_FLAGS_FRAGMENTSAFE);
+
+ if (dlen == 0) {
+ *pdlen = url->urllen;
+
+ return url->string;
+ }
+
+ /* Need to encode */
+ dlen += url->urllen + sizeof("telephone://"); /* Protocol hack */
+ dest = rspamd_mempool_alloc(pool, dlen + 1);
+ d = dest;
+ dend = d + dlen;
+
+ if (url->protocollen > 0) {
+ if (!(url->protocol & PROTOCOL_UNKNOWN)) {
+ const gchar *known_proto = rspamd_url_protocol_name(url->protocol);
+ d += rspamd_snprintf((gchar *) d, dend - d,
+ "%s://",
+ known_proto);
+ }
+ else {
+ d += rspamd_snprintf((gchar *) d, dend - d,
+ "%*s://",
+ (gint) url->protocollen, url->string);
+ }
+ }
+ else {
+ d += rspamd_snprintf((gchar *) d, dend - d, "http://");
+ }
+
+ if (url->userlen > 0) {
+ ENCODE_URL_COMPONENT(rspamd_url_user_unsafe(url), url->userlen,
+ RSPAMD_URL_FLAGS_USERSAFE);
+ *d++ = '@';
+ }
+
+ ENCODE_URL_COMPONENT(rspamd_url_host_unsafe(url), url->hostlen,
+ RSPAMD_URL_FLAGS_HOSTSAFE);
+
+ if (url->datalen > 0) {
+ *d++ = '/';
+ ENCODE_URL_COMPONENT(rspamd_url_data_unsafe(url), url->datalen,
+ RSPAMD_URL_FLAGS_PATHSAFE);
+ }
+
+ if (url->querylen > 0) {
+ *d++ = '?';
+ ENCODE_URL_COMPONENT(rspamd_url_query_unsafe(url), url->querylen,
+ RSPAMD_URL_FLAGS_QUERYSAFE);
+ }
+
+ if (url->fragmentlen > 0) {
+ *d++ = '#';
+ ENCODE_URL_COMPONENT(rspamd_url_fragment_unsafe(url), url->fragmentlen,
+ RSPAMD_URL_FLAGS_FRAGMENTSAFE);
+ }
+
+ *pdlen = (d - dest);
+
+ return (const gchar *) dest;
+}
+
+gboolean
+rspamd_url_is_domain(int c)
+{
+ return is_domain((guchar) c);
+}
+
+const gchar *
+rspamd_url_protocol_name(enum rspamd_url_protocol proto)
+{
+ const gchar *ret = "unknown";
+
+ switch (proto) {
+ case PROTOCOL_HTTP:
+ ret = "http";
+ break;
+ case PROTOCOL_HTTPS:
+ ret = "https";
+ break;
+ case PROTOCOL_FTP:
+ ret = "ftp";
+ break;
+ case PROTOCOL_FILE:
+ ret = "file";
+ break;
+ case PROTOCOL_MAILTO:
+ ret = "mailto";
+ break;
+ case PROTOCOL_TELEPHONE:
+ ret = "telephone";
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+enum rspamd_url_protocol
+rspamd_url_protocol_from_string(const gchar *str)
+{
+ enum rspamd_url_protocol ret = PROTOCOL_UNKNOWN;
+
+ if (strcmp(str, "http") == 0) {
+ ret = PROTOCOL_HTTP;
+ }
+ else if (strcmp(str, "https") == 0) {
+ ret = PROTOCOL_HTTPS;
+ }
+ else if (strcmp(str, "mailto") == 0) {
+ ret = PROTOCOL_MAILTO;
+ }
+ else if (strcmp(str, "ftp") == 0) {
+ ret = PROTOCOL_FTP;
+ }
+ else if (strcmp(str, "file") == 0) {
+ ret = PROTOCOL_FILE;
+ }
+ else if (strcmp(str, "telephone") == 0) {
+ ret = PROTOCOL_TELEPHONE;
+ }
+
+ return ret;
+}
+
+
+bool rspamd_url_set_add_or_increase(khash_t(rspamd_url_hash) * set,
+ struct rspamd_url *u,
+ bool enforce_replace)
+{
+ khiter_t k;
+ gint r;
+
+ k = kh_get(rspamd_url_hash, set, u);
+
+ if (k != kh_end(set)) {
+ /* Existing url */
+ struct rspamd_url *ex = kh_key(set, k);
+#define SUSPICIOUS_URL_FLAGS (RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED | RSPAMD_URL_FLAG_ZW_SPACES)
+ if (enforce_replace) {
+ kh_key(set, k) = u;
+ u->count++;
+ }
+ else {
+ if (u->flags & SUSPICIOUS_URL_FLAGS) {
+ if (!(ex->flags & SUSPICIOUS_URL_FLAGS)) {
+ /* Propagate new url to an old one */
+ kh_key(set, k) = u;
+ u->count++;
+ }
+ else {
+ ex->count++;
+ }
+ }
+ else {
+ ex->count++;
+ }
+ }
+
+ return false;
+ }
+ else {
+ k = kh_put(rspamd_url_hash, set, u, &r);
+ }
+
+ return true;
+}
+
+struct rspamd_url *
+rspamd_url_set_add_or_return(khash_t(rspamd_url_hash) * set,
+ struct rspamd_url *u)
+{
+ khiter_t k;
+ gint r;
+
+ if (set) {
+ k = kh_get(rspamd_url_hash, set, u);
+
+ if (k != kh_end(set)) {
+ return kh_key(set, k);
+ }
+ else {
+ k = kh_put(rspamd_url_hash, set, u, &r);
+
+ return kh_key(set, k);
+ }
+ }
+
+ return NULL;
+}
+
+bool rspamd_url_host_set_add(khash_t(rspamd_url_host_hash) * set,
+ struct rspamd_url *u)
+{
+ gint r;
+
+ if (set) {
+ kh_put(rspamd_url_host_hash, set, u, &r);
+
+ if (r == 0) {
+ return false;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+bool rspamd_url_set_has(khash_t(rspamd_url_hash) * set, struct rspamd_url *u)
+{
+ khiter_t k;
+
+ if (set) {
+ k = kh_get(rspamd_url_hash, set, u);
+
+ if (k == kh_end(set)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+bool rspamd_url_host_set_has(khash_t(rspamd_url_host_hash) * set, struct rspamd_url *u)
+{
+ khiter_t k;
+
+ if (set) {
+ k = kh_get(rspamd_url_host_hash, set, u);
+
+ if (k == kh_end(set)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+bool rspamd_url_flag_from_string(const gchar *str, gint *flag)
+{
+ gint h = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+ str, strlen(str), 0);
+
+ for (int i = 0; i < G_N_ELEMENTS(url_flag_names); i++) {
+ if (url_flag_names[i].hash == h) {
+ *flag |= url_flag_names[i].flag;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+const gchar *
+rspamd_url_flag_to_string(int flag)
+{
+ for (int i = 0; i < G_N_ELEMENTS(url_flag_names); i++) {
+ if (url_flag_names[i].flag & flag) {
+ return url_flag_names[i].name;
+ }
+ }
+
+ return NULL;
+}
+
+inline int
+rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2)
+{
+ int min_len = MIN(u1->urllen, u2->urllen);
+ int r;
+
+ if (u1->protocol != u2->protocol) {
+ return u1->protocol - u2->protocol;
+ }
+
+ if (u1->protocol & PROTOCOL_MAILTO) {
+ /* Emails specialisation (hosts must be compared in a case insensitive matter */
+ min_len = MIN(u1->hostlen, u2->hostlen);
+
+ if ((r = rspamd_lc_cmp(rspamd_url_host_unsafe(u1),
+ rspamd_url_host_unsafe(u2), min_len)) == 0) {
+ if (u1->hostlen == u2->hostlen) {
+ if (u1->userlen != u2->userlen || u1->userlen == 0) {
+ r = (int) u1->userlen - (int) u2->userlen;
+ }
+ else {
+ r = memcmp(rspamd_url_user_unsafe(u1),
+ rspamd_url_user_unsafe(u2),
+ u1->userlen);
+ }
+ }
+ else {
+ r = u1->hostlen - u2->hostlen;
+ }
+ }
+ }
+ else {
+ if (u1->urllen != u2->urllen) {
+ /* Different length, compare common part and then compare length */
+ r = memcmp(u1->string, u2->string, min_len);
+
+ if (r == 0) {
+ r = u1->urllen - u2->urllen;
+ }
+ }
+ else {
+ /* Equal length */
+ r = memcmp(u1->string, u2->string, u1->urllen);
+ }
+ }
+
+ return r;
+}
+
+int rspamd_url_cmp_qsort(const void *_u1, const void *_u2)
+{
+ const struct rspamd_url *u1 = *(struct rspamd_url **) _u1,
+ *u2 = *(struct rspamd_url **) _u2;
+
+ return rspamd_url_cmp(u1, u2);
+}
diff --git a/src/libserver/url.h b/src/libserver/url.h
new file mode 100644
index 0000000..d1fb8c9
--- /dev/null
+++ b/src/libserver/url.h
@@ -0,0 +1,430 @@
+/* URL check functions */
+#ifndef URL_H
+#define URL_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "khash.h"
+#include "fstring.h"
+#include "libutil/cxx/utf8_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_mime_text_part;
+
+enum rspamd_url_flags {
+ RSPAMD_URL_FLAG_PHISHED = 1u << 0u,
+ RSPAMD_URL_FLAG_NUMERIC = 1u << 1u,
+ RSPAMD_URL_FLAG_OBSCURED = 1u << 2u,
+ RSPAMD_URL_FLAG_REDIRECTED = 1u << 3u,
+ RSPAMD_URL_FLAG_HTML_DISPLAYED = 1u << 4u,
+ RSPAMD_URL_FLAG_FROM_TEXT = 1u << 5u,
+ RSPAMD_URL_FLAG_SUBJECT = 1u << 6u,
+ RSPAMD_URL_FLAG_HOSTENCODED = 1u << 7u,
+ RSPAMD_URL_FLAG_SCHEMAENCODED = 1u << 8u,
+ RSPAMD_URL_FLAG_PATHENCODED = 1u << 9u,
+ RSPAMD_URL_FLAG_QUERYENCODED = 1u << 10u,
+ RSPAMD_URL_FLAG_MISSINGSLASHES = 1u << 11u,
+ RSPAMD_URL_FLAG_IDN = 1u << 12u,
+ RSPAMD_URL_FLAG_HAS_PORT = 1u << 13u,
+ RSPAMD_URL_FLAG_HAS_USER = 1u << 14u,
+ RSPAMD_URL_FLAG_SCHEMALESS = 1u << 15u,
+ RSPAMD_URL_FLAG_UNNORMALISED = 1u << 16u,
+ RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
+ RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
+ RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
+ RSPAMD_URL_FLAG_QUERY = 1u << 20u,
+ RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
+ RSPAMD_URL_FLAG_NO_TLD = 1u << 22u,
+ RSPAMD_URL_FLAG_TRUNCATED = 1u << 23u,
+ RSPAMD_URL_FLAG_REDIRECT_TARGET = 1u << 24u,
+ RSPAMD_URL_FLAG_INVISIBLE = 1u << 25u,
+ RSPAMD_URL_FLAG_SPECIAL = 1u << 26u,
+
+};
+#define RSPAMD_URL_MAX_FLAG_SHIFT (26u)
+
+struct rspamd_url_tag {
+ const gchar *data;
+ struct rspamd_url_tag *prev, *next;
+};
+
+struct rspamd_url_ext;
+/**
+ * URL structure
+ */
+struct rspamd_url {
+ char *string;
+ char *raw;
+ struct rspamd_url_ext *ext;
+
+ uint32_t flags;
+
+ uint8_t protocol;
+ uint8_t protocollen;
+
+ uint16_t hostshift;
+ uint16_t datashift;
+ uint16_t queryshift;
+ uint16_t fragmentshift;
+ uint16_t tldshift;
+ guint16 usershift;
+ guint16 userlen;
+
+ uint16_t hostlen;
+ uint16_t datalen;
+ uint16_t querylen;
+ uint16_t fragmentlen;
+ uint16_t tldlen;
+ uint16_t count;
+ uint16_t urllen;
+ uint16_t rawlen;
+
+ /* Absolute order of the URL in a message */
+ uint16_t order;
+ /* Order of the URL in a specific part of message */
+ uint16_t part_order;
+};
+
+/**
+ * Rarely used url fields
+ */
+struct rspamd_url_ext {
+ gchar *visible_part;
+ struct rspamd_url *linked_url;
+
+ guint16 port;
+};
+
+#define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
+#define rspamd_url_user_unsafe(u) ((u)->string + (u)->usershift)
+
+#define rspamd_url_host(u) ((u)->hostlen > 0 ? (u)->string + (u)->hostshift : NULL)
+#define rspamd_url_host_unsafe(u) ((u)->string + (u)->hostshift)
+#define rspamd_url_tld_unsafe(u) ((u)->string + (u)->tldshift)
+
+#define rspamd_url_data_unsafe(u) ((u)->string + (u)->datashift)
+#define rspamd_url_query_unsafe(u) ((u)->string + (u)->queryshift)
+#define rspamd_url_fragment_unsafe(u) ((u)->string + (u)->fragmentshift)
+
+enum uri_errno {
+ URI_ERRNO_OK = 0, /* Parsing went well */
+ URI_ERRNO_EMPTY, /* The URI string was empty */
+ URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */
+ URI_ERRNO_INVALID_PORT, /* Port number is bad */
+ URI_ERRNO_BAD_ENCODING, /* Bad characters encoding */
+ URI_ERRNO_BAD_FORMAT,
+ URI_ERRNO_TLD_MISSING,
+ URI_ERRNO_HOST_MISSING,
+ URI_ERRNO_TOO_LONG,
+};
+
+enum rspamd_url_protocol {
+ PROTOCOL_FILE = 1u << 0u,
+ PROTOCOL_FTP = 1u << 1u,
+ PROTOCOL_HTTP = 1u << 2u,
+ PROTOCOL_HTTPS = 1u << 3u,
+ PROTOCOL_MAILTO = 1u << 4u,
+ PROTOCOL_TELEPHONE = 1u << 5u,
+ PROTOCOL_UNKNOWN = 1u << 7u,
+};
+
+enum rspamd_url_parse_flags {
+ RSPAMD_URL_PARSE_TEXT = 0u,
+ RSPAMD_URL_PARSE_HREF = (1u << 0u),
+ RSPAMD_URL_PARSE_CHECK = (1u << 1u),
+};
+
+enum rspamd_url_find_type {
+ RSPAMD_URL_FIND_ALL = 0,
+ RSPAMD_URL_FIND_STRICT,
+};
+
+/**
+ * Initialize url library
+ * @param cfg
+ */
+void rspamd_url_init(const gchar *tld_file);
+
+void rspamd_url_deinit(void);
+
+/*
+ * Parse urls inside text
+ * @param pool memory pool
+ * @param task task object
+ * @param part current text part
+ * @param is_html turn on html heuristic
+ */
+void rspamd_url_text_extract(rspamd_mempool_t *pool,
+ struct rspamd_task *task,
+ struct rspamd_mime_text_part *part,
+ uint16_t *cur_order,
+ enum rspamd_url_find_type how);
+
+/*
+ * Parse a single url into an uri structure
+ * @param pool memory pool
+ * @param uristring text form of url
+ * @param uri url object, must be pre allocated
+ */
+enum uri_errno rspamd_url_parse(struct rspamd_url *uri,
+ gchar *uristring,
+ gsize len,
+ rspamd_mempool_t *pool,
+ enum rspamd_url_parse_flags flags);
+
+/*
+ * Try to extract url from a text
+ * @param pool memory pool
+ * @param begin begin of text
+ * @param len length of text
+ * @param start storage for start position of url found (or NULL)
+ * @param end storage for end position of url found (or NULL)
+ * @param url_str storage for url string(or NULL)
+ * @return TRUE if url is found in specified text
+ */
+gboolean rspamd_url_find(rspamd_mempool_t *pool,
+ const gchar *begin, gsize len,
+ gchar **url_str,
+ enum rspamd_url_find_type how,
+ goffset *url_pos,
+ gboolean *prefix_added);
+
+/*
+ * Return text representation of url parsing error
+ */
+const gchar *rspamd_url_strerror(int err);
+
+
+/**
+ * Find TLD for a specified host string
+ * @param in input host
+ * @param inlen length of input
+ * @param out output rspamd_ftok_t with tld position
+ * @return TRUE if tld has been found
+ */
+gboolean rspamd_url_find_tld(const gchar *in, gsize inlen, rspamd_ftok_t *out);
+
+typedef gboolean (*url_insert_function)(struct rspamd_url *url,
+ gsize start_offset, gsize end_offset, void *ud);
+
+/**
+ * Search for multiple urls in text and call `func` for each url found
+ * @param pool
+ * @param in
+ * @param inlen
+ * @param is_html
+ * @param func
+ * @param ud
+ */
+void rspamd_url_find_multiple(rspamd_mempool_t *pool,
+ const gchar *in, gsize inlen,
+ enum rspamd_url_find_type how,
+ GPtrArray *nlines,
+ url_insert_function func,
+ gpointer ud);
+
+/**
+ * Search for a single url in text and call `func` for each url found
+ * @param pool
+ * @param in
+ * @param inlen
+ * @param is_html
+ * @param func
+ * @param ud
+ */
+void rspamd_url_find_single(rspamd_mempool_t *pool,
+ const gchar *in, gsize inlen,
+ enum rspamd_url_find_type how,
+ url_insert_function func,
+ gpointer ud);
+
+/**
+ * Generic callback to insert URLs into rspamd_task
+ * @param url
+ * @param start_offset
+ * @param end_offset
+ * @param ud
+ */
+gboolean rspamd_url_task_subject_callback(struct rspamd_url *url,
+ gsize start_offset,
+ gsize end_offset, gpointer ud);
+
+/**
+ * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
+ * @param dst
+ * @param src
+ * @param size
+ * @return
+ */
+gsize rspamd_url_decode(gchar *dst, const gchar *src, gsize size);
+
+/**
+ * Encode url if needed. In this case, memory is allocated from the specific pool.
+ * Returns pointer to begin and encoded length in `dlen`
+ * @param url
+ * @param pool
+ * @return
+ */
+const gchar *rspamd_url_encode(struct rspamd_url *url, gsize *dlen,
+ rspamd_mempool_t *pool);
+
+
+/**
+ * Returns if a character is domain character
+ * @param c
+ * @return
+ */
+gboolean rspamd_url_is_domain(int c);
+
+/**
+ * Returns symbolic name for protocol
+ * @param proto
+ * @return
+ */
+const gchar *rspamd_url_protocol_name(enum rspamd_url_protocol proto);
+
+
+/**
+ * Converts string to a numeric protocol
+ * @param str
+ * @return
+ */
+enum rspamd_url_protocol rspamd_url_protocol_from_string(const gchar *str);
+
+/**
+ * Converts string to a url flag
+ * @param str
+ * @param flag
+ * @return
+ */
+bool rspamd_url_flag_from_string(const gchar *str, gint *flag);
+
+/**
+ * Converts url flag to a string
+ * @param flag
+ * @return
+ */
+const gchar *rspamd_url_flag_to_string(int flag);
+
+/* Defines sets of urls indexed by url as is */
+KHASH_DECLARE(rspamd_url_hash, struct rspamd_url *, char);
+KHASH_DECLARE(rspamd_url_host_hash, struct rspamd_url *, char);
+
+/* Convenience functions for url sets */
+/**
+ * Add an url to set or increase the existing url count
+ * @param set
+ * @param u
+ * @return true if a new url has been added
+ */
+bool rspamd_url_set_add_or_increase(khash_t(rspamd_url_hash) * set,
+ struct rspamd_url *u,
+ bool enforce_replace);
+
+/**
+ * Same as rspamd_url_set_add_or_increase but returns the existing url if found
+ * @param set
+ * @param u
+ * @return
+ */
+struct rspamd_url *rspamd_url_set_add_or_return(khash_t(rspamd_url_hash) * set,
+ struct rspamd_url *u);
+/**
+ * Helper for url host set
+ * @param set
+ * @param u
+ * @return
+ */
+bool rspamd_url_host_set_add(khash_t(rspamd_url_host_hash) * set,
+ struct rspamd_url *u);
+/**
+ * Checks if a url is in set
+ * @param set
+ * @param u
+ * @return
+ */
+bool rspamd_url_set_has(khash_t(rspamd_url_hash) * set, struct rspamd_url *u);
+
+bool rspamd_url_host_set_has(khash_t(rspamd_url_host_hash) * set, struct rspamd_url *u);
+
+/**
+ * Compares two urls (similar to C comparison functions) lexicographically
+ * @param u1
+ * @param u2
+ * @return
+ */
+int rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2);
+
+/**
+ * Same but used for qsort to sort `struct rspamd_url *[]` array
+ * @param u1
+ * @param u2
+ * @return
+ */
+int rspamd_url_cmp_qsort(const void *u1, const void *u2);
+
+/**
+ * Returns a port for some url
+ * @param u
+ * @return
+ */
+static RSPAMD_PURE_FUNCTION inline uint16_t rspamd_url_get_port(struct rspamd_url *u)
+{
+ if ((u->flags & RSPAMD_URL_FLAG_HAS_PORT) && u->ext) {
+ return u->ext->port;
+ }
+ else {
+ /* Assume standard port */
+ if (u->protocol == PROTOCOL_HTTPS) {
+ return 443;
+ }
+ else {
+ return 80;
+ }
+ }
+}
+
+/**
+ * Returns a port for some url if it is set
+ * @param u
+ * @return
+ */
+static RSPAMD_PURE_FUNCTION inline uint16_t rspamd_url_get_port_if_special(struct rspamd_url *u)
+{
+ if ((u->flags & RSPAMD_URL_FLAG_HAS_PORT) && u->ext) {
+ return u->ext->port;
+ }
+
+ return 0;
+}
+
+/**
+ * Normalize unicode input and set out url flags as appropriate
+ * @param pool
+ * @param input
+ * @param len_out (must be &var)
+ * @param url_flags_out (must be just a var with no dereference)
+ */
+#define rspamd_url_normalise_propagate_flags(pool, input, len_out, url_flags_out) \
+ do { \
+ enum rspamd_utf8_normalise_result norm_res; \
+ norm_res = rspamd_normalise_unicode_inplace((input), (len_out)); \
+ if (norm_res & RSPAMD_UNICODE_NORM_UNNORMAL) { \
+ url_flags_out |= RSPAMD_URL_FLAG_UNNORMALISED; \
+ } \
+ if (norm_res & RSPAMD_UNICODE_NORM_ZERO_SPACES) { \
+ url_flags_out |= RSPAMD_URL_FLAG_ZW_SPACES; \
+ } \
+ if (norm_res & (RSPAMD_UNICODE_NORM_ERROR)) { \
+ url_flags_out |= RSPAMD_URL_FLAG_OBSCURED; \
+ } \
+ } while (0)
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c
new file mode 100644
index 0000000..74a3cf8
--- /dev/null
+++ b/src/libserver/worker_util.c
@@ -0,0 +1,2313 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rspamd.h"
+#include "lua/lua_common.h"
+#include "worker_util.h"
+#include "unix-std.h"
+#include "utlist.h"
+#include "ottery.h"
+#include "rspamd_control.h"
+#include "libserver/maps/map.h"
+#include "libserver/maps/map_private.h"
+#include "libserver/http/http_private.h"
+#include "libserver/http/http_router.h"
+#include "libutil/rrd.h"
+
+/* sys/resource.h */
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+/* pwd and grp */
+#ifdef HAVE_PWD_H
+#include <pwd.h>
+#endif
+#ifdef HAVE_GRP_H
+#include <grp.h>
+#endif
+#ifdef HAVE_LIBUTIL_H
+#include <libutil.h>
+#endif
+#include "zlib.h"
+
+#ifdef HAVE_UCONTEXT_H
+#include <ucontext.h>
+#elif defined(HAVE_SYS_UCONTEXT_H)
+#include <sys/ucontext.h>
+#endif
+
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#include <math.h>
+
+#endif
+
+#include "contrib/libev/ev.h"
+#include "libstat/stat_api.h"
+
+struct rspamd_worker *rspamd_current_worker = NULL;
+
+/* Forward declaration */
+static void rspamd_worker_heartbeat_start(struct rspamd_worker *,
+ struct ev_loop *);
+
+static void rspamd_worker_ignore_signal(struct rspamd_worker_signal_handler *);
+/**
+ * Return worker's control structure by its type
+ * @param type
+ * @return worker's control structure or NULL
+ */
+worker_t *
+rspamd_get_worker_by_type(struct rspamd_config *cfg, GQuark type)
+{
+ worker_t **pwrk;
+
+ pwrk = cfg->compiled_workers;
+ while (pwrk && *pwrk) {
+ if (rspamd_check_worker(cfg, *pwrk)) {
+ if (g_quark_from_string((*pwrk)->name) == type) {
+ return *pwrk;
+ }
+ }
+
+ pwrk++;
+ }
+
+ return NULL;
+}
+
+static void
+rspamd_worker_check_finished(EV_P_ ev_timer *w, int revents)
+{
+ int *pnchecks = (int *) w->data;
+
+ if (*pnchecks > SOFT_SHUTDOWN_TIME * 10) {
+ msg_warn("terminating worker before finishing of terminate handlers");
+ ev_break(EV_A_ EVBREAK_ONE);
+ }
+ else {
+ int refcount = ev_active_cnt(EV_A);
+
+ if (refcount == 1) {
+ ev_break(EV_A_ EVBREAK_ONE);
+ }
+ else {
+ ev_timer_again(EV_A_ w);
+ }
+ }
+}
+
+static gboolean
+rspamd_worker_finalize(gpointer user_data)
+{
+ struct rspamd_task *task = user_data;
+
+ if (!(task->flags & RSPAMD_TASK_FLAG_PROCESSING)) {
+ msg_info_task("finishing actions has been processed, terminating");
+ /* ev_break (task->event_loop, EVBREAK_ALL); */
+ task->worker->state = rspamd_worker_wanna_die;
+ rspamd_session_destroy(task->s);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_worker_call_finish_handlers(struct rspamd_worker *worker)
+{
+ struct rspamd_task *task;
+ struct rspamd_config *cfg = worker->srv->cfg;
+ struct rspamd_abstract_worker_ctx *ctx;
+ struct rspamd_config_cfg_lua_script *sc;
+
+ if (cfg->on_term_scripts) {
+ ctx = (struct rspamd_abstract_worker_ctx *) worker->ctx;
+ /* Create a fake task object for async events */
+ task = rspamd_task_new(worker, cfg, NULL, NULL, ctx->event_loop, FALSE);
+ task->resolver = ctx->resolver;
+ task->flags |= RSPAMD_TASK_FLAG_PROCESSING;
+ task->s = rspamd_session_create(task->task_pool,
+ rspamd_worker_finalize,
+ NULL,
+ (event_finalizer_t) rspamd_task_free,
+ task);
+
+ DL_FOREACH(cfg->on_term_scripts, sc)
+ {
+ lua_call_finish_script(sc, task);
+ }
+
+ task->flags &= ~RSPAMD_TASK_FLAG_PROCESSING;
+
+ if (rspamd_session_pending(task->s)) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static void
+rspamd_worker_terminate_handlers(struct rspamd_worker *w)
+{
+ if (w->nconns == 0 &&
+ (!(w->flags & RSPAMD_WORKER_SCANNER) || w->srv->cfg->on_term_scripts == NULL)) {
+ /*
+ * We are here either:
+ * - No active connections are represented
+ * - No term scripts are registered
+ * - Worker is not a scanner, so it can die safely
+ */
+ w->state = rspamd_worker_wanna_die;
+ }
+ else {
+ if (w->nconns > 0) {
+ /*
+ * Wait until all connections are terminated
+ */
+ w->state = rspamd_worker_wait_connections;
+ }
+ else {
+ /*
+ * Start finish scripts
+ */
+ if (w->state != rspamd_worker_wait_final_scripts) {
+ w->state = rspamd_worker_wait_final_scripts;
+
+ if ((w->flags & RSPAMD_WORKER_SCANNER) &&
+ rspamd_worker_call_finish_handlers(w)) {
+ msg_info("performing async finishing actions");
+ w->state = rspamd_worker_wait_final_scripts;
+ }
+ else {
+ /*
+ * We are done now
+ */
+ msg_info("no async finishing actions, terminating");
+ w->state = rspamd_worker_wanna_die;
+ }
+ }
+ }
+ }
+}
+
+static void
+rspamd_worker_on_delayed_shutdown(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_worker *worker = (struct rspamd_worker *) w->data;
+
+ worker->state = rspamd_worker_wanna_die;
+ ev_timer_stop(EV_A_ w);
+ ev_break(loop, EVBREAK_ALL);
+}
+
+static void
+rspamd_worker_shutdown_check(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_worker *worker = (struct rspamd_worker *) w->data;
+
+ if (worker->state != rspamd_worker_wanna_die) {
+ rspamd_worker_terminate_handlers(worker);
+
+ if (worker->state == rspamd_worker_wanna_die) {
+ /* We are done, kill event loop */
+ ev_timer_stop(EV_A_ w);
+ ev_break(EV_A_ EVBREAK_ALL);
+ }
+ else {
+ /* Try again later */
+ ev_timer_again(EV_A_ w);
+ }
+ }
+ else {
+ ev_timer_stop(EV_A_ w);
+ ev_break(EV_A_ EVBREAK_ALL);
+ }
+}
+
+/*
+ * Config reload is designed by sending sigusr2 to active workers and pending shutdown of them
+ */
+static gboolean
+rspamd_worker_usr2_handler(struct rspamd_worker_signal_handler *sigh, void *arg)
+{
+ /* Do not accept new connections, preparing to end worker's process */
+ if (sigh->worker->state == rspamd_worker_state_running) {
+ static ev_timer shutdown_ev, shutdown_check_ev;
+ ev_tstamp shutdown_ts;
+
+ if (sigh->worker->flags & RSPAMD_WORKER_NO_TERMINATE_DELAY) {
+ shutdown_ts = 0.0;
+ }
+ else {
+ shutdown_ts = MAX(SOFT_SHUTDOWN_TIME,
+ sigh->worker->srv->cfg->task_timeout * 2.0);
+ }
+
+ rspamd_worker_ignore_signal(sigh);
+ sigh->worker->state = rspamd_worker_state_terminating;
+
+ rspamd_default_log_function(G_LOG_LEVEL_INFO,
+ sigh->worker->srv->server_pool->tag.tagname,
+ sigh->worker->srv->server_pool->tag.uid,
+ G_STRFUNC,
+ "worker's shutdown is pending in %.2f sec",
+ shutdown_ts);
+
+ /* Soft shutdown timer */
+ shutdown_ev.data = sigh->worker;
+ ev_timer_init(&shutdown_ev, rspamd_worker_on_delayed_shutdown,
+ shutdown_ts, 0.0);
+ ev_timer_start(sigh->event_loop, &shutdown_ev);
+
+ if (!(sigh->worker->flags & RSPAMD_WORKER_NO_TERMINATE_DELAY)) {
+ /* This timer checks if we are ready to die and is called frequently */
+ shutdown_check_ev.data = sigh->worker;
+ ev_timer_init(&shutdown_check_ev, rspamd_worker_shutdown_check,
+ 0.5, 0.5);
+ ev_timer_start(sigh->event_loop, &shutdown_check_ev);
+ }
+
+ rspamd_worker_stop_accept(sigh->worker);
+ }
+
+ /* No more signals */
+ return FALSE;
+}
+
+/*
+ * Reopen log is designed by sending sigusr1 to active workers and pending shutdown of them
+ */
+static gboolean
+rspamd_worker_usr1_handler(struct rspamd_worker_signal_handler *sigh, void *arg)
+{
+ struct rspamd_main *rspamd_main = sigh->worker->srv;
+
+ rspamd_log_reopen(sigh->worker->srv->logger, rspamd_main->cfg, -1, -1);
+ msg_info_main("logging reinitialised");
+
+ /* Get more signals */
+ return TRUE;
+}
+
+static gboolean
+rspamd_worker_term_handler(struct rspamd_worker_signal_handler *sigh, void *arg)
+{
+ if (sigh->worker->state == rspamd_worker_state_running) {
+ static ev_timer shutdown_ev, shutdown_check_ev;
+ ev_tstamp shutdown_ts;
+
+ if (sigh->worker->flags & RSPAMD_WORKER_NO_TERMINATE_DELAY) {
+ shutdown_ts = 0.0;
+ }
+ else {
+ shutdown_ts = MAX(SOFT_SHUTDOWN_TIME,
+ sigh->worker->srv->cfg->task_timeout * 2.0);
+ }
+
+ rspamd_worker_ignore_signal(sigh);
+ sigh->worker->state = rspamd_worker_state_terminating;
+ rspamd_default_log_function(G_LOG_LEVEL_INFO,
+ sigh->worker->srv->server_pool->tag.tagname,
+ sigh->worker->srv->server_pool->tag.uid,
+ G_STRFUNC,
+ "terminating after receiving signal %s",
+ g_strsignal(sigh->signo));
+
+ rspamd_worker_stop_accept(sigh->worker);
+ rspamd_worker_terminate_handlers(sigh->worker);
+
+ /* Check if we are ready to die */
+ if (sigh->worker->state != rspamd_worker_wanna_die) {
+ /* This timer is called when we have no choices but to die */
+ shutdown_ev.data = sigh->worker;
+ ev_timer_init(&shutdown_ev, rspamd_worker_on_delayed_shutdown,
+ shutdown_ts, 0.0);
+ ev_timer_start(sigh->event_loop, &shutdown_ev);
+
+ if (!(sigh->worker->flags & RSPAMD_WORKER_NO_TERMINATE_DELAY)) {
+ /* This timer checks if we are ready to die and is called frequently */
+ shutdown_check_ev.data = sigh->worker;
+ ev_timer_init(&shutdown_check_ev, rspamd_worker_shutdown_check,
+ 0.5, 0.5);
+ ev_timer_start(sigh->event_loop, &shutdown_check_ev);
+ }
+ }
+ else {
+ /* Flag to die has been already set */
+ ev_break(sigh->event_loop, EVBREAK_ALL);
+ }
+ }
+
+ /* Stop reacting on signals */
+ return FALSE;
+}
+
+static void
+rspamd_worker_signal_handle(EV_P_ ev_signal *w, int revents)
+{
+ struct rspamd_worker_signal_handler *sigh =
+ (struct rspamd_worker_signal_handler *) w->data;
+ struct rspamd_worker_signal_handler_elt *cb, *cbtmp;
+
+ /* Call all signal handlers registered */
+ DL_FOREACH_SAFE(sigh->cb, cb, cbtmp)
+ {
+ if (!cb->handler(sigh, cb->handler_data)) {
+ DL_DELETE(sigh->cb, cb);
+ g_free(cb);
+ }
+ }
+}
+
+static void
+rspamd_worker_ignore_signal(struct rspamd_worker_signal_handler *sigh)
+{
+ sigset_t set;
+
+ ev_signal_stop(sigh->event_loop, &sigh->ev_sig);
+ sigemptyset(&set);
+ sigaddset(&set, sigh->signo);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+}
+
+static void
+rspamd_worker_default_signal(int signo)
+{
+ struct sigaction sig;
+
+ sigemptyset(&sig.sa_mask);
+ sigaddset(&sig.sa_mask, signo);
+ sig.sa_handler = SIG_DFL;
+ sig.sa_flags = 0;
+ sigaction(signo, &sig, NULL);
+}
+
+static void
+rspamd_sigh_free(void *p)
+{
+ struct rspamd_worker_signal_handler *sigh = p;
+ struct rspamd_worker_signal_handler_elt *cb, *tmp;
+
+ DL_FOREACH_SAFE(sigh->cb, cb, tmp)
+ {
+ DL_DELETE(sigh->cb, cb);
+ g_free(cb);
+ }
+
+ ev_signal_stop(sigh->event_loop, &sigh->ev_sig);
+ rspamd_worker_default_signal(sigh->signo);
+ g_free(sigh);
+}
+
+void rspamd_worker_set_signal_handler(int signo, struct rspamd_worker *worker,
+ struct ev_loop *event_loop,
+ rspamd_worker_signal_cb_t handler,
+ void *handler_data)
+{
+ struct rspamd_worker_signal_handler *sigh;
+ struct rspamd_worker_signal_handler_elt *cb;
+
+ sigh = g_hash_table_lookup(worker->signal_events, GINT_TO_POINTER(signo));
+
+ if (sigh == NULL) {
+ sigh = g_malloc0(sizeof(*sigh));
+ sigh->signo = signo;
+ sigh->worker = worker;
+ sigh->event_loop = event_loop;
+ sigh->enabled = TRUE;
+
+ sigh->ev_sig.data = sigh;
+ ev_signal_init(&sigh->ev_sig, rspamd_worker_signal_handle, signo);
+ ev_signal_start(event_loop, &sigh->ev_sig);
+
+ g_hash_table_insert(worker->signal_events,
+ GINT_TO_POINTER(signo),
+ sigh);
+ }
+
+ cb = g_malloc0(sizeof(*cb));
+ cb->handler = handler;
+ cb->handler_data = handler_data;
+ DL_APPEND(sigh->cb, cb);
+}
+
+void rspamd_worker_init_signals(struct rspamd_worker *worker,
+ struct ev_loop *event_loop)
+{
+ /* A set of terminating signals */
+ rspamd_worker_set_signal_handler(SIGTERM, worker, event_loop,
+ rspamd_worker_term_handler, NULL);
+ rspamd_worker_set_signal_handler(SIGINT, worker, event_loop,
+ rspamd_worker_term_handler, NULL);
+ rspamd_worker_set_signal_handler(SIGHUP, worker, event_loop,
+ rspamd_worker_term_handler, NULL);
+
+ /* Special purpose signals */
+ rspamd_worker_set_signal_handler(SIGUSR1, worker, event_loop,
+ rspamd_worker_usr1_handler, NULL);
+ rspamd_worker_set_signal_handler(SIGUSR2, worker, event_loop,
+ rspamd_worker_usr2_handler, NULL);
+}
+
+
+struct ev_loop *
+rspamd_prepare_worker(struct rspamd_worker *worker, const char *name,
+ rspamd_accept_handler hdl)
+{
+ struct ev_loop *event_loop;
+ GList *cur;
+ struct rspamd_worker_listen_socket *ls;
+ struct rspamd_worker_accept_event *accept_ev;
+
+ worker->signal_events = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, rspamd_sigh_free);
+
+ event_loop = ev_loop_new(rspamd_config_ev_backend_get(worker->srv->cfg));
+
+ worker->srv->event_loop = event_loop;
+
+ rspamd_worker_init_signals(worker, event_loop);
+ rspamd_control_worker_add_default_cmd_handlers(worker, event_loop);
+ rspamd_worker_heartbeat_start(worker, event_loop);
+ rspamd_redis_pool_config(worker->srv->cfg->redis_pool,
+ worker->srv->cfg, event_loop);
+
+ /* Accept all sockets */
+ if (hdl) {
+ cur = worker->cf->listen_socks;
+
+ while (cur) {
+ ls = cur->data;
+
+ if (ls->fd != -1) {
+ accept_ev = g_malloc0(sizeof(*accept_ev));
+ accept_ev->event_loop = event_loop;
+ accept_ev->accept_ev.data = worker;
+ ev_io_init(&accept_ev->accept_ev, hdl, ls->fd, EV_READ);
+ ev_io_start(event_loop, &accept_ev->accept_ev);
+
+ DL_APPEND(worker->accept_events, accept_ev);
+ }
+
+ cur = g_list_next(cur);
+ }
+ }
+
+ return event_loop;
+}
+
+void rspamd_worker_stop_accept(struct rspamd_worker *worker)
+{
+ struct rspamd_worker_accept_event *cur, *tmp;
+
+ /* Remove all events */
+ DL_FOREACH_SAFE(worker->accept_events, cur, tmp)
+ {
+
+ if (ev_can_stop(&cur->accept_ev)) {
+ ev_io_stop(cur->event_loop, &cur->accept_ev);
+ }
+
+
+ if (ev_can_stop(&cur->throttling_ev)) {
+ ev_timer_stop(cur->event_loop, &cur->throttling_ev);
+ }
+
+ g_free(cur);
+ }
+
+ /* XXX: we need to do it much later */
+#if 0
+ g_hash_table_iter_init (&it, worker->signal_events);
+
+ while (g_hash_table_iter_next (&it, &k, &v)) {
+ sigh = (struct rspamd_worker_signal_handler *)v;
+ g_hash_table_iter_steal (&it);
+
+ if (sigh->enabled) {
+ event_del (&sigh->ev);
+ }
+
+ g_free (sigh);
+ }
+
+ g_hash_table_unref (worker->signal_events);
+#endif
+}
+
+static rspamd_fstring_t *
+rspamd_controller_maybe_compress(struct rspamd_http_connection_entry *entry,
+ rspamd_fstring_t *buf, struct rspamd_http_message *msg)
+{
+ if (entry->support_gzip) {
+ if (rspamd_fstring_gzip(&buf)) {
+ rspamd_http_message_add_header(msg, "Content-Encoding", "gzip");
+ }
+ }
+
+ return buf;
+}
+
+void rspamd_controller_send_error(struct rspamd_http_connection_entry *entry,
+ gint code, const gchar *error_msg, ...)
+{
+ struct rspamd_http_message *msg;
+ va_list args;
+ rspamd_fstring_t *reply;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+
+ va_start(args, error_msg);
+ msg->status = rspamd_fstring_new();
+ rspamd_vprintf_fstring(&msg->status, error_msg, args);
+ va_end(args);
+
+ msg->date = time(NULL);
+ msg->code = code;
+ reply = rspamd_fstring_sized_new(msg->status->len + 16);
+ rspamd_printf_fstring(&reply, "{\"error\":\"%V\"}", msg->status);
+ rspamd_http_message_set_body_from_fstring_steal(msg,
+ rspamd_controller_maybe_compress(entry, reply, msg));
+ rspamd_http_connection_reset(entry->conn);
+ rspamd_http_router_insert_headers(entry->rt, msg);
+ rspamd_http_connection_write_message(entry->conn,
+ msg,
+ NULL,
+ "application/json",
+ entry,
+ entry->rt->timeout);
+ entry->is_reply = TRUE;
+}
+
+void rspamd_controller_send_openmetrics(struct rspamd_http_connection_entry *entry,
+ rspamd_fstring_t *str)
+{
+ struct rspamd_http_message *msg;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+ msg->date = time(NULL);
+ msg->code = 200;
+ msg->status = rspamd_fstring_new_init("OK", 2);
+
+ rspamd_http_message_set_body_from_fstring_steal(msg,
+ rspamd_controller_maybe_compress(entry, str, msg));
+ rspamd_http_connection_reset(entry->conn);
+ rspamd_http_router_insert_headers(entry->rt, msg);
+ rspamd_http_connection_write_message(entry->conn,
+ msg,
+ NULL,
+ "application/openmetrics-text; version=1.0.0; charset=utf-8",
+ entry,
+ entry->rt->timeout);
+ entry->is_reply = TRUE;
+}
+
+void rspamd_controller_send_string(struct rspamd_http_connection_entry *entry,
+ const gchar *str)
+{
+ struct rspamd_http_message *msg;
+ rspamd_fstring_t *reply;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+ msg->date = time(NULL);
+ msg->code = 200;
+ msg->status = rspamd_fstring_new_init("OK", 2);
+
+ if (str) {
+ reply = rspamd_fstring_new_init(str, strlen(str));
+ }
+ else {
+ reply = rspamd_fstring_new_init("null", 4);
+ }
+
+ rspamd_http_message_set_body_from_fstring_steal(msg,
+ rspamd_controller_maybe_compress(entry, reply, msg));
+ rspamd_http_connection_reset(entry->conn);
+ rspamd_http_router_insert_headers(entry->rt, msg);
+ rspamd_http_connection_write_message(entry->conn,
+ msg,
+ NULL,
+ "application/json",
+ entry,
+ entry->rt->timeout);
+ entry->is_reply = TRUE;
+}
+
+void rspamd_controller_send_ucl(struct rspamd_http_connection_entry *entry,
+ ucl_object_t *obj)
+{
+ struct rspamd_http_message *msg;
+ rspamd_fstring_t *reply;
+
+ msg = rspamd_http_new_message(HTTP_RESPONSE);
+ msg->date = time(NULL);
+ msg->code = 200;
+ msg->status = rspamd_fstring_new_init("OK", 2);
+ reply = rspamd_fstring_sized_new(BUFSIZ);
+ rspamd_ucl_emit_fstring(obj, UCL_EMIT_JSON_COMPACT, &reply);
+ rspamd_http_message_set_body_from_fstring_steal(msg,
+ rspamd_controller_maybe_compress(entry, reply, msg));
+ rspamd_http_connection_reset(entry->conn);
+ rspamd_http_router_insert_headers(entry->rt, msg);
+ rspamd_http_connection_write_message(entry->conn,
+ msg,
+ NULL,
+ "application/json",
+ entry,
+ entry->rt->timeout);
+ entry->is_reply = TRUE;
+}
+
+static void
+rspamd_worker_drop_priv(struct rspamd_main *rspamd_main)
+{
+ if (rspamd_main->is_privileged) {
+ if (setgid(rspamd_main->workers_gid) == -1) {
+ msg_err_main("cannot setgid to %d (%s), aborting",
+ (gint) rspamd_main->workers_gid,
+ strerror(errno));
+ exit(-errno);
+ }
+
+ if (rspamd_main->cfg->rspamd_user &&
+ initgroups(rspamd_main->cfg->rspamd_user,
+ rspamd_main->workers_gid) == -1) {
+ msg_err_main("initgroups failed (%s), aborting", strerror(errno));
+ exit(-errno);
+ }
+
+ if (setuid(rspamd_main->workers_uid) == -1) {
+ msg_err_main("cannot setuid to %d (%s), aborting",
+ (gint) rspamd_main->workers_uid,
+ strerror(errno));
+ exit(-errno);
+ }
+ }
+}
+
+static void
+rspamd_worker_set_limits(struct rspamd_main *rspamd_main,
+ struct rspamd_worker_conf *cf)
+{
+ struct rlimit rlmt;
+
+ if (cf->rlimit_nofile != 0) {
+ rlmt.rlim_cur = (rlim_t) cf->rlimit_nofile;
+ rlmt.rlim_max = (rlim_t) cf->rlimit_nofile;
+
+ if (setrlimit(RLIMIT_NOFILE, &rlmt) == -1) {
+ msg_warn_main("cannot set files rlimit: %L, %s",
+ cf->rlimit_nofile,
+ strerror(errno));
+ }
+
+ memset(&rlmt, 0, sizeof(rlmt));
+
+ if (getrlimit(RLIMIT_NOFILE, &rlmt) == -1) {
+ msg_warn_main("cannot get max files rlimit: %HL, %s",
+ cf->rlimit_maxcore,
+ strerror(errno));
+ }
+ else {
+ msg_info_main("set max file descriptors limit: %HL cur and %HL max",
+ (guint64) rlmt.rlim_cur,
+ (guint64) rlmt.rlim_max);
+ }
+ }
+ else {
+ /* Just report */
+ if (getrlimit(RLIMIT_NOFILE, &rlmt) == -1) {
+ msg_warn_main("cannot get max files rlimit: %HL, %s",
+ cf->rlimit_maxcore,
+ strerror(errno));
+ }
+ else {
+ msg_info_main("use system max file descriptors limit: %HL cur and %HL max",
+ (guint64) rlmt.rlim_cur,
+ (guint64) rlmt.rlim_max);
+ }
+ }
+
+ if (rspamd_main->cores_throttling) {
+ msg_info_main("disable core files for the new worker as limits are reached");
+ rlmt.rlim_cur = 0;
+ rlmt.rlim_max = 0;
+
+ if (setrlimit(RLIMIT_CORE, &rlmt) == -1) {
+ msg_warn_main("cannot disable core dumps: error when setting limits: %s",
+ strerror(errno));
+ }
+ }
+ else {
+ if (cf->rlimit_maxcore != 0) {
+ rlmt.rlim_cur = (rlim_t) cf->rlimit_maxcore;
+ rlmt.rlim_max = (rlim_t) cf->rlimit_maxcore;
+
+ if (setrlimit(RLIMIT_CORE, &rlmt) == -1) {
+ msg_warn_main("cannot set max core size limit: %HL, %s",
+ cf->rlimit_maxcore,
+ strerror(errno));
+ }
+
+ /* Ensure that we did it */
+ memset(&rlmt, 0, sizeof(rlmt));
+
+ if (getrlimit(RLIMIT_CORE, &rlmt) == -1) {
+ msg_warn_main("cannot get max core size rlimit: %HL, %s",
+ cf->rlimit_maxcore,
+ strerror(errno));
+ }
+ else {
+ if (rlmt.rlim_cur != cf->rlimit_maxcore ||
+ rlmt.rlim_max != cf->rlimit_maxcore) {
+ msg_warn_main("setting of core file limits was unsuccessful: "
+ "%HL was wanted, "
+ "but we have %HL cur and %HL max",
+ cf->rlimit_maxcore,
+ (guint64) rlmt.rlim_cur,
+ (guint64) rlmt.rlim_max);
+ }
+ else {
+ msg_info_main("set max core size limit: %HL cur and %HL max",
+ (guint64) rlmt.rlim_cur,
+ (guint64) rlmt.rlim_max);
+ }
+ }
+ }
+ else {
+ /* Just report */
+ if (getrlimit(RLIMIT_CORE, &rlmt) == -1) {
+ msg_warn_main("cannot get max core size limit: %HL, %s",
+ cf->rlimit_maxcore,
+ strerror(errno));
+ }
+ else {
+ msg_info_main("use system max core size limit: %HL cur and %HL max",
+ (guint64) rlmt.rlim_cur,
+ (guint64) rlmt.rlim_max);
+ }
+ }
+ }
+}
+
+static void
+rspamd_worker_on_term(EV_P_ ev_child *w, int revents)
+{
+ struct rspamd_worker *wrk = (struct rspamd_worker *) w->data;
+
+ if (wrk->ppid == getpid()) {
+ if (wrk->term_handler) {
+ wrk->term_handler(EV_A_ w, wrk->srv, wrk);
+ }
+ else {
+ rspamd_check_termination_clause(wrk->srv, wrk, w->rstatus);
+ }
+ }
+ else {
+ /* Ignore SIGCHLD for not our children... */
+ }
+}
+
+static void
+rspamd_worker_heartbeat_cb(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_worker *wrk = (struct rspamd_worker *) w->data;
+ struct rspamd_srv_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.type = RSPAMD_SRV_HEARTBEAT;
+ rspamd_srv_send_command(wrk, EV_A, &cmd, -1, NULL, NULL);
+}
+
+static void
+rspamd_worker_heartbeat_start(struct rspamd_worker *wrk, struct ev_loop *event_loop)
+{
+ wrk->hb.heartbeat_ev.data = (void *) wrk;
+ ev_timer_init(&wrk->hb.heartbeat_ev, rspamd_worker_heartbeat_cb,
+ 0.0, wrk->srv->cfg->heartbeat_interval);
+ ev_timer_start(event_loop, &wrk->hb.heartbeat_ev);
+}
+
+static void
+rspamd_main_heartbeat_cb(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_worker *wrk = (struct rspamd_worker *) w->data;
+ gdouble time_from_last = ev_time();
+ struct rspamd_main *rspamd_main;
+ static struct rspamd_control_command cmd;
+ struct tm tm;
+ gchar timebuf[64];
+ gchar usec_buf[16];
+ gint r;
+
+ time_from_last -= wrk->hb.last_event;
+ rspamd_main = wrk->srv;
+
+ if (wrk->hb.last_event > 0 &&
+ time_from_last > 0 &&
+ time_from_last >= rspamd_main->cfg->heartbeat_interval * 2) {
+
+ rspamd_localtime(wrk->hb.last_event, &tm);
+ r = strftime(timebuf, sizeof(timebuf), "%F %H:%M:%S", &tm);
+ rspamd_snprintf(usec_buf, sizeof(usec_buf), "%.5f",
+ wrk->hb.last_event - (gdouble) (time_t) wrk->hb.last_event);
+ rspamd_snprintf(timebuf + r, sizeof(timebuf) - r,
+ "%s", usec_buf + 1);
+
+ if (wrk->hb.nbeats > 0) {
+ /* First time lost event */
+ cmd.type = RSPAMD_CONTROL_CHILD_CHANGE;
+ cmd.cmd.child_change.what = rspamd_child_offline;
+ cmd.cmd.child_change.pid = wrk->pid;
+ rspamd_control_broadcast_srv_cmd(rspamd_main, &cmd, wrk->pid);
+ msg_warn_main("lost heartbeat from worker type %s with pid %P, "
+ "last beat on: %s (%L beats received previously)",
+ g_quark_to_string(wrk->type), wrk->pid,
+ timebuf,
+ wrk->hb.nbeats);
+ wrk->hb.nbeats = -1;
+ /* TODO: send notify about worker problem */
+ }
+ else {
+ wrk->hb.nbeats--;
+ msg_warn_main("lost %L heartbeat from worker type %s with pid %P, "
+ "last beat on: %s",
+ -(wrk->hb.nbeats),
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ timebuf);
+
+ if (rspamd_main->cfg->heartbeats_loss_max > 0 &&
+ -(wrk->hb.nbeats) >= rspamd_main->cfg->heartbeats_loss_max) {
+
+
+ if (-(wrk->hb.nbeats) > rspamd_main->cfg->heartbeats_loss_max + 1) {
+ msg_err_main("force kill worker type %s with pid %P, "
+ "last beat on: %s; %L heartbeat lost",
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ timebuf,
+ -(wrk->hb.nbeats));
+ kill(wrk->pid, SIGKILL);
+ }
+ else {
+ msg_err_main("terminate worker type %s with pid %P, "
+ "last beat on: %s; %L heartbeat lost",
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ timebuf,
+ -(wrk->hb.nbeats));
+ kill(wrk->pid, SIGTERM);
+ }
+ }
+ }
+ }
+ else if (wrk->hb.nbeats < 0) {
+ rspamd_localtime(wrk->hb.last_event, &tm);
+ r = strftime(timebuf, sizeof(timebuf), "%F %H:%M:%S", &tm);
+ rspamd_snprintf(usec_buf, sizeof(usec_buf), "%.5f",
+ wrk->hb.last_event - (gdouble) (time_t) wrk->hb.last_event);
+ rspamd_snprintf(timebuf + r, sizeof(timebuf) - r,
+ "%s", usec_buf + 1);
+
+ cmd.type = RSPAMD_CONTROL_CHILD_CHANGE;
+ cmd.cmd.child_change.what = rspamd_child_online;
+ cmd.cmd.child_change.pid = wrk->pid;
+ rspamd_control_broadcast_srv_cmd(rspamd_main, &cmd, wrk->pid);
+ msg_info_main("received heartbeat from worker type %s with pid %P, "
+ "last beat on: %s (%L beats lost previously)",
+ g_quark_to_string(wrk->type), wrk->pid,
+ timebuf,
+ -(wrk->hb.nbeats));
+ wrk->hb.nbeats = 1;
+ /* TODO: send notify about worker restoration */
+ }
+}
+
+static void
+rspamd_main_heartbeat_start(struct rspamd_worker *wrk, struct ev_loop *event_loop)
+{
+ wrk->hb.heartbeat_ev.data = (void *) wrk;
+ ev_timer_init(&wrk->hb.heartbeat_ev, rspamd_main_heartbeat_cb,
+ 0.0, wrk->srv->cfg->heartbeat_interval * 2);
+ ev_timer_start(event_loop, &wrk->hb.heartbeat_ev);
+}
+
+static bool
+rspamd_maybe_reuseport_socket(struct rspamd_worker_listen_socket *ls)
+{
+ if (ls->is_systemd) {
+ /* No need to reuseport */
+ return true;
+ }
+
+ if (ls->fd != -1 && rspamd_inet_address_get_af(ls->addr) == AF_UNIX) {
+ /* Just try listen */
+
+ if (listen(ls->fd, -1) == -1) {
+ return false;
+ }
+
+ return true;
+ }
+
+#if defined(SO_REUSEPORT) && defined(SO_REUSEADDR) && defined(LINUX)
+ gint nfd = -1;
+
+ if (ls->type == RSPAMD_WORKER_SOCKET_UDP) {
+ nfd = rspamd_inet_address_listen(ls->addr,
+ (ls->type == RSPAMD_WORKER_SOCKET_UDP ? SOCK_DGRAM : SOCK_STREAM),
+ RSPAMD_INET_ADDRESS_LISTEN_ASYNC | RSPAMD_INET_ADDRESS_LISTEN_REUSEPORT,
+ -1);
+
+ if (nfd == -1) {
+ msg_warn("cannot create reuseport listen socket for %d: %s",
+ ls->fd, strerror(errno));
+ nfd = ls->fd;
+ }
+ else {
+ if (ls->fd != -1) {
+ close(ls->fd);
+ }
+ ls->fd = nfd;
+ nfd = -1;
+ }
+ }
+ else {
+ /*
+ * Reuseport is broken with the current architecture, so it is easier not
+ * to use it at all
+ */
+ nfd = ls->fd;
+ }
+#endif
+
+#if 0
+ /* This needed merely if we have reuseport for tcp, but for now it is disabled */
+ /* This means that we have an fd with no listening enabled */
+ if (nfd != -1) {
+ if (ls->type == RSPAMD_WORKER_SOCKET_TCP) {
+ if (listen (nfd, -1) == -1) {
+ return false;
+ }
+ }
+ }
+#endif
+
+ return true;
+}
+
+/**
+ * Handles worker after fork returned zero
+ * @param wrk
+ * @param rspamd_main
+ * @param cf
+ * @param listen_sockets
+ */
+static void __attribute__((noreturn))
+rspamd_handle_child_fork(struct rspamd_worker *wrk,
+ struct rspamd_main *rspamd_main,
+ struct rspamd_worker_conf *cf,
+ GHashTable *listen_sockets)
+{
+ gint rc;
+ struct rlimit rlim;
+
+ /* Update pid for logging */
+ rspamd_log_on_fork(cf->type, rspamd_main->cfg, rspamd_main->logger);
+ wrk->pid = getpid();
+
+ /* Init PRNG after fork */
+ rc = ottery_init(rspamd_main->cfg->libs_ctx->ottery_cfg);
+ if (rc != OTTERY_ERR_NONE) {
+ msg_err_main("cannot initialize PRNG: %d", rc);
+ abort();
+ }
+
+ rspamd_random_seed_fast();
+#ifdef HAVE_EVUTIL_RNG_INIT
+ evutil_secure_rng_init();
+#endif
+
+ /*
+ * Libev stores all signals in a global table, so
+ * previous handlers must be explicitly detached and forgotten
+ * before starting a new loop
+ */
+ ev_signal_stop(rspamd_main->event_loop, &rspamd_main->int_ev);
+ ev_signal_stop(rspamd_main->event_loop, &rspamd_main->term_ev);
+ ev_signal_stop(rspamd_main->event_loop, &rspamd_main->hup_ev);
+ ev_signal_stop(rspamd_main->event_loop, &rspamd_main->usr1_ev);
+ /* Remove the inherited event base */
+ ev_loop_destroy(rspamd_main->event_loop);
+ rspamd_main->event_loop = NULL;
+
+ /* Close unused sockets */
+ GHashTableIter it;
+ gpointer k, v;
+
+
+ g_hash_table_iter_init(&it, listen_sockets);
+
+ /*
+ * Close listen sockets of not our process (inherited from other forks)
+ */
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ GList *elt = (GList *) v;
+ GList *our = cf->listen_socks;
+
+ if (g_list_position(our, elt) == -1) {
+ GList *cur = elt;
+
+ while (cur) {
+ struct rspamd_worker_listen_socket *ls =
+ (struct rspamd_worker_listen_socket *) cur->data;
+
+ if (ls->fd != -1 && close(ls->fd) == -1) {
+ msg_err("cannot close fd %d (addr = %s): %s",
+ ls->fd,
+ rspamd_inet_address_to_string_pretty(ls->addr),
+ strerror(errno));
+ }
+
+ ls->fd = -1;
+
+ cur = g_list_next(cur);
+ }
+ }
+ }
+
+ /* Reuseport before dropping privs */
+ GList *cur = cf->listen_socks;
+
+ while (cur) {
+ struct rspamd_worker_listen_socket *ls =
+ (struct rspamd_worker_listen_socket *) cur->data;
+
+ if (!rspamd_maybe_reuseport_socket(ls)) {
+ msg_err("cannot listen on socket %s: %s",
+ rspamd_inet_address_to_string_pretty(ls->addr),
+ strerror(errno));
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ /* Drop privileges */
+ rspamd_worker_drop_priv(rspamd_main);
+ /* Set limits */
+ rspamd_worker_set_limits(rspamd_main, cf);
+ /* Re-set stack limit */
+ getrlimit(RLIMIT_STACK, &rlim);
+ rlim.rlim_cur = 100 * 1024 * 1024;
+ rlim.rlim_max = rlim.rlim_cur;
+ setrlimit(RLIMIT_STACK, &rlim);
+
+ if (cf->bind_conf) {
+ rspamd_setproctitle("%s process (%s)", cf->worker->name,
+ cf->bind_conf->bind_line);
+ }
+ else {
+ rspamd_setproctitle("%s process", cf->worker->name);
+ }
+
+ if (rspamd_main->pfh) {
+ rspamd_pidfile_close(rspamd_main->pfh);
+ }
+
+ if (rspamd_main->cfg->log_silent_workers) {
+ rspamd_log_set_log_level(rspamd_main->logger, G_LOG_LEVEL_MESSAGE);
+ }
+
+ wrk->start_time = rspamd_get_calendar_ticks();
+
+ if (cf->bind_conf) {
+ GString *listen_conf_stringified = g_string_new(NULL);
+ struct rspamd_worker_bind_conf *cur_conf;
+
+ LL_FOREACH(cf->bind_conf, cur_conf)
+ {
+ if (cur_conf->next) {
+ rspamd_printf_gstring(listen_conf_stringified, "%s, ",
+ cur_conf->bind_line);
+ }
+ else {
+ rspamd_printf_gstring(listen_conf_stringified, "%s",
+ cur_conf->bind_line);
+ }
+ }
+
+ msg_info_main("starting %s process %P (%d); listen on: %v",
+ cf->worker->name,
+ getpid(), wrk->index, listen_conf_stringified);
+ g_string_free(listen_conf_stringified, TRUE);
+ }
+ else {
+ msg_info_main("starting %s process %P (%d); no listen",
+ cf->worker->name,
+ getpid(), wrk->index);
+ }
+ /* Close parent part of socketpair */
+ close(wrk->control_pipe[0]);
+ close(wrk->srv_pipe[0]);
+ /*
+ * Read comments in `rspamd_handle_main_fork` for details why these channel
+ * is blocking.
+ */
+ rspamd_socket_nonblocking(wrk->control_pipe[1]);
+#if 0
+ rspamd_socket_nonblocking (wrk->srv_pipe[1]);
+#endif
+ rspamd_main->cfg->cur_worker = wrk;
+ /* Execute worker (this function should not return normally!) */
+ cf->worker->worker_start_func(wrk);
+ /* To distinguish from normal termination */
+ exit(EXIT_FAILURE);
+}
+
+static void
+rspamd_handle_main_fork(struct rspamd_worker *wrk,
+ struct rspamd_main *rspamd_main,
+ struct rspamd_worker_conf *cf,
+ struct ev_loop *ev_base)
+{
+ /* Close worker part of socketpair */
+ close(wrk->control_pipe[1]);
+ close(wrk->srv_pipe[1]);
+
+ /*
+ * There are no reasons why control pipes are blocking: the messages
+ * there are rare and are strictly bounded by command sizes, so if we block
+ * on some pipe, it is ok, as we still poll that for all operations.
+ * It is also impossible to block on writing in normal conditions.
+ * And if the conditions are not normal, e.g. a worker is unresponsive, then
+ * we can safely think that the non-blocking behaviour as it is implemented
+ * currently will not make things better, as it would lead to incomplete
+ * reads/writes that are not handled anyhow and are totally broken from the
+ * beginning.
+ */
+#if 0
+ rspamd_socket_nonblocking (wrk->srv_pipe[0]);
+#endif
+ rspamd_socket_nonblocking(wrk->control_pipe[0]);
+
+ rspamd_srv_start_watching(rspamd_main, wrk, ev_base);
+ /* Child event */
+ wrk->cld_ev.data = wrk;
+ ev_child_init(&wrk->cld_ev, rspamd_worker_on_term, wrk->pid, 0);
+ ev_child_start(rspamd_main->event_loop, &wrk->cld_ev);
+ /* Heartbeats */
+ rspamd_main_heartbeat_start(wrk, rspamd_main->event_loop);
+ /* Insert worker into worker's table, pid is index */
+ g_hash_table_insert(rspamd_main->workers,
+ GSIZE_TO_POINTER(wrk->pid), wrk);
+
+#if defined(SO_REUSEPORT) && defined(SO_REUSEADDR) && defined(LINUX)
+ /*
+ * Close listen sockets in the main process once a child is handling them,
+ * if we have reuseport
+ */
+ GList *cur = cf->listen_socks;
+
+ while (cur) {
+ struct rspamd_worker_listen_socket *ls =
+ (struct rspamd_worker_listen_socket *) cur->data;
+
+ if (ls->fd != -1 && ls->type == RSPAMD_WORKER_SOCKET_UDP) {
+ close(ls->fd);
+ ls->fd = -1;
+ }
+
+ cur = g_list_next(cur);
+ }
+#endif
+}
+
+#ifndef SOCK_SEQPACKET
+#define SOCK_SEQPACKET SOCK_DGRAM
+#endif
+struct rspamd_worker *
+rspamd_fork_worker(struct rspamd_main *rspamd_main,
+ struct rspamd_worker_conf *cf,
+ guint index,
+ struct ev_loop *ev_base,
+ rspamd_worker_term_cb term_handler,
+ GHashTable *listen_sockets)
+{
+ struct rspamd_worker *wrk;
+
+ /* Starting worker process */
+ wrk = (struct rspamd_worker *) g_malloc0(sizeof(struct rspamd_worker));
+
+ if (!rspamd_socketpair(wrk->control_pipe, SOCK_SEQPACKET)) {
+ msg_err("socketpair failure: %s", strerror(errno));
+ rspamd_hard_terminate(rspamd_main);
+ }
+
+ if (!rspamd_socketpair(wrk->srv_pipe, SOCK_SEQPACKET)) {
+ msg_err("socketpair failure: %s", strerror(errno));
+ rspamd_hard_terminate(rspamd_main);
+ }
+
+ if (cf->bind_conf) {
+ msg_info_main("prepare to fork process %s (%d); listen on: %s",
+ cf->worker->name,
+ index, cf->bind_conf->name);
+ }
+ else {
+ msg_info_main("prepare to fork process %s (%d), no bind socket",
+ cf->worker->name,
+ index);
+ }
+
+ wrk->srv = rspamd_main;
+ wrk->type = cf->type;
+ wrk->cf = cf;
+ wrk->flags = cf->worker->flags;
+ REF_RETAIN(cf);
+ wrk->index = index;
+ wrk->ctx = cf->ctx;
+ wrk->ppid = getpid();
+ wrk->pid = fork();
+ wrk->cores_throttled = rspamd_main->cores_throttling;
+ wrk->term_handler = term_handler;
+ wrk->control_events_pending = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, rspamd_pending_control_free);
+
+ switch (wrk->pid) {
+ case 0:
+ rspamd_current_worker = wrk;
+ rspamd_handle_child_fork(wrk, rspamd_main, cf, listen_sockets);
+ break;
+ case -1:
+ msg_err_main("cannot fork main process: %s", strerror(errno));
+
+ if (rspamd_main->pfh) {
+ rspamd_pidfile_remove(rspamd_main->pfh);
+ }
+
+ rspamd_hard_terminate(rspamd_main);
+ break;
+ default:
+ rspamd_handle_main_fork(wrk, rspamd_main, cf, ev_base);
+ break;
+ }
+
+ return wrk;
+}
+
+void rspamd_worker_block_signals(void)
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ sigaddset(&set, SIGINT);
+ sigaddset(&set, SIGHUP);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGUSR2);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+}
+
+void rspamd_worker_unblock_signals(void)
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ sigaddset(&set, SIGINT);
+ sigaddset(&set, SIGHUP);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGUSR2);
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+}
+
+void rspamd_hard_terminate(struct rspamd_main *rspamd_main)
+{
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_worker *w;
+ sigset_t set;
+
+ /* Block all signals */
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ sigaddset(&set, SIGINT);
+ sigaddset(&set, SIGHUP);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGUSR2);
+ sigaddset(&set, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+
+ /* We need to terminate all workers that might be already spawned */
+ rspamd_worker_block_signals();
+ g_hash_table_iter_init(&it, rspamd_main->workers);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ w = v;
+ msg_err_main("kill worker %P as Rspamd is terminating due to "
+ "an unrecoverable error",
+ w->pid);
+ kill(w->pid, SIGKILL);
+ }
+
+ msg_err_main("shutting down Rspamd due to fatal error");
+
+ rspamd_log_close(rspamd_main->logger);
+ exit(EXIT_FAILURE);
+}
+
+gboolean
+rspamd_worker_is_scanner(struct rspamd_worker *w)
+{
+
+ if (w) {
+ return !!(w->flags & RSPAMD_WORKER_SCANNER);
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_worker_is_primary_controller(struct rspamd_worker *w)
+{
+
+ if (w) {
+ return !!(w->flags & RSPAMD_WORKER_CONTROLLER) && w->index == 0;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_worker_check_controller_presence(struct rspamd_worker *w)
+{
+ if (w->index == 0) {
+ GQuark our_type = w->type;
+ gboolean controller_seen = FALSE;
+ GList *cur;
+
+ enum {
+ low_priority_worker,
+ high_priority_worker
+ } our_priority;
+
+ if (our_type == g_quark_from_static_string("rspamd_proxy")) {
+ our_priority = low_priority_worker;
+ }
+ else if (our_type == g_quark_from_static_string("normal")) {
+ our_priority = high_priority_worker;
+ }
+ else {
+ msg_err("function is called for a wrong worker type: %s", g_quark_to_string(our_type));
+ return FALSE;
+ }
+
+ cur = w->srv->cfg->workers;
+
+ while (cur) {
+ struct rspamd_worker_conf *cf;
+
+ cf = (struct rspamd_worker_conf *) cur->data;
+
+ if (our_priority == low_priority_worker) {
+ if ((cf->type == g_quark_from_static_string("controller")) ||
+ (cf->type == g_quark_from_static_string("normal"))) {
+
+ if (cf->enabled && cf->count >= 0) {
+ controller_seen = TRUE;
+ break;
+ }
+ }
+ }
+ else {
+ if (cf->type == g_quark_from_static_string("controller")) {
+ if (cf->enabled && cf->count >= 0) {
+ controller_seen = TRUE;
+ break;
+ }
+ }
+ }
+
+ cur = g_list_next(cur);
+ }
+
+ if (!controller_seen) {
+ msg_info("no controller or normal workers defined, execute "
+ "controller periodics in this worker");
+ w->flags |= RSPAMD_WORKER_CONTROLLER;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+struct rspamd_worker_session_elt {
+ void *ptr;
+ guint *pref;
+ const gchar *tag;
+ time_t when;
+};
+
+struct rspamd_worker_session_cache {
+ struct ev_loop *ev_base;
+ GHashTable *cache;
+ struct rspamd_config *cfg;
+ struct ev_timer periodic;
+};
+
+static gint
+rspamd_session_cache_sort_cmp(gconstpointer pa, gconstpointer pb)
+{
+ const struct rspamd_worker_session_elt
+ *e1 = *(const struct rspamd_worker_session_elt **) pa,
+ *e2 = *(const struct rspamd_worker_session_elt **) pb;
+
+ return e2->when < e1->when;
+}
+
+static void
+rspamd_sessions_cache_periodic(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_worker_session_cache *c =
+ (struct rspamd_worker_session_cache *) w->data;
+ GHashTableIter it;
+ gchar timebuf[32];
+ gpointer k, v;
+ struct rspamd_worker_session_elt *elt;
+ struct tm tms;
+ GPtrArray *res;
+ guint i;
+
+ if (g_hash_table_size(c->cache) > c->cfg->max_sessions_cache) {
+ res = g_ptr_array_sized_new(g_hash_table_size(c->cache));
+ g_hash_table_iter_init(&it, c->cache);
+
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ g_ptr_array_add(res, v);
+ }
+
+ msg_err("sessions cache is overflowed %d elements where %d is limit",
+ (gint) res->len, (gint) c->cfg->max_sessions_cache);
+ g_ptr_array_sort(res, rspamd_session_cache_sort_cmp);
+
+ PTR_ARRAY_FOREACH(res, i, elt)
+ {
+ rspamd_localtime(elt->when, &tms);
+ strftime(timebuf, sizeof(timebuf), "%F %H:%M:%S", &tms);
+
+ msg_warn("redundant session; ptr: %p, "
+ "tag: %s, refcount: %d, time: %s",
+ elt->ptr, elt->tag ? elt->tag : "unknown",
+ elt->pref ? *elt->pref : 0,
+ timebuf);
+ }
+ }
+
+ ev_timer_again(EV_A_ w);
+}
+
+void *
+rspamd_worker_session_cache_new(struct rspamd_worker *w,
+ struct ev_loop *ev_base)
+{
+ struct rspamd_worker_session_cache *c;
+ static const gdouble periodic_interval = 60.0;
+
+ c = g_malloc0(sizeof(*c));
+ c->ev_base = ev_base;
+ c->cache = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, g_free);
+ c->cfg = w->srv->cfg;
+ c->periodic.data = c;
+ ev_timer_init(&c->periodic, rspamd_sessions_cache_periodic, periodic_interval,
+ periodic_interval);
+ ev_timer_start(ev_base, &c->periodic);
+
+ return c;
+}
+
+
+void rspamd_worker_session_cache_add(void *cache, const gchar *tag,
+ guint *pref, void *ptr)
+{
+ struct rspamd_worker_session_cache *c = cache;
+ struct rspamd_worker_session_elt *elt;
+
+ elt = g_malloc0(sizeof(*elt));
+ elt->pref = pref;
+ elt->ptr = ptr;
+ elt->tag = tag;
+ elt->when = time(NULL);
+
+ g_hash_table_insert(c->cache, elt->ptr, elt);
+}
+
+
+void rspamd_worker_session_cache_remove(void *cache, void *ptr)
+{
+ struct rspamd_worker_session_cache *c = cache;
+
+ g_hash_table_remove(c->cache, ptr);
+}
+
+static void
+rspamd_worker_monitored_on_change(struct rspamd_monitored_ctx *ctx,
+ struct rspamd_monitored *m, gboolean alive,
+ void *ud)
+{
+ struct rspamd_worker *worker = ud;
+ struct rspamd_config *cfg = worker->srv->cfg;
+ struct ev_loop *ev_base;
+ guchar tag[RSPAMD_MONITORED_TAG_LEN];
+ static struct rspamd_srv_command srv_cmd;
+
+ rspamd_monitored_get_tag(m, tag);
+ ev_base = rspamd_monitored_ctx_get_ev_base(ctx);
+ memset(&srv_cmd, 0, sizeof(srv_cmd));
+ srv_cmd.type = RSPAMD_SRV_MONITORED_CHANGE;
+ rspamd_strlcpy(srv_cmd.cmd.monitored_change.tag, tag,
+ sizeof(srv_cmd.cmd.monitored_change.tag));
+ srv_cmd.cmd.monitored_change.alive = alive;
+ srv_cmd.cmd.monitored_change.sender = getpid();
+ msg_info_config("broadcast monitored update for %s: %s",
+ srv_cmd.cmd.monitored_change.tag, alive ? "alive" : "dead");
+
+ rspamd_srv_send_command(worker, ev_base, &srv_cmd, -1, NULL, NULL);
+}
+
+void rspamd_worker_init_monitored(struct rspamd_worker *worker,
+ struct ev_loop *ev_base,
+ struct rspamd_dns_resolver *resolver)
+{
+ rspamd_monitored_ctx_config(worker->srv->cfg->monitored_ctx,
+ worker->srv->cfg, ev_base, resolver->r,
+ rspamd_worker_monitored_on_change, worker);
+}
+
+#ifdef HAVE_SA_SIGINFO
+
+static struct rspamd_main *saved_main = NULL;
+static gboolean
+rspamd_crash_propagate(gpointer key, gpointer value, gpointer unused)
+{
+ struct rspamd_worker *w = value;
+
+ /* Kill children softly */
+ kill(w->pid, SIGTERM);
+
+ return TRUE;
+}
+
+#ifdef BACKWARD_ENABLE
+/* See backtrace.cxx */
+extern void rspamd_print_crash(void);
+#endif
+
+static void
+rspamd_crash_sig_handler(int sig, siginfo_t *info, void *ctx)
+{
+ struct sigaction sa;
+ ucontext_t *uap = ctx;
+ pid_t pid;
+
+ pid = getpid();
+ msg_err("caught fatal signal %d(%s), "
+ "pid: %P, trace: ",
+ sig, strsignal(sig), pid);
+ (void) uap;
+#ifdef BACKWARD_ENABLE
+ rspamd_print_crash();
+#endif
+ msg_err("please see Rspamd FAQ to learn how to dump core files and how to "
+ "fill a bug report");
+
+ if (saved_main) {
+ if (pid == saved_main->pid) {
+ /*
+ * Main process has crashed, propagate crash further to trigger
+ * monitoring alerts and mass panic
+ */
+ g_hash_table_foreach_remove(saved_main->workers,
+ rspamd_crash_propagate, NULL);
+ }
+ }
+
+ /*
+ * Invoke signal with the default handler
+ */
+ sigemptyset(&sa.sa_mask);
+ sa.sa_handler = SIG_DFL;
+ sa.sa_flags = 0;
+ sigaction(sig, &sa, NULL);
+ kill(pid, sig);
+}
+#endif
+
+RSPAMD_NO_SANITIZE void
+rspamd_set_crash_handler(struct rspamd_main *rspamd_main)
+{
+#ifdef HAVE_SA_SIGINFO
+ struct sigaction sa;
+
+#ifdef HAVE_SIGALTSTACK
+ void *stack_mem;
+ stack_t ss;
+ memset(&ss, 0, sizeof ss);
+
+ ss.ss_size = MAX(SIGSTKSZ, 8192 * 4);
+ stack_mem = g_malloc0(ss.ss_size);
+ ss.ss_sp = stack_mem;
+ sigaltstack(&ss, NULL);
+#endif
+ saved_main = rspamd_main;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = &rspamd_crash_sig_handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
+ sigaction(SIGSEGV, &sa, NULL);
+ sigaction(SIGBUS, &sa, NULL);
+ sigaction(SIGABRT, &sa, NULL);
+ sigaction(SIGFPE, &sa, NULL);
+ sigaction(SIGSYS, &sa, NULL);
+#endif
+}
+
+RSPAMD_NO_SANITIZE void rspamd_unset_crash_handler(struct rspamd_main *unused_)
+{
+#ifdef HAVE_SIGALTSTACK
+ int ret;
+ stack_t ss;
+ ret = sigaltstack(NULL, &ss);
+
+ if (ret != -1) {
+ if (ss.ss_size > 0 && ss.ss_sp) {
+ g_free(ss.ss_sp);
+ }
+
+ ss.ss_size = 0;
+ ss.ss_sp = NULL;
+#ifdef SS_DISABLE
+ ss.ss_flags |= SS_DISABLE;
+#endif
+ sigaltstack(&ss, NULL);
+ }
+#endif
+}
+
+static void
+rspamd_enable_accept_event(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_worker_accept_event *ac_ev =
+ (struct rspamd_worker_accept_event *) w->data;
+
+ ev_timer_stop(EV_A_ w);
+ ev_io_start(EV_A_ & ac_ev->accept_ev);
+}
+
+void rspamd_worker_throttle_accept_events(gint sock, void *data)
+{
+ struct rspamd_worker_accept_event *head, *cur;
+ const gdouble throttling = 0.5;
+
+ head = (struct rspamd_worker_accept_event *) data;
+
+ DL_FOREACH(head, cur)
+ {
+
+ ev_io_stop(cur->event_loop, &cur->accept_ev);
+ cur->throttling_ev.data = cur;
+ ev_timer_init(&cur->throttling_ev, rspamd_enable_accept_event,
+ throttling, 0.0);
+ ev_timer_start(cur->event_loop, &cur->throttling_ev);
+ }
+}
+
+gboolean
+rspamd_check_termination_clause(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *wrk,
+ int res)
+{
+ gboolean need_refork = TRUE;
+
+ if (wrk->state != rspamd_worker_state_running || rspamd_main->wanna_die ||
+ (wrk->flags & RSPAMD_WORKER_OLD_CONFIG)) {
+ /* Do not refork workers that are intended to be terminated */
+ need_refork = FALSE;
+ }
+
+ if (WIFEXITED(res) && WEXITSTATUS(res) == 0) {
+ /* Normal worker termination, do not fork one more */
+
+ if (wrk->flags & RSPAMD_WORKER_OLD_CONFIG) {
+ /* Never re-fork old workers */
+ msg_info_main("%s process %P terminated normally",
+ g_quark_to_string(wrk->type),
+ wrk->pid);
+ need_refork = FALSE;
+ }
+ else {
+ if (wrk->hb.nbeats < 0 && rspamd_main->cfg->heartbeats_loss_max > 0 &&
+ -(wrk->hb.nbeats) >= rspamd_main->cfg->heartbeats_loss_max) {
+ msg_info_main("%s process %P terminated normally, but lost %L "
+ "heartbeats, refork it",
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ -(wrk->hb.nbeats));
+ need_refork = TRUE;
+ }
+ else {
+ msg_info_main("%s process %P terminated normally",
+ g_quark_to_string(wrk->type),
+ wrk->pid);
+ need_refork = FALSE;
+ }
+ }
+ }
+ else {
+ if (WIFSIGNALED(res)) {
+#ifdef WCOREDUMP
+ if (WCOREDUMP(res)) {
+ msg_warn_main(
+ "%s process %P terminated abnormally by signal: %s"
+ " and created core file; please see Rspamd FAQ "
+ "to learn how to extract data from core file and "
+ "fill a bug report",
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ g_strsignal(WTERMSIG(res)));
+ }
+ else {
+#ifdef HAVE_SYS_RESOURCE_H
+ struct rlimit rlmt;
+ (void) getrlimit(RLIMIT_CORE, &rlmt);
+
+ msg_warn_main(
+ "%s process %P terminated abnormally with exit code %d by "
+ "signal: %s"
+ " but NOT created core file (throttled=%s); "
+ "core file limits: %L current, %L max",
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ WEXITSTATUS(res),
+ g_strsignal(WTERMSIG(res)),
+ wrk->cores_throttled ? "yes" : "no",
+ (gint64) rlmt.rlim_cur,
+ (gint64) rlmt.rlim_max);
+#else
+ msg_warn_main(
+ "%s process %P terminated abnormally with exit code %d by signal: %s"
+ " but NOT created core file (throttled=%s); ",
+ g_quark_to_string(wrk->type),
+ wrk->pid, WEXITSTATUS(res),
+ g_strsignal(WTERMSIG(res)),
+ wrk->cores_throttled ? "yes" : "no");
+#endif
+ }
+#else
+ msg_warn_main(
+ "%s process %P terminated abnormally with exit code %d by signal: %s",
+ g_quark_to_string(wrk->type),
+ wrk->pid, WEXITSTATUS(res),
+ g_strsignal(WTERMSIG(res)));
+#endif
+ if (WTERMSIG(res) == SIGUSR2) {
+ /*
+ * It is actually race condition when not started process
+ * has been requested to be reloaded.
+ *
+ * We shouldn't refork on this
+ */
+ need_refork = FALSE;
+ }
+ }
+ else {
+ msg_warn_main("%s process %P terminated abnormally "
+ "(but it was not killed by a signal) "
+ "with exit code %d",
+ g_quark_to_string(wrk->type),
+ wrk->pid,
+ WEXITSTATUS(res));
+ }
+ }
+
+ return need_refork;
+}
+
+#ifdef WITH_HYPERSCAN
+gboolean
+rspamd_worker_hyperscan_ready(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *worker, gint fd,
+ gint attached_fd,
+ struct rspamd_control_command *cmd,
+ gpointer ud)
+{
+ struct rspamd_control_reply rep;
+ struct rspamd_re_cache *cache = worker->srv->cfg->re_cache;
+
+ memset(&rep, 0, sizeof(rep));
+ rep.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
+
+ if (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
+ cmd->cmd.hs_loaded.forced) {
+
+ msg_info("loading hyperscan expressions after receiving compilation "
+ "notice: %s",
+ (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ? "new db" : "forced update");
+ rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan(
+ worker->srv->cfg->re_cache, cmd->cmd.hs_loaded.cache_dir, false);
+ }
+
+ if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
+ msg_err("cannot write reply to the control socket: %s",
+ strerror(errno));
+ }
+
+ return TRUE;
+}
+#endif /* With Hyperscan */
+
+gboolean
+rspamd_worker_check_context(gpointer ctx, guint64 magic)
+{
+ struct rspamd_abstract_worker_ctx *actx = (struct rspamd_abstract_worker_ctx *) ctx;
+
+ return actx->magic == magic;
+}
+
+static gboolean
+rspamd_worker_log_pipe_handler(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *worker, gint fd,
+ gint attached_fd,
+ struct rspamd_control_command *cmd,
+ gpointer ud)
+{
+ struct rspamd_config *cfg = ud;
+ struct rspamd_worker_log_pipe *lp;
+ struct rspamd_control_reply rep;
+
+ memset(&rep, 0, sizeof(rep));
+ rep.type = RSPAMD_CONTROL_LOG_PIPE;
+
+ if (attached_fd != -1) {
+ lp = g_malloc0(sizeof(*lp));
+ lp->fd = attached_fd;
+ lp->type = cmd->cmd.log_pipe.type;
+
+ DL_APPEND(cfg->log_pipes, lp);
+ msg_info("added new log pipe");
+ }
+ else {
+ rep.reply.log_pipe.status = ENOENT;
+ msg_err("cannot attach log pipe: invalid fd");
+ }
+
+ if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
+ msg_err("cannot write reply to the control socket: %s",
+ strerror(errno));
+ }
+
+ return TRUE;
+}
+
+static gboolean
+rspamd_worker_monitored_handler(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *worker, gint fd,
+ gint attached_fd,
+ struct rspamd_control_command *cmd,
+ gpointer ud)
+{
+ struct rspamd_control_reply rep;
+ struct rspamd_monitored *m;
+ struct rspamd_monitored_ctx *mctx = worker->srv->cfg->monitored_ctx;
+ struct rspamd_config *cfg = ud;
+
+ memset(&rep, 0, sizeof(rep));
+ rep.type = RSPAMD_CONTROL_MONITORED_CHANGE;
+
+ if (cmd->cmd.monitored_change.sender != getpid()) {
+ m = rspamd_monitored_by_tag(mctx, cmd->cmd.monitored_change.tag);
+
+ if (m != NULL) {
+ rspamd_monitored_set_alive(m, cmd->cmd.monitored_change.alive);
+ rep.reply.monitored_change.status = 1;
+ msg_info_config("updated monitored status for %s: %s",
+ cmd->cmd.monitored_change.tag,
+ cmd->cmd.monitored_change.alive ? "alive" : "dead");
+ }
+ else {
+ msg_err("cannot find monitored by tag: %*s", 32,
+ cmd->cmd.monitored_change.tag);
+ rep.reply.monitored_change.status = 0;
+ }
+ }
+
+ if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
+ msg_err("cannot write reply to the control socket: %s",
+ strerror(errno));
+ }
+
+ return TRUE;
+}
+
+void rspamd_worker_init_scanner(struct rspamd_worker *worker,
+ struct ev_loop *ev_base,
+ struct rspamd_dns_resolver *resolver,
+ struct rspamd_lang_detector **plang_det)
+{
+ rspamd_stat_init(worker->srv->cfg, ev_base);
+#ifdef WITH_HYPERSCAN
+ rspamd_control_worker_add_cmd_handler(worker,
+ RSPAMD_CONTROL_HYPERSCAN_LOADED,
+ rspamd_worker_hyperscan_ready,
+ NULL);
+#endif
+ rspamd_control_worker_add_cmd_handler(worker,
+ RSPAMD_CONTROL_LOG_PIPE,
+ rspamd_worker_log_pipe_handler,
+ worker->srv->cfg);
+ rspamd_control_worker_add_cmd_handler(worker,
+ RSPAMD_CONTROL_MONITORED_CHANGE,
+ rspamd_worker_monitored_handler,
+ worker->srv->cfg);
+
+ *plang_det = worker->srv->cfg->lang_det;
+}
+
+void rspamd_controller_store_saved_stats(struct rspamd_main *rspamd_main,
+ struct rspamd_config *cfg)
+{
+ struct rspamd_stat *stat;
+ ucl_object_t *top, *sub;
+ struct ucl_emitter_functions *efuncs;
+ gint i, fd;
+ FILE *fp;
+ gchar fpath[PATH_MAX];
+
+ if (cfg->stats_file == NULL) {
+ return;
+ }
+
+ rspamd_snprintf(fpath, sizeof(fpath), "%s.XXXXXXXX", cfg->stats_file);
+ fd = g_mkstemp_full(fpath, O_WRONLY | O_TRUNC, 00644);
+
+ if (fd == -1) {
+ msg_err_config("cannot open for writing controller stats from %s: %s",
+ fpath, strerror(errno));
+ return;
+ }
+
+ fp = fdopen(fd, "w");
+ stat = rspamd_main->stat;
+
+ top = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(top, ucl_object_fromint(stat->messages_scanned), "scanned", 0, false);
+ ucl_object_insert_key(top, ucl_object_fromint(stat->messages_learned), "learned", 0, false);
+
+ if (stat->messages_scanned > 0) {
+ sub = ucl_object_typed_new(UCL_OBJECT);
+ for (i = METRIC_ACTION_REJECT; i <= METRIC_ACTION_NOACTION; i++) {
+ ucl_object_insert_key(sub,
+ ucl_object_fromint(stat->actions_stat[i]),
+ rspamd_action_to_str(i), 0, false);
+ }
+ ucl_object_insert_key(top, sub, "actions", 0, false);
+ }
+
+ ucl_object_insert_key(top,
+ ucl_object_fromint(stat->connections_count),
+ "connections", 0, false);
+ ucl_object_insert_key(top,
+ ucl_object_fromint(stat->control_connections_count),
+ "control_connections", 0, false);
+
+ efuncs = ucl_object_emit_file_funcs(fp);
+ if (!ucl_object_emit_full(top, UCL_EMIT_JSON_COMPACT,
+ efuncs, NULL)) {
+ msg_err_config("cannot write stats to %s: %s",
+ fpath, strerror(errno));
+
+ unlink(fpath);
+ }
+ else {
+ if (rename(fpath, cfg->stats_file) == -1) {
+ msg_err_config("cannot rename stats from %s to %s: %s",
+ fpath, cfg->stats_file, strerror(errno));
+ }
+ }
+
+ ucl_object_unref(top);
+ fclose(fp);
+ ucl_object_emit_funcs_free(efuncs);
+}
+
+static ev_timer rrd_timer;
+
+void rspamd_controller_on_terminate(struct rspamd_worker *worker,
+ struct rspamd_rrd_file *rrd)
+{
+ struct rspamd_abstract_worker_ctx *ctx;
+
+ ctx = (struct rspamd_abstract_worker_ctx *) worker->ctx;
+ rspamd_controller_store_saved_stats(worker->srv, worker->srv->cfg);
+
+ if (rrd) {
+ ev_timer_stop(ctx->event_loop, &rrd_timer);
+ msg_info("closing rrd file: %s", rrd->filename);
+ rspamd_rrd_close(rrd);
+ }
+}
+
+static void
+rspamd_controller_load_saved_stats(struct rspamd_main *rspamd_main,
+ struct rspamd_config *cfg)
+{
+ struct ucl_parser *parser;
+ ucl_object_t *obj;
+ const ucl_object_t *elt, *subelt;
+ struct rspamd_stat *stat, stat_copy;
+ gint i;
+
+ if (cfg->stats_file == NULL) {
+ return;
+ }
+
+ if (access(cfg->stats_file, R_OK) == -1) {
+ msg_err_config("cannot load controller stats from %s: %s",
+ cfg->stats_file, strerror(errno));
+ return;
+ }
+
+ parser = ucl_parser_new(0);
+
+ if (!ucl_parser_add_file(parser, cfg->stats_file)) {
+ msg_err_config("cannot parse controller stats from %s: %s",
+ cfg->stats_file, ucl_parser_get_error(parser));
+ ucl_parser_free(parser);
+
+ return;
+ }
+
+ obj = ucl_parser_get_object(parser);
+ ucl_parser_free(parser);
+
+ stat = rspamd_main->stat;
+ memcpy(&stat_copy, stat, sizeof(stat_copy));
+
+ elt = ucl_object_lookup(obj, "scanned");
+
+ if (elt != NULL && ucl_object_type(elt) == UCL_INT) {
+ stat_copy.messages_scanned = ucl_object_toint(elt);
+ }
+
+ elt = ucl_object_lookup(obj, "learned");
+
+ if (elt != NULL && ucl_object_type(elt) == UCL_INT) {
+ stat_copy.messages_learned = ucl_object_toint(elt);
+ }
+
+ elt = ucl_object_lookup(obj, "actions");
+
+ if (elt != NULL) {
+ for (i = METRIC_ACTION_REJECT; i <= METRIC_ACTION_NOACTION; i++) {
+ subelt = ucl_object_lookup(elt, rspamd_action_to_str(i));
+
+ if (subelt && ucl_object_type(subelt) == UCL_INT) {
+ stat_copy.actions_stat[i] = ucl_object_toint(subelt);
+ }
+ }
+ }
+
+ elt = ucl_object_lookup(obj, "connections_count");
+
+ if (elt != NULL && ucl_object_type(elt) == UCL_INT) {
+ stat_copy.connections_count = ucl_object_toint(elt);
+ }
+
+ elt = ucl_object_lookup(obj, "control_connections_count");
+
+ if (elt != NULL && ucl_object_type(elt) == UCL_INT) {
+ stat_copy.control_connections_count = ucl_object_toint(elt);
+ }
+
+ ucl_object_unref(obj);
+ memcpy(stat, &stat_copy, sizeof(stat_copy));
+}
+
+struct rspamd_controller_periodics_cbdata {
+ struct rspamd_worker *worker;
+ struct rspamd_rrd_file *rrd;
+ struct rspamd_stat *stat;
+ ev_timer save_stats_event;
+};
+
+static void
+rspamd_controller_rrd_update(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_controller_periodics_cbdata *cbd =
+ (struct rspamd_controller_periodics_cbdata *) w->data;
+ struct rspamd_stat *stat;
+ GArray ar;
+ gdouble points[METRIC_ACTION_MAX];
+ GError *err = NULL;
+ guint i;
+
+ g_assert(cbd->rrd != NULL);
+ stat = cbd->stat;
+
+ for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i++) {
+ points[i] = stat->actions_stat[i];
+ }
+
+ ar.data = (gchar *) points;
+ ar.len = sizeof(points);
+
+ if (!rspamd_rrd_add_record(cbd->rrd, &ar, rspamd_get_calendar_ticks(),
+ &err)) {
+ msg_err("cannot update rrd file: %e", err);
+ g_error_free(err);
+ }
+
+ /* Plan new event */
+ ev_timer_again(EV_A_ w);
+}
+
+static void
+rspamd_controller_stats_save_periodic(EV_P_ ev_timer *w, int revents)
+{
+ struct rspamd_controller_periodics_cbdata *cbd =
+ (struct rspamd_controller_periodics_cbdata *) w->data;
+
+ rspamd_controller_store_saved_stats(cbd->worker->srv, cbd->worker->srv->cfg);
+ ev_timer_again(EV_A_ w);
+}
+
+void rspamd_worker_init_controller(struct rspamd_worker *worker,
+ struct rspamd_rrd_file **prrd)
+{
+ struct rspamd_abstract_worker_ctx *ctx;
+ static const ev_tstamp rrd_update_time = 1.0;
+
+ ctx = (struct rspamd_abstract_worker_ctx *) worker->ctx;
+ rspamd_controller_load_saved_stats(worker->srv, worker->srv->cfg);
+
+ if (worker->index == 0) {
+ /* Enable periodics and other stuff */
+ static struct rspamd_controller_periodics_cbdata cbd;
+ const ev_tstamp save_stats_interval = 60; /* 1 minute */
+
+ memset(&cbd, 0, sizeof(cbd));
+ cbd.save_stats_event.data = &cbd;
+ cbd.worker = worker;
+ cbd.stat = worker->srv->stat;
+
+ ev_timer_init(&cbd.save_stats_event,
+ rspamd_controller_stats_save_periodic,
+ save_stats_interval, save_stats_interval);
+ ev_timer_start(ctx->event_loop, &cbd.save_stats_event);
+
+ rspamd_map_watch(worker->srv->cfg, ctx->event_loop,
+ ctx->resolver, worker,
+ RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER);
+
+ if (prrd != NULL) {
+ if (ctx->cfg->rrd_file && worker->index == 0) {
+ GError *rrd_err = NULL;
+
+ *prrd = rspamd_rrd_file_default(ctx->cfg->rrd_file, &rrd_err);
+
+ if (*prrd) {
+ cbd.rrd = *prrd;
+ rrd_timer.data = &cbd;
+ ev_timer_init(&rrd_timer, rspamd_controller_rrd_update,
+ rrd_update_time, rrd_update_time);
+ ev_timer_start(ctx->event_loop, &rrd_timer);
+ }
+ else if (rrd_err) {
+ msg_err("cannot load rrd from %s: %e", ctx->cfg->rrd_file,
+ rrd_err);
+ g_error_free(rrd_err);
+ }
+ else {
+ msg_err("cannot load rrd from %s: unknown error",
+ ctx->cfg->rrd_file);
+ }
+ }
+ else {
+ *prrd = NULL;
+ }
+ }
+
+ if (!ctx->cfg->disable_monitored) {
+ rspamd_worker_init_monitored(worker,
+ ctx->event_loop, ctx->resolver);
+ }
+ }
+ else {
+ rspamd_map_watch(worker->srv->cfg, ctx->event_loop,
+ ctx->resolver, worker, RSPAMD_MAP_WATCH_SCANNER);
+ }
+}
+
+gdouble
+rspamd_worker_check_and_adjust_timeout(struct rspamd_config *cfg, gdouble timeout)
+{
+ if (isnan(timeout)) {
+ /* Use implicit timeout from cfg->task_timeout */
+ timeout = cfg->task_timeout;
+ }
+
+ if (isnan(timeout)) {
+ return timeout;
+ }
+
+ struct rspamd_symcache_timeout_result *tres = rspamd_symcache_get_max_timeout(cfg->cache);
+ g_assert(tres != 0);
+
+ if (tres->max_timeout > timeout) {
+ msg_info_config("configured task_timeout %.2f is less than maximum symbols cache timeout %.2f; "
+ "some symbols can be terminated before checks",
+ timeout, tres->max_timeout);
+ GString *buf = g_string_sized_new(512);
+ static const int max_displayed_items = 12;
+
+ for (int i = 0; i < MIN(tres->nitems, max_displayed_items); i++) {
+ if (i == 0) {
+ rspamd_printf_gstring(buf, "%s(%.2f)",
+ rspamd_symcache_item_name((struct rspamd_symcache_item *) tres->items[i].item),
+ tres->items[i].timeout);
+ }
+ else {
+ rspamd_printf_gstring(buf, "; %s(%.2f)",
+ rspamd_symcache_item_name((struct rspamd_symcache_item *) tres->items[i].item),
+ tres->items[i].timeout);
+ }
+ }
+ msg_info_config("list of top %d symbols by execution time: %v",
+ (int) MIN(tres->nitems, max_displayed_items),
+ buf);
+
+ g_string_free(buf, TRUE);
+ }
+
+ rspamd_symcache_timeout_result_free(tres);
+
+ /* TODO: maybe adjust timeout */
+ return timeout;
+}
diff --git a/src/libserver/worker_util.h b/src/libserver/worker_util.h
new file mode 100644
index 0000000..ef48188
--- /dev/null
+++ b/src/libserver/worker_util.h
@@ -0,0 +1,334 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef WORKER_UTIL_H_
+#define WORKER_UTIL_H_
+
+#include "config.h"
+#include "util.h"
+#include "libserver/http/http_connection.h"
+#include "rspamd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef HAVE_SA_SIGINFO
+typedef void (*rspamd_sig_handler_t)(gint);
+#else
+
+typedef void (*rspamd_sig_handler_t)(gint, siginfo_t *, void *);
+
+#endif
+
+struct rspamd_worker;
+struct rspamd_worker_signal_handler;
+
+extern struct rspamd_worker *rspamd_current_worker;
+
+/**
+ * Init basic signals for a worker
+ * @param worker
+ * @param event_loop
+ */
+void rspamd_worker_init_signals(struct rspamd_worker *worker, struct ev_loop *event_loop);
+
+typedef void (*rspamd_accept_handler)(struct ev_loop *loop, ev_io *w, int revents);
+
+/**
+ * Prepare worker's startup
+ * @param worker worker structure
+ * @param name name of the worker
+ * @param sig_handler handler of main signals
+ * @param accept_handler handler of accept event for listen sockets
+ * @return event base suitable for a worker
+ */
+struct ev_loop *
+rspamd_prepare_worker(struct rspamd_worker *worker, const char *name,
+ rspamd_accept_handler hdl);
+
+/**
+ * Should be used to validate context for a worker as in assert like invocation
+ * @param ctx
+ * @param magic
+ * @return
+ */
+gboolean rspamd_worker_check_context(gpointer ctx, guint64 magic);
+
+/**
+ * Set special signal handler for a worker
+ */
+void rspamd_worker_set_signal_handler(int signo,
+ struct rspamd_worker *worker,
+ struct ev_loop *event_loop,
+ rspamd_worker_signal_cb_t handler,
+ void *handler_data);
+
+/**
+ * Stop accepting new connections for a worker
+ * @param worker
+ */
+void rspamd_worker_stop_accept(struct rspamd_worker *worker);
+
+typedef gint (*rspamd_controller_func_t)(
+ struct rspamd_http_connection_entry *conn_ent,
+ struct rspamd_http_message *msg,
+ struct module_ctx *ctx);
+
+struct rspamd_custom_controller_command {
+ const gchar *command;
+ struct module_ctx *ctx;
+ gboolean privileged;
+ gboolean require_message;
+ rspamd_controller_func_t handler;
+};
+
+struct rspamd_controller_worker_ctx;
+struct rspamd_lang_detector;
+
+struct rspamd_controller_session {
+ struct rspamd_controller_worker_ctx *ctx;
+ struct rspamd_worker *wrk;
+ rspamd_mempool_t *pool;
+ struct rspamd_task *task;
+ gchar *classifier;
+ rspamd_inet_addr_t *from_addr;
+ struct rspamd_config *cfg;
+ struct rspamd_lang_detector *lang_det;
+ gboolean is_spam;
+ gboolean is_read_only;
+};
+
+/**
+ * Send error using HTTP and JSON output
+ * @param entry router entry
+ * @param code error code
+ * @param error_msg error message
+ */
+void rspamd_controller_send_error(struct rspamd_http_connection_entry *entry,
+ gint code, const gchar *error_msg, ...);
+
+/**
+ * Send openmetrics-formatted strings using HTTP
+ * @param entry router entry
+ * @param str rspamd fstring buffer, ownership is transferred
+ */
+void rspamd_controller_send_openmetrics(struct rspamd_http_connection_entry *entry,
+ rspamd_fstring_t *str);
+
+/**
+ * Send a custom string using HTTP
+ * @param entry router entry
+ * @param str string to send
+ */
+void rspamd_controller_send_string(struct rspamd_http_connection_entry *entry,
+ const gchar *str);
+
+/**
+ * Send UCL using HTTP and JSON serialization
+ * @param entry router entry
+ * @param obj object to send
+ */
+void rspamd_controller_send_ucl(struct rspamd_http_connection_entry *entry,
+ ucl_object_t *obj);
+
+/**
+ * Return worker's control structure by its type
+ * @param type
+ * @return worker's control structure or NULL
+ */
+worker_t *rspamd_get_worker_by_type(struct rspamd_config *cfg, GQuark type);
+
+/**
+ * Block signals before terminations
+ */
+void rspamd_worker_block_signals(void);
+
+/**
+ * Unblock signals
+ */
+void rspamd_worker_unblock_signals(void);
+
+/**
+ * Kill rspamd main and all workers
+ * @param rspamd_main
+ */
+void rspamd_hard_terminate(struct rspamd_main *rspamd_main) G_GNUC_NORETURN;
+
+/**
+ * Returns TRUE if a specific worker is a scanner worker
+ * @param w
+ * @return
+ */
+gboolean rspamd_worker_is_scanner(struct rspamd_worker *w);
+
+/**
+ * Checks
+ * @param cfg
+ * @param timeout
+ * @return
+ */
+gdouble rspamd_worker_check_and_adjust_timeout(struct rspamd_config *cfg,
+ gdouble timeout);
+
+/**
+ * Returns TRUE if a specific worker is a primary controller
+ * @param w
+ * @return
+ */
+gboolean rspamd_worker_is_primary_controller(struct rspamd_worker *w);
+
+/**
+ * Returns TRUE if a specific worker should take a role of a controller
+ */
+gboolean rspamd_worker_check_controller_presence(struct rspamd_worker *w);
+
+/**
+ * Creates new session cache
+ * @param w
+ * @return
+ */
+void *rspamd_worker_session_cache_new(struct rspamd_worker *w,
+ struct ev_loop *ev_base);
+
+/**
+ * Adds a new session identified by pointer
+ * @param cache
+ * @param tag
+ * @param pref
+ * @param ptr
+ */
+void rspamd_worker_session_cache_add(void *cache, const gchar *tag,
+ guint *pref, void *ptr);
+
+/**
+ * Removes session from cache
+ * @param cache
+ * @param ptr
+ */
+void rspamd_worker_session_cache_remove(void *cache, void *ptr);
+
+/**
+ * Fork new worker with the specified configuration
+ */
+struct rspamd_worker *rspamd_fork_worker(struct rspamd_main *,
+ struct rspamd_worker_conf *, guint idx,
+ struct ev_loop *ev_base,
+ rspamd_worker_term_cb term_handler,
+ GHashTable *listen_sockets);
+
+/**
+ * Sets crash signals handlers if compiled with libunwind
+ */
+RSPAMD_NO_SANITIZE void rspamd_set_crash_handler(struct rspamd_main *);
+
+/**
+ * Restore memory for crash signals
+ */
+RSPAMD_NO_SANITIZE void rspamd_unset_crash_handler(struct rspamd_main *);
+
+/**
+ * Initialise the main monitoring worker
+ * @param worker
+ * @param ev_base
+ * @param resolver
+ */
+void rspamd_worker_init_monitored(struct rspamd_worker *worker,
+ struct ev_loop *ev_base,
+ struct rspamd_dns_resolver *resolver);
+
+/**
+ * Performs throttling for accept events
+ * @param sock
+ * @param data struct rspamd_worker_accept_event * list
+ */
+void rspamd_worker_throttle_accept_events(gint sock, void *data);
+
+/**
+ * Checks (and logs) the worker's termination status. Returns TRUE if a worker
+ * should be restarted.
+ * @param rspamd_main
+ * @param wrk
+ * @param status waitpid res
+ * @return TRUE if refork is desired
+ */
+gboolean rspamd_check_termination_clause(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *wrk, int status);
+
+/**
+ * Call for final scripts for a worker
+ * @param worker
+ * @return
+ */
+gboolean rspamd_worker_call_finish_handlers(struct rspamd_worker *worker);
+
+struct rspamd_rrd_file;
+/**
+ * Terminate controller worker
+ * @param worker
+ */
+void rspamd_controller_on_terminate(struct rspamd_worker *worker,
+ struct rspamd_rrd_file *rrd);
+
+/**
+ * Inits controller worker
+ * @param worker
+ * @param ev_base
+ * @param prrd
+ */
+void rspamd_worker_init_controller(struct rspamd_worker *worker,
+ struct rspamd_rrd_file **prrd);
+
+/**
+ * Saves stats
+ * @param rspamd_main
+ * @param cfg
+ */
+void rspamd_controller_store_saved_stats(struct rspamd_main *rspamd_main,
+ struct rspamd_config *cfg);
+
+#ifdef WITH_HYPERSCAN
+struct rspamd_control_command;
+
+gboolean rspamd_worker_hyperscan_ready(struct rspamd_main *rspamd_main,
+ struct rspamd_worker *worker, gint fd,
+ gint attached_fd,
+ struct rspamd_control_command *cmd,
+ gpointer ud);
+
+#endif
+
+#define msg_err_main(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ rspamd_main->server_pool->tag.tagname, rspamd_main->server_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_warn_main(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ rspamd_main->server_pool->tag.tagname, rspamd_main->server_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_notice_main(...) rspamd_default_log_function(G_LOG_LEVEL_MESSAGE, \
+ rspamd_main->server_pool->tag.tagname, rspamd_main->server_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#define msg_info_main(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ rspamd_main->server_pool->tag.tagname, rspamd_main->server_pool->tag.uid, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* WORKER_UTIL_H_ */