summaryrefslogtreecommitdiffstats
path: root/src/libutil/multipattern.c
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
commit 133a45c109da5310add55824db21af5239951f93 (patch)
tree ba6ac4c0a950a0dda56451944315d66409923918 /src/libutil/multipattern.c
parent Initial commit. (diff)
download rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1. (refs: upstream/3.8.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- src/libutil/multipattern.c 821
1 file changed, 821 insertions, 0 deletions
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
new file mode 100644
index 0000000..630b1f9
--- /dev/null
+++ b/src/libutil/multipattern.c
@@ -0,0 +1,821 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/multipattern.h"
+#include "libutil/str_util.h"
+#include "libcryptobox/cryptobox.h"
+
+#ifdef WITH_HYPERSCAN
+#include "logger.h"
+#include "unix-std.h"
+#include "hs.h"
+#include "libserver/hyperscan_tools.h"
+#endif
+#include "acism.h"
+#include "libutil/regexp.h"
+#include <stdalign.h>
+
+#define MAX_SCRATCH 4 /* Number of hyperscan scratch slots kept in each multipattern */
+
+enum rspamd_hs_check_state { /* Cached outcome of the hyperscan platform probe */
+ RSPAMD_HS_UNCHECKED = 0, /* hs_valid_platform() has not been called yet */
+ RSPAMD_HS_SUPPORTED, /* hs_valid_platform() returned HS_SUCCESS */
+ RSPAMD_HS_UNSUPPORTED /* hs_valid_platform() returned anything else */
+};
+
+static const char *hs_cache_dir = NULL; /* Set by rspamd_multipattern_library_init(); while NULL the on-disk cache is neither read nor written */
+static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED; /* Written only by rspamd_hs_check() */
+
+
+struct RSPAMD_ALIGNED(64) rspamd_multipattern { /* One set of patterns plus whatever backend state was built for it */
+#ifdef WITH_HYPERSCAN
+ rspamd_cryptobox_hash_state_t hash_state; /* Running hash over the escaped patterns; its digest names the cache file */
+ rspamd_hyperscan_t *hs_db; /* Database loaded from cache or compiled in rspamd_multipattern_compile() */
+ hs_scratch_t *scratch[MAX_SCRATCH]; /* Scratch spaces allocated in rspamd_multipattern_compile() */
+ GArray *hs_pats; /* gchar *: escaped pattern strings, freed in destroy */
+ GArray *hs_ids; /* gint: pattern ids, equal to the insertion index */
+ GArray *hs_flags; /* gint: HS_FLAG_* mask for each pattern */
+ guint scratch_used; /* Bitmask: bit i is set while scratch[i] is taken by a lookup */
+#endif
+ ac_trie_t *t; /* Trie from acism_create(); only built when hyperscan is not used and no GLOB/RE flag is set */
+ GArray *pats; /* ac_trie_pat_t: patterns stored when hyperscan is not used */
+ GArray *res; /* rspamd_regexp_t *: one regexp per pattern for the GLOB/RE fallback */
+
+ gboolean compiled; /* Set to TRUE at the end of a successful rspamd_multipattern_compile() */
+ guint cnt; /* Number of patterns added so far */
+ enum rspamd_multipattern_flags flags; /* Flags passed to the create function */
+};
+
+static GQuark
+rspamd_multipattern_quark(void)
+{
+ return g_quark_from_static_string("multipattern");
+}
+
+/*
+ * Tells whether the hyperscan backend may be used.
+ *
+ * With WITH_HYPERSCAN the platform is probed via hs_valid_platform() on the
+ * first call and the verdict is cached in hs_suitable_cpu. Without it the
+ * cached state is never changed from RSPAMD_HS_UNCHECKED, so the function
+ * always returns FALSE.
+ */
+static inline gboolean
+rspamd_hs_check(void)
+{
+#ifdef WITH_HYPERSCAN
+    if (G_UNLIKELY(hs_suitable_cpu == RSPAMD_HS_UNCHECKED)) {
+        hs_suitable_cpu = (hs_valid_platform() == HS_SUCCESS)
+                              ? RSPAMD_HS_SUPPORTED
+                              : RSPAMD_HS_UNSUPPORTED;
+    }
+#endif
+
+    return hs_suitable_cpu == RSPAMD_HS_SUPPORTED;
+}
+
+/*
+ * Module-wide initialisation: stores the directory used for cached hyperscan
+ * databases (the pointer is kept, not copied) and, when hyperscan support is
+ * compiled in, runs the platform probe once so its result is cached.
+ */
+void rspamd_multipattern_library_init(const gchar *cache_dir)
+{
+#ifdef WITH_HYPERSCAN
+    /* The probe does not depend on the cache directory */
+    (void) rspamd_hs_check();
+#endif
+    hs_cache_dir = cache_dir;
+}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Builds the hyperscan expression used for a TLD pattern.
+ *
+ * Handled shapes:
+ *  1) blah    -> \.blah(:?\b|$)
+ *  2) *.blah  -> \.blah(:?\b|$)   (everything up to and including the first
+ *                                  dot is dropped)
+ *  3) a pattern starting with '*' that has no dot is used verbatim
+ *
+ * @param pattern  source pattern, at least slen bytes long
+ * @param slen     length of the pattern in bytes
+ * @param dst_len  receives the length of the returned string
+ * @return newly allocated NUL-terminated string, release with g_free()
+ *
+ * The pattern is treated as length-delimited: only the first slen bytes are
+ * inspected or copied, so it does not need to be NUL-terminated.
+ */
+static gchar *
+rspamd_multipattern_escape_tld_hyperscan(const gchar *pattern, gsize slen,
+                                         gsize *dst_len)
+{
+    static const gchar prefix[] = "\\.";
+    /*
+     * NOTE(review): "(:?" is a group starting with an optional colon; a
+     * non-capturing group would be spelled "(?:". Kept as is because changing
+     * it alters both match offsets and the cache hash - confirm the intent.
+     */
+    static const gchar suffix[] = "(:?\\b|$)";
+    const gchar *p = pattern;
+    gsize body_len, len, off;
+    gchar *res;
+
+    if (slen > 0 && pattern[0] == '*') {
+        /* Search only inside the declared length */
+        const gchar *dot = memchr(pattern, '.', slen);
+
+        if (dot != NULL) {
+            p = dot + 1;
+        }
+        /* else: XXX: bad, the pattern is used verbatim */
+    }
+
+    body_len = slen - (gsize) (p - pattern);
+    len = (sizeof(prefix) - 1) + body_len + (sizeof(suffix) - 1);
+
+    res = g_malloc(len + 1);
+    off = rspamd_strlcpy(res, prefix, len + 1);
+    /* Never copy more than what is left of the pattern */
+    off += rspamd_strlcpy(res + off, p, body_len + 1);
+    off += rspamd_strlcpy(res + off, suffix, len + 1 - off);
+
+    *dst_len = off;
+
+    return res;
+}
+
+#endif
+/*
+ * Builds the plain string searched for by the trie for a TLD pattern.
+ *
+ * Handled shapes:
+ *  1) blah    -> .blah
+ *  2) *.blah  -> .blah   (everything up to and including the first dot is
+ *                         dropped)
+ *  3) a pattern starting with '*' that has no dot is kept whole and only
+ *     gets the leading dot
+ *
+ * @param pattern  source pattern
+ * @param len      length of the pattern in bytes
+ * @param dst_len  receives the computed length of the result
+ * @return newly allocated NUL-terminated string, release with g_free()
+ */
+static gchar *
+rspamd_multipattern_escape_tld_acism(const gchar *pattern, gsize len,
+                                     gsize *dst_len)
+{
+    const gchar *tail = pattern;
+    gsize tail_len, out_len;
+    gchar *out;
+
+    if (pattern[0] == '*') {
+        const gchar *dot = memchr(pattern, '.', len);
+
+        if (dot != NULL) {
+            tail = dot + 1;
+        }
+        /* else: XXX: bad, keep the pattern as it is */
+    }
+
+    /* One leading dot followed by whatever remains of the pattern */
+    tail_len = len - (tail - pattern);
+    out_len = tail_len + 1;
+
+    out = g_malloc(out_len + 1);
+    out[0] = '.';
+    rspamd_strlcpy(out + 1, tail, tail_len + 1);
+
+    *dst_len = out_len;
+
+    return out;
+}
+
+/*
+ * Converts a user supplied pattern into the string stored in the
+ * multipattern.
+ *
+ * The pattern flags are examined in this order: TLD, RE, GLOB, plain string.
+ * When hyperscan is usable every result goes through
+ * rspamd_str_regexp_escape(); otherwise TLD patterns get the trie form and
+ * plain strings are copied unchanged.
+ *
+ * @param pattern  source pattern
+ * @param len      length of the pattern in bytes
+ * @param flags    flags of this pattern
+ * @param dst_len  receives the length of the returned string
+ * @return newly allocated string owned by the caller, release with g_free()
+ */
+static gchar *
+rspamd_multipattern_pattern_filter(const gchar *pattern, gsize len,
+                                   enum rspamd_multipattern_flags flags,
+                                   gsize *dst_len)
+{
+    gchar *ret = NULL;
+    gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
+
+    if (flags & RSPAMD_MULTIPATTERN_UTF8) {
+        gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
+    }
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        if (flags & RSPAMD_MULTIPATTERN_TLD) {
+            gchar *tmp;
+            gsize tlen;
+
+            /* Build the TLD expression first, then escape it as a regexp */
+            tmp = rspamd_multipattern_escape_tld_hyperscan(pattern, len, &tlen);
+            ret = rspamd_str_regexp_escape(tmp, tlen, dst_len,
+                                           gl_flags | RSPAMD_REGEXP_ESCAPE_RE);
+            g_free(tmp);
+        }
+        else if (flags & RSPAMD_MULTIPATTERN_RE) {
+            ret = rspamd_str_regexp_escape(pattern, len, dst_len,
+                                           gl_flags | RSPAMD_REGEXP_ESCAPE_RE);
+        }
+        else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+            ret = rspamd_str_regexp_escape(pattern, len, dst_len,
+                                           gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
+        }
+        else {
+            ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags);
+        }
+
+        return ret;
+    }
+#endif
+
+    if (flags & RSPAMD_MULTIPATTERN_TLD) {
+        ret = rspamd_multipattern_escape_tld_acism(pattern, len, dst_len);
+    }
+    else if (flags & RSPAMD_MULTIPATTERN_RE) {
+        ret = rspamd_str_regexp_escape(pattern, len, dst_len,
+                                       gl_flags | RSPAMD_REGEXP_ESCAPE_RE);
+    }
+    else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+        ret = rspamd_str_regexp_escape(pattern, len, dst_len,
+                                       gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
+    }
+    else {
+        /*
+         * Plain copy. Use the glib allocator: stored patterns are released
+         * with g_free(), and g_malloc() does not return NULL on exhaustion.
+         */
+        ret = g_malloc(len + 1);
+        *dst_len = rspamd_strlcpy(ret, pattern, len + 1);
+    }
+
+    return ret;
+}
+
+/*
+ * Allocates an empty multipattern.
+ *
+ * @param flags  flags stored in the object
+ * @return zero-initialised object with the pattern arrays of the active
+ *         backend created; release with rspamd_multipattern_destroy()
+ *
+ * Aborts through g_assert() when the aligned allocation fails.
+ */
+struct rspamd_multipattern *
+rspamd_multipattern_create(enum rspamd_multipattern_flags flags)
+{
+    struct rspamd_multipattern *mp = NULL;
+
+    /* Align due to blake2b state */
+    if (posix_memalign((void **) &mp, RSPAMD_ALIGNOF(struct rspamd_multipattern),
+                       sizeof(*mp)) != 0) {
+        /* The pointer is not reliable after a failure, make the assert fire */
+        mp = NULL;
+    }
+
+    g_assert(mp != NULL);
+    memset(mp, 0, sizeof(*mp));
+    mp->flags = flags;
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        mp->hs_pats = g_array_new(FALSE, TRUE, sizeof(gchar *));
+        mp->hs_flags = g_array_new(FALSE, TRUE, sizeof(gint));
+        mp->hs_ids = g_array_new(FALSE, TRUE, sizeof(gint));
+        rspamd_cryptobox_hash_init(&mp->hash_state, NULL, 0);
+
+        return mp;
+    }
+#endif
+
+    mp->pats = g_array_new(FALSE, TRUE, sizeof(ac_trie_pat_t));
+
+    return mp;
+}
+
+/*
+ * Allocates an empty multipattern whose arrays are pre-sized.
+ *
+ * @param npatterns  number of elements to reserve in the pattern arrays
+ * @param flags      flags stored in the object
+ * @return zero-initialised object with the pattern arrays of the active
+ *         backend created; release with rspamd_multipattern_destroy()
+ *
+ * Aborts through g_assert() when the aligned allocation fails.
+ */
+struct rspamd_multipattern *
+rspamd_multipattern_create_sized(guint npatterns,
+                                 enum rspamd_multipattern_flags flags)
+{
+    struct rspamd_multipattern *mp = NULL;
+
+    /* Align due to blake2b state */
+    if (posix_memalign((void **) &mp, RSPAMD_ALIGNOF(struct rspamd_multipattern),
+                       sizeof(*mp)) != 0) {
+        /* The pointer is not reliable after a failure, make the assert fire */
+        mp = NULL;
+    }
+
+    g_assert(mp != NULL);
+    memset(mp, 0, sizeof(*mp));
+    mp->flags = flags;
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        mp->hs_pats = g_array_sized_new(FALSE, TRUE, sizeof(gchar *), npatterns);
+        mp->hs_flags = g_array_sized_new(FALSE, TRUE, sizeof(gint), npatterns);
+        mp->hs_ids = g_array_sized_new(FALSE, TRUE, sizeof(gint), npatterns);
+        rspamd_cryptobox_hash_init(&mp->hash_state, NULL, 0);
+
+        return mp;
+    }
+#endif
+
+    mp->pats = g_array_sized_new(FALSE, TRUE, sizeof(ac_trie_pat_t), npatterns);
+
+    return mp;
+}
+
+/*
+ * Adds a NUL-terminated pattern; the length is taken with strlen() and the
+ * work is done by rspamd_multipattern_add_pattern_len().
+ */
+void rspamd_multipattern_add_pattern(struct rspamd_multipattern *mp,
+                                     const gchar *pattern, gint flags)
+{
+    gsize patlen;
+
+    g_assert(pattern != NULL);
+    patlen = strlen(pattern);
+
+    rspamd_multipattern_add_pattern_len(mp, pattern, patlen, flags);
+}
+
+/*
+ * Adds one pattern of explicit length to a multipattern that has not been
+ * compiled yet.
+ *
+ * With hyperscan the escaped pattern, its HS_FLAG_* mask and its id (the
+ * insertion index) are appended to the hs_* arrays and the escaped text is
+ * fed into the running hash. Otherwise the filtered pattern is appended to
+ * the trie pattern array.
+ *
+ * @param mp       target multipattern, must not be compiled
+ * @param pattern  pattern text
+ * @param patlen   length of the pattern in bytes
+ * @param flags    per-pattern flags; the hyperscan mask is derived from these
+ *                 combined with the flags of mp, the escaping uses these only
+ */
+void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp,
+                                         const gchar *pattern, gsize patlen, gint flags)
+{
+    gsize escaped_len;
+
+    g_assert(pattern != NULL);
+    g_assert(mp != NULL);
+    g_assert(!mp->compiled);
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        const gint all_flags = mp->flags | flags;
+        gint hs_fl = HS_FLAG_SOM_LEFTMOST;
+        gint id;
+        gchar *escaped;
+
+        if (all_flags & RSPAMD_MULTIPATTERN_ICASE) {
+            hs_fl |= HS_FLAG_CASELESS;
+        }
+
+        if (all_flags & RSPAMD_MULTIPATTERN_UTF8) {
+            hs_fl |= HS_FLAG_UTF8;
+
+            /* TLD patterns get UTF8 mode only, without UCP */
+            if (!(all_flags & RSPAMD_MULTIPATTERN_TLD)) {
+                hs_fl |= HS_FLAG_UCP;
+            }
+        }
+
+        if (all_flags & RSPAMD_MULTIPATTERN_DOTALL) {
+            hs_fl |= HS_FLAG_DOTALL;
+        }
+
+        if (all_flags & RSPAMD_MULTIPATTERN_SINGLEMATCH) {
+            hs_fl |= HS_FLAG_SINGLEMATCH;
+        }
+
+        /* Single match (according to hyperscan docs) and NO_START both drop SOM */
+        if (all_flags & (RSPAMD_MULTIPATTERN_SINGLEMATCH | RSPAMD_MULTIPATTERN_NO_START)) {
+            hs_fl &= ~HS_FLAG_SOM_LEFTMOST;
+        }
+
+        g_array_append_val(mp->hs_flags, hs_fl);
+
+        escaped = rspamd_multipattern_pattern_filter(pattern, patlen, flags,
+                                                     &escaped_len);
+        g_array_append_val(mp->hs_pats, escaped);
+
+        id = mp->cnt;
+        g_array_append_val(mp->hs_ids, id);
+
+        rspamd_cryptobox_hash_update(&mp->hash_state, escaped, escaped_len);
+        mp->cnt++;
+
+        return;
+    }
+#endif
+    ac_trie_pat_t trie_pat;
+
+    trie_pat.ptr = rspamd_multipattern_pattern_filter(pattern, patlen, flags,
+                                                      &escaped_len);
+    trie_pat.len = escaped_len;
+    g_array_append_val(mp->pats, trie_pat);
+
+    mp->cnt++;
+}
+
+/*
+ * Creates a multipattern pre-sized for npatterns entries and adds every
+ * string of the patterns array to it, using flags both for the object and
+ * for each pattern. The result still has to be compiled.
+ *
+ * @param patterns   array of npatterns NUL-terminated strings, not NULL
+ * @param npatterns  number of patterns, must be greater than zero
+ * @param flags      flags for the object and for each pattern
+ */
+struct rspamd_multipattern *
+rspamd_multipattern_create_full(const gchar **patterns,
+                                guint npatterns, enum rspamd_multipattern_flags flags)
+{
+    struct rspamd_multipattern *mp;
+    const gchar **cur, **end;
+
+    g_assert(npatterns > 0);
+    g_assert(patterns != NULL);
+
+    mp = rspamd_multipattern_create_sized(npatterns, flags);
+
+    for (cur = patterns, end = patterns + npatterns; cur != end; cur++) {
+        rspamd_multipattern_add_pattern(mp, *cur, flags);
+    }
+
+    return mp;
+}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Attempts to fetch a previously saved database for this pattern set.
+ *
+ * The file name is built from the cache directory and the hash; the load
+ * result is stored in mp->hs_db.
+ *
+ * @return TRUE when a database was loaded, FALSE when no cache directory is
+ *         configured or the load returned NULL
+ */
+static gboolean
+rspamd_multipattern_try_load_hs(struct rspamd_multipattern *mp,
+                                const guchar *hash)
+{
+    gchar cache_path[PATH_MAX];
+
+    if (hs_cache_dir != NULL) {
+        rspamd_snprintf(cache_path, sizeof(cache_path), "%s/%*xs.hsmp",
+                        hs_cache_dir,
+                        (gint) rspamd_cryptobox_HASHBYTES / 2, hash);
+        mp->hs_db = rspamd_hyperscan_maybe_load(cache_path, 0);
+
+        return mp->hs_db != NULL;
+    }
+
+    return FALSE;
+}
+
+static void
+rspamd_multipattern_try_save_hs(struct rspamd_multipattern *mp,
+ const guchar *hash)
+{
+ gchar fp[PATH_MAX], np[PATH_MAX];
+ char *bytes = NULL;
+ gsize len;
+ gint fd;
+
+ if (hs_cache_dir == NULL) {
+ return;
+ }
+
+ rspamd_snprintf(fp, sizeof(fp), "%s%shsmp-XXXXXXXXXXXXX", G_DIR_SEPARATOR_S,
+ hs_cache_dir);
+
+ if ((fd = g_mkstemp_full(fp, O_CREAT | O_EXCL | O_WRONLY, 00644)) != -1) {
+ int ret;
+ if ((ret = hs_serialize_database(rspamd_hyperscan_get_database(mp->hs_db), &bytes, &len)) == HS_SUCCESS) {
+ if (write(fd, bytes, len) == -1) {
+ msg_warn("cannot write hyperscan cache to %s: %s",
+ fp, strerror(errno));
+ unlink(fp);
+ free(bytes);
+ }
+ else {
+ free(bytes);
+ fsync(fd);
+
+ rspamd_snprintf(np, sizeof(np), "%s/%*xs.hsmp", hs_cache_dir,
+ (gint) rspamd_cryptobox_HASHBYTES / 2, hash);
+
+ if (rename(fp, np) == -1) {
+ msg_warn("cannot rename hyperscan cache from %s to %s: %s",
+ fp, np, strerror(errno));
+ unlink(fp);
+ }
+ else {
+ rspamd_hyperscan_notice_known(np);
+ }
+ }
+ }
+ else {
+ msg_warn("cannot serialize hyperscan cache to %s: error code %d",
+ fp, ret);
+ unlink(fp);
+ }
+
+
+ close(fd);
+ }
+ else {
+ msg_warn("cannot open a temp file %s to write hyperscan cache: %s", fp, strerror(errno));
+ }
+}
+#endif
+
+/*
+ * Prepares a multipattern for lookups.
+ *
+ * Hyperscan path: the pattern hash is finalised together with the platform
+ * info, a cached database is loaded if available, otherwise the patterns are
+ * compiled and saved to the cache; finally MAX_SCRATCH scratch spaces are
+ * allocated.
+ * Fallback path: GLOB/RE multipatterns get one rspamd regexp per pattern,
+ * everything else is built into a trie with acism_create().
+ * An object without patterns is just marked as compiled.
+ *
+ * @param mp   multipattern that has not been compiled yet
+ * @param err  receives the error description on failure, may be NULL
+ * @return TRUE on success, FALSE on failure (mp stays not compiled)
+ */
+gboolean
+rspamd_multipattern_compile(struct rspamd_multipattern *mp, GError **err)
+{
+    g_assert(mp != NULL);
+    g_assert(!mp->compiled);
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        guint i;
+        hs_platform_info_t plt;
+        hs_compile_error_t *hs_errors = NULL;
+        guchar hash[rspamd_cryptobox_HASHBYTES];
+
+        if (mp->cnt > 0) {
+            /*
+             * Call outside of g_assert(): the call must still happen when
+             * assertions are compiled out, otherwise plt stays uninitialised.
+             */
+            const int plt_ret = hs_populate_platform(&plt);
+
+            g_assert(plt_ret == HS_SUCCESS);
+            (void) plt_ret;
+
+            rspamd_cryptobox_hash_update(&mp->hash_state, (void *) &plt, sizeof(plt));
+            rspamd_cryptobox_hash_final(&mp->hash_state, hash);
+
+            if (!rspamd_multipattern_try_load_hs(mp, hash)) {
+                hs_database_t *db = NULL;
+
+                if (hs_compile_multi((const char *const *) mp->hs_pats->data,
+                                     (const unsigned int *) mp->hs_flags->data,
+                                     (const unsigned int *) mp->hs_ids->data,
+                                     mp->cnt,
+                                     HS_MODE_BLOCK,
+                                     &plt,
+                                     &db,
+                                     &hs_errors) != HS_SUCCESS) {
+
+                    /*
+                     * The expression index is negative when the error is not
+                     * tied to a particular pattern, so validate it before
+                     * using it as an array index.
+                     */
+                    if (hs_errors != NULL && hs_errors->expression >= 0 &&
+                        (guint) hs_errors->expression < mp->hs_pats->len) {
+                        g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                    "cannot create tree of regexp when processing '%s': %s",
+                                    g_array_index(mp->hs_pats, char *, hs_errors->expression),
+                                    hs_errors->message);
+                    }
+                    else {
+                        g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                    "cannot create tree of regexp: %s",
+                                    hs_errors != NULL ? hs_errors->message : "unknown error");
+                    }
+
+                    if (hs_errors != NULL) {
+                        hs_free_compile_error(hs_errors);
+                    }
+
+                    return FALSE;
+                }
+
+                if (hs_cache_dir != NULL) {
+                    char fpath[PATH_MAX];
+                    rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmp", hs_cache_dir,
+                                    (gint) rspamd_cryptobox_HASHBYTES / 2, hash);
+                    mp->hs_db = rspamd_hyperscan_from_raw_db(db, fpath);
+                }
+                else {
+                    /* Should not happen in the real life */
+                    mp->hs_db = rspamd_hyperscan_from_raw_db(db, NULL);
+                }
+
+                rspamd_multipattern_try_save_hs(mp, hash);
+            }
+
+            for (i = 0; i < MAX_SCRATCH; i++) {
+                mp->scratch[i] = NULL;
+            }
+
+            for (i = 0; i < MAX_SCRATCH; i++) {
+                int ret;
+
+                if ((ret = hs_alloc_scratch(rspamd_hyperscan_get_database(mp->hs_db), &mp->scratch[i])) != HS_SUCCESS) {
+                    msg_err("cannot allocate scratch space for hyperscan: error code %d", ret);
+
+                    /* Clean all scratches that are non-NULL */
+                    for (int ii = 0; ii < MAX_SCRATCH; ii++) {
+                        if (mp->scratch[ii] != NULL) {
+                            hs_free_scratch(mp->scratch[ii]);
+                            /* Do not keep pointers to released scratch space */
+                            mp->scratch[ii] = NULL;
+                        }
+                    }
+                    g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+                                "cannot allocate scratch space for hyperscan: error code %d", ret);
+
+                    rspamd_hyperscan_free(mp->hs_db, true);
+                    mp->hs_db = NULL;
+
+                    return FALSE;
+                }
+            }
+        }
+
+        mp->compiled = TRUE;
+
+        return TRUE;
+    }
+#endif
+
+    if (mp->cnt > 0) {
+
+        if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB | RSPAMD_MULTIPATTERN_RE)) {
+            /* Fallback to pcre... */
+            rspamd_regexp_t *re;
+            mp->res = g_array_sized_new(FALSE, TRUE,
+                                        sizeof(rspamd_regexp_t *), mp->cnt);
+
+            for (guint i = 0; i < mp->cnt; i++) {
+                const ac_trie_pat_t *pat;
+                const gchar *pat_flags = NULL;
+
+                if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) {
+                    pat_flags = "u";
+                }
+
+                pat = &g_array_index(mp->pats, ac_trie_pat_t, i);
+                re = rspamd_regexp_new(pat->ptr, pat_flags, err);
+
+                if (re == NULL) {
+                    /* Release the regexps created so far, nothing else owns them */
+                    for (guint j = 0; j < mp->res->len; j++) {
+                        rspamd_regexp_unref(g_array_index(mp->res, rspamd_regexp_t *, j));
+                    }
+
+                    g_array_free(mp->res, TRUE);
+                    mp->res = NULL;
+
+                    return FALSE;
+                }
+
+                g_array_append_val(mp->res, re);
+            }
+        }
+        else {
+            mp->t = acism_create((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
+        }
+    }
+
+    mp->compiled = TRUE;
+
+    return TRUE;
+}
+
+struct rspamd_multipattern_cbdata { /* Per-lookup state handed to the backend match callbacks */
+ struct rspamd_multipattern *mp; /* Multipattern being searched */
+ const gchar *in; /* Input text given to rspamd_multipattern_lookup() */
+ gsize len; /* Length of the input text */
+ rspamd_multipattern_cb_t cb; /* User callback invoked for every match */
+ gpointer ud; /* User data forwarded to the callback */
+ guint nfound; /* Number of matches reported so far */
+ gint ret; /* Return value of the most recent callback call */
+};
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Match handler passed to hs_scan().
+ *
+ * Matches ending at offset 0 are ignored. A start offset equal to
+ * HS_OFFSET_PAST_HORIZON is replaced by 0 before the user callback is
+ * invoked. The callback result is recorded in the lookup state and returned
+ * to hyperscan.
+ */
+static gint
+rspamd_multipattern_hs_cb(unsigned int id,
+                          unsigned long long from,
+                          unsigned long long to,
+                          unsigned int flags,
+                          void *ud)
+{
+    struct rspamd_multipattern_cbdata *cbd = ud;
+    gint rc;
+
+    if (to == 0) {
+        return 0;
+    }
+
+    if (from == HS_OFFSET_PAST_HORIZON) {
+        from = 0;
+    }
+
+    rc = cbd->cb(cbd->mp, id, from, to, cbd->in, cbd->len, cbd->ud);
+    cbd->ret = rc;
+    cbd->nfound++;
+
+    return rc;
+}
+#endif
+
+/*
+ * Match handler for the non-hyperscan backends.
+ *
+ * textpos is the end offset of the match; the start offset handed to the
+ * user callback is derived by subtracting the stored length of pattern
+ * number strnum. The callback result is recorded in the lookup state and
+ * returned.
+ */
+static gint
+rspamd_multipattern_acism_cb(int strnum, int textpos, void *context)
+{
+    struct rspamd_multipattern_cbdata *cbd = context;
+    const ac_trie_pat_t *pat = &g_array_index(cbd->mp->pats, ac_trie_pat_t, strnum);
+    gint rc;
+
+    rc = cbd->cb(cbd->mp, strnum, textpos - pat->len,
+                 textpos, cbd->in, cbd->len, cbd->ud);
+    cbd->ret = rc;
+    cbd->nfound++;
+
+    return rc;
+}
+
+/*
+ * Searches the input for every pattern of a compiled multipattern and calls
+ * cb for each match.
+ *
+ * @param mp       multipattern to search with
+ * @param in       input text
+ * @param len      length of the input in bytes
+ * @param cb       callback invoked per match; a non-zero return stops the
+ *                 regexp fallback and is forwarded to the other backends
+ * @param ud       user data forwarded to cb
+ * @param pnfound  if not NULL, receives the number of reported matches
+ *                 (0 when nothing was searched)
+ * @return 0 when mp is empty, not compiled or the input is empty; with
+ *         hyperscan 0 after a complete scan, the last callback result after
+ *         a terminated scan, or the hyperscan error code; in the regexp
+ *         fallback the last callback result; otherwise the result of
+ *         acism_lookup()
+ */
+gint rspamd_multipattern_lookup(struct rspamd_multipattern *mp,
+                                const gchar *in, gsize len, rspamd_multipattern_cb_t cb,
+                                gpointer ud, guint *pnfound)
+{
+    struct rspamd_multipattern_cbdata cbd;
+    gint ret = 0;
+
+    g_assert(mp != NULL);
+
+    if (mp->cnt == 0 || !mp->compiled || len == 0) {
+        /* Nothing is searched, so report that nothing was found */
+        if (pnfound) {
+            *pnfound = 0;
+        }
+
+        return 0;
+    }
+
+    cbd.mp = mp;
+    cbd.in = in;
+    cbd.len = len;
+    cbd.cb = cb;
+    cbd.ud = ud;
+    cbd.nfound = 0;
+    cbd.ret = 0;
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        hs_scratch_t *scr = NULL;
+        guint i;
+
+        /* Take the first scratch slot whose bit is clear */
+        for (i = 0; i < MAX_SCRATCH; i++) {
+            if (!(mp->scratch_used & (1 << i))) {
+                mp->scratch_used |= (1 << i);
+                scr = mp->scratch[i];
+                break;
+            }
+        }
+
+        g_assert(scr != NULL);
+
+        ret = hs_scan(rspamd_hyperscan_get_database(mp->hs_db), in, len, 0, scr,
+                      rspamd_multipattern_hs_cb, &cbd);
+
+        mp->scratch_used &= ~(1 << i);
+
+        if (ret == HS_SUCCESS) {
+            ret = 0;
+        }
+        else if (ret == HS_SCAN_TERMINATED) {
+            ret = cbd.ret;
+        }
+
+        if (pnfound) {
+            *pnfound = cbd.nfound;
+        }
+
+        return ret;
+    }
+#endif
+
+    gint state = 0;
+
+    if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB | RSPAMD_MULTIPATTERN_RE)) {
+        /* Terribly inefficient, but who cares - just use hyperscan */
+        gboolean stop = FALSE;
+
+        for (guint i = 0; i < mp->cnt && !stop; i++) {
+            rspamd_regexp_t *re = g_array_index(mp->res, rspamd_regexp_t *, i);
+            const gchar *start = NULL, *end = NULL;
+
+            while (rspamd_regexp_search(re,
+                                        in,
+                                        len,
+                                        &start,
+                                        &end,
+                                        TRUE,
+                                        NULL)) {
+                /*
+                 * Report the offsets given by the regexp engine. The stored
+                 * pattern length is the length of the expression source and
+                 * says nothing about how long the matched text is.
+                 */
+                cbd.ret = cb(mp, i, start - in, end - in, in, len, ud);
+                cbd.nfound++;
+
+                if (cbd.ret) {
+                    stop = TRUE;
+                    break;
+                }
+            }
+        }
+
+        ret = cbd.ret;
+
+        if (pnfound) {
+            *pnfound = cbd.nfound;
+        }
+    }
+    else {
+        /* Plain trie */
+        ret = acism_lookup(mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
+                           &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
+
+        if (pnfound) {
+            *pnfound = cbd.nfound;
+        }
+    }
+
+    return ret;
+}
+
+
+/*
+ * Releases a multipattern together with everything it owns: scratch spaces
+ * and the database (hyperscan), or the trie, the fallback regexps and the
+ * stored pattern strings (other backends). Passing NULL is allowed and does
+ * nothing.
+ */
+void rspamd_multipattern_destroy(struct rspamd_multipattern *mp)
+{
+    guint i;
+
+    if (mp == NULL) {
+        return;
+    }
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        gchar *p;
+
+        if (mp->compiled && mp->cnt > 0) {
+            for (i = 0; i < MAX_SCRATCH; i++) {
+                hs_free_scratch(mp->scratch[i]);
+            }
+
+            if (mp->hs_db) {
+                rspamd_hyperscan_free(mp->hs_db, false);
+            }
+        }
+
+        for (i = 0; i < mp->cnt; i++) {
+            p = g_array_index(mp->hs_pats, gchar *, i);
+            g_free(p);
+        }
+
+        g_array_free(mp->hs_pats, TRUE);
+        g_array_free(mp->hs_ids, TRUE);
+        g_array_free(mp->hs_flags, TRUE);
+        free(mp); /* Due to posix_memalign */
+
+        return;
+    }
+#endif
+    ac_trie_pat_t pat;
+
+    /* The trie only exists for compiled, non GLOB/RE multipatterns */
+    if (mp->compiled && mp->cnt > 0 && mp->t != NULL) {
+        acism_destroy(mp->t);
+    }
+
+    /* Regexps of the GLOB/RE fallback are owned by the multipattern */
+    if (mp->res != NULL) {
+        for (i = 0; i < mp->res->len; i++) {
+            rspamd_regexp_unref(g_array_index(mp->res, rspamd_regexp_t *, i));
+        }
+
+        g_array_free(mp->res, TRUE);
+    }
+
+    for (i = 0; i < mp->cnt; i++) {
+        pat = g_array_index(mp->pats, ac_trie_pat_t, i);
+        g_free((gchar *) pat.ptr);
+    }
+
+    g_array_free(mp->pats, TRUE);
+
+    free(mp); /* Due to posix_memalign */
+}
+
+/*
+ * Returns the stored (already filtered) text of pattern number index.
+ * The string stays owned by the multipattern. index must be lower than the
+ * number of added patterns.
+ */
+const gchar *
+rspamd_multipattern_get_pattern(struct rspamd_multipattern *mp,
+                                guint index)
+{
+    const ac_trie_pat_t *entry;
+
+    g_assert(mp != NULL);
+    g_assert(index < mp->cnt);
+
+#ifdef WITH_HYPERSCAN
+    if (rspamd_hs_check()) {
+        return g_array_index(mp->hs_pats, gchar *, index);
+    }
+#endif
+
+    entry = &g_array_index(mp->pats, ac_trie_pat_t, index);
+
+    return entry->ptr;
+}
+
+/* Returns how many patterns have been added to the multipattern. */
+guint rspamd_multipattern_get_npatterns(struct rspamd_multipattern *mp)
+{
+    guint npatterns;
+
+    g_assert(mp != NULL);
+    npatterns = mp->cnt;
+
+    return npatterns;
+}
+
+/* Public wrapper around rspamd_hs_check(): TRUE when hyperscan is usable. */
+gboolean
+rspamd_multipattern_has_hyperscan(void)
+{
+    const gboolean usable = rspamd_hs_check();
+
+    return usable;
+}