summaryrefslogtreecommitdiffstats
path: root/runtime/regexp.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 16:28:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 16:28:20 +0000
commitdcc721a95bef6f0d8e6d8775b8efe33e5aecd562 (patch)
tree66a2774cd0ee294d019efd71d2544c70f42b2842 /runtime/regexp.c
parentInitial commit. (diff)
downloadrsyslog-dcc721a95bef6f0d8e6d8775b8efe33e5aecd562.tar.xz
rsyslog-dcc721a95bef6f0d8e6d8775b8efe33e5aecd562.zip
Adding upstream version 8.2402.0.upstream/8.2402.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'runtime/regexp.c')
-rw-r--r--runtime/regexp.c384
1 files changed, 384 insertions, 0 deletions
diff --git a/runtime/regexp.c b/runtime/regexp.c
new file mode 100644
index 0000000..433c9c2
--- /dev/null
+++ b/runtime/regexp.c
@@ -0,0 +1,384 @@
+/* The regexp object.
+ *
+ * Module begun 2008-03-05 by Rainer Gerhards, based on some code
+ * from syslogd.c
+ *
+ * Copyright 2008-2012 Adiscon GmbH.
+ *
+ * This file is part of the rsyslog runtime library.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * -or-
+ * see COPYING.ASL20 in the source distribution
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include <pthread.h>
+#include <regex.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include "rsyslog.h"
+#include "module-template.h"
+#include "obj.h"
+#include "regexp.h"
+#include "errmsg.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+
+MODULE_TYPE_LIB
+MODULE_TYPE_NOKEEP
+
+/* static data */
+DEFobjStaticHelpers
+
+/* When using glibc, we enable per-thread regex to avoid lock contention.
+ * See:
+ * - https://github.com/rsyslog/rsyslog/issues/2759
+ * - https://github.com/rsyslog/rsyslog/pull/2786
+ * - https://sourceware.org/bugzilla/show_bug.cgi?id=11159
+ *
+ * This should not affect BSD as they don't seem to take a lock in regexec.
+ */
+#ifdef __GLIBC__
+#define USE_PERTHREAD_REGEX 1
+#else
+#define USE_PERTHREAD_REGEX 0
+#endif
+
+static pthread_mutex_t mut_regexp;
+
+// Map a regex_t to its associated uncompiled parameters.
+static struct hashtable *regex_to_uncomp = NULL;
+
+// Map a (regexp_t, pthead_t) to a perthread_regex.
+static struct hashtable *perthread_regexs = NULL;
+
+
+/*
+ * This stores un-compiled regex to allow further
+ * call to regexec to re-compile a new regex dedicated
+ * to the calling thread.
+ */
+typedef struct uncomp_regex {
+ char *regex;
+ int cflags;
+ regex_t *preg;
+} uncomp_regex_t;
+
+/*
+ * This stores a regex dedicated to a single thread.
+ */
+typedef struct perthread_regex {
+ const regex_t *original_preg;
+ regex_t preg;
+ int ret;
+ pthread_mutex_t lock;
+ pthread_t thread;
+} perthread_regex_t;
+
+
+static unsigned __attribute__((nonnull(1))) int hash_from_regex(void *k) {
+ return (uintptr_t)*(regex_t **)k;
+}
+
+static int key_equals_regex(void *key1, void *key2) {
+ return *(regex_t **)key1 == *(regex_t **)key2;
+}
+
+static unsigned __attribute__((nonnull(1))) int hash_from_tregex(void *k) {
+ perthread_regex_t *entry = k;
+ // Cast to (void*) is ok here because already used in other parts of the code.
+ uintptr_t thread_id = (uintptr_t)(void *)entry->thread;
+
+ return thread_id ^ (uintptr_t)entry->original_preg;
+}
+
+static int key_equals_tregex(void *key1, void *key2) {
+ perthread_regex_t *entry1 = key1;
+ perthread_regex_t *entry2 = key2;
+
+ return (pthread_equal(entry1->thread, entry2->thread) &&
+ entry1->original_preg == entry2->original_preg);
+}
+
+
+/* ------------------------------ methods ------------------------------ */
+
+
+// Create a copy of preg to be used by this thread only.
+static perthread_regex_t *create_perthread_regex(const regex_t *preg, uncomp_regex_t *uncomp) {
+ perthread_regex_t *entry = NULL;
+
+ if (Debug) {
+ DBGPRINTF("Creating new regex_t for thread %p original regexp_t %p (pattern: %s, cflags: %x)\n",
+ (void *)pthread_self(), preg,
+ uncomp->regex, uncomp->cflags);
+ }
+ entry = calloc(1, sizeof(*entry));
+ if (!entry)
+ return entry;
+ entry->original_preg = preg;
+ DBGPRINTF("regexp: regcomp %p %p\n", entry, &entry->preg);
+ entry->ret = regcomp(&entry->preg, uncomp->regex, uncomp->cflags);
+ pthread_mutex_init(&entry->lock, NULL);
+ entry->thread = pthread_self();
+ return entry;
+}
+
+// Get (or create) a regex_t to be used by the current thread.
+static perthread_regex_t *get_perthread_regex(const regex_t *preg) {
+ perthread_regex_t *entry = NULL;
+ perthread_regex_t key = { .original_preg = preg, .thread = pthread_self() };
+
+ pthread_mutex_lock(&mut_regexp);
+ entry = hashtable_search(perthread_regexs, (void *)&key);
+ if (!entry) {
+ uncomp_regex_t *uncomp = hashtable_search(regex_to_uncomp, (void *)&preg);
+
+ if (uncomp) {
+ entry = create_perthread_regex(preg, uncomp);
+ if(!hashtable_insert(perthread_regexs, (void *)entry, entry)) {
+ LogError(0, RS_RET_INTERNAL_ERROR,
+ "error trying to insert thread-regexp into hash-table - things "
+ "will not work 100%% correctly (mostly probably out of memory issue)");
+ }
+ }
+ }
+ if (entry) {
+ pthread_mutex_lock(&entry->lock);
+ }
+ pthread_mutex_unlock(&mut_regexp);
+ return entry;
+}
+
+static void remove_uncomp_regexp(regex_t *preg) {
+ uncomp_regex_t *uncomp = NULL;
+
+ pthread_mutex_lock(&mut_regexp);
+ uncomp = hashtable_remove(regex_to_uncomp, (void *)&preg);
+
+ if (uncomp) {
+ if (Debug) {
+ DBGPRINTF("Removing everything linked to regexp_t %p (pattern: %s, cflags: %x)\n",
+ preg, uncomp->regex, uncomp->cflags);
+ }
+ free(uncomp->regex);
+ free(uncomp);
+ }
+ pthread_mutex_unlock(&mut_regexp);
+}
+
+static void _regfree(regex_t *preg) {
+ int ret = 0;
+ struct hashtable_itr *itr = NULL;
+
+ if (!preg)
+ return;
+
+ regfree(preg);
+ remove_uncomp_regexp(preg);
+
+ pthread_mutex_lock(&mut_regexp);
+ if (!hashtable_count(perthread_regexs)) {
+ pthread_mutex_unlock(&mut_regexp);
+ return;
+ }
+
+ // This can be long to iterate other all regexps, but regfree doesn't get called
+ // a lot during processing.
+ itr = hashtable_iterator(perthread_regexs);
+ do {
+ perthread_regex_t *entry = (perthread_regex_t *)hashtable_iterator_value(itr);
+
+ // Do it before freeing the entry.
+ ret = hashtable_iterator_advance(itr);
+
+ if (entry->original_preg == preg) {
+ // This allows us to avoid freeing this while somebody is still using it.
+ pthread_mutex_lock(&entry->lock);
+ // We can unlock immediately after because mut_regexp is locked.
+ pthread_mutex_unlock(&entry->lock);
+ pthread_mutex_destroy(&entry->lock);
+ regfree(&entry->preg);
+
+ // Do it last because it will free entry.
+ hashtable_remove(perthread_regexs, (void *)entry);
+ }
+ } while (ret);
+ free(itr);
+
+ pthread_mutex_unlock(&mut_regexp);
+}
+
+static int _regcomp(regex_t *preg, const char *regex, int cflags) {
+ int ret = 0;
+ regex_t **ppreg = NULL;
+ uncomp_regex_t *uncomp;
+
+ // Remove previous data if caller forgot to call regfree().
+ remove_uncomp_regexp(preg);
+
+ // Make sure preg itself it correctly initalized.
+ ret = regcomp(preg, regex, cflags);
+ if (ret != 0)
+ return ret;
+
+ uncomp = calloc(1, sizeof(*uncomp));
+ if (!uncomp)
+ return REG_ESPACE;
+
+ uncomp->preg = preg;
+ uncomp->regex = strdup(regex);
+ uncomp->cflags = cflags;
+ pthread_mutex_lock(&mut_regexp);
+
+ // We need to allocate the key because hashtable will free it on remove.
+ ppreg = malloc(sizeof(regex_t *));
+ *ppreg = preg;
+ ret = hashtable_insert(regex_to_uncomp, (void *)ppreg, uncomp);
+ pthread_mutex_unlock(&mut_regexp);
+ if (ret == 0) {
+ free(uncomp->regex);
+ free(uncomp);
+ return REG_ESPACE;
+ }
+
+ perthread_regex_t *entry = get_perthread_regex(preg);
+ if (entry) {
+ ret = entry->ret;
+ pthread_mutex_unlock(&entry->lock);
+ } else {
+ ret = REG_ESPACE;
+ }
+ return ret;
+}
+
+static int _regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) {
+ perthread_regex_t *entry = get_perthread_regex(preg);
+ int ret = REG_NOMATCH;
+ if(entry != NULL) {
+ ret = regexec(&entry->preg, string, nmatch, pmatch, eflags);
+ pthread_mutex_unlock(&entry->lock);
+ }
+ return ret;
+}
+
+static size_t _regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) {
+ perthread_regex_t *entry = get_perthread_regex(preg);
+
+ if (entry)
+ preg = &entry->preg;
+
+ size_t ret = regerror(errcode, preg, errbuf, errbuf_size);
+
+ if (entry)
+ pthread_mutex_unlock(&entry->lock);
+
+ return ret;
+}
+
+/* queryInterface function
+ * rgerhards, 2008-03-05
+ */
+BEGINobjQueryInterface(regexp)
+CODESTARTobjQueryInterface(regexp)
+ if(pIf->ifVersion != regexpCURR_IF_VERSION) { /* check for current version, increment on each change */
+ ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED);
+ }
+
+ /* ok, we have the right interface, so let's fill it
+ * Please note that we may also do some backwards-compatibility
+ * work here (if we can support an older interface version - that,
+ * of course, also affects the "if" above).
+ */
+ if (USE_PERTHREAD_REGEX) {
+ pIf->regcomp = _regcomp;
+ pIf->regexec = _regexec;
+ pIf->regerror = _regerror;
+ pIf->regfree = _regfree;
+ } else {
+ pIf->regcomp = regcomp;
+ pIf->regexec = regexec;
+ pIf->regerror = regerror;
+ pIf->regfree = regfree;
+ }
+
+finalize_it:
+ENDobjQueryInterface(regexp)
+
+
+/* Initialize the regexp class. Must be called as the very first method
+ * before anything else is called inside this class.
+ * rgerhards, 2008-02-19
+ */
+BEGINAbstractObjClassInit(regexp, 1, OBJ_IS_LOADABLE_MODULE) /* class, version */
+ /* request objects we use */
+
+ if (USE_PERTHREAD_REGEX) {
+ pthread_mutex_init(&mut_regexp, NULL);
+
+ regex_to_uncomp = create_hashtable(100, hash_from_regex, key_equals_regex, NULL);
+ perthread_regexs = create_hashtable(100, hash_from_tregex, key_equals_tregex, NULL);
+ if(regex_to_uncomp == NULL || perthread_regexs == NULL) {
+ LogError(0, RS_RET_INTERNAL_ERROR, "error trying to initialize hash-table "
+ "for regexp table. regexp will be disabled.");
+ if (regex_to_uncomp) hashtable_destroy(regex_to_uncomp, 1);
+ if (perthread_regexs) hashtable_destroy(perthread_regexs, 1);
+ regex_to_uncomp = NULL;
+ perthread_regexs = NULL;
+ ABORT_FINALIZE(RS_RET_INTERNAL_ERROR);
+ }
+ }
+
+ENDObjClassInit(regexp)
+
+
+/* Exit the class.
+ */
+BEGINObjClassExit(regexp, OBJ_IS_LOADABLE_MODULE) /* class, version */
+ if (USE_PERTHREAD_REGEX) {
+ /* release objects we no longer need */
+ pthread_mutex_destroy(&mut_regexp);
+ if (regex_to_uncomp)
+ hashtable_destroy(regex_to_uncomp, 1);
+ if (perthread_regexs)
+ hashtable_destroy(perthread_regexs, 1);
+ }
+ENDObjClassExit(regexp)
+
+
+/* --------------- here now comes the plumbing that makes as a library module --------------- */
+
+
+BEGINmodExit
+CODESTARTmodExit
+ENDmodExit
+
+
+BEGINqueryEtryPt
+CODESTARTqueryEtryPt
+CODEqueryEtryPt_STD_LIB_QUERIES
+ENDqueryEtryPt
+
+
+BEGINmodInit()
+CODESTARTmodInit
+ *ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */
+
+ CHKiRet(regexpClassInit(pModInfo)); /* must be done after tcps_sess, as we use it */
+ /* Initialize all classes that are in our module - this includes ourselfs */
+ENDmodInit
+/* vi:set ai:
+ */