summaryrefslogtreecommitdiffstats
path: root/runtime/regexp.c
blob: 433c9c24becaa26373118275b49d4c3fd34114b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
/* The regexp object.
 *
 * Module begun 2008-03-05 by Rainer Gerhards, based on some code
 * from syslogd.c
 *
 * Copyright 2008-2012 Adiscon GmbH.
 *
 * This file is part of the rsyslog runtime library.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *       -or-
 *       see COPYING.ASL20 in the source distribution
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "config.h"
#include <pthread.h>
#include <regex.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>

#include "rsyslog.h"
#include "module-template.h"
#include "obj.h"
#include "regexp.h"
#include "errmsg.h"
#include "hashtable.h"
#include "hashtable_itr.h"

/* Module/object boilerplate: these are macros supplied by the rsyslog headers
 * included above; their expansions are not visible in this file.
 * NOTE(review): presumably they mark this as a library-type module that may
 * be unloaded when unused - confirm against module-template.h.
 */
MODULE_TYPE_LIB
MODULE_TYPE_NOKEEP

/* static data */
DEFobjStaticHelpers

/* When using glibc, we enable per-thread regex to avoid lock contention.
 * See:
 * - https://github.com/rsyslog/rsyslog/issues/2759
 * - https://github.com/rsyslog/rsyslog/pull/2786
 * - https://sourceware.org/bugzilla/show_bug.cgi?id=11159
 *
 * This should not affect BSD as they don't seem to take a lock in regexec.
 *
 * USE_PERTHREAD_REGEX is always defined (1 or 0) and is tested with a plain
 * `if` at run time, so the per-thread wrappers below are compiled on every
 * platform and merely selected (or not) when the interface is filled in.
 */
#ifdef __GLIBC__
#define USE_PERTHREAD_REGEX 1
#else
#define USE_PERTHREAD_REGEX 0
#endif

// Serializes every access to the two hash tables below; it is initialized in
// the class-init code and destroyed in the class-exit code of this file.
static pthread_mutex_t mut_regexp;

// Map a regex_t to its associated uncompiled parameters.
// Keys are heap-allocated `regex_t *` slots, values are uncomp_regex_t records.
static struct hashtable *regex_to_uncomp = NULL;

// Map a (regex_t, pthread_t) to a perthread_regex.
// The perthread_regex_t record is used as both the key and the value.
static struct hashtable *perthread_regexs = NULL;


/*
 * This stores an un-compiled regex (pattern text plus compile flags) so that
 * a later call to regexec from another thread can compile a fresh regex
 * dedicated to that calling thread.
 */
typedef struct uncomp_regex {
	char *regex;	/* heap copy (strdup) of the pattern text; freed with the record */
	int cflags;	/* the cflags value that was passed to regcomp() */
	regex_t *preg;	/* the caller's regex_t this record was created for */
} uncomp_regex_t;

/*
 * This stores a regex dedicated to a single thread.
 * The pair (original_preg, thread) identifies the record in perthread_regexs.
 */
typedef struct perthread_regex {
	const regex_t *original_preg;	/* caller-visible regex_t this copy belongs to */
	regex_t preg;			/* the thread-private compiled regex */
	int ret;			/* return code of the regcomp() that filled preg */
	pthread_mutex_t lock;		/* held while preg is in use; regfree waits on it */
	pthread_t thread;		/* thread that created (and uses) this copy */
} perthread_regex_t;


/* Hash callback for the regex_to_uncomp table.
 * k points at a `regex_t *` slot; the hash is the address stored in that
 * slot, converted to unsigned int.
 */
static unsigned __attribute__((nonnull(1))) int hash_from_regex(void *k) {
	regex_t *const *slot = k;
	const uintptr_t addr = (uintptr_t)*slot;

	return addr;
}

/* Key-equality callback for the regex_to_uncomp table.
 * Both arguments point at `regex_t *` slots; the keys are equal (non-zero
 * result) when the two slots hold the same address.
 */
static int key_equals_regex(void *key1, void *key2) {
	regex_t *const lhs = *(regex_t **)key1;
	regex_t *const rhs = *(regex_t **)key2;

	return lhs == rhs;
}

/* Hash callback for the perthread_regexs table.
 * k is a perthread_regex_t; the hash mixes (XOR) the owning thread id with
 * the address of the original regex_t.
 */
static unsigned __attribute__((nonnull(1))) int hash_from_tregex(void *k) {
	perthread_regex_t *item = k;
	// Cast to (void*) is ok here because already used in other parts of the code.
	const uintptr_t tid = (uintptr_t)(void *)item->thread;
	const uintptr_t re_addr = (uintptr_t)item->original_preg;

	return tid ^ re_addr;
}

/* Key-equality callback for the perthread_regexs table.
 * Two records are the same key when they belong to the same thread and
 * refer to the same original regex_t.
 */
static int key_equals_tregex(void *key1, void *key2) {
	perthread_regex_t *lhs = key1;
	perthread_regex_t *rhs = key2;

	if (!pthread_equal(lhs->thread, rhs->thread)) {
		return 0;
	}
	return lhs->original_preg == rhs->original_preg;
}


/* ------------------------------ methods ------------------------------ */


// Build the calling thread's private compiled copy of preg.
//
// The pattern and flags come from uncomp. Returns a freshly allocated record
// (its `ret` field carries the regcomp() result, which may be an error code),
// or NULL when the record itself cannot be allocated.
static perthread_regex_t *create_perthread_regex(const regex_t *preg, uncomp_regex_t *uncomp) {
	perthread_regex_t *copy;

	if (Debug) {
		DBGPRINTF("Creating new regex_t for thread %p original regexp_t %p (pattern: %s, cflags: %x)\n",
			  (void *)pthread_self(), preg,
			  uncomp->regex, uncomp->cflags);
	}

	copy = calloc(1, sizeof(*copy));
	if (copy == NULL) {
		return NULL;
	}

	// Bookkeeping fields first; none of them depend on the compile result.
	copy->original_preg = preg;
	copy->thread = pthread_self();
	pthread_mutex_init(&copy->lock, NULL);

	DBGPRINTF("regexp: regcomp %p %p\n", copy, &copy->preg);
	copy->ret = regcomp(&copy->preg, uncomp->regex, uncomp->cflags);
	return copy;
}

// Get (or create) a regex_t to be used by the current thread.
//
// Looks up the (preg, calling thread) pair in perthread_regexs; on a miss the
// pattern registered for preg in regex_to_uncomp is compiled into a new
// per-thread record, which is then inserted into the table.
//
// Returns the record with its `lock` HELD - the caller must unlock
// entry->lock when done - or NULL when preg is not registered or the record
// could not be allocated.
static perthread_regex_t *get_perthread_regex(const regex_t *preg) {
	perthread_regex_t *entry = NULL;
	perthread_regex_t key = { .original_preg = preg, .thread = pthread_self() };

	pthread_mutex_lock(&mut_regexp);
	entry = hashtable_search(perthread_regexs, (void *)&key);
	if (!entry) {
		uncomp_regex_t *uncomp = hashtable_search(regex_to_uncomp, (void *)&preg);

		if (uncomp) {
			entry = create_perthread_regex(preg, uncomp);
			// create_perthread_regex() returns NULL on allocation failure; a NULL
			// key must never reach the table because the hash callback
			// dereferences it.
			if (entry && !hashtable_insert(perthread_regexs, (void *)entry, entry)) {
				// Best effort: the record is still handed to the caller so the
				// current operation works, but it is not tracked by the table
				// and so will not be found (or released) later.
				LogError(0, RS_RET_INTERNAL_ERROR,
					"error trying to insert thread-regexp into hash-table - things "
					"will not work 100%% correctly (mostly probably out of memory issue)");
			}
		}
	}
	if (entry) {
		// Taken while mut_regexp is still held so that _regfree cannot tear the
		// record down between lookup and use.
		pthread_mutex_lock(&entry->lock);
	}
	pthread_mutex_unlock(&mut_regexp);
	return entry;
}

/* Drop the uncompiled (pattern, cflags) record registered for preg, if any,
 * and release its memory. Only the regex_to_uncomp table is touched.
 */
static void remove_uncomp_regexp(regex_t *preg) {
	uncomp_regex_t *stale;

	pthread_mutex_lock(&mut_regexp);
	stale = hashtable_remove(regex_to_uncomp, (void *)&preg);
	if (stale == NULL) {
		// Nothing was registered for this regex_t.
		pthread_mutex_unlock(&mut_regexp);
		return;
	}

	if (Debug) {
		DBGPRINTF("Removing everything linked to regexp_t %p (pattern: %s, cflags: %x)\n",
			  preg, stale->regex, stale->cflags);
	}
	free(stale->regex);
	free(stale);
	pthread_mutex_unlock(&mut_regexp);
}

/* Per-thread-aware replacement for regfree().
 *
 * Frees the caller's regex_t, drops its uncompiled record, and then releases
 * every per-thread copy that was derived from it. A NULL preg is ignored.
 */
static void _regfree(regex_t *preg) {
	int ret = 0;
	struct hashtable_itr *itr = NULL;

	if (!preg)
		return;

	regfree(preg);
	remove_uncomp_regexp(preg);

	pthread_mutex_lock(&mut_regexp);
	if (!hashtable_count(perthread_regexs)) {
		pthread_mutex_unlock(&mut_regexp);
		return;
	}

	// It can take long to iterate over all regexps, but regfree doesn't get
	// called a lot during processing.
	itr = hashtable_iterator(perthread_regexs);
	if (!itr) {
		// The iterator is a separate allocation (it is released with free()
		// below), so creating it can fail. Without it the per-thread copies
		// cannot be walked; leave them in place rather than dereferencing NULL.
		pthread_mutex_unlock(&mut_regexp);
		return;
	}
	do {
		perthread_regex_t *entry = (perthread_regex_t *)hashtable_iterator_value(itr);

		// Do it before freeing the entry.
		ret = hashtable_iterator_advance(itr);

		if (entry->original_preg == preg) {
			// This allows us to avoid freeing this while somebody is still using it.
			pthread_mutex_lock(&entry->lock);
			// We can unlock immediately after because mut_regexp is locked.
			pthread_mutex_unlock(&entry->lock);
			pthread_mutex_destroy(&entry->lock);
			regfree(&entry->preg);

			// Do it last because it will free entry.
			hashtable_remove(perthread_regexs, (void *)entry);
		}
	} while (ret);
	free(itr);

	pthread_mutex_unlock(&mut_regexp);
}

/* Per-thread-aware replacement for regcomp().
 *
 * Compiles the pattern into preg, records the (pattern, cflags) pair under
 * preg in regex_to_uncomp so other threads can build their own copy later,
 * and creates the calling thread's copy right away.
 *
 * Returns 0 on success, the regcomp() error code if compilation fails, or
 * REG_ESPACE when the bookkeeping cannot be allocated or registered.
 */
static int _regcomp(regex_t *preg, const char *regex, int cflags) {
	int ret = 0;
	regex_t **ppreg = NULL;
	uncomp_regex_t *uncomp = NULL;
	perthread_regex_t *entry = NULL;

	// Remove previous data if caller forgot to call regfree().
	// NOTE(review): this only purges the uncompiled record; per-thread copies
	// left over from an earlier compile of the same preg are not removed here.
	remove_uncomp_regexp(preg);

	// Make sure preg itself is correctly initialized.
	ret = regcomp(preg, regex, cflags);
	if (ret != 0)
		return ret;

	// Allocate all bookkeeping up front (outside the lock) and verify every
	// allocation before anything is dereferenced or published.
	// NOTE(review): on the REG_ESPACE paths below preg stays compiled, exactly
	// as in the original out-of-memory path.
	uncomp = calloc(1, sizeof(*uncomp));
	// We need to allocate the key because hashtable will free it on remove.
	ppreg = malloc(sizeof(*ppreg));
	if (uncomp)
		uncomp->regex = strdup(regex);
	if (!uncomp || !ppreg || !uncomp->regex) {
		if (uncomp)
			free(uncomp->regex);
		free(uncomp);
		free(ppreg);
		return REG_ESPACE;
	}

	uncomp->preg = preg;
	uncomp->cflags = cflags;
	*ppreg = preg;

	pthread_mutex_lock(&mut_regexp);
	ret = hashtable_insert(regex_to_uncomp, (void *)ppreg, uncomp);
	pthread_mutex_unlock(&mut_regexp);
	if (ret == 0) {
		// The table did not take ownership of key or value: release both.
		free(ppreg);
		free(uncomp->regex);
		free(uncomp);
		return REG_ESPACE;
	}

	// Compile this thread's private copy now and report its result.
	entry = get_perthread_regex(preg);
	if (entry) {
		ret = entry->ret;
		pthread_mutex_unlock(&entry->lock);
	} else {
		ret = REG_ESPACE;
	}
	return ret;
}

/* Per-thread-aware replacement for regexec().
 *
 * Runs the match against the calling thread's private copy of preg.
 * Returns the regexec() result, or REG_NOMATCH when no per-thread copy is
 * available.
 */
static int _regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) {
	perthread_regex_t *mine = get_perthread_regex(preg);
	int rc;

	if (mine == NULL) {
		return REG_NOMATCH;
	}

	// get_perthread_regex() handed the record over locked; release it after use.
	rc = regexec(&mine->preg, string, nmatch, pmatch, eflags);
	pthread_mutex_unlock(&mine->lock);
	return rc;
}

/* Per-thread-aware replacement for regerror().
 *
 * Formats errcode using the calling thread's private copy of preg when one
 * exists, otherwise using preg itself. Returns whatever regerror() returns.
 */
static size_t _regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) {
	perthread_regex_t *mine = get_perthread_regex(preg);
	size_t needed;

	if (mine == NULL) {
		// No per-thread copy: fall back to the regex_t supplied by the caller.
		return regerror(errcode, preg, errbuf, errbuf_size);
	}

	needed = regerror(errcode, &mine->preg, errbuf, errbuf_size);
	pthread_mutex_unlock(&mine->lock);
	return needed;
}

/* queryInterface function
 * rgerhards, 2008-03-05
 *
 * Fills the caller-supplied interface (pIf) with the four regex entry points.
 * A request for any interface version other than regexpCURR_IF_VERSION is
 * rejected with RS_RET_INTERFACE_NOT_SUPPORTED.
 */
BEGINobjQueryInterface(regexp)
CODESTARTobjQueryInterface(regexp)
	if(pIf->ifVersion != regexpCURR_IF_VERSION) { /* check for current version, increment on each change */
		ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED);
	}

	/* ok, we have the right interface, so let's fill it
	 * Please note that we may also do some backwards-compatibility
	 * work here (if we can support an older interface version - that,
	 * of course, also affects the "if" above).
	 */
	if (USE_PERTHREAD_REGEX) {
		/* glibc: hand out the wrappers that keep one compiled regex per thread */
		pIf->regcomp = _regcomp;
		pIf->regexec = _regexec;
		pIf->regerror = _regerror;
		pIf->regfree = _regfree;
	} else {
		/* everywhere else: expose the C library functions directly */
		pIf->regcomp = regcomp;
		pIf->regexec = regexec;
		pIf->regerror = regerror;
		pIf->regfree = regfree;
	}

finalize_it:
ENDobjQueryInterface(regexp)


/* Initialize the regexp class. Must be called as the very first method
 * before anything else is called inside this class.
 * rgerhards, 2008-02-19
 *
 * When per-thread regexes are enabled this sets up the global mutex and the
 * two hash tables; if either table cannot be created, whatever was created
 * is torn down again and initialization fails with RS_RET_INTERNAL_ERROR.
 */
BEGINAbstractObjClassInit(regexp, 1, OBJ_IS_LOADABLE_MODULE) /* class, version */
	/* request objects we use */

	if (USE_PERTHREAD_REGEX) {
		pthread_mutex_init(&mut_regexp, NULL);

		/* both tables start with an initial size hint of 100 */
		regex_to_uncomp = create_hashtable(100, hash_from_regex, key_equals_regex, NULL);
		perthread_regexs = create_hashtable(100, hash_from_tregex, key_equals_tregex, NULL);
		if(regex_to_uncomp == NULL || perthread_regexs == NULL) {
			LogError(0, RS_RET_INTERNAL_ERROR, "error trying to initialize hash-table "
							 "for regexp table. regexp will be disabled.");
			/* release the table that did get created, then reset both pointers */
			if (regex_to_uncomp) hashtable_destroy(regex_to_uncomp, 1);
			if (perthread_regexs) hashtable_destroy(perthread_regexs, 1);
			regex_to_uncomp = NULL;
			perthread_regexs = NULL;
			ABORT_FINALIZE(RS_RET_INTERNAL_ERROR);
		}
	}

ENDObjClassInit(regexp)


/* Exit the class.
 *
 * Tears down the global mutex and both hash tables when per-thread regexes
 * are in use.
 */
BEGINObjClassExit(regexp, OBJ_IS_LOADABLE_MODULE) /* class, version */
	if (USE_PERTHREAD_REGEX) {
		/* release objects we no longer need */
		pthread_mutex_destroy(&mut_regexp);
		if (regex_to_uncomp)
			hashtable_destroy(regex_to_uncomp, 1);
		/* perthread_regexs stores the very same pointer as key AND value, and
		 * the table always frees its keys. Asking it to free the values too
		 * would free every remaining record twice, so pass 0 here.
		 */
		if (perthread_regexs)
			hashtable_destroy(perthread_regexs, 0);
	}
ENDObjClassExit(regexp)


/* --------------- here now comes the plumbing that makes as a library module --------------- */


/* Module exit hook: intentionally empty, no statements are added between the
 * framework macros.
 */
BEGINmodExit
CODESTARTmodExit
ENDmodExit


/* Entry-point query: only the standard library-module queries provided by
 * the framework macro are answered; nothing module-specific is added.
 */
BEGINqueryEtryPt
CODESTARTqueryEtryPt
CODEqueryEtryPt_STD_LIB_QUERIES
ENDqueryEtryPt


/* Module initialization: reports the module interface version we implement
 * and initializes the regexp class defined in this file.
 */
BEGINmodInit()
CODESTARTmodInit
	*ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */

	/* NOTE(review): the original remark here said this "must be done after
	 * tcps_sess"; nothing in this file refers to tcps_sess, so it looks like
	 * a leftover from another module - confirm before relying on it.
	 */
	CHKiRet(regexpClassInit(pModInfo));
	/* Initialize all classes that are in our module - this includes ourselves */
ENDmodInit
/* vi:set ai:
 */