summaryrefslogtreecommitdiffstats
path: root/server/util_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'server/util_pcre.c')
-rw-r--r--server/util_pcre.c334
1 files changed, 269 insertions, 65 deletions
diff --git a/server/util_pcre.c b/server/util_pcre.c
index f2cb1bb..0a9dc50 100644
--- a/server/util_pcre.c
+++ b/server/util_pcre.c
@@ -55,7 +55,21 @@ POSSIBILITY OF SUCH DAMAGE.
#include "httpd.h"
#include "apr_strings.h"
#include "apr_tables.h"
+#include "apr_thread_proc.h"
+
+#ifdef HAVE_PCRE2
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include "pcre2.h"
+#define PCREn(x) PCRE2_ ## x
+#else
#include "pcre.h"
+#define PCREn(x) PCRE_ ## x
+#endif
+
+/* PCRE_DUPNAMES is only present since version 6.7 of PCRE */
+#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2)
+#error PCRE Version 6.7 or later required!
+#else
#define APR_WANT_STRFUNC
#include "apr_want.h"
@@ -76,6 +90,26 @@ static const char *const pstring[] = {
"match failed" /* AP_REG_NOMATCH */
};
+AP_DECLARE(const char *) ap_pcre_version_string(int which)
+{
+#ifdef HAVE_PCRE2
+ static char buf[80];
+#endif
+ switch (which) {
+ case AP_REG_PCRE_COMPILED:
+ return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE));
+ case AP_REG_PCRE_LOADED:
+#ifdef HAVE_PCRE2
+ pcre2_config(PCRE2_CONFIG_VERSION, buf);
+ return buf;
+#else
+ return pcre_version();
+#endif
+ default:
+ return "Unknown";
+ }
+}
+
AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
char *errbuf, apr_size_t errbuf_size)
{
@@ -110,7 +144,11 @@ AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
{
+#ifdef HAVE_PCRE2
+ pcre2_code_free(preg->re_pcre);
+#else
(pcre_free)(preg->re_pcre);
+#endif
}
@@ -120,7 +158,7 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
* Compile a regular expression *
*************************************************/
-static int default_cflags = AP_REG_DOLLAR_ENDONLY;
+static int default_cflags = AP_REG_DEFAULT;
AP_DECLARE(int) ap_regcomp_get_default_cflags(void)
{
@@ -163,37 +201,53 @@ AP_DECLARE(int) ap_regcomp_default_cflag_by_name(const char *name)
*/
AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
{
+#ifdef HAVE_PCRE2
+ uint32_t capcount;
+ size_t erroffset;
+#else
const char *errorptr;
int erroffset;
+#endif
int errcode = 0;
- int options = PCRE_DUPNAMES;
+ int options = PCREn(DUPNAMES);
+
+ if ((cflags & AP_REG_NO_DEFAULT) == 0)
+ cflags |= default_cflags;
- cflags |= default_cflags;
if ((cflags & AP_REG_ICASE) != 0)
- options |= PCRE_CASELESS;
+ options |= PCREn(CASELESS);
if ((cflags & AP_REG_NEWLINE) != 0)
- options |= PCRE_MULTILINE;
+ options |= PCREn(MULTILINE);
if ((cflags & AP_REG_DOTALL) != 0)
- options |= PCRE_DOTALL;
+ options |= PCREn(DOTALL);
if ((cflags & AP_REG_DOLLAR_ENDONLY) != 0)
- options |= PCRE_DOLLAR_ENDONLY;
+ options |= PCREn(DOLLAR_ENDONLY);
+
+#ifdef HAVE_PCRE2
+ preg->re_pcre = pcre2_compile((const unsigned char *)pattern,
+ PCRE2_ZERO_TERMINATED, options, &errcode,
+ &erroffset, NULL);
+#else
+ preg->re_pcre = pcre_compile2(pattern, options, &errcode,
+ &errorptr, &erroffset, NULL);
+#endif
- preg->re_pcre =
- pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
preg->re_erroffset = erroffset;
-
if (preg->re_pcre == NULL) {
- /*
- * There doesn't seem to be constants defined for compile time error
- * codes. 21 is "failed to get memory" according to pcreapi(3).
- */
+ /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */
if (errcode == 21)
return AP_REG_ESPACE;
return AP_REG_INVARG;
}
+#ifdef HAVE_PCRE2
+ pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+ PCRE2_INFO_CAPTURECOUNT, &capcount);
+ preg->re_nsub = capcount;
+#else
pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
- PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
+ PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
+#endif
return 0;
}
@@ -210,7 +264,134 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
* ints. However, if the number of possible capturing brackets is small, use a
* block of store on the stack, to reduce the use of malloc/free. The threshold
* is in a macro that can be changed at configure time.
+ * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
+ * to allocate and free it, so to minimize these calls we maintain one opaque
+ * context per thread (in Thread Local Storage, TLS) grown as needed, and while
+ * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
+ * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
+ * from/to ap_thread_current() when AP_HAS_THREAD_LOCAL; otherwise we'll do
+ * the allocation and freeing for each ap_regexec().
*/
+
+#ifdef HAVE_PCRE2
+typedef pcre2_match_data* match_data_pt;
+typedef size_t* match_vector_pt;
+#else
+typedef int* match_data_pt;
+typedef int* match_vector_pt;
+#endif
+
+static APR_INLINE
+match_data_pt alloc_match_data(apr_size_t size,
+ match_vector_pt small_vector)
+{
+ match_data_pt data;
+
+#ifdef HAVE_PCRE2
+ data = pcre2_match_data_create(size, NULL);
+#else
+ if (size > POSIX_MALLOC_THRESHOLD) {
+ data = malloc(size * sizeof(int) * 3);
+ }
+ else {
+ data = small_vector;
+ }
+#endif
+
+ return data;
+}
+
+static APR_INLINE
+void free_match_data(match_data_pt data, apr_size_t size)
+{
+#ifdef HAVE_PCRE2
+ pcre2_match_data_free(data);
+#else
+ if (size > POSIX_MALLOC_THRESHOLD) {
+ free(data);
+ }
+#endif
+}
+
+#if AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL)
+
+struct apreg_tls {
+ match_data_pt data;
+ apr_size_t size;
+};
+
+#ifdef HAVE_PCRE2
+static apr_status_t apreg_tls_cleanup(void *arg)
+{
+ struct apreg_tls *tls = arg;
+ pcre2_match_data_free(tls->data); /* NULL safe */
+ return APR_SUCCESS;
+}
+#endif
+
+static match_data_pt get_match_data(apr_size_t size,
+ match_vector_pt small_vector,
+ int *to_free)
+{
+ apr_thread_t *current;
+ struct apreg_tls *tls = NULL;
+
+ /* Even though AP_HAS_THREAD_LOCAL, we may still be called by a
+ * native/non-apr thread, let's fall back to alloc/free in this case.
+ */
+ current = ap_thread_current();
+ if (!current) {
+ *to_free = 1;
+ return alloc_match_data(size, small_vector);
+ }
+
+ apr_thread_data_get((void **)&tls, "apreg", current);
+ if (!tls || tls->size < size) {
+ apr_pool_t *tp = apr_thread_pool_get(current);
+ if (!tls) {
+ tls = apr_pcalloc(tp, sizeof(*tls));
+#ifdef HAVE_PCRE2
+ apr_thread_data_set(tls, "apreg", apreg_tls_cleanup, current);
+#else
+ apr_thread_data_set(tls, "apreg", NULL, current);
+#endif
+ }
+
+ tls->size *= 2;
+ if (tls->size < size) {
+ tls->size = size;
+ if (tls->size < POSIX_MALLOC_THRESHOLD) {
+ tls->size = POSIX_MALLOC_THRESHOLD;
+ }
+ }
+
+#ifdef HAVE_PCRE2
+ pcre2_match_data_free(tls->data); /* NULL safe */
+ tls->data = pcre2_match_data_create(tls->size, NULL);
+ if (!tls->data) {
+ tls->size = 0;
+ return NULL;
+ }
+#else
+ tls->data = apr_palloc(tp, tls->size * sizeof(int) * 3);
+#endif
+ }
+
+ return tls->data;
+}
+
+#else /* AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL) */
+
+static APR_INLINE match_data_pt get_match_data(apr_size_t size,
+ match_vector_pt small_vector,
+ int *to_free)
+{
+ *to_free = 1;
+ return alloc_match_data(size, small_vector);
+}
+
+#endif /* AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL) */
+
AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
apr_size_t nmatch, ap_regmatch_t *pmatch,
int eflags)
@@ -224,75 +405,84 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
ap_regmatch_t *pmatch, int eflags)
{
int rc;
- int options = 0;
- int *ovector = NULL;
- int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
- int allocated_ovector = 0;
-
- if ((eflags & AP_REG_NOTBOL) != 0)
- options |= PCRE_NOTBOL;
- if ((eflags & AP_REG_NOTEOL) != 0)
- options |= PCRE_NOTEOL;
-
- ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */
+ int options = 0, to_free = 0;
+ match_vector_pt ovector = NULL;
+ apr_size_t ncaps = (apr_size_t)preg->re_nsub + 1;
+#ifdef HAVE_PCRE2
+ match_data_pt data = get_match_data(ncaps, NULL, &to_free);
+#else
+ int small_vector[POSIX_MALLOC_THRESHOLD * 3];
+ match_data_pt data = get_match_data(ncaps, small_vector, &to_free);
+#endif
- if (nmatch > 0) {
- if (nmatch <= POSIX_MALLOC_THRESHOLD) {
- ovector = &(small_ovector[0]);
- }
- else {
- ovector = (int *)malloc(sizeof(int) * nmatch * 3);
- if (ovector == NULL)
- return AP_REG_ESPACE;
- allocated_ovector = 1;
- }
+ if (!data) {
+ return AP_REG_ESPACE;
}
+ if ((eflags & AP_REG_NOTBOL) != 0)
+ options |= PCREn(NOTBOL);
+ if ((eflags & AP_REG_NOTEOL) != 0)
+ options |= PCREn(NOTEOL);
+
+#ifdef HAVE_PCRE2
+ rc = pcre2_match((const pcre2_code *)preg->re_pcre,
+ (const unsigned char *)buff, len,
+ 0, options, data, NULL);
+ ovector = pcre2_get_ovector_pointer(data);
+#else
+ ovector = data;
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
- 0, options, ovector, nmatch * 3);
-
- if (rc == 0)
- rc = nmatch; /* All captured slots were filled in */
+ 0, options, ovector, ncaps * 3);
+#endif
if (rc >= 0) {
- apr_size_t i;
- for (i = 0; i < (apr_size_t)rc; i++) {
+ apr_size_t n = rc, i;
+ if (n == 0 || n > nmatch)
+ rc = n = nmatch; /* All capture slots were filled in */
+ for (i = 0; i < n; i++) {
pmatch[i].rm_so = ovector[i * 2];
pmatch[i].rm_eo = ovector[i * 2 + 1];
}
- if (allocated_ovector)
- free(ovector);
for (; i < nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+ if (to_free) {
+ free_match_data(data, ncaps);
+ }
return 0;
}
-
else {
- if (allocated_ovector)
- free(ovector);
+ if (to_free) {
+ free_match_data(data, ncaps);
+ }
+#ifdef HAVE_PCRE2
+ if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
+ return AP_REG_INVARG;
+#endif
switch (rc) {
- case PCRE_ERROR_NOMATCH:
+ case PCREn(ERROR_NOMATCH):
return AP_REG_NOMATCH;
- case PCRE_ERROR_NULL:
+ case PCREn(ERROR_NULL):
return AP_REG_INVARG;
- case PCRE_ERROR_BADOPTION:
+ case PCREn(ERROR_BADOPTION):
return AP_REG_INVARG;
- case PCRE_ERROR_BADMAGIC:
+ case PCREn(ERROR_BADMAGIC):
return AP_REG_INVARG;
- case PCRE_ERROR_UNKNOWN_NODE:
- return AP_REG_ASSERT;
- case PCRE_ERROR_NOMEMORY:
+ case PCREn(ERROR_NOMEMORY):
return AP_REG_ESPACE;
-#ifdef PCRE_ERROR_MATCHLIMIT
- case PCRE_ERROR_MATCHLIMIT:
+#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT)
+ case PCREn(ERROR_MATCHLIMIT):
return AP_REG_ESPACE;
#endif
-#ifdef PCRE_ERROR_BADUTF8
- case PCRE_ERROR_BADUTF8:
+#if defined(PCRE_ERROR_UNKNOWN_NODE)
+ case PCRE_ERROR_UNKNOWN_NODE:
+ return AP_REG_ASSERT;
+#endif
+#if defined(PCRE_ERROR_BADUTF8)
+ case PCREn(ERROR_BADUTF8):
return AP_REG_INVARG;
#endif
-#ifdef PCRE_ERROR_BADUTF8_OFFSET
- case PCRE_ERROR_BADUTF8_OFFSET:
+#if defined(PCRE_ERROR_BADUTF8_OFFSET)
+ case PCREn(ERROR_BADUTF8_OFFSET):
return AP_REG_INVARG;
#endif
default:
@@ -305,17 +495,29 @@ AP_DECLARE(int) ap_regname(const ap_regex_t *preg,
apr_array_header_t *names, const char *prefix,
int upper)
{
+ char *nametable;
+
+#ifdef HAVE_PCRE2
+ uint32_t namecount;
+ uint32_t nameentrysize;
+ uint32_t i;
+ pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+ PCRE2_INFO_NAMECOUNT, &namecount);
+ pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+ PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize);
+ pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+ PCRE2_INFO_NAMETABLE, &nametable);
+#else
int namecount;
int nameentrysize;
int i;
- char *nametable;
-
pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
- PCRE_INFO_NAMECOUNT, &namecount);
+ PCRE_INFO_NAMECOUNT, &namecount);
pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
- PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
+ PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
- PCRE_INFO_NAMETABLE, &nametable);
+ PCRE_INFO_NAMETABLE, &nametable);
+#endif
for (i = 0; i < namecount; i++) {
const char *offset = nametable + i * nameentrysize;
@@ -340,4 +542,6 @@ AP_DECLARE(int) ap_regname(const ap_regex_t *preg,
return namecount;
}
+#endif /* PCRE_DUPNAMES defined */
+
/* End of pcreposix.c */