Description: build with pcre2 Origin: other, https://helperbyte.com/questions/457338/how-to-make-pcre2-support-for-apache-24 Bug-Debian: https://bugs.debian.org/1000114 Forwarded: not-needed Reviewed-By: Yadd Last-Update: 2021-12-28 --- a/build/NWGNUmakefile +++ b/build/NWGNUmakefile @@ -20,7 +20,7 @@ $(SRC)/include/ap_config_layout.h \ $(NWOS)/test_char.h \ $(PCRE)/config.h \ - $(PCRE)/pcre.h \ + $(PCRE)/pcre2.h \ $(EOLIST) nlms :: libs $(NWOS)/httpd.imp $(DAV)/main/dav.imp $(STDMOD)/cache/mod_cache.imp @@ -77,7 +77,7 @@ @echo $(DL)GEN $@$(DL) $< $@ -%.exe: $(PCRE)/%.c $(PCRE)/config.h $(PCRE)/pcre.h +%.exe: $(PCRE)/%.c $(PCRE)/config.h $(PCRE)/pcre2.h @echo $(DL)Creating Build Helper $@$(DL) $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) -DHAVE_CONFIG_H $< -o $@ @@ -117,7 +117,7 @@ clean :: $(call DEL,$(SRC)/include/ap_config_layout.h) $(call DEL,$(PCRE)/config.h) - $(call DEL,$(PCRE)/pcre.h) + $(call DEL,$(PCRE)/pcre2.h) $(call DEL,$(STDMOD)/cache/mod_cache.imp) $(call DEL,$(DAV)/main/dav.imp) $(call DEL,$(NWOS)/httpd.imp) --- a/include/ap_config_auto.h.in +++ b/include/ap_config_auto.h.in @@ -393,3 +393,6 @@ /* Define to 'int' if doesn't define it for us */ #undef rlim_t + +/* Load PCRE2 */ +#define HAVE_PCRE2 1 --- a/include/ap_regex.h +++ b/include/ap_regex.h @@ -70,19 +70,22 @@ /* Options for ap_regcomp, ap_regexec, and ap_rxplus versions: */ -#define AP_REG_ICASE 0x01 /** use a case-insensitive match */ -#define AP_REG_NEWLINE 0x02 /** don't match newlines against '.' etc */ -#define AP_REG_NOTBOL 0x04 /** ^ will not match against start-of-string */ -#define AP_REG_NOTEOL 0x08 /** $ will not match against end-of-string */ - -#define AP_REG_EXTENDED (0) /** unused */ -#define AP_REG_NOSUB (0) /** unused */ - -#define AP_REG_MULTI 0x10 /* perl's /g (needs fixing) */ -#define AP_REG_NOMEM 0x20 /* nomem in our code */ -#define AP_REG_DOTALL 0x40 /* perl's /s flag */ +#define AP_REG_ICASE 0x01 /**< use a case-insensitive match */ +#define AP_REG_NEWLINE 0x02 /**< don't match newlines against '.' etc */ +#define AP_REG_NOTBOL 0x04 /**< ^ will not match against start-of-string */ +#define AP_REG_NOTEOL 0x08 /**< $ will not match against end-of-string */ + +#define AP_REG_EXTENDED (0) /**< unused */ +#define AP_REG_NOSUB (0) /**< unused */ + +#define AP_REG_MULTI 0x10 /**< perl's /g (needs fixing) */ +#define AP_REG_NOMEM 0x20 /**< nomem in our code */ +#define AP_REG_DOTALL 0x40 /**< perl's /s flag */ + +#define AP_REG_NOTEMPTY 0x080 /**< Empty match not valid */ +#define AP_REG_ANCHORED 0x100 /**< Match at the first position */ -#define AP_REG_DOLLAR_ENDONLY 0x200 /* '$' matches at end of subject string only */ +#define AP_REG_DOLLAR_ENDONLY 0x200 /**< '$' matches at end of subject string only */ #define AP_REG_NO_DEFAULT 0x400 /**< Don't implicitely add AP_REG_DEFAULT options */ @@ -90,6 +93,12 @@ #define AP_REG_DEFAULT (AP_REG_DOTALL|AP_REG_DOLLAR_ENDONLY) +/* Arguments for ap_pcre_version_string */ +enum { + AP_REG_PCRE_COMPILED = 0, /** PCRE version used during program compilation */ + AP_REG_PCRE_LOADED /** PCRE version loaded at runtime */ +}; + /* Error values: */ enum { AP_REG_ASSERT = 1, /** internal error ? */ @@ -114,6 +123,15 @@ /* The functions */ /** + * Return PCRE version string. + * @param which Either AP_REG_PCRE_COMPILED (PCRE version used + * during program compilation) or AP_REG_PCRE_LOADED + * (PCRE version used at runtime) + * @return The PCRE version string + */ +AP_DECLARE(const char *) ap_pcre_version_string(int which); + +/** * Get default compile flags * @return Bitwise OR of AP_REG_* flags */ @@ -277,5 +295,4 @@ } /* extern "C" */ #endif -#endif /* AP_REGEX_T */ - +#endif /* AP_REGEX_H */ --- a/server/util_pcre.c +++ b/server/util_pcre.c @@ -55,19 +55,17 @@ #include "httpd.h" #include "apr_strings.h" #include "apr_tables.h" -#include "pcre.h" -/* PCRE_DUPNAMES is only present since version 6.7 of PCRE */ -#ifndef PCRE_DUPNAMES -#error PCRE Version 6.7 or later required! -#else +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" +#define PCREn(x) PCRE2_ ## x +/* PCRE_DUPNAMES is only present since version 6.7 of PCRE */ #define APR_WANT_STRFUNC #include "apr_want.h" #ifndef POSIX_MALLOC_THRESHOLD #define POSIX_MALLOC_THRESHOLD (10) -#endif /* Table of error strings corresponding to POSIX error codes; must be * kept in synch with include/ap_regex.h's AP_REG_E* definitions. @@ -81,6 +79,20 @@ "match failed" /* AP_REG_NOMATCH */ }; +AP_DECLARE(const char *) ap_pcre_version_string(int which) +{ + static char buf[80]; + switch (which) { + case AP_REG_PCRE_COMPILED: + return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE)); + case AP_REG_PCRE_LOADED: + pcre2_config(PCRE2_CONFIG_VERSION, buf); + return buf; + default: + return "Unknown"; + } +} + AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg, char *errbuf, apr_size_t errbuf_size) { @@ -115,7 +127,7 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg) { - (pcre_free)(preg->re_pcre); + pcre2_code_free(preg->re_pcre); } @@ -168,39 +180,38 @@ */ AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags) { - const char *errorptr; - int erroffset; + uint32_t capcount; + size_t erroffset; int errcode = 0; - int options = PCRE_DUPNAMES; + int options = PCREn(DUPNAMES); if ((cflags & AP_REG_NO_DEFAULT) == 0) cflags |= default_cflags; if ((cflags & AP_REG_ICASE) != 0) - options |= PCRE_CASELESS; + options |= PCREn(CASELESS); if ((cflags & AP_REG_NEWLINE) != 0) - options |= PCRE_MULTILINE; + options |= PCREn(MULTILINE); if ((cflags & AP_REG_DOTALL) != 0) - options |= PCRE_DOTALL; + options |= PCREn(DOTALL); if ((cflags & AP_REG_DOLLAR_ENDONLY) != 0) - options |= PCRE_DOLLAR_ENDONLY; + options |= PCREn(DOLLAR_ENDONLY); - preg->re_pcre = - pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL); - preg->re_erroffset = erroffset; + preg->re_pcre = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, options, &errcode, + &erroffset, NULL); + preg->re_erroffset = erroffset; if (preg->re_pcre == NULL) { - /* - * There doesn't seem to be constants defined for compile time error - * codes. 21 is "failed to get memory" according to pcreapi(3). - */ + /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */ if (errcode == 21) return AP_REG_ESPACE; return AP_REG_INVARG; } - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_CAPTURECOUNT, &capcount); + preg->re_nsub = capcount; return 0; } @@ -232,74 +243,77 @@ { int rc; int options = 0; - int *ovector = NULL; - int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; - int allocated_ovector = 0; + apr_size_t nlim; + pcre2_match_data *matchdata; + size_t *ovector; if ((eflags & AP_REG_NOTBOL) != 0) - options |= PCRE_NOTBOL; + options |= PCREn(NOTBOL); if ((eflags & AP_REG_NOTEOL) != 0) - options |= PCRE_NOTEOL; - - ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ - - if (nmatch > 0) { - if (nmatch <= POSIX_MALLOC_THRESHOLD) { - ovector = &(small_ovector[0]); - } - else { - ovector = (int *)malloc(sizeof(int) * nmatch * 3); - if (ovector == NULL) - return AP_REG_ESPACE; - allocated_ovector = 1; - } - } - - rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, - 0, options, ovector, nmatch * 3); - + options |= PCREn(NOTEOL); + if ((eflags & AP_REG_NOTEMPTY) != 0) + options |= PCREn(NOTEMPTY); + if ((eflags & AP_REG_ANCHORED) != 0) + options |= PCREn(ANCHORED); + + /* TODO: create a generic TLS matchdata buffer of some nmatch limit, + * e.g. 10 matches, to avoid a malloc-per-call. If it must be allocated, + * implement a general context using palloc and no free implementation. + */ + nlim = ((apr_size_t)preg->re_nsub + 1) > nmatch + ? ((apr_size_t)preg->re_nsub + 1) : nmatch; + matchdata = pcre2_match_data_create(nlim, NULL); + if (matchdata == NULL) + return AP_REG_ESPACE; + ovector = pcre2_get_ovector_pointer(matchdata); + rc = pcre2_match((const pcre2_code *)preg->re_pcre, + (const unsigned char *)buff, len, + 0, options, matchdata, NULL); if (rc == 0) - rc = nmatch; /* All captured slots were filled in */ + rc = nlim; /* All captured slots were filled in */ if (rc >= 0) { apr_size_t i; - for (i = 0; i < (apr_size_t)rc; i++) { + nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch; + for (i = 0; i < nlim; i++) { pmatch[i].rm_so = ovector[i * 2]; pmatch[i].rm_eo = ovector[i * 2 + 1]; } - if (allocated_ovector) - free(ovector); for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; - return 0; } + pcre2_match_data_free(matchdata); + + if (rc >= 0) { + return 0; + } else { - if (allocated_ovector) - free(ovector); + if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) + return AP_REG_INVARG; switch (rc) { - case PCRE_ERROR_NOMATCH: + case PCREn(ERROR_NOMATCH): return AP_REG_NOMATCH; - case PCRE_ERROR_NULL: + case PCREn(ERROR_NULL): return AP_REG_INVARG; - case PCRE_ERROR_BADOPTION: + case PCREn(ERROR_BADOPTION): return AP_REG_INVARG; - case PCRE_ERROR_BADMAGIC: + case PCREn(ERROR_BADMAGIC): return AP_REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: - return AP_REG_ASSERT; - case PCRE_ERROR_NOMEMORY: + case PCREn(ERROR_NOMEMORY): return AP_REG_ESPACE; -#ifdef PCRE_ERROR_MATCHLIMIT - case PCRE_ERROR_MATCHLIMIT: + case PCREn(ERROR_MATCHLIMIT): return AP_REG_ESPACE; +#if defined(PCRE_ERROR_UNKNOWN_NODE) + case PCRE_ERROR_UNKNOWN_NODE: + return AP_REG_ASSERT; #endif -#ifdef PCRE_ERROR_BADUTF8 - case PCRE_ERROR_BADUTF8: +#if defined(PCRE_ERROR_BADUTF8) + case PCREn(ERROR_BADUTF8): return AP_REG_INVARG; #endif -#ifdef PCRE_ERROR_BADUTF8_OFFSET - case PCRE_ERROR_BADUTF8_OFFSET: +#if defined(PCRE_ERROR_BADUTF8_OFFSET) + case PCREn(ERROR_BADUTF8_OFFSET): return AP_REG_INVARG; #endif default: @@ -312,17 +326,17 @@ apr_array_header_t *names, const char *prefix, int upper) { - int namecount; - int nameentrysize; - int i; char *nametable; - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMECOUNT, &namecount); - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMETABLE, &nametable); + uint32_t namecount; + uint32_t nameentrysize; + uint32_t i; + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMECOUNT, &namecount); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMETABLE, &nametable); for (i = 0; i < namecount; i++) { const char *offset = nametable + i * nameentrysize;