diff options
Diffstat (limited to 'src/regex.c')
-rw-r--r-- | src/regex.c | 459 |
1 files changed, 459 insertions, 0 deletions
diff --git a/src/regex.c b/src/regex.c new file mode 100644 index 0000000..19c7eda --- /dev/null +++ b/src/regex.c @@ -0,0 +1,459 @@ +/* + * Regex and string management functions. + * + * Copyright 2000-2010 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include <haproxy/api.h> +#include <haproxy/errors.h> +#include <haproxy/global.h> +#include <haproxy/regex.h> +#include <haproxy/tools.h> + +/* regex trash buffer used by various regex tests */ +THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */ + +int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches) +{ + char *old_dst = dst; + char* dst_end = dst + dst_size; + + while (*str) { + if (*str == '\\') { + str++; + if (!*str) + return -1; + + if (isdigit((unsigned char)*str)) { + int len, num; + + num = *str - '0'; + str++; + + if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) { + len = matches[num].rm_eo - matches[num].rm_so; + + if (dst + len >= dst_end) + return -1; + + memcpy(dst, src + matches[num].rm_so, len); + dst += len; + } + + } else if (*str == 'x') { + unsigned char hex1, hex2; + str++; + + if (!*str) + return -1; + + hex1 = toupper((unsigned char)*str++) - '0'; + + if (!*str) + return -1; + + hex2 = toupper((unsigned char)*str++) - '0'; + + if (hex1 > 9) hex1 -= 'A' - '9' - 1; + if (hex2 > 9) hex2 -= 'A' - '9' - 1; + + if (dst >= dst_end) + return -1; + + *dst++ = (hex1<<4) + hex2; + } else { + if (dst >= dst_end) + return -1; + + *dst++ = *str++; + } + } else { + if (dst >= dst_end) + return -1; + + *dst++ = *str++; + } + } + if (dst >= dst_end) + return -1; + + *dst = '\0'; + return dst - old_dst; +} + +/* returns NULL if the replacement string <str> is valid, or the pointer to the first error */ +const char *check_replace_string(const char *str) +{ + const char *err = NULL; + while (*str) { + if (*str == '\\') { + err = str; /* in case of a backslash, we return the pointer to it */ + str++; + if (!*str) + return err; + else if (isdigit((unsigned char)*str)) + err = NULL; + else if (*str == 'x') { + str++; + if (!ishex(*str)) + return err; + str++; + if (!ishex(*str)) + return err; + err = NULL; + } + else { + ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str); + err = NULL; + } + } + str++; + } + return err; +} + + +/* This function apply regex. It take const null terminated char as input. + * If the function doesn't match, it returns false, else it returns true. + * When it is compiled with JIT, this function execute strlen on the subject. + * Currently the only supported flag is REG_NOTBOL. + */ +int regex_exec_match(const struct my_regex *preg, const char *subject, + size_t nmatch, regmatch_t pmatch[], int flags) { +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + int ret; +#ifdef USE_PCRE2 + PCRE2_SIZE *matches; + pcre2_match_data *pm; +#else + int matches[MAX_MATCH * 3]; +#endif + int enmatch; + int i; + int options; + + /* Silently limit the number of allowed matches. max + * match i the maximum value for match, in fact this + * limit is not applied. + */ + + enmatch = nmatch; + if (enmatch > MAX_MATCH) + enmatch = MAX_MATCH; + + options = 0; + if (flags & REG_NOTBOL) +#ifdef USE_PCRE2 + options |= PCRE2_NOTBOL; +#else + options |= PCRE_NOTBOL; +#endif + + /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered + * pair that has been set. For example, if two substrings have been captured, + * the returned value is 3. If there are no capturing subpatterns, the return + * value from a successful match is 1, indicating that just the first pair of + * offsets has been set. + * + * It seems that this function returns 0 if it detects more matches than available + * space in the matches array. + */ +#ifdef USE_PCRE2 + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL); + + if (ret < 0) { + pcre2_match_data_free(pm); + return 0; + } + + matches = pcre2_get_ovector_pointer(pm); +#else + ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3); + + if (ret < 0) + return 0; +#endif + + if (ret == 0) + ret = enmatch; + + for (i=0; i<nmatch; i++) { + /* Copy offset. */ + if (i < ret) { + pmatch[i].rm_so = matches[(i*2)]; + pmatch[i].rm_eo = matches[(i*2)+1]; + continue; + } + /* Set the unmatvh flag (-1). */ + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } +#ifdef USE_PCRE2 + pcre2_match_data_free(pm); +#endif + return 1; +#else + int match; + + flags &= REG_NOTBOL; + match = regexec(&preg->regex, subject, nmatch, pmatch, flags); + if (match == REG_NOMATCH) + return 0; + return 1; +#endif +} + +/* This function apply regex. It take a "char *" ans length as input. The + * <subject> can be modified during the processing. If the function doesn't + * match, it returns false, else it returns true. + * When it is compiled with standard POSIX regex or PCRE, this function add + * a temporary null characters at the end of the <subject>. The <subject> must + * have a real length of <length> + 1. Currently the only supported flag is + * REG_NOTBOL. + */ +int regex_exec_match2(const struct my_regex *preg, char *subject, int length, + size_t nmatch, regmatch_t pmatch[], int flags) { +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + int ret; +#ifdef USE_PCRE2 + PCRE2_SIZE *matches; + pcre2_match_data *pm; +#else + int matches[MAX_MATCH * 3]; +#endif + int enmatch; + int i; + int options; + + /* Silently limit the number of allowed matches. max + * match i the maximum value for match, in fact this + * limit is not applied. + */ + enmatch = nmatch; + if (enmatch > MAX_MATCH) + enmatch = MAX_MATCH; + + options = 0; + if (flags & REG_NOTBOL) +#ifdef USE_PCRE2 + options |= PCRE2_NOTBOL; +#else + options |= PCRE_NOTBOL; +#endif + + /* The value returned by pcre_exec()/pcre2_(jit)_match() is one more than the highest numbered + * pair that has been set. For example, if two substrings have been captured, + * the returned value is 3. If there are no capturing subpatterns, the return + * value from a successful match is 1, indicating that just the first pair of + * offsets has been set. + * + * It seems that this function returns 0 if it detects more matches than available + * space in the matches array. + */ +#ifdef USE_PCRE2 + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL); + + if (ret < 0) { + pcre2_match_data_free(pm); + return 0; + } + + matches = pcre2_get_ovector_pointer(pm); +#else + ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3); + if (ret < 0) + return 0; +#endif + + if (ret == 0) + ret = enmatch; + + for (i=0; i<nmatch; i++) { + /* Copy offset. */ + if (i < ret) { + pmatch[i].rm_so = matches[(i*2)]; + pmatch[i].rm_eo = matches[(i*2)+1]; + continue; + } + /* Set the unmatvh flag (-1). */ + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } +#ifdef USE_PCRE2 + pcre2_match_data_free(pm); +#endif + return 1; +#else + char old_char = subject[length]; + int match; + + flags &= REG_NOTBOL; + subject[length] = 0; + match = regexec(&preg->regex, subject, nmatch, pmatch, flags); + subject[length] = old_char; + if (match == REG_NOMATCH) + return 0; + return 1; +#endif +} + +struct my_regex *regex_comp(const char *str, int cs, int cap, char **err) +{ + struct my_regex *regex = NULL; +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) + int flags = 0; + const char *error; + int erroffset; +#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + int flags = 0; + int errn; +#if defined(USE_PCRE2_JIT) + int jit; +#endif + PCRE2_UCHAR error[256]; + PCRE2_SIZE erroffset; +#else + int flags = REG_EXTENDED; +#endif + + regex = calloc(1, sizeof(*regex)); + if (!regex) { + memprintf(err, "not enough memory to build regex"); + goto out_fail_alloc; + } + +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) + if (!cs) + flags |= PCRE_CASELESS; + if (!cap) + flags |= PCRE_NO_AUTO_CAPTURE; + + regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL); + if (!regex->reg) { + memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset); + goto out_fail_alloc; + } + + regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error); + if (!regex->extra && error != NULL) { + pcre_free(regex->reg); + memprintf(err, "failed to compile regex '%s' (error=%s)", str, error); + goto out_fail_alloc; + } +#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + if (!cs) + flags |= PCRE2_CASELESS; + if (!cap) + flags |= PCRE2_NO_AUTO_CAPTURE; + + regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL); + if (!regex->reg) { + pcre2_get_error_message(errn, error, sizeof(error)); + memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset); + goto out_fail_alloc; + } + + regex->mfn = &pcre2_match; +#if defined(USE_PCRE2_JIT) + jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE); + /* + * We end if it is an error not related to lack of JIT support + * in a case of JIT support missing pcre2_jit_compile is "no-op" + */ + if (!jit) + regex->mfn = &pcre2_jit_match; + else { + if (jit != PCRE2_ERROR_JIT_BADOPTION) { + pcre2_code_free(regex->reg); + memprintf(err, "regex '%s' jit compilation failed", str); + goto out_fail_alloc; + } + else + regex->mfn = &pcre2_match; + } +#endif + +#else + if (!cs) + flags |= REG_ICASE; + if (!cap) + flags |= REG_NOSUB; + + if (regcomp(®ex->regex, str, flags) != 0) { + memprintf(err, "regex '%s' is invalid", str); + goto out_fail_alloc; + } +#endif + return regex; + + out_fail_alloc: + free(regex); + return NULL; +} + +static void regex_register_build_options(void) +{ + char *ptr = NULL; + +#ifdef USE_PCRE + memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)? + HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) : + HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE)); + memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version()); + + memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr, +#ifdef USE_PCRE_JIT + ({ + int r; + pcre_config(PCRE_CONFIG_JIT, &r); + r ? "yes" : "no (libpcre build without JIT?)"; + }) +#else + "no (USE_PCRE_JIT not set)" +#endif + ); +#endif /* USE_PCRE */ + +#ifdef USE_PCRE2 + memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ? + HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : + HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE)); + memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr, +#ifdef USE_PCRE2_JIT + ({ + int r; + pcre2_config(PCRE2_CONFIG_JIT, &r); + r ? "yes" : "no (libpcre2 build without JIT?)"; + }) +#else + "no (USE_PCRE2_JIT not set)" +#endif + ); +#endif /* USE_PCRE2 */ + +#if !defined(USE_PCRE) && !defined(USE_PCRE2) + memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)"); +#endif + hap_register_build_opts(ptr, 1); +} + +INITCALL0(STG_REGISTER, regex_register_build_options); + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ |