/* * Regex and string management functions. * * Copyright 2000-2010 Willy Tarreau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * */ #include #include #include #include #include #include #include #include /* regex trash buffer used by various regex tests */ THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */ int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches) { char *old_dst = dst; char* dst_end = dst + dst_size; while (*str) { if (*str == '\\') { str++; if (!*str) return -1; if (isdigit((unsigned char)*str)) { int len, num; num = *str - '0'; str++; if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) { len = matches[num].rm_eo - matches[num].rm_so; if (dst + len >= dst_end) return -1; memcpy(dst, src + matches[num].rm_so, len); dst += len; } } else if (*str == 'x') { unsigned char hex1, hex2; str++; if (!*str) return -1; hex1 = toupper((unsigned char)*str++) - '0'; if (!*str) return -1; hex2 = toupper((unsigned char)*str++) - '0'; if (hex1 > 9) hex1 -= 'A' - '9' - 1; if (hex2 > 9) hex2 -= 'A' - '9' - 1; if (dst >= dst_end) return -1; *dst++ = (hex1<<4) + hex2; } else { if (dst >= dst_end) return -1; *dst++ = *str++; } } else { if (dst >= dst_end) return -1; *dst++ = *str++; } } if (dst >= dst_end) return -1; *dst = '\0'; return dst - old_dst; } /* returns NULL if the replacement string is valid, or the pointer to the first error */ const char *check_replace_string(const char *str) { const char *err = NULL; while (*str) { if (*str == '\\') { err = str; /* in case of a backslash, we return the pointer to it */ str++; if (!*str) return err; else if (isdigit((unsigned char)*str)) err = NULL; else if (*str == 'x') { str++; if (!ishex(*str)) return err; str++; if (!ishex(*str)) return err; err = NULL; } else { ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str); err = NULL; } } str++; } return err; } /* This function apply regex. It take const null terminated char as input. * If the function doesn't match, it returns false, else it returns true. * When it is compiled with JIT, this function execute strlen on the subject. * Currently the only supported flag is REG_NOTBOL. */ int regex_exec_match(const struct my_regex *preg, const char *subject, size_t nmatch, regmatch_t pmatch[], int flags) { #if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT) int ret; #ifdef USE_PCRE2 PCRE2_SIZE *matches; pcre2_match_data *pm; #else int matches[MAX_MATCH * 3]; #endif int enmatch; int i; int options; /* Silently limit the number of allowed matches. max * match i the maximum value for match, in fact this * limit is not applied. */ enmatch = nmatch; if (enmatch > MAX_MATCH) enmatch = MAX_MATCH; options = 0; if (flags & REG_NOTBOL) #ifdef USE_PCRE2 options |= PCRE2_NOTBOL; #else options |= PCRE_NOTBOL; #endif /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered * pair that has been set. For example, if two substrings have been captured, * the returned value is 3. If there are no capturing subpatterns, the return * value from a successful match is 1, indicating that just the first pair of * offsets has been set. * * It seems that this function returns 0 if it detects more matches than available * space in the matches array. */ #ifdef USE_PCRE2 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL); if (ret < 0) { pcre2_match_data_free(pm); return 0; } matches = pcre2_get_ovector_pointer(pm); #else ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3); if (ret < 0) return 0; #endif if (ret == 0) ret = enmatch; for (i=0; iregex, subject, nmatch, pmatch, flags); if (match == REG_NOMATCH) return 0; return 1; #endif } /* This function apply regex. It take a "char *" ans length as input. The * can be modified during the processing. If the function doesn't * match, it returns false, else it returns true. * When it is compiled with standard POSIX regex or PCRE, this function add * a temporary null characters at the end of the . The must * have a real length of + 1. Currently the only supported flag is * REG_NOTBOL. */ int regex_exec_match2(const struct my_regex *preg, char *subject, int length, size_t nmatch, regmatch_t pmatch[], int flags) { #if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT) int ret; #ifdef USE_PCRE2 PCRE2_SIZE *matches; pcre2_match_data *pm; #else int matches[MAX_MATCH * 3]; #endif int enmatch; int i; int options; /* Silently limit the number of allowed matches. max * match i the maximum value for match, in fact this * limit is not applied. */ enmatch = nmatch; if (enmatch > MAX_MATCH) enmatch = MAX_MATCH; options = 0; if (flags & REG_NOTBOL) #ifdef USE_PCRE2 options |= PCRE2_NOTBOL; #else options |= PCRE_NOTBOL; #endif /* The value returned by pcre_exec()/pcre2_(jit)_match() is one more than the highest numbered * pair that has been set. For example, if two substrings have been captured, * the returned value is 3. If there are no capturing subpatterns, the return * value from a successful match is 1, indicating that just the first pair of * offsets has been set. * * It seems that this function returns 0 if it detects more matches than available * space in the matches array. */ #ifdef USE_PCRE2 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL); if (ret < 0) { pcre2_match_data_free(pm); return 0; } matches = pcre2_get_ovector_pointer(pm); #else ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3); if (ret < 0) return 0; #endif if (ret == 0) ret = enmatch; for (i=0; iregex, subject, nmatch, pmatch, flags); subject[length] = old_char; if (match == REG_NOMATCH) return 0; return 1; #endif } struct my_regex *regex_comp(const char *str, int cs, int cap, char **err) { struct my_regex *regex = NULL; #if defined(USE_PCRE) || defined(USE_PCRE_JIT) int flags = 0; const char *error; int erroffset; #elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) int flags = 0; int errn; #if defined(USE_PCRE2_JIT) int jit; #endif PCRE2_UCHAR error[256]; PCRE2_SIZE erroffset; #else int flags = REG_EXTENDED; #endif regex = calloc(1, sizeof(*regex)); if (!regex) { memprintf(err, "not enough memory to build regex"); goto out_fail_alloc; } #if defined(USE_PCRE) || defined(USE_PCRE_JIT) if (!cs) flags |= PCRE_CASELESS; if (!cap) flags |= PCRE_NO_AUTO_CAPTURE; regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL); if (!regex->reg) { memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset); goto out_fail_alloc; } regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error); if (!regex->extra && error != NULL) { pcre_free(regex->reg); memprintf(err, "failed to compile regex '%s' (error=%s)", str, error); goto out_fail_alloc; } #elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) if (!cs) flags |= PCRE2_CASELESS; if (!cap) flags |= PCRE2_NO_AUTO_CAPTURE; regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL); if (!regex->reg) { pcre2_get_error_message(errn, error, sizeof(error)); memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset); goto out_fail_alloc; } regex->mfn = &pcre2_match; #if defined(USE_PCRE2_JIT) jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE); /* * We end if it is an error not related to lack of JIT support * in a case of JIT support missing pcre2_jit_compile is "no-op" */ if (!jit) regex->mfn = &pcre2_jit_match; else { if (jit != PCRE2_ERROR_JIT_BADOPTION) { pcre2_code_free(regex->reg); memprintf(err, "regex '%s' jit compilation failed", str); goto out_fail_alloc; } else regex->mfn = &pcre2_match; } #endif #else if (!cs) flags |= REG_ICASE; if (!cap) flags |= REG_NOSUB; if (regcomp(®ex->regex, str, flags) != 0) { memprintf(err, "regex '%s' is invalid", str); goto out_fail_alloc; } #endif return regex; out_fail_alloc: free(regex); return NULL; } static void regex_register_build_options(void) { char *ptr = NULL; #ifdef USE_PCRE memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)? HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) : HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE)); memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version()); memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr, #ifdef USE_PCRE_JIT ({ int r; pcre_config(PCRE_CONFIG_JIT, &r); r ? "yes" : "no (libpcre build without JIT?)"; }) #else "no (USE_PCRE_JIT not set)" #endif ); #endif /* USE_PCRE */ #ifdef USE_PCRE2 memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ? HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE)); memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr, #ifdef USE_PCRE2_JIT ({ int r; pcre2_config(PCRE2_CONFIG_JIT, &r); r ? "yes" : "no (libpcre2 build without JIT?)"; }) #else "no (USE_PCRE2_JIT not set)" #endif ); #endif /* USE_PCRE2 */ #if !defined(USE_PCRE) && !defined(USE_PCRE2) memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)"); #endif hap_register_build_opts(ptr, 1); } INITCALL0(STG_REGISTER, regex_register_build_options); /* * Local variables: * c-indent-level: 8 * c-basic-offset: 8 * End: */