diff options
Diffstat (limited to 'src/pattern.c')
-rw-r--r-- | src/pattern.c | 2683 |
1 files changed, 2683 insertions, 0 deletions
diff --git a/src/pattern.c b/src/pattern.c new file mode 100644 index 0000000..52dda5e --- /dev/null +++ b/src/pattern.c @@ -0,0 +1,2683 @@ +/* + * Pattern management functions. + * + * Copyright 2000-2013 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <ctype.h> +#include <stdio.h> +#include <errno.h> + +#include <import/ebistree.h> +#include <import/ebpttree.h> +#include <import/ebsttree.h> +#include <import/lru.h> + +#include <haproxy/api.h> +#include <haproxy/global.h> +#include <haproxy/log.h> +#include <haproxy/net_helper.h> +#include <haproxy/pattern.h> +#include <haproxy/regex.h> +#include <haproxy/sample.h> +#include <haproxy/tools.h> +#include <haproxy/xxhash.h> + + +const char *const pat_match_names[PAT_MATCH_NUM] = { + [PAT_MATCH_FOUND] = "found", + [PAT_MATCH_BOOL] = "bool", + [PAT_MATCH_INT] = "int", + [PAT_MATCH_IP] = "ip", + [PAT_MATCH_BIN] = "bin", + [PAT_MATCH_LEN] = "len", + [PAT_MATCH_STR] = "str", + [PAT_MATCH_BEG] = "beg", + [PAT_MATCH_SUB] = "sub", + [PAT_MATCH_DIR] = "dir", + [PAT_MATCH_DOM] = "dom", + [PAT_MATCH_END] = "end", + [PAT_MATCH_REG] = "reg", + [PAT_MATCH_REGM] = "regm", +}; + +int (*const pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = { + [PAT_MATCH_FOUND] = pat_parse_nothing, + [PAT_MATCH_BOOL] = pat_parse_nothing, + [PAT_MATCH_INT] = pat_parse_int, + [PAT_MATCH_IP] = pat_parse_ip, + [PAT_MATCH_BIN] = pat_parse_bin, + [PAT_MATCH_LEN] = pat_parse_int, + [PAT_MATCH_STR] = pat_parse_str, + [PAT_MATCH_BEG] = pat_parse_str, + [PAT_MATCH_SUB] = pat_parse_str, + [PAT_MATCH_DIR] = pat_parse_str, + [PAT_MATCH_DOM] = pat_parse_str, + [PAT_MATCH_END] = pat_parse_str, + [PAT_MATCH_REG] = pat_parse_reg, + [PAT_MATCH_REGM] = pat_parse_reg, +}; + +int (*const pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = { + [PAT_MATCH_FOUND] = pat_idx_list_val, + [PAT_MATCH_BOOL] = pat_idx_list_val, + [PAT_MATCH_INT] = pat_idx_list_val, + [PAT_MATCH_IP] = pat_idx_tree_ip, + [PAT_MATCH_BIN] = pat_idx_list_ptr, + [PAT_MATCH_LEN] = pat_idx_list_val, + [PAT_MATCH_STR] = pat_idx_tree_str, + [PAT_MATCH_BEG] = pat_idx_tree_pfx, + [PAT_MATCH_SUB] = pat_idx_list_str, + [PAT_MATCH_DIR] = pat_idx_list_str, + [PAT_MATCH_DOM] = pat_idx_list_str, + [PAT_MATCH_END] = pat_idx_list_str, + [PAT_MATCH_REG] = pat_idx_list_reg, + [PAT_MATCH_REGM] = pat_idx_list_regm, +}; + +void (*const pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = { + [PAT_MATCH_FOUND] = pat_prune_gen, + [PAT_MATCH_BOOL] = pat_prune_gen, + [PAT_MATCH_INT] = pat_prune_gen, + [PAT_MATCH_IP] = pat_prune_gen, + [PAT_MATCH_BIN] = pat_prune_gen, + [PAT_MATCH_LEN] = pat_prune_gen, + [PAT_MATCH_STR] = pat_prune_gen, + [PAT_MATCH_BEG] = pat_prune_gen, + [PAT_MATCH_SUB] = pat_prune_gen, + [PAT_MATCH_DIR] = pat_prune_gen, + [PAT_MATCH_DOM] = pat_prune_gen, + [PAT_MATCH_END] = pat_prune_gen, + [PAT_MATCH_REG] = pat_prune_gen, + [PAT_MATCH_REGM] = pat_prune_gen, +}; + +struct pattern *(*const pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = { + [PAT_MATCH_FOUND] = NULL, + [PAT_MATCH_BOOL] = pat_match_nothing, + [PAT_MATCH_INT] = pat_match_int, + [PAT_MATCH_IP] = pat_match_ip, + [PAT_MATCH_BIN] = pat_match_bin, + [PAT_MATCH_LEN] = pat_match_len, + [PAT_MATCH_STR] = pat_match_str, + [PAT_MATCH_BEG] = pat_match_beg, + [PAT_MATCH_SUB] = pat_match_sub, + [PAT_MATCH_DIR] = pat_match_dir, + [PAT_MATCH_DOM] = pat_match_dom, + [PAT_MATCH_END] = pat_match_end, + [PAT_MATCH_REG] = pat_match_reg, + [PAT_MATCH_REGM] = pat_match_regm, +}; + +/* Just used for checking configuration compatibility */ +int const pat_match_types[PAT_MATCH_NUM] = { + [PAT_MATCH_FOUND] = SMP_T_SINT, + [PAT_MATCH_BOOL] = SMP_T_SINT, + [PAT_MATCH_INT] = SMP_T_SINT, + [PAT_MATCH_IP] = SMP_T_ADDR, + [PAT_MATCH_BIN] = SMP_T_BIN, + [PAT_MATCH_LEN] = SMP_T_STR, + [PAT_MATCH_STR] = SMP_T_STR, + [PAT_MATCH_BEG] = SMP_T_STR, + [PAT_MATCH_SUB] = SMP_T_STR, + [PAT_MATCH_DIR] = SMP_T_STR, + [PAT_MATCH_DOM] = SMP_T_STR, + [PAT_MATCH_END] = SMP_T_STR, + [PAT_MATCH_REG] = SMP_T_STR, + [PAT_MATCH_REGM] = SMP_T_STR, +}; + +/* this struct is used to return information */ +static THREAD_LOCAL struct pattern static_pattern; +static THREAD_LOCAL struct sample_data static_sample_data; + +/* This is the root of the list of all pattern_ref avalaibles. */ +struct list pattern_reference = LIST_HEAD_INIT(pattern_reference); + +static THREAD_LOCAL struct lru64_head *pat_lru_tree; +static unsigned long long pat_lru_seed __read_mostly; + +/* + * + * The following functions are not exported and are used by internals process + * of pattern matching + * + */ + +/* Background: Fast way to find a zero byte in a word + * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + * hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL; + * + * To look for 4 different byte values, xor the word with those bytes and + * then check for zero bytes: + * + * v = (((unsigned char)c * 0x1010101U) ^ delimiter) + * where <delimiter> is the 4 byte values to look for (as an uint) + * and <c> is the character that is being tested + */ +static inline unsigned int is_delimiter(unsigned char c, unsigned int mask) +{ + mask ^= (c * 0x01010101); /* propagate the char to all 4 bytes */ + return (mask - 0x01010101) & ~mask & 0x80808080U; +} + +static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4) +{ + return d1 << 24 | d2 << 16 | d3 << 8 | d4; +} + + +/* + * + * These functions are exported and may be used by any other component. + * + * The following functions are used for parsing pattern matching input value. + * The <text> contain the string to be parsed. <pattern> must be a preallocated + * pattern. The pat_parse_* functions fill this structure with the parsed value. + * <err> is filled with an error message built with memprintf() function. It is + * allowed to use a trash as a temporary storage for the returned pattern, as + * the next call after these functions will be pat_idx_*. + * + * In success case, the pat_parse_* function returns 1. If the function + * fails, it returns 0 and <err> is filled. + */ + +/* ignore the current line */ +int pat_parse_nothing(const char *text, struct pattern *pattern, int mflags, char **err) +{ + return 1; +} + +/* Parse a string. It is allocated and duplicated. */ +int pat_parse_str(const char *text, struct pattern *pattern, int mflags, char **err) +{ + pattern->type = SMP_T_STR; + pattern->ptr.str = (char *)text; + pattern->len = strlen(text); + return 1; +} + +/* Parse a binary written in hexa. It is allocated. */ +int pat_parse_bin(const char *text, struct pattern *pattern, int mflags, char **err) +{ + struct buffer *trash; + + pattern->type = SMP_T_BIN; + trash = get_trash_chunk(); + pattern->len = trash->size; + pattern->ptr.str = trash->area; + return !!parse_binary(text, &pattern->ptr.str, &pattern->len, err); +} + +/* Parse a regex. It is allocated. */ +int pat_parse_reg(const char *text, struct pattern *pattern, int mflags, char **err) +{ + pattern->ptr.str = (char *)text; + return 1; +} + +/* Parse a range of positive integers delimited by either ':' or '-'. If only + * one integer is read, it is set as both min and max. An operator may be + * specified as the prefix, among this list of 5 : + * + * 0:eq, 1:gt, 2:ge, 3:lt, 4:le + * + * The default operator is "eq". It supports range matching. Ranges are + * rejected for other operators. The operator may be changed at any time. + * The operator is stored in the 'opaque' argument. + * + * If err is non-NULL, an error message will be returned there on errors and + * the caller will have to free it. The function returns zero on error, and + * non-zero on success. + * + */ +int pat_parse_int(const char *text, struct pattern *pattern, int mflags, char **err) +{ + const char *ptr = text; + + pattern->type = SMP_T_SINT; + + /* Empty string is not valid */ + if (!*text) + goto not_valid_range; + + /* Search ':' or '-' separator. */ + while (*ptr != '\0' && *ptr != ':' && *ptr != '-') + ptr++; + + /* If separator not found. */ + if (!*ptr) { + if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) { + memprintf(err, "'%s' is not a number", text); + return 0; + } + pattern->val.range.max = pattern->val.range.min; + pattern->val.range.min_set = 1; + pattern->val.range.max_set = 1; + return 1; + } + + /* If the separator is the first character. */ + if (ptr == text && *(ptr + 1) != '\0') { + if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0) + goto not_valid_range; + + pattern->val.range.min_set = 0; + pattern->val.range.max_set = 1; + return 1; + } + + /* If separator is the last character. */ + if (*(ptr + 1) == '\0') { + if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) + goto not_valid_range; + + pattern->val.range.min_set = 1; + pattern->val.range.max_set = 0; + return 1; + } + + /* Else, parse two numbers. */ + if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) + goto not_valid_range; + + if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0) + goto not_valid_range; + + if (pattern->val.range.min > pattern->val.range.max) + goto not_valid_range; + + pattern->val.range.min_set = 1; + pattern->val.range.max_set = 1; + return 1; + + not_valid_range: + memprintf(err, "'%s' is not a valid number range", text); + return 0; +} + +/* Parse a range of positive 2-component versions delimited by either ':' or + * '-'. The version consists in a major and a minor, both of which must be + * smaller than 65536, because internally they will be represented as a 32-bit + * integer. + * If only one version is read, it is set as both min and max. Just like for + * pure integers, an operator may be specified as the prefix, among this list + * of 5 : + * + * 0:eq, 1:gt, 2:ge, 3:lt, 4:le + * + * The default operator is "eq". It supports range matching. Ranges are + * rejected for other operators. The operator may be changed at any time. + * The operator is stored in the 'opaque' argument. This allows constructs + * such as the following one : + * + * acl obsolete_ssl ssl_req_proto lt 3 + * acl unsupported_ssl ssl_req_proto gt 3.1 + * acl valid_ssl ssl_req_proto 3.0-3.1 + * + */ +int pat_parse_dotted_ver(const char *text, struct pattern *pattern, int mflags, char **err) +{ + const char *ptr = text; + + pattern->type = SMP_T_SINT; + + /* Search ':' or '-' separator. */ + while (*ptr != '\0' && *ptr != ':' && *ptr != '-') + ptr++; + + /* If separator not found. */ + if (*ptr == '\0' && ptr > text) { + if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) { + memprintf(err, "'%s' is not a dotted number", text); + return 0; + } + pattern->val.range.max = pattern->val.range.min; + pattern->val.range.min_set = 1; + pattern->val.range.max_set = 1; + return 1; + } + + /* If the separator is the first character. */ + if (ptr == text && *(ptr+1) != '\0') { + if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) { + memprintf(err, "'%s' is not a valid dotted number range", text); + return 0; + } + pattern->val.range.min_set = 0; + pattern->val.range.max_set = 1; + return 1; + } + + /* If separator is the last character. */ + if (ptr == &text[strlen(text)-1]) { + if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) { + memprintf(err, "'%s' is not a valid dotted number range", text); + return 0; + } + pattern->val.range.min_set = 1; + pattern->val.range.max_set = 0; + return 1; + } + + /* Else, parse two numbers. */ + if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) { + memprintf(err, "'%s' is not a valid dotted number range", text); + return 0; + } + if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) { + memprintf(err, "'%s' is not a valid dotted number range", text); + return 0; + } + if (pattern->val.range.min > pattern->val.range.max) { + memprintf(err, "'%s' is not a valid dotted number range", text); + return 0; + } + pattern->val.range.min_set = 1; + pattern->val.range.max_set = 1; + return 1; +} + +/* Parse an IP address and an optional mask in the form addr[/mask]. + * The addr may either be an IPv4 address or a hostname. The mask + * may either be a dotted mask or a number of bits. Returns 1 if OK, + * otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6). + */ +int pat_parse_ip(const char *text, struct pattern *pattern, int mflags, char **err) +{ + if (str2net(text, !(mflags & PAT_MF_NO_DNS) && (global.mode & MODE_STARTING), + &pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) { + pattern->type = SMP_T_IPV4; + return 1; + } + else if (str62net(text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) { + pattern->type = SMP_T_IPV6; + return 1; + } + else { + memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", text); + return 0; + } +} + +/* + * + * These functions are exported and may be used by any other component. + * + * This function just takes a sample <smp> and checks if this sample matches + * with the pattern <pattern>. This function returns only PAT_MATCH or + * PAT_NOMATCH. + * + */ + +/* always return false */ +struct pattern *pat_match_nothing(struct sample *smp, struct pattern_expr *expr, int fill) +{ + if (smp->data.u.sint) { + if (fill) { + static_pattern.data = NULL; + static_pattern.ref = NULL; + static_pattern.type = 0; + static_pattern.ptr.str = NULL; + } + return &static_pattern; + } + else + return NULL; +} + + +/* NB: For two strings to be identical, it is required that their length match */ +struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int fill) +{ + int icase; + struct ebmb_node *node; + struct pattern_tree *elt; + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + /* Lookup a string in the expression's pattern tree. */ + if (!eb_is_empty(&expr->pattern_tree)) { + char prev = 0; + + if (smp->data.u.str.data < smp->data.u.str.size) { + /* we may have to force a trailing zero on the test pattern and + * the buffer is large enough to accommodate it. If the flag + * CONST is set, duplicate the string + */ + prev = smp->data.u.str.area[smp->data.u.str.data]; + if (prev) { + if (smp->flags & SMP_F_CONST) { + if (!smp_dup(smp)) + return NULL; + } else { + smp->data.u.str.area[smp->data.u.str.data] = '\0'; + } + } + } + else { + /* Otherwise, the sample is duplicated. A trailing zero + * is automatically added to the string. + */ + if (!smp_dup(smp)) + return NULL; + } + + node = ebst_lookup(&expr->pattern_tree, smp->data.u.str.area); + if (prev) + smp->data.u.str.area[smp->data.u.str.data] = prev; + + while (node) { + elt = ebmb_entry(node, struct pattern_tree, node); + if (elt->ref->gen_id != expr->ref->curr_gen) { + node = ebmb_next_dup(node); + continue; + } + if (fill) { + static_pattern.data = elt->data; + static_pattern.ref = elt->ref; + static_pattern.sflags = PAT_SF_TREE; + static_pattern.type = SMP_T_STR; + static_pattern.ptr.str = (char *)elt->node.key; + } + return &static_pattern; + } + } + + /* look in the list */ + if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) { + unsigned long long seed = pat_lru_seed ^ (long)expr; + + lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed), + pat_lru_tree, expr, expr->ref->revision); + if (lru && lru->domain) { + ret = lru->data; + return ret; + } + } + + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (pattern->len != smp->data.u.str.data) + continue; + + icase = expr->mflags & PAT_MF_IGNORE_CASE; + if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) || + (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0)) { + ret = pattern; + break; + } + } + + if (lru) + lru64_commit(lru, ret, expr, expr->ref->revision, NULL); + + return ret; +} + +/* NB: For two binaries buf to be identical, it is required that their lengths match */ +struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) { + unsigned long long seed = pat_lru_seed ^ (long)expr; + + lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed), + pat_lru_tree, expr, expr->ref->revision); + if (lru && lru->domain) { + ret = lru->data; + return ret; + } + } + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (pattern->len != smp->data.u.str.data) + continue; + + if (memcmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) { + ret = pattern; + break; + } + } + + if (lru) + lru64_commit(lru, ret, expr, expr->ref->revision, NULL); + + return ret; +} + +/* Executes a regex. It temporarily changes the data to add a trailing zero, + * and restores the previous character when leaving. This function fills + * a matching array. + */ +struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data, + MAX_MATCH, pmatch, 0)) { + ret = pattern; + smp->ctx.a[0] = pmatch; + break; + } + } + + return ret; +} + +/* Executes a regex. It temporarily changes the data to add a trailing zero, + * and restores the previous character when leaving. + */ +struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) { + unsigned long long seed = pat_lru_seed ^ (long)expr; + + lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed), + pat_lru_tree, expr, expr->ref->revision); + if (lru && lru->domain) { + ret = lru->data; + return ret; + } + } + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (regex_exec2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data)) { + ret = pattern; + break; + } + } + + if (lru) + lru64_commit(lru, ret, expr, expr->ref->revision, NULL); + + return ret; +} + +/* Checks that the pattern matches the beginning of the tested string. */ +struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int fill) +{ + int icase; + struct ebmb_node *node; + struct pattern_tree *elt; + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + /* Lookup a string in the expression's pattern tree. */ + if (!eb_is_empty(&expr->pattern_tree)) { + char prev = 0; + + if (smp->data.u.str.data < smp->data.u.str.size) { + /* we may have to force a trailing zero on the test pattern and + * the buffer is large enough to accommodate it. + */ + prev = smp->data.u.str.area[smp->data.u.str.data]; + if (prev) + smp->data.u.str.area[smp->data.u.str.data] = '\0'; + } + else { + /* Otherwise, the sample is duplicated. A trailing zero + * is automatically added to the string. + */ + if (!smp_dup(smp)) + return NULL; + } + + node = ebmb_lookup_longest(&expr->pattern_tree, + smp->data.u.str.area); + if (prev) + smp->data.u.str.area[smp->data.u.str.data] = prev; + + while (node) { + elt = ebmb_entry(node, struct pattern_tree, node); + if (elt->ref->gen_id != expr->ref->curr_gen) { + node = ebmb_lookup_shorter(node); + continue; + } + if (fill) { + static_pattern.data = elt->data; + static_pattern.ref = elt->ref; + static_pattern.sflags = PAT_SF_TREE; + static_pattern.type = SMP_T_STR; + static_pattern.ptr.str = (char *)elt->node.key; + } + return &static_pattern; + } + } + + /* look in the list */ + if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) { + unsigned long long seed = pat_lru_seed ^ (long)expr; + + lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed), + pat_lru_tree, expr, expr->ref->revision); + if (lru && lru->domain) { + ret = lru->data; + return ret; + } + } + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (pattern->len > smp->data.u.str.data) + continue; + + icase = expr->mflags & PAT_MF_IGNORE_CASE; + if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0) || + (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0)) + continue; + + ret = pattern; + break; + } + + if (lru) + lru64_commit(lru, ret, expr, expr->ref->revision, NULL); + + return ret; +} + +/* Checks that the pattern matches the end of the tested string. */ +struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int fill) +{ + int icase; + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) { + unsigned long long seed = pat_lru_seed ^ (long)expr; + + lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed), + pat_lru_tree, expr, expr->ref->revision); + if (lru && lru->domain) { + ret = lru->data; + return ret; + } + } + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (pattern->len > smp->data.u.str.data) + continue; + + icase = expr->mflags & PAT_MF_IGNORE_CASE; + if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0) || + (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0)) + continue; + + ret = pattern; + break; + } + + if (lru) + lru64_commit(lru, ret, expr, expr->ref->revision, NULL); + + return ret; +} + +/* Checks that the pattern is included inside the tested string. + * NB: Suboptimal, should be rewritten using a Boyer-Moore method. + */ +struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int fill) +{ + int icase; + char *end; + char *c; + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) { + unsigned long long seed = pat_lru_seed ^ (long)expr; + + lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed), + pat_lru_tree, expr, expr->ref->revision); + if (lru && lru->domain) { + ret = lru->data; + return ret; + } + } + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (pattern->len > smp->data.u.str.data) + continue; + + end = smp->data.u.str.area + smp->data.u.str.data - pattern->len; + icase = expr->mflags & PAT_MF_IGNORE_CASE; + if (icase) { + for (c = smp->data.u.str.area; c <= end; c++) { + if (tolower((unsigned char)*c) != tolower((unsigned char)*pattern->ptr.str)) + continue; + if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) { + ret = pattern; + goto leave; + } + } + } else { + for (c = smp->data.u.str.area; c <= end; c++) { + if (*c != *pattern->ptr.str) + continue; + if (strncmp(pattern->ptr.str, c, pattern->len) == 0) { + ret = pattern; + goto leave; + } + } + } + } + leave: + if (lru) + lru64_commit(lru, ret, expr, expr->ref->revision, NULL); + + return ret; +} + +/* This one is used by other real functions. It checks that the pattern is + * included inside the tested string, but enclosed between the specified + * delimiters or at the beginning or end of the string. The delimiters are + * provided as an unsigned int made by make_4delim() and match up to 4 different + * delimiters. Delimiters are stripped at the beginning and end of the pattern. + */ +static int match_word(struct sample *smp, struct pattern *pattern, int mflags, unsigned int delimiters) +{ + int may_match, icase; + char *c, *end; + char *ps; + int pl; + + pl = pattern->len; + ps = pattern->ptr.str; + + while (pl > 0 && is_delimiter(*ps, delimiters)) { + pl--; + ps++; + } + + while (pl > 0 && is_delimiter(ps[pl - 1], delimiters)) + pl--; + + if (pl > smp->data.u.str.data) + return PAT_NOMATCH; + + may_match = 1; + icase = mflags & PAT_MF_IGNORE_CASE; + end = smp->data.u.str.area + smp->data.u.str.data - pl; + for (c = smp->data.u.str.area; c <= end; c++) { + if (is_delimiter(*c, delimiters)) { + may_match = 1; + continue; + } + + if (!may_match) + continue; + + if (icase) { + if ((tolower((unsigned char)*c) == tolower((unsigned char)*ps)) && + (strncasecmp(ps, c, pl) == 0) && + (c == end || is_delimiter(c[pl], delimiters))) + return PAT_MATCH; + } else { + if ((*c == *ps) && + (strncmp(ps, c, pl) == 0) && + (c == end || is_delimiter(c[pl], delimiters))) + return PAT_MATCH; + } + may_match = 0; + } + return PAT_NOMATCH; +} + +/* Checks that the pattern is included inside the tested string, but enclosed + * between the delimiters '?' or '/' or at the beginning or end of the string. + * Delimiters at the beginning or end of the pattern are ignored. + */ +struct pattern *pat_match_dir(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '?', '?'))) + return pattern; + } + return NULL; +} + +/* Checks that the pattern is included inside the tested string, but enclosed + * between the delmiters '/', '?', '.' or ":" or at the beginning or end of + * the string. Delimiters at the beginning or end of the pattern are ignored. + */ +struct pattern *pat_match_dom(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '.', ':'))) + return pattern; + } + return NULL; +} + +/* Checks that the integer in <test> is included between min and max */ +struct pattern *pat_match_int(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.sint) && + (!pattern->val.range.max_set || smp->data.u.sint <= pattern->val.range.max)) + return pattern; + } + return NULL; +} + +/* Checks that the length of the pattern in <test> is included between min and max */ +struct pattern *pat_match_len(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.str.data) && + (!pattern->val.range.max_set || smp->data.u.str.data <= pattern->val.range.max)) + return pattern; + } + return NULL; +} + +/* Performs ipv4 key lookup in <expr> ipv4 tree + * Returns NULL on failure + */ +static struct pattern *_pat_match_tree_ipv4(struct in_addr *key, struct pattern_expr *expr, int fill) +{ + struct ebmb_node *node; + struct pattern_tree *elt; + + /* Lookup an IPv4 address in the expression's pattern tree using + * the longest match method. + */ + node = ebmb_lookup_longest(&expr->pattern_tree, key); + while (node) { + elt = ebmb_entry(node, struct pattern_tree, node); + if (elt->ref->gen_id != expr->ref->curr_gen) { + node = ebmb_lookup_shorter(node); + continue; + } + if (fill) { + static_pattern.data = elt->data; + static_pattern.ref = elt->ref; + static_pattern.sflags = PAT_SF_TREE; + static_pattern.type = SMP_T_IPV4; + static_pattern.val.ipv4.addr.s_addr = read_u32(elt->node.key); + if (!cidr2dotted(elt->node.node.pfx, &static_pattern.val.ipv4.mask)) + return NULL; + } + return &static_pattern; + } + return NULL; +} + +/* Performs ipv6 key lookup in <expr> ipv6 tree + * Returns NULL on failure + */ +static struct pattern *_pat_match_tree_ipv6(struct in6_addr *key, struct pattern_expr *expr, int fill) +{ + struct ebmb_node *node; + struct pattern_tree *elt; + + /* Lookup an IPv6 address in the expression's pattern tree using + * the longest match method. + */ + node = ebmb_lookup_longest(&expr->pattern_tree_2, key); + while (node) { + elt = ebmb_entry(node, struct pattern_tree, node); + if (elt->ref->gen_id != expr->ref->curr_gen) { + node = ebmb_lookup_shorter(node); + continue; + } + if (fill) { + static_pattern.data = elt->data; + static_pattern.ref = elt->ref; + static_pattern.sflags = PAT_SF_TREE; + static_pattern.type = SMP_T_IPV6; + memcpy(&static_pattern.val.ipv6.addr, elt->node.key, 16); + static_pattern.val.ipv6.mask = elt->node.node.pfx; + } + return &static_pattern; + } + return NULL; +} + +struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct in_addr v4; + struct in6_addr v6; + struct pattern_list *lst; + struct pattern *pattern; + + /* The input sample is IPv4. Try to match in the trees. */ + if (smp->data.type == SMP_T_IPV4) { + pattern = _pat_match_tree_ipv4(&smp->data.u.ipv4, expr, fill); + if (pattern) + return pattern; + /* The IPv4 sample don't match the IPv4 tree. Convert the IPv4 + * sample address to IPv6 and try to lookup in the IPv6 tree. + */ + v4tov6(&v6, &smp->data.u.ipv4); + pattern = _pat_match_tree_ipv6(&v6, expr, fill); + if (pattern) + return pattern; + /* eligible for list lookup using IPv4 address */ + v4 = smp->data.u.ipv4; + goto list_lookup; + } + + /* The input sample is IPv6. Try to match in the trees. */ + if (smp->data.type == SMP_T_IPV6) { + pattern = _pat_match_tree_ipv6(&smp->data.u.ipv6, expr, fill); + if (pattern) + return pattern; + /* No match in the IPv6 tree. Try to convert 6 to 4 to lookup in + * the IPv4 tree + */ + if (v6tov4(&v4, &smp->data.u.ipv6)) { + pattern = _pat_match_tree_ipv4(&v4, expr, fill); + if (pattern) + return pattern; + /* eligible for list lookup using IPv4 address */ + goto list_lookup; + } + } + + not_found: + return NULL; + + list_lookup: + /* No match in the trees, but we still have a valid IPv4 address: lookup + * in the IPv4 list (non-contiguous masks list). This is our last resort + */ + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (pattern->ref->gen_id != expr->ref->curr_gen) + continue; + + /* Check if the input sample match the current pattern. */ + if (((v4.s_addr ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0) + return pattern; + } + goto not_found; +} + +/* finds the pattern holding <list> from list head <head> and deletes it. + * This is made for use for pattern removal within an expression. + */ +static void pat_unlink_from_head(void **head, void **list) +{ + while (*head) { + if (*head == list) { + *head = *list; + return; + } + head = *head; + } +} + +void free_pattern_tree(struct eb_root *root) +{ + struct eb_node *node, *next; + struct pattern_tree *elt; + + node = eb_first(root); + while (node) { + next = eb_next(node); + eb_delete(node); + elt = container_of(node, struct pattern_tree, node); + pat_unlink_from_head(&elt->ref->tree_head, &elt->from_ref); + free(elt->data); + free(elt); + node = next; + } +} + +void pat_prune_gen(struct pattern_expr *expr) +{ + struct pattern_list *pat, *tmp; + + list_for_each_entry_safe(pat, tmp, &expr->patterns, list) { + LIST_DELETE(&pat->list); + pat_unlink_from_head(&pat->pat.ref->list_head, &pat->from_ref); + if (pat->pat.sflags & PAT_SF_REGFREE) + regex_free(pat->pat.ptr.ptr); + else + free(pat->pat.ptr.ptr); + free(pat->pat.data); + free(pat); + } + + free_pattern_tree(&expr->pattern_tree); + free_pattern_tree(&expr->pattern_tree_2); + LIST_INIT(&expr->patterns); + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt = 0; +} + +/* + * + * The following functions are used for the pattern indexation + * + */ + +int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + struct pattern_list *patl; + + /* allocate pattern */ + patl = calloc(1, sizeof(*patl)); + if (!patl) { + memprintf(err, "out of memory while indexing pattern"); + return 0; + } + + /* duplicate pattern */ + memcpy(&patl->pat, pat, sizeof(*pat)); + + /* chain pattern in the expression */ + LIST_APPEND(&expr->patterns, &patl->list); + patl->expr = expr; + /* and from the reference */ + patl->from_ref = pat->ref->list_head; + pat->ref->list_head = &patl->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; +} + +int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + struct pattern_list *patl; + + /* allocate pattern */ + patl = calloc(1, sizeof(*patl)); + if (!patl) { + memprintf(err, "out of memory while indexing pattern"); + return 0; + } + + /* duplicate pattern */ + memcpy(&patl->pat, pat, sizeof(*pat)); + patl->pat.ptr.ptr = malloc(patl->pat.len); + if (!patl->pat.ptr.ptr) { + free(patl); + memprintf(err, "out of memory while indexing pattern"); + return 0; + } + memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len); + + /* chain pattern in the expression */ + LIST_APPEND(&expr->patterns, &patl->list); + patl->expr = expr; + /* and from the reference */ + patl->from_ref = pat->ref->list_head; + pat->ref->list_head = &patl->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; +} + +int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + struct pattern_list *patl; + + /* allocate pattern */ + patl = calloc(1, sizeof(*patl)); + if (!patl) { + memprintf(err, "out of memory while indexing pattern"); + return 0; + } + + /* duplicate pattern */ + memcpy(&patl->pat, pat, sizeof(*pat)); + patl->pat.ptr.str = malloc(patl->pat.len + 1); + if (!patl->pat.ptr.str) { + free(patl); + memprintf(err, "out of memory while indexing pattern"); + return 0; + } + memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len); + patl->pat.ptr.str[patl->pat.len] = '\0'; + + /* chain pattern in the expression */ + LIST_APPEND(&expr->patterns, &patl->list); + patl->expr = expr; + /* and from the reference */ + patl->from_ref = pat->ref->list_head; + pat->ref->list_head = &patl->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; +} + +int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err) +{ + struct pattern_list *patl; + + /* allocate pattern */ + patl = calloc(1, sizeof(*patl)); + if (!patl) { + memprintf(err, "out of memory while indexing pattern"); + return 0; + } + + /* duplicate pattern */ + memcpy(&patl->pat, pat, sizeof(*pat)); + + /* compile regex */ + patl->pat.sflags |= PAT_SF_REGFREE; + if (!(patl->pat.ptr.reg = regex_comp(pat->ptr.str, !(expr->mflags & PAT_MF_IGNORE_CASE), + cap, err))) { + free(patl); + return 0; + } + + /* chain pattern in the expression */ + LIST_APPEND(&expr->patterns, &patl->list); + patl->expr = expr; + /* and from the reference */ + patl->from_ref = pat->ref->list_head; + pat->ref->list_head = &patl->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; +} + +int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + return pat_idx_list_reg_cap(expr, pat, 0, err); +} + +int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + return pat_idx_list_reg_cap(expr, pat, 1, err); +} + +int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + unsigned int mask; + struct pattern_tree *node; + + /* Only IPv4 can be indexed */ + if (pat->type == SMP_T_IPV4) { + /* in IPv4 case, check if the mask is contiguous so that we can + * insert the network into the tree. A continuous mask has only + * ones on the left. This means that this mask + its lower bit + * added once again is null. + */ + mask = ntohl(pat->val.ipv4.mask.s_addr); + if (mask + (mask & -mask) == 0) { + mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */ + + /* node memory allocation */ + node = calloc(1, sizeof(*node) + 4); + if (!node) { + memprintf(err, "out of memory while loading pattern"); + return 0; + } + + /* copy the pointer to sample associated to this node */ + node->data = pat->data; + node->ref = pat->ref; + + /* FIXME: insert <addr>/<mask> into the tree here */ + memcpy(node->node.key, &pat->val.ipv4.addr, 4); /* network byte order */ + node->node.node.pfx = mask; + + /* Insert the entry. */ + ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4); + + node->expr = expr; + node->from_ref = pat->ref->tree_head; + pat->ref->tree_head = &node->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; + } + else { + /* If the mask is not contiguous, just add the pattern to the list */ + return pat_idx_list_val(expr, pat, err); + } + } + else if (pat->type == SMP_T_IPV6) { + /* IPv6 also can be indexed */ + node = calloc(1, sizeof(*node) + 16); + if (!node) { + memprintf(err, "out of memory while loading pattern"); + return 0; + } + + /* copy the pointer to sample associated to this node */ + node->data = pat->data; + node->ref = pat->ref; + + /* FIXME: insert <addr>/<mask> into the tree here */ + memcpy(node->node.key, &pat->val.ipv6.addr, 16); /* network byte order */ + node->node.node.pfx = pat->val.ipv6.mask; + + /* Insert the entry. */ + ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16); + + node->expr = expr; + node->from_ref = pat->ref->tree_head; + pat->ref->tree_head = &node->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; + } + + return 0; +} + +int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + int len; + struct pattern_tree *node; + + /* Only string can be indexed */ + if (pat->type != SMP_T_STR) { + memprintf(err, "internal error: string expected, but the type is '%s'", + smp_to_type[pat->type]); + return 0; + } + + /* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */ + if (expr->mflags & PAT_MF_IGNORE_CASE) + return pat_idx_list_str(expr, pat, err); + + /* Process the key len */ + len = strlen(pat->ptr.str) + 1; + + /* node memory allocation */ + node = calloc(1, sizeof(*node) + len); + if (!node) { + memprintf(err, "out of memory while loading pattern"); + return 0; + } + + /* copy the pointer to sample associated to this node */ + node->data = pat->data; + node->ref = pat->ref; + + /* copy the string */ + memcpy(node->node.key, pat->ptr.str, len); + + /* index the new node */ + ebst_insert(&expr->pattern_tree, &node->node); + + node->expr = expr; + node->from_ref = pat->ref->tree_head; + pat->ref->tree_head = &node->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; +} + +int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + int len; + struct pattern_tree *node; + + /* Only string can be indexed */ + if (pat->type != SMP_T_STR) { + memprintf(err, "internal error: string expected, but the type is '%s'", + smp_to_type[pat->type]); + return 0; + } + + /* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */ + if (expr->mflags & PAT_MF_IGNORE_CASE) + return pat_idx_list_str(expr, pat, err); + + /* Process the key len */ + len = strlen(pat->ptr.str); + + /* node memory allocation */ + node = calloc(1, sizeof(*node) + len + 1); + if (!node) { + memprintf(err, "out of memory while loading pattern"); + return 0; + } + + /* copy the pointer to sample associated to this node */ + node->data = pat->data; + node->ref = pat->ref; + + /* copy the string and the trailing zero */ + memcpy(node->node.key, pat->ptr.str, len + 1); + node->node.node.pfx = len * 8; + + /* index the new node */ + ebmb_insert_prefix(&expr->pattern_tree, &node->node, len); + + node->expr = expr; + node->from_ref = pat->ref->tree_head; + pat->ref->tree_head = &node->from_ref; + expr->ref->revision = rdtsc(); + expr->ref->entry_cnt++; + + /* that's ok */ + return 1; +} + +/* Deletes all patterns from reference <elt>. Note that all of their + * expressions must be locked, and the pattern lock must be held as well. + */ +void pat_delete_gen(struct pat_ref *ref, struct pat_ref_elt *elt) +{ + struct pattern_tree *tree; + struct pattern_list *pat; + void **node; + + /* delete all known tree nodes. They are all allocated inline */ + for (node = elt->tree_head; node;) { + tree = container_of(node, struct pattern_tree, from_ref); + node = *node; + BUG_ON(tree->ref != elt); + + ebmb_delete(&tree->node); + free(tree->data); + free(tree); + } + + /* delete all list nodes and free their pattern entries (str/reg) */ + for (node = elt->list_head; node;) { + pat = container_of(node, struct pattern_list, from_ref); + node = *node; + BUG_ON(pat->pat.ref != elt); + + /* Delete and free entry. */ + LIST_DELETE(&pat->list); + if (pat->pat.sflags & PAT_SF_REGFREE) + regex_free(pat->pat.ptr.reg); + else + free(pat->pat.ptr.ptr); + free(pat->pat.data); + free(pat); + } + + /* update revision number to refresh the cache */ + ref->revision = rdtsc(); + ref->entry_cnt--; + elt->tree_head = NULL; + elt->list_head = NULL; +} + +void pattern_init_expr(struct pattern_expr *expr) +{ + LIST_INIT(&expr->patterns); + expr->pattern_tree = EB_ROOT; + expr->pattern_tree_2 = EB_ROOT; +} + +void pattern_init_head(struct pattern_head *head) +{ + LIST_INIT(&head->head); +} + +/* The following functions are relative to the management of the reference + * lists. These lists are used to store the original pattern and associated + * value as string form. + * + * This is used with modifiable ACL and MAPS + * + * The pattern reference are stored with two identifiers: the unique_id and + * the reference. + * + * The reference identify a file. Each file with the same name point to the + * same reference. We can register many times one file. If the file is modified, + * all his dependencies are also modified. The reference can be used with map or + * acl. + * + * The unique_id identify inline acl. The unique id is unique for each acl. + * You cannot force the same id in the configuration file, because this repoort + * an error. + * + * A particular case appears if the filename is a number. In this case, the + * unique_id is set with the number represented by the filename and the + * reference is also set. This method prevent double unique_id. + * + */ + +/* This function looks up a reference by name. If the reference is found, a + * pointer to the struct pat_ref is returned, otherwise NULL is returned. + */ +struct pat_ref *pat_ref_lookup(const char *reference) +{ + struct pat_ref *ref; + + list_for_each_entry(ref, &pattern_reference, list) + if (ref->reference && strcmp(reference, ref->reference) == 0) + return ref; + return NULL; +} + +/* This function looks up a reference's unique id. If the reference is found, a + * pointer to the struct pat_ref is returned, otherwise NULL is returned. + */ +struct pat_ref *pat_ref_lookupid(int unique_id) +{ + struct pat_ref *ref; + + list_for_each_entry(ref, &pattern_reference, list) + if (ref->unique_id == unique_id) + return ref; + return NULL; +} + +/* This function removes from the pattern reference <ref> all the patterns + * attached to the reference element <elt>, and the element itself. The + * reference must be locked. + */ +void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt) +{ + struct pattern_expr *expr; + struct bref *bref, *back; + + /* + * we have to unlink all watchers from this reference pattern. We must + * not relink them if this elt was the last one in the list. + */ + list_for_each_entry_safe(bref, back, &elt->back_refs, users) { + LIST_DELETE(&bref->users); + LIST_INIT(&bref->users); + if (elt->list.n != &ref->head) + LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users); + bref->ref = elt->list.n; + } + + /* delete all entries from all expressions for this pattern */ + list_for_each_entry(expr, &ref->pat, list) + HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock); + + pat_delete_gen(ref, elt); + + list_for_each_entry(expr, &ref->pat, list) + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); + + LIST_DELETE(&elt->list); + ebmb_delete(&elt->node); + free(elt->sample); + free(elt); +} + +/* This function removes the pattern matching the pointer <refelt> from + * the reference and from each expr member of this reference. This function + * returns 1 if the entry was found and deleted, otherwise zero. + * + * <refelt> is user input: it is provided as an ID and should never be + * dereferenced without making sure that it is valid. + */ +int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt) +{ + struct pat_ref_elt *elt, *safe; + + /* delete pattern from reference */ + list_for_each_entry_safe(elt, safe, &ref->head, list) { + if (elt == refelt) { + pat_ref_delete_by_ptr(ref, elt); + return 1; + } + } + return 0; +} + +/* This function removes all patterns matching <key> from the reference + * and from each expr member of the reference. This function returns 1 + * if the deletion is done and returns 0 is the entry is not found. + */ +int pat_ref_delete(struct pat_ref *ref, const char *key) +{ + struct ebmb_node *node; + int found = 0; + + /* delete pattern from reference */ + node = ebst_lookup(&ref->ebmb_root, key); + while (node) { + struct pat_ref_elt *elt; + + elt = ebmb_entry(node, struct pat_ref_elt, node); + node = ebmb_next_dup(node); + pat_ref_delete_by_ptr(ref, elt); + found = 1; + } + + return found; +} + +/* + * find and return an element <elt> matching <key> in a reference <ref> + * return NULL if not found + */ +struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key) +{ + struct ebmb_node *node; + + node = ebst_lookup(&ref->ebmb_root, key); + if (node) + return ebmb_entry(node, struct pat_ref_elt, node); + + return NULL; +} + + +/* This function modifies the sample of pat_ref_elt <elt> in all expressions + * found under <ref> to become <value>. It is assumed that the caller has + * already verified that <elt> belongs to <ref>. + */ +static inline int pat_ref_set_elt(struct pat_ref *ref, struct pat_ref_elt *elt, + const char *value, char **err) +{ + struct pattern_expr *expr; + struct sample_data **data; + char *sample; + struct sample_data test; + struct pattern_tree *tree; + struct pattern_list *pat; + void **node; + + + /* Try all needed converters. */ + list_for_each_entry(expr, &ref->pat, list) { + if (!expr->pat_head->parse_smp) + continue; + + if (!expr->pat_head->parse_smp(value, &test)) { + memprintf(err, "unable to parse '%s'", value); + return 0; + } + } + + /* Modify pattern from reference. */ + sample = strdup(value); + if (!sample) { + memprintf(err, "out of memory error"); + return 0; + } + /* Load sample in each reference. All the conversions are tested + * below, normally these calls don't fail. + */ + for (node = elt->tree_head; node;) { + tree = container_of(node, struct pattern_tree, from_ref); + node = *node; + BUG_ON(tree->ref != elt); + expr = tree->expr; + if (!expr->pat_head->parse_smp) + continue; + + data = &tree->data; + if (data && *data) { + HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock); + if (!expr->pat_head->parse_smp(sample, *data)) + *data = NULL; + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); + } + } + + for (node = elt->list_head; node;) { + pat = container_of(node, struct pattern_list, from_ref); + node = *node; + BUG_ON(pat->pat.ref != elt); + expr = pat->expr; + if (!expr->pat_head->parse_smp) + continue; + + data = &pat->pat.data; + if (data && *data) { + HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock); + if (!expr->pat_head->parse_smp(sample, *data)) + *data = NULL; + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); + } + } + + /* free old sample only when all exprs are updated */ + free(elt->sample); + elt->sample = sample; + + + return 1; +} + +/* This function modifies the sample of pat_ref_elt <refelt> in all expressions + * found under <ref> to become <value>, after checking that <refelt> really + * belongs to <ref>. + * + * <refelt> is user input: it is provided as an ID and should never be + * dereferenced without making sure that it is valid. + */ +int pat_ref_set_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt, const char *value, char **err) +{ + struct pat_ref_elt *elt; + + /* Look for pattern in the reference. */ + list_for_each_entry(elt, &ref->head, list) { + if (elt == refelt) { + if (!pat_ref_set_elt(ref, elt, value, err)) + return 0; + return 1; + } + } + + memprintf(err, "key or pattern not found"); + return 0; +} + +/* This function modifies to <value> the sample of all patterns matching <key> + * under <ref>. + */ +int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char **err, struct pat_ref_elt *elt) +{ + int found = 0; + char *_merr; + char **merr; + struct ebmb_node *node; + + if (err) { + merr = &_merr; + *merr = NULL; + } + else + merr = NULL; + + if (elt) { + node = &elt->node; + } + else { + /* Look for pattern in the reference. */ + node = ebst_lookup(&ref->ebmb_root, key); + } + + while (node) { + elt = ebmb_entry(node, struct pat_ref_elt, node); + node = ebmb_next_dup(node); + if (!pat_ref_set_elt(ref, elt, value, merr)) { + if (err && merr) { + if (!found) { + *err = *merr; + } else { + memprintf(err, "%s, %s", *err, *merr); + ha_free(merr); + } + } + } + found = 1; + } + + if (!found) { + memprintf(err, "entry not found"); + return 0; + } + return 1; +} + +/* This function creates a new reference. <ref> is the reference name. + * <flags> are PAT_REF_*. /!\ The reference is not checked, and must + * be unique. The user must check the reference with "pat_ref_lookup()" + * before calling this function. If the function fails, it returns NULL, + * otherwise it returns the new struct pat_ref. + */ +struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned int flags) +{ + struct pat_ref *ref; + + ref = calloc(1, sizeof(*ref)); + if (!ref) + return NULL; + + if (display) { + ref->display = strdup(display); + if (!ref->display) { + free(ref); + return NULL; + } + } + + ref->reference = strdup(reference); + if (!ref->reference) { + free(ref->display); + free(ref); + return NULL; + } + + ref->flags = flags; + ref->unique_id = -1; + ref->revision = 0; + ref->entry_cnt = 0; + + LIST_INIT(&ref->head); + ref->ebmb_root = EB_ROOT; + LIST_INIT(&ref->pat); + HA_RWLOCK_INIT(&ref->lock); + LIST_APPEND(&pattern_reference, &ref->list); + + return ref; +} + +/* This function creates a new reference. <unique_id> is the unique id. If + * the value of <unique_id> is -1, the unique id is calculated later. + * <flags> are PAT_REF_*. /!\ The reference is not checked, and must + * be unique. The user must check the reference with "pat_ref_lookup()" + * or pat_ref_lookupid before calling this function. If the function + * fails, it returns NULL, otherwise it returns the new struct pat_ref. + */ +struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int flags) +{ + struct pat_ref *ref; + + ref = calloc(1, sizeof(*ref)); + if (!ref) + return NULL; + + if (display) { + ref->display = strdup(display); + if (!ref->display) { + free(ref); + return NULL; + } + } + + ref->reference = NULL; + ref->flags = flags; + ref->curr_gen = 0; + ref->next_gen = 0; + ref->unique_id = unique_id; + LIST_INIT(&ref->head); + ref->ebmb_root = EB_ROOT; + LIST_INIT(&ref->pat); + HA_RWLOCK_INIT(&ref->lock); + LIST_APPEND(&pattern_reference, &ref->list); + + return ref; +} + +/* This function adds entry to <ref>. It can fail on memory error. It returns + * the newly added element on success, or NULL on failure. The PATREF_LOCK on + * <ref> must be held. It sets the newly created pattern's generation number + * to the same value as the reference's. + */ +struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, const char *sample, int line) +{ + struct pat_ref_elt *elt; + int len = strlen(pattern); + + elt = calloc(1, sizeof(*elt) + len + 1); + if (!elt) + goto fail; + + elt->gen_id = ref->curr_gen; + elt->line = line; + + memcpy((char*)elt->pattern, pattern, len + 1); + + if (sample) { + elt->sample = strdup(sample); + if (!elt->sample) + goto fail; + } + + LIST_INIT(&elt->back_refs); + elt->list_head = NULL; + elt->tree_head = NULL; + LIST_APPEND(&ref->head, &elt->list); + /* Even if calloc()'ed, ensure this node is not linked to a tree. */ + elt->node.node.leaf_p = NULL; + ebst_insert(&ref->ebmb_root, &elt->node); + return elt; + fail: + free(elt); + return NULL; +} + +/* This function creates sample found in <elt>, parses the pattern also + * found in <elt> and inserts it in <expr>. The function copies <patflags> + * into <expr>. If the function fails, it returns 0 and <err> is filled. + * In success case, the function returns 1. + */ +int pat_ref_push(struct pat_ref_elt *elt, struct pattern_expr *expr, + int patflags, char **err) +{ + struct sample_data *data; + struct pattern pattern; + + /* Create sample */ + if (elt->sample && expr->pat_head->parse_smp) { + /* New sample. */ + data = malloc(sizeof(*data)); + if (!data) + return 0; + + /* Parse value. */ + if (!expr->pat_head->parse_smp(elt->sample, data)) { + memprintf(err, "unable to parse '%s'", elt->sample); + free(data); + return 0; + } + + } + else + data = NULL; + + /* initialise pattern */ + memset(&pattern, 0, sizeof(pattern)); + pattern.data = data; + pattern.ref = elt; + + /* parse pattern */ + if (!expr->pat_head->parse(elt->pattern, &pattern, expr->mflags, err)) { + free(data); + return 0; + } + + HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock); + /* index pattern */ + if (!expr->pat_head->index(expr, &pattern, err)) { + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); + free(data); + return 0; + } + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); + + return 1; +} + +/* This function tries to commit entry <elt> into <ref>. The new entry must + * have already been inserted using pat_ref_append(), and its generation number + * may have been adjusted as it will not be changed. <err> must point to a NULL + * pointer. The PATREF lock on <ref> must be held. All the pattern_expr for + * this reference will be updated (parsing, indexing). On success, non-zero is + * returned. On failure, all the operation is rolled back (the element is + * deleted from all expressions and is freed), zero is returned and the error + * pointer <err> may have been updated (and the caller must free it). Failure + * causes include memory allocation, parsing error or indexing error. + */ +int pat_ref_commit_elt(struct pat_ref *ref, struct pat_ref_elt *elt, char **err) +{ + struct pattern_expr *expr; + + list_for_each_entry(expr, &ref->pat, list) { + if (!pat_ref_push(elt, expr, 0, err)) { + pat_ref_delete_by_ptr(ref, elt); + return 0; + } + } + return 1; +} + +/* Loads <pattern>:<sample> into <ref> for generation <gen>. <sample> may be + * NULL if none exists (e.g. ACL). If not needed, the generation number should + * be set to ref->curr_gen. The error pointer must initially point to NULL. The + * new entry will be propagated to all use places, involving allocation, parsing + * and indexing. On error (parsing, allocation), the operation will be rolled + * back, an error may be reported, and NULL will be reported. On success, the + * freshly allocated element will be returned. The PATREF lock on <ref> must be + * held during the operation. + */ +struct pat_ref_elt *pat_ref_load(struct pat_ref *ref, unsigned int gen, + const char *pattern, const char *sample, + int line, char **err) +{ + struct pat_ref_elt *elt; + + elt = pat_ref_append(ref, pattern, sample, line); + if (elt) { + elt->gen_id = gen; + if (!pat_ref_commit_elt(ref, elt, err)) + elt = NULL; + } else + memprintf(err, "out of memory error"); + + return elt; +} + +/* This function adds entry to <ref>. It can fail on memory error. The new + * entry is added at all the pattern_expr registered in this reference. The + * function stops on the first error encountered. It returns 0 and <err> is + * filled. If an error is encountered, the complete add operation is cancelled. + * If the insertion is a success the function returns 1. + */ +int pat_ref_add(struct pat_ref *ref, + const char *pattern, const char *sample, + char **err) +{ + return !!pat_ref_load(ref, ref->curr_gen, pattern, sample, -1, err); +} + +/* This function purges all elements from <ref> whose generation is included in + * the range of <from> to <to> (inclusive), taking wrapping into consideration. + * It will not purge more than <budget> entries at once, in order to remain + * responsive. If budget is negative, no limit is applied. + * The caller must already hold the PATREF_LOCK on <ref>. The function will + * take the PATEXP_LOCK on all expressions of the pattern as needed. It returns + * non-zero on completion, or zero if it had to stop before the end after + * <budget> was depleted. + */ +int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget) +{ + struct pat_ref_elt *elt, *elt_bck; + struct bref *bref, *bref_bck; + struct pattern_expr *expr; + int done; + + list_for_each_entry(expr, &ref->pat, list) + HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock); + + /* all expr are locked, we can safely remove all pat_ref */ + + /* assume completion for e.g. empty lists */ + done = 1; + list_for_each_entry_safe(elt, elt_bck, &ref->head, list) { + if (elt->gen_id - from > to - from) + continue; + + if (budget >= 0 && !budget--) { + done = 0; + break; + } + + /* + * we have to unlink all watchers from this reference pattern. We must + * not relink them if this elt was the last one in the list. + */ + list_for_each_entry_safe(bref, bref_bck, &elt->back_refs, users) { + LIST_DELETE(&bref->users); + LIST_INIT(&bref->users); + if (elt->list.n != &ref->head) + LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users); + bref->ref = elt->list.n; + } + + /* delete the storage for all representations of this pattern. */ + pat_delete_gen(ref, elt); + + LIST_DELETE(&elt->list); + ebmb_delete(&elt->node); + free(elt->sample); + free(elt); + } + + list_for_each_entry(expr, &ref->pat, list) + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); + + return done; +} + +/* This function prunes all entries of <ref> and all their associated + * pattern_expr. It may return before the end of the list is reached, + * returning 0, to yield, indicating to the caller that it must call it again. + * until it returns non-zero. All patterns are purged, both current ones and + * future or incomplete ones. This is used by "clear map" or "clear acl". + */ +int pat_ref_prune(struct pat_ref *ref) +{ + return pat_ref_purge_range(ref, 0, ~0, 100); +} + +/* This function looks up any existing reference <ref> in pattern_head <head>, and + * returns the associated pattern_expr pointer if found, otherwise NULL. + */ +struct pattern_expr *pattern_lookup_expr(struct pattern_head *head, struct pat_ref *ref) +{ + struct pattern_expr_list *expr; + + list_for_each_entry(expr, &head->head, list) + if (expr->expr->ref == ref) + return expr->expr; + return NULL; +} + +/* This function creates new pattern_expr associated to the reference <ref>. + * <ref> can be NULL. If an error occurs, the function returns NULL and + * <err> is filled. Otherwise, the function returns new pattern_expr linked + * with <head> and <ref>. + * + * The returned value can be an already filled pattern list, in this case the + * flag <reuse> is set. + */ +struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref *ref, + int patflags, char **err, int *reuse) +{ + struct pattern_expr *expr; + struct pattern_expr_list *list; + + if (reuse) + *reuse = 0; + + /* Memory and initialization of the chain element. */ + list = calloc(1, sizeof(*list)); + if (!list) { + memprintf(err, "out of memory"); + return NULL; + } + + /* Look for existing similar expr. No that only the index, parse and + * parse_smp function must be identical for having similar pattern. + * The other function depends of these first. + */ + if (ref) { + list_for_each_entry(expr, &ref->pat, list) + if (expr->pat_head->index == head->index && + expr->pat_head->parse == head->parse && + expr->pat_head->parse_smp == head->parse_smp && + expr->mflags == patflags) + break; + if (&expr->list == &ref->pat) + expr = NULL; + } + else + expr = NULL; + + /* If no similar expr was found, we create new expr. */ + if (!expr) { + /* Get a lot of memory for the expr struct. */ + expr = calloc(1, sizeof(*expr)); + if (!expr) { + free(list); + memprintf(err, "out of memory"); + return NULL; + } + + /* Initialize this new expr. */ + pattern_init_expr(expr); + + /* Copy the pattern matching and indexing flags. */ + expr->mflags = patflags; + + /* This new pattern expression reference one of his heads. */ + expr->pat_head = head; + + /* Link with ref, or to self to facilitate LIST_DELETE() */ + if (ref) + LIST_APPEND(&ref->pat, &expr->list); + else + LIST_INIT(&expr->list); + + expr->ref = ref; + + HA_RWLOCK_INIT(&expr->lock); + + /* We must free this pattern if it is no more used. */ + list->do_free = 1; + } + else { + /* If the pattern used already exists, it is already linked + * with ref and we must not free it. + */ + list->do_free = 0; + if (reuse) + *reuse = 1; + } + + /* The new list element reference the pattern_expr. */ + list->expr = expr; + + /* Link the list element with the pattern_head. */ + LIST_APPEND(&head->head, &list->list); + return expr; +} + +/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will + * be returned there on errors and the caller will have to free it. + * + * The file contains one key + value per line. Lines which start with '#' are + * ignored, just like empty lines. Leading tabs/spaces are stripped. The key is + * then the first "word" (series of non-space/tabs characters), and the value is + * what follows this series of space/tab till the end of the line excluding + * trailing spaces/tabs. + * + * Example : + * + * # this is a comment and is ignored + * 62.212.114.60 1wt.eu \n + * <-><-----------><---><----><----> + * | | | | `--- trailing spaces ignored + * | | | `-------- value + * | | `--------------- middle spaces ignored + * | `------------------------ key + * `-------------------------------- leading spaces ignored + * + * Return non-zero in case of success, otherwise 0. + */ +int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char **err) +{ + FILE *file; + char *c; + int ret = 0; + int line = 0; + char *key_beg; + char *key_end; + char *value_beg; + char *value_end; + + file = fopen(filename, "r"); + if (!file) { + memprintf(err, "failed to open pattern file <%s>", filename); + return 0; + } + + /* now parse all patterns. The file may contain only one pattern + * followed by one value per line. The start spaces, separator spaces + * and and spaces are stripped. Each can contain comment started by '#' + */ + while (fgets(trash.area, trash.size, file) != NULL) { + line++; + c = trash.area; + + /* ignore lines beginning with a dash */ + if (*c == '#') + continue; + + /* strip leading spaces and tabs */ + while (*c == ' ' || *c == '\t') + c++; + + /* empty lines are ignored too */ + if (*c == '\0' || *c == '\r' || *c == '\n') + continue; + + /* look for the end of the key */ + key_beg = c; + while (*c && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r') + c++; + + key_end = c; + + /* strip middle spaces and tabs */ + while (*c == ' ' || *c == '\t') + c++; + + /* look for the end of the value, it is the end of the line */ + value_beg = c; + while (*c && *c != '\n' && *c != '\r') + c++; + value_end = c; + + /* trim possibly trailing spaces and tabs */ + while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t')) + value_end--; + + /* set final \0 and check entries */ + *key_end = '\0'; + *value_end = '\0'; + + /* insert values */ + if (!pat_ref_append(ref, key_beg, value_beg, line)) { + memprintf(err, "out of memory"); + goto out_close; + } + } + + if (ferror(file)) { + memprintf(err, "error encountered while reading <%s> : %s", + filename, strerror(errno)); + goto out_close; + } + /* success */ + ret = 1; + + out_close: + fclose(file); + return ret; +} + +/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will + * be returned there on errors and the caller will have to free it. + */ +int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err) +{ + FILE *file; + char *c; + char *arg; + int ret = 0; + int line = 0; + + file = fopen(filename, "r"); + if (!file) { + memprintf(err, "failed to open pattern file <%s>", filename); + return 0; + } + + /* now parse all patterns. The file may contain only one pattern per + * line. If the line contains spaces, they will be part of the pattern. + * The pattern stops at the first CR, LF or EOF encountered. + */ + while (fgets(trash.area, trash.size, file) != NULL) { + line++; + c = trash.area; + + /* ignore lines beginning with a dash */ + if (*c == '#') + continue; + + /* strip leading spaces and tabs */ + while (*c == ' ' || *c == '\t') + c++; + + + arg = c; + while (*c && *c != '\n' && *c != '\r') + c++; + *c = 0; + + /* empty lines are ignored too */ + if (c == arg) + continue; + + if (!pat_ref_append(ref, arg, NULL, line)) { + memprintf(err, "out of memory when loading patterns from file <%s>", filename); + goto out_close; + } + } + + if (ferror(file)) { + memprintf(err, "error encountered while reading <%s> : %s", + filename, strerror(errno)); + goto out_close; + } + ret = 1; /* success */ + + out_close: + fclose(file); + return ret; +} + +int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, + const char *filename, int patflags, int load_smp, + char **err, const char *file, int line) +{ + struct pat_ref *ref; + struct pattern_expr *expr; + struct pat_ref_elt *elt; + int reuse = 0; + + /* Lookup for the existing reference. */ + ref = pat_ref_lookup(filename); + + /* If the reference doesn't exists, create it and load associated file. */ + if (!ref) { + chunk_printf(&trash, + "pattern loaded from file '%s' used by %s at file '%s' line %d", + filename, refflags & PAT_REF_MAP ? "map" : "acl", file, line); + + ref = pat_ref_new(filename, trash.area, refflags); + if (!ref) { + memprintf(err, "out of memory"); + return 0; + } + + if (load_smp) { + ref->flags |= PAT_REF_SMP; + if (!pat_ref_read_from_file_smp(ref, filename, err)) + return 0; + } + else { + if (!pat_ref_read_from_file(ref, filename, err)) + return 0; + } + } + else { + /* The reference already exists, check the map compatibility. */ + + /* If the load require samples and the flag PAT_REF_SMP is not set, + * the reference doesn't contain sample, and cannot be used. + */ + if (load_smp) { + if (!(ref->flags & PAT_REF_SMP)) { + memprintf(err, "The file \"%s\" is already used as one column file " + "and cannot be used by as two column file.", + filename); + return 0; + } + } + else { + /* The load doesn't require samples. If the flag PAT_REF_SMP is + * set, the reference contains a sample, and cannot be used. + */ + if (ref->flags & PAT_REF_SMP) { + memprintf(err, "The file \"%s\" is already used as two column file " + "and cannot be used by as one column file.", + filename); + return 0; + } + } + + /* Extends display */ + chunk_printf(&trash, "%s", ref->display); + chunk_appendf(&trash, ", by %s at file '%s' line %d", + refflags & PAT_REF_MAP ? "map" : "acl", file, line); + free(ref->display); + ref->display = strdup(trash.area); + if (!ref->display) { + memprintf(err, "out of memory"); + return 0; + } + + /* Merge flags. */ + ref->flags |= refflags; + } + + /* Now, we can loading patterns from the reference. */ + + /* Lookup for existing reference in the head. If the reference + * doesn't exists, create it. + */ + expr = pattern_lookup_expr(head, ref); + if (!expr || (expr->mflags != patflags)) { + expr = pattern_new_expr(head, ref, patflags, err, &reuse); + if (!expr) + return 0; + } + + /* The returned expression may be not empty, because the function + * "pattern_new_expr" lookup for similar pattern list and can + * reuse a already filled pattern list. In this case, we can not + * reload the patterns. + */ + if (reuse) + return 1; + + /* Load reference content in the pattern expression. + * We need to load elements in the same order they were seen in the + * file as list-based matching types may rely on it. + */ + list_for_each_entry(elt, &ref->head, list) { + if (!pat_ref_push(elt, expr, patflags, err)) { + if (elt->line > 0) + memprintf(err, "%s at line %d of file '%s'", + *err, elt->line, filename); + return 0; + } + } + + return 1; +} + +/* This function executes a pattern match on a sample. It applies pattern <expr> + * to sample <smp>. The function returns NULL if the sample don't match. It returns + * non-null if the sample match. If <fill> is true and the sample match, the + * function returns the matched pattern. In many cases, this pattern can be a + * static buffer. + */ +struct pattern *pattern_exec_match(struct pattern_head *head, struct sample *smp, int fill) +{ + struct pattern_expr_list *list; + struct pattern *pat; + + if (!head->match) { + if (fill) { + static_pattern.data = NULL; + static_pattern.ref = NULL; + static_pattern.sflags = 0; + static_pattern.type = SMP_T_SINT; + static_pattern.val.i = 1; + } + return &static_pattern; + } + + /* convert input to string */ + if (!sample_convert(smp, head->expect_type)) + return NULL; + + list_for_each_entry(list, &head->head, list) { + HA_RWLOCK_RDLOCK(PATEXP_LOCK, &list->expr->lock); + pat = head->match(smp, list->expr, fill); + if (pat) { + /* We duplicate the pattern cause it could be modified + by another thread */ + if (pat != &static_pattern) { + memcpy(&static_pattern, pat, sizeof(struct pattern)); + pat = &static_pattern; + } + + /* We also duplicate the sample data for + same reason */ + if (pat->data && (pat->data != &static_sample_data)) { + switch(pat->data->type) { + case SMP_T_STR: + static_sample_data.type = SMP_T_STR; + static_sample_data.u.str = *get_trash_chunk(); + static_sample_data.u.str.data = pat->data->u.str.data; + if (static_sample_data.u.str.data >= static_sample_data.u.str.size) + static_sample_data.u.str.data = static_sample_data.u.str.size - 1; + memcpy(static_sample_data.u.str.area, + pat->data->u.str.area, static_sample_data.u.str.data); + static_sample_data.u.str.area[static_sample_data.u.str.data] = 0; + pat->data = &static_sample_data; + break; + + case SMP_T_IPV4: + case SMP_T_IPV6: + case SMP_T_SINT: + memcpy(&static_sample_data, pat->data, sizeof(struct sample_data)); + pat->data = &static_sample_data; + break; + default: + /* unimplemented pattern type */ + pat->data = NULL; + break; + } + } + HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock); + return pat; + } + HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock); + } + return NULL; +} + +/* This function prunes the pattern expressions starting at pattern_head <head>. */ +void pattern_prune(struct pattern_head *head) +{ + struct pattern_expr_list *list, *safe; + + list_for_each_entry_safe(list, safe, &head->head, list) { + LIST_DELETE(&list->list); + if (list->do_free) { + LIST_DELETE(&list->expr->list); + HA_RWLOCK_WRLOCK(PATEXP_LOCK, &list->expr->lock); + head->prune(list->expr); + HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &list->expr->lock); + free(list->expr); + } + free(list); + } +} + +/* This function compares two pat_ref** on their unique_id, and returns -1/0/1 + * depending on their order (suitable for sorting). + */ +static int cmp_pat_ref(const void *_a, const void *_b) +{ + struct pat_ref * const *a = _a; + struct pat_ref * const *b = _b; + + if ((*a)->unique_id < (*b)->unique_id) + return -1; + else if ((*a)->unique_id > (*b)->unique_id) + return 1; + return 0; +} + +/* This function finalizes the configuration parsing. It sets all the + * automatic ids. + */ +int pattern_finalize_config(void) +{ + size_t len = 0; + size_t unassigned_pos = 0; + int next_unique_id = 0; + size_t i, j; + struct pat_ref *ref, **arr; + struct list pr = LIST_HEAD_INIT(pr); + + pat_lru_seed = ha_random(); + + /* Count pat_refs with user defined unique_id and totalt count */ + list_for_each_entry(ref, &pattern_reference, list) { + len++; + if (ref->unique_id != -1) + unassigned_pos++; + } + + if (len == 0) { + return 0; + } + + arr = calloc(len, sizeof(*arr)); + if (arr == NULL) { + ha_alert("Out of memory error.\n"); + return ERR_ALERT | ERR_FATAL; + } + + i = 0; + j = unassigned_pos; + list_for_each_entry(ref, &pattern_reference, list) { + if (ref->unique_id != -1) + arr[i++] = ref; + else + arr[j++] = ref; + } + + /* Sort first segment of array with user-defined unique ids for + * fast lookup when generating unique ids + */ + qsort(arr, unassigned_pos, sizeof(*arr), cmp_pat_ref); + + /* Assign unique ids to the rest of the elements */ + for (i = unassigned_pos; i < len; i++) { + do { + arr[i]->unique_id = next_unique_id++; + } while (bsearch(&arr[i], arr, unassigned_pos, sizeof(*arr), cmp_pat_ref)); + } + + /* Sort complete array */ + qsort(arr, len, sizeof(*arr), cmp_pat_ref); + + /* Convert back to linked list */ + for (i = 0; i < len; i++) + LIST_APPEND(&pr, &arr[i]->list); + + /* swap root */ + LIST_INSERT(&pr, &pattern_reference); + LIST_DELETE(&pr); + + free(arr); + return 0; +} + +static int pattern_per_thread_lru_alloc() +{ + if (!global.tune.pattern_cache) + return 1; + pat_lru_tree = lru64_new(global.tune.pattern_cache); + return !!pat_lru_tree; +} + +static void pattern_per_thread_lru_free() +{ + lru64_destroy(pat_lru_tree); +} + +REGISTER_PER_THREAD_ALLOC(pattern_per_thread_lru_alloc); +REGISTER_PER_THREAD_FREE(pattern_per_thread_lru_free); |